#!/usr/bin/env python3
"""
Diff per-PSM per-ion trace outputs from Rust (msgf-trace --trace-json) and
Java (instrumented java-legacy stderr). For each (scan, peptide) PSM, align
records by (ion_kind, theo_mz tolerance 1e-3 Da) and emit a side-by-side
table.

Usage:
    diff_score_psm_traces.py --rust rust-trace.json --java java-trace.log \\
        [--mz-tol 1e-3] [--score-tol 1e-4] [--scan SCAN] [--peptide PEP]

Outputs to stdout. Exit code 0 = success.

Rust JSON shape (per PSM):
    {
      "scan": int,
      "peptide": str,
      "charge": int,
      "rust_rank_score": int,
      "ions": [
        {"ion_type": str, "theo_mz": float, "rank": int|null,
         "max_rank": int, "log_prob": float, "contribution": float},
        ...
      ]
    }

Java log shape (one line per ion, tab-separated key=value fields):
    TRACE\\tscan=SCAN\\tpeptide=PEPTIDE\\tion=ION\\ttheo_mz=MZ\\trank=RANK\\tlog_prob=LOGP\\tcontribution=CONTRIB

Java represents a missing rank as rank=-1 (Rust uses null).
"""

import argparse
import collections
import json
import math
import re
import struct
import sys

# Default absolute tolerance above which log_prob / contribution count as
# different.
DEFAULT_SCORE_TOL = 1e-4

# Rust `IonType` Debug rendering, e.g. `Prefix { charge: 1, offset_bits: 0 }`.
_RUST_ION_RE = re.compile(
    r"(Prefix|Suffix)\s*\{\s*charge:\s*(\d+),\s*offset_bits:\s*(\d+)\s*\}"
)
# Java rendering, e.g. `b/1+0.00000`.
_JAVA_ION_RE = re.compile(r"([by])/(\d+)\+([\d.+\-eE]+)")

# (title, width) of the numeric/table columns; the free-form flags column
# follows them. Shared by the header and by format_row so they cannot drift.
_COLUMNS = (
    ("ion_type", 22),
    ("theo_mz", 10),
    ("R_rk", 5),
    ("J_rk", 5),
    ("R_logP", 9),
    ("J_logP", 9),
    ("R_ctrb", 9),
    ("J_ctrb", 9),
)
_HEADER = " ".join(f"{title:>{width}}" for title, width in _COLUMNS) + " flags"


def _format_ion(kind, charge, offset):
    """Render the normalized `kind/charge+offset` key with a 5-decimal offset."""
    text = f"{offset:.5f}"
    # -0.0 (and tiny negatives that round to it) would print as "-0.00000"
    # and never match the other side's "0.00000".
    if text == "-0.00000":
        text = "0.00000"
    return f"{kind}/{charge}+{text}"


def normalize_ion_kind(s: str) -> str:
    """Map both Rust and Java ion-type representations to a normalized key.

    Rust format: `Prefix { charge: 1, offset_bits: 0 }` (offset_bits is the
    IEEE-754 bit pattern of a 32-bit float offset).
    Java format: `b/1+0.00000`
    Normalize to: `b/CHARGE+OFFSET` or `y/CHARGE+OFFSET` (offset with five
    decimals) or `Noise`.

    Input that matches neither format, or whose offset cannot be decoded, is
    returned stripped but otherwise unchanged, so it still aligns with an
    identical string on the other side instead of aborting the whole diff.
    """
    s = s.strip()
    if "Noise" in s:
        return "Noise"

    rust_match = _RUST_ION_RE.match(s)
    if rust_match:
        kind = "b" if rust_match.group(1) == "Prefix" else "y"
        try:
            offset = struct.unpack(
                ">f", struct.pack(">I", int(rust_match.group(3)))
            )[0]
        except struct.error:
            # offset_bits does not fit in 32 bits: not a float bit pattern.
            return s
        return _format_ion(kind, int(rust_match.group(2)), offset)

    java_match = _JAVA_ION_RE.match(s)
    if java_match:
        try:
            offset = float(java_match.group(3))
        except ValueError:
            # The character class admits junk such as "1.2.3".
            return s
        return _format_ion(java_match.group(1), int(java_match.group(2)), offset)

    return s


def parse_rust_json(path: str) -> dict:
    """Return {(scan, peptide): [ion dict, ...]} from the Rust trace JSON.

    When the same (scan, peptide) appears in several PSM records (msgf-trace
    writes one record per charge tried for the --java-top1 peptide, all under
    the same label), the ions are concatenated in file order and a warning is
    printed to stderr, instead of silently keeping only the last record.

    Raises KeyError if a PSM record lacks "scan", "peptide" or "ions", and
    whatever json.load raises for malformed JSON.
    """
    out = {}
    with open(path, encoding="utf-8") as fh:
        data = json.load(fh)
    for psm in data:
        key = (psm["scan"], psm["peptide"])
        if key in out:
            print(
                f"WARN: duplicate Rust PSM record for scan={key[0]} "
                f"peptide={key[1]}; merging ions",
                file=sys.stderr,
            )
            out[key].extend(psm["ions"])
        else:
            out[key] = list(psm["ions"])
    return out


def parse_java_log(path: str) -> dict:
    """Return {(scan, peptide): [ion dict, ...]} from the Java TRACE log.

    Only lines starting with "TRACE<tab>" are considered; everything else in
    the captured stderr is ignored. A TRACE line without scan= or peptide=,
    or with a non-numeric value, is skipped with a warning on stderr. Missing
    theo_mz / log_prob / contribution fields become NaN, and a missing or -1
    rank becomes None.
    """
    out = collections.defaultdict(list)
    # errors="replace": the log is captured stderr and may contain bytes that
    # are not valid UTF-8 on non-TRACE lines.
    with open(path, encoding="utf-8", errors="replace") as fh:
        for line in fh:
            line = line.rstrip("\r\n")
            if not line.startswith("TRACE\t"):
                continue
            fields = {}
            for part in line.split("\t")[1:]:
                if "=" not in part:
                    continue
                k, v = part.split("=", 1)
                fields[k] = v
            try:
                scan = int(fields["scan"])
                peptide = fields["peptide"]
                raw_rank = fields.get("rank", "")
                rank = None if raw_rank in ("", "-1", "null") else int(raw_rank)
                ion = {
                    "ion_type": fields.get("ion", "?"),
                    "theo_mz": float(fields.get("theo_mz", "nan")),
                    "rank": rank,
                    "log_prob": float(fields.get("log_prob", "nan")),
                    "contribution": float(fields.get("contribution", "nan")),
                }
            except (KeyError, ValueError) as e:
                print(
                    f"WARN: skipping malformed Java TRACE line: {line[:80]}... ({e})",
                    file=sys.stderr,
                )
                continue
            out[(scan, peptide)].append(ion)
    return out


def _mz_bucket(theo_mz, mz_tol):
    """Return the integer alignment bucket of theo_mz, or None if not finite."""
    if not isinstance(theo_mz, (int, float)) or not math.isfinite(theo_mz):
        return None
    return round(theo_mz / mz_tol)


def _scores_differ(a, b, tol):
    """True when a and b differ by more than tol; NaN on one side differs."""
    if a == b or (math.isnan(a) and math.isnan(b)):
        return False
    # Written as `not <=` so that a NaN difference is reported, not hidden.
    return not abs(a - b) <= tol


def align_and_diff(rust_ions, java_ions, mz_tol, score_tol=DEFAULT_SCORE_TOL):
    """Yield (key, rust_ion_or_None, java_ion_or_None, flags) per ion.

    Each Rust ion is paired with the closest not-yet-used Java ion of the
    same normalized ion kind whose theo_mz lies within mz_tol (ties keep Java
    input order). Buckets of width mz_tol are only an index: the neighbouring
    buckets are searched as well, so two values closer than mz_tol are matched
    even when they round to different buckets. Ions whose theo_mz is not a
    finite number cannot be aligned and are reported as RUST_ONLY / JAVA_ONLY.

    key is (normalized_ion_kind, bucket_or_None). Flags: RUST_ONLY, JAVA_ONLY,
    RANK_DIFF, LOGPROB_DIFF, CONTRIB_DIFF (the latter two when the absolute
    difference exceeds score_tol, or when exactly one side is NaN).

    Raises ValueError (on first iteration) if mz_tol is not a positive finite
    number.
    """
    if not (math.isfinite(mz_tol) and mz_tol > 0):
        raise ValueError(f"mz_tol must be a positive finite number, got {mz_tol!r}")

    java_keys = [
        (normalize_ion_kind(ion["ion_type"]), _mz_bucket(ion["theo_mz"], mz_tol))
        for ion in java_ions
    ]
    # key -> indices into java_ions that are still unmatched, in input order.
    unmatched = collections.defaultdict(list)
    for idx, key in enumerate(java_keys):
        if key[1] is not None:
            unmatched[key].append(idx)

    matched = set()
    for rust_ion in rust_ions:
        kind = normalize_ion_kind(rust_ion["ion_type"])
        bucket = _mz_bucket(rust_ion["theo_mz"], mz_tol)

        best = None  # (delta, java index, bucket it was found in)
        if bucket is not None:
            for near in (bucket, bucket - 1, bucket + 1):
                for idx in unmatched.get((kind, near), ()):
                    delta = abs(java_ions[idx]["theo_mz"] - rust_ion["theo_mz"])
                    # Same-bucket candidates are always eligible (previous
                    # behaviour); neighbours only when truly within tolerance.
                    if near != bucket and delta > mz_tol:
                        continue
                    if best is None or delta < best[0]:
                        best = (delta, idx, near)

        flags = []
        if best is None:
            java_ion = None
            flags.append("RUST_ONLY")
        else:
            _, idx, near = best
            unmatched[(kind, near)].remove(idx)
            matched.add(idx)
            java_ion = java_ions[idx]
            if rust_ion.get("rank") != java_ion.get("rank"):
                flags.append("RANK_DIFF")
            if _scores_differ(rust_ion["log_prob"], java_ion["log_prob"], score_tol):
                flags.append("LOGPROB_DIFF")
            if _scores_differ(
                rust_ion["contribution"], java_ion["contribution"], score_tol
            ):
                flags.append("CONTRIB_DIFF")
        yield ((kind, bucket), rust_ion, java_ion, flags)

    for idx, ion in enumerate(java_ions):
        if idx not in matched:
            yield (java_keys[idx], None, ion, ["JAVA_ONLY"])


def format_row(key, rust_ion, java_ion, flags):
    """Format one aligned ion as a fixed-width table row.

    A side that is absent (None ion, or None rank) is drawn as a run of
    dashes filling its column. At least one of rust_ion / java_ion must be
    given; theo_mz is taken from the Rust ion when present.
    """

    def fmt(v, w, prec=None):
        if v is None:
            return "-" * w
        if isinstance(v, float) and prec is not None:
            return f"{v:>{w}.{prec}f}"
        return f"{str(v):>{w}}"

    widths = [width for _, width in _COLUMNS]
    theo_mz = (rust_ion or java_ion)["theo_mz"]
    cells = [
        (key[0], None),
        (theo_mz, 4),
        (rust_ion.get("rank") if rust_ion else None, None),
        (java_ion.get("rank") if java_ion else None, None),
        (rust_ion["log_prob"] if rust_ion else None, 4),
        (java_ion["log_prob"] if java_ion else None, 4),
        (rust_ion["contribution"] if rust_ion else None, 4),
        (java_ion["contribution"] if java_ion else None, 4),
    ]
    parts = [fmt(value, w, prec=prec) for (value, prec), w in zip(cells, widths)]
    parts.append(",".join(flags) if flags else "")
    return " ".join(parts)


def main(argv=None):
    """Command-line entry point; argv defaults to sys.argv[1:]."""
    ap = argparse.ArgumentParser(
        description=__doc__,
        # Keep the layout of the module docstring (JSON shape, usage) in --help.
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    ap.add_argument(
        "--rust",
        required=True,
        help="Rust trace JSON from msgf-trace --trace-json",
    )
    ap.add_argument(
        "--java",
        required=True,
        help="Java instrumented trace log (TRACE lines)",
    )
    ap.add_argument(
        "--mz-tol",
        type=float,
        default=1e-3,
        help="m/z alignment tolerance (Da, default 1e-3)",
    )
    ap.add_argument(
        "--score-tol",
        type=float,
        default=DEFAULT_SCORE_TOL,
        help="absolute tolerance for log_prob/contribution (default 1e-4)",
    )
    ap.add_argument(
        "--scan",
        type=int,
        default=None,
        help="Restrict to one scan",
    )
    ap.add_argument(
        "--peptide",
        default=None,
        help="Restrict to one peptide",
    )
    args = ap.parse_args(argv)
    if not (math.isfinite(args.mz_tol) and args.mz_tol > 0):
        ap.error("--mz-tol must be a positive finite number")
    if not (math.isfinite(args.score_tol) and args.score_tol >= 0):
        ap.error("--score-tol must be a non-negative finite number")

    rust = parse_rust_json(args.rust)
    java = parse_java_log(args.java)

    all_keys = sorted(set(rust.keys()) | set(java.keys()))
    for key in all_keys:
        scan, pep = key
        if args.scan is not None and scan != args.scan:
            continue
        if args.peptide is not None and pep != args.peptide:
            continue
        print(f"\n=== scan={scan} peptide={pep} ===")
        rust_ions = rust.get(key, [])
        java_ions = java.get(key, [])
        if not rust_ions and not java_ions:
            print(" (no data on either side)")
            continue
        print(" " + _HEADER)
        rust_total = 0.0
        java_total = 0.0
        category_counts = collections.Counter()
        for row in align_and_diff(
            rust_ions, java_ions, args.mz_tol, score_tol=args.score_tol
        ):
            print(" " + format_row(*row))
            if row[1] is not None:
                rust_total += row[1]["contribution"]
            if row[2] is not None:
                java_total += row[2]["contribution"]
            for f in row[3]:
                category_counts[f] += 1
        print(
            f" TOTAL contribution: rust={rust_total:.4f} java={java_total:.4f} "
            f"delta={rust_total - java_total:+.4f}"
        )
        if category_counts:
            print(f" DIVERGENCES: {dict(category_counts)}")


if __name__ == "__main__":
    main()
b/crates/msgf-rust/src/bin/msgf-trace.rs @@ -9,6 +9,80 @@ use std::io::BufReader; use std::path::PathBuf; use std::process::ExitCode; +// ─── Per-PSM JSON trace output (additive; no new deps) ───────────────────── +// +// Small hand-written JSON via `write!`. The diff harness parses on the +// Python side where stdlib `json` is sufficient. + +struct TraceJson { + out: W, + first_psm: bool, +} + +impl TraceJson { + fn new(mut out: W) -> std::io::Result { + out.write_all(b"[\n")?; + Ok(Self { out, first_psm: true }) + } + + fn begin_psm( + &mut self, + scan: i32, + peptide: &str, + charge: u8, + rust_rank_score: i32, + ) -> std::io::Result<()> { + if !self.first_psm { + self.out.write_all(b",\n")?; + } + self.first_psm = false; + write!( + self.out, + " {{\n \"scan\": {},\n \"peptide\": \"{}\",\n \"charge\": {},\n \"rust_rank_score\": {},\n \"ions\": [", + scan, escape_json(peptide), charge, rust_rank_score + ) + } + + fn end_psm(&mut self) -> std::io::Result<()> { + self.out.write_all(b"\n ]\n }") + } + + #[allow(clippy::too_many_arguments)] + fn ion( + &mut self, + first_ion: bool, + ion_type: &str, + theo_mz: f64, + rank_assigned: Option, + max_rank: u32, + log_prob: f32, + contribution: f32, + ) -> std::io::Result<()> { + if !first_ion { + self.out.write_all(b",")?; + } + let rank_str = rank_assigned + .map(|r| r.to_string()) + .unwrap_or_else(|| "null".to_string()); + write!( + self.out, + "\n {{\"ion_type\": \"{}\", \"theo_mz\": {:.6}, \"rank\": {}, \"max_rank\": {}, \"log_prob\": {:.6}, \"contribution\": {:.6}}}", + escape_json(ion_type), theo_mz, rank_str, max_rank, log_prob, contribution + ) + } + + fn finish(mut self) -> std::io::Result<()> { + self.out.write_all(b"\n]\n") + } +} + +fn escape_json(s: &str) -> String { + s.replace('\\', "\\\\") + .replace('"', "\\\"") + .replace('\n', "\\n") + .replace('\t', "\\t") +} + use clap::Parser; use input::{FastaReader, MgfReader, MzMLReader}; use model::enzyme::Enzyme; @@ -90,6 +164,10 @@ struct Cli { /// 
(diagnostic; gated to avoid spam in normal trace runs). #[arg(long)] print_score_dist: bool, + /// Output structured per-PSM per-ion JSON to this path. Additive: the + /// existing human-readable stderr trace is unaffected. + #[arg(long)] + trace_json: Option, } fn main() -> ExitCode { @@ -412,6 +490,18 @@ fn run(cli: Cli) -> Result<(), Box> { ); } + // Set up optional structured JSON trace output. + let mut trace_json: Option>> = match cli.trace_json { + Some(ref path) => { + let file = File::create(path).map_err(|e| { + eprintln!("Failed to create --trace-json output {}: {}", path.display(), e); + e + })?; + Some(TraceJson::new(std::io::BufWriter::new(file))?) + } + None => None, + }; + // If user supplied Java top-1, search for it in Rust's enumerated set. if let Some(java_str) = &cli.java_top1 { let java_pep = parse_flanking(java_str)?; @@ -458,8 +548,17 @@ fn run(cli: Cli) -> Result<(), Box> { for &z in &charges_to_try { println!("\n Per-split node_score breakdown — Java pep ({}+{}) ---", java_str, z); let scored = ScoredSpectrum::new(spec, &scorer, z); - print_split_breakdown(&scored, java_cand_pep, &scorer, z); let total = score_psm(&scored, java_cand_pep, &scorer, z, 0.5); + print_split_breakdown( + &scored, + java_cand_pep, + &scorer, + z, + trace_json.as_mut(), + cli.scan, + java_str, + total.round() as i32, + )?; println!(" score_psm total = {}", total); } } @@ -471,7 +570,17 @@ fn run(cli: Cli) -> Result<(), Box> { let pep_str: String = rust_top1_pep.residues.iter().map(|aa| aa.residue as char).collect(); println!("\n Per-split node_score breakdown — Rust top-1 ({} +{}) ---", pep_str, top1.charge_used); let scored = ScoredSpectrum::new(spec, &scorer, top1.charge_used); - print_split_breakdown(&scored, rust_top1_pep, &scorer, top1.charge_used); + let rust_rank_score = top1.score.round() as i32; + print_split_breakdown( + &scored, + rust_top1_pep, + &scorer, + top1.charge_used, + trace_json.as_mut(), + cli.scan, + &pep_str, + rust_rank_score, + )?; 
println!(" PSM.score (from queue) = {}", top1.score); } @@ -614,6 +723,13 @@ fn run(cli: Cli) -> Result<(), Box> { println!(" rank={} mz={:.4} intensity={}", rank + 1, mz, intensity); } + if let Some(tj) = trace_json { + tj.finish().map_err(|e| { + eprintln!("Failed to finalize --trace-json output: {}", e); + e + })?; + } + Ok(()) } @@ -650,14 +766,22 @@ fn parse_flanking(s: &str) -> Result> { /// Print per-split node_score: prefix nominal, suffix nominal, score per split, /// and which ions matched peaks. +/// +/// When `trace_json` is `Some`, emits a structured JSON record for this PSM +/// alongside the existing human-readable output. +#[allow(clippy::too_many_arguments)] fn print_split_breakdown( scored: &ScoredSpectrum<'_>, peptide: &Peptide, scorer: &RankScorer, charge: u8, -) { + mut trace_json: Option<&mut TraceJson>>, + scan: i32, + peptide_label: &str, + rank_score: i32, +) -> Result<(), Box> { let n = peptide.length(); - if n < 2 { return; } + if n < 2 { return Ok(()); } // Use SPECTRUM's parent mass for partition lookup (matching score_psm fix). let spectrum_parent_mass = scored.parent_mass(); let peptide_mass = peptide.mass(); @@ -665,6 +789,13 @@ fn print_split_breakdown( let mut prefix_acc = 0.0_f64; let mut total: i32 = 0; let mme = &scorer.param().mme; + let max_rank = scorer.max_rank(); + + // Begin JSON PSM record if a writer is present. 
+ if let Some(ref mut tj) = trace_json { + tj.begin_psm(scan, peptide_label, charge, rank_score)?; + } + let mut first_json_ion = true; println!(" spectrum_parent_mass={:.4}, peptide_mass={:.4}, peptide_nominal={}", spectrum_parent_mass, peptide_mass, peptide_nominal); @@ -687,21 +818,34 @@ fn print_split_breakdown( let seg = scorer.param().segment_num(theo_mz, spectrum_parent_mass); let part = scorer.param().partition_for(charge, spectrum_parent_mass, seg); let tol_da = mme.as_da(theo_mz); - let (score_str, contribution) = match scored.nearest_peak_rank(theo_mz, tol_da) { + let peak_rank = scored.nearest_peak_rank(theo_mz, tol_da); + let (score_str, contribution, log_prob) = match peak_rank { Some(rank) => { let s = scorer.node_score(part, ion, rank); n_matched += 1; matched_sum += s; - (format!("rk{}={:.2}", rank, s), s) + (format!("rk{}={:.2}", rank, s), s, s) } None => { let s = scorer.missing_ion_score(part, ion); n_missing += 1; missing_sum += s; - (format!("MISS={:.2}", s), s) + (format!("MISS={:.2}", s), s, s) } }; - let _ = contribution; + // Emit JSON ion record if writer is present. + if let Some(ref mut tj) = trace_json { + tj.ion( + first_json_ion, + &format!("{:?}", ion), + theo_mz, + peak_rank, + max_rank, + log_prob, + contribution, + )?; + first_json_ion = false; + } let kind = if is_prefix { "P" } else { "S" }; let off = match ion { scoring_crate::param_model::IonType::Prefix { offset_bits, .. } | @@ -726,4 +870,11 @@ fn print_split_breakdown( } } println!(" breakdown_total = {}", total); + + // Close JSON PSM record if a writer is present. 
+ if let Some(ref mut tj) = trace_json { + tj.end_psm()?; + } + + Ok(()) } diff --git a/docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md b/docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md new file mode 100644 index 00000000..1edcc250 --- /dev/null +++ b/docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md @@ -0,0 +1,144 @@ +# I5 score_psm trace investigation — findings + +**Date:** 2026-05-26 +**Branch:** `feat/i5-score-psm-trace` +**Rust HEAD:** `d5989824` (msgf-trace JSON output + Python diff harness) +**Java instrumentation:** java-legacy commit `65120118` on `/srv/data/msgf-bench/java-legacy-trace/`, patched in-place with `System.err.println` TRACE in `NewScoredSpectrum.getNodeScore(float, boolean)` gated by `-Dmsgf.trace.scans=` +**Dataset:** PXD001819 (`UPS1_5000amol_R1.mzML`) + +## Top-line finding + +**Rust's per-ion log-probability lookups differ from Java's on every matched ion.** Of the 681 Rust ion records across 10 traced PSMs, 608 were matched to a Java record and 73 had no Java counterpart (per-PSM counts: `score-psm-trace-artifacts/aggregate-analysis.txt`, where `LOGPROB_DIFF` equals `ions − rust-only` in every PSM): + +| Divergence category | Count | % of matched ions (n = 608) | +|---|---:|---:| +| `LOGPROB_DIFF` (different log P value) | **608** | **100%** | +| `CONTRIB_DIFF` (different per-ion contribution) | **608** | **100%** (same as LOGPROB; contribution = log-prob in this code path) | +| `RANK_DIFF` (different rank assigned to matched peak) | **301** | **50%** | +| `RUST_ONLY` (ion enumerated by Rust, not by Java) | 73 | (not part of the matched set; 11% of the 681 Rust ion records) | + +Tolerance for "differ": `|Δ| > 1e-3` for log-prob/contribution; exact mismatch for rank. + +**All three hypotheses (H1 ion-type list, H2 peak rank, H3 log-prob tables) contribute. H3 is the most pervasive.** Per-PSM contribution totals differ by at most about ±13 points on Java's picks because per-ion errors partially cancel — but the per-ion error structure is what allows Rust to systematically over-score non-Java-favored peptides, which is what flips the top-1 selection.
+ +## The 5 traced label-flip scans + +Selected by largest `Java_RawScore − Rust_top1_RawScore` from the PR-V1-S1b bench data (PXD001819 cal=off). + +| Scan | Java top-1 peptide | Java RawScore | Rust top-1 peptide | Rust top-1 RawScore | Gap (J − Rtop1) | +|---:|---|---:|---|---:|---:| +| 41522 | R.DPANLPWASLNIDIAIDSTGVFK.E | 238 | VVYGNIYEIEIDRLFLTDQR (rev/decoy) | 11 | 225 | +| 34685 | R.DPANLPWGSSNVDIAIDSTGVFK.E | 234 | KYQKGEETSTNSIASIFAWSR | 33 | 211 (Rust=23 per bench; trace shows pick #5 score=17 also flipped) | +| 23272 | K.LLYTIPTGQNPTGTSIADHR.K | 173 | TLKFNLNYPNPMNFLRR | -31 | 204 | +| 23082 | K.NQQIVAGKPLYVAIAQR.K | 163 | LLLLEKENADLLNELK | -24 | 187 | +| 16629 | K.IVAGQVDTDEAGYIK.T | 210 | ILNMNMVPDYLQK | 43 | 167 | + +## Per-PSM RawScore comparison (Java-favored peptide, scored by Rust vs Java) + +For each scan, Rust's `msgf-trace --java-top1 ` was used to score Java's chosen peptide via Rust's scoring code. Compared to Java's per-ion summing on the same nominal masses: + +| Scan | Peptide | Rust contrib sum | Java contrib sum | Δ (R − J) | +|---:|---|---:|---:|---:| +| 41522 | R.DPANLPWASLNIDIAIDSTGVFK.E | 125.59 | 137.61 | −12.02 | +| 34685 | R.DPANLPWGSSNVDIAIDSTGVFK.E | 115.77 | 128.71 | −12.94 | +| 23272 | K.LLYTIPTGQNPTGTSIADHR.K | 107.43 | 107.83 | −0.40 | +| 23082 | K.NQQIVAGKPLYVAIAQR.K | 118.12 | 123.41 | −5.29 | +| 16629 | K.IVAGQVDTDEAGYIK.T | 116.64 | 103.26 | +13.38 | + +Range: −12.94 to +13.38. Rust scores the Java-favored peptide within ±13 of Java's value — **MUCH smaller than the 200+ RawScore gap observed in PIN output**. 
+ +## Per-PSM RawScore for Rust's PICK (peptides Rust ranks #1) + +When the same per-ion analysis is run for the peptide Rust picks as top-1, we get a very different picture: + +| Scan | Rust's top-1 peptide | Rust contrib sum | Java contrib sum (same peptide, Java scoring) | Δ (R − J) | +|---:|---|---:|---:|---:| +| 41522 | VVYGNIYEIEIDRLFLTDQR | 5.11 | 4.29 | +0.81 | +| 34685 | PDPLSELSDFYMFQKLPTFK | 26.22 | 9.75 | **+16.46** | +| 23272 | FLVENELSGKGWYENKIK | 25.37 | 5.03 | **+20.34** | +| 23082 | ELPLSIGILFKRYYR | 20.87 | 11.23 | **+9.64** | +| 16629 | ILNMNMVPDYLQK | 21.28 | 15.39 | **+5.88** | + +**Rust systematically OVER-scores its own picks — by +5 to +20 points in 4 of the 5 scans (+0.81 in scan 41522) — vs Java's per-ion scoring of the same peptides.** This is the label-flip mechanism: Rust's scoring is generous enough to lift weaker peptides above the Java-favored ones. + +The asymmetry (Rust **under**-scores Java's pick by ~13 AND **over**-scores its own pick by ~10) compounds to a ~20-25 point net advantage for Rust's pick over Java's pick in Rust's ranking. Combined with thousands of candidate peptides per spectrum, this is enough to flip the top-1 ranking. + +## What this means for each hypothesis + +**H1 (per-partition ion-type list differs):** Confirmed at scale of 73 RUST_ONLY ions out of 681 Rust ion records (~11%); these 73 are in addition to the 608 matched comparisons. Specific ion types Rust enumerates that Java doesn't. Subset; not dominant. + +**H2 (peak rank assignment differs):** Confirmed at 301/608 = 50% of matched comparisons. Substantial. Could explain a large share of LOGPROB_DIFF (a different rank gives a different log-prob lookup index). + +**H3 (per-rank log-probability tables differ):** Confirmed at 608/608 = 100% of matched comparisons. **Dominant by count.** But many H3 cases may be downstream effects of H2 — if Rust picks rank 5 and Java picks rank 4 for the same ion, the log-prob lookup naturally returns different values.
+ +### Disentangling H2 vs H3 + +Of the 301 RANK_DIFF ions, all 301 also show LOGPROB_DIFF: every one of the 608 matched ions carries LOGPROB_DIFF (in each traced PSM the LOGPROB_DIFF count equals the number of matched ions), so the RANK_DIFF ions are necessarily a subset. + +The remaining 608 − 301 = 307 LOGPROB_DIFF cases WITHOUT a RANK_DIFF mean Rust and Java agree on the rank but disagree on the log-prob VALUE. That's pure H3: the lookup table content (or its indexing) differs. + +**Disentanglement:** of the 681 Rust ion records, roughly 44% (301 / 681) of divergences are explained by H2 (rank assignment), 45% (307 / 681) by H3 (table value), and 11% (73 / 681) by H1 (ion enumeration). These three groups account for every traced ion (301 + 307 + 73 = 681); none is divergence-free. Not a single dominant cause — H2 and H3 contribute about equally, with H1 a smaller third contributor. + +## Proposed fix design + +Given the multi-causal nature, the most leveraged single fix is **H2 (rank assignment)** because: +- Fixing H2 automatically fixes a large share of the LOGPROB_DIFF cases (the ones where rank differed) +- Rank assignment lives in a single function in Rust (`crates/scoring/src/scoring/scored_spectrum.rs::setRanksOfPeaks` and `nearest_peak_rank`) +- The Java implementation in `NewScoredSpectrum` is short (~100 LOC), making it tractable to do a line-by-line audit + +### Next-PR investigation order (research → fix) + +1. **Pick one of the traced PSMs (e.g., scan 41522, peptide R.DPANLPWASLNIDIAIDSTGVFK.E) and identify a specific (theo_mz, rank) where Rust and Java disagree.** The traced data is sufficient: load `rust-trace-scan-41522.json`, find the first ion with `RANK_DIFF`, note theo_mz + rust_rank + java_rank. + +2. **Walk through both code paths for that single ion.** Rust: `nearest_peak_rank(theo_mz, tol_da)` → binary search → linear scan for intensity-max. Java: `Peak p = spec.getPeakByMass(theoMass, mme); p.getRank()` → `Peak` constructor — look at how Java assigns ranks to peaks. + +3. 
**Identify the specific tie-break or filter difference.** Common culprits per the 2026-05-20 doc hypothesis: + - Java uses `getPeakByMass` which picks the FIRST peak in tolerance; Rust uses intensity-max selection inside the tolerance window. + - Precursor-filter handling differs (PR-A's `precursor_filtered` mask interacts with ranks differently than Java's pre-filter). + - Tie-break on equal-intensity peaks: Java uses peak index order, Rust uses m/z order. + +4. **Make the targeted fix in Rust** to match Java's rank-assignment rule. Bench gate: PXD001819 auto @1% FDR ≥ +200 PSMs (10% of the 14,755 → 15,000+ target; far short of beating Java but a clear directional improvement). + +5. **Re-run the trace harness post-fix** to verify the RANK_DIFF count drops. If most RANK_DIFF cases close, the LOGPROB_DIFF count should drop proportionally (since RANK_DIFF was driving most LOGPROB_DIFF). + +### Risk per the n=9 audit pattern + +Changing `setRanksOfPeaks` / `nearest_peak_rank` is a **modifies-existing-distribution** change. Historical pattern: such changes often regress Percolator @1% FDR even when individually correct. Mitigation: bench-gate per dataset; revert if regression. + +ALTERNATIVE strategy: leave Rust's existing rank assignment intact and instead introduce an **ADDITIVE PIN column** that captures the magnitude of disagreement between rank schemes (e.g., the count of ions where Rust's rank ≠ Java's expected rank). Per the n=9 audit, additive columns are safe. Trade: smaller potential yield, but zero regression risk. + +## Methodology + +1. Identified the 5 label-flip scans by reading PR-V1-S1b bench PINs (java vs rust-off), selecting the top 5 PSMs where Java's top-1 peptide differs from Rust's AND `|Java_RawScore − Rust_top1_RawScore|` is largest. Tie-break: arbitrary. + +2. 
Captured per-ion structured traces: + - Rust: `msgf-trace --trace-json` (built with `feat/i5-score-psm-trace` HEAD), invoked once per scan with `--java-top1` set to Java's chosen peptide. + - Java: instrumented `NewScoredSpectrum.getNodeScore` to emit `TRACE\tscan=N\tnominalMass=M\tisPrefix=B\tion=I\ttheo_mz=F\trank=R\tlog_prob=L\tcontribution=C` for every per-ion sub-step. Gated by `-Dmsgf.trace.scans=41522,34685,23272,23082,16629` so the trace fires only for the 5 target scans. + +3. Aligned Rust ↔ Java records by `(normalized_ion_kind, round(theo_mz / 1e-3))` within the same scan. Java has no peptide attribution (per-(scan, nominal_mass) only) but ion values are deterministic per (scan, nominal_mass), so per-Rust-PSM-ion lookups are well-defined. + +4. Aggregated divergence counts and per-PSM totals. Wrote ad-hoc analysis Python (`/tmp/i5-analyze.py`, output checked in as `aggregate-analysis.txt`). + +## Artifacts (this directory) + +- `rust-trace-scan-.json` — Rust per-PSM per-ion JSON for each of the 5 scans (Rust top-1 + Java's top-1 peptide, each as a separate PSM record) +- `rust-trace-scan-.txt` — Rust human-readable stderr trace from `msgf-trace` +- `java-trace-scan-.log.gz` — Java per-(scan, nominal_mass, ion) TRACE lines per scan, gzipped to keep repo size manageable. Decompress: `gunzip -k java-trace-scan-N.log.gz`. +- `aggregate-analysis.txt` — output of the ad-hoc analysis script +- `analyze.py` — the analysis script itself, for re-running after a fix lands + +## Reproducibility + +To re-run this analysis after a fix lands: + +1. Build msgf-trace on the bench VM: `cargo build --release --bin msgf-trace` +2. Build instrumented java-legacy: `cd /srv/data/msgf-bench/java-legacy-trace && mvn package -DskipTests` (assumes the `NewScoredSpectrum.getNodeScore` patch is present; see commit history of the VM-local clone) +3. 
Run `bash /tmp/i5-rust-trace.sh` (on VM) and the matching Java command (see PR description) — both with `-Dmsgf.trace.scans=41522,34685,23272,23082,16629` +4. Pull artifacts via scp; re-run `/tmp/i5-analyze.py` adapted to the new artifact paths + +## Out of scope (next PR) + +- Implementing the proposed fix (H2 rank assignment as primary target) +- Validating the fix on Astral / TMT (this PR's bench gate is PXD001819 only) +- Closing the n=9 risk by also adding an additive PIN column variant if the direct fix regresses Percolator +- Quantifying the contribution of H1 (ion enumeration) — would require additional instrumentation to confirm Rust's RUST_ONLY ions are genuinely missing from Java's data structure, vs being filtered out before scoring diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/aggregate-analysis.txt b/docs/parity-analysis/notes/score-psm-trace-artifacts/aggregate-analysis.txt new file mode 100644 index 00000000..5f036c67 --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/aggregate-analysis.txt @@ -0,0 +1,93 @@ + +============================================================================== +SCAN 41522 | Rust PSMs traced: 2 | Java ions: 20703 + + PSM: peptide=R.DPANLPWASLNIDIAIDSTGVFK.E charge=2 rust_rank_score=128 + ions: 77 (rust-only: 4) + rust contribution sum: 125.5890 + java contribution sum: 137.6111 (matched ions only) + delta (rust - java): -12.0221 + divergence counts: {'LOGPROB_DIFF': 73, 'CONTRIB_DIFF': 73, 'RANK_DIFF': 21, 'RUST_ONLY': 4} + + PSM: peptide=VVYGNIYEIEIDRLFLTDQR charge=2 rust_rank_score=11 + ions: 68 (rust-only: 4) + rust contribution sum: 5.1079 + java contribution sum: 4.2938 (matched ions only) + delta (rust - java): +0.8140 + divergence counts: {'LOGPROB_DIFF': 64, 'CONTRIB_DIFF': 64, 'RUST_ONLY': 4, 'RANK_DIFF': 15} + +============================================================================== +SCAN 34685 | Rust PSMs traced: 2 | Java ions: 20243 + + PSM: 
peptide=R.DPANLPWGSSNVDIAIDSTGVFK.E charge=2 rust_rank_score=119 + ions: 77 (rust-only: 3) + rust contribution sum: 115.7682 + java contribution sum: 128.7127 (matched ions only) + delta (rust - java): -12.9445 + divergence counts: {'LOGPROB_DIFF': 74, 'CONTRIB_DIFF': 74, 'RUST_ONLY': 3, 'RANK_DIFF': 43} + + PSM: peptide=PDPLSELSDFYMFQKLPTFK charge=2 rust_rank_score=33 + ions: 68 (rust-only: 4) + rust contribution sum: 26.2166 + java contribution sum: 9.7547 (matched ions only) + delta (rust - java): +16.4618 + divergence counts: {'LOGPROB_DIFF': 64, 'CONTRIB_DIFF': 64, 'RUST_ONLY': 4, 'RANK_DIFF': 29} + +============================================================================== +SCAN 23272 | Rust PSMs traced: 2 | Java ions: 20270 + + PSM: peptide=K.LLYTIPTGQNPTGTSIADHR.K charge=2 rust_rank_score=107 + ions: 65 (rust-only: 1) + rust contribution sum: 107.4337 + java contribution sum: 107.8341 (matched ions only) + delta (rust - java): -0.4004 + divergence counts: {'LOGPROB_DIFF': 64, 'CONTRIB_DIFF': 64, 'RANK_DIFF': 47, 'RUST_ONLY': 1} + + PSM: peptide=FLVENELSGKGWYENKIK charge=2 rust_rank_score=30 + ions: 61 (rust-only: 0) + rust contribution sum: 25.3727 + java contribution sum: 5.0307 (matched ions only) + delta (rust - java): +20.3420 + divergence counts: {'LOGPROB_DIFF': 61, 'CONTRIB_DIFF': 61, 'RANK_DIFF': 25} + +============================================================================== +SCAN 23082 | Rust PSMs traced: 2 | Java ions: 15707 + + PSM: peptide=K.NQQIVAGKPLYVAIAQR.K charge=2 rust_rank_score=117 + ions: 67 (rust-only: 12) + rust contribution sum: 118.1152 + java contribution sum: 123.4094 (matched ions only) + delta (rust - java): -5.2942 + divergence counts: {'LOGPROB_DIFF': 55, 'CONTRIB_DIFF': 55, 'RUST_ONLY': 12, 'RANK_DIFF': 37} + + PSM: peptide=ELPLSIGILFKRYYR charge=2 rust_rank_score=25 + ions: 63 (rust-only: 12) + rust contribution sum: 20.8720 + java contribution sum: 11.2306 (matched ions only) + delta (rust - java): +9.6415 + 
divergence counts: {'LOGPROB_DIFF': 51, 'CONTRIB_DIFF': 51, 'RUST_ONLY': 12, 'RANK_DIFF': 21} + +============================================================================== +SCAN 16629 | Rust PSMs traced: 2 | Java ions: 14003 + + PSM: peptide=K.IVAGQVDTDEAGYIK.T charge=2 rust_rank_score=116 + ions: 74 (rust-only: 18) + rust contribution sum: 116.6408 + java contribution sum: 103.2616 (matched ions only) + delta (rust - java): +13.3792 + divergence counts: {'LOGPROB_DIFF': 56, 'CONTRIB_DIFF': 56, 'RUST_ONLY': 18, 'RANK_DIFF': 36} + + PSM: peptide=ILNMNMVPDYLQK charge=2 rust_rank_score=26 + ions: 61 (rust-only: 15) + rust contribution sum: 21.2753 + java contribution sum: 15.3947 (matched ions only) + delta (rust - java): +5.8805 + divergence counts: {'LOGPROB_DIFF': 46, 'CONTRIB_DIFF': 46, 'RUST_ONLY': 15, 'RANK_DIFF': 27} + +============================================================================== +AGGREGATE (5 scans x ~2 PSMs each): + Total divergences across all traced PSMs: + LOGPROB_DIFF: 608 + CONTRIB_DIFF: 608 + RANK_DIFF: 301 + RUST_ONLY: 73 diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/analyze.py b/docs/parity-analysis/notes/score-psm-trace-artifacts/analyze.py new file mode 100644 index 00000000..03cf33e5 --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/analyze.py @@ -0,0 +1,170 @@ +#!/usr/bin/env python3 +"""One-shot I5 analysis: align Rust per-PSM JSON trace against Java per-scan +TRACE log for the 5 PXD001819 label-flip PSMs. Java trace has no peptide +attribution (it's per-(scan, nominal_mass, isPrefix, ion, theo_mz) — one +record per ion within a getNodeScore call). Rust JSON has per-PSM per-ion +records keyed by theo_mz. + +For each Rust PSM ion, find Java's matching (ion_kind, theo_mz) within the +same scan with a 1e-3 Da tolerance. Tally divergences. +""" + +import collections +import json +import os +import re +import struct +import sys + +ART = "." 
# Scan numbers whose Rust/Java trace artifacts are compared by main().
SCANS = [41522, 34685, 23272, 23082, 16629]

# Rust `IonType` Debug rendering, e.g. `Prefix { charge: 1, offset_bits: 0 }`.
_RUST_ION_RE = re.compile(
    r"(Prefix|Suffix)\s*\{\s*charge:\s*(\d+),\s*offset_bits:\s*(\d+)\s*\}"
)
# Java ion label, e.g. `b/1+0.00000`. The offset class also admits a sign and
# an exponent so values such as `-17.003` or `1.0E-5` are captured whole.
_JAVA_ION_RE = re.compile(r"([by])/(\d+)\+([\d.+\-eE]+)")


def normalize_rust_ion(s):
    """Map a Rust IonType Debug string to 'b/<charge>+<offset>' or 'y/...'.

    `Prefix` becomes 'b' and `Suffix` becomes 'y'. `offset_bits` is
    reinterpreted as the bit pattern of a big-endian float32 and printed with
    five decimals. Any string containing "Noise" maps to "Noise"; anything
    unrecognised is returned stripped but otherwise unchanged.
    """
    s = s.strip()
    if "Noise" in s:
        return "Noise"
    m = _RUST_ION_RE.match(s)
    if m is None:
        return s
    kind = "b" if m.group(1) == "Prefix" else "y"
    charge = int(m.group(2))
    # Integer bits -> float32 value (pack as uint32, unpack as float).
    offset = struct.unpack(">f", struct.pack(">I", int(m.group(3))))[0]
    return f"{kind}/{charge}+{offset:.5f}"


def normalize_java_ion(s):
    """Map a Java ion label 'b/<charge>+<offset>' to the same normalized key.

    The offset is re-formatted with five decimals so it compares equal to the
    key produced by normalize_rust_ion. Scientific notation is accepted.
    Unrecognised strings are returned unchanged.

    Raises:
        ValueError: if the label has the expected shape but the offset text is
            not a valid float.
    """
    m = _JAVA_ION_RE.match(s)
    if m is None:
        return s
    kind = m.group(1)
    charge = int(m.group(2))
    offset = float(m.group(3))
    return f"{kind}/{charge}+{offset:.5f}"


def load_rust(scan):
    """Load the Rust JSON trace for `scan` and return its list of PSM dicts."""
    path = f"{ART}/rust-trace-scan-{scan}.json"
    with open(path) as fh:
        return json.load(fh)


def load_java(scan):
    """Return one dict per Java TRACE line for `scan`.

    Reads `java-trace-scan-<scan>.log` under ART, falling back to the `.gz`
    variant when the plain file is absent. A `rank` of -1 is stored as None.
    Lines that lack a required field or carry a non-numeric value are skipped;
    the number skipped is reported once on stderr.

    Raises:
        FileNotFoundError: if neither the plain nor the gzipped log exists.
    """
    import gzip

    base = f"{ART}/java-trace-scan-{scan}.log"
    if os.path.exists(base):
        path, opener = base, open
    elif os.path.exists(base + ".gz"):
        path, opener = base + ".gz", gzip.open
    else:
        raise FileNotFoundError(f"neither {base} nor {base}.gz")

    out = []
    skipped = 0
    with opener(path, "rt") as fh:
        for line in fh:
            line = line.rstrip("\n")
            if not line.startswith("TRACE"):
                continue
            # Tab-separated `key=value` pairs follow the leading TRACE tag.
            fields = {}
            for part in line.split("\t")[1:]:
                if "=" in part:
                    k, v = part.split("=", 1)
                    fields[k] = v
            try:
                rec = {
                    "scan": int(fields["scan"]),
                    "nominalMass": int(fields["nominalMass"]),
                    "isPrefix": fields["isPrefix"] == "true",
                    "ion_kind": normalize_java_ion(fields["ion"]),
                    "theo_mz": float(fields["theo_mz"]),
                    "rank": int(fields["rank"]) if fields["rank"] != "-1" else None,
                    "log_prob": float(fields["log_prob"]),
                    "contribution": float(fields["contribution"]),
                }
            except (KeyError, ValueError):
                skipped += 1
                continue
            out.append(rec)
    if skipped:
        # stderr only, so the stdout report is unaffected.
        print(
            f"WARN: skipped {skipped} malformed Java TRACE line(s) in {path}",
            file=sys.stderr,
        )
    return out


def index_java(java_ions, mz_tol=1e-3):
    """Group Java ion records by (ion_kind, round(theo_mz / mz_tol)).

    A bucket may hold several records when Java emits the same ion repeatedly
    while scoring different candidate peptides (values should be identical).
    Returns a defaultdict(list) keyed by that tuple.
    """
    idx = collections.defaultdict(list)
    for rec in java_ions:
        idx[(rec["ion_kind"], round(rec["theo_mz"] / mz_tol))].append(rec)
    return idx


def _nearest_java_ion(java_idx, kind, theo_mz, mz_tol):
    """Return the Java ion of `kind` closest to `theo_mz` within `mz_tol`, or None."""
    center = round(theo_mz / mz_tol)
    best = None
    best_delta = None
    # Two m/z values closer than mz_tol can straddle a bucket edge, so the
    # neighbouring buckets have to be searched as well as the central one.
    for bucket in (center, center - 1, center + 1):
        for cand in java_idx.get((kind, bucket), ()):
            delta = abs(cand["theo_mz"] - theo_mz)
            if delta <= mz_tol and (best is None or delta < best_delta):
                best, best_delta = cand, delta
    return best


def compare_psm(psm, java_idx, mz_tol=1e-3):
    """Align every Rust ion of `psm` with a Java ion and flag differences.

    Args:
        psm: Rust PSM dict; its "ions" entries need "ion_type", "theo_mz",
            "log_prob" and "contribution" ("rank" is optional).
        java_idx: index built by index_java with the same `mz_tol`.
        mz_tol: m/z matching tolerance (same unit as theo_mz).

    Returns:
        A list of (ion_kind, theo_mz, rust_ion, java_ion_or_None, flags)
        tuples in the order of psm["ions"]. `flags` holds "RUST_ONLY" when no
        Java ion is within tolerance, otherwise any of "RANK_DIFF",
        "LOGPROB_DIFF" and "CONTRIB_DIFF" (the latter two use a 1e-3 threshold).
    """
    rows = []
    for rust_ion in psm["ions"]:
        rkind = normalize_rust_ion(rust_ion["ion_type"])
        java_ion = _nearest_java_ion(java_idx, rkind, rust_ion["theo_mz"], mz_tol)
        flags = []
        if java_ion is None:
            flags.append("RUST_ONLY")
        else:
            if rust_ion.get("rank") != java_ion.get("rank"):
                flags.append("RANK_DIFF")
            if abs(rust_ion["log_prob"] - java_ion["log_prob"]) > 1e-3:
                flags.append("LOGPROB_DIFF")
            if abs(rust_ion["contribution"] - java_ion["contribution"]) > 1e-3:
                flags.append("CONTRIB_DIFF")
        rows.append((rkind, rust_ion["theo_mz"], rust_ion, java_ion, flags))
    return rows


def fmt_num(v, prec):
    """Right-align `v` in 8 + prec columns with `prec` decimals; dashes for None."""
    return f"{v:>{8+prec}.{prec}f}" if v is not None else "-" * (8 + prec)


def main():
    """Print a per-PSM divergence report for every scan in SCANS, then totals."""
    summary = []
    for scan in SCANS:
        rust_psms = load_rust(scan)
        java_ions = load_java(scan)
        java_idx = index_java(java_ions)
        print(f"\n{'=' * 78}\nSCAN {scan} | Rust PSMs traced: {len(rust_psms)} | Java ions: {len(java_ions)}")
        for psm in rust_psms:
            pep = psm["peptide"]
            rscore = psm["rust_rank_score"]
            print(f"\n PSM: peptide={pep} charge={psm['charge']} rust_rank_score={rscore}")
            rows = compare_psm(psm, java_idx)
            # The Rust sum covers every ion, the Java sum only matched ions,
            # so the delta below also contains the RUST_ONLY contributions.
            rust_total = sum(row[2]["contribution"] for row in rows)
            java_matched = sum(row[3]["contribution"] for row in rows if row[3] is not None)
            divergences = collections.Counter(flag for row in rows for flag in row[4])
            print(f" ions: {len(rows)} (rust-only: {divergences.get('RUST_ONLY', 0)})")
            print(f" rust contribution sum: {rust_total:>10.4f}")
            print(f" java contribution sum: {java_matched:>10.4f} (matched ions only)")
            print(f" delta (rust - java): {rust_total - java_matched:>+10.4f}")
            print(f" divergence counts: {dict(divergences)}")
            summary.append((scan, pep, rscore, len(rows), divergences))

    # Aggregate across every traced scan / PSM.
    print("\n" + "=" * 78)
    print(f"AGGREGATE ({len(SCANS)} scans x ~2 PSMs each):")
    total_div = collections.Counter()
    for entry in summary:
        total_div.update(entry[4])
    print(" Total divergences across all traced PSMs:")
    for cat, count in total_div.most_common():
        print(f" {cat}: {count}")


if __name__ == "__main__":
    main()
b/docs/parity-analysis/notes/score-psm-trace-artifacts/java-trace-scan-34685.log.gz new file mode 100644 index 00000000..66678bf3 Binary files /dev/null and b/docs/parity-analysis/notes/score-psm-trace-artifacts/java-trace-scan-34685.log.gz differ diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/java-trace-scan-41522.log.gz b/docs/parity-analysis/notes/score-psm-trace-artifacts/java-trace-scan-41522.log.gz new file mode 100644 index 00000000..521c0a84 Binary files /dev/null and b/docs/parity-analysis/notes/score-psm-trace-artifacts/java-trace-scan-41522.log.gz differ diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-16629.json b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-16629.json new file mode 100644 index 00000000..b5b05e7b --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-16629.json @@ -0,0 +1,153 @@ +[ + { + "scan": 16629, + "peptide": "K.IVAGQVDTDEAGYIK.T", + "charge": 2, + "rust_rank_score": 116, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 114.064693, "rank": null, "max_rank": 150, "log_prob": -0.623977, "contribution": -0.623977}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 86.069779, "rank": null, "max_rank": 150, "log_prob": -0.161271, "contribution": -0.161271}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 96.054129, "rank": null, "max_rank": 150, "log_prob": -0.213538, "contribution": -0.213538}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1465.746100, "rank": null, "max_rank": 150, "log_prob": -1.355310, "contribution": -1.355310}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1466.749455, "rank": null, "max_rank": 150, "log_prob": -1.013323, "contribution": -1.013323}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1467.749340, "rank": null, 
"max_rank": 150, "log_prob": -0.361998, "contribution": -0.361998}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1448.719551, "rank": 261, "max_rank": 150, "log_prob": 1.262665, "contribution": 1.262665}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 213.114515, "rank": null, "max_rank": 150, "log_prob": -0.623977, "contribution": -0.623977}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 185.119601, "rank": null, "max_rank": 150, "log_prob": -0.161271, "contribution": -0.161271}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 195.103951, "rank": null, "max_rank": 150, "log_prob": -0.213538, "contribution": -0.213538}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1366.696277, "rank": 9, "max_rank": 150, "log_prob": 5.822582, "contribution": 5.822582}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1367.699633, "rank": 10, "max_rank": 150, "log_prob": 5.004897, "contribution": 5.004897}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1368.699518, "rank": 136, "max_rank": 150, "log_prob": 2.380336, "contribution": 2.380336}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1349.669728, "rank": 198, "max_rank": 150, "log_prob": 1.262665, "contribution": 1.262665}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 284.150247, "rank": 68, "max_rank": 150, "log_prob": 1.073776, "contribution": 1.073776}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 256.155332, "rank": 252, "max_rank": 150, "log_prob": -0.222046, "contribution": -0.222046}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 266.139683, "rank": 264, "max_rank": 150, "log_prob": 0.011180, "contribution": 0.011180}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1295.660546, 
"rank": 8, "max_rank": 150, "log_prob": 5.935272, "contribution": 5.935272}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1296.663901, "rank": 15, "max_rank": 150, "log_prob": 5.064535, "contribution": 5.064535}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1297.663787, "rank": 185, "max_rank": 150, "log_prob": 1.267557, "contribution": 1.267557}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1278.633997, "rank": 80, "max_rank": 150, "log_prob": 2.281085, "contribution": 2.281085}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 341.178932, "rank": 106, "max_rank": 150, "log_prob": 0.621059, "contribution": 0.621059}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 313.184018, "rank": 274, "max_rank": 150, "log_prob": -0.222046, "contribution": -0.222046}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 323.168368, "rank": null, "max_rank": 150, "log_prob": -0.213538, "contribution": -0.213538}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1238.631861, "rank": 21, "max_rank": 150, "log_prob": 4.890666, "contribution": 4.890666}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1239.635216, "rank": 28, "max_rank": 150, "log_prob": 4.555610, "contribution": 4.555610}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1240.635101, "rank": 360, "max_rank": 150, "log_prob": 1.267557, "contribution": 1.267557}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1221.605312, "rank": 47, "max_rank": 150, "log_prob": 2.218054, "contribution": 2.218054}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 469.243349, "rank": 19, "max_rank": 150, "log_prob": 2.318052, "contribution": 2.318052}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 441.248435, 
"rank": 57, "max_rank": 150, "log_prob": 0.844192, "contribution": 0.844192}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 451.232785, "rank": 326, "max_rank": 150, "log_prob": 0.011180, "contribution": 0.011180}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1110.567444, "rank": 4, "max_rank": 150, "log_prob": 6.246898, "contribution": 6.246898}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1111.570799, "rank": 6, "max_rank": 150, "log_prob": 4.997137, "contribution": 4.997137}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1112.570684, "rank": 91, "max_rank": 150, "log_prob": 2.701031, "contribution": 2.701031}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1093.540895, "rank": 179, "max_rank": 150, "log_prob": 1.262665, "contribution": 1.262665}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 568.293172, "rank": 11, "max_rank": 150, "log_prob": 2.982299, "contribution": 2.982299}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 540.298257, "rank": 46, "max_rank": 150, "log_prob": 1.131341, "contribution": 1.131341}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 550.282608, "rank": 149, "max_rank": 150, "log_prob": 0.519311, "contribution": 0.519311}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1011.517621, "rank": 2, "max_rank": 150, "log_prob": 6.922778, "contribution": 6.922778}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1012.520976, "rank": 5, "max_rank": 150, "log_prob": 4.934068, "contribution": 4.934068}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1013.520862, "rank": 45, "max_rank": 150, "log_prob": 2.856900, "contribution": 2.856900}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 994.491072, "rank": 137, 
"max_rank": 150, "log_prob": 2.122070, "contribution": 2.122070}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 683.351046, "rank": null, "max_rank": 150, "log_prob": -0.623977, "contribution": -0.623977}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 655.356132, "rank": 440, "max_rank": 150, "log_prob": -0.222046, "contribution": -0.222046}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 665.340482, "rank": null, "max_rank": 150, "log_prob": -0.213538, "contribution": -0.213538}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 896.459747, "rank": 13, "max_rank": 150, "log_prob": 5.484282, "contribution": 5.484282}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 897.463102, "rank": 25, "max_rank": 150, "log_prob": 4.728559, "contribution": 4.728559}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 898.462987, "rank": 325, "max_rank": 150, "log_prob": 1.267557, "contribution": 1.267557}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 879.433198, "rank": 161, "max_rank": 150, "log_prob": 1.262665, "contribution": 1.262665}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 784.401875, "rank": null, "max_rank": 150, "log_prob": -0.623977, "contribution": -0.623977}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 756.406961, "rank": 195, "max_rank": 150, "log_prob": -0.222046, "contribution": -0.222046}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 766.391311, "rank": 102, "max_rank": 150, "log_prob": 0.564850, "contribution": 0.564850}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 777.398353, "rank": 269, "max_rank": 150, "log_prob": 0.109389, "contribution": 0.109389}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 795.408918, "rank": 
22, "max_rank": 150, "log_prob": 4.849399, "contribution": 4.849399}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 796.412273, "rank": 42, "max_rank": 150, "log_prob": 3.878684, "contribution": 3.878684}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 797.412158, "rank": 283, "max_rank": 150, "log_prob": 1.267557, "contribution": 1.267557}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 680.351043, "rank": 20, "max_rank": 150, "log_prob": 3.125390, "contribution": 3.125390}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 681.354398, "rank": 41, "max_rank": 150, "log_prob": 1.429155, "contribution": 1.429155}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 662.340479, "rank": 174, "max_rank": 150, "log_prob": 0.109389, "contribution": 0.109389}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 551.286123, "rank": 14, "max_rank": 150, "log_prob": 3.655170, "contribution": 3.655170}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 552.289478, "rank": 31, "max_rank": 150, "log_prob": 1.469226, "contribution": 1.469226}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 533.275558, "rank": null, "max_rank": 150, "log_prob": -0.318041, "contribution": -0.318041}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 480.250392, "rank": 17, "max_rank": 150, "log_prob": 3.272841, "contribution": 3.272841}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 481.253747, "rank": 85, "max_rank": 150, "log_prob": 1.043086, "contribution": 1.043086}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 462.239827, "rank": null, "max_rank": 150, "log_prob": -0.318041, "contribution": -0.318041}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 423.221706, "rank": 38, 
"max_rank": 150, "log_prob": 2.201083, "contribution": 2.201083}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 424.225061, "rank": 34, "max_rank": 150, "log_prob": 1.446215, "contribution": 1.446215}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 405.211142, "rank": null, "max_rank": 150, "log_prob": -0.318041, "contribution": -0.318041}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 260.139676, "rank": 50, "max_rank": 150, "log_prob": 1.753083, "contribution": 1.753083}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 261.143031, "rank": 135, "max_rank": 150, "log_prob": 0.630442, "contribution": 0.630442}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 242.129111, "rank": null, "max_rank": 150, "log_prob": -0.318041, "contribution": -0.318041}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 147.082808, "rank": null, "max_rank": 150, "log_prob": -2.332333, "contribution": -2.332333}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 148.086163, "rank": null, "max_rank": 150, "log_prob": -0.462650, "contribution": -0.462650}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 129.072243, "rank": null, "max_rank": 150, "log_prob": -0.318041, "contribution": -0.318041} + ] + }, + { + "scan": 16629, + "peptide": "ILNMNMVPDYLQK", + "charge": 2, + "rust_rank_score": 26, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 114.064693, "rank": null, "max_rank": 150, "log_prob": -0.623977, "contribution": -0.623977}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 86.069779, "rank": null, "max_rank": 150, "log_prob": -0.161271, "contribution": -0.161271}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 96.054129, "rank": null, "max_rank": 150, "log_prob": -0.213538, 
"contribution": -0.213538}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1465.746100, "rank": null, "max_rank": 150, "log_prob": -1.355310, "contribution": -1.355310}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1466.749455, "rank": null, "max_rank": 150, "log_prob": -1.013323, "contribution": -1.013323}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1467.749340, "rank": null, "max_rank": 150, "log_prob": -0.361998, "contribution": -0.361998}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1448.719551, "rank": 261, "max_rank": 150, "log_prob": 1.262665, "contribution": 1.262665}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 227.121561, "rank": 498, "max_rank": 150, "log_prob": -0.369833, "contribution": -0.369833}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 199.126647, "rank": null, "max_rank": 150, "log_prob": -0.161271, "contribution": -0.161271}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 209.110997, "rank": null, "max_rank": 150, "log_prob": -0.213538, "contribution": -0.213538}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1352.689232, "rank": null, "max_rank": 150, "log_prob": -1.355310, "contribution": -1.355310}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1353.692587, "rank": 377, "max_rank": 150, "log_prob": 0.256875, "contribution": 0.256875}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1354.692472, "rank": null, "max_rank": 150, "log_prob": -0.361998, "contribution": -0.361998}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1335.662683, "rank": null, "max_rank": 150, "log_prob": -0.261377, "contribution": -0.261377}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 341.178932, "rank": 106, "max_rank": 
150, "log_prob": 0.621059, "contribution": 0.621059}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 313.184018, "rank": 274, "max_rank": 150, "log_prob": -0.222046, "contribution": -0.222046}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 323.168368, "rank": null, "max_rank": 150, "log_prob": -0.213538, "contribution": -0.213538}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1238.631861, "rank": 21, "max_rank": 150, "log_prob": 4.890666, "contribution": 4.890666}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1239.635216, "rank": 28, "max_rank": 150, "log_prob": 4.555610, "contribution": 4.555610}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1240.635101, "rank": 360, "max_rank": 150, "log_prob": 1.267557, "contribution": 1.267557}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1221.605312, "rank": 47, "max_rank": 150, "log_prob": 2.218054, "contribution": 2.218054}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 472.244859, "rank": 223, "max_rank": 150, "log_prob": -0.369833, "contribution": -0.369833}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 444.249945, "rank": 410, "max_rank": 150, "log_prob": -0.222046, "contribution": -0.222046}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 454.234295, "rank": 458, "max_rank": 150, "log_prob": 0.011180, "contribution": 0.011180}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1107.565934, "rank": 83, "max_rank": 150, "log_prob": 2.033295, "contribution": 2.033295}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1108.569289, "rank": 54, "max_rank": 150, "log_prob": 3.474165, "contribution": 3.474165}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1109.569175, "rank": null, 
"max_rank": 150, "log_prob": -0.361998, "contribution": -0.361998}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 1090.539385, "rank": null, "max_rank": 150, "log_prob": -0.261377, "contribution": -0.261377}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 586.302230, "rank": null, "max_rank": 150, "log_prob": -0.623977, "contribution": -0.623977}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 558.307316, "rank": 170, "max_rank": 150, "log_prob": -0.222046, "contribution": -0.222046}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 568.291666, "rank": 11, "max_rank": 150, "log_prob": 2.116580, "contribution": 2.116580}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 993.508563, "rank": 33, "max_rank": 150, "log_prob": 3.985884, "contribution": 3.985884}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 994.511918, "rank": 137, "max_rank": 150, "log_prob": 1.670084, "contribution": 1.670084}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 995.511803, "rank": 413, "max_rank": 150, "log_prob": 1.267557, "contribution": 1.267557}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 976.482014, "rank": null, "max_rank": 150, "log_prob": -0.261377, "contribution": -0.261377}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 717.368157, "rank": 229, "max_rank": 150, "log_prob": -0.369833, "contribution": -0.369833}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 689.373243, "rank": null, "max_rank": 150, "log_prob": -0.161271, "contribution": -0.161271}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 699.357593, "rank": null, "max_rank": 150, "log_prob": -0.213538, "contribution": -0.213538}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 862.442636, 
"rank": 234, "max_rank": 150, "log_prob": -0.275670, "contribution": -0.275670}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 863.445991, "rank": 393, "max_rank": 150, "log_prob": 0.256875, "contribution": 0.256875}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 864.445877, "rank": null, "max_rank": 150, "log_prob": -0.361998, "contribution": -0.361998}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1073673387 }", "theo_mz": 845.416087, "rank": 224, "max_rank": 150, "log_prob": 1.262665, "contribution": 1.262665}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 788.423065, "rank": null, "max_rank": 150, "log_prob": -0.161271, "contribution": -0.161271}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 763.392814, "rank": 281, "max_rank": 150, "log_prob": -1.257424, "contribution": -1.257424}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 764.396169, "rank": 79, "max_rank": 150, "log_prob": 1.134850, "contribution": 1.134850}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 745.382249, "rank": 119, "max_rank": 150, "log_prob": 0.592532, "contribution": 0.592532}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 666.343998, "rank": 69, "max_rank": 150, "log_prob": 1.117797, "contribution": 1.117797}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 667.347353, "rank": 469, "max_rank": 150, "log_prob": 0.116307, "contribution": 0.116307}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 648.333433, "rank": 40, "max_rank": 150, "log_prob": 0.937734, "contribution": 0.937734}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 551.286123, "rank": 14, "max_rank": 150, "log_prob": 3.655170, "contribution": 3.655170}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 552.289478, 
"rank": 31, "max_rank": 150, "log_prob": 1.469226, "contribution": 1.469226}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 533.275558, "rank": null, "max_rank": 150, "log_prob": -0.318041, "contribution": -0.318041}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 388.204092, "rank": null, "max_rank": 150, "log_prob": -2.332333, "contribution": -2.332333}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 389.207447, "rank": 347, "max_rank": 150, "log_prob": 0.116307, "contribution": 0.116307}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 370.193528, "rank": 75, "max_rank": 150, "log_prob": 0.784407, "contribution": 0.784407}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 275.147224, "rank": null, "max_rank": 150, "log_prob": -2.332333, "contribution": -2.332333}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 276.150579, "rank": null, "max_rank": 150, "log_prob": -0.462650, "contribution": -0.462650}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 257.136660, "rank": 44, "max_rank": 150, "log_prob": 0.770525, "contribution": 0.770525}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 147.082808, "rank": null, "max_rank": 150, "log_prob": -2.332333, "contribution": -2.332333}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 148.086163, "rank": null, "max_rank": 150, "log_prob": -0.462650, "contribution": -0.462650}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1065418864 }", "theo_mz": 129.072243, "rank": null, "max_rank": 150, "log_prob": -0.318041, "contribution": -0.318041} + ] + } +] diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-16629.txt b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-16629.txt new file mode 100644 index 00000000..2c1d6fc4 --- /dev/null 
+++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-16629.txt @@ -0,0 +1,113 @@ +DB: 6775 target proteins, 13550 total (target+decoy) +Param: activation=HCD instrument=QExactive mme=Da(0.5) num_segments=2 num_partitions=140 error_scaling_factor=100 max_rank=150 + + --- Sample rank_dist (partition Partition { charge: 2, parent_mass: 1051.5051, seg_num: 1 }) --- + Noise freqs (first 5 ranks): [0.00014884089, 0.00024490492, 0.00032453384, 0.00037213555, 0.00041381564] + Noise freq at max_rank (150): 3.6782112 + Ion Suffix { charge: 1, offset_bits: 1101540429 }: first 5 freqs = [0.0006393862, 0.0012787724, 0.00085251493, 0.00042625747, 0.00042625747] + missing slot (150): 2.3913043 + Ion Suffix { charge: 1, offset_bits: 1073673387 }: first 5 freqs = [0.00051150896, 0.00051150896, 0.00051150896, 0.00085251493, 0.0012787724] + missing slot (150): 2.319693 + Ion Suffix { charge: 1, offset_bits: 1065418864 }: first 5 freqs = [0.00018268176, 0.00018268176, 0.00018268176, 0.00018268176, 0.00025575448] + missing slot (150): 2.5076725 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=1) = 1.4576 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=5) = 0.0296 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=20) = 1.9174 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=100) = 2.0425 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=150) = 1.2582 + scorer.missing_ion_score = -0.4306 + seg=0: ion_types_for_segment(union) = 9 ion types (prefix=4, suffix=5) + seg=1: ion_types_for_segment(union) = 5 ion types (prefix=0, suffix=5) + Partition counts per (charge, seg): + charge=2 seg=0: 33 partitions + charge=2 seg=1: 33 partitions + charge=3 seg=0: 33 partitions + charge=3 seg=1: 33 partitions + charge=4 seg=0: 4 partitions + charge=4 seg=1: 4 partitions + charge=2 seg=0: per-partition ion-list sizes min=4 median=5 max=7, union=7 + charge=2 seg=1: 
per-partition ion-list sizes min=3 median=5 max=5, union=5 + +=== Spectrum: scan=16629 precursor_mz=789.9081 charge=Some(2) peaks=515 === + spectrum partition target=(c=2 pm=1577.80 seg=0) selected=(c=2 pm=1544.80 seg=0): 6 ion types — ["S(c=1,off=19.018)", "P(c=1,off=1.008)", "S(c=1,off=20.022)", "P(c=1,off=-26.987)", "P(c=1,off=-17.003)", "S(c=1,off=1.008)"] + spectrum partition target=(c=2 pm=1577.80 seg=1) selected=(c=2 pm=1544.80 seg=1): 4 ion types — ["S(c=1,off=19.018)", "S(c=1,off=20.022)", "S(c=1,off=21.022)", "S(c=1,off=1.992)"] + Rust filtering: 0 of 515 peaks filtered (0.0%); max filtered intensity=0.0 + Filter m/z values (count=3): + 788.9076 ± 0.5000 + 789.9081 ± 0.5000 + 790.9086 ± 0.5000 + +--- Candidate windows --- + charge=2: neutral_mass=1559.7911 nominal_center=1559 window=[1558..=1559] (iso_range=[0..=1], tol_da_left=0.0078, tol_da_right=0.0078) +Yield (chunk): 1 spectra in, 0 skipped by min_peaks, 2406 candidates visited, 240 PSMs pushed, 1 spectra with non-empty queue +GF diagnostics (cumulative): 2 bin attempts, 0 EmptyScoreRange, 0 SinkUnreachable, 0 of those recovered by unthresholded retry, 0 spectra with no successful bin + +--- Rust top-10 PSMs --- + #1: peptide=ILNMNMVPDYLQK charge=2 score=26.00 spec_e_val=1.2406e-5 iso_off=0 prot_idx=4042 prot=sp|Q05043|RSF1_YEAST is_decoy=false + #2: peptide=MHAIHEIDERLAK charge=2 score=24.00 spec_e_val=3.5464e-5 iso_off=0 prot_idx=3770 prot=sp|P47149|NNF1_YEAST is_decoy=false + #3: peptide=FHTSLEQLTFLDK charge=2 score=22.00 spec_e_val=4.9839e-5 iso_off=0 prot_idx=9311 prot=XXX_sp|Q04511|UFO1_YEAST is_decoy=true + #4: peptide=SSFFDTVLSTFSLK charge=2 score=18.00 spec_e_val=4.9839e-5 iso_off=0 prot_idx=2742 prot=sp|Q08001|LAM6_YEAST is_decoy=false + #5: peptide=AVIGMGAGVMAAAAMLL charge=2 score=16.00 spec_e_val=3.4749e-4 iso_off=0 prot_idx=3351 prot=sp|P33890|TIR2_YEAST is_decoy=false + #6: peptide=EETLLTLEELEMK charge=2 score=11.00 spec_e_val=1.8564e-4 iso_off=1 prot_idx=12580 
prot=XXX_sp|O13555|JIP3_YEAST is_decoy=true + #7: peptide=QETIMKLYSGVHR charge=2 score=10.00 spec_e_val=2.9771e-4 iso_off=1 prot_idx=8821 prot=XXX_sp|P53086|KIP3_YEAST is_decoy=true + #8: peptide=MLVSGDKDRAITEK charge=2 score=10.00 spec_e_val=1.5818e-4 iso_off=0 prot_idx=7305 prot=XXX_sp|P21192|ACE2_YEAST is_decoy=true + #9: peptide=TTGIVTEISMGTVNR charge=2 score=6.00 spec_e_val=4.7133e-4 iso_off=0 prot_idx=10620 prot=XXX_sp|P53179|PALF_YEAST is_decoy=true + #10: peptide=DLKPMNIFIDESR charge=2 score=5.00 spec_e_val=5.4769e-4 iso_off=1 prot_idx=410 prot=sp|P15442|GCN2_YEAST is_decoy=false + +--- Java top-1 trace: K.IVAGQVDTDEAGYIK.T --- + Enumerator: 2 matches for residue sequence + cand_idx=318920 prot_idx=801 prot=sp|P29509|TRXB1_YEAST is_decoy=false pep_mass=1577.7937 nominal=1559 + cand_idx=319016 prot_idx=801 prot=sp|P29509|TRXB1_YEAST is_decoy=false pep_mass=1577.7937 nominal=1559 + In Rust's top-10 queue: 0 + + Per-split node_score breakdown — Java pep (K.IVAGQVDTDEAGYIK.T+2) --- + spectrum_parent_mass=1577.8016, peptide_mass=1577.7937, peptide_nominal=1559 + split=1 aa[0]=I pref_nom=113 suf_nom=1446 score=-2 (matched=1 sum=1.26, missing=6 sum=-3.73) + ions: P1.0@114.1=MISS=-0.62 | P-27.0@86.1=MISS=-0.16 | P-17.0@96.1=MISS=-0.21 | S19.0@1465.7=MISS=-1.36 | S20.0@1466.7=MISS=-1.01 | S21.0@1467.7=MISS=-0.36 | S2.0@1448.7=rk261=1.26 + split=2 aa[1]=V pref_nom=212 suf_nom=1347 score=13 (matched=4 sum=14.47, missing=3 sum=-1.00) + split=3 aa[2]=A pref_nom=283 suf_nom=1276 score=15 (matched=7 sum=15.41, missing=0 sum=0.00) + split=4 aa[3]=G pref_nom=340 suf_nom=1219 score=13 (matched=6 sum=13.33, missing=1 sum=-0.21) + ions: P1.0@341.2=rk106=0.62 | P-27.0@313.2=rk274=-0.22 | P-17.0@323.2=MISS=-0.21 | S19.0@1238.6=rk21=4.89 | S20.0@1239.6=rk28=4.56 | S21.0@1240.6=rk360=1.27 | S2.0@1221.6=rk47=2.22 + split=5 aa[4]=Q pref_nom=468 suf_nom=1091 score=18 (matched=7 sum=18.38, missing=0 sum=0.00) + split=6 aa[5]=V pref_nom=567 suf_nom=992 score=21 (matched=7 sum=21.47, 
missing=0 sum=0.00) + split=7 aa[6]=D pref_nom=682 suf_nom=877 score=12 (matched=5 sum=12.52, missing=2 sum=-0.84) + split=8 aa[7]=T pref_nom=783 suf_nom=776 score=10 (matched=6 sum=10.45, missing=1 sum=-0.62) + split=9 aa[8]=D pref_nom=898 suf_nom=661 score=5 (matched=3 sum=4.66, missing=0 sum=0.00) + split=10 aa[9]=E pref_nom=1027 suf_nom=532 score=5 (matched=2 sum=5.12, missing=1 sum=-0.32) + split=11 aa[10]=A pref_nom=1098 suf_nom=461 score=4 (matched=2 sum=4.32, missing=1 sum=-0.32) + split=12 aa[11]=G pref_nom=1155 suf_nom=404 score=3 (matched=2 sum=3.65, missing=1 sum=-0.32) + split=13 aa[12]=Y pref_nom=1318 suf_nom=241 score=2 (matched=2 sum=2.38, missing=1 sum=-0.32) + split=14 aa[13]=I pref_nom=1431 suf_nom=128 score=-3 (matched=0 sum=0.00, missing=3 sum=-3.11) + breakdown_total = 116 + score_psm total = 116 + + Per-split node_score breakdown — Rust top-1 (ILNMNMVPDYLQK +2) --- + spectrum_parent_mass=1577.8016, peptide_mass=1577.7946, peptide_nominal=1559 + split=1 aa[0]=I pref_nom=113 suf_nom=1446 score=-2 (matched=1 sum=1.26, missing=6 sum=-3.73) + ions: P1.0@114.1=MISS=-0.62 | P-27.0@86.1=MISS=-0.16 | P-17.0@96.1=MISS=-0.21 | S19.0@1465.7=MISS=-1.36 | S20.0@1466.7=MISS=-1.01 | S21.0@1467.7=MISS=-0.36 | S2.0@1448.7=rk261=1.26 + split=2 aa[1]=L pref_nom=226 suf_nom=1333 score=-2 (matched=2 sum=-0.11, missing=5 sum=-2.35) + split=3 aa[2]=N pref_nom=340 suf_nom=1219 score=13 (matched=6 sum=13.33, missing=1 sum=-0.21) + split=4 aa[3]=M pref_nom=471 suf_nom=1088 score=4 (matched=5 sum=4.93, missing=2 sum=-0.62) + ions: P1.0@472.2=rk223=-0.37 | P-27.0@444.2=rk410=-0.22 | P-17.0@454.2=rk458=0.01 | S19.0@1107.6=rk83=2.03 | S20.0@1108.6=rk54=3.47 | S21.0@1109.6=MISS=-0.36 | S2.0@1090.5=MISS=-0.26 + split=5 aa[4]=N pref_nom=585 suf_nom=974 score=8 (matched=5 sum=8.82, missing=2 sum=-0.89) + split=6 aa[5]=M pref_nom=716 suf_nom=843 score=0 (matched=4 sum=0.87, missing=3 sum=-0.74) + split=7 aa[6]=V pref_nom=815 suf_nom=744 score=0 (matched=3 sum=0.47, missing=1 
sum=-0.16) + split=8 aa[7]=P pref_nom=912 suf_nom=647 score=2 (matched=3 sum=2.17, missing=0 sum=0.00) + split=9 aa[8]=D pref_nom=1027 suf_nom=532 score=5 (matched=2 sum=5.12, missing=1 sum=-0.32) + split=10 aa[9]=Y pref_nom=1190 suf_nom=369 score=-1 (matched=2 sum=0.90, missing=1 sum=-2.33) + split=11 aa[10]=L pref_nom=1303 suf_nom=256 score=-2 (matched=1 sum=0.77, missing=2 sum=-2.79) + split=12 aa[11]=Q pref_nom=1431 suf_nom=128 score=-3 (matched=0 sum=0.00, missing=3 sum=-3.11) + breakdown_total = 22 + PSM.score (from queue) = 26 + +--- Spectrum top-10 peaks by intensity --- + rank=1 mz=684.0408 intensity=194897.69 + rank=2 mz=1011.5268 intensity=169366.95 + rank=3 mz=737.5176 intensity=114525.51 + rank=4 mz=1110.5432 intensity=101880.234 + rank=5 mz=1012.5068 intensity=72370.63 + rank=6 mz=1111.5243 intensity=61456.434 + rank=7 mz=781.1710 intensity=58671.855 + rank=8 mz=1295.5651 intensity=57269.816 + rank=9 mz=1366.5999 intensity=53504.457 + rank=10 mz=1367.6660 intensity=43431.918 diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23082.json b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23082.json new file mode 100644 index 00000000..d06c30ce --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23082.json @@ -0,0 +1,148 @@ +[ + { + "scan": 23082, + "peptide": "K.NQQIVAGKPLYVAIAQR.K", + "charge": 2, + "rust_rank_score": 117, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 115.065196, "rank": null, "max_rank": 150, "log_prob": -0.701926, "contribution": -0.701926}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 97.054632, "rank": null, "max_rank": 150, "log_prob": -0.298984, "contribution": -0.298984}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 87.070282, "rank": null, "max_rank": 150, "log_prob": -0.217061, "contribution": -0.217061}, + {"ion_type": "Suffix { charge: 1, 
offset_bits: 1100490154 }", "theo_mz": 1754.891541, "rank": null, "max_rank": 150, "log_prob": -0.968023, "contribution": -0.968023}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1755.894896, "rank": null, "max_rank": 150, "log_prob": -0.769719, "contribution": -0.769719}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1756.894782, "rank": null, "max_rank": 150, "log_prob": -0.322139, "contribution": -0.322139}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 243.129613, "rank": null, "max_rank": 150, "log_prob": -0.701926, "contribution": -0.701926}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 225.119049, "rank": null, "max_rank": 150, "log_prob": -0.298984, "contribution": -0.298984}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 215.134699, "rank": null, "max_rank": 150, "log_prob": -0.217061, "contribution": -0.217061}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1626.827124, "rank": 197, "max_rank": 150, "log_prob": 0.217363, "contribution": 0.217363}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1627.830479, "rank": 97, "max_rank": 150, "log_prob": 2.204627, "contribution": 2.204627}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1628.830365, "rank": null, "max_rank": 150, "log_prob": -0.322139, "contribution": -0.322139}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 371.194030, "rank": 14, "max_rank": 150, "log_prob": 2.964250, "contribution": 2.964250}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 353.183466, "rank": 39, "max_rank": 150, "log_prob": 1.748809, "contribution": 1.748809}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 343.199116, "rank": 347, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": 
"Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1498.762707, "rank": 27, "max_rank": 150, "log_prob": 4.616591, "contribution": 4.616591}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1499.766062, "rank": 43, "max_rank": 150, "log_prob": 3.934901, "contribution": 3.934901}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1500.765948, "rank": 131, "max_rank": 150, "log_prob": 2.112811, "contribution": 2.112811}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 484.250898, "rank": 11, "max_rank": 150, "log_prob": 3.183289, "contribution": 3.183289}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 466.240334, "rank": 147, "max_rank": 150, "log_prob": 0.449047, "contribution": 0.449047}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 456.255984, "rank": 557, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1385.705839, "rank": 8, "max_rank": 150, "log_prob": 6.303275, "contribution": 6.303275}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1386.709194, "rank": 9, "max_rank": 150, "log_prob": 5.700593, "contribution": 5.700593}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1387.709080, "rank": 42, "max_rank": 150, "log_prob": 3.369051, "contribution": 3.369051}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 583.300720, "rank": 36, "max_rank": 150, "log_prob": 2.152802, "contribution": 2.152802}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 565.290156, "rank": 69, "max_rank": 150, "log_prob": 1.111448, "contribution": 1.111448}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 555.305806, "rank": 203, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { 
charge: 1, offset_bits: 1100490154 }", "theo_mz": 1286.656017, "rank": 5, "max_rank": 150, "log_prob": 6.565401, "contribution": 6.565401}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1287.659372, "rank": 6, "max_rank": 150, "log_prob": 5.790483, "contribution": 5.790483}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1288.659258, "rank": 16, "max_rank": 150, "log_prob": 3.467032, "contribution": 3.467032}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 654.336452, "rank": 20, "max_rank": 150, "log_prob": 2.651304, "contribution": 2.651304}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 636.325888, "rank": 139, "max_rank": 150, "log_prob": 0.789284, "contribution": 0.789284}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 626.341537, "rank": 64, "max_rank": 150, "log_prob": 1.083132, "contribution": 1.083132}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1215.620286, "rank": 3, "max_rank": 150, "log_prob": 6.870714, "contribution": 6.870714}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1216.623641, "rank": 4, "max_rank": 150, "log_prob": 5.866827, "contribution": 5.866827}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1217.623526, "rank": 23, "max_rank": 150, "log_prob": 3.619225, "contribution": 3.619225}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 711.365137, "rank": 38, "max_rank": 150, "log_prob": 2.067514, "contribution": 2.067514}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 693.354573, "rank": 54, "max_rank": 150, "log_prob": 1.477341, "contribution": 1.477341}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 683.370223, "rank": 18, "max_rank": 150, "log_prob": 2.034404, "contribution": 2.034404}, + {"ion_type": "Suffix { charge: 1, 
offset_bits: 1100490154 }", "theo_mz": 1158.591600, "rank": 12, "max_rank": 150, "log_prob": 5.952747, "contribution": 5.952747}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1159.594955, "rank": 22, "max_rank": 150, "log_prob": 5.033666, "contribution": 5.033666}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1160.594841, "rank": 140, "max_rank": 150, "log_prob": 1.900439, "contribution": 1.900439}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 839.429554, "rank": 40, "max_rank": 150, "log_prob": 2.020348, "contribution": 2.020348}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 821.418990, "rank": 75, "max_rank": 150, "log_prob": 1.150058, "contribution": 1.150058}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 811.434640, "rank": 428, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1030.527183, "rank": 1, "max_rank": 150, "log_prob": 7.229656, "contribution": 7.229656}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1031.530538, "rank": 2, "max_rank": 150, "log_prob": 6.143569, "contribution": 6.143569}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1032.530424, "rank": 13, "max_rank": 150, "log_prob": 3.457901, "contribution": 3.457901}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 918.467806, "rank": 24, "max_rank": 150, "log_prob": 2.066429, "contribution": 2.066429}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 908.483456, "rank": 303, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 933.478367, "rank": null, "max_rank": 150, "log_prob": -1.921809, "contribution": -1.921809}, + {"ion_type": "Suffix { charge: 1, 
offset_bits: 1101016201 }", "theo_mz": 934.481722, "rank": null, "max_rank": 150, "log_prob": -0.769719, "contribution": -0.769719}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 935.481608, "rank": null, "max_rank": 150, "log_prob": -0.322139, "contribution": -0.322139}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 820.421499, "rank": 52, "max_rank": 150, "log_prob": 1.839289, "contribution": 1.839289}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 821.424854, "rank": 75, "max_rank": 150, "log_prob": 1.366281, "contribution": 1.366281}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 657.339468, "rank": 37, "max_rank": 150, "log_prob": 2.500389, "contribution": 2.500389}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 658.342823, "rank": 78, "max_rank": 150, "log_prob": 1.333236, "contribution": 1.333236}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 558.289646, "rank": 33, "max_rank": 150, "log_prob": 2.696384, "contribution": 2.696384}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 559.293001, "rank": 212, "max_rank": 150, "log_prob": 0.179303, "contribution": 0.179303}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 487.253915, "rank": 132, "max_rank": 150, "log_prob": 0.057568, "contribution": 0.057568}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 488.257270, "rank": 746, "max_rank": 150, "log_prob": 0.179303, "contribution": 0.179303}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 374.197047, "rank": 79, "max_rank": 150, "log_prob": 1.013812, "contribution": 1.013812}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 375.200402, "rank": 470, "max_rank": 150, "log_prob": 0.179303, "contribution": 0.179303}, + {"ion_type": "Suffix { charge: 1, 
offset_bits: 1100490154 }", "theo_mz": 303.161316, "rank": 335, "max_rank": 150, "log_prob": -0.775085, "contribution": -0.775085}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 304.164671, "rank": 762, "max_rank": 150, "log_prob": 0.179303, "contribution": 0.179303}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 175.096899, "rank": null, "max_rank": 150, "log_prob": -1.921809, "contribution": -1.921809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 176.100254, "rank": null, "max_rank": 150, "log_prob": -0.494262, "contribution": -0.494262} + ] + }, + { + "scan": 23082, + "peptide": "ELPLSIGILFKRYYR", + "charge": 2, + "rust_rank_score": 25, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 130.072745, "rank": null, "max_rank": 150, "log_prob": -0.701926, "contribution": -0.701926}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 112.062181, "rank": null, "max_rank": 150, "log_prob": -0.298984, "contribution": -0.298984}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 102.077831, "rank": null, "max_rank": 150, "log_prob": -0.217061, "contribution": -0.217061}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1738.883489, "rank": null, "max_rank": 150, "log_prob": -0.968023, "contribution": -0.968023}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1739.886844, "rank": null, "max_rank": 150, "log_prob": -0.769719, "contribution": -0.769719}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1740.886730, "rank": 336, "max_rank": 150, "log_prob": 1.221443, "contribution": 1.221443}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 243.129613, "rank": null, "max_rank": 150, "log_prob": -0.701926, "contribution": -0.701926}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", 
"theo_mz": 225.119049, "rank": null, "max_rank": 150, "log_prob": -0.298984, "contribution": -0.298984}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 215.134699, "rank": null, "max_rank": 150, "log_prob": -0.217061, "contribution": -0.217061}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1625.826621, "rank": null, "max_rank": 150, "log_prob": -0.968023, "contribution": -0.968023}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1626.829976, "rank": 197, "max_rank": 150, "log_prob": 0.515732, "contribution": 0.515732}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1627.829862, "rank": 97, "max_rank": 150, "log_prob": 2.639945, "contribution": 2.639945}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 340.178429, "rank": null, "max_rank": 150, "log_prob": -0.701926, "contribution": -0.701926}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 322.167865, "rank": 598, "max_rank": 150, "log_prob": 0.095750, "contribution": 0.095750}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 312.183515, "rank": 614, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1528.777805, "rank": 610, "max_rank": 150, "log_prob": 0.217363, "contribution": 0.217363}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1529.781160, "rank": null, "max_rank": 150, "log_prob": -0.769719, "contribution": -0.769719}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1530.781046, "rank": null, "max_rank": 150, "log_prob": -0.322139, "contribution": -0.322139}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 453.235297, "rank": 144, "max_rank": 150, "log_prob": 0.628619, "contribution": 0.628619}, + {"ion_type": "Prefix { charge: 1, offset_bits: 
3246917020 }", "theo_mz": 435.224733, "rank": null, "max_rank": 150, "log_prob": -0.298984, "contribution": -0.298984}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 425.240383, "rank": 234, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1415.720937, "rank": 432, "max_rank": 150, "log_prob": 0.217363, "contribution": 0.217363}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1416.724292, "rank": null, "max_rank": 150, "log_prob": -0.769719, "contribution": -0.769719}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1417.724178, "rank": null, "max_rank": 150, "log_prob": -0.322139, "contribution": -0.322139}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 540.279080, "rank": 217, "max_rank": 150, "log_prob": -0.247218, "contribution": -0.247218}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 522.268516, "rank": null, "max_rank": 150, "log_prob": -0.298984, "contribution": -0.298984}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 512.284166, "rank": 529, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1328.677154, "rank": null, "max_rank": 150, "log_prob": -0.968023, "contribution": -0.968023}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1329.680509, "rank": null, "max_rank": 150, "log_prob": -0.769719, "contribution": -0.769719}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1330.680394, "rank": null, "max_rank": 150, "log_prob": -0.322139, "contribution": -0.322139}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 653.335948, "rank": null, "max_rank": 150, "log_prob": -0.701926, "contribution": -0.701926}, + {"ion_type": "Prefix { 
charge: 1, offset_bits: 3246917020 }", "theo_mz": 635.325384, "rank": null, "max_rank": 150, "log_prob": -0.298984, "contribution": -0.298984}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 625.341034, "rank": 314, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1215.620286, "rank": 3, "max_rank": 150, "log_prob": 6.870714, "contribution": 6.870714}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1216.623641, "rank": 4, "max_rank": 150, "log_prob": 5.866827, "contribution": 5.866827}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1217.623526, "rank": 23, "max_rank": 150, "log_prob": 3.619225, "contribution": 3.619225}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 710.364634, "rank": null, "max_rank": 150, "log_prob": -0.701926, "contribution": -0.701926}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 692.354070, "rank": null, "max_rank": 150, "log_prob": -0.298984, "contribution": -0.298984}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 682.369720, "rank": 517, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1158.591600, "rank": 12, "max_rank": 150, "log_prob": 5.952747, "contribution": 5.952747}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1159.594955, "rank": 22, "max_rank": 150, "log_prob": 5.033666, "contribution": 5.033666}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1160.594841, "rank": 140, "max_rank": 150, "log_prob": 1.900439, "contribution": 1.900439}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 823.421502, "rank": 447, "max_rank": 150, "log_prob": -0.247218, "contribution": -0.247218}, + {"ion_type": "Prefix 
{ charge: 1, offset_bits: 3246917020 }", "theo_mz": 805.410938, "rank": 26, "max_rank": 150, "log_prob": 2.006292, "contribution": 2.006292}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 795.426588, "rank": 551, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1045.534732, "rank": null, "max_rank": 150, "log_prob": -0.968023, "contribution": -0.968023}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1046.538087, "rank": 784, "max_rank": 150, "log_prob": 0.515732, "contribution": 0.515732}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1047.537973, "rank": null, "max_rank": 150, "log_prob": -0.322139, "contribution": -0.322139}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 918.467806, "rank": 24, "max_rank": 150, "log_prob": 2.066429, "contribution": 2.066429}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3252151695 }", "theo_mz": 908.483456, "rank": 303, "max_rank": 150, "log_prob": 0.061501, "contribution": 0.061501}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 932.477864, "rank": null, "max_rank": 150, "log_prob": -1.921809, "contribution": -1.921809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 933.481219, "rank": null, "max_rank": 150, "log_prob": -0.494262, "contribution": -0.494262}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 934.481105, "rank": null, "max_rank": 150, "log_prob": -0.322139, "contribution": -0.322139}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 785.403885, "rank": 337, "max_rank": 150, "log_prob": -0.775085, "contribution": -0.775085}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 786.407240, "rank": null, "max_rank": 150, "log_prob": -0.494262, "contribution": -0.494262}, + 
{"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 657.339468, "rank": 37, "max_rank": 150, "log_prob": 2.500389, "contribution": 2.500389}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 658.342823, "rank": 78, "max_rank": 150, "log_prob": 1.333236, "contribution": 1.333236}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 501.260960, "rank": 521, "max_rank": 150, "log_prob": -0.775085, "contribution": -0.775085}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 502.264315, "rank": null, "max_rank": 150, "log_prob": -0.494262, "contribution": -0.494262}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 338.178930, "rank": 345, "max_rank": 150, "log_prob": -0.775085, "contribution": -0.775085}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 339.182285, "rank": 573, "max_rank": 150, "log_prob": 0.179303, "contribution": 0.179303}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 175.096899, "rank": null, "max_rank": 150, "log_prob": -1.921809, "contribution": -1.921809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 176.100254, "rank": null, "max_rank": 150, "log_prob": -0.494262, "contribution": -0.494262} + ] + } +] diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23082.txt b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23082.txt new file mode 100644 index 00000000..141292d2 --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23082.txt @@ -0,0 +1,114 @@ +DB: 6775 target proteins, 13550 total (target+decoy) +Param: activation=HCD instrument=QExactive mme=Da(0.5) num_segments=2 num_partitions=140 error_scaling_factor=100 max_rank=150 + + --- Sample rank_dist (partition Partition { charge: 2, parent_mass: 1102.5151, seg_num: 0 }) --- + Noise freqs (first 5 ranks): 
[0.0021478822, 0.0022566533, 0.0025359367, 0.0026414671, 0.002777083] + Noise freq at max_rank (150): 2.4455655 + Ion Suffix { charge: 1, offset_bits: 1101016201 }: first 5 freqs = [0.0012787724, 0.0012787724, 0.0038363172, 0.003836317, 0.003836317] + missing slot (150): 1.4974425 + Ion Prefix { charge: 1, offset_bits: 3252151695 }: first 5 freqs = [0.32097188, 0.14450128, 0.0971867, 0.07118499, 0.053282183] + missing slot (150): 2.3388746 + Ion Prefix { charge: 1, offset_bits: 1065418857 }: first 5 freqs = [0.06649616, 0.103580564, 0.11636829, 0.09974424, 0.08994033] + missing slot (150): 1.5140665 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=1) = -0.5186 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=5) = 0.3231 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=20) = 0.7573 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=100) = 0.6645 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=150) = 0.2713 + scorer.missing_ion_score = -0.4905 + seg=0: ion_types_for_segment(union) = 9 ion types (prefix=4, suffix=5) + seg=1: ion_types_for_segment(union) = 5 ion types (prefix=0, suffix=5) + Partition counts per (charge, seg): + charge=2 seg=0: 33 partitions + charge=2 seg=1: 33 partitions + charge=3 seg=0: 33 partitions + charge=3 seg=1: 33 partitions + charge=4 seg=0: 4 partitions + charge=4 seg=1: 4 partitions + charge=2 seg=0: per-partition ion-list sizes min=4 median=5 max=7, union=7 + charge=2 seg=1: per-partition ion-list sizes min=3 median=5 max=5, union=5 + +=== Spectrum: scan=23082 precursor_mz=935.0436 charge=Some(2) peaks=805 === + spectrum partition target=(c=2 pm=1868.07 seg=0) selected=(c=2 pm=1860.97 seg=0): 5 ion types — ["S(c=1,off=19.018)", "P(c=1,off=1.008)", "S(c=1,off=20.022)", "P(c=1,off=-17.003)", "P(c=1,off=-26.987)"] + spectrum partition target=(c=2 pm=1868.07 seg=1) selected=(c=2 pm=1860.97 seg=1): 3 ion types — 
["S(c=1,off=19.018)", "S(c=1,off=20.022)", "S(c=1,off=21.022)"] + Rust filtering: 0 of 805 peaks filtered (0.0%); max filtered intensity=0.0 + Filter m/z values (count=3): + 934.0431 ± 0.5000 + 935.0436 ± 0.5000 + 936.0441 ± 0.5000 + +--- Candidate windows --- + charge=2: neutral_mass=1850.0621 nominal_center=1849 window=[1848..=1849] (iso_range=[0..=1], tol_da_left=0.0093, tol_da_right=0.0093) +Yield (chunk): 1 spectra in, 0 skipped by min_peaks, 2079 candidates visited, 71 PSMs pushed, 1 spectra with non-empty queue +GF diagnostics (cumulative): 2 bin attempts, 0 EmptyScoreRange, 0 SinkUnreachable, 0 of those recovered by unthresholded retry, 0 spectra with no successful bin + +--- Rust top-7 PSMs --- + #1: peptide=ELPLSIGILFKRYYR charge=2 score=25.00 spec_e_val=7.7520e-5 iso_off=1 prot_idx=2219 prot=sp|P53917|FAR11_YEAST is_decoy=false + #2: peptide=TIGVITKLDLVDPEKAR charge=2 score=25.00 spec_e_val=1.4476e-4 iso_off=1 prot_idx=843 prot=sp|P32266|MGM1_YEAST is_decoy=false + #3: peptide=LLLLEKENADLLNELK charge=2 score=23.00 spec_e_val=1.0593e-4 iso_off=1 prot_idx=1732 prot=sp|P40957|MAD1_YEAST is_decoy=false + #4: peptide=KFPKFTHQTAVIPVQK charge=2 score=19.00 spec_e_val=9.0625e-5 iso_off=0 prot_idx=2875 prot=sp|Q12150|CSF1_YEAST is_decoy=false + #5: peptide=LENLLDANEKELLLLK charge=2 score=10.00 spec_e_val=6.1027e-4 iso_off=1 prot_idx=8507 prot=XXX_sp|P40957|MAD1_YEAST is_decoy=true + #6: peptide=TRLPPIPRMTVTLTTR charge=2 score=8.00 spec_e_val=4.4089e-4 iso_off=0 prot_idx=5687 prot=sp|A0A023PXD3|YE88A_YEAST is_decoy=false + #7: peptide=LQDKSVNIQLNKLLDK charge=2 score=4.00 spec_e_val=6.1027e-4 iso_off=0 prot_idx=5623 prot=sp|Q12253|YL046_YEAST is_decoy=false + +--- Java top-1 trace: K.NQQIVAGKPLYVAIAQR.K --- + Enumerator: 2 matches for residue sequence + cand_idx=23279 prot_idx=77 prot=sp|P04147|PABP_YEAST is_decoy=false pep_mass=1868.0632 nominal=1849 + cand_idx=23527 prot_idx=77 prot=sp|P04147|PABP_YEAST is_decoy=false pep_mass=1868.0632 nominal=1849 + In Rust's 
top-7 queue: 0 + + Per-split node_score breakdown — Java pep (K.NQQIVAGKPLYVAIAQR.K+2) --- + spectrum_parent_mass=1868.0726, peptide_mass=1868.0632, peptide_nominal=1849 + split=1 aa[0]=N pref_nom=114 suf_nom=1735 score=-3 (matched=0 sum=0.00, missing=6 sum=-3.28) + ions: P1.0@115.1=MISS=-0.70 | P-17.0@97.1=MISS=-0.30 | P-27.0@87.1=MISS=-0.22 | S19.0@1754.9=MISS=-0.97 | S20.0@1755.9=MISS=-0.77 | S21.0@1756.9=MISS=-0.32 + split=2 aa[1]=Q pref_nom=242 suf_nom=1607 score=1 (matched=2 sum=2.42, missing=4 sum=-1.54) + split=3 aa[2]=Q pref_nom=370 suf_nom=1479 score=15 (matched=6 sum=15.44, missing=0 sum=0.00) + split=4 aa[3]=I pref_nom=483 suf_nom=1366 score=19 (matched=6 sum=19.07, missing=0 sum=0.00) + ions: P1.0@484.3=rk11=3.18 | P-17.0@466.2=rk147=0.45 | P-27.0@456.3=rk557=0.06 | S19.0@1385.7=rk8=6.30 | S20.0@1386.7=rk9=5.70 | S21.0@1387.7=rk42=3.37 + split=5 aa[4]=V pref_nom=582 suf_nom=1267 score=19 (matched=6 sum=19.15, missing=0 sum=0.00) + split=6 aa[5]=A pref_nom=653 suf_nom=1196 score=21 (matched=6 sum=20.88, missing=0 sum=0.00) + split=7 aa[6]=G pref_nom=710 suf_nom=1139 score=18 (matched=6 sum=18.47, missing=0 sum=0.00) + split=8 aa[7]=K pref_nom=838 suf_nom=1011 score=20 (matched=6 sum=20.06, missing=0 sum=0.00) + split=9 aa[8]=P pref_nom=935 suf_nom=914 score=-1 (matched=2 sum=2.13, missing=3 sum=-3.01) + split=10 aa[9]=L pref_nom=1048 suf_nom=801 score=3 (matched=2 sum=3.21, missing=0 sum=0.00) + split=11 aa[10]=Y pref_nom=1211 suf_nom=638 score=4 (matched=2 sum=3.83, missing=0 sum=0.00) + split=12 aa[11]=V pref_nom=1310 suf_nom=539 score=3 (matched=2 sum=2.88, missing=0 sum=0.00) + split=13 aa[12]=A pref_nom=1381 suf_nom=468 score=0 (matched=2 sum=0.24, missing=0 sum=0.00) + split=14 aa[13]=I pref_nom=1494 suf_nom=355 score=1 (matched=2 sum=1.19, missing=0 sum=0.00) + split=15 aa[14]=A pref_nom=1565 suf_nom=284 score=-1 (matched=2 sum=-0.60, missing=0 sum=0.00) + split=16 aa[15]=Q pref_nom=1693 suf_nom=156 score=-2 (matched=0 sum=0.00, missing=2 
sum=-2.42) + breakdown_total = 117 + score_psm total = 117 + + Per-split node_score breakdown — Rust top-1 (ELPLSIGILFKRYYR +2) --- + spectrum_parent_mass=1868.0726, peptide_mass=1867.0720, peptide_nominal=1848 + split=1 aa[0]=E pref_nom=129 suf_nom=1719 score=-2 (matched=1 sum=1.22, missing=5 sum=-2.96) + ions: P1.0@130.1=MISS=-0.70 | P-17.0@112.1=MISS=-0.30 | P-27.0@102.1=MISS=-0.22 | S19.0@1738.9=MISS=-0.97 | S20.0@1739.9=MISS=-0.77 | S21.0@1740.9=rk336=1.22 + split=2 aa[1]=L pref_nom=242 suf_nom=1606 score=1 (matched=2 sum=3.16, missing=4 sum=-2.19) + split=3 aa[2]=P pref_nom=339 suf_nom=1509 score=-1 (matched=3 sum=0.37, missing=3 sum=-1.79) + split=4 aa[3]=L pref_nom=452 suf_nom=1396 score=0 (matched=3 sum=0.91, missing=3 sum=-1.39) + ions: P1.0@453.2=rk144=0.63 | P-17.0@435.2=MISS=-0.30 | P-27.0@425.2=rk234=0.06 | S19.0@1415.7=rk432=0.22 | S20.0@1416.7=MISS=-0.77 | S21.0@1417.7=MISS=-0.32 + split=5 aa[4]=S pref_nom=539 suf_nom=1309 score=-3 (matched=2 sum=-0.19, missing=4 sum=-2.36) + split=6 aa[5]=I pref_nom=652 suf_nom=1196 score=15 (matched=4 sum=16.42, missing=2 sum=-1.00) + split=7 aa[6]=G pref_nom=709 suf_nom=1139 score=12 (matched=4 sum=12.95, missing=2 sum=-1.00) + split=8 aa[7]=I pref_nom=822 suf_nom=1026 score=1 (matched=4 sum=2.34, missing=2 sum=-1.29) + split=9 aa[8]=L pref_nom=935 suf_nom=913 score=-1 (matched=2 sum=2.13, missing=3 sum=-2.74) + split=10 aa[9]=F pref_nom=1082 suf_nom=766 score=-1 (matched=1 sum=-0.78, missing=1 sum=-0.49) + split=11 aa[10]=K pref_nom=1210 suf_nom=638 score=4 (matched=2 sum=3.83, missing=0 sum=0.00) + split=12 aa[11]=R pref_nom=1366 suf_nom=482 score=-1 (matched=1 sum=-0.78, missing=1 sum=-0.49) + split=13 aa[12]=Y pref_nom=1529 suf_nom=319 score=-1 (matched=2 sum=-0.60, missing=0 sum=0.00) + split=14 aa[13]=Y pref_nom=1692 suf_nom=156 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.42) + breakdown_total = 21 + PSM.score (from queue) = 25 + +--- Spectrum top-10 peaks by intensity --- + rank=1 mz=1030.6191 
intensity=33224.535 + rank=2 mz=1031.6281 intensity=21002.344 + rank=3 mz=1215.6724 intensity=16402.871 + rank=4 mz=1216.6705 intensity=13331.686 + rank=5 mz=1286.7096 intensity=12867.501 + rank=6 mz=1287.6866 intensity=11902.0205 + rank=7 mz=926.3611 intensity=10222.694 + rank=8 mz=1385.7766 intensity=10082.882 + rank=9 mz=1386.7909 intensity=7898.405 + rank=10 mz=737.4462 intensity=7465.52 diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23272.json b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23272.json new file mode 100644 index 00000000..1bb2cf16 --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23272.json @@ -0,0 +1,144 @@ +[ + { + "scan": 23272, + "peptide": "K.LLYTIPTGQNPTGTSIADHR.K", + "charge": 2, + "rust_rank_score": 107, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 114.064693, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 96.054129, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2042.035976, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2043.039331, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2044.039216, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 227.121561, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 209.110997, "rank": null, "max_rank": 150, "log_prob": 
-0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1928.979108, "rank": 110, "max_rank": 150, "log_prob": 2.428936, "contribution": 2.428936}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1929.982463, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1930.982348, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 390.203592, "rank": 608, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 372.193028, "rank": 383, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1765.897077, "rank": 54, "max_rank": 150, "log_prob": 3.726607, "contribution": 3.726607}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1766.900432, "rank": 405, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1767.900318, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 491.254421, "rank": 37, "max_rank": 150, "log_prob": 2.369332, "contribution": 2.369332}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 473.243857, "rank": 18, "max_rank": 150, "log_prob": 2.277232, "contribution": 2.277232}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1664.846248, "rank": 204, "max_rank": 150, "log_prob": 0.907424, "contribution": 0.907424}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1665.849603, "rank": 68, "max_rank": 150, 
"log_prob": 3.450078, "contribution": 3.450078}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1666.849489, "rank": 135, "max_rank": 150, "log_prob": 2.474289, "contribution": 2.474289}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 604.311289, "rank": 57, "max_rank": 150, "log_prob": 1.970443, "contribution": 1.970443}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 586.300725, "rank": 8, "max_rank": 150, "log_prob": 2.711700, "contribution": 2.711700}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1551.789380, "rank": 1, "max_rank": 150, "log_prob": 7.562068, "contribution": 7.562068}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1552.792735, "rank": 2, "max_rank": 150, "log_prob": 7.331127, "contribution": 7.331127}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1553.792621, "rank": 4, "max_rank": 150, "log_prob": 3.921490, "contribution": 3.921490}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 701.360105, "rank": 623, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 683.349541, "rank": 699, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1454.740564, "rank": 45, "max_rank": 150, "log_prob": 4.136743, "contribution": 4.136743}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1455.743919, "rank": 349, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1456.743805, "rank": 504, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 802.410934, "rank": null, "max_rank": 150, 
"log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 784.400370, "rank": 104, "max_rank": 150, "log_prob": 1.086040, "contribution": 1.086040}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1353.689735, "rank": 19, "max_rank": 150, "log_prob": 5.557214, "contribution": 5.557214}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1354.693090, "rank": 64, "max_rank": 150, "log_prob": 3.609103, "contribution": 3.609103}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1355.692976, "rank": 152, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 859.439619, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 841.429055, "rank": 280, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1296.661050, "rank": 38, "max_rank": 150, "log_prob": 4.401398, "contribution": 4.401398}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1297.664405, "rank": 679, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1298.664290, "rank": 589, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 987.504036, "rank": 86, "max_rank": 150, "log_prob": 1.254936, "contribution": 1.254936}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 969.493472, "rank": 56, "max_rank": 150, "log_prob": 1.814275, "contribution": 1.814275}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1168.596633, "rank": 13, "max_rank": 150, 
"log_prob": 6.121079, "contribution": 6.121079}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1169.599988, "rank": 32, "max_rank": 150, "log_prob": 4.738236, "contribution": 4.738236}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1170.599873, "rank": 50, "max_rank": 150, "log_prob": 3.952926, "contribution": 3.952926}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1054.539261, "rank": 6, "max_rank": 150, "log_prob": 4.506861, "contribution": 4.506861}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1055.542616, "rank": 9, "max_rank": 150, "log_prob": 2.780396, "contribution": 2.780396}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 957.490445, "rank": 111, "max_rank": 150, "log_prob": 0.538585, "contribution": 0.538585}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 958.493801, "rank": 261, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 856.439617, "rank": 24, "max_rank": 150, "log_prob": 3.512598, "contribution": 3.512598}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 857.442972, "rank": 122, "max_rank": 150, "log_prob": 1.045194, "contribution": 1.045194}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 799.410931, "rank": 60, "max_rank": 150, "log_prob": 1.918315, "contribution": 1.918315}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 800.414286, "rank": 166, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 698.360102, "rank": 35, "max_rank": 150, "log_prob": 2.807866, "contribution": 2.807866}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 699.363457, "rank": 156, "max_rank": 150, "log_prob": 
0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 611.316319, "rank": 128, "max_rank": 150, "log_prob": 0.191495, "contribution": 0.191495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 612.319674, "rank": 191, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 498.259451, "rank": 41, "max_rank": 150, "log_prob": 2.546604, "contribution": 2.546604}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 499.262806, "rank": 69, "max_rank": 150, "log_prob": 1.829933, "contribution": 1.829933}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 427.223719, "rank": 399, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 428.227074, "rank": 66, "max_rank": 150, "log_prob": 1.871075, "contribution": 1.871075}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 312.165845, "rank": 31, "max_rank": 150, "log_prob": 3.080516, "contribution": 3.080516}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 313.169200, "rank": 143, "max_rank": 150, "log_prob": 0.907243, "contribution": 0.907243}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 175.096899, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 176.100254, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495} + ] + }, + { + "scan": 23272, + "peptide": "FLVENELSGKGWYENKIK", + "charge": 2, + "rust_rank_score": 30, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 148.081804, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + 
{"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 130.071240, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2007.018362, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2008.021717, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2009.021602, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 261.138672, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 243.128108, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1893.961494, "rank": 167, "max_rank": 150, "log_prob": 0.907424, "contribution": 0.907424}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1894.964849, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1895.964734, "rank": 429, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 360.188494, "rank": 26, "max_rank": 150, "log_prob": 2.813584, "contribution": 2.813584}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 342.177930, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1794.911671, "rank": null, "max_rank": 150, "log_prob": -0.573939, 
"contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1795.915026, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1796.914912, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 489.253414, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 471.242850, "rank": 494, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1665.846751, "rank": 68, "max_rank": 150, "log_prob": 3.443344, "contribution": 3.443344}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1666.850106, "rank": 135, "max_rank": 150, "log_prob": 2.353661, "contribution": 2.353661}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1667.849992, "rank": 497, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 603.310786, "rank": 571, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 585.300222, "rank": 269, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1551.789380, "rank": 1, "max_rank": 150, "log_prob": 7.562068, "contribution": 7.562068}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1552.792735, "rank": 2, "max_rank": 150, "log_prob": 7.331127, "contribution": 7.331127}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1553.792621, "rank": 4, "max_rank": 150, "log_prob": 
3.921490, "contribution": 3.921490}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 732.375706, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 714.365142, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1422.724460, "rank": 138, "max_rank": 150, "log_prob": 1.878267, "contribution": 1.878267}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1423.727815, "rank": 188, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1424.727700, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 845.432574, "rank": 613, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 827.422010, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1309.667592, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1310.670947, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1311.670832, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 932.476357, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 914.465793, "rank": 585, 
"max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1222.623809, "rank": 556, "max_rank": 150, "log_prob": 0.907424, "contribution": 0.907424}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1223.627164, "rank": 61, "max_rank": 150, "log_prob": 3.623470, "contribution": 3.623470}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1224.627049, "rank": 264, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 989.505043, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 971.494479, "rank": 245, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1165.595123, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1166.598478, "rank": 362, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1167.598363, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1037.530706, "rank": 70, "max_rank": 150, "log_prob": 1.486912, "contribution": 1.486912}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1038.534061, "rank": 78, "max_rank": 150, "log_prob": 1.631469, "contribution": 1.631469}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 980.502020, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 981.505375, 
"rank": 596, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 794.408415, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 795.411770, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 631.326384, "rank": 338, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 632.329739, "rank": 284, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 502.261464, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 503.264819, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 388.204092, "rank": 367, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 389.207447, "rank": 569, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 260.139676, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 261.143031, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 147.082808, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", 
"theo_mz": 148.086163, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495} + ] + } +] diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23272.txt b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23272.txt new file mode 100644 index 00000000..44d75dba --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-23272.txt @@ -0,0 +1,120 @@ +DB: 6775 target proteins, 13550 total (target+decoy) +Param: activation=HCD instrument=QExactive mme=Da(0.5) num_segments=2 num_partitions=140 error_scaling_factor=100 max_rank=150 + + --- Sample rank_dist (partition Partition { charge: 2, parent_mass: 1102.5151, seg_num: 1 }) --- + Noise freqs (first 5 ranks): [0.00015125256, 0.00031003382, 0.00034361336, 0.0003256188, 0.00038110753] + Noise freq at max_rank (150): 3.8888485 + Ion Suffix { charge: 1, offset_bits: 1101016201 }: first 5 freqs = [0.0019181586, 0.0038363172, 0.017902814, 0.03537937, 0.042625744] + missing slot (150): 1.0537084 + Ion Suffix { charge: 1, offset_bits: 1100490154 }: first 5 freqs = [0.1943734, 0.26598465, 0.22378516, 0.21867009, 0.20332481] + missing slot (150): 0.57289004 + Ion Suffix { charge: 1, offset_bits: 1073673387 }: first 5 freqs = [0.0012787724, 0.0025575447, 0.0025575447, 0.0029838022, 0.0034100597] + missing slot (150): 2.578005 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=1) = 2.5402 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=5) = 4.7171 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=20) = 4.3780 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=100) = 1.7850 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=150) = 0.4083 + scorer.missing_ion_score = -1.3058 + seg=0: ion_types_for_segment(union) = 9 ion types (prefix=4, suffix=5) + seg=1: ion_types_for_segment(union) = 5 ion types (prefix=0, suffix=5) + 
Partition counts per (charge, seg): + charge=2 seg=0: 33 partitions + charge=2 seg=1: 33 partitions + charge=3 seg=0: 33 partitions + charge=3 seg=1: 33 partitions + charge=4 seg=0: 4 partitions + charge=4 seg=1: 4 partitions + charge=2 seg=0: per-partition ion-list sizes min=4 median=5 max=7, union=7 + charge=2 seg=1: per-partition ion-list sizes min=3 median=5 max=5, union=5 + +=== Spectrum: scan=23272 precursor_mz=1078.0662 charge=Some(2) peaks=710 === + spectrum partition target=(c=2 pm=2154.12 seg=0) selected=(c=2 pm=2140.06 seg=0): 4 ion types — ["S(c=1,off=19.018)", "P(c=1,off=1.008)", "S(c=1,off=20.022)", "P(c=1,off=-17.003)"] + spectrum partition target=(c=2 pm=2154.12 seg=1) selected=(c=2 pm=2140.06 seg=1): 3 ion types — ["S(c=1,off=19.018)", "S(c=1,off=20.022)", "S(c=1,off=21.022)"] + Rust filtering: 0 of 710 peaks filtered (0.0%); max filtered intensity=0.0 + Filter m/z values (count=3): + 1077.0657 ± 0.5000 + 1078.0662 ± 0.5000 + 1079.0667 ± 0.5000 + +--- Candidate windows --- + charge=2: neutral_mass=2136.1073 nominal_center=2135 window=[2134..=2135] (iso_range=[0..=1], tol_da_left=0.0107, tol_da_right=0.0107) +Yield (chunk): 1 spectra in, 0 skipped by min_peaks, 1868 candidates visited, 149 PSMs pushed, 1 spectra with non-empty queue +GF diagnostics (cumulative): 2 bin attempts, 0 EmptyScoreRange, 0 SinkUnreachable, 0 of those recovered by unthresholded retry, 0 spectra with no successful bin + +--- Rust top-7 PSMs --- + #1: peptide=FLVENELSGKGWYENKIK charge=2 score=30.00 spec_e_val=3.9369e-5 iso_off=1 prot_idx=7087 prot=XXX_sp|P10964|RPA1_YEAST is_decoy=true + #2: peptide=IICKSESSLKQWMSSIIK charge=2 score=24.00 spec_e_val=1.9773e-4 iso_off=1 prot_idx=1207 prot=sp|P36126|SPO14_YEAST is_decoy=false + #3: peptide=QFILEIDKEKMIQEAFR charge=2 score=17.00 spec_e_val=4.1987e-4 iso_off=1 prot_idx=8925 prot=XXX_sp|P53599|SSK2_YEAST is_decoy=true + #4: peptide=EINSWFAKAYARVEELTK charge=2 score=14.00 spec_e_val=8.5797e-4 iso_off=0 prot_idx=8050 
prot=XXX_sp|P38144|ISW1_YEAST is_decoy=true + #5: peptide=LKHYNGYDINYISKIGEK charge=2 score=11.00 spec_e_val=1.2080e-3 iso_off=0 prot_idx=221 prot=sp|P09547|SWI1_YEAST is_decoy=false + #6: peptide=NAHARAPESLLTGCNRFLK charge=2 score=11.00 spec_e_val=1.0194e-3 iso_off=0 prot_idx=2401 prot=sp|Q03195|RLI1_YEAST is_decoy=false + #7: peptide=TLKFNLNYPNPMNFLRR charge=2 score=10.00 spec_e_val=1.2080e-3 iso_off=1 prot_idx=635 prot=sp|P24869|CG22_YEAST is_decoy=false + +--- Java top-1 trace: K.LLYTIPTGQNPTGTSIADHR.K --- + Enumerator: 2 matches for residue sequence + cand_idx=841793 prot_idx=2048 prot=sp|P53090|ARO8_YEAST is_decoy=false pep_mass=2154.1069 nominal=2135 + cand_idx=841938 prot_idx=2048 prot=sp|P53090|ARO8_YEAST is_decoy=false pep_mass=2154.1069 nominal=2135 + In Rust's top-7 queue: 0 + + Per-split node_score breakdown — Java pep (K.LLYTIPTGQNPTGTSIADHR.K+2) --- + spectrum_parent_mass=2154.1178, peptide_mass=2154.1069, peptide_nominal=2135 + split=1 aa[0]=L pref_nom=113 suf_nom=2022 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + ions: P1.0@114.1=MISS=-0.61 | P-17.0@96.1=MISS=-0.26 | S19.0@2042.0=MISS=-0.57 | S20.0@2043.0=MISS=-0.50 | S21.0@2044.0=MISS=-0.22 + split=2 aa[1]=L pref_nom=226 suf_nom=1909 score=1 (matched=1 sum=2.43, missing=4 sum=-1.58) + split=3 aa[2]=Y pref_nom=389 suf_nom=1746 score=5 (matched=4 sum=5.19, missing=1 sum=-0.22) + split=4 aa[3]=T pref_nom=490 suf_nom=1645 score=11 (matched=5 sum=11.48, missing=0 sum=0.00) + ions: P1.0@491.3=rk37=2.37 | P-17.0@473.2=rk18=2.28 | S19.0@1664.8=rk204=0.91 | S20.0@1665.8=rk68=3.45 | S21.0@1666.8=rk135=2.47 + split=5 aa[4]=I pref_nom=603 suf_nom=1532 score=23 (matched=5 sum=23.50, missing=0 sum=0.00) + split=6 aa[5]=P pref_nom=700 suf_nom=1435 score=7 (matched=5 sum=7.05, missing=0 sum=0.00) + split=7 aa[6]=T pref_nom=801 suf_nom=1334 score=11 (matched=4 sum=11.70, missing=1 sum=-0.61) + split=8 aa[7]=G pref_nom=858 suf_nom=1277 score=7 (matched=4 sum=7.29, missing=1 sum=-0.61) + split=9 aa[8]=Q 
pref_nom=986 suf_nom=1149 score=18 (matched=5 sum=17.88, missing=0 sum=0.00) + split=10 aa[9]=N pref_nom=1100 suf_nom=1035 score=7 (matched=2 sum=7.29, missing=0 sum=0.00) + split=11 aa[10]=P pref_nom=1197 suf_nom=938 score=1 (matched=2 sum=0.91, missing=0 sum=0.00) + split=12 aa[11]=T pref_nom=1298 suf_nom=837 score=5 (matched=2 sum=4.56, missing=0 sum=0.00) + split=13 aa[12]=G pref_nom=1355 suf_nom=780 score=2 (matched=2 sum=2.29, missing=0 sum=0.00) + split=14 aa[13]=T pref_nom=1456 suf_nom=679 score=3 (matched=2 sum=3.18, missing=0 sum=0.00) + split=15 aa[14]=S pref_nom=1543 suf_nom=592 score=1 (matched=2 sum=0.56, missing=0 sum=0.00) + split=16 aa[15]=I pref_nom=1656 suf_nom=479 score=4 (matched=2 sum=4.38, missing=0 sum=0.00) + split=17 aa[16]=A pref_nom=1727 suf_nom=408 score=1 (matched=2 sum=1.10, missing=0 sum=0.00) + split=18 aa[17]=D pref_nom=1842 suf_nom=293 score=4 (matched=2 sum=3.99, missing=0 sum=0.00) + split=19 aa[18]=H pref_nom=1979 suf_nom=156 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + breakdown_total = 107 + score_psm total = 107 + + Per-split node_score breakdown — Rust top-1 (FLVENELSGKGWYENKIK +2) --- + spectrum_parent_mass=2154.1178, peptide_mass=2153.1157, peptide_nominal=2134 + split=1 aa[0]=F pref_nom=147 suf_nom=1987 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + ions: P1.0@148.1=MISS=-0.61 | P-17.0@130.1=MISS=-0.26 | S19.0@2007.0=MISS=-0.57 | S20.0@2008.0=MISS=-0.50 | S21.0@2009.0=MISS=-0.22 + split=2 aa[1]=L pref_nom=260 suf_nom=1874 score=1 (matched=2 sum=2.35, missing=3 sum=-1.36) + split=3 aa[2]=V pref_nom=359 suf_nom=1775 score=1 (matched=1 sum=2.81, missing=4 sum=-1.54) + split=4 aa[3]=E pref_nom=488 suf_nom=1646 score=7 (matched=4 sum=7.84, missing=1 sum=-0.61) + ions: P1.0@489.3=MISS=-0.61 | P-17.0@471.2=rk494=0.60 | S19.0@1665.8=rk68=3.44 | S20.0@1666.9=rk135=2.35 | S21.0@1667.8=rk497=1.44 + split=5 aa[4]=N pref_nom=602 suf_nom=1532 score=19 (matched=5 sum=19.44, missing=0 sum=0.00) + split=6 aa[5]=E 
pref_nom=731 suf_nom=1403 score=2 (matched=2 sum=2.72, missing=3 sum=-1.08) + split=7 aa[6]=L pref_nom=844 suf_nom=1290 score=-2 (matched=1 sum=0.03, missing=4 sum=-1.54) + split=8 aa[7]=S pref_nom=931 suf_nom=1203 score=6 (matched=4 sum=6.57, missing=1 sum=-0.61) + split=9 aa[8]=G pref_nom=988 suf_nom=1146 score=0 (matched=2 sum=1.44, missing=3 sum=-1.40) + split=10 aa[9]=K pref_nom=1116 suf_nom=1018 score=3 (matched=2 sum=3.12, missing=0 sum=0.00) + split=11 aa[10]=G pref_nom=1173 suf_nom=961 score=-1 (matched=1 sum=0.37, missing=1 sum=-1.63) + split=12 aa[11]=W pref_nom=1359 suf_nom=775 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=13 aa[12]=Y pref_nom=1522 suf_nom=612 score=0 (matched=2 sum=-0.40, missing=0 sum=0.00) + split=14 aa[13]=E pref_nom=1651 suf_nom=483 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=15 aa[14]=N pref_nom=1765 suf_nom=369 score=0 (matched=2 sum=-0.40, missing=0 sum=0.00) + split=16 aa[15]=K pref_nom=1893 suf_nom=241 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=17 aa[16]=I pref_nom=2006 suf_nom=128 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + breakdown_total = 26 + PSM.score (from queue) = 30 + +--- Spectrum top-10 peaks by intensity --- + rank=1 mz=1551.7100 intensity=49499.516 + rank=2 mz=1552.7151 intensity=35557.02 + rank=3 mz=776.6049 intensity=17152.627 + rank=4 mz=1553.7172 intensity=12496.791 + rank=5 mz=1069.2351 intensity=8383.694 + rank=6 mz=1054.5336 intensity=8342.993 + rank=7 mz=1026.0876 intensity=7723.549 + rank=8 mz=586.3372 intensity=6293.8276 + rank=9 mz=1055.5867 intensity=6147.2876 + rank=10 mz=1534.6989 intensity=5968.509 diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-34685.json b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-34685.json new file mode 100644 index 00000000..2fe663ec --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-34685.json @@ -0,0 +1,163 @@ +[ + { + "scan": 
34685, + "peptide": "R.DPANLPWGSSNVDIAIDSTGVFK.E", + "charge": 2, + "rust_rank_score": 119, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 116.065700, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 98.055136, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2288.159777, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2289.163132, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2290.163018, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 213.114515, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 195.103951, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2191.110961, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2192.114316, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2193.114202, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 284.150247, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { 
charge: 1, offset_bits: 3246917020 }", "theo_mz": 266.139683, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2120.075230, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2121.078585, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2122.078470, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 398.207618, "rank": 72, "max_rank": 150, "log_prob": 1.589841, "contribution": 1.589841}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 380.197054, "rank": 380, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2006.017859, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2007.021214, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2008.021099, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 511.264486, "rank": 9, "max_rank": 150, "log_prob": 3.484472, "contribution": 3.484472}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 493.253922, "rank": 609, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1892.960991, "rank": 1, "max_rank": 150, "log_prob": 7.562068, "contribution": 7.562068}, + 
{"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1893.964346, "rank": 2, "max_rank": 150, "log_prob": 7.331127, "contribution": 7.331127}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1894.964231, "rank": 8, "max_rank": 150, "log_prob": 4.449402, "contribution": 4.449402}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 608.313302, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 590.302738, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1795.912175, "rank": 577, "max_rank": 150, "log_prob": 0.907424, "contribution": 0.907424}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1796.915530, "rank": 127, "max_rank": 150, "log_prob": 2.117405, "contribution": 2.117405}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1797.915415, "rank": 157, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 794.406908, "rank": 49, "max_rank": 150, "log_prob": 2.157042, "contribution": 2.157042}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 776.396344, "rank": 623, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1609.818569, "rank": 16, "max_rank": 150, "log_prob": 5.904145, "contribution": 5.904145}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1610.821924, "rank": 21, "max_rank": 150, "log_prob": 5.526967, "contribution": 5.526967}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1611.821809, "rank": 42, "max_rank": 150, "log_prob": 3.872826, "contribution": 3.872826}, + 
{"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 851.435593, "rank": 147, "max_rank": 150, "log_prob": 0.411093, "contribution": 0.411093}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 833.425029, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1552.789883, "rank": 45, "max_rank": 150, "log_prob": 4.136743, "contribution": 4.136743}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1553.793238, "rank": 59, "max_rank": 150, "log_prob": 3.571839, "contribution": 3.571839}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1554.793124, "rank": 120, "max_rank": 150, "log_prob": 2.693816, "contribution": 2.693816}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 938.479377, "rank": 10, "max_rank": 150, "log_prob": 3.463889, "contribution": 3.463889}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 920.468813, "rank": 115, "max_rank": 150, "log_prob": 0.899556, "contribution": 0.899556}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1465.746100, "rank": 18, "max_rank": 150, "log_prob": 5.731728, "contribution": 5.731728}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1466.749455, "rank": 12, "max_rank": 150, "log_prob": 6.046242, "contribution": 6.046242}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1467.749340, "rank": 13, "max_rank": 150, "log_prob": 4.373122, "contribution": 4.373122}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 1025.523160, "rank": 398, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 1007.512596, "rank": 277, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + 
{"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1378.702317, "rank": 51, "max_rank": 150, "log_prob": 3.890340, "contribution": 3.890340}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1379.705672, "rank": 53, "max_rank": 150, "log_prob": 3.723832, "contribution": 3.723832}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1380.705557, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 1139.580531, "rank": 232, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 1121.569967, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1264.644945, "rank": 68, "max_rank": 150, "log_prob": 3.443344, "contribution": 3.443344}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1265.648300, "rank": 104, "max_rank": 150, "log_prob": 2.689284, "contribution": 2.689284}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1266.648186, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1165.595123, "rank": 6, "max_rank": 150, "log_prob": 4.506861, "contribution": 4.506861}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1166.598478, "rank": 22, "max_rank": 150, "log_prob": 2.566939, "contribution": 2.566939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1050.537248, "rank": 17, "max_rank": 150, "log_prob": 3.972204, "contribution": 3.972204}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1051.540603, "rank": 84, "max_rank": 150, "log_prob": 1.581525, "contribution": 
1.581525}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 937.480380, "rank": 5, "max_rank": 150, "log_prob": 4.534738, "contribution": 4.534738}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 938.483735, "rank": 10, "max_rank": 150, "log_prob": 2.759931, "contribution": 2.759931}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 866.444649, "rank": 25, "max_rank": 150, "log_prob": 3.450944, "contribution": 3.450944}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 867.448004, "rank": 44, "max_rank": 150, "log_prob": 2.258470, "contribution": 2.258470}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 753.387781, "rank": 11, "max_rank": 150, "log_prob": 4.261968, "contribution": 4.261968}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 754.391136, "rank": 61, "max_rank": 150, "log_prob": 1.937820, "contribution": 1.937820}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 638.329907, "rank": 26, "max_rank": 150, "log_prob": 3.413566, "contribution": 3.413566}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 639.333262, "rank": 37, "max_rank": 150, "log_prob": 2.397189, "contribution": 2.397189}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 551.286123, "rank": 122, "max_rank": 150, "log_prob": 0.291422, "contribution": 0.291422}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 552.289478, "rank": 256, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 450.235294, "rank": 173, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 451.238649, "rank": 622, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + 
{"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 393.206609, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 394.209964, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 294.156786, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 295.160141, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 147.082808, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 148.086163, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495} + ] + }, + { + "scan": 34685, + "peptide": "PDPLSELSDFYMFQKLPTFK", + "charge": 2, + "rust_rank_score": 33, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 98.056641, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 80.046077, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2306.168836, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2307.172191, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2308.172076, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix 
{ charge: 1, offset_bits: 1065418857 }", "theo_mz": 213.114515, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 195.103951, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2191.110961, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2192.114316, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2193.114202, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 310.163331, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 292.152767, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2094.062145, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2095.065500, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2096.065386, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 423.220199, "rank": 530, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 405.209635, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": 
-0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1981.005277, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1982.008632, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1983.008518, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 510.263983, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 492.253419, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1893.961494, "rank": 2, "max_rank": 150, "log_prob": 7.601154, "contribution": 7.601154}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1894.964849, "rank": 8, "max_rank": 150, "log_prob": 6.079219, "contribution": 6.079219}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1895.964734, "rank": 31, "max_rank": 150, "log_prob": 4.187145, "contribution": 4.187145}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 639.328903, "rank": 37, "max_rank": 150, "log_prob": 2.369332, "contribution": 2.369332}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 621.318339, "rank": 469, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1764.896574, "rank": 591, "max_rank": 150, "log_prob": 0.907424, "contribution": 0.907424}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1765.899929, "rank": 125, "max_rank": 150, "log_prob": 2.340827, 
"contribution": 2.340827}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1766.899814, "rank": 81, "max_rank": 150, "log_prob": 3.296003, "contribution": 3.296003}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 752.385771, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 734.375207, "rank": 207, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1651.839706, "rank": 35, "max_rank": 150, "log_prob": 4.525133, "contribution": 4.525133}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1652.843061, "rank": 230, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1653.842946, "rank": 334, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 839.429554, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 821.418990, "rank": 524, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1564.795922, "rank": 621, "max_rank": 150, "log_prob": 0.907424, "contribution": 0.907424}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1565.799277, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1566.799163, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 954.487429, "rank": null, "max_rank": 150, "log_prob": 
-0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 936.476865, "rank": 220, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1449.738048, "rank": 57, "max_rank": 150, "log_prob": 3.653145, "contribution": 3.653145}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1450.741403, "rank": 36, "max_rank": 150, "log_prob": 4.571351, "contribution": 4.571351}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1451.741288, "rank": 336, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 1101.561407, "rank": 283, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 1083.550843, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1302.664069, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1303.667424, "rank": 215, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1304.667310, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1139.582038, "rank": 232, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1140.585393, "rank": 46, "max_rank": 150, "log_prob": 2.148726, "contribution": 2.148726}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1008.516112, "rank": 162, "max_rank": 
150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1009.519467, "rank": 408, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 861.442133, "rank": 196, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 862.445488, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 733.377716, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 734.381071, "rank": 207, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 605.313299, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 606.316654, "rank": 227, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 492.256431, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 493.259786, "rank": 609, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 395.207615, "rank": 366, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 396.210970, "rank": 178, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 294.156786, "rank": null, 
"max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 295.160141, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 147.082808, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 148.086163, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495} + ] + } +] diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-34685.txt b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-34685.txt new file mode 100644 index 00000000..6b2c3a32 --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-34685.txt @@ -0,0 +1,128 @@ +DB: 6775 target proteins, 13550 total (target+decoy) +Param: activation=HCD instrument=QExactive mme=Da(0.5) num_segments=2 num_partitions=140 error_scaling_factor=100 max_rank=150 + + --- Sample rank_dist (partition Partition { charge: 2, parent_mass: 1271.5724, seg_num: 1 }) --- + Noise freqs (first 5 ranks): [0.0002698114, 0.00026833755, 0.00029238392, 0.0003125347, 0.00034821813] + Noise freq at max_rank (150): 4.626928 + Ion Suffix { charge: 1, offset_bits: 1101016201 }: first 5 freqs = [0.0025575447, 0.0076726344, 0.02173913, 0.032395568, 0.04177323] + missing slot (150): 1.3682865 + Ion Suffix { charge: 1, offset_bits: 1065418864 }: first 5 freqs = [0.0012787724, 0.00042625747, 0.00025575448, 0.00036536352, 0.00025575448] + missing slot (150): 3.4245524 + Ion Suffix { charge: 1, offset_bits: 1100490154 }: first 5 freqs = [0.18286446, 0.22378516, 0.21611254, 0.22463769, 0.21824382] + missing slot (150): 0.85549873 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=1) = 2.2491 + scorer.node_score(Suffix { charge: 1, offset_bits: 
1101016201 }, rank=5) = 4.7872 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=20) = 4.7155 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=100) = 2.0069 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101016201 }, rank=150) = 0.3651 + scorer.missing_ion_score = -1.2183 + seg=0: ion_types_for_segment(union) = 9 ion types (prefix=4, suffix=5) + seg=1: ion_types_for_segment(union) = 5 ion types (prefix=0, suffix=5) + Partition counts per (charge, seg): + charge=2 seg=0: 33 partitions + charge=2 seg=1: 33 partitions + charge=3 seg=0: 33 partitions + charge=3 seg=1: 33 partitions + charge=4 seg=0: 4 partitions + charge=4 seg=1: 4 partitions + charge=2 seg=0: per-partition ion-list sizes min=4 median=5 max=7, union=7 + charge=2 seg=1: per-partition ion-list sizes min=3 median=5 max=5, union=5 + +=== Spectrum: scan=34685 precursor_mz=1202.101 charge=Some(2) peaks=626 === + spectrum partition target=(c=2 pm=2402.19 seg=0) selected=(c=2 pm=2140.06 seg=0): 4 ion types — ["S(c=1,off=19.018)", "P(c=1,off=1.008)", "S(c=1,off=20.022)", "P(c=1,off=-17.003)"] + spectrum partition target=(c=2 pm=2402.19 seg=1) selected=(c=2 pm=2140.06 seg=1): 3 ion types — ["S(c=1,off=19.018)", "S(c=1,off=20.022)", "S(c=1,off=21.022)"] + Rust filtering: 1 of 626 peaks filtered (0.2%); max filtered intensity=2509.4 + Filter m/z values (count=3): + 1201.1005 ± 0.5000 + 1202.1010 ± 0.5000 + 1203.1015 ± 0.5000 + First 5 filtered peaks: + mz=1203.5573 intensity=2509.4 + +--- Candidate windows --- + charge=2: neutral_mass=2384.1769 nominal_center=2383 window=[2382..=2383] (iso_range=[0..=1], tol_da_left=0.0119, tol_da_right=0.0119) +Yield (chunk): 1 spectra in, 0 skipped by min_peaks, 1973 candidates visited, 175 PSMs pushed, 1 spectra with non-empty queue +GF diagnostics (cumulative): 2 bin attempts, 0 EmptyScoreRange, 0 SinkUnreachable, 0 of those recovered by unthresholded retry, 0 spectra with no successful bin + +--- Rust top-8 PSMs --- + #1: 
peptide=PDPLSELSDFYMFQKLPTFK charge=2 score=33.00 spec_e_val=4.4921e-5 iso_off=0 prot_idx=7356 prot=XXX_sp|P22515|UBA1_YEAST is_decoy=true + #2: peptide=KFDSLDVVSDKNVDMATFLMK charge=2 score=32.00 spec_e_val=1.7128e-5 iso_off=0 prot_idx=9851 prot=XXX_sp|D6W196|CMC1_YEAST is_decoy=true + #3: peptide=GKTQHDSLADESISQSSSIKQR charge=2 score=31.00 spec_e_val=3.8340e-5 iso_off=1 prot_idx=2034 prot=sp|P53048|MAL11_YEAST is_decoy=false + #4: peptide=TQYDWIKITLDDSATIMYPK charge=2 score=27.00 spec_e_val=8.3918e-5 iso_off=1 prot_idx=8537 prot=XXX_sp|P41811|COPB2_YEAST is_decoy=true + #5: peptide=KYQKGEETSTNSIASIFAWSR charge=2 score=17.00 spec_e_val=1.7972e-4 iso_off=0 prot_idx=553 prot=sp|P21954|IDHP_YEAST is_decoy=false + #6: peptide=DPTLRVSPSESTDLSYRTSYK charge=2 score=15.00 spec_e_val=6.6458e-4 iso_off=1 prot_idx=7912 prot=XXX_sp|P35201|CENPC_YEAST is_decoy=true + #7: peptide=PSMEHLLELEADELGELVHNK charge=2 score=13.00 spec_e_val=8.7718e-4 iso_off=0 prot_idx=2134 prot=sp|P53327|SLH1_YEAST is_decoy=false + #8: peptide=SLHKVDLFFLNYEGAQSFMR charge=2 score=13.00 spec_e_val=5.0096e-4 iso_off=1 prot_idx=4970 prot=sp|Q08646|SSP2_YEAST is_decoy=false + +--- Java top-1 trace: R.DPANLPWGSSNVDIAIDSTGVFK.E --- + Enumerator: 2 matches for residue sequence + cand_idx=6441 prot_idx=22 prot=sp|P00359|G3P3_YEAST is_decoy=false pep_mass=2402.1754 nominal=2383 + cand_idx=6566 prot_idx=22 prot=sp|P00359|G3P3_YEAST is_decoy=false pep_mass=2402.1754 nominal=2383 + In Rust's top-8 queue: 0 + + Per-split node_score breakdown — Java pep (R.DPANLPWGSSNVDIAIDSTGVFK.E+2) --- + spectrum_parent_mass=2402.1874, peptide_mass=2402.1754, peptide_nominal=2383 + split=1 aa[0]=D pref_nom=115 suf_nom=2268 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + ions: P1.0@116.1=MISS=-0.61 | P-17.0@98.1=MISS=-0.26 | S19.0@2288.2=MISS=-0.57 | S20.0@2289.2=MISS=-0.50 | S21.0@2290.2=MISS=-0.22 + split=2 aa[1]=P pref_nom=212 suf_nom=2171 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=3 aa[2]=A pref_nom=283 
suf_nom=2100 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=4 aa[3]=N pref_nom=397 suf_nom=1986 score=1 (matched=2 sum=2.19, missing=3 sum=-1.29) + ions: P1.0@398.2=rk72=1.59 | P-17.0@380.2=rk380=0.60 | S19.0@2006.0=MISS=-0.57 | S20.0@2007.0=MISS=-0.50 | S21.0@2008.0=MISS=-0.22 + split=5 aa[4]=L pref_nom=510 suf_nom=1873 score=23 (matched=5 sum=23.42, missing=0 sum=0.00) + split=6 aa[5]=P pref_nom=607 suf_nom=1776 score=4 (matched=3 sum=4.47, missing=2 sum=-0.87) + split=7 aa[6]=W pref_nom=793 suf_nom=1590 score=18 (matched=5 sum=18.06, missing=0 sum=0.00) + split=8 aa[7]=G pref_nom=850 suf_nom=1533 score=11 (matched=4 sum=10.81, missing=1 sum=-0.26) + split=9 aa[8]=S pref_nom=937 suf_nom=1446 score=21 (matched=5 sum=20.51, missing=0 sum=0.00) + split=10 aa[9]=S pref_nom=1024 suf_nom=1359 score=8 (matched=4 sum=8.24, missing=1 sum=-0.22) + split=11 aa[10]=N pref_nom=1138 suf_nom=1245 score=6 (matched=3 sum=6.16, missing=2 sum=-0.47) + split=12 aa[11]=V pref_nom=1237 suf_nom=1146 score=7 (matched=2 sum=7.07, missing=0 sum=0.00) + split=13 aa[12]=D pref_nom=1352 suf_nom=1031 score=6 (matched=2 sum=5.55, missing=0 sum=0.00) + split=14 aa[13]=I pref_nom=1465 suf_nom=918 score=7 (matched=2 sum=7.29, missing=0 sum=0.00) + split=15 aa[14]=A pref_nom=1536 suf_nom=847 score=6 (matched=2 sum=5.71, missing=0 sum=0.00) + split=16 aa[15]=I pref_nom=1649 suf_nom=734 score=6 (matched=2 sum=6.20, missing=0 sum=0.00) + split=17 aa[16]=D pref_nom=1764 suf_nom=619 score=6 (matched=2 sum=5.81, missing=0 sum=0.00) + split=18 aa[17]=S pref_nom=1851 suf_nom=532 score=1 (matched=2 sum=0.66, missing=0 sum=0.00) + split=19 aa[18]=T pref_nom=1952 suf_nom=431 score=0 (matched=2 sum=-0.40, missing=0 sum=0.00) + split=20 aa[19]=G pref_nom=2009 suf_nom=374 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=21 aa[20]=V pref_nom=2108 suf_nom=275 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=22 aa[21]=F pref_nom=2255 suf_nom=128 score=-2 (matched=0 sum=0.00, 
missing=2 sum=-2.15) + breakdown_total = 119 + score_psm total = 119 + + Per-split node_score breakdown — Rust top-1 (PDPLSELSDFYMFQKLPTFK +2) --- + spectrum_parent_mass=2402.1874, peptide_mass=2402.1868, peptide_nominal=2383 + split=1 aa[0]=P pref_nom=97 suf_nom=2286 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + ions: P1.0@98.1=MISS=-0.61 | P-17.0@80.0=MISS=-0.26 | S19.0@2306.2=MISS=-0.57 | S20.0@2307.2=MISS=-0.50 | S21.0@2308.2=MISS=-0.22 + split=2 aa[1]=D pref_nom=212 suf_nom=2171 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=3 aa[2]=P pref_nom=309 suf_nom=2074 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=4 aa[3]=L pref_nom=422 suf_nom=1961 score=-2 (matched=1 sum=0.03, missing=4 sum=-1.54) + ions: P1.0@423.2=rk530=0.03 | P-17.0@405.2=MISS=-0.26 | S19.0@1981.0=MISS=-0.57 | S20.0@1982.0=MISS=-0.50 | S21.0@1983.0=MISS=-0.22 + split=5 aa[4]=S pref_nom=509 suf_nom=1874 score=17 (matched=3 sum=17.87, missing=2 sum=-0.87) + split=6 aa[5]=E pref_nom=638 suf_nom=1745 score=10 (matched=5 sum=9.51, missing=0 sum=0.00) + split=7 aa[6]=L pref_nom=751 suf_nom=1632 score=7 (matched=4 sum=7.41, missing=1 sum=-0.61) + split=8 aa[7]=S pref_nom=838 suf_nom=1545 score=0 (matched=2 sum=1.51, missing=3 sum=-1.32) + split=9 aa[8]=D pref_nom=953 suf_nom=1430 score=10 (matched=4 sum=10.27, missing=1 sum=-0.61) + split=10 aa[9]=F pref_nom=1100 suf_nom=1283 score=0 (matched=2 sum=0.87, missing=3 sum=-1.05) + split=11 aa[10]=Y pref_nom=1263 suf_nom=1120 score=1 (matched=2 sum=1.38, missing=0 sum=0.00) + split=12 aa[11]=M pref_nom=1394 suf_nom=989 score=0 (matched=2 sum=-0.40, missing=0 sum=0.00) + split=13 aa[12]=F pref_nom=1541 suf_nom=842 score=-1 (matched=1 sum=-0.77, missing=1 sum=-0.52) + split=14 aa[13]=Q pref_nom=1669 suf_nom=714 score=-1 (matched=1 sum=0.37, missing=1 sum=-1.63) + split=15 aa[14]=K pref_nom=1797 suf_nom=586 score=-1 (matched=1 sum=0.37, missing=1 sum=-1.63) + split=16 aa[15]=L pref_nom=1910 suf_nom=473 score=-1 (matched=1 
sum=0.37, missing=1 sum=-1.63) + split=17 aa[16]=P pref_nom=2007 suf_nom=376 score=0 (matched=2 sum=-0.40, missing=0 sum=0.00) + split=18 aa[17]=T pref_nom=2108 suf_nom=275 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=19 aa[18]=F pref_nom=2255 suf_nom=128 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + breakdown_total = 29 + PSM.score (from queue) = 33 + +--- Spectrum top-10 peaks by intensity --- + rank=1 mz=1893.0624 intensity=159810.55 + rank=2 mz=1893.9667 intensity=119504.484 + rank=3 mz=947.3132 intensity=102851.734 + rank=4 mz=1185.5748 intensity=88080.61 + rank=5 mz=937.5807 intensity=75238.3 + rank=6 mz=1165.5762 intensity=65895.79 + rank=7 mz=1186.6443 intensity=65725.72 + rank=8 mz=1894.9336 intensity=64256.99 + rank=9 mz=511.2563 intensity=61057.945 + rank=10 mz=938.5923 intensity=49005.22 diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-41522.json b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-41522.json new file mode 100644 index 00000000..3d88f1c3 --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-41522.json @@ -0,0 +1,163 @@ +[ + { + "scan": 41522, + "peptide": "R.DPANLPWASLNIDIAIDSTGVFK.E", + "charge": 2, + "rust_rank_score": 128, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 116.065700, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 98.055136, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2342.186953, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2343.190308, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { 
charge: 1, offset_bits: 1101540429 }", "theo_mz": 2344.190193, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 213.114515, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 195.103951, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2245.138137, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2246.141492, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2247.141377, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 284.150247, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 266.139683, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2174.102406, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2175.105761, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2176.105646, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 398.207618, "rank": 47, "max_rank": 150, "log_prob": 2.134533, "contribution": 
2.134533}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 380.197054, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2060.045034, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2061.048389, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2062.048275, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 511.264486, "rank": 14, "max_rank": 150, "log_prob": 3.330039, "contribution": 3.330039}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 493.253922, "rank": 181, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1946.988166, "rank": 2, "max_rank": 150, "log_prob": 7.601154, "contribution": 7.601154}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1947.991521, "rank": 1, "max_rank": 150, "log_prob": 4.865191, "contribution": 4.865191}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1948.991407, "rank": 7, "max_rank": 150, "log_prob": 4.275384, "contribution": 4.275384}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 608.313302, "rank": 236, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 590.302738, "rank": 207, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1849.939350, "rank": 137, "max_rank": 150, "log_prob": 1.898406, 
"contribution": 1.898406}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1850.942706, "rank": 362, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1851.942591, "rank": 379, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 794.406908, "rank": 77, "max_rank": 150, "log_prob": 1.478586, "contribution": 1.478586}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 776.396344, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1663.845745, "rank": 21, "max_rank": 150, "log_prob": 5.375355, "contribution": 5.375355}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1664.849100, "rank": 18, "max_rank": 150, "log_prob": 5.803406, "contribution": 5.803406}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1665.848985, "rank": 33, "max_rank": 150, "log_prob": 4.177983, "contribution": 4.177983}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 865.442639, "rank": 43, "max_rank": 150, "log_prob": 2.286619, "contribution": 2.286619}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 847.432075, "rank": 145, "max_rank": 150, "log_prob": 0.703687, "contribution": 0.703687}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1592.810014, "rank": 8, "max_rank": 150, "log_prob": 6.485645, "contribution": 6.485645}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1593.813369, "rank": 3, "max_rank": 150, "log_prob": 6.046163, "contribution": 6.046163}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1594.813254, "rank": 12, "max_rank": 150, "log_prob": 4.375459, 
"contribution": 4.375459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 952.486422, "rank": 67, "max_rank": 150, "log_prob": 1.744043, "contribution": 1.744043}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 934.475858, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1505.766230, "rank": 11, "max_rank": 150, "log_prob": 6.248171, "contribution": 6.248171}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1506.769585, "rank": 16, "max_rank": 150, "log_prob": 5.870307, "contribution": 5.870307}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1507.769471, "rank": 93, "max_rank": 150, "log_prob": 2.866239, "contribution": 2.866239}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 1065.543290, "rank": 37, "max_rank": 150, "log_prob": 2.369332, "contribution": 2.369332}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 1047.532726, "rank": 52, "max_rank": 150, "log_prob": 1.774167, "contribution": 1.774167}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1392.709362, "rank": 10, "max_rank": 150, "log_prob": 6.294260, "contribution": 6.294260}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1393.712717, "rank": 6, "max_rank": 150, "log_prob": 6.047585, "contribution": 6.047585}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1394.712603, "rank": 111, "max_rank": 150, "log_prob": 2.890018, "contribution": 2.890018}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 1179.600661, "rank": 86, "max_rank": 150, "log_prob": 1.254936, "contribution": 1.254936}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 1161.590097, "rank": null, "max_rank": 150, "log_prob": -0.258250, 
"contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1278.651991, "rank": 40, "max_rank": 150, "log_prob": 4.320503, "contribution": 4.320503}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1279.655346, "rank": 48, "max_rank": 150, "log_prob": 4.155147, "contribution": 4.155147}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1280.655231, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1165.595123, "rank": 9, "max_rank": 150, "log_prob": 4.326320, "contribution": 4.326320}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1166.598478, "rank": 13, "max_rank": 150, "log_prob": 2.738661, "contribution": 2.738661}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1050.537248, "rank": 60, "max_rank": 150, "log_prob": 1.918315, "contribution": 1.918315}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1051.540603, "rank": 20, "max_rank": 150, "log_prob": 2.561659, "contribution": 2.561659}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 937.480380, "rank": 5, "max_rank": 150, "log_prob": 4.534738, "contribution": 4.534738}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 938.483735, "rank": 17, "max_rank": 150, "log_prob": 2.566937, "contribution": 2.566937}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 866.444649, "rank": 30, "max_rank": 150, "log_prob": 3.147025, "contribution": 3.147025}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 867.448004, "rank": 29, "max_rank": 150, "log_prob": 2.543553, "contribution": 2.543553}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 753.387781, "rank": 19, "max_rank": 150, "log_prob": 3.838439, 
"contribution": 3.838439}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 754.391136, "rank": 68, "max_rank": 150, "log_prob": 1.851956, "contribution": 1.851956}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 638.329907, "rank": 81, "max_rank": 150, "log_prob": 1.245601, "contribution": 1.245601}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 639.333262, "rank": 340, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 551.286123, "rank": 172, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 552.289478, "rank": 450, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 450.235294, "rank": 359, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 451.238649, "rank": 366, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 393.206609, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 394.209964, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 294.156786, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 295.160141, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 147.082808, "rank": null, "max_rank": 150, "log_prob": 
-1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 148.086163, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495} + ] + }, + { + "scan": 41522, + "peptide": "VVYGNIYEIEIDRLFLTDQR", + "charge": 2, + "rust_rank_score": 11, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 100.057647, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 82.047083, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2357.194502, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2358.197857, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2359.197742, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 199.107470, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 181.096906, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2258.144679, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2259.148034, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2260.147920, "rank": null, "max_rank": 150, "log_prob": -0.216228, 
"contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 362.189501, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 344.178937, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2095.062648, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2096.066003, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2097.065889, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 419.218186, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 401.207622, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 2038.033963, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 2039.037318, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 2040.037203, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 533.275558, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 515.264994, "rank": null, 
"max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1923.976591, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1924.979947, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1925.979832, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 646.332426, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 628.321862, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1810.919723, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1811.923078, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1812.922964, "rank": 176, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 809.414456, "rank": null, "max_rank": 150, "log_prob": -0.608788, "contribution": -0.608788}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 791.403892, "rank": 213, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1647.837693, "rank": 125, "max_rank": 150, "log_prob": 1.822033, "contribution": 1.822033}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 
1648.841048, "rank": 477, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1649.840933, "rank": null, "max_rank": 150, "log_prob": -0.216228, "contribution": -0.216228}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 938.479377, "rank": 17, "max_rank": 150, "log_prob": 3.182559, "contribution": 3.182559}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 920.468813, "rank": null, "max_rank": 150, "log_prob": -0.258250, "contribution": -0.258250}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1518.772773, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1519.776128, "rank": null, "max_rank": 150, "log_prob": -0.495335, "contribution": -0.495335}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1520.776013, "rank": 39, "max_rank": 150, "log_prob": 3.930601, "contribution": 3.930601}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 }", "theo_mz": 1051.536245, "rank": 20, "max_rank": 150, "log_prob": 3.045456, "contribution": 3.045456}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 1033.525681, "rank": 31, "max_rank": 150, "log_prob": 2.013470, "contribution": 2.013470}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1405.715904, "rank": null, "max_rank": 150, "log_prob": -0.573939, "contribution": -0.573939}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1406.719260, "rank": 393, "max_rank": 150, "log_prob": 0.842178, "contribution": 0.842178}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1407.719145, "rank": 226, "max_rank": 150, "log_prob": 1.444459, "contribution": 1.444459}, + {"ion_type": "Prefix { charge: 1, offset_bits: 1065418857 
}", "theo_mz": 1180.601165, "rank": 240, "max_rank": 150, "log_prob": 0.025038, "contribution": 0.025038}, + {"ion_type": "Prefix { charge: 1, offset_bits: 3246917020 }", "theo_mz": 1162.590601, "rank": 217, "max_rank": 150, "log_prob": 0.597922, "contribution": 0.597922}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1276.650984, "rank": 140, "max_rank": 150, "log_prob": 1.705855, "contribution": 1.705855}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1277.654339, "rank": 26, "max_rank": 150, "log_prob": 5.139410, "contribution": 5.139410}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101540429 }", "theo_mz": 1278.654225, "rank": 40, "max_rank": 150, "log_prob": 3.861590, "contribution": 3.861590}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1163.594116, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1164.597471, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 1048.536242, "rank": 23, "max_rank": 150, "log_prob": 3.557648, "contribution": 3.557648}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 1049.539597, "rank": 258, "max_rank": 150, "log_prob": 0.367485, "contribution": 0.367485}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 892.457734, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 893.461089, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 779.400866, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, 
offset_bits: 1101016201 }", "theo_mz": 780.404221, "rank": 62, "max_rank": 150, "log_prob": 1.899350, "contribution": 1.899350}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 632.326887, "rank": 253, "max_rank": 150, "log_prob": -0.769119, "contribution": -0.769119}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 633.330242, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 519.270019, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 520.273374, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 418.219190, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 419.222545, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 303.161316, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 304.164671, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1100490154 }", "theo_mz": 175.096899, "rank": null, "max_rank": 150, "log_prob": -1.627809, "contribution": -1.627809}, + {"ion_type": "Suffix { charge: 1, offset_bits: 1101016201 }", "theo_mz": 176.100254, "rank": null, "max_rank": 150, "log_prob": -0.517495, "contribution": -0.517495} + ] + } +] diff --git a/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-41522.txt 
b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-41522.txt new file mode 100644 index 00000000..3f9296d1 --- /dev/null +++ b/docs/parity-analysis/notes/score-psm-trace-artifacts/rust-trace-scan-41522.txt @@ -0,0 +1,127 @@ +DB: 6775 target proteins, 13550 total (target+decoy) +Param: activation=HCD instrument=QExactive mme=Da(0.5) num_segments=2 num_partitions=140 error_scaling_factor=100 max_rank=150 + + --- Sample rank_dist (partition Partition { charge: 3, parent_mass: 1271.5972, seg_num: 1 }) --- + Noise freqs (first 5 ranks): [0.00013211217, 0.00022799712, 0.00018483217, 0.0003007183, 0.0003754308] + Noise freq at max_rank (150): 4.840471 + Ion Suffix { charge: 1, offset_bits: 1101540429 }: first 5 freqs = [0.00039840638, 0.00039840638, 0.00039840638, 0.00066401064, 0.0013280213] + missing slot (150): 3.884462 + Ion Suffix { charge: 1, offset_bits: 1100490154 }: first 5 freqs = [0.03187251, 0.043824703, 0.077689245, 0.067065075, 0.057768926] + missing slot (150): 2.2091634 + Ion Suffix { charge: 1, offset_bits: 1073673387 }: first 5 freqs = [0.0013280213, 0.0013280213, 0.003984064, 0.0013280213, 0.00066401064] + missing slot (150): 3.7948208 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=1) = 1.1038 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=5) = 1.2634 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=20) = 0.7959 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=100) = 2.3338 + scorer.node_score(Suffix { charge: 1, offset_bits: 1101540429 }, rank=150) = 1.8764 + scorer.missing_ion_score = -0.2200 + seg=0: ion_types_for_segment(union) = 9 ion types (prefix=4, suffix=5) + seg=1: ion_types_for_segment(union) = 5 ion types (prefix=0, suffix=5) + Partition counts per (charge, seg): + charge=2 seg=0: 33 partitions + charge=2 seg=1: 33 partitions + charge=3 seg=0: 33 partitions + charge=3 seg=1: 33 partitions + charge=4 seg=0: 4 partitions + 
charge=4 seg=1: 4 partitions + charge=2 seg=0: per-partition ion-list sizes min=4 median=5 max=7, union=7 + charge=2 seg=1: per-partition ion-list sizes min=3 median=5 max=5, union=5 + +=== Spectrum: scan=41522 precursor_mz=1229.1428 charge=Some(2) peaks=489 === + spectrum partition target=(c=2 pm=2456.27 seg=0) selected=(c=2 pm=2140.06 seg=0): 4 ion types — ["S(c=1,off=19.018)", "P(c=1,off=1.008)", "S(c=1,off=20.022)", "P(c=1,off=-17.003)"] + spectrum partition target=(c=2 pm=2456.27 seg=1) selected=(c=2 pm=2140.06 seg=1): 3 ion types — ["S(c=1,off=19.018)", "S(c=1,off=20.022)", "S(c=1,off=21.022)"] + Rust filtering: 1 of 489 peaks filtered (0.2%); max filtered intensity=303.5 + Filter m/z values (count=3): + 1228.1423 ± 0.5000 + 1229.1428 ± 0.5000 + 1230.1433 ± 0.5000 + First 5 filtered peaks: + mz=1229.5671 intensity=303.5 + +--- Candidate windows --- + charge=2: neutral_mass=2438.2605 nominal_center=2437 window=[2436..=2437] (iso_range=[0..=1], tol_da_left=0.0122, tol_da_right=0.0122) +Yield (chunk): 1 spectra in, 0 skipped by min_peaks, 1633 candidates visited, 174 PSMs pushed, 1 spectra with non-empty queue +GF diagnostics (cumulative): 2 bin attempts, 0 EmptyScoreRange, 0 SinkUnreachable, 0 of those recovered by unthresholded retry, 0 spectra with no successful bin + +--- Rust top-7 PSMs --- + #1: peptide=VVYGNIYEIEIDRLFLTDQR charge=2 score=11.00 spec_e_val=6.0906e-4 iso_off=1 prot_idx=11343 prot=XXX_sp|P38787|PANE_YEAST is_decoy=true + #2: peptide=GVVQKLRAFETFLAMYPEWR charge=2 score=10.00 spec_e_val=3.5603e-4 iso_off=0 prot_idx=837 prot=sp|P31688|TPS2_YEAST is_decoy=false + #3: peptide=LSSYLTAKDSGNLSHDINLVPGR charge=2 score=7.00 spec_e_val=1.0202e-3 iso_off=0 prot_idx=11306 prot=XXX_sp|P38187|UBP13_YEAST is_decoy=true + #4: peptide=LEPGTAIGAIGAQSIGEPGTQMTLK charge=2 score=6.00 spec_e_val=1.3098e-3 iso_off=1 prot_idx=76 prot=sp|P04051|RPC1_YEAST is_decoy=false + #5: peptide=MSPTGNYLNAITNRRTIYNLK charge=2 score=5.00 spec_e_val=1.4810e-3 iso_off=1 
prot_idx=3428 prot=sp|P37261|FRM2_YEAST is_decoy=false + #6: peptide=YGDFEILVSRVGQSMEVIGITK charge=2 score=5.00 spec_e_val=1.0202e-3 iso_off=0 prot_idx=7699 prot=XXX_sp|P32528|DUR1_YEAST is_decoy=true + #7: peptide=GDLAQILQLTRYFAGSADKFDK charge=2 score=4.00 spec_e_val=4.6686e-4 iso_off=0 prot_idx=3777 prot=sp|P47771|ALDH2_YEAST is_decoy=false + +--- Java top-1 trace: R.DPANLPWASLNIDIAIDSTGVFK.E --- + Enumerator: 2 matches for residue sequence + cand_idx=6178 prot_idx=21 prot=sp|P00358|G3P2_YEAST is_decoy=false pep_mass=2456.2587 nominal=2437 + cand_idx=6308 prot_idx=21 prot=sp|P00358|G3P2_YEAST is_decoy=false pep_mass=2456.2587 nominal=2437 + In Rust's top-7 queue: 0 + + Per-split node_score breakdown — Java pep (R.DPANLPWASLNIDIAIDSTGVFK.E+2) --- + spectrum_parent_mass=2456.2710, peptide_mass=2456.2587, peptide_nominal=2437 + split=1 aa[0]=D pref_nom=115 suf_nom=2322 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + ions: P1.0@116.1=MISS=-0.61 | P-17.0@98.1=MISS=-0.26 | S19.0@2342.2=MISS=-0.57 | S20.0@2343.2=MISS=-0.50 | S21.0@2344.2=MISS=-0.22 + split=2 aa[1]=P pref_nom=212 suf_nom=2225 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=3 aa[2]=A pref_nom=283 suf_nom=2154 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=4 aa[3]=N pref_nom=397 suf_nom=2040 score=1 (matched=1 sum=2.13, missing=4 sum=-1.54) + ions: P1.0@398.2=rk47=2.13 | P-17.0@380.2=MISS=-0.26 | S19.0@2060.0=MISS=-0.57 | S20.0@2061.0=MISS=-0.50 | S21.0@2062.0=MISS=-0.22 + split=5 aa[4]=L pref_nom=510 suf_nom=1927 score=21 (matched=5 sum=20.67, missing=0 sum=0.00) + split=6 aa[5]=P pref_nom=607 suf_nom=1830 score=5 (matched=5 sum=4.81, missing=0 sum=0.00) + split=7 aa[6]=W pref_nom=793 suf_nom=1644 score=17 (matched=4 sum=16.84, missing=1 sum=-0.26) + split=8 aa[7]=A pref_nom=864 suf_nom=1573 score=20 (matched=5 sum=19.90, missing=0 sum=0.00) + split=9 aa[8]=S pref_nom=951 suf_nom=1486 score=16 (matched=4 sum=16.73, missing=1 sum=-0.26) + split=10 aa[9]=L pref_nom=1064 
suf_nom=1373 score=19 (matched=5 sum=19.38, missing=0 sum=0.00) + split=11 aa[10]=N pref_nom=1178 suf_nom=1259 score=9 (matched=3 sum=9.73, missing=2 sum=-0.47) + split=12 aa[11]=I pref_nom=1291 suf_nom=1146 score=7 (matched=2 sum=7.06, missing=0 sum=0.00) + split=13 aa[12]=D pref_nom=1406 suf_nom=1031 score=4 (matched=2 sum=4.48, missing=0 sum=0.00) + split=14 aa[13]=I pref_nom=1519 suf_nom=918 score=7 (matched=2 sum=7.10, missing=0 sum=0.00) + split=15 aa[14]=A pref_nom=1590 suf_nom=847 score=6 (matched=2 sum=5.69, missing=0 sum=0.00) + split=16 aa[15]=I pref_nom=1703 suf_nom=734 score=6 (matched=2 sum=5.69, missing=0 sum=0.00) + split=17 aa[16]=D pref_nom=1818 suf_nom=619 score=2 (matched=2 sum=1.61, missing=0 sum=0.00) + split=18 aa[17]=S pref_nom=1905 suf_nom=532 score=0 (matched=2 sum=-0.40, missing=0 sum=0.00) + split=19 aa[18]=T pref_nom=2006 suf_nom=431 score=0 (matched=2 sum=-0.40, missing=0 sum=0.00) + split=20 aa[19]=G pref_nom=2063 suf_nom=374 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=21 aa[20]=V pref_nom=2162 suf_nom=275 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=22 aa[21]=F pref_nom=2309 suf_nom=128 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + breakdown_total = 128 + score_psm total = 128 + + Per-split node_score breakdown — Rust top-1 (VVYGNIYEIEIDRLFLTDQR +2) --- + spectrum_parent_mass=2456.2710, peptide_mass=2455.2747, peptide_nominal=2436 + split=1 aa[0]=V pref_nom=99 suf_nom=2337 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + ions: P1.0@100.1=MISS=-0.61 | P-17.0@82.0=MISS=-0.26 | S19.0@2357.2=MISS=-0.57 | S20.0@2358.2=MISS=-0.50 | S21.0@2359.2=MISS=-0.22 + split=2 aa[1]=V pref_nom=198 suf_nom=2238 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=3 aa[2]=Y pref_nom=361 suf_nom=2075 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=4 aa[3]=G pref_nom=418 suf_nom=2018 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + ions: P1.0@419.2=MISS=-0.61 | P-17.0@401.2=MISS=-0.26 | 
S19.0@2038.0=MISS=-0.57 | S20.0@2039.0=MISS=-0.50 | S21.0@2040.0=MISS=-0.22 + split=5 aa[4]=N pref_nom=532 suf_nom=1904 score=-2 (matched=0 sum=0.00, missing=5 sum=-2.15) + split=6 aa[5]=I pref_nom=645 suf_nom=1791 score=0 (matched=1 sum=1.44, missing=4 sum=-1.94) + split=7 aa[6]=Y pref_nom=808 suf_nom=1628 score=2 (matched=3 sum=3.26, missing=2 sum=-0.83) + split=8 aa[7]=E pref_nom=937 suf_nom=1499 score=6 (matched=2 sum=7.11, missing=3 sum=-1.33) + split=9 aa[8]=I pref_nom=1050 suf_nom=1386 score=7 (matched=4 sum=7.35, missing=1 sum=-0.57) + split=10 aa[9]=E pref_nom=1179 suf_nom=1257 score=11 (matched=5 sum=11.33, missing=0 sum=0.00) + split=11 aa[10]=I pref_nom=1292 suf_nom=1144 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=12 aa[11]=D pref_nom=1407 suf_nom=1029 score=4 (matched=2 sum=3.93, missing=0 sum=0.00) + split=13 aa[12]=R pref_nom=1563 suf_nom=873 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=14 aa[13]=L pref_nom=1676 suf_nom=760 score=0 (matched=1 sum=1.90, missing=1 sum=-1.63) + split=15 aa[14]=F pref_nom=1823 suf_nom=613 score=-1 (matched=1 sum=-0.77, missing=1 sum=-0.52) + split=16 aa[15]=L pref_nom=1936 suf_nom=500 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=17 aa[16]=T pref_nom=2037 suf_nom=399 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=18 aa[17]=D pref_nom=2152 suf_nom=284 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + split=19 aa[18]=Q pref_nom=2280 suf_nom=156 score=-2 (matched=0 sum=0.00, missing=2 sum=-2.15) + breakdown_total = 7 + PSM.score (from queue) = 11 + +--- Spectrum top-10 peaks by intensity --- + rank=1 mz=1948.0565 intensity=1466.5482 + rank=2 mz=1947.0988 intensity=913.67004 + rank=3 mz=1593.9252 intensity=698.60815 + rank=4 mz=974.6600 intensity=678.02356 + rank=5 mz=937.5471 intensity=670.54736 + rank=6 mz=1393.7548 intensity=659.17926 + rank=7 mz=1949.0800 intensity=648.49646 + rank=8 mz=1592.9198 intensity=642.4185 + rank=9 mz=1165.4860 intensity=633.5541 + rank=10 
mz=1392.6272 intensity=591.4806 diff --git a/docs/superpowers/plans/2026-05-26-i5-score-psm-trace-plan.md b/docs/superpowers/plans/2026-05-26-i5-score-psm-trace-plan.md new file mode 100644 index 00000000..458f7651 --- /dev/null +++ b/docs/superpowers/plans/2026-05-26-i5-score-psm-trace-plan.md @@ -0,0 +1,1102 @@ +# I5 score_psm trace investigation Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Identify the dominant root cause of the Rust↔Java per-PSM scoring divergence (Rust ~14 vs Java ~38 RawScore on the same spectrum+peptide) for 5 known label-flip PSMs on PXD001819, by capturing structured per-ion traces on both sides and diffing them. Output: written analysis + proposed fix design for the next PR. + +**Architecture:** Three small artifacts: (a) extend `msgf-trace` with `--trace-json` for per-PSM per-ion JSON output, (b) instrument java-legacy on the bench VM with `System.err.println` traces, (c) Python diff harness that aligns the two outputs and emits side-by-side rows. No production code changes; CI bit-identical regression gate passes trivially. + +**Tech Stack:** Rust 2024 edition pinned to 1.87.0; JSON output written manually via `write!` (no new serde dep); Java instrumentation against `java-legacy @ 65120118` built with Maven on bench VM (`pride-linux-vm`); Python 3 stdlib for the diff harness. 
+ +**Spec:** `docs/superpowers/specs/2026-05-26-i5-score-psm-trace-design.md` + +--- + +## File map + +**Created in this PR:** +- `crates/msgf-rust/src/bin/msgf-trace.rs` — extended (existing 729 LOC; add `--trace-json` flag + per-ion JSON output writer) +- `benchmark/ci/diff_score_psm_traces.py` — Python diff harness +- `docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md` — analysis doc (allowlisted in `.gitignore`) +- `docs/parity-analysis/notes/score-psm-trace-artifacts/` — directory with the 5-PSM Rust JSON traces + Java trace logs + diff outputs (small, ~tens of kB) +- `.gitignore` — allowlist entries for the new note + artifacts dir + +**Out-of-repo (bench VM only):** +- `/srv/data/msgf-bench/java-legacy-trace/` — fresh clone of `java-legacy` branch with instrumentation patch +- `/srv/data/msgf-bench/java-legacy-trace/target/MSGFPlus-trace.jar` — built instrumented JAR + +--- + +## The 5 label-flip PSMs (from 2026-05-20 finding) + +Per project memory, the 2026-05-20 investigation found 5 scans on PXD001819 where Rust and Java disagree on top-1 peptide. The flagship example is **scan 21** where Rust scores Java-favored peptide `R.NEEQSR.D` at 14 vs Java's RawScore 38. + +The exact 5 scan IDs are documented in the 2026-05-20 doc (local-only at the time, may need re-derivation): + +```bash +# To re-derive on bench VM if the original list is unavailable: +ssh root@pride-linux-vm 'cd /srv/data/msgf-bench/bench-pr-v1-s1b-results && \ + python3 /srv/data/msgf-bench/diff_top1.py \ + pxd001819-java.pin pxd001819-rust-off.pin | head -20' +``` + +A small re-derivation script (5 scans of the largest |Java RawScore − Rust top-1 RawScore| where both agree on the peptide candidate enumeration) can be added if the 2026-05-20 list is missing. For this plan, assume the scans are available; document the actual scan IDs in the analysis doc. 
+ +--- + +## Pre-flight (run before Task 1) + +```bash +cd /Users/yperez/work/msgfplus-workspace/astral-speed +git branch --show-current +# Expect: feat/i5-score-psm-trace + +git log origin/dev..HEAD --oneline | wc -l +# Expect: 1 (the spec commit f943aa7e) + +git status --short +# Expect: empty (clean tree) + +cargo build --release -p msgf-rust --bin msgf-trace 2>&1 | tail -3 +# Expect: Finished release profile + +cargo test --release --workspace -- \ + --skip charge_missing_spectrum_uses_per_charge_scored_spec \ + --skip spectrum_without_charge_tries_charge_range \ + --skip known_peptide_appears_in_top_n \ + --skip read_bsa_canno_text_format \ + --skip read_tryp_pig_bov_revcat_csarr_cnlcp \ + --skip tryp_pig_bov_revcat_full_set_loads \ + --skip match_spectra_output_invariant_across_thread_counts 2>&1 | grep -E "^test result" | grep -vE "0 passed.*0 failed.*0 ignored" | tail -5 +# Expect: all 0 failed. +``` + +If pre-flight fails, STOP and investigate. + +--- + +## Task 1: Extend `msgf-trace` with `--trace-json` output + +**Goal:** Add a flag that, when set, writes per-PSM per-ion structured JSON to a file alongside the existing human-readable stderr trace. + +**Files:** +- Modify: `crates/msgf-rust/src/bin/msgf-trace.rs` + +- [ ] **Step 1: Add the CLI flag** + +Open `crates/msgf-rust/src/bin/msgf-trace.rs`. Find the `struct Cli` definition (around line 30). After the existing `--java-top1` field, add: + +```rust + /// Output structured per-PSM per-ion JSON to this path (additive; the + /// existing human-readable stderr trace is unaffected). + #[arg(long)] + trace_json: Option<std::path::PathBuf>, +``` + +- [ ] **Step 2: Locate the per-split breakdown loop** + +In the same file, find where the per-split / per-ion breakdown is computed for the top-1 PSM (and the optional `--java-top1` peptide). Look for the loop that calls `directional_node_score_inner` or `partition_ion_logs` or `nearest_peak_rank` — that's the data source for the JSON. 
+ +```bash +grep -nE "partition_ion_logs|nearest_peak_rank|directional_node_score|partition_for" crates/msgf-rust/src/bin/msgf-trace.rs | head -20 +``` + +Identify the line ranges where the per-ion data is produced. + +- [ ] **Step 3: Add a JSON-writer module to msgf-trace.rs** + +Near the top of the file (after imports, before the `Cli` struct), add: + +```rust +// ─── Per-PSM JSON trace output (additive; no new deps) ───────────────────── +// +// Hand-written JSON via `write!` macros: small output (~5-10 KB per PSM), +// no serde dependency, and the diff harness parses on the Python side +// where stdlib json is sufficient. + +use std::io::Write as _; + +struct TraceJson<W: std::io::Write> { + out: W, + first_psm: bool, +} + +impl<W: std::io::Write> TraceJson<W> { + fn new(mut out: W) -> std::io::Result<Self> { + out.write_all(b"[\n")?; + Ok(Self { out, first_psm: true }) + } + + fn begin_psm( + &mut self, + scan: i32, + peptide: &str, + charge: u8, + rust_rank_score: i32, + ) -> std::io::Result<()> { + if !self.first_psm { + self.out.write_all(b",\n")?; + } + self.first_psm = false; + write!( + self.out, + " {{\n \"scan\": {},\n \"peptide\": \"{}\",\n \"charge\": {},\n \"rust_rank_score\": {},\n \"ions\": [", + scan, escape_json(peptide), charge, rust_rank_score + ) + } + + fn end_psm(&mut self) -> std::io::Result<()> { + self.out.write_all(b"\n ]\n }") + } + + fn ion( + &mut self, + first_ion: bool, + ion_type: &str, + theo_mz: f64, + rank_assigned: Option<u32>, + max_rank: u32, + log_prob: f32, + contribution: f32, + ) -> std::io::Result<()> { + if !first_ion { + self.out.write_all(b",")?; + } + let rank_str = rank_assigned + .map(|r| r.to_string()) + .unwrap_or_else(|| "null".to_string()); + write!( + self.out, + "\n {{\"ion_type\": \"{}\", \"theo_mz\": {:.6}, \"rank\": {}, \"max_rank\": {}, \"log_prob\": {:.6}, \"contribution\": {:.6}}}", + escape_json(ion_type), theo_mz, rank_str, max_rank, log_prob, contribution + ) + } + + fn finish(mut self) -> std::io::Result<()> { + self.out.write_all(b"\n]\n") + } +} + 
+fn escape_json(s: &str) -> String { + s.replace('\\', "\\\\") + .replace('"', "\\\"") + .replace('\n', "\\n") + .replace('\t', "\\t") +} +``` + +- [ ] **Step 4: Wire the JSON writer into the per-split breakdown loop** + +In `fn main()`, after parsing the CLI, before the per-split-breakdown loop, add: + +```rust + let mut trace_json: Option<TraceJson<std::io::BufWriter<File>>> = match cli.trace_json { + Some(ref path) => { + let file = File::create(path).map_err(|e| { + eprintln!("Failed to create --trace-json output {}: {}", path.display(), e); + e + })?; + Some(TraceJson::new(std::io::BufWriter::new(file))?) + } + None => None, + }; +``` + +Then INSIDE the per-PSM per-split-breakdown loop where the human-readable stderr is already being emitted, add parallel JSON emissions: + +```rust + // Inside the loop where you iterate over `(rust top-1, optional java_top1)`: + if let Some(ref mut tj) = trace_json { + tj.begin_psm(cli.scan, &peptide_label, charge, rust_rank_score as i32)?; + let mut first_ion = true; + for seg in 0..num_segs { + let partition = param.partition_for(charge, parent_mass, seg); + let ion_logs = scorer.partition_ion_logs(&partition); + for (ion, logs) in ion_logs { + let theo_mz = ion.mz(nominal_mass); // adjust to whatever drives the inner loop + let tol_da = param.mme.as_da(theo_mz); + let rank = ss.nearest_peak_rank(theo_mz, tol_da); + let max_rank = scorer.max_rank(); + let (log_prob, contribution) = match rank { + Some(r) => { + let idx = (r.min(max_rank).max(1) as usize) - 1; + let lp = if idx < logs.len() { logs[idx] } else { 0.0 }; + (lp, lp) + } + None => { + // No peak: missed-ion slot is logs[max_rank as usize] if present. 
+ let lp = logs.get(max_rank as usize).copied().unwrap_or(0.0); + (lp, lp) + } + }; + tj.ion( + first_ion, + &format!("{:?}", ion), + theo_mz, + rank, + max_rank, + log_prob, + contribution, + )?; + first_ion = false; + } + } + tj.end_psm()?; + } +``` + +The exact details of where this slots into the existing 729-line file depend on the current structure. **Step 4a:** before writing the loop body, READ the existing `main()` function and figure out: +- Where is `peptide_label` available (the peptide being scored)? +- Where is `parent_mass` computed? +- Where is `num_segs` (`param.num_segments`)? +- Where is `nominal_mass` derived per inner iteration? + +Use those bindings in your insertion. If the existing code uses different field names, adapt. + +- [ ] **Step 5: Close the JSON document at end of main** + +At the bottom of `main()`, just before the final `ExitCode::SUCCESS` return: + +```rust + if let Some(tj) = trace_json { + tj.finish()?; + } +``` + +- [ ] **Step 6: Build + smoke test** + +```bash +cd /Users/yperez/work/msgfplus-workspace/astral-speed +cargo build --release -p msgf-rust --bin msgf-trace 2>&1 | tail -3 +# Expect: Finished + +./target/release/msgf-trace --help 2>&1 | grep -A 1 "trace-json" +# Expect: --trace-json line with description +``` + +- [ ] **Step 7: Functional smoke test (local fixture)** + +```bash +# Use a small in-tree fixture so we don't depend on bench VM data. +./target/release/msgf-trace \ + --spectrum test-fixtures/test.mgf \ + --database test-fixtures/BSA.fasta \ + --param resources/ionstat/HCD_QExactive_Tryp.param \ + --scan 1 \ + --trace-json /tmp/smoke-trace.json 2>&1 | tail -5 + +# Validate JSON parses: +python3 -c "import json; j=json.load(open('/tmp/smoke-trace.json')); print(f'PSMs: {len(j)}, first ions: {len(j[0][\"ions\"])}' if j else 'empty')" +# Expect: at least one PSM with at least one ion record, JSON parses cleanly. 
+``` + +- [ ] **Step 8: Workspace tests + clippy** + +```bash +cargo test --release --workspace -- \ + --skip charge_missing_spectrum_uses_per_charge_scored_spec \ + --skip spectrum_without_charge_tries_charge_range \ + --skip known_peptide_appears_in_top_n \ + --skip read_bsa_canno_text_format \ + --skip read_tryp_pig_bov_revcat_csarr_cnlcp \ + --skip tryp_pig_bov_revcat_full_set_loads \ + --skip match_spectra_output_invariant_across_thread_counts 2>&1 | grep -E "^test result" | grep -vE "0 passed.*0 failed.*0 ignored" | tail -5 + +cargo clippy --workspace --all-targets 2>&1 | tail -3 +``` + +Both must pass. `msgf-trace` is a diagnostic binary so any new code there doesn't affect production correctness. + +- [ ] **Step 9: Commit** + +```bash +git add crates/msgf-rust/src/bin/msgf-trace.rs +git commit -m "$(cat <<'COMMIT_EOF' +feat(msgf-trace): per-PSM per-ion JSON output via --trace-json + +Adds a structured output mode to the diagnostic trace binary so its +per-split breakdown can be diffed against Java's instrumentation +output. JSON is written by hand (no new serde dep) since the volume +is small (~5-10 KB per PSM). The existing human-readable stderr +output is unaffected. + +No production code change; msgf-trace is a separate binary from +msgf-rust. +COMMIT_EOF +)" +``` + +--- + +## Task 2: Python diff harness + +**Goal:** Take a Rust trace JSON file + a Java trace log file, produce a side-by-side per-ion comparison. + +**Files:** +- Create: `benchmark/ci/diff_score_psm_traces.py` + +- [ ] **Step 1: Create the script** + +```bash +mkdir -p benchmark/ci +``` + +Create `benchmark/ci/diff_score_psm_traces.py` with: + +```python +#!/usr/bin/env python3 +""" +Diff per-PSM per-ion trace outputs from Rust (msgf-trace --trace-json) and +Java (instrumented java-legacy stderr). For each (scan, peptide) PSM, align +records by (ion_kind, theoretical mz tolerance 1e-3 Da) and emit a side-by-side +table. 
+ +Usage: + diff_score_psm_traces.py --rust rust-trace.json --java java-trace.log \\ + [--mz-tol 1e-3] [--scan SCAN] [--peptide PEP] + +Outputs to stdout. Exit code 0 = success. + +Rust JSON shape (per PSM): + { + "scan": int, + "peptide": str, + "charge": int, + "rust_rank_score": int, + "ions": [ + {"ion_type": str, "theo_mz": float, "rank": int|null, + "max_rank": int, "log_prob": float, "contribution": float}, + ... + ] + } + +Java log shape (one line per ion): + TRACE\\tscan=\\tpeptide=\\tion=\\ttheo_mz=\\trank=\\tlog_prob=\\tcontribution= +""" + +import argparse +import collections +import json +import sys + + +def parse_java_log(path: str) -> dict: + """Returns {(scan, peptide): [{ion fields}, ...]}.""" + out = collections.defaultdict(list) + with open(path) as fh: + for line in fh: + line = line.rstrip("\n") + if not line.startswith("TRACE\t"): + continue + fields = {} + for part in line.split("\t")[1:]: + if "=" not in part: + continue + k, v = part.split("=", 1) + fields[k] = v + try: + scan = int(fields["scan"]) + peptide = fields["peptide"] + ion = { + "ion_type": fields.get("ion", "?"), + "theo_mz": float(fields.get("theo_mz", "nan")), + "rank": int(fields["rank"]) if fields.get("rank", "") not in ("", "-1", "null") else None, + "log_prob": float(fields.get("log_prob", "nan")), + "contribution": float(fields.get("contribution", "nan")), + } + except (KeyError, ValueError) as e: + print(f"WARN: skipping malformed Java TRACE line: {line[:80]}... ({e})", file=sys.stderr) + continue + out[(scan, peptide)].append(ion) + return out + + +def parse_rust_json(path: str) -> dict: + """Returns {(scan, peptide): [{ion fields}, ...]}.""" + out = {} + with open(path) as fh: + data = json.load(fh) + for psm in data: + key = (psm["scan"], psm["peptide"]) + out[key] = psm["ions"] + return out + + +def normalize_ion_kind(s: str) -> str: + """Map both Rust and Java ion-type representations to a normalized key. 
+ + Rust format example: `Prefix { charge: 1, offset_bits: 0 }` + Java format example: `b/1+ off=0.0` (or whatever Java's TRACE emits) + Normalize to: `b/1+0.0` or `y/1+0.0` or `Noise`. + """ + s = s.strip() + if "Noise" in s: + return "Noise" + # Rust: `Prefix { charge: , offset_bits: }` + if s.startswith("Prefix"): + # extract charge and offset_bits, reconstruct as `b/+` + import re + m = re.search(r"charge:\s*(\d+).*offset_bits:\s*(\d+)", s) + if m: + charge = int(m.group(1)) + off_bits = int(m.group(2)) + # Decode f32::from_bits(u32) — use struct to avoid float imports + import struct + off = struct.unpack(">f", struct.pack(">I", off_bits))[0] + return f"b/{charge}+{off:.5f}" + if s.startswith("Suffix"): + import re, struct + m = re.search(r"charge:\s*(\d+).*offset_bits:\s*(\d+)", s) + if m: + charge = int(m.group(1)) + off_bits = int(m.group(2)) + off = struct.unpack(">f", struct.pack(">I", off_bits))[0] + return f"y/{charge}+{off:.5f}" + # Java format (placeholder; tighten when actual Java TRACE format is known) + return s + + +def align_and_diff(rust_ions: list, java_ions: list, mz_tol: float = 1e-3): + """Yields rows: (key, rust, java, diverge_flags) per matched/unmatched ion.""" + java_by_key = collections.defaultdict(list) + for ion in java_ions: + key = (normalize_ion_kind(ion["ion_type"]), round(ion["theo_mz"] / mz_tol)) + java_by_key[key].append(ion) + + matched_java = set() + for rust_ion in rust_ions: + rust_key = ( + normalize_ion_kind(rust_ion["ion_type"]), + round(rust_ion["theo_mz"] / mz_tol), + ) + candidates = java_by_key.get(rust_key, []) + java_ion = candidates.pop(0) if candidates else None + if java_ion is not None: + matched_java.add(id(java_ion)) + flags = [] + if java_ion is None: + flags.append("RUST_ONLY") + else: + if rust_ion["rank"] != java_ion["rank"]: + flags.append("RANK_DIFF") + if abs(rust_ion["log_prob"] - java_ion["log_prob"]) > 1e-4: + flags.append("LOGPROB_DIFF") + if abs(rust_ion["contribution"] - java_ion["contribution"]) 
> 1e-4: + flags.append("CONTRIB_DIFF") + yield (rust_key, rust_ion, java_ion, flags) + + # Any remaining Java ions not matched in Rust: + for ion in java_ions: + if id(ion) in matched_java: + continue + key = (normalize_ion_kind(ion["ion_type"]), round(ion["theo_mz"] / mz_tol)) + yield (key, None, ion, ["JAVA_ONLY"]) + + +def format_row(rust_key, rust_ion, java_ion, flags): + def fmt(v, w): + if v is None: + return "-" * w + if isinstance(v, float): + return f"{v:>{w}.4f}" + return f"{str(v):>{w}}" + return " ".join([ + fmt(rust_key[0], 22), + fmt((rust_ion or java_ion)["theo_mz"], 10), + fmt(rust_ion["rank"] if rust_ion else None, 5), + fmt(java_ion["rank"] if java_ion else None, 5), + fmt(rust_ion["log_prob"] if rust_ion else None, 9), + fmt(java_ion["log_prob"] if java_ion else None, 9), + fmt(rust_ion["contribution"] if rust_ion else None, 9), + fmt(java_ion["contribution"] if java_ion else None, 9), + ",".join(flags) if flags else "", + ]) + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("--rust", required=True, help="Rust trace JSON from msgf-trace --trace-json") + ap.add_argument("--java", required=True, help="Java instrumented trace log (TRACE lines)") + ap.add_argument("--mz-tol", type=float, default=1e-3, help="m/z alignment tolerance (Da)") + ap.add_argument("--scan", type=int, default=None, help="Restrict to one scan") + ap.add_argument("--peptide", default=None, help="Restrict to one peptide") + args = ap.parse_args() + + rust = parse_rust_json(args.rust) + java = parse_java_log(args.java) + + all_keys = sorted(set(rust.keys()) | set(java.keys())) + for key in all_keys: + scan, pep = key + if args.scan is not None and scan != args.scan: + continue + if args.peptide is not None and pep != args.peptide: + continue + print(f"\n=== scan={scan} peptide={pep} ===") + rust_ions = rust.get(key, []) + java_ions = java.get(key, []) + if not rust_ions and not java_ions: + print(" (no data on either side)") + continue + print(" ion_type theo_mz 
R_rk J_rk R_logP J_logP R_ctrb J_ctrb flags") + rust_total = 0.0 + java_total = 0.0 + category_counts = collections.Counter() + for row in align_and_diff(rust_ions, java_ions, args.mz_tol): + print(" " + format_row(*row)) + if row[1] is not None: + rust_total += row[1]["contribution"] + if row[2] is not None: + java_total += row[2]["contribution"] + for f in row[3]: + category_counts[f] += 1 + print(f" TOTAL contribution: rust={rust_total:.4f} java={java_total:.4f} delta={rust_total - java_total:+.4f}") + if category_counts: + print(f" DIVERGENCES: {dict(category_counts)}") + + +if __name__ == "__main__": + main() +``` + +- [ ] **Step 2: Make executable + smoke test** + +```bash +chmod +x benchmark/ci/diff_score_psm_traces.py + +# Synthetic test: create tiny rust + java trace inputs and run +cat > /tmp/rust-smoke.json <<'EOF' +[ + {"scan": 1, "peptide": "K.PEPTIDE.D", "charge": 2, "rust_rank_score": 10, + "ions": [ + {"ion_type": "Prefix { charge: 1, offset_bits: 0 }", "theo_mz": 100.05, "rank": 5, "max_rank": 150, "log_prob": -0.4, "contribution": -0.4}, + {"ion_type": "Suffix { charge: 1, offset_bits: 0 }", "theo_mz": 200.10, "rank": null, "max_rank": 150, "log_prob": -2.1, "contribution": -2.1} + ]} +] +EOF + +cat > /tmp/java-smoke.log <<'EOF' +TRACE scan=1 peptide=K.PEPTIDE.D ion=b/1+0.00000 theo_mz=100.05 rank=4 log_prob=-0.35 contribution=-0.35 +TRACE scan=1 peptide=K.PEPTIDE.D ion=y/1+0.00000 theo_mz=200.10 rank=-1 log_prob=-2.05 contribution=-2.05 +EOF + +python3 benchmark/ci/diff_score_psm_traces.py --rust /tmp/rust-smoke.json --java /tmp/java-smoke.log +# Expect: a table showing rust=5 vs java=4 (RANK_DIFF) + LOGPROB_DIFF + CONTRIB_DIFF +# Total delta: rust=-2.5, java=-2.4, delta=-0.1. 
+``` + +- [ ] **Step 3: Commit** + +```bash +git add benchmark/ci/diff_score_psm_traces.py +git commit -m "$(cat <<'COMMIT_EOF' +feat(diff-harness): Python diff for Rust vs Java per-PSM ion traces + +Aligns msgf-trace JSON output against java-legacy instrumented TRACE +lines by (ion_kind, theo_mz). Emits side-by-side per-ion rows with +RANK_DIFF / LOGPROB_DIFF / CONTRIB_DIFF flags + per-PSM totals. +stdlib-only; runs on any Python 3 install. +COMMIT_EOF +)" +``` + +--- + +## Task 3: Bench VM Java instrumentation + +**Goal:** Build an instrumented `MSGFPlus-trace.jar` on the bench VM and capture the 5-PSM trace log. + +**Files:** none in this repo (all changes live on the bench VM under `/srv/data/msgf-bench/java-legacy-trace/`). + +- [ ] **Step 1: Verify VM Java toolchain + reactivate VM socket if needed** + +```bash +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'java -version 2>&1 | head -3; mvn -version 2>&1 | head -3' +``` + +Expected: Java 17 (or 11) and Maven 3.x. If missing, install: + +```bash +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'dnf install -y java-17-openjdk-devel maven 2>&1 | tail -5' +``` + +- [ ] **Step 2: Clone java-legacy on VM** + +```bash +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'cd /srv/data/msgf-bench && \ + rm -rf java-legacy-trace && \ + git clone https://github.com/bigbio/msgf-rust.git java-legacy-trace && \ + cd java-legacy-trace && \ + git checkout 65120118 && \ + git log -1 --format="%h %s"' +``` + +If the commit `65120118` isn't reachable (e.g., the java-legacy branch was removed), bisect from the most recent commit on the `java-legacy` or `java-legacy-original` branch. + +- [ ] **Step 3: Apply instrumentation patch on the VM** + +```bash +# Edit DBScanScorer.java to add TRACE prints in the score path. 
+# Pattern: in the score-summing inner loop, before adding ion contribution to total: +# System.err.println("TRACE\tscan=" + scanNum + "\tpeptide=" + peptideStr + "\tion=" + ionType + "\ttheo_mz=" + theoMz + "\trank=" + rank + "\tlog_prob=" + logProb + "\tcontribution=" + contribution); +``` + +Use `sed` or paste a patch via stdin from the controller side. The exact insertion line depends on java-legacy's code structure. Reference patch shape (the actual lines to add, given by the agent on demand): + +```java +// In DBScanScorer.java, score(...) method, inside the per-ion loop: +double contribution = /* existing per-ion score */; +System.err.println( + "TRACE\tscan=" + scanNum + + "\tpeptide=" + peptideStr + + "\tion=" + ionType.toString() + + "\ttheo_mz=" + theoMz + + "\trank=" + rank + + "\tlog_prob=" + logProb + + "\tcontribution=" + contribution +); +totalScore += contribution; +``` + +Apply via heredoc/scp; commit on the VM-side clone (not pushed): + +```bash +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'cd /srv/data/msgf-bench/java-legacy-trace && \ + # patch applied via Edit on VM-side files; commit: + git add -A && \ + git commit -m "diag: TRACE per-ion prints for I5 investigation" && \ + git log -1 --format="%h %s"' +``` + +Note the SHA — cite it in the analysis doc. + +- [ ] **Step 4: Build instrumented JAR** + +```bash +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'cd /srv/data/msgf-bench/java-legacy-trace && \ + mvn package -DskipTests 2>&1 | tail -10' +# Expect: BUILD SUCCESS; target/MSGFPlus-*.jar exists. +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'ls -la /srv/data/msgf-bench/java-legacy-trace/target/*.jar | head' +``` + +If build fails, capture the error, downgrade to a nearby buildable commit on java-legacy, document the actual SHA used. 
+ +- [ ] **Step 5: Identify the 5 label-flip scans** + +If the 2026-05-20 doc is unavailable, derive from current PR-V1 bench data: + +```bash +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'python3 <; do \ + java -Xmx8192m -jar java-legacy-trace/target/MSGFPlus-*.jar \ + -s data/UPS1_5000amol_R1.mzML \ + -d data/PXD001819_uniprot_yeast_ups.fasta \ + -mod mods.txt \ + -o /tmp/java-trace-$SCAN.mzid \ + -tda 1 -t 5ppm -ti 0,1 -m 0 -inst 0 -e 1 -protocol 0 -ntt 2 \ + -minLength 6 -maxLength 40 -minNumPeaks 10 \ + -minCharge 2 -maxCharge 4 -maxMissedCleavages 2 -n 1 -addFeatures 1 \ + -msLevel 2 -thread 8 \ + 2>/srv/data/msgf-bench/i5-trace-out/java-trace-scan-$SCAN.log; \ + done' +``` + +Note: the instrumented JAR will produce TRACE lines for ALL scans it processes, not just the 5 we care about. The Python diff harness will filter by `--scan`. Alternative: add a scan filter inside the Java instrumentation (e.g., `if (scanNum != TARGET_SCAN) return;`) to keep log volume manageable. + +If log size is unmanageable (>1 GB), add a runtime filter in Java code (a `Set` of target scans, only print TRACE when contained). 
+ +- [ ] **Step 7: Run msgf-rust trace on the same 5 scans** + +```bash +# Make sure msgf-rust binary is up to date with Task 1's commit +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'cd /srv/data/msgf-bench/pr-v1-s1b-build && /root/.cargo/bin/cargo build --release --bin msgf-trace 2>&1 | tail -3' + +# Or: scp updated source from local, rebuild +# (skip if VM build is fresh) + +# Run msgf-trace on each scan with --trace-json +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'cd /srv/data/msgf-bench && \ + for SCAN in <5-scan-ids-here>; do \ + pr-v1-s1b-build/target/release/msgf-trace \ + --spectrum data/UPS1_5000amol_R1.mzML \ + --database data/PXD001819_uniprot_yeast_ups.fasta \ + --param resources/ionstat/HCD_QExactive_Tryp.param \ + --scan $SCAN \ + --java-top1 "" \ + --trace-json /srv/data/msgf-bench/i5-trace-out/rust-trace-scan-$SCAN.json \ + > /srv/data/msgf-bench/i5-trace-out/rust-trace-scan-$SCAN.txt 2>&1; \ + done' +``` + +- [ ] **Step 8: Run the diff harness for each scan** + +```bash +ssh -S /tmp/msgfplus-bench.sock root@pride-linux-vm 'cd /srv/data/msgf-bench && \ + for SCAN in <5-scan-ids-here>; do \ + echo "=== scan $SCAN diff ==="; \ + python3 /srv/data/msgf-bench/diff_score_psm_traces.py \ + --rust /srv/data/msgf-bench/i5-trace-out/rust-trace-scan-$SCAN.json \ + --java /srv/data/msgf-bench/i5-trace-out/java-trace-scan-$SCAN.log \ + --scan $SCAN > /srv/data/msgf-bench/i5-trace-out/diff-scan-$SCAN.txt; \ + tail -5 /srv/data/msgf-bench/i5-trace-out/diff-scan-$SCAN.txt; \ + done' +``` + +(Make sure to scp `benchmark/ci/diff_score_psm_traces.py` to the VM as `/srv/data/msgf-bench/diff_score_psm_traces.py` first, or run from a clone of this branch on the VM.) 
+ +- [ ] **Step 9: Pull artifacts to local** + +```bash +mkdir -p docs/parity-analysis/notes/score-psm-trace-artifacts +scp -o ControlPath=/tmp/msgfplus-bench.sock \ + 'root@pride-linux-vm:/srv/data/msgf-bench/i5-trace-out/*' \ + docs/parity-analysis/notes/score-psm-trace-artifacts/ +ls -la docs/parity-analysis/notes/score-psm-trace-artifacts/ +# Expect: ~15 files (5 rust json + 5 java log + 5 diff txt). Total ~50-500 KB. +``` + +Note: the Java log files may be large. If any exceed 1 MB, filter them down to TRACE lines for the 5 target scans only: + +```bash +for f in docs/parity-analysis/notes/score-psm-trace-artifacts/java-trace-scan-*.log; do + scan=$(basename "$f" .log | sed 's/java-trace-scan-//') + grep "TRACE.*scan=${scan}\b" "$f" > "${f}.filtered" && mv "${f}.filtered" "$f" +done +``` + +- [ ] **Step 10: No commit yet** (artifacts staged in Task 4 alongside the analysis doc). + +--- + +## Task 4: Write the analysis doc + .gitignore allowlist + +**Goal:** Read the diff outputs from Task 3 Step 8, identify the dominant root cause, write the analysis doc with side-by-side evidence and a proposed fix design. + +**Files:** +- Create: `docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md` +- Modify: `.gitignore` (allowlist the new note + artifacts dir) + +- [ ] **Step 1: Read the 5 diff outputs** + +```bash +for s in <5-scan-ids-here>; do + echo "=== scan $s ===" + cat docs/parity-analysis/notes/score-psm-trace-artifacts/diff-scan-${s}.txt +done +``` + +For each scan, identify: +- Are there RANK_DIFF flags? If yes, how many ions show rank mismatch? +- Are there LOGPROB_DIFF flags? Where do they cluster? +- Are there CONTRIB_DIFF flags driven by rank or by log-prob? +- Are there RUST_ONLY / JAVA_ONLY ions (ion-type-list mismatch)? + +Tally divergence categories across all 5 scans. The category with the most ion-level divergences AND the largest score-delta contribution is the dominant root cause. 
+ +- [ ] **Step 2: Localize to code** + +Once a dominant category is identified: + +- **H1 dominant** (ion-type-list mismatch): inspect Rust's `crates/scoring/src/scoring/rank_scorer.rs::partition_ion_logs` vs Java's `NewRankScorer.getIonProbabilities(Partition)` or equivalent. Capture the file:line on both sides where the ion-type set is constructed. +- **H2 dominant** (rank mismatch): inspect Rust's `crates/scoring/src/scoring/scored_spectrum.rs::nearest_peak_rank` + `setRanksOfPeaks`-equivalent vs Java's `NewScoredSpectrum.setRanksOfPeaks`. Particularly check the precursor-filter handling and rank tie-break behavior. +- **H3 dominant** (log-prob mismatch): inspect Rust's `crates/scoring/src/param_model.rs::partition_for` + the rank index calculation (`r.min(max_rank).max(1) as usize - 1`) vs Java's analogous lookup. + +Document the divergence with code citations. + +- [ ] **Step 3: Write the analysis doc** + +Create `docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md`: + +```markdown +# I5 score_psm trace investigation — findings + +**Date:** 2026-05-26 +**Branch:** feat/i5-score-psm-trace +**Java instrumentation:** java-legacy @ (out-of-repo) +**Dataset:** PXD001819 (UPS1_5000amol_R1.mzML) + +## Five label-flip PSMs traced + +| Scan | Java top-1 peptide | Java RawScore | Rust top-1 peptide | Rust RawScore | Δ | +|---:|---|---:|---|---:|---:| +| | ... | ... | ... | ... | ... | +| | ... | ... | ... | ... | ... | +| | ... | ... | ... | ... | ... | +| | ... | ... | ... | ... | ... | +| | ... | ... | ... | ... | ... | + +Trace artifacts: `score-psm-trace-artifacts/{rust-trace-scan-N.json, java-trace-scan-N.log, diff-scan-N.txt}`. + +## Aggregate divergence counts (5 PSMs combined) + +| Category | Count | % of total divergences | +|---|---:|---:| +| RANK_DIFF | |
<pct>% | +| LOGPROB_DIFF | | <pct>% | +| CONTRIB_DIFF | | <pct>% | +| RUST_ONLY | | <pct>% | +| JAVA_ONLY | | <pct>
% | + +## Dominant root cause + + + +**Rust:** `crates/:` +**Java:** `:` (in java-legacy clone) + +The divergence arises because . + +## Proposed fix design + +**Code path to change:** +**Direction:** +**Expected PSM impact:** estimated +% on PXD001819 (~+ PSMs at 1% FDR). On Astral and TMT, likely based on . +**Risk class:** per the n=9 audit pattern. +**Bench gate for the fix PR:** PXD001819 auto @1% FDR ≥ + PSMs; no regression on Astral / TMT. + +## Methodology + +1. Identified 5 label-flip PSMs from PR-V1 bench (largest |Java RawScore − Rust top-1 RawScore| where peptide differs). +2. Captured per-ion structured traces: + - Rust: `msgf-trace --trace-json` (commit ) + - Java: java-legacy with `System.err.println` patches in `DBScanScorer.score()` (java-legacy clone commit ) +3. Aligned Rust ↔ Java records by (ion_kind, theo_mz) tolerance 1e-3 Da. +4. Diff harness: `benchmark/ci/diff_score_psm_traces.py` (commit ). + +## Out of scope (next PR) + +- Implementing the fix +- Validating the fix on Astral / TMT (the bench gate is PXD001819 only, but Astral / TMT should be monitored for regressions) +``` + +Replace all `<...>` placeholders with actual values from your investigation. + +- [ ] **Step 4: Update .gitignore allowlist** + +Open `.gitignore`. 
Find the existing parity-analysis allowlist: + +```gitignore +docs/parity-analysis/* +!docs/parity-analysis/notes/ +!docs/parity-analysis/notes/2026-05-25-precursor-cal-ship-gates.md +!docs/parity-analysis/notes/2026-05-25-spece-tail-exploration.md +``` + +Add: + +```gitignore +!docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md +!docs/parity-analysis/notes/score-psm-trace-artifacts/ +!docs/parity-analysis/notes/score-psm-trace-artifacts/* +``` + +- [ ] **Step 5: Confirm files are no longer ignored** + +```bash +git check-ignore docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md && echo "STILL_IGNORED" || echo "TRACKED" +# Expect: TRACKED + +git check-ignore docs/parity-analysis/notes/score-psm-trace-artifacts/diff-scan-21.txt && echo "STILL_IGNORED" || echo "TRACKED" +# Expect: TRACKED +``` + +(Adjust the example scan-id to one of the 5 actual scans.) + +- [ ] **Step 6: Stage and commit** + +```bash +# Stage allowlist + analysis doc + artifacts +git add .gitignore +git add docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md +git add docs/parity-analysis/notes/score-psm-trace-artifacts/ + +git status --short +# Expect: modified .gitignore, the new findings note, and one entry per new artifact file. The diff harness was committed earlier and must not appear. + +git commit -m "$(cat <<'COMMIT_EOF' +docs(i5): per-PSM trace findings + 5-PSM artifacts (PXD001819) + +Identifies the dominant root cause of the Rust vs Java per-PSM scoring +divergence on PXD001819 label-flip PSMs. Methodology + artifacts + +proposed fix design (no code in this PR; fix lands separately). + +Dominant cause: <H1|H2|H3> — Rust's <rust-function> diverges from Java's +<java-function>. + +Trace artifacts (Rust JSON + Java TRACE log + diff outputs for 5 +PSMs) committed under docs/parity-analysis/notes/score-psm-trace-artifacts/ +for reproducibility. + +Out of scope: fix implementation; next PR after this. +COMMIT_EOF +)" +``` + +Replace the placeholder `<H1|H2|H3> — Rust's <rust-function> diverges from Java's <java-function>` in the message with the actual finding before running the commit. 
+ +--- + +## Task 5: Push + open PR + +- [ ] **Step 1: Final workspace check** + +```bash +cargo build --release --workspace 2>&1 | tail -3 +# Expect: Finished + +cargo test --release --workspace -- \ + --skip charge_missing_spectrum_uses_per_charge_scored_spec \ + --skip spectrum_without_charge_tries_charge_range \ + --skip known_peptide_appears_in_top_n \ + --skip read_bsa_canno_text_format \ + --skip read_tryp_pig_bov_revcat_csarr_cnlcp \ + --skip tryp_pig_bov_revcat_full_set_loads \ + --skip match_spectra_output_invariant_across_thread_counts 2>&1 | grep -E "^test result" | grep -vE "0 passed.*0 failed.*0 ignored" | tail -5 +# Expect: all 0 failed. +``` + +- [ ] **Step 2: Confirm commit ladder** + +```bash +git log origin/dev..HEAD --oneline +# Expect: +# docs(i5): per-PSM trace findings ... +# feat(diff-harness): ... +# feat(msgf-trace): per-PSM per-ion JSON output ... +# f943aa7e docs(spec): I5 score_psm trace investigation design +``` + +- [ ] **Step 3: Push** + +```bash +git push -u origin feat/i5-score-psm-trace 2>&1 | tail -3 +``` + +- [ ] **Step 4: Open PR** + +```bash +gh pr create --base dev --head feat/i5-score-psm-trace \ + --title "diag(i5): score_psm trace findings + diff harness (no production code change)" \ + --body "$(cat <<'PR_BODY' +## Summary + +Research-only PR. Identifies the dominant root cause of the Rust vs +Java per-PSM scoring divergence (Rust ~14 vs Java ~38 RawScore on the +same spectrum+peptide). The actual fix is a separate PR after this. + +## Finding + + + +Full analysis with side-by-side evidence on 5 label-flip PSMs from +PXD001819: `docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md`. 
+ +## What this PR contains + +- `crates/msgf-rust/src/bin/msgf-trace.rs` — extended with `--trace-json` + for per-PSM per-ion structured output (no production code change; + diagnostic binary) +- `benchmark/ci/diff_score_psm_traces.py` — Python diff harness +- `docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md` — analysis +- `docs/parity-analysis/notes/score-psm-trace-artifacts/` — Rust + Java + traces + diff outputs for 5 PSMs (reproducibility) + +## What this PR does NOT contain + +- The fix itself (next PR) +- Production code changes (`msgf-trace` is a separate binary) +- Java repo changes (java-legacy instrumentation lives on bench VM) +- Datasets other than PXD001819 + +## Verification + +- [x] `cargo clippy --workspace --all-targets` clean +- [x] Workspace tests green under existing CI skip list +- [x] `precursor_cal_bit_identical` regression gate green (no + production code change → trivially passes) +- [ ] CodeRabbit review pass +- [ ] CI matrix green + +## Next PR + +The proposed fix from the analysis doc, bench-gated on PXD001819 +@1% FDR. +PR_BODY +)" +``` + +Replace the `` placeholder with the actual finding from Task 4. + +- [ ] **Step 5: Confirm PR open** + +```bash +gh pr view --json number,title,state,statusCheckRollup --jq '{number, state, checks: [.statusCheckRollup[]? | {name, status}]}' +``` + +--- + +## Self-review + +I checked the plan against the spec section-by-section: + +**1. Spec coverage:** +- Component 1 (Rust trace extensions) → Task 1 ✓ +- Component 2 (Java instrumentation, out-of-repo) → Task 3 ✓ +- Component 3 (Python diff harness) → Task 2 ✓ +- Component 4 (analysis doc + artifacts) → Task 4 ✓ +- Verification / success criteria (5+ PSMs, function-level localization, fix design) → Task 4 ✓ +- Out-of-scope safety net (no production code change) → Task 1 (msgf-trace is diagnostic) + Task 3 (Java patch out-of-repo) ✓ + +**2. 
Placeholder scan:** The plan contains `<5-scan-ids-here>` and `<...>` style placeholders intentionally — they are inputs the implementer fills in from the live investigation. Each is documented as such. No "TBD" or "implement later" instructions for things that should be specified upfront. + +**3. Type consistency:** The JSON field names (`ion_type`, `theo_mz`, `rank`, `max_rank`, `log_prob`, `contribution`) are used identically across Task 1 (writer), Task 2 (parser), and Task 4 (analysis). The Java TRACE format (tab-separated `key=value`) is used identically in Task 2's parser and Task 3's emitter. + +**Known soft spots:** +- The exact Java instrumentation patch lines depend on the actual java-legacy source structure at SHA `65120118`. Task 3 Step 3 provides the pattern; the agent fills in line-specific edits. +- The 5 scan IDs depend on either the 2026-05-20 doc (local-only) OR a re-derivation script (Task 3 Step 5). If re-derivation produces a different set, that's acceptable; document the actual scans used. +- If the diff harness reveals that NONE of H1/H2/H3 dominates and the cause is more subtle (e.g., a numeric-precision issue in a different code path), the analysis doc reports that honestly and the next PR has a wider scope. diff --git a/docs/superpowers/specs/2026-05-26-i5-score-psm-trace-design.md b/docs/superpowers/specs/2026-05-26-i5-score-psm-trace-design.md new file mode 100644 index 00000000..9fc5c576 --- /dev/null +++ b/docs/superpowers/specs/2026-05-26-i5-score-psm-trace-design.md @@ -0,0 +1,181 @@ +# Design — I5 score_psm trace investigation (research-only PR) + +**Date:** 2026-05-26 +**Branch:** `feat/i5-score-psm-trace` (from `origin/dev @ 42a6d54f`) +**Status:** Spec for review + +## Problem + +PR-V1 shipped a 10–15% wall-clock time reduction (FxHashMap on hot scoring tables). Wall-clock time is no longer the bottleneck for the +5%/dataset PSM goal — the bottleneck is now per-PSM scoring divergence between Rust and Java. 
+ +A prior diagnostic session (2026-05-20, captured in project auto-memory) ran `msgf-trace` on 5 label-flip PSMs from PXD001819 and found: + +> "Rust scores the Java-favored target peptide R.NEEQSR.D at 14 (per-split breakdown) vs Java's RawScore 38. 20-24 point gap on the SAME (spectrum, peptide). Rust DOES enumerate the peptide (it's at #5 in Rust's top-10 queue), so candidate enumeration is fine — the divergence is in per-split node scoring inside score_psm. Pattern is universal across 5 label-flip samples (Java RawScore 13-38 vs Rust top-1 7-32, 6-22 point gap)." + +Three hypotheses: +- **H1** — per-partition ion-type list differs (Rust's `partition_ion_logs` enumerates a different IonType set than Java's per-partition table) +- **H2** — peak rank assignment differs (Rust's `setRanksOfPeaks`-equivalent, applied after the precursor filter, yields different ranks per peak than Java's `setRanksOfPeaks`) +- **H3** — per-rank log-probability tables differ (the `rank_dist_table[partition][ion_type][rank]` lookup returns different values) + +That session ended with "Closing this requires Java instrumentation to dump ranks/ions for diff comparison — 2-3 day investigation." This is that investigation. + +## Goal + +Identify the dominant root cause (one of H1/H2/H3 or a compound) of the per-PSM scoring divergence. Output: written analysis with side-by-side evidence on the same 5 label-flip PSMs + a proposed fix design for the next PR. + +**No production code changes** in this PR. The diagnostic-binary extension (`msgf-trace`, Rust) and a Python diff harness are the only code added. 
+ +## Non-goals + +- Implementing the fix (next PR) +- Any change to `crates/*/src/` other than `crates/msgf-rust/src/bin/msgf-trace.rs` +- Datasets other than PXD001819 (per the brainstorm; pattern is reportedly universal) +- Java repo changes committed to msgf-rust (instrumented Java patch lives in a separate java-legacy worktree on the bench VM) +- Rebasing on top of PR-V1 (this branch is off dev; PR-V1's perf changes are orthogonal to scoring correctness) + +## Architecture — 4 components + +### Component 1 — Rust trace extensions + +File: `crates/msgf-rust/src/bin/msgf-trace.rs` (already 729 LOC, used for the 2026-05-20 finding). + +Extend with structured JSON output for per-PSM per-ion diagnostics: + +```json +{ + "scan": 21, + "peptide": "R.NEEQSR.D", + "charge": 2, + "rust_top_rank_score": 14, + "ions": [ + { + "ion_type": "Prefix(c=1, off=0.0)", + "theo_mz": 130.0498, + "observed_peak_mz": 130.0501, + "matched": true, + "rank_assigned": 7, + "max_rank_in_partition": 150, + "log_prob_at_rank": -0.43, + "score_contribution": -0.43 + }, + ... + ], + "partition": { + "charge": 2, + "parent_mass_tier": 1500.0, + "seg_num": 0, + "ion_types_count": 24, + "ion_types": ["Prefix(c=1, off=0)", "Suffix(c=1, off=0)", ...] + } +} +``` + +Output file: `--trace-json <path>`. Existing human-readable stderr trace stays; the JSON is additive. + +Implementation: capture the per-ion data inside the existing per-split-breakdown loop; serialize with `serde_json` (already in the workspace). + +### Component 2 — Java instrumentation (out-of-repo) + +On the bench VM (`pride-linux-vm`): + +1. Verify JDK 17 + Maven installed (`java -version; mvn -version`) +2. Clone java-legacy into a new dir and check out the pinned commit inside it: `git clone <java-legacy-url> /srv/data/msgf-bench/java-legacy-trace && cd /srv/data/msgf-bench/java-legacy-trace && git checkout 65120118` +3. 
Add `System.err.println` traces in: + - `src/main/java/edu/ucsd/msjava/msdbsearch/DBScanScorer.java::score(...)` — log per-ion score contribution + ion type + rank + - `src/main/java/edu/ucsd/msjava/msutil/NewScoredSpectrum.java::setRanksOfPeaks()` — log final rank assignment per peak + - `src/main/java/edu/ucsd/msjava/msscorer/NewRankScorer.java::errorScore(...)` and the rank-lookup method — log per-rank table value +4. Each `System.err.println` call outputs one structured, tab-separated line of `key=value` fields: `TRACE\tscan=<scan>\tpeptide=<peptide>\tion=<ion>\ttheo_mz=<mz>\trank=<rank>\tlog_prob=<log_prob>\tcontribution=<contribution>` +5. `mvn package -DskipTests` → `target/MSGFPlus-trace.jar` +6. Run on the same 5 label-flip scans, redirecting stderr to a log file of TRACE lines + +The Java patch + build artifacts live in `/srv/data/msgf-bench/java-legacy-trace/` ONLY. The instrumented JAR is NOT committed to msgf-rust. The analysis doc cites the patch's commit SHA on the java-legacy clone for reproducibility. + +### Component 3 — Python diff harness + +File: `benchmark/ci/diff_score_psm_traces.py` (the `benchmark/ci/` dir is the existing carve-out for committed bench tooling). + +Behavior: +- Inputs: Rust trace JSON (a JSON array with one object per PSM) + Java trace log (TRACE lines, parsed into a JSON-equivalent dict) +- For each (scan, peptide) pair, align records by (ion_type_key, theoretical_mz) within a small tolerance +- Output: stdout table per (scan, peptide), columns: `IonType | Theo_mz | Rust rank | Java rank | Rust log-prob | Java log-prob | Rust contrib | Java contrib | DIVERGE?` +- Summary footer: total Rust score, total Java score, divergence count by category (rank mismatch, log-prob mismatch, ion-type-list mismatch) + +Uses only stdlib (`argparse`, `collections`, `json`, `re`, `struct`, `sys`). No new deps. + +### Component 4 — Analysis doc + +File: `docs/parity-analysis/notes/2026-05-26-score-psm-trace-findings.md` — needs `.gitignore` allowlist entry alongside the existing `2026-05-25-precursor-cal-ship-gates.md`-style allowlist. + +Contents: +1. Methodology (which scans, which Java commit, which Rust HEAD) +2. 
Five side-by-side example PSMs (diff-harness output per PSM) +3. Aggregated divergence counts by category (H1/H2/H3) +4. Code-level root cause: Rust file:line + Java file:line for the divergent path; one paragraph explaining the divergence +5. **Proposed fix design** (no code; high-level): + - What code path to change + - What direction (e.g., "Rust's setRanksOfPeaks needs to apply the same tie-break rule as Java") + - Expected PSM-count impact, rough order of magnitude + - Risk class per the n=9 audit pattern (additive vs. modifying-existing-distribution) + +### Verification / success criteria + +- 5+ PSMs traced with full side-by-side data +- Function-level localization: "Rust's `X::y` at file:line produces value A where Java's `Z.w` at file:line produces value B; root cause is C" +- Proposed fix design exists with the above structure +- Trace artifacts (Rust JSON + Java log + diff outputs) committed to `docs/parity-analysis/notes/score-psm-trace-artifacts/` (allowlist-relevant), small enough to commit (5 PSMs × ~kB each = tens of kB) + +If after 3 days the investigation has not produced a single function-level localization but HAS produced data: ship the data + a "pending" finding doc and pause for human triage. + +## Out-of-scope safety net + +- **No production code change.** The `msgf-trace` binary is diagnostic — extending its JSON output cannot affect production `msgf-rust` behavior. CI bit-identical regression gate still passes trivially. +- **No Java production change.** Instrumented JAR is local-to-bench-VM; production benches still use the canonical `MSGFPlus.jar`. 
+ +## Risks & mitigations + +| Risk | Mitigation | +|---|---| +| Bench VM lacks JDK 17 / Maven | Check first; install via conda or `dnf install java-17-openjdk-devel maven` | +| `java-legacy @ 65120118` doesn't build cleanly on VM | Bisect to a nearby buildable commit; document the SHA used | +| 5 PSMs produce 5 different "dominant" hypotheses | Doc reports each independently; next PR addresses them in priority order | +| Instrumented JAR's PSM counts diverge from canonical (the trace itself broke things) | Add an integrity check: run instrumented JAR vs canonical on a 100-spectrum subset; PSM counts should match within thread-scheduling noise (±5 PSMs) | +| Trace data explodes in volume (5 PSMs × dozens of ions × multiple ranks) | Cap output: matched ions only; rank list ≤ partition max_rank; per-PSM JSON ≤ 10 kB | +| Python harness misaligns Rust ↔ Java ions due to mod-name differences | Align by (theoretical_mz, ion_kind) with mz tolerance ≤ 0.001 Da; emit warnings for unmatched on either side | +| Investigation reveals divergence is in MULTIPLE places, no single root cause | OK — doc reports the full picture; fix PR can address them sequentially or pick the highest-impact first | + +## Sequencing (single PR, ~3 commits) + +``` +feat/i5-score-psm-trace (off origin/dev @ 42a6d54f) + ↓ +Commit 1: extend msgf-trace with --trace-json output + per-ion structured fields + ↓ +Commit 2: add benchmark/ci/diff_score_psm_traces.py harness + ↓ +[out-of-repo, bench VM] Java instrumentation; build; run on 5 PSMs + ↓ +Commit 3: trace artifacts + analysis doc; gitignore allowlist entry + ↓ +PR open with the analysis doc as the PR description summary +``` + +## Time estimate + +2-3 working days: +- Day 1 morning: extend `msgf-trace` with JSON output (commit 1) +- Day 1 afternoon: write diff harness (commit 2); verify bench VM Java toolchain +- Day 2 morning: instrument Java on VM, build, run on 5 PSMs +- Day 2 afternoon: run Rust traces; diff; preliminary findings +- Day 3 morning: write analysis 
doc (commit 3) +- Day 3 afternoon: iterate if needed; spec self-review; push + open PR + +## Open questions + +None — all design points resolved in brainstorming. + +## Related documents + +- Project memory: 2026-05-20 score_psm divergence finding (local-only at `docs/parity-analysis/notes/2026-05-20-score-psm-divergence.md` on a prior worktree, not in repo) +- `docs/parity-analysis/reports/2026-05-13-score-psm-undercount-finding.md` — earlier under-scoring investigation (different bug, since fixed) +- PR-V1 (`feat/quality-perf-id-rate`, in review at PR #36) — speed PR; orthogonal to this scoring-correctness work +- `docs/parity-analysis/notes/2026-05-25-spece-tail-exploration.md` — SpecE-tail context; the per-PSM scoring divergence is upstream of the lnSpecE distribution drift documented there