From 43488fb4f4f8bbe4444d085f594904a0c5f88604 Mon Sep 17 00:00:00 2001 From: Greg Kaleka Date: Fri, 17 Apr 2026 10:35:38 -0400 Subject: [PATCH 1/6] Fix header stripping for spatial path on multi-column headers Edge items were collected from the MuPDF path only, so three-column running headers (left/center/right) produced exact-match patterns only for MuPDF's split form. The spatial path merges those into a single line on shared y-coordinate and was left unstripped, polluting the dual-path comparison and flagging regions as uncertain. Union edge items from both paths before detecting repeating patterns. --- lib/pdf/__init__.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/pdf/__init__.py b/lib/pdf/__init__.py index 99b7473..13fc923 100644 --- a/lib/pdf/__init__.py +++ b/lib/pdf/__init__.py @@ -116,7 +116,15 @@ def convert_pdf(path: Path) -> tuple[str, str | None]: mupdf_blocks = extract_mupdf(page, pg_num) spatial_blocks = extract_spatial(page, pg_num) - edge_items = get_edge_items(mupdf_blocks, pg_num, page_height) + # Collect edges from both paths: MuPDF splits left/center/right + # headers into separate lines on wide horizontal gaps; spatial + # merges them into one line. Patterns from both forms are needed + # so strip_repeating (which does exact text match) can match + # against either path's segmentation. + edge_items = ( + get_edge_items(mupdf_blocks, pg_num, page_height) + + get_edge_items(spatial_blocks, pg_num, page_height) + ) all_edge_items.append(edge_items) links = collect_links(page) From 59dc6450750e2917a8be13e03d84d17d2fbbd910 Mon Sep 17 00:00:00 2001 From: Greg Kaleka Date: Fri, 17 Apr 2026 10:51:03 -0400 Subject: [PATCH 2/6] Strip repeating headers at span granularity When the spatial path merges left/center/right header columns into a single line (one span per column), the whole-line text matches no pattern but each column-span does. 
Stripping per-span drops the matched columns and preserves any non-header span that shares the header y-coordinate (e.g. a one-off appendix title on the bibliography page). Also indexes repeating patterns by quantized y so the common no-pattern-near-this-line case short-circuits without scanning, and factors out the y-quantize expression into a shared helper. --- lib/pdf/cleanup.py | 86 +++++++++++++++++++++++++++++++++------------- 1 file changed, 62 insertions(+), 24 deletions(-) diff --git a/lib/pdf/cleanup.py b/lib/pdf/cleanup.py index dc5fe75..e8d9929 100644 --- a/lib/pdf/cleanup.py +++ b/lib/pdf/cleanup.py @@ -17,6 +17,12 @@ _log = logging.getLogger(__name__) +def _y_bucket(bbox: tuple[float, float, float, float]) -> float: + """Quantize a bbox's vertical center to the Y_TOLERANCE grid.""" + y_center = (bbox[1] + bbox[3]) / 2.0 + return round(y_center / Y_TOLERANCE) * Y_TOLERANCE + + def get_edge_items(blocks: list[Block], page_num: int, page_height: float) -> list[PageEdgeItem]: """Get the top N and bottom N text items from a page by y-coordinate. @@ -72,8 +78,7 @@ def detect_repeating(all_edge_items: list[list[PageEdgeItem]], for page_items in all_edge_items: for item in page_items: - y_key = round(item.y / Y_TOLERANCE) * Y_TOLERANCE - y_buckets[y_key].append(item) + y_buckets[_y_bucket(item.bbox)].append(item) repeating = set() for y_key, items in y_buckets.items(): @@ -107,10 +112,36 @@ def detect_repeating(all_edge_items: list[list[PageEdgeItem]], def strip_repeating(blocks: list[Block], repeating: set[tuple[float, str]], ) -> list[Block]: - """Remove lines matching repeating header/footer patterns from blocks.""" + """Remove lines (or individual spans) matching repeating header/footer + patterns from blocks. + + Spatial-path lines merge left/center/right header columns (one span + per column). 
The whole-line text then won't match any single pattern, + but each column-span will; per-span stripping handles that while + preserving non-header content that shares the header y-coordinate + (e.g. a one-off appendix title). + """ if not repeating: return blocks + patterns_by_y: dict[float, list[str]] = defaultdict(list) + for ry, rp in repeating: + patterns_by_y[ry].append(rp) + + def _patterns_near(y_key: float) -> list[str]: + return (patterns_by_y.get(y_key - Y_TOLERANCE, []) + + patterns_by_y.get(y_key, []) + + patterns_by_y.get(y_key + Y_TOLERANCE, [])) + + def _matches(text: str, rpattern: str) -> bool: + if rpattern == text: + return True + if rpattern == "__PAGE_NUM__" and PAGE_NUM_RE.match(text): + return True + if rpattern == "__DOC_NUM__" and DOC_NUM_RE.search(text): + return True + return False + result = [] for block in blocks: kept_lines = [] @@ -119,29 +150,36 @@ def strip_repeating(blocks: list[Block], repeating: set[tuple[float, str]], if not text: kept_lines.append(line) continue - y_center = (line.bbox[1] + line.bbox[3]) / 2.0 - y_key = round(y_center / Y_TOLERANCE) * Y_TOLERANCE - stripped = False - for ry, rpattern in repeating: - if abs(y_key - ry) > Y_TOLERANCE: - continue - if rpattern == text: - stripped = True - break - if rpattern == "__PAGE_NUM__" and PAGE_NUM_RE.match(text): - stripped = True - break - if rpattern == "__DOC_NUM__" and DOC_NUM_RE.search(text): - stripped = True - break - if not stripped: + + line_patterns = _patterns_near(_y_bucket(line.bbox)) + if not line_patterns: kept_lines.append(line) + continue + + if any(_matches(text, rp) for rp in line_patterns): + continue + + kept_spans = [] + stripped_any = False + for span in line.spans: + span_text = span.text.strip() + if not span_text: + kept_spans.append(span) + continue + span_patterns = _patterns_near(_y_bucket(span.bbox)) + if any(_matches(span_text, rp) for rp in span_patterns): + stripped_any = True + continue + kept_spans.append(span) + + if not 
any(sp.text.strip() for sp in kept_spans): + continue + + kept_lines.append( + replace(line, spans=kept_spans) if stripped_any else line) + if kept_lines: - result.append(Block( - lines=kept_lines, - bbox=block.bbox, - page_num=block.page_num, - )) + result.append(replace(block, lines=kept_lines)) return result From c77fc451b29bfe5e5e3aa32bbd189b053e85e996 Mon Sep 17 00:00:00 2001 From: Greg Kaleka Date: Fri, 17 Apr 2026 12:25:17 -0400 Subject: [PATCH 3/6] Fix body-size detection on code-heavy papers Code-dense papers (especially wording papers) biased the detected body size toward the smaller monospace font, which then pushed body prose into the font-rank heading range. The nesting validator ratcheted consecutive "headings" upward one level at a time, producing depth-6/7/8 cascades where bullet lines should have been list items. Three coordinated changes: - _CAMEL_SPLIT_RE now splits on letter/digit boundaries, so family names with embedded sizes (LMMono9, LMMono10, LMMono8) are recognized by the name-only monospace check. Previously only LMMonoLt10 split cleanly because its modifier separated the digits. - propagate_monospace now requires a majority of a font's spatial characters to be classified monospace before propagating. Short spans of digits or thin chars could false-positive the per-glyph metric, contaminating proportional fonts (e.g. Lato-Light) when any single short span passed the signal. - _detect_body_size excludes monospace spans by default. For wording papers with almost no prose at all, falls back to the overall mode so body size isn't pinned to a tiny outlier (e.g. superscript page numbers at 7pt). 
--- lib/pdf/mono.py | 44 +++++++++++++++++++++++++++++++++----------- lib/pdf/structure.py | 35 +++++++++++++++++++++++++++++------ 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/lib/pdf/mono.py b/lib/pdf/mono.py index cd3b8b1..c330902 100644 --- a/lib/pdf/mono.py +++ b/lib/pdf/mono.py @@ -33,7 +33,12 @@ _MONO_KEYWORDS = frozenset({"mono", "courier", "code", "consolas", "menlo"}) -_CAMEL_SPLIT_RE = re.compile(r"(?<=[a-z])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])") +_CAMEL_SPLIT_RE = re.compile( + r"(?<=[a-z])(?=[A-Z])" + r"|(?<=[A-Z])(?=[A-Z][a-z])" + r"|(?<=[A-Za-z])(?=\d)" + r"|(?<=\d)(?=[A-Za-z])" +) _GLYPH_CV_THRESHOLD = 0.15 _FAT_THIN_REJECT_RATIO = 1.3 @@ -170,24 +175,41 @@ def classify_monospace( return False +_PROPAGATE_MONO_MAJORITY = 0.5 + + def propagate_monospace(mupdf_blocks: list[Block], spatial_blocks: list[Block], dominant_font: str) -> None: """Apply spatial path's glyph-width monospace decisions to MuPDF spans. - Collects fonts classified as monospace by the spatial path (which has - per-character data), filters out the dominant body font (the single - most common font by character count), and sets monospace=True on - matching MuPDF spans. The dominant_font must be pre-lowercased. + Only propagates fonts whose spatial spans are mostly classified + monospace (by character count). Short spans of digits or thin + characters can false-positive the per-glyph signal, so requiring a + majority keeps proportional fonts (e.g. a regular text font with a + handful of "1" or "3.1" spans) out of the mono set. + + The dominant_font is still discarded unless it passes a + name-based mono check, to avoid body text leaking into code blocks + when metrics happen to agree across a majority. 
""" - mono_fonts: set[str] = set() + from collections import Counter + mono_chars: Counter[str] = Counter() + total_chars: Counter[str] = Counter() for b in spatial_blocks: for ln in b.lines: for s in ln.spans: - if s.monospace and s.text.strip(): - mono_fonts.add(s.font_name.lower()) - # Only discard the dominant font if it is not itself a known monospace - # family. On code-heavy papers the code font may dominate by character - # count, and discarding it would suppress all code block detection. + if not s.text.strip(): + continue + key = s.font_name.lower() + total_chars[key] += len(s.text) + if s.monospace: + mono_chars[key] += len(s.text) + + mono_fonts = { + f for f, total in total_chars.items() + if total > 0 and mono_chars[f] / total >= _PROPAGATE_MONO_MAJORITY + } + if dominant_font and not classify_monospace(dominant_font): mono_fonts.discard(dominant_font) if not mono_fonts: diff --git a/lib/pdf/structure.py b/lib/pdf/structure.py index c1d4b41..7e48688 100644 --- a/lib/pdf/structure.py +++ b/lib/pdf/structure.py @@ -179,17 +179,40 @@ def compare_extractions(mupdf_blocks: list[Block], return sections +_BODY_PROSE_MIN_CHARS = 500 +_BODY_PROSE_MIN_FRACTION = 0.10 + + def _detect_body_size(sections: list[Section]) -> float: - """Find the most common font size across all sections (= body text).""" - sizes: Counter[float] = Counter() + """Find the most common font size that represents body text. + + Prefers prose (non-monospace) spans so that code-heavy papers don't + bias the body size toward the smaller code font. Falls back to the + overall most common size when prose is insufficient (e.g. wording + papers that are nearly entirely monospace specification text), since + in those papers the spec font *is* the body font. 
+ """ + prose_sizes: Counter[float] = Counter() + all_sizes: Counter[float] = Counter() for sec in sections: for line in sec.lines: for span in line.spans: - if span.text.strip(): - sizes[span.font_size] += len(span.text) - if not sizes: + if not span.text.strip(): + continue + all_sizes[span.font_size] += len(span.text) + if not span.monospace: + prose_sizes[span.font_size] += len(span.text) + + prose_total = sum(prose_sizes.values()) + all_total = sum(all_sizes.values()) + prose_sufficient = ( + prose_total >= _BODY_PROSE_MIN_CHARS + and (all_total == 0 or prose_total >= _BODY_PROSE_MIN_FRACTION * all_total) + ) + source = prose_sizes if prose_sufficient else all_sizes + if not source: return FALLBACK_BODY_SIZE - return sizes.most_common(1)[0][0] + return source.most_common(1)[0][0] def _rank_font_sizes(sections: list[Section], From 8fda9bd6eeacb0f2addfce27e09399e299fe88d7 Mon Sep 17 00:00:00 2001 From: Greg Kaleka Date: Fri, 17 Apr 2026 12:37:08 -0400 Subject: [PATCH 4/6] Reject heading classification for prose-length first lines Numbered paragraph clauses in wording sections ("1 A fiber is a single flow of control...") and numeric arithmetic results that land on a continuation line ("4 on IEEE-754 double implementations...") both match SECTION_NUM_RE and got classified as H2 headings. The nesting validator then let them cascade downward. Reject the heading classification when the first physical line exceeds _HEADING_MAX_WORDS (12) AND the confidence is LOW (number alone, no font-size or bold confirmation). Real section titles with number + larger-font signal at MEDIUM/HIGH confidence are preserved even when long (e.g. "9.2 Rationale: Why Lane Count (L) is the Sole Coordinate"). Across the batch: 201 -> 76 long misclassified headings in the four most affected papers. 
--- lib/pdf/structure.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lib/pdf/structure.py b/lib/pdf/structure.py index 7e48688..a1dd200 100644 --- a/lib/pdf/structure.py +++ b/lib/pdf/structure.py @@ -18,6 +18,13 @@ _HEADING_SIZE_RATIO = 1.05 _TITLE_SIZE_RATIO = 1.2 +# Max words in a heading's first physical line. Beyond this the line is +# prose, not a title. Numbered spec clauses ("1 A fiber is a single flow +# of control...") and numeric arithmetic results that happen to start a +# continuation line both trigger SECTION_NUM_RE; this length cap rejects +# them downstream. Real WG21 section titles are 1-8 words. +_HEADING_MAX_WORDS = 12 + _HTML_TAG_RE = re.compile(r"<[^>]+>") _EMAIL_INLINE_RE = re.compile(r"\S+@\S+\.\S+") _MULTI_SPACE_RE = re.compile(r"\s{2,}") @@ -408,7 +415,9 @@ def structure_sections(sections: list[Section], level, conf = heading_confidence( has_number, number_level, font_level, is_bold, is_known) - if level > 0: + is_prose_length = len(first_line.split()) > _HEADING_MAX_WORDS + prose_on_weak_signal = is_prose_length and conf == Confidence.LOW + if level > 0 and not prose_on_weak_signal: sec.kind = SectionKind.HEADING sec.heading_level = level sec.confidence = conf From e61a78711f9e3d03475a6f7b5d743f153df593cb Mon Sep 17 00:00:00 2001 From: Greg Kaleka Date: Fri, 17 Apr 2026 13:07:19 -0400 Subject: [PATCH 5/6] Treat same-styled consecutive headings as siblings in nesting validation When multiple consecutive headings share an initial styling (e.g., a run of "Changes since P0876R21", "Changes since P0876R20", ... entries), the existing clamp rule would assign each prev_clamped + 1, cascading to ever-deeper levels: level 3, 4, 5, 6, 7, 8, 8... even though they are clearly siblings at the same logical level. Track the previous heading's font size in _validate_nesting. When the current heading's font size matches closely, it's a sibling and gets the previous clamped level, not prev_clamped + 1. 
The tolerance is 0.1pt, which is wider than extraction noise for the same visual style but narrower than the real gap between heading tiers in typical designs. --- lib/pdf/structure.py | 71 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/lib/pdf/structure.py b/lib/pdf/structure.py index a1dd200..03792b7 100644 --- a/lib/pdf/structure.py +++ b/lib/pdf/structure.py @@ -442,6 +442,7 @@ def structure_sections(sections: list[Section], structured = _detect_code_blocks(structured) structured = [s for s in structured if _detect_lang_label(s) is None] structured = _classify_wording_sections(structured) + _demote_repeated_low_confidence_numbers(structured) _validate_nesting(structured) return metadata, structured @@ -841,17 +842,84 @@ def _classify_wording_sections(sections: list[Section]) -> list[Section]: return sections +_PARAGRAPH_NUM_MIN_REPEATS = 3 + + +def _demote_repeated_low_confidence_numbers(sections: list[Section]) -> None: + """Demote LOW-confidence numbered headings when their section_num + repeats often enough to indicate paragraph numbering. + + Real section numbers are unique, but many papers duplicate each one + between a TOC and the body (count 2), and a real section that ends up + at LOW confidence can also collide with paragraph numbering elsewhere. + Paragraph numbering resets per clause ("1 Constraints:", "2 Mandates:", + then later "1 Preconditions:", ...) so the same number typically shows + up 5 or more times. Requiring count >= 3 keeps TOC/body pairs and + section/paragraph 1-on-1 collisions intact while catching the + paragraph-numbering pattern. + + Only LOW-confidence headings are considered, so a real section title + with font-size or bold confirmation is preserved regardless. 
+ """ + counts: Counter[str] = Counter() + nums_by_index: dict[int, str] = {} + for i, sec in enumerate(sections): + if (sec.kind != SectionKind.HEADING + or sec.confidence != Confidence.LOW): + continue + first_line = sec.text.split("\n")[0].strip() + m = SECTION_NUM_RE.match(first_line) + if not m: + continue + num = m.group(1) + nums_by_index[i] = num + counts[num] += 1 + + repeated = {num for num, c in counts.items() + if c >= _PARAGRAPH_NUM_MIN_REPEATS} + if not repeated: + return + + for i, num in nums_by_index.items(): + if num in repeated: + sec = sections[i] + _log.info("Demote repeated low-conf number %r: %r", + num, sec.text[:40]) + sec.kind = SectionKind.PARAGRAPH + sec.heading_level = 0 + + +_SIBLING_FONT_TOL = 0.1 + + def _validate_nesting(sections: list[Section]) -> None: """Ensure heading levels don't skip more than one level deeper. Mutates headings that skip levels: adjusts heading_level and downgrades confidence from HIGH to MEDIUM when corrected. + + Also consolidates runs of same-styled headings as siblings. When + consecutive headings share a font size, they're at the same logical + level; without this check, a run of similar entries (e.g. a dozen + "Changes since P0876RN" items) would each get prev_clamped + 1, + cascading to ever-deeper levels. 
""" prev_level = 0 + prev_font_size: float | None = None for sec in sections: if sec.kind != SectionKind.HEADING: continue - if prev_level > 0 and sec.heading_level > prev_level + 1: + is_sibling = ( + prev_font_size is not None + and abs(sec.font_size - prev_font_size) <= _SIBLING_FONT_TOL + ) + if is_sibling and prev_level > 0 and sec.heading_level > prev_level: + _log.info("Nesting sibling: h%d -> h%d for %r", + sec.heading_level, prev_level, sec.text[:40]) + sec.heading_level = prev_level + if sec.confidence == Confidence.HIGH: + sec.confidence = Confidence.MEDIUM + elif prev_level > 0 and sec.heading_level > prev_level + 1: corrected = prev_level + 1 _log.info("Nesting fix: h%d -> h%d for %r", sec.heading_level, corrected, @@ -860,3 +928,4 @@ def _validate_nesting(sections: list[Section]) -> None: if sec.confidence == Confidence.HIGH: sec.confidence = Confidence.MEDIUM prev_level = sec.heading_level + prev_font_size = sec.font_size From 1e1383d731a0e6252ecd56b018a29ba3c7d145af Mon Sep 17 00:00:00 2001 From: Greg Kaleka Date: Fri, 17 Apr 2026 17:09:11 -0400 Subject: [PATCH 6/6] Add regression tests for PR #9 structure and cleanup fixes Covers body-size prose preference, heading prose-length rejection, repeated-number demotion, sibling nesting clamp, monospace majority propagation, and per-span header stripping. Also documents review findings (sibling false-flatten risk, tight font tolerance, leftover LOW confidence on demoted paragraphs) in issues/pr9-review.md. Fixes stale y-bucket assertions in three existing detect_repeating tests that were broken by the bbox-center refactor in 59dc645. 
--- issues/pr9-review.md | 111 ++++++++++++++++ tests/test_header_footer.py | 114 ++++++++++++++++- tests/test_mono.py | 71 ++++++++++ tests/test_structure.py | 249 +++++++++++++++++++++++++++++++++++- 4 files changed, 538 insertions(+), 7 deletions(-) create mode 100644 issues/pr9-review.md diff --git a/issues/pr9-review.md b/issues/pr9-review.md new file mode 100644 index 0000000..ee1f453 --- /dev/null +++ b/issues/pr9-review.md @@ -0,0 +1,111 @@ +# PR #9 Review Findings + +Code-quality notes from the review of PR #9 (`fix/pdf-structure-classification`). None were blockers; tracked here so they surface when the surrounding code is touched next. + +Each entry names the concern, a concrete example, root cause, and a fix direction (matching the format of `known-issues.md`). + +--- + +## Sibling consolidation may demote legitimate subsection nesting + +**Symptom:** `_validate_nesting` now treats consecutive headings that share a font size as siblings and clamps the second to `prev_level`. On papers that express heading depth through section numbering rather than typography (a common pattern for Markdown-origin PDFs), this flattens real nested subsections. + +**Example sequence:** + +``` +## 2 Motivation (font_size = 14, level = 2) +### 2.1 Background (font_size = 12, level = 3) +#### 2.1.1 History (font_size = 12, level = 4) +``` + +With `_SIBLING_FONT_TOL = 0.1`, `History` is compared to `Background`: same font size, so it is reclassified as a sibling (`level = 3`). The numbering depth (3 dotted parts → level 4) is ignored. Output becomes: + +``` +## 2 Motivation +### 2.1 Background +### 2.1.1 History <-- should be #### +``` + +**Papers likely affected:** any WG21 paper where subsection headings at multiple depths share one font size. Not yet observed in the 52-paper batch as a regression, but the probability rises for papers that pass LOW-confidence heading classification (where section number is the only evidence). 
+ +**Root cause:** `lib/pdf/structure.py:_validate_nesting` (lines 912-921). The sibling check fires whenever `abs(sec.font_size - prev_font_size) <= _SIBLING_FONT_TOL` AND `sec.heading_level > prev_level`. It has no awareness of whether the section number supports the deeper level. + +**Fix direction:** Let the section-number signal veto sibling clamping. When the incoming heading has a `has_number` match AND its `number_level == sec.heading_level == prev_level + 1`, allow the one-level descent through unchanged. Only clamp when the level would skip (`> prev_level + 1`) or when no numbering is present. In CLAUDE.md terms: dotted-decimal section numbering is higher-reliability than font-size agreement; font agreement should not override numbering agreement. + +--- + +## `_SIBLING_FONT_TOL = 0.1` is very tight for real-world font sizes + +**Symptom:** PDFs sometimes specify fractionally different font sizes for the same visual heading tier (e.g. `11.95` and `12.0`, or `14.04` and `14.0`). A tolerance of `0.1` will treat these as different tiers and skip sibling consolidation, reintroducing the cascade this fix was meant to prevent. + +**Example:** + +``` +sec_a.font_size = 11.95 +sec_b.font_size = 12.00 +abs(12.00 - 11.95) = 0.05 # within tol, sibling +``` + +``` +sec_a.font_size = 11.70 +sec_b.font_size = 12.00 +abs(12.00 - 11.70) = 0.30 # exceeds tol, NOT sibling +``` + +Fractions in the 0.1–0.5 range are most common in LaTeX-rendered PDFs and in scanned-then-OCR'd documents. Papers in the 52-doc batch did not happen to hit this, but the test coverage does not guard against it either. + +**Root cause:** `_SIBLING_FONT_TOL = 0.1` in `lib/pdf/structure.py` is an absolute point-size tolerance with no relation to the heading size itself. At 12pt body text the tolerance is ~0.8% of the size; at 8pt captions it is ~1.3%. The constant was chosen without a documented rationale. 
+ +**Fix direction:** Either (a) widen the absolute tolerance to ~0.5 (covers typical fractional PDF size variance while still discriminating between real 10pt / 11pt / 12pt tiers), or (b) express it as a ratio: `abs(a - b) <= max(0.5, 0.02 * max(a, b))`. The ratio form is more defensible across font-size ranges. + +--- + +## Local import of `Counter` inside `propagate_monospace` + +**Symptom:** `lib/pdf/mono.py:195` adds `from collections import Counter` inside the body of `propagate_monospace`. Other modules in the project (e.g. `types.py`, `structure.py`, `cleanup.py`) import `Counter` at module scope. The inline import breaks the project's import-location consistency. + +**Impact:** Cosmetic only. No measurable performance difference for a one-shot call per document, and no correctness risk. + +**Root cause:** The import was likely added at the point of first use while iterating on the fix and never hoisted. + +**Fix direction:** Move the import to the top of `mono.py` alongside `math` and `re`. Zero runtime impact, consistent with the rest of the codebase. Trivial to fold into the next mono.py edit. + +--- + +## Commit 5 bundles two unrelated changes + +**Symptom:** Commit `e61a787` ("Treat same-styled consecutive headings as siblings in nesting validation") contains two logically independent changes: + +1. **Repetition demotion:** `_demote_repeated_low_confidence_numbers`, a new phase that demotes LOW-confidence headings whose `section_num` repeats ≥3 times. +2. **Sibling consolidation:** a new branch inside `_validate_nesting` that clamps runs of same-font-size headings to `prev_level`. + +Either change is independently useful and could be reverted without the other. Bundling them reduces bisect-ability if a future regression traces to one of the two rules. + +**Acknowledgement:** The PR description already flags this ("Includes two changes (should have been two commits)"). 
+ +**Fix direction:** Split before merge via `git rebase -i e61a787` and `git reset HEAD~` + two `git commit -p` rounds. Optional — the description captures the distinction in prose, and the functions themselves are separately named and located. + +--- + +## `_demote_repeated_low_confidence_numbers` leaves `confidence` at LOW on demoted paragraphs + +**Symptom:** When a section is demoted from HEADING → PARAGRAPH, only `kind` and `heading_level` are updated. `confidence` remains at `Confidence.LOW`, the value it carried as a heading. Most PARAGRAPH sections elsewhere in the pipeline carry `Confidence.HIGH` (constructed by `_make_paragraph_section`). + +**Example:** + +```python +sec.kind == SectionKind.HEADING +sec.confidence == Confidence.LOW +sec.heading_level == 2 + +# after _demote_repeated_low_confidence_numbers: +sec.kind == SectionKind.PARAGRAPH +sec.confidence == Confidence.LOW # stays LOW +sec.heading_level == 0 +``` + +**Impact:** None currently observable. A grep for `sec.confidence ==` across `lib/` shows only heading-path consumers inspect confidence (the `HIGH → MEDIUM` downgrade in `_validate_nesting`), and `compare_extractions` has precedent for assigning `Confidence.LOW` to paragraphs (line 182). The emitter ignores `confidence` for PARAGRAPH sections. + +**Root cause:** `lib/pdf/structure.py:_demote_repeated_low_confidence_numbers` only resets the two fields it knows the downstream classifier cares about. Not a bug today, but a future consumer that trusts `confidence` on PARAGRAPH sections would be surprised. + +**Fix direction:** Either (a) reset `sec.confidence = Confidence.HIGH` on demotion to match the provenance of other paragraphs, (b) leave it as LOW and add a short comment stating that confidence is not meaningful for non-HEADING sections, or (c) drop the field entirely from `Section` for non-HEADING kinds. Option (a) is the least risky and aligns with the "most paragraphs are HIGH" default. 
diff --git a/tests/test_header_footer.py b/tests/test_header_footer.py index 8e879ac..737f7d1 100644 --- a/tests/test_header_footer.py +++ b/tests/test_header_footer.py @@ -90,8 +90,8 @@ def test_detect_repeating_exact_text(): [PageEdgeItem(text="Unique Title", y=30.0, page_num=5, bbox=(0, 30, 100, 42))], ] result = detect_repeating(all_edges, total_pages=5) - # y_bucket = round(30 / Y_TOLERANCE) * Y_TOLERANCE = 30.0 (Y_TOLERANCE is 2.0) - assert (30.0, "Running Head") in result + # Bucket is derived from bbox center (30+42)/2 = 36, quantized to Y_TOLERANCE=2. + assert (36.0, "Running Head") in result def test_detect_repeating_skips_below_threshold(): @@ -120,8 +120,8 @@ def test_detect_repeating_page_number_pattern(): for pg in range(1, 6) ] result = detect_repeating(all_edges, total_pages=5) - # y_bucket = round(580 / 2) * 2 = 580.0 - assert (580.0, "__PAGE_NUM__") in result + # Bucket is derived from bbox center (580+592)/2 = 586, quantized to Y_TOLERANCE=2. + assert (586.0, "__PAGE_NUM__") in result def test_detect_repeating_doc_number_pattern(): @@ -135,7 +135,8 @@ def test_detect_repeating_doc_number_pattern(): for i in range(4) ] result = detect_repeating(all_edges, total_pages=4) - assert (30.0, "__DOC_NUM__") in result + # Bucket is derived from bbox center (30+42)/2 = 36, quantized to Y_TOLERANCE=2. + assert (36.0, "__DOC_NUM__") in result # ---- strip_repeating ----------------------------------------------------- @@ -181,4 +182,105 @@ def test_strip_repeating_drops_empty_blocks(): def test_strip_repeating_empty_input(): """Empty repeating set returns blocks unchanged.""" b = _make_block_at_y([("content", 300)]) - assert strip_repeating([b], set()) == [b] \ No newline at end of file + assert strip_repeating([b], set()) == [b] + + +# --------------------------------------------------------------------------- +# Regression coverage for PR #9 per-span stripping. 
+# --------------------------------------------------------------------------- + + +def _multi_span_line(texts, y, *, page_num=0): + """Build a Line with multiple spans at the same y (different x-ranges). + + Models the spatial path merging left/center/right header columns + into one line with one span per column. + """ + x = 50.0 + spans = [] + for t in texts: + w = 8.0 * max(len(t), 1) + spans.append(Span(text=t, font_name="Body", font_size=11.0, + bbox=(x, y, x + w, y + 12.0))) + x += w + 40.0 # large inter-column gap + return Line( + spans=spans, + bbox=(50.0, y, x, y + 12.0), + page_num=page_num, + ) + + +def test_strip_repeating_drops_matched_spans_keeps_the_rest(): + """Spatial-path merged header: strip matched column-spans, keep unique ones.""" + # "Doc P1234R0" (doc-num pattern) | "Appendix: Review" (unique) | "3" (page num) + line = _multi_span_line(["P1234R0", "Appendix: Review", "3"], 30.0) + block = Block(lines=[line], bbox=line.bbox, page_num=0) + + repeating = {(36.0, "__DOC_NUM__"), (36.0, "__PAGE_NUM__")} + result = strip_repeating([block], repeating) + + assert len(result) == 1, "block with surviving span must be kept" + kept_line = result[0].lines[0] + kept_texts = [s.text.strip() for s in kept_line.spans] + assert "P1234R0" not in kept_texts + assert "3" not in kept_texts + assert "Appendix: Review" in kept_texts + + +def test_strip_repeating_drops_all_spans_drops_line(): + """When every column-span matches a repeating pattern, the line is removed.""" + line = _multi_span_line(["P1234R0", "5"], 30.0) + block = Block(lines=[line], bbox=line.bbox, page_num=0) + + repeating = {(36.0, "__DOC_NUM__"), (36.0, "__PAGE_NUM__")} + result = strip_repeating([block], repeating) + + # All spans matched; line is stripped; block becomes empty and is dropped. 
+ assert result == [], "block whose only line is fully stripped must be dropped" + + +def test_strip_repeating_whole_line_match_still_works(): + """A line whose full text matches the repeating exact pattern is stripped. + + Regression: the per-span path must not break the original whole-line + strip that handles single-span lines on the MuPDF path. + """ + # Single span whose text exactly matches the repeating entry. + line = Line( + spans=[Span(text="Running Head", font_name="Body", + font_size=11.0, bbox=(50.0, 30.0, 200.0, 42.0))], + bbox=(50.0, 30.0, 200.0, 42.0), + ) + block = Block(lines=[line], bbox=line.bbox) + + repeating = {(36.0, "Running Head")} + result = strip_repeating([block], repeating) + assert result == [], "whole-line exact match must still strip the block" + + +def test_strip_repeating_span_outside_bucket_preserved(): + """A span whose y-bucket is far from any repeating pattern is preserved + even if another span on the same line is stripped.""" + # Build a line with two spans at different y-buckets (unusual, but + # defensible for floating column-spans with differing baselines). + s_top = Span(text="P1234R0", font_name="Body", font_size=11.0, + bbox=(50.0, 30.0, 110.0, 42.0)) # bucket = 36 + s_body = Span(text="real content", font_name="Body", font_size=11.0, + bbox=(300.0, 300.0, 420.0, 312.0)) # bucket = 306 + line = Line( + spans=[s_top, s_body], + bbox=(50.0, 30.0, 420.0, 312.0), + ) + block = Block(lines=[line], bbox=line.bbox) + + repeating = {(36.0, "__DOC_NUM__")} + result = strip_repeating([block], repeating) + # The line's own y_bucket (from its full bbox center) will land far from + # any repeating pattern; implementation short-circuits via _patterns_near + # returning empty and keeps the whole line untouched. + assert len(result) == 1 + kept = result[0].lines[0] + kept_texts = [s.text for s in kept.spans] + assert "real content" in kept_texts + # Per-span strip does not fire because line_patterns is empty. 
+ assert "P1234R0" in kept_texts \ No newline at end of file diff --git a/tests/test_mono.py b/tests/test_mono.py index 438eaa9..7246a02 100644 --- a/tests/test_mono.py +++ b/tests/test_mono.py @@ -2,6 +2,7 @@ from conftest import make_span, make_line, make_block from lib.pdf.mono import classify_monospace, propagate_monospace +from lib.pdf.types import Span, Line, Block def test_keyword_courier(): @@ -105,3 +106,73 @@ def test_propagate_keeps_dominant_monospace(): propagate_monospace([m_block], [s_block], "courier") assert m_block.lines[0].spans[0].monospace is True + + +# --------------------------------------------------------------------------- +# Regression coverage for PR #9 propagate_monospace majority filter. +# --------------------------------------------------------------------------- + + +def test_propagate_minority_mono_not_adopted(): + """A prose font with <50% monospace-classified chars is NOT propagated. + + Short spans like "3.1" or "42" can false-positive the per-glyph signal, + but they represent a small minority of the font's characters. Requiring + a majority keeps proportional text fonts out of the mono set. + """ + # Lato-Light appears in ~100 chars total, only 3 of which are mono. 
+    prose_lines = [
+        Line(spans=[Span(text="A" * 97, font_name="Lato-Light",
+                         monospace=False)]),
+        Line(spans=[Span(text="3.1", font_name="Lato-Light",
+                         monospace=True)]),  # false-positive
+    ]
+    spatial = [Block(lines=prose_lines)]
+
+    mupdf_line = Line(spans=[Span(text="more prose", font_name="Lato-Light",
+                                  monospace=False)])
+    mupdf = [Block(lines=[mupdf_line])]
+
+    propagate_monospace(mupdf, spatial, "some-dominant-font")
+    assert mupdf[0].lines[0].spans[0].monospace is False, (
+        "minority mono classification must not contaminate a prose font"
+    )
+
+
+def test_propagate_majority_mono_is_adopted():
+    """A font with >=50% monospace-classified chars IS propagated."""
+    code_lines = [
+        Line(spans=[Span(text="int x = 0;", font_name="MyCodeFont",
+                         monospace=True)]),
+        Line(spans=[Span(text="y", font_name="MyCodeFont",
+                         monospace=False)]),  # minority non-mono
+    ]
+    spatial = [Block(lines=code_lines)]
+
+    mupdf_line = Line(spans=[Span(text="return x;", font_name="MyCodeFont",
+                                  monospace=False)])
+    mupdf = [Block(lines=[mupdf_line])]
+
+    propagate_monospace(mupdf, spatial, "arial")
+    assert mupdf[0].lines[0].spans[0].monospace is True, (
+        "majority-monospace font should propagate to MuPDF spans"
+    )
+
+
+def test_propagate_exactly_at_threshold_is_adopted():
+    """50% exactly meets the majority threshold (inclusive)."""
+    # 5 mono chars, 5 non-mono chars -> ratio 0.5 (meets threshold).
+ lines = [ + Line(spans=[Span(text="aaaaa", font_name="EdgeFont", + monospace=True)]), + Line(spans=[Span(text="bbbbb", font_name="EdgeFont", + monospace=False)]), + ] + spatial = [Block(lines=lines)] + + mupdf_line = Line(spans=[Span(text="target", font_name="EdgeFont", + monospace=False)]) + mupdf = [Block(lines=[mupdf_line])] + + propagate_monospace(mupdf, spatial, "arial") + assert mupdf[0].lines[0].spans[0].monospace is True diff --git a/tests/test_structure.py b/tests/test_structure.py index 7f0b794..935a5dc 100644 --- a/tests/test_structure.py +++ b/tests/test_structure.py @@ -1,12 +1,14 @@ """Tests for lib.pdf.structure.""" -from conftest import make_block, make_section +from conftest import make_block, make_line, make_section, make_span from lib.pdf.types import ( Block, Line, Span, Section, SectionKind, Confidence, ) from lib.pdf.structure import ( compare_extractions, structure_sections, heading_confidence, _extract_metadata, + _detect_body_size, _validate_nesting, + _demote_repeated_low_confidence_numbers, ) @@ -295,3 +297,248 @@ def test_line_count_voting(self): # Lines: two at 14, one at 11 -> 14 wins by line count. # Character count would favor 11. assert block.font_size == 14.0 + + +# --------------------------------------------------------------------------- +# Regression coverage for PR #9 fixes. 
+# --------------------------------------------------------------------------- + + +def _mk_section(text, *, font_size=10.0, monospace=False, bold=False, + kind=SectionKind.PARAGRAPH, + confidence=Confidence.HIGH, heading_level=0): + """Build a Section whose single line carries explicit span attributes.""" + span = make_span(text, font_size=font_size, + monospace=monospace, bold=bold) + line = Line(spans=[span]) + return Section(kind=kind, text=text, confidence=confidence, + heading_level=heading_level, lines=[line], + font_size=font_size) + + +class TestDetectBodySizeProsePreference: + """`_detect_body_size` prefers prose over monospace on code-heavy papers.""" + + def test_prose_wins_over_monospace_majority(self): + """Prose beats a more frequent monospace size when it clears the floor.""" + prose = [_mk_section("prose line " + ("x" * 60), font_size=11.0) + for _ in range(10)] + code = [_mk_section("code line " + ("y" * 60), font_size=9.0, + monospace=True) for _ in range(30)] + body = _detect_body_size(prose + code) + assert body == 11.0, ( + "prose font should be picked as body even when monospace spans " + "hold more characters overall" + ) + + def test_fallback_to_all_when_prose_too_small(self): + """When prose is scarce, the overall most-common size wins (wording papers).""" + tiny_prose = [_mk_section("hi", font_size=11.0)] # <<500 chars + code = [_mk_section("code " + ("y" * 200), font_size=9.0, + monospace=True) for _ in range(10)] + body = _detect_body_size(tiny_prose + code) + assert body == 9.0, ( + "with insufficient prose, body falls back to the most common size" + ) + + def test_empty_sections_fall_back(self): + """No data at all returns the FALLBACK_BODY_SIZE.""" + from lib.pdf.types import FALLBACK_BODY_SIZE + assert _detect_body_size([]) == FALLBACK_BODY_SIZE + + +class TestHeadingProseLengthRejection: + """Long numbered first lines don't become headings at LOW confidence.""" + + def test_long_numbered_line_demoted(self): + """A numbered prose 
line with >12 words and no font/bold signal is a paragraph.""" + long_line = ("1 A fiber is a single flow of control with a " + "private stack and an associated execution context.") + sec = _mk_section(long_line, font_size=10.0) + # Flank it with body text at the same size so `_detect_body_size` + # agrees that 10.0 is body and no font-level signal fires. + body_fill = [_mk_section("ordinary body " + ("x" * 80), font_size=10.0) + for _ in range(10)] + _, result = structure_sections(body_fill + [sec], has_title=True) + demoted = [s for s in result if long_line in s.text] + assert demoted, "expected the long numbered line to appear in output" + assert all(s.kind != SectionKind.HEADING for s in demoted), ( + "prose-length first line should not become a heading at LOW conf" + ) + + def test_long_numbered_line_kept_with_font_signal(self): + """A long numbered line at a heading font size is preserved as a heading.""" + long_title = ("1 A fiber is a single flow of control with a " + "private stack and an associated execution context") + heading = _mk_section(long_title, font_size=14.0) + body_fill = [_mk_section("plain body " + ("x" * 80), font_size=10.0) + for _ in range(10)] + _, result = structure_sections(body_fill + [heading], has_title=True) + matches = [s for s in result if long_title in s.text] + assert matches and any(s.kind == SectionKind.HEADING for s in matches), ( + "long numbered line at heading font size must stay a HEADING " + "(MEDIUM or HIGH confidence survives the length cap)" + ) + + +class TestDemoteRepeatedLowConfidenceNumbers: + """Paragraph-number resets collapse to PARAGRAPH; TOC/body pairs do not.""" + + def _heading(self, num, text, *, confidence=Confidence.LOW): + return _mk_section(f"{num} {text}", font_size=10.0, + kind=SectionKind.HEADING, + confidence=confidence, heading_level=2) + + def test_three_repeats_demoted(self): + """section_num repeating >=3 times at LOW confidence becomes PARAGRAPH.""" + sections = [ + self._heading("1", 
"Constraints: first"), + self._heading("2", "Mandates: one"), + self._heading("1", "Preconditions: second"), + self._heading("1", "Effects: third"), + ] + _demote_repeated_low_confidence_numbers(sections) + ones = [s for s in sections if s.text.startswith("1 ")] + assert all(s.kind == SectionKind.PARAGRAPH for s in ones), ( + "three or more occurrences of number '1' at LOW conf should demote" + ) + assert all(s.heading_level == 0 for s in ones) + + def test_two_repeats_preserved(self): + """A TOC/body pair (count == 2) is NOT demoted.""" + sections = [ + self._heading("1", "Introduction"), + self._heading("1", "Introduction"), # second copy (body) + ] + _demote_repeated_low_confidence_numbers(sections) + assert all(s.kind == SectionKind.HEADING for s in sections), ( + "pair-count (TOC + body) should be left alone" + ) + + def test_medium_confidence_not_demoted(self): + """MEDIUM/HIGH confidence headings are never touched, even if they repeat.""" + sections = [ + self._heading("1", "a", confidence=Confidence.MEDIUM), + self._heading("1", "b", confidence=Confidence.MEDIUM), + self._heading("1", "c", confidence=Confidence.MEDIUM), + ] + _demote_repeated_low_confidence_numbers(sections) + assert all(s.kind == SectionKind.HEADING for s in sections) + + def test_demoted_confidence_left_low(self): + """Pins current behavior: demoted sections keep Confidence.LOW. + + Not currently observed as a problem; documented in + issues/pr9-review.md. Any change to the demotion path should + consciously revisit this pin. 
+ """ + sections = [ + self._heading("1", "first"), + self._heading("1", "second"), + self._heading("1", "third"), + ] + _demote_repeated_low_confidence_numbers(sections) + assert all(s.confidence == Confidence.LOW for s in sections), ( + "demoted paragraphs currently retain their heading's LOW " + "confidence; update issues/pr9-review.md if this changes" + ) + + +class TestValidateNestingSiblingClamp: + """Same-font-size consecutive headings are treated as siblings.""" + + def _h(self, text, *, level, fs): + return _mk_section(text, font_size=fs, + kind=SectionKind.HEADING, + confidence=Confidence.HIGH, + heading_level=level) + + def test_sibling_run_stays_flat(self): + """A long run of same-font revision headings doesn't cascade.""" + sections = [ + self._h("## Changes", level=2, fs=14.0), + self._h("### Changes since P21", level=3, fs=12.0), + # Each of the following originally got level = prev_clamped + 1. + # Sibling logic pins them all to level 3. + self._h("#### Changes since P20", level=4, fs=12.0), + self._h("##### Changes since P19", level=5, fs=12.0), + self._h("###### Changes since P18", level=6, fs=12.0), + ] + _validate_nesting(sections) + levels = [s.heading_level for s in sections] + assert levels == [2, 3, 3, 3, 3], ( + f"expected runs of same-font siblings at level 3; got {levels}" + ) + + def test_sibling_downgrades_high_to_medium(self): + """When the sibling rule fires, HIGH confidence drops to MEDIUM.""" + sections = [ + self._h("## Root", level=2, fs=14.0), + self._h("### Child", level=3, fs=12.0), + self._h("#### Grandchild", level=4, fs=12.0), + ] + _validate_nesting(sections) + gc = sections[-1] + assert gc.heading_level == 3 + assert gc.confidence == Confidence.MEDIUM + + def test_different_font_allows_nesting(self): + """Truly different font sizes still pass through the skip-level rule.""" + sections = [ + self._h("## Root", level=2, fs=14.0), + self._h("### Child", level=3, fs=12.0), + self._h("#### Grandchild", level=4, fs=10.0), + ] + 
_validate_nesting(sections) + assert [s.heading_level for s in sections] == [2, 3, 4] + + def test_current_behavior_flattens_legit_nesting_same_font(self): + """Pins the risk documented in issues/pr9-review.md. + + Papers that express depth through section numbering while using + one font size for all sub-levels will have legitimate h4s clamped + to h3. This test encodes the CURRENT behavior so that any future + change (e.g. letting section-number depth veto the sibling rule) + is noticed in review. + """ + sections = [ + self._h("## 2 Motivation", level=2, fs=14.0), + self._h("### 2.1 Background", level=3, fs=12.0), + self._h("#### 2.1.1 History", level=4, fs=12.0), + ] + _validate_nesting(sections) + assert sections[-1].heading_level == 3, ( + "currently flattens 2.1.1 to h3; update issues/pr9-review.md " + "if section-number depth is made to veto the sibling rule" + ) + + def test_tight_font_tolerance_misses_fractional_variance(self): + """Pins the risk documented in issues/pr9-review.md. + + `_SIBLING_FONT_TOL = 0.1` rejects sibling status between 11.7 and + 12.0 (diff 0.3), which is within the fractional variance common + in LaTeX PDFs. Encodes CURRENT behavior. + """ + sections = [ + self._h("## Root", level=2, fs=14.0), + self._h("### Sibling A", level=3, fs=12.0), + # fs = 11.7 is > 0.1 away from 12.0, so NOT a sibling; + # skip-level rule clamps level 5 -> 4 (prev + 1). 
+ self._h("##### Sibling B", level=5, fs=11.7), + ] + _validate_nesting(sections) + assert sections[-1].heading_level == 4, ( + "current absolute 0.1 tolerance treats 11.7 and 12.0 as " + "different tiers; widening the tolerance would change this " + "assertion — see issues/pr9-review.md" + ) + + def test_close_fractional_is_sibling(self): + """Within the 0.1 tolerance, sizes like 11.95 vs 12.0 are siblings.""" + sections = [ + self._h("## Root", level=2, fs=14.0), + self._h("### Sibling A", level=3, fs=12.0), + self._h("#### Sibling B", level=4, fs=11.95), + ] + _validate_nesting(sections) + assert sections[-1].heading_level == 3