From cb0ae01dcff8b685e8c7cf8ca3cde9c4ef856f5f Mon Sep 17 00:00:00 2001
From: David Foster <david@dafoster.net>
Date: Wed, 3 Jun 2026 12:31:42 -0400
Subject: [PATCH 1/8] misc/perf_compare.py: Add options/behaviors to reduce
 measured variance

Specifically:
* Median is reported in addition to the existing mean+stdev. The median is
  significantly more resistant to skew by outliers than the mean.
* --metric {wall,cpu} (default wall): Enables profiling using CPU time
  rather than wall-clock time. CPU profiling has roughly half the coefficient
  of variation of wall-clock profiling at equal run count.
* --workers1: Forces MYPY_NUM_WORKERS=1 (rather than the default 4) to
  cut CPU scheduling variance. Strongly recommended when using --metric cpu.
* --warmup-runs N (default 1): Configurable number of leading cold runs to discard.
  Previously was always 1. Higher run counts decrease outliers that skew
  the reported mean.
* A new "Paired deltas vs <first commit>" section is added to the report,
  showing per-round paired differencing against the first commit
  to cancel round-level common-mode noise, reducing variance.
  Reported as median +/-95% CI.

Also:
* --cache-binaries (default false): Caches each commit's compiled clone
  to avoid ~5min recompile whenever comparing the same commit multiple times.

Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
---
 .gitignore           |   3 +
 misc/perf_compare.py | 207 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 187 insertions(+), 23 deletions(-)
diff --git a/.gitignore b/.gitignore
index 9c325f3e29f8a..e9954a807d317 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,3 +60,6 @@ test_capi
 test_capi
 /mypyc/lib-rt/build/
 /mypyc/lib-rt/*.so
+
+# perf_compare.py --cache-binaries cache
+/misc/perf_compare/
diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index aa05270a8c00f..d6f140818f141 100755
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -23,12 +23,50 @@
 import glob
 import os
 import random
+import resource
 import shutil
 import statistics
 import subprocess
 import sys
 import time
+from collections.abc import Callable
 from concurrent.futures import ThreadPoolExecutor, as_completed
+from typing import Any
+
+
+def winsorized_paired_stats(
+    diffs: list[float], *, trim_frac: float = 0.1, conf: float = 0.95
+) -> dict[str, float]:
+    """Robust summary of a list of per-round paired differences.
+
+    Point estimate: trimmed mean (drop ``trim_frac`` of values from each end), so a
+    single outlier round cannot drag the estimate.
+
+    Error bar: the Tukey-McLaughlin standard error of the trimmed mean, built from the
+    *Winsorized* variance. The tails are clamped to the boundary kept-value rather than
+    deleted -- deleting them and taking the ordinary variance of the survivors would
+    understate the error bar (it would measure only how calm the middle is, discarding
+    the fact that the tails were wild). The ``(1 - 2*trim_frac)`` divisor rescales for
+    the compression Winsorizing introduces.
+
+    Returns trimmed-mean estimate, median, the 95% CI half-width, and the kept count.
+    A normal-approx critical value is used (fine for the n>=~30 runs this is used with).
+    """
+    n = len(diffs)
+    s = sorted(diffs)
+    g = int(n * trim_frac)  # number trimmed from each end
+    median = statistics.median(s)
+    if n < 2 or n - 2 * g < 2:
+        est = statistics.mean(s)
+        return {"est": est, "median": median, "ci": 0.0, "kept": float(n)}
+    kept = s[g : n - g]
+    est = statistics.mean(kept)
+    # Winsorize: clamp the g smallest up to kept[0], the g largest down to kept[-1].
+    wins = [kept[0]] * g + kept + [kept[-1]] * g
+    wvar = statistics.variance(wins)  # sample Winsorized variance (df = n-1)
+    se = (wvar**0.5) / ((1 - 2 * trim_frac) * (n**0.5))
+    z = statistics.NormalDist().inv_cdf(0.5 + conf / 2)
+    return {"est": est, "median": median, "ci": z * se, "kept": float(len(kept))}
 
 
 def heading(s: str) -> None:
@@ -81,7 +119,14 @@ def edit_python_file(fnam: str) -> None:
 
 
 def run_benchmark(
-    compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None, foreign: bool | None
+    compiled_dir: str,
+    check_dir: str,
+    *,
+    incremental: bool,
+    code: str | None,
+    foreign: bool | None,
+    metric: str = "wall",
+    workers1: bool = False,
 ) -> float:
     cache_dir = os.path.join(compiled_dir, ".mypy_cache")
     if os.path.isdir(cache_dir) and not incremental:
@@ -89,6 +134,8 @@ def run_benchmark(
     env = os.environ.copy()
     env["PYTHONPATH"] = os.path.abspath(compiled_dir)
     env["PYTHONHASHSEED"] = "1"
+    if workers1:
+        env["MYPY_NUM_WORKERS"] = "1"
     abschk = os.path.abspath(check_dir)
     cmd = [sys.executable, "-m", "mypy"]
     if code:
@@ -103,13 +150,26 @@ def run_benchmark(
             # Update a few files to force non-trivial incremental run
             edit_python_file(os.path.join(abschk, "mypy/__main__.py"))
             edit_python_file(os.path.join(abschk, "mypy/test/testcheck.py"))
-    t0 = time.time()
+    stopwatch_func: Callable[[], Any]
+    delta_func: Callable[[Any, Any], Any]
+    if metric == "wall":
+        stopwatch_func = lambda: time.time()
+        delta_func = lambda t0, t1: t1 - t0
+    elif metric == "cpu":
+        # NOTE: CPU time (user+sys) is far less sensitive than wall-clock to
+        #       background interference
+        stopwatch_func = lambda: resource.getrusage(resource.RUSAGE_CHILDREN)
+        delta_func = lambda r0, r1: (r1.ru_utime - r0.ru_utime) + (r1.ru_stime - r0.ru_stime)
+    else:
+        raise AssertionError(f"Unrecognized metric: {metric!r}")
+    v0 = stopwatch_func()  # capture
     # Ignore errors, since some commits being measured may generate additional errors.
     if foreign:
         subprocess.run(cmd, cwd=check_dir, env=env)
     else:
         subprocess.run(cmd, cwd=compiled_dir, env=env)
-    return time.time() - t0
+    v1 = stopwatch_func()  # capture
+    return delta_func(v0, v1)
 
 
 def main() -> None:
@@ -145,6 +205,41 @@ def main() -> None:
         type=int,
         help="set number of measurements to perform (default=15)",
     )
+    parser.add_argument(
+        "--warmup-runs",
+        metavar="N",
+        default=1,
+        type=int,
+        help="set number of leading warmup runs to discard (default=1)",
+    )
+    parser.add_argument(
+        "--cache-binaries",
+        default=False,
+        action="store_true",
+        help="cache each commit's compiled clone under "
+        + "<script_dir>/perf_compare/binaries/<commit> and restore from there on later runs, "
+        + "skipping the ~5-min clone+compile. Off by default so it doesn't silently consume "
+        + "disk. Caveat: the cache is keyed by the commit string you pass, so reuse stable SHAs "
+        + "(a moving ref like a branch name or HEAD can serve a stale build -- delete the cache "
+        + "dir if in doubt).",
+    )
+    parser.add_argument(
+        "--metric",
+        choices=["wall", "cpu"],
+        default="wall",
+        help="quantity to measure per run: 'wall' (wall-clock, default) or 'cpu' (user+sys "
+        + "CPU time of the type-check process). 'cpu' is much less sensitive to background "
+        + "interference and scheduling, so it tightens the per-run distribution.",
+    )
+    parser.add_argument(
+        "--workers1",
+        default=False,
+        action="store_true",
+        help="run selfcheck with a single mypy worker (MYPY_NUM_WORKERS=1) to "
+        + "decrease variance in measurements. "
+        + "Strongly recommended when --metric=cpu. "
+        + "When omitted, uses mypy's default worker count.",
+    )
     parser.add_argument(
         "-j",
         metavar="N",
@@ -178,20 +273,39 @@ def main() -> None:
     dont_setup: bool = args.dont_setup
     multi_file: bool = args.multi_file
     commits = args.commit
-    num_runs: int = args.num_runs + 1
+    baseline_commit: str = commits[0]
+    warmup_runs: int = args.warmup_runs
+    measurement_runs: int = args.num_runs
+    num_runs: int = measurement_runs + warmup_runs
     max_workers: int = args.j
     code: str | None = args.c
     foreign_repo: str | None = args.r
+    metric: str = args.metric
+    workers1: bool = args.workers1
+    cache_binaries: bool = args.cache_binaries
 
     if not (os.path.isdir(".git") and os.path.isdir("mypyc")):
         sys.exit("error: You must run this script from the mypy repo root")
 
+    archive_root = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "perf_compare", "binaries"
+    )
+
     target_dirs = []
+    dirs_to_compile = []
     for i, commit in enumerate(commits):
         target_dir = f"mypy.{i}.tmpdir"
         target_dirs.append(target_dir)
         if not dont_setup:
-            clone(target_dir, commit)
+            archive = os.path.join(archive_root, commit)
+            if cache_binaries and os.path.isdir(archive):
+                print(f"restore: copying {archive} -> {target_dir} (skipping clone+compile)")
+                if os.path.isdir(target_dir):
+                    shutil.rmtree(target_dir)
+                shutil.copytree(archive, target_dir, symlinks=True)
+            else:
+                clone(target_dir, commit)
+                dirs_to_compile.append(target_dir)
 
     if foreign_repo:
         check_dir = "mypy.foreign.tmpdir"
@@ -202,27 +316,32 @@ def main() -> None:
         if not dont_setup:
             clone(check_dir, commits[0])
 
-    if not dont_setup:
+    if not dont_setup and dirs_to_compile:
         heading("Compiling mypy")
         print("(This will take a while...)")
 
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             futures = [
-                executor.submit(build_mypy, target_dir, multi_file) for target_dir in target_dirs
+                executor.submit(build_mypy, target_dir, multi_file)
+                for target_dir in dirs_to_compile
             ]
             for future in as_completed(futures):
                 future.result()
 
-        print(f"Finished compiling mypy ({len(commits)} builds)")
+        print(f"Finished compiling mypy ({len(dirs_to_compile)} builds)")
+    elif not dont_setup:
+        print("All targets restored from archive; skipping compile step.")
 
-    heading("Performing measurements")
+    workers_desc = "workers: 1" if workers1 else "workers: default"
+    key_options_desc = f"(metric: {metric}-time, {workers_desc})"
+    heading(f"Performing measurements {key_options_desc}")
 
     results: dict[str, list[float]] = {}
     for n in range(num_runs):
-        if n == 0:
-            print("Warmup...")
+        if n < warmup_runs:
+            print(f"Warmup {n + 1}/{warmup_runs}...")
         else:
-            print(f"Run {n}/{num_runs - 1}...")
+            print(f"Run {n - warmup_runs + 1}/{num_runs - warmup_runs}...")
         items = list(enumerate(commits))
         random.shuffle(items)
         for i, commit in items:
@@ -232,26 +351,56 @@ def main() -> None:
                 incremental=incremental,
                 code=code,
                 foreign=bool(foreign_repo),
+                metric=metric,
+                workers1=workers1,
             )
-            # Don't record the first warm-up run
-            if n > 0:
+            # Don't record the leading warm-up runs
+            if n >= warmup_runs:
                 print(f"{commit}: t={tt:.3f}s")
                 results.setdefault(commit, []).append(tt)
 
     print()
-    heading("Results")
-    first = -1.0
+    heading(f"Results {key_options_desc}")
+    first_mean = -1.0
+    first_median = -1.0
     for commit in commits:
-        tt = statistics.mean(results[commit])
+        mean = statistics.mean(results[commit])
+        median = statistics.median(results[commit])
         # pstdev (instead of stdev) is used here primarily to accommodate the case where num_runs=1
         s = statistics.pstdev(results[commit]) if len(results[commit]) > 1 else 0
-        if first < 0:
-            delta = "0.0%"
-            first = tt
+        if first_mean < 0:
+            delta_mean = "0.0%"
+            first_mean = mean
+            delta_median = "0.0%"
+            first_median = median
         else:
-            d = (tt / first) - 1
-            delta = f"{d:+.1%}"
-        print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ")
+            d1 = (mean / first_mean) - 1
+            delta_mean = f"{d1:+.1%}"
+            d2 = (median / first_median) - 1
+            delta_median = f"{d2:+.1%}"
+        print(
+            f"{commit:<25} mean {mean:.3f}s ({delta_mean}) | stdev {s:.3f}s | "
+            f"median {median:.3f}s ({delta_median})"
+        )
+
+    # Paired per-round differences vs the baseline commit. Each round runs every commit
+    # once, so results[commit][k] is round k for every commit -- the differences are
+    # already matched. Differencing cancels round-level common-mode noise (a throttle or
+    # background-process spike during a round slows every commit together), which is the bulk
+    # of the variance on a laptop. See winsorized_paired_stats for the robust estimator.
+    base_runs = results[baseline_commit]
+    base_center = statistics.median(base_runs)
+    heading(f"Paired deltas vs {baseline_commit} (per-round diffs; median +/- 95% CI)")
+    for commit in commits:
+        if commit == baseline_commit:
+            print(f"{commit:<25} baseline")
+            continue
+        diffs = [c - b for c, b in zip(results[commit], base_runs)]
+        st = winsorized_paired_stats(diffs)
+        ci_ms = st["ci"] * 1000
+        median_ms = st["median"] * 1000
+        pct = (st["median"] / base_center * 100) if base_center else 0.0
+        print(f"{commit:<25} median {median_ms:+7.1f}ms  +/-{ci_ms:4.1f}  ({pct:+.2f}%)")
 
     t = int(time.time() - whole_program_time_0)
     total_time_taken_formatted = ", ".join(
@@ -264,6 +413,18 @@ def main() -> None:
         total_time_taken_formatted,
     )
 
+    # Archive compiled clones before cleanup, keyed by commit, so later runs can
+    # restore them instead of recompiling. Skip if destination already exists.
+    if cache_binaries:
+        os.makedirs(archive_root, exist_ok=True)
+        for target_dir, commit in zip(target_dirs, commits):
+            dest = os.path.join(archive_root, commit)
+            if os.path.isdir(dest):
+                print(f"archive: {dest} already exists, skipping")
+            else:
+                print(f"archive: copying {target_dir} -> {dest}")
+                shutil.copytree(target_dir, dest, symlinks=True)
+
     shutil.rmtree(check_dir)
     for target_dir in target_dirs:
         shutil.rmtree(target_dir)

From 208f9b62fbda19cf535640ca38a8523bcc3cdf8b Mon Sep 17 00:00:00 2001
From: David Foster <david@dafoster.net>
Date: Wed, 3 Jun 2026 12:34:25 -0400
Subject: [PATCH 2/8] TypeForm: Add instrumentation of full parses done in
 semanal.py's try_parse_as_type_expression()

Specifically:

- If you set the MYPY_TYPEFORM_PROFILE_FULL_PARSE environment variable to a
  filepath, each mypy process will output a .tsv to "<filepath>.<pid>" which
  characterizes the kinds of Expressions that try_parse_as_type_expression()
  in semanal.py was forced to fully parse because they were not rejected early.

- A misc/analyze_typeform_full_parse_profile.py script is added which
  takes those .tsvs and prints an expression-time summary (by total time)
  plus top-N descriptors per FAIL class.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 misc/analyze_typeform_full_parse_profile.py | 146 ++++++++++++++++++++
 misc/analyze_typeform_stats.py              |   2 +-
 mypy/semanal.py                             | 125 ++++++++++++++---
 3 files changed, 256 insertions(+), 17 deletions(-)
 create mode 100644 misc/analyze_typeform_full_parse_profile.py

diff --git a/misc/analyze_typeform_full_parse_profile.py b/misc/analyze_typeform_full_parse_profile.py
new file mode 100644
index 0000000000000..35001413b3f9d
--- /dev/null
+++ b/misc/analyze_typeform_full_parse_profile.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+"""
+Aggregate the full-parse profile log produced by mypy's
+SemanticAnalyzer.try_parse_as_type_expression() when run with
+MYPY_TYPEFORM_PROFILE_FULL_PARSE set.
+
+Usage:
+    # 1. Run mypy with the profile env var set; per-PID log files are
+    #    written as "<path>.<pid>":
+    MYPY_TYPEFORM_PROFILE_FULL_PARSE=/tmp/tf.log \\
+        python3 -m mypy --no-incremental -p your_package
+
+    # 2. Aggregate one or more per-PID files:
+    python3 misc/analyze_typeform_full_parse_profile.py /tmp/tf.log.*
+
+    # Optional: limit per-descriptor breakdown to top N rows per class.
+    python3 misc/analyze_typeform_full_parse_profile.py --top 20 /tmp/tf.log.*
+
+The script summarizes which (outcome, kind, subkind) classes account for
+the most full-parse time, and lists the top descriptors within each
+FAIL class -- the populations worth targeting with cheaper pre-filters
+upstream in try_parse_as_type_expression.
+
+See also:
+    - mypy/semanal.py: SemanticAnalyzer.try_parse_as_type_expression()
+    - mypy/semanal.py: SemanticAnalyzer.log_typeform_full_parse() (TSV schema docstring)
+    - misc/analyze_typeform_stats.py (aggregate counters via --dump-build-stats)
+"""
+
+from __future__ import annotations
+
+import argparse
+import statistics
+import sys
+from collections import defaultdict
+from collections.abc import Iterable
+
+
+def read_rows(paths: Iterable[str]) -> list[tuple[str, str, str, str, int]]:
+    rows: list[tuple[str, str, str, str, int]] = []
+    for path in paths:
+        with open(path) as f:
+            for line in f:
+                # Skip header lines (each per-PID file starts with one).
+                if line.startswith("outcome\t"):
+                    continue
+                parts = line.rstrip("\n").split("\t")
+                if len(parts) < 5:
+                    continue
+                outcome, kind, subkind, desc, dur_ns_str = parts[:5]
+                try:
+                    dur_ns = int(dur_ns_str)
+                except ValueError:
+                    continue
+                rows.append((outcome, kind, subkind, desc, dur_ns))
+    return rows
+
+
+def print_class_summary(rows: list[tuple[str, str, str, str, int]]) -> None:
+    buckets: dict[tuple[str, str, str], list[int]] = defaultdict(list)
+    total_ns = 0
+    for outcome, kind, subkind, _desc, dur_ns in rows:
+        buckets[(outcome, kind, subkind)].append(dur_ns)
+        total_ns += dur_ns
+
+    print("Class summary (by total time):")
+    print("=" * 80)
+    print(f"{'count':>7} {'total_ms':>10} {'mean_us':>9} {'med_us':>9} {'pct':>6}  class")
+    print("-" * 80)
+    ordered = sorted(
+        (
+            (sum(d), len(d), statistics.mean(d), statistics.median(d), key)
+            for key, d in buckets.items()
+        ),
+        reverse=True,
+    )
+    for total, n, mean, med, key in ordered:
+        pct = (100 * total / total_ns) if total_ns else 0
+        outcome, kind, subkind = key
+        print(
+            f"{n:>7} {total/1e6:>10.2f} {mean/1e3:>9.1f} {med/1e3:>9.1f} "
+            f"{pct:>5.1f}%  {outcome} {kind} {subkind}"
+        )
+    print("-" * 80)
+    print(f"TOTAL: {len(rows):,} events, {total_ns/1e6:.2f} ms")
+
+
+def print_fail_descriptors(rows: list[tuple[str, str, str, str, int]], top_n: int) -> None:
+    # Group FAIL rows by (outcome, kind, subkind) class, then by descriptor within each.
+    by_class: dict[tuple[str, str, str], dict[str, list[int]]] = defaultdict(
+        lambda: defaultdict(list)
+    )
+    for outcome, kind, subkind, desc, dur_ns in rows:
+        if outcome != "FAIL":
+            continue
+        by_class[(outcome, kind, subkind)][desc].append(dur_ns)
+
+    # Order classes by total FAIL time, descending.
+    class_totals = sorted(
+        ((sum(sum(d) for d in descs.values()), key, descs) for key, descs in by_class.items()),
+        reverse=True,
+    )
+    for total_ns, key, descs in class_totals:
+        outcome, kind, subkind = key
+        print()
+        print(
+            f"Top {top_n} descriptors in {outcome} {kind} {subkind} "
+            f"(class total {total_ns/1e6:.2f} ms):"
+        )
+        print("-" * 80)
+        print(f"{'count':>6} {'total_ms':>10} {'mean_us':>9}  descriptor")
+        rows_d = sorted(
+            ((sum(d), len(d), statistics.mean(d), desc) for desc, d in descs.items()), reverse=True
+        )
+        for tot, n, mean, desc in rows_d[:top_n]:
+            print(f"{n:>6} {tot/1e6:>10.3f} {mean/1e3:>9.1f}  {desc!r}")
+        if len(rows_d) > top_n:
+            print(f"... {len(rows_d) - top_n} more descriptors")
+
+
+def main() -> None:
+    parser = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__
+    )
+    parser.add_argument(
+        "files", nargs="+", help="One or more per-PID profile files (e.g. /tmp/tf.log.*)"
+    )
+    parser.add_argument(
+        "--top",
+        type=int,
+        default=20,
+        help="Max number of descriptors to list per FAIL class (default: 20)",
+    )
+    args = parser.parse_args()
+
+    rows = read_rows(args.files)
+    if not rows:
+        print("No data rows found in input files.", file=sys.stderr)
+        sys.exit(1)
+
+    print_class_summary(rows)
+    print_fail_descriptors(rows, args.top)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/misc/analyze_typeform_stats.py b/misc/analyze_typeform_stats.py
index 0a540610bc620..17368f6247e53 100644
--- a/misc/analyze_typeform_stats.py
+++ b/misc/analyze_typeform_stats.py
@@ -26,7 +26,6 @@
 
 See also:
     - mypy/semanal.py: SemanticAnalyzer.try_parse_as_type_expression()
-    - mypy/semanal.py: DEBUG_TYPE_EXPRESSION_FULL_PARSE_FAILURES
 """
 
 import re
@@ -70,6 +69,7 @@ def analyze_stats(output: str) -> None:
         print(
             f"  - Expensive failed full parses: {failures:,} ({(failures / total * 100):.1f}% of all calls)"
         )
+        print("    - Analyze further with misc/analyze_typeform_full_parse_profile.py")
 
 
 if __name__ == "__main__":
diff --git a/mypy/semanal.py b/mypy/semanal.py
index e010273b0781f..c86f04efb581a 100644
--- a/mypy/semanal.py
+++ b/mypy/semanal.py
@@ -50,10 +50,12 @@
 
 from __future__ import annotations
 
+import os
 import re
+import time
 from collections.abc import Callable, Collection, Iterable, Iterator
 from contextlib import contextmanager
-from typing import Any, Final, TypeAlias as _TypeAlias, TypeGuard, TypeVar, cast
+from typing import Any, Final, TextIO, TypeAlias as _TypeAlias, TypeGuard, TypeVar, cast
 from typing_extensions import assert_never
 
 from mypy import errorcodes as codes, message_registry
@@ -320,11 +322,18 @@
 T = TypeVar("T")
 
 
-# Whether to print diagnostic information for failed full parses
-# in SemanticAnalyzer.try_parse_as_type_expression().
+# Instrumentation: If non-None, every expression that reaches the expensive
+# full-parse block of SemanticAnalyzer.try_parse_as_type_expression()
+# is logged to a .tsv by log_typeform_full_parse().
 #
-# See also: misc/analyze_typeform_stats.py
-DEBUG_TYPE_EXPRESSION_FULL_PARSE_FAILURES: Final = False
+# See also:
+# - misc/analyze_typeform_full_parse_profile.py
+# - misc/analyze_typeform_stats.py
+_TYPEFORM_PROFILE_FULL_PARSE_PATH: Final = os.environ.get("MYPY_TYPEFORM_PROFILE_FULL_PARSE")
+_typeform_full_parse_log_file: TextIO | None = None
+
+# TSV column names for the full-parse profile log
+_TYPEFORM_PROFILE_FULL_PARSE_HEADER = "outcome\tkind\tsubkind\tdescriptor\tdur_ns\n"
 
 
 FUTURE_IMPORTS: Final = {
@@ -8164,6 +8173,9 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
         else:
             assert_never(maybe_type_expr)
 
+        full_parse_t0 = (
+            time.perf_counter_ns() if _TYPEFORM_PROFILE_FULL_PARSE_PATH is not None else 0
+        )
         with self.isolated_error_analysis():
             try:
                 t = self.expr_to_analyzed_type(maybe_type_expr)
@@ -8173,17 +8185,6 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
                 # Not a type expression
                 t = None
 
-            if DEBUG_TYPE_EXPRESSION_FULL_PARSE_FAILURES and t is None:
-                original_flushed_files = set(self.errors.flushed_files)  # save
-                try:
-                    errors = self.errors.new_messages()  # capture
-                finally:
-                    self.errors.flushed_files = original_flushed_files  # restore
-
-                print(
-                    f"SA.try_parse_as_type_expression: Full parse failure: {maybe_type_expr}, errors={errors!r}"
-                )
-
         # Count full parse attempts for profiling
         if t is not None:
             self.type_expression_full_parse_success_count += 1
@@ -8192,6 +8193,12 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
 
         maybe_type_expr.as_type = t
 
+        if _TYPEFORM_PROFILE_FULL_PARSE_PATH is not None:
+            full_parse_t1 = time.perf_counter_ns()
+            self.log_typeform_full_parse(
+                maybe_type_expr, t is not None, full_parse_t1 - full_parse_t0
+            )
+
     @staticmethod
     def var_is_typing_special_form(var: Var) -> bool:
         return var.fullname.startswith("typing") and var.fullname in [
@@ -8208,6 +8215,92 @@ def var_is_typing_special_form(var: Var) -> bool:
             "typing.Union",
         ]
 
+    @staticmethod
+    def log_typeform_full_parse(expr: Expression, ok: bool, dur_ns: int) -> None:
+        """Log one entry into the full-parse block of try_parse_as_type_expression.
+
+        Active only when the MYPY_TYPEFORM_PROFILE_FULL_PARSE environment variable
+        is set to a file path. Each mypy process (worker) writes to its own file
+        named "<path>.<pid>" to avoid contention; concatenating those files yields
+        the complete profile. Aggregate with misc/analyze_typeform_full_parse_profile.py.
+
+        Output is tab-separated with one row per full-parse attempt:
+
+        outcome     "OK" if as_type was set, "FAIL" if the full parse rejected
+                    the expression (either by raising TypeTranslationError or by
+                    emitting errors during analysis).
+        kind        AST node kind: StrExpr | IndexExpr | OpExpr | (other).
+        subkind     For StrExpr: "ident", "dotident", or "other" (based on the
+                    string's shape). For IndexExpr: "Name", "Member", or the base's
+                    class name. For OpExpr: always "|" (no other op reaches here).
+        descriptor  Short, type-specific identifier for the expression:
+                        StrExpr   -> the string value, truncated to 80 chars
+                                    (with " (N)" suffix when truncated).
+                        IndexExpr -> the full stringified expression (str(expr),
+                                    with tabs/newlines escaped).
+                        OpExpr    -> the full stringified expression (str(expr),
+                                    with tabs/newlines escaped).
+        dur_ns      Wall-clock nanoseconds spent in the full-parse block for
+                    this expression (measured around expr_to_analyzed_type
+                    plus the surrounding isolated_error_analysis ctx).
+
+        The first line of each file is the column header (same as above).
+        """
+        global _typeform_full_parse_log_file
+        if _typeform_full_parse_log_file is None:
+            assert _TYPEFORM_PROFILE_FULL_PARSE_PATH is not None
+            _typeform_full_parse_log_file = open(
+                f"{_TYPEFORM_PROFILE_FULL_PARSE_PATH}.{os.getpid()}", "a", buffering=1
+            )
+            _typeform_full_parse_log_file.write(_TYPEFORM_PROFILE_FULL_PARSE_HEADER)
+        outcome = "OK" if ok else "FAIL"
+        if isinstance(expr, StrExpr):
+            raw = expr.value
+            val = (
+                raw[:80]
+                .replace("\\", "\\\\")
+                .replace("\t", "\\t")
+                .replace("\n", "\\n")
+                .replace("\r", "\\r")
+            )
+            if len(raw) > 80:
+                val += f" ({len(raw)})"
+            if _IDENTIFIER_RE.fullmatch(raw):
+                subkind = "ident"
+            elif _DOTTED_IDENTIFIER_RE.fullmatch(raw):
+                subkind = "dotident"
+            else:
+                subkind = "other"
+            line = f"{outcome}\tStrExpr\t{subkind}\t{val}\t{dur_ns}\n"
+        elif isinstance(expr, IndexExpr):
+            base = expr.base
+            if isinstance(base, NameExpr):
+                subkind = "Name"
+            elif isinstance(base, MemberExpr):
+                subkind = "Member"
+            else:
+                subkind = type(base).__name__
+            desc = (
+                str(expr)
+                .replace("\\", "\\\\")
+                .replace("\t", "\\t")
+                .replace("\n", "\\n")
+                .replace("\r", "\\r")
+            )
+            line = f"{outcome}\tIndexExpr\t{subkind}\t{desc}\t{dur_ns}\n"
+        elif isinstance(expr, OpExpr):
+            desc = (
+                str(expr)
+                .replace("\\", "\\\\")
+                .replace("\t", "\\t")
+                .replace("\n", "\\n")
+                .replace("\r", "\\r")
+            )
+            line = f"{outcome}\tOpExpr\t|\t{desc}\t{dur_ns}\n"
+        else:
+            line = f"{outcome}\t{type(expr).__name__}\t\t\t{dur_ns}\n"
+        _typeform_full_parse_log_file.write(line)
+
     @contextmanager
     def isolated_error_analysis(self) -> Iterator[None]:
         """

From 6fba90341f00e24185cab03c5748ba110b171775 Mon Sep 17 00:00:00 2001
From: David Foster <david@dafoster.net>
Date: Wed, 3 Jun 2026 12:35:05 -0400
Subject: [PATCH 3/8] TypeForm: Add 7 more early-reject filters to semanal.py's
 try_parse_as_type_expression()

These filters reduce mypy's wall-clock slowdown when checking the
mypy codebase after the introduction of TypeForm from +2.03% to +1.21%,
as measured with `misc/perf_compare.py`.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
---
 mypy/semanal.py | 131 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 130 insertions(+), 1 deletion(-)

diff --git a/mypy/semanal.py b/mypy/semanal.py
index c86f04efb581a..3f1d1cb7221bf 100644
--- a/mypy/semanal.py
+++ b/mypy/semanal.py
@@ -375,6 +375,52 @@
 # string literal as a type expression.
 _MULTIPLE_WORDS_NONTYPE_RE = re.compile(r'\s*[^\s.\'"|\[]+\s+[^\s.\'"|\[]')
 
+# Matches any valid Python identifier, including identifiers with Unicode characters.
+#
+# [^\d\W] = word character that is not a digit
+# \w = word character
+# \Z = match end of string; does not allow a trailing \n, unlike $
+_IDENTIFIER_RE = re.compile(r"^[^\d\W]\w*\Z", re.UNICODE)
+
+# Matches if the string contains at least one identifier-start character
+# (letter or underscore).
+_CONTAINS_IDENTIFIER_RE = re.compile(r"[^\W\d]", re.UNICODE)
+
+# Matches a dotted identifier (e.g. 'builtins.tuple', 'typing.Mapping', 'a.b.c').
+_DOTTED_IDENTIFIER_RE = re.compile(r"^[^\d\W]\w*(\.[^\d\W]\w*)+\Z", re.UNICODE)
+
+# Matches a dotted name (one or more identifier components joined by '.').
+# Accepts a bare identifier with zero dots. Used to extract every
+# dotted identifier from inside a stringified type expression.
+_CONTAINED_DOTTED_IDENTIFIER_RE = re.compile(r"[^\W\d]\w*(?:\.[^\W\d]\w*)*", re.UNICODE)
+
+# Matches several patterns that never appear in valid type expressions
+# NOTE: Allows '*' for (PEP 646 Unpack) and '+' for (Literal[+N])
+_NONTYPE_PATTERN_RE = re.compile(
+    # Characters never valid in a type expression
+    r"[!:/<>@%$^?;&~`\\]|"
+    # '-' not directly preceded by '[' (which can occur in Literal[-N])
+    # NOTE: Incorrectly rejects multi-element edge cases like Literal[-1, -2]
+    #       which appear in stringified type expressions, which are expected
+    #       to be rare in practice.
+    r"(?<!\[)-|"
+    # Leading '.' (incomplete dotted name, file extension, etc)
+    r"^\.|"
+    # Trailing '.' (incomplete dotted name, file extension, etc)
+    r"\.$"
+)
+
+# Matches if the first character of the string is invalid as the start of
+# a type expression
+_NONTYPE_FIRST_CHAR_RE = re.compile(
+    # Any non-word char other than '*' (which is reserved for PEP 646 Unpack:
+    # 'tuple[int, *Ts]') or whitespace
+    r"\A[^\s*\w]|"
+    # A digit
+    r"\A\d",
+    re.UNICODE,
+)
+
 
 class SemanticAnalyzer(
     NodeVisitor[None], SemanticAnalyzerInterface, SemanticAnalyzerPluginInterface, SplittingVisitor
@@ -8090,6 +8136,26 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
             return
         elif isinstance(maybe_type_expr, StrExpr):
             str_value = maybe_type_expr.value  # cache
+            # (TODO: Experiment with the ordering of all the following filters,
+            #        to frontload those most efficient at rejecting early.)
+            # Filter out string literals with no identifier-start characters
+            # (pure punctuation/digits/whitespace) which cannot be type expressions
+            if not _CONTAINS_IDENTIFIER_RE.search(str_value):
+                maybe_type_expr.as_type = None
+                return
+            # Filter out string literals whose first character cannot start
+            # a valid type expression (a digit, or punctuation other than
+            # '*'). Strings with leading whitespace are not filtered here.
+            if _NONTYPE_FIRST_CHAR_RE.match(str_value):
+                maybe_type_expr.as_type = None
+                return
+            # Filter out string literals with common patterns that could not
+            # possibly be in a type expression
+            if _MULTIPLE_WORDS_NONTYPE_RE.match(str_value):
+                # A common pattern in string literals containing a sentence.
+                # But cannot be a type expression.
+                maybe_type_expr.as_type = None
+                return
             # Filter out string literals which look like an identifier but
             # cannot be a type expression, for a few common reasons
             if str_value.isidentifier():
@@ -8116,7 +8182,40 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
                         # 2. unbound_paramspec: f'ParamSpec "{name}" is unbound' [codes.VALID_TYPE]
                         maybe_type_expr.as_type = None
                         return
-            else:  # does not look like an identifier
+                    if (
+                        isinstance(node, Var)
+                        and isinstance(get_proper_type(node.type), Instance)
+                        and not self.var_is_typing_special_form(node)
+                    ):
+                        # Var whose declared type is a concrete instance: it is
+                        # a value (local, parameter, module-level constant),
+                        # not a type expression.
+                        maybe_type_expr.as_type = None
+                        return
+                    if isinstance(node, (FuncDef, OverloadedFuncDef, MypyFile)):
+                        # Functions and modules are never type expressions.
+                        maybe_type_expr.as_type = None
+                        return
+            elif _DOTTED_IDENTIFIER_RE.fullmatch(str_value):
+                # Dotted-name string (e.g. "builtins.tuple", "typing.Mapping").
+                # Look up the leftmost component; if it can't possibly be a
+                # type prefix, bail. Mirrors the IndexExpr-with-MemberExpr-base
+                # filter logic below.
+                leftmost = str_value.split(".", 1)[0]
+                sym = self.lookup(leftmost, UnboundType(leftmost), suppress_errors=True)
+                if sym is None:
+                    # Leftmost component does not refer to anything in scope
+                    maybe_type_expr.as_type = None
+                    return
+                node = sym.node  # cache
+                if isinstance(node, PlaceholderNode) and not node.becomes_typeinfo:
+                    maybe_type_expr.as_type = None
+                    return
+                if isinstance(node, Var) and not self.var_is_typing_special_form(node):
+                    # Leftmost component is a Var: cannot be a type prefix
+                    maybe_type_expr.as_type = None
+                    return
+            else:  # does not look like an identifier or dotted identifier
                 if '"' in str_value or "'" in str_value:
                     # Only valid inside a Literal[...] or Annotated[..., ...] type
                     if "[" not in str_value:
@@ -8135,6 +8234,34 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None:
                     # But cannot be a type expression.
                     maybe_type_expr.as_type = None
                     return
+                # Skip some checks when a non-zero even number of single or double quotes
+                # signals a possible Literal[...] component, whose quoted content
+                # could contain anything: symbols or identifiers that would be
+                # incorrectly processed by some checks.
+                sq = str_value.count("'")
+                dq = str_value.count('"')
+                if not ((sq > 0 and sq % 2 == 0) or (dq > 0 and dq % 2 == 0)):
+                    # Filter out string literals containing characters or boundary
+                    # patterns that never appear in valid type expressions
+                    # (e.g. '/', ':', '<', '>', '@', leading/trailing '.').
+                    if _NONTYPE_PATTERN_RE.search(str_value):
+                        maybe_type_expr.as_type = None
+                        return
+                    # A string that can spell a valid type must contain 1+ dotted names,
+                    # all of whose leftmost identifiers must be resolvable by self.lookup().
+                    found = False
+                    for m in _CONTAINED_DOTTED_IDENTIFIER_RE.finditer(str_value):
+                        found = True
+                        leftmost = m.group().split(".", 1)[0]
+                        if (
+                            self.lookup(leftmost, UnboundType(leftmost), suppress_errors=True)
+                            is None
+                        ):
+                            maybe_type_expr.as_type = None
+                            return
+                    if not found:
+                        maybe_type_expr.as_type = None
+                        return
         elif isinstance(maybe_type_expr, IndexExpr):
             if isinstance(maybe_type_expr.base, NameExpr):
                 if isinstance(
@@ -8208,6 +8335,8 @@ def var_is_typing_special_form(var: Var) -> bool:
             "typing.Literal",
             "typing_extensions.Literal",
             "typing.Optional",
+            "typing.Self",
+            "typing_extensions.Self",
             "typing.TypeGuard",
             "typing_extensions.TypeGuard",
             "typing.TypeIs",

From 978e9b49a300ceeb8df10124d0b4df82976e58ea Mon Sep 17 00:00:00 2001
From: David Foster <david@dafoster.net>
Date: Wed, 3 Jun 2026 18:35:20 -0400
Subject: [PATCH 4/8] SQUISH -> misc/perf_compare.py -- Workaround inability to
 return Any type

---
 misc/perf_compare.py | 44 +++++++++++++++++++++++++++-----------------
 1 file changed, 27 insertions(+), 17 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index d6f140818f141..41939bce21a8a 100755
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -31,7 +31,7 @@
 import time
 from collections.abc import Callable
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from typing import Any
+from resource import struct_rusage as rusage
 
 
 def winsorized_paired_stats(
@@ -150,26 +150,36 @@ def run_benchmark(
             # Update a few files to force non-trivial incremental run
             edit_python_file(os.path.join(abschk, "mypy/__main__.py"))
             edit_python_file(os.path.join(abschk, "mypy/test/testcheck.py"))
-    stopwatch_func: Callable[[], Any]
-    delta_func: Callable[[Any, Any], Any]
+
+    def run() -> None:
+        # Ignore errors, since some commits being measured may generate additional errors.
+        if foreign:
+            subprocess.run(cmd, cwd=check_dir, env=env)
+        else:
+            subprocess.run(cmd, cwd=compiled_dir, env=env)
+
     if metric == "wall":
-        stopwatch_func = lambda: time.time()
-        delta_func = lambda t0, t1: t1 - t0
+        stopwatch_func_w: Callable[[], float] = lambda: time.time()
+        delta_func_w: Callable[[float, float], float] = lambda t0, t1: t1 - t0
+
+        v0_w = stopwatch_func_w()  # capture
+        run()
+        v1_w = stopwatch_func_w()  # capture
+        return delta_func_w(v0_w, v1_w)
     elif metric == "cpu":
-        # NOTE: CPU time (user+sys) is far less sensitive than wall-clock to
-        #       background interference
-        stopwatch_func = lambda: resource.getrusage(resource.RUSAGE_CHILDREN)
-        delta_func = lambda r0, r1: (r1.ru_utime - r0.ru_utime) + (r1.ru_stime - r0.ru_stime)
+        stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage(
+            resource.RUSAGE_CHILDREN
+        )
+        delta_func_c: Callable[[rusage, rusage], float] = lambda r0, r1: (
+            r1.ru_utime - r0.ru_utime
+        ) + (r1.ru_stime - r0.ru_stime)
+
+        v0_c = stopwatch_func_c()  # capture
+        run()
+        v1_c = stopwatch_func_c()  # capture
+        return delta_func_c(v0_c, v1_c)
     else:
         raise AssertionError(f"Unrecognized metric: {metric!r}")
-    v0 = stopwatch_func()  # capture
-    # Ignore errors, since some commits being measured may generate additional errors.
-    if foreign:
-        subprocess.run(cmd, cwd=check_dir, env=env)
-    else:
-        subprocess.run(cmd, cwd=compiled_dir, env=env)
-    v1 = stopwatch_func()  # capture
-    return delta_func(v0, v1)
 
 
 def main() -> None:

From 5a03b136514da99902ea0f6e2fba6c3f0d20a377 Mon Sep 17 00:00:00 2001
From: David Foster <david@dafoster.net>
Date: Wed, 3 Jun 2026 18:57:33 -0400
Subject: [PATCH 5/8] SQUISH -> misc/perf_compare.py -- Workaround missing
 'resource' module on Windows

---
 misc/perf_compare.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 41939bce21a8a..86b6023c03563 100755
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -23,7 +23,6 @@
 import glob
 import os
 import random
-import resource
 import shutil
 import statistics
 import subprocess
@@ -31,7 +30,6 @@
 import time
 from collections.abc import Callable
 from concurrent.futures import ThreadPoolExecutor, as_completed
-from resource import struct_rusage as rusage
 
 
 def winsorized_paired_stats(
@@ -167,6 +165,10 @@ def run() -> None:
         v1_w = stopwatch_func_w()  # capture
         return delta_func_w(v0_w, v1_w)
     elif metric == "cpu":
+        if sys.platform == 'win32':
+            raise NotImplementedError("--metric cpu is not implemented on Windows")
+        from resource import struct_rusage as rusage
+        import resource
         stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage(
             resource.RUSAGE_CHILDREN
         )

From 91052ef56f02becab27ab524bedfd3cb10a00ed0 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 3 Jun 2026 23:00:18 +0000
Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 misc/perf_compare.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 86b6023c03563..b884fa038418e 100755
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -165,10 +165,11 @@ def run() -> None:
         v1_w = stopwatch_func_w()  # capture
         return delta_func_w(v0_w, v1_w)
     elif metric == "cpu":
-        if sys.platform == 'win32':
+        if sys.platform == "win32":
             raise NotImplementedError("--metric cpu is not implemented on Windows")
-        from resource import struct_rusage as rusage
         import resource
+        from resource import struct_rusage as rusage
+
         stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage(
             resource.RUSAGE_CHILDREN
         )

From 2b804d68f99b9e792822a2355682c2f9ab95226d Mon Sep 17 00:00:00 2001
From: David Foster <david@dafoster.net>
Date: Wed, 3 Jun 2026 19:10:29 -0400
Subject: [PATCH 7/8] SQUISH -> misc/perf_compare.py -- typechecker fixes

---
 misc/perf_compare.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index b884fa038418e..9f480ed6d494b 100755
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -167,8 +167,8 @@ def run() -> None:
     elif metric == "cpu":
         if sys.platform == "win32":
             raise NotImplementedError("--metric cpu is not implemented on Windows")
-        import resource
-        from resource import struct_rusage as rusage
+        import resource  # type: ignore[unreachable]
+        from resource import struct_rusage as rusage  # type: ignore[attr-defined]
 
         stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage(
             resource.RUSAGE_CHILDREN

From 12f933518d94df3e5203731c9cc4f0136853496b Mon Sep 17 00:00:00 2001
From: David Foster <david@dafoster.net>
Date: Wed, 3 Jun 2026 19:22:14 -0400
Subject: [PATCH 8/8] SQUISH -> misc/perf_compare.py -- typechecker fixes, take
 2

---
 misc/perf_compare.py | 27 ++++++++++++++-------------
 1 file changed, 14 insertions(+), 13 deletions(-)

diff --git a/misc/perf_compare.py b/misc/perf_compare.py
index 9f480ed6d494b..895e8c0ca2811 100755
--- a/misc/perf_compare.py
+++ b/misc/perf_compare.py
@@ -167,20 +167,21 @@ def run() -> None:
     elif metric == "cpu":
         if sys.platform == "win32":
             raise NotImplementedError("--metric cpu is not implemented on Windows")
-        import resource  # type: ignore[unreachable]
-        from resource import struct_rusage as rusage  # type: ignore[attr-defined]
-
-        stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage(
-            resource.RUSAGE_CHILDREN
-        )
-        delta_func_c: Callable[[rusage, rusage], float] = lambda r0, r1: (
-            r1.ru_utime - r0.ru_utime
-        ) + (r1.ru_stime - r0.ru_stime)
+        else:
+            import resource
+            from resource import struct_rusage as rusage
 
-        v0_c = stopwatch_func_c()  # capture
-        run()
-        v1_c = stopwatch_func_c()  # capture
-        return delta_func_c(v0_c, v1_c)
+            stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage(
+                resource.RUSAGE_CHILDREN
+            )
+            delta_func_c: Callable[[rusage, rusage], float] = lambda r0, r1: (
+                r1.ru_utime - r0.ru_utime
+            ) + (r1.ru_stime - r0.ru_stime)
+
+            v0_c = stopwatch_func_c()  # capture
+            run()
+            v1_c = stopwatch_func_c()  # capture
+            return delta_func_c(v0_c, v1_c)
     else:
         raise AssertionError(f"Unrecognized metric: {metric!r}")