From cb0ae01dcff8b685e8c7cf8ca3cde9c4ef856f5f Mon Sep 17 00:00:00 2001 From: David Foster Date: Wed, 3 Jun 2026 12:31:42 -0400 Subject: [PATCH 1/8] misc/perf_compare.py: Add options/behaviors to reduce measured variance Specifically: * The median, which is significantly more resistant to skew by outliers, is now reported in addition to the existing mean+stdev. * --metric {wall,cpu} (default wall): Enables profiling using CPU time rather than wall-clock time. CPU profiling has roughly half the coefficient of variation of wall-clock profiling at an equal run count. * --workers1: Forces MYPY_NUM_WORKERS=1 (rather than the default 4) to cut CPU scheduling variance. Strongly recommended when using --metric cpu. * --warmup-runs N (default 1): Configurable number of leading cold runs to discard. Previously this was always 1. Higher warmup counts reduce the outliers that skew the reported mean. * A new "Paired deltas vs BASELINE_COMMIT" section is added to the report, showing per-round paired differencing against the first commit to cancel round-level common-mode noise, reducing variance. Reported as median +/- 95% CI. Also: * --cache-binaries (default false): Caches each commit's compiled clone to avoid a ~5min recompile whenever comparing the same commit multiple times. 
Co-Authored-By: Claude Opus 4.8 --- .gitignore | 3 + misc/perf_compare.py | 207 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 187 insertions(+), 23 deletions(-) diff --git a/.gitignore b/.gitignore index 9c325f3e29f8a..e9954a807d317 100644 --- a/.gitignore +++ b/.gitignore @@ -60,3 +60,6 @@ test_capi test_capi /mypyc/lib-rt/build/ /mypyc/lib-rt/*.so + +# perf_compare.py --cache-binaries cache +/misc/perf_compare/ diff --git a/misc/perf_compare.py b/misc/perf_compare.py index aa05270a8c00f..d6f140818f141 100755 --- a/misc/perf_compare.py +++ b/misc/perf_compare.py @@ -23,12 +23,50 @@ import glob import os import random +import resource import shutil import statistics import subprocess import sys import time +from collections.abc import Callable from concurrent.futures import ThreadPoolExecutor, as_completed +from typing import Any + + +def winsorized_paired_stats( + diffs: list[float], *, trim_frac: float = 0.1, conf: float = 0.95 +) -> dict[str, float]: + """Robust summary of a list of per-round paired differences. + + Point estimate: trimmed mean (drop ``trim_frac`` of values from each end), so a + single outlier round cannot drag the estimate. + + Error bar: the Tukey-McLaughlin standard error of the trimmed mean, built from the + *Winsorized* variance. The tails are clamped to the boundary kept-value rather than + deleted -- deleting them and taking the ordinary variance of the survivors would + understate the error bar (it would measure only how calm the middle is, discarding + the fact that the tails were wild). The ``(1 - 2*trim_frac)`` divisor rescales for + the compression Winsorizing introduces. + + Returns trimmed-mean estimate, median, the 95% CI half-width, and the kept count. + A normal-approx critical value is used (fine for the n>=~30 runs this is used with). 
+ """ + n = len(diffs) + s = sorted(diffs) + g = int(n * trim_frac) # number trimmed from each end + median = statistics.median(s) + if n < 2 or n - 2 * g < 2: + est = statistics.mean(s) + return {"est": est, "median": median, "ci": 0.0, "kept": float(n)} + kept = s[g : n - g] + est = statistics.mean(kept) + # Winsorize: clamp the g smallest up to kept[0], the g largest down to kept[-1]. + wins = [kept[0]] * g + kept + [kept[-1]] * g + wvar = statistics.variance(wins) # sample Winsorized variance (df = n-1) + se = (wvar**0.5) / ((1 - 2 * trim_frac) * (n**0.5)) + z = statistics.NormalDist().inv_cdf(0.5 + conf / 2) + return {"est": est, "median": median, "ci": z * se, "kept": float(len(kept))} def heading(s: str) -> None: @@ -81,7 +119,14 @@ def edit_python_file(fnam: str) -> None: def run_benchmark( - compiled_dir: str, check_dir: str, *, incremental: bool, code: str | None, foreign: bool | None + compiled_dir: str, + check_dir: str, + *, + incremental: bool, + code: str | None, + foreign: bool | None, + metric: str = "wall", + workers1: bool = False, ) -> float: cache_dir = os.path.join(compiled_dir, ".mypy_cache") if os.path.isdir(cache_dir) and not incremental: @@ -89,6 +134,8 @@ def run_benchmark( env = os.environ.copy() env["PYTHONPATH"] = os.path.abspath(compiled_dir) env["PYTHONHASHSEED"] = "1" + if workers1: + env["MYPY_NUM_WORKERS"] = "1" abschk = os.path.abspath(check_dir) cmd = [sys.executable, "-m", "mypy"] if code: @@ -103,13 +150,26 @@ def run_benchmark( # Update a few files to force non-trivial incremental run edit_python_file(os.path.join(abschk, "mypy/__main__.py")) edit_python_file(os.path.join(abschk, "mypy/test/testcheck.py")) - t0 = time.time() + stopwatch_func: Callable[[], Any] + delta_func: Callable[[Any, Any], Any] + if metric == "wall": + stopwatch_func = lambda: time.time() + delta_func = lambda t0, t1: t1 - t0 + elif metric == "cpu": + # NOTE: CPU time (user+sys) is far less sensitive than wall-clock to + # background interference + 
stopwatch_func = lambda: resource.getrusage(resource.RUSAGE_CHILDREN) + delta_func = lambda r0, r1: (r1.ru_utime - r0.ru_utime) + (r1.ru_stime - r0.ru_stime) + else: + raise AssertionError(f"Unrecognized metric: {metric!r}") + v0 = stopwatch_func() # capture # Ignore errors, since some commits being measured may generate additional errors. if foreign: subprocess.run(cmd, cwd=check_dir, env=env) else: subprocess.run(cmd, cwd=compiled_dir, env=env) - return time.time() - t0 + v1 = stopwatch_func() # capture + return delta_func(v0, v1) def main() -> None: @@ -145,6 +205,41 @@ def main() -> None: type=int, help="set number of measurements to perform (default=15)", ) + parser.add_argument( + "--warmup-runs", + metavar="N", + default=1, + type=int, + help="set number of leading warmup runs to discard (default=1)", + ) + parser.add_argument( + "--cache-binaries", + default=False, + action="store_true", + help="cache each commit's compiled clone under " + + "/perf_compare/binaries/ and restore from there on later runs, " + + "skipping the ~5-min clone+compile. Off by default so it doesn't silently consume " + + "disk. Caveat: the cache is keyed by the commit string you pass, so reuse stable SHAs " + + "(a moving ref like a branch name or HEAD can serve a stale build -- delete the cache " + + "dir if in doubt).", + ) + parser.add_argument( + "--metric", + choices=["wall", "cpu"], + default="wall", + help="quantity to measure per run: 'wall' (wall-clock, default) or 'cpu' (user+sys " + + "CPU time of the type-check process). 'cpu' is much less sensitive to background " + + "interference and scheduling, so it tightens the per-run distribution.", + ) + parser.add_argument( + "--workers1", + default=False, + action="store_true", + help="run selfcheck with a single mypy worker (MYPY_NUM_WORKERS=1) to " + + "decrease variance in measurements. " + + "Strongly recommended when --metric=cpu. 
" + + "When omitted, uses mypy's default worker count.", + ) parser.add_argument( "-j", metavar="N", @@ -178,20 +273,39 @@ def main() -> None: dont_setup: bool = args.dont_setup multi_file: bool = args.multi_file commits = args.commit - num_runs: int = args.num_runs + 1 + baseline_commit: str = commits[0] + warmup_runs: int = args.warmup_runs + measurement_runs: int = args.num_runs + num_runs: int = measurement_runs + warmup_runs max_workers: int = args.j code: str | None = args.c foreign_repo: str | None = args.r + metric: str = args.metric + workers1: bool = args.workers1 + cache_binaries: bool = args.cache_binaries if not (os.path.isdir(".git") and os.path.isdir("mypyc")): sys.exit("error: You must run this script from the mypy repo root") + archive_root = os.path.join( + os.path.dirname(os.path.abspath(__file__)), "perf_compare", "binaries" + ) + target_dirs = [] + dirs_to_compile = [] for i, commit in enumerate(commits): target_dir = f"mypy.{i}.tmpdir" target_dirs.append(target_dir) if not dont_setup: - clone(target_dir, commit) + archive = os.path.join(archive_root, commit) + if cache_binaries and os.path.isdir(archive): + print(f"restore: copying {archive} -> {target_dir} (skipping clone+compile)") + if os.path.isdir(target_dir): + shutil.rmtree(target_dir) + shutil.copytree(archive, target_dir, symlinks=True) + else: + clone(target_dir, commit) + dirs_to_compile.append(target_dir) if foreign_repo: check_dir = "mypy.foreign.tmpdir" @@ -202,27 +316,32 @@ def main() -> None: if not dont_setup: clone(check_dir, commits[0]) - if not dont_setup: + if not dont_setup and dirs_to_compile: heading("Compiling mypy") print("(This will take a while...)") with ThreadPoolExecutor(max_workers=max_workers) as executor: futures = [ - executor.submit(build_mypy, target_dir, multi_file) for target_dir in target_dirs + executor.submit(build_mypy, target_dir, multi_file) + for target_dir in dirs_to_compile ] for future in as_completed(futures): future.result() - print(f"Finished 
compiling mypy ({len(commits)} builds)") + print(f"Finished compiling mypy ({len(dirs_to_compile)} builds)") + elif not dont_setup: + print("All targets restored from archive; skipping compile step.") - heading("Performing measurements") + workers_desc = "workers: 1" if workers1 else "workers: default" + key_options_desc = f"(metric: {metric}-time, {workers_desc})" + heading(f"Performing measurements {key_options_desc}") results: dict[str, list[float]] = {} for n in range(num_runs): - if n == 0: - print("Warmup...") + if n < warmup_runs: + print(f"Warmup {n + 1}/{warmup_runs}...") else: - print(f"Run {n}/{num_runs - 1}...") + print(f"Run {n - warmup_runs + 1}/{num_runs - warmup_runs}...") items = list(enumerate(commits)) random.shuffle(items) for i, commit in items: @@ -232,26 +351,56 @@ def main() -> None: incremental=incremental, code=code, foreign=bool(foreign_repo), + metric=metric, + workers1=workers1, ) - # Don't record the first warm-up run - if n > 0: + # Don't record the leading warm-up runs + if n >= warmup_runs: print(f"{commit}: t={tt:.3f}s") results.setdefault(commit, []).append(tt) print() - heading("Results") - first = -1.0 + heading(f"Results {key_options_desc}") + first_mean = -1.0 + first_median = -1.0 for commit in commits: - tt = statistics.mean(results[commit]) + mean = statistics.mean(results[commit]) + median = statistics.median(results[commit]) # pstdev (instead of stdev) is used here primarily to accommodate the case where num_runs=1 s = statistics.pstdev(results[commit]) if len(results[commit]) > 1 else 0 - if first < 0: - delta = "0.0%" - first = tt + if first_mean < 0: + delta_mean = "0.0%" + first_mean = mean + delta_median = "0.0%" + first_median = median else: - d = (tt / first) - 1 - delta = f"{d:+.1%}" - print(f"{commit:<25} {tt:.3f}s ({delta}) | stdev {s:.3f}s ") + d1 = (mean / first_mean) - 1 + delta_mean = f"{d1:+.1%}" + d2 = (median / first_median) - 1 + delta_median = f"{d2:+.1%}" + print( + f"{commit:<25} mean {mean:.3f}s 
({delta_mean}) | stdev {s:.3f}s | " + f"median {median:.3f}s ({delta_median})" + ) + + # Paired per-round differences vs the baseline commit. Each round runs every commit + # once, so results[commit][k] is round k for every commit -- the differences are + # already matched. Differencing cancels round-level common-mode noise (a throttle or + # background-process spike that round slows every commit together), which is the bulk + # of the variance on a laptop. See winsorized_paired_stats for the robust estimator. + base_runs = results[baseline_commit] + base_center = statistics.median(base_runs) + heading(f"Paired deltas vs {baseline_commit} (per-round diffs; median +/- 95% CI)") + for commit in commits: + if commit == baseline_commit: + print(f"{commit:<25} baseline") + continue + diffs = [c - b for c, b in zip(results[commit], base_runs)] + st = winsorized_paired_stats(diffs) + ci_ms = st["ci"] * 1000 + median_ms = st["median"] * 1000 + pct = (st["median"] / base_center * 100) if base_center else 0.0 + print(f"{commit:<25} median {median_ms:+7.1f}ms +/-{ci_ms:4.1f} ({pct:+.2f}%)") t = int(time.time() - whole_program_time_0) total_time_taken_formatted = ", ".join( @@ -264,6 +413,18 @@ def main() -> None: total_time_taken_formatted, ) + # Archive compiled clones before cleanup, keyed by commit, so later runs can + # restore them instead of recompiling. Skip if destination already exists. 
+ if cache_binaries: + os.makedirs(archive_root, exist_ok=True) + for target_dir, commit in zip(target_dirs, commits): + dest = os.path.join(archive_root, commit) + if os.path.isdir(dest): + print(f"archive: {dest} already exists, skipping") + else: + print(f"archive: copying {target_dir} -> {dest}") + shutil.copytree(target_dir, dest, symlinks=True) + shutil.rmtree(check_dir) for target_dir in target_dirs: shutil.rmtree(target_dir) From 208f9b62fbda19cf535640ca38a8523bcc3cdf8b Mon Sep 17 00:00:00 2001 From: David Foster Date: Wed, 3 Jun 2026 12:34:25 -0400 Subject: [PATCH 2/8] TypeForm: Add instrumentation of full parses done in semanal.py's try_parse_as_type_expression() Specifically: - If you set MYPY_TYPEFORM_PROFILE_FULL_PARSE environment variable, mypy will output a .tsv to that filepath which characterizes the kinds of Expressions that try_parse_as_type_expression() in semanal.py was forced to do a full parse of, which was not rejected early. - A misc/analyze_typeform_full_parse_profile.py script is added which takes those .tsvs and prints an expression-time summary (by total time) plus top-N descriptors per FAIL class. Co-Authored-By: Claude Opus 4.7 --- misc/analyze_typeform_full_parse_profile.py | 146 ++++++++++++++++++++ misc/analyze_typeform_stats.py | 2 +- mypy/semanal.py | 125 ++++++++++++++--- 3 files changed, 256 insertions(+), 17 deletions(-) create mode 100644 misc/analyze_typeform_full_parse_profile.py diff --git a/misc/analyze_typeform_full_parse_profile.py b/misc/analyze_typeform_full_parse_profile.py new file mode 100644 index 0000000000000..35001413b3f9d --- /dev/null +++ b/misc/analyze_typeform_full_parse_profile.py @@ -0,0 +1,146 @@ +#!/usr/bin/env python3 +""" +Aggregate the full-parse profile log produced by mypy's +SemanticAnalyzer.try_parse_as_type_expression() when run with +MYPY_TYPEFORM_PROFILE_FULL_PARSE set. + +Usage: + # 1. 
Run mypy with the profile env var set; per-PID log files are + # written as ".": + MYPY_TYPEFORM_PROFILE_FULL_PARSE=/tmp/tf.log \\ + python3 -m mypy --no-incremental -p your_package + + # 2. Aggregate one or more per-PID files: + python3 misc/analyze_typeform_full_parse_profile.py /tmp/tf.log.* + + # Optional: limit per-descriptor breakdown to top N rows per class. + python3 misc/analyze_typeform_full_parse_profile.py --top 20 /tmp/tf.log.* + +The script summarizes which (outcome, kind, subkind) classes account for +the most full-parse time, and lists the top descriptors within each +FAIL class -- the populations worth targeting with cheaper pre-filters +upstream in try_parse_as_type_expression. + +See also: + - mypy/semanal.py: SemanticAnalyzer.try_parse_as_type_expression() + - mypy/semanal.py: _log_typeform_full_parse() (TSV schema docstring) + - misc/analyze_typeform_stats.py (aggregate counters via --dump-build-stats) +""" + +from __future__ import annotations + +import argparse +import statistics +import sys +from collections import defaultdict +from collections.abc import Iterable + + +def read_rows(paths: Iterable[str]) -> list[tuple[str, str, str, str, int]]: + rows: list[tuple[str, str, str, str, int]] = [] + for path in paths: + with open(path) as f: + for line in f: + # Skip header lines (each per-PID file starts with one). 
+ if line.startswith("outcome\t"): + continue + parts = line.rstrip("\n").split("\t") + if len(parts) < 5: + continue + outcome, kind, subkind, desc, dur_ns_str = parts[:5] + try: + dur_ns = int(dur_ns_str) + except ValueError: + continue + rows.append((outcome, kind, subkind, desc, dur_ns)) + return rows + + +def print_class_summary(rows: list[tuple[str, str, str, str, int]]) -> None: + buckets: dict[tuple[str, str, str], list[int]] = defaultdict(list) + total_ns = 0 + for outcome, kind, subkind, _desc, dur_ns in rows: + buckets[(outcome, kind, subkind)].append(dur_ns) + total_ns += dur_ns + + print("Class summary (by total time):") + print("=" * 80) + print(f"{'count':>7} {'total_ms':>10} {'mean_us':>9} {'med_us':>9} {'pct':>6} class") + print("-" * 80) + ordered = sorted( + ( + (sum(d), len(d), statistics.mean(d), statistics.median(d), key) + for key, d in buckets.items() + ), + reverse=True, + ) + for total, n, mean, med, key in ordered: + pct = (100 * total / total_ns) if total_ns else 0 + outcome, kind, subkind = key + print( + f"{n:>7} {total/1e6:>10.2f} {mean/1e3:>9.1f} {med/1e3:>9.1f} " + f"{pct:>5.1f}% {outcome} {kind} {subkind}" + ) + print("-" * 80) + print(f"TOTAL: {len(rows):,} events, {total_ns/1e6:.2f} ms") + + +def print_fail_descriptors(rows: list[tuple[str, str, str, str, int]], top_n: int) -> None: + # Group FAIL rows by (kind, subkind) class, then by descriptor within each. + by_class: dict[tuple[str, str, str], dict[str, list[int]]] = defaultdict( + lambda: defaultdict(list) + ) + for outcome, kind, subkind, desc, dur_ns in rows: + if outcome != "FAIL": + continue + by_class[(outcome, kind, subkind)][desc].append(dur_ns) + + # Order classes by total FAIL time, descending. 
+ class_totals = sorted( + ((sum(sum(d) for d in descs.values()), key, descs) for key, descs in by_class.items()), + reverse=True, + ) + for total_ns, key, descs in class_totals: + outcome, kind, subkind = key + print() + print( + f"Top {top_n} descriptors in {outcome} {kind} {subkind} " + f"(class total {total_ns/1e6:.2f} ms):" + ) + print("-" * 80) + print(f"{'count':>6} {'total_ms':>10} {'mean_us':>9} descriptor") + rows_d = sorted( + ((sum(d), len(d), statistics.mean(d), desc) for desc, d in descs.items()), reverse=True + ) + for tot, n, mean, desc in rows_d[:top_n]: + print(f"{n:>6} {tot/1e6:>10.3f} {mean/1e3:>9.1f} {desc!r}") + if len(rows_d) > top_n: + print(f"... {len(rows_d) - top_n} more descriptors") + + +def main() -> None: + parser = argparse.ArgumentParser( + formatter_class=argparse.RawDescriptionHelpFormatter, description=__doc__ + ) + parser.add_argument( + "files", nargs="+", help="One or more per-PID profile files (e.g. /tmp/tf.log.*)" + ) + parser.add_argument( + "--top", + type=int, + default=20, + help="Max number of descriptors to list per FAIL class (default: 20)", + ) + args = parser.parse_args() + + rows = read_rows(args.files) + if not rows: + print("No data rows found in input files.", file=sys.stderr) + sys.exit(1) + + print_class_summary(rows) + print_fail_descriptors(rows, args.top) + + +if __name__ == "__main__": + main() diff --git a/misc/analyze_typeform_stats.py b/misc/analyze_typeform_stats.py index 0a540610bc620..17368f6247e53 100644 --- a/misc/analyze_typeform_stats.py +++ b/misc/analyze_typeform_stats.py @@ -26,7 +26,6 @@ See also: - mypy/semanal.py: SemanticAnalyzer.try_parse_as_type_expression() - - mypy/semanal.py: DEBUG_TYPE_EXPRESSION_FULL_PARSE_FAILURES """ import re @@ -70,6 +69,7 @@ def analyze_stats(output: str) -> None: print( f" - Expensive failed full parses: {failures:,} ({(failures / total * 100):.1f}% of all calls)" ) + print(" - Analyze further with misc/analyze_typeform_full_parse_profile.py") if __name__ == 
"__main__": diff --git a/mypy/semanal.py b/mypy/semanal.py index e010273b0781f..c86f04efb581a 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -50,10 +50,12 @@ from __future__ import annotations +import os import re +import time from collections.abc import Callable, Collection, Iterable, Iterator from contextlib import contextmanager -from typing import Any, Final, TypeAlias as _TypeAlias, TypeGuard, TypeVar, cast +from typing import Any, Final, TextIO, TypeAlias as _TypeAlias, TypeGuard, TypeVar, cast from typing_extensions import assert_never from mypy import errorcodes as codes, message_registry @@ -320,11 +322,18 @@ T = TypeVar("T") -# Whether to print diagnostic information for failed full parses -# in SemanticAnalyzer.try_parse_as_type_expression(). +# Instrumentation: If non-None, every expression that reaches the expensive +# full-parse block of SemanticAnalyzer.try_parse_as_type_expression() +# is logged to a .tsv by log_typeform_full_parse(). # -# See also: misc/analyze_typeform_stats.py -DEBUG_TYPE_EXPRESSION_FULL_PARSE_FAILURES: Final = False +# See also: +# - misc/analyze_typeform_full_parse_profile.py +# - misc/analyze_typeform_stats.py +_TYPEFORM_PROFILE_FULL_PARSE_PATH: Final = os.environ.get("MYPY_TYPEFORM_PROFILE_FULL_PARSE") +_typeform_full_parse_log_file: TextIO | None = None + +# TSV column names for the full-parse profile log +_TYPEFORM_PROFILE_FULL_PARSE_HEADER = "outcome\tkind\tsubkind\tdescriptor\tdur_ns\n" FUTURE_IMPORTS: Final = { @@ -8164,6 +8173,9 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None: else: assert_never(maybe_type_expr) + full_parse_t0 = ( + time.perf_counter_ns() if _TYPEFORM_PROFILE_FULL_PARSE_PATH is not None else 0 + ) with self.isolated_error_analysis(): try: t = self.expr_to_analyzed_type(maybe_type_expr) @@ -8173,17 +8185,6 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None: # Not a type expression t = None - if DEBUG_TYPE_EXPRESSION_FULL_PARSE_FAILURES 
and t is None: - original_flushed_files = set(self.errors.flushed_files) # save - try: - errors = self.errors.new_messages() # capture - finally: - self.errors.flushed_files = original_flushed_files # restore - - print( - f"SA.try_parse_as_type_expression: Full parse failure: {maybe_type_expr}, errors={errors!r}" - ) - # Count full parse attempts for profiling if t is not None: self.type_expression_full_parse_success_count += 1 @@ -8192,6 +8193,12 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None: maybe_type_expr.as_type = t + if _TYPEFORM_PROFILE_FULL_PARSE_PATH is not None: + full_parse_t1 = time.perf_counter_ns() + self.log_typeform_full_parse( + maybe_type_expr, t is not None, full_parse_t1 - full_parse_t0 + ) + @staticmethod def var_is_typing_special_form(var: Var) -> bool: return var.fullname.startswith("typing") and var.fullname in [ @@ -8208,6 +8215,92 @@ def var_is_typing_special_form(var: Var) -> bool: "typing.Union", ] + @staticmethod + def log_typeform_full_parse(expr: Expression, ok: bool, dur_ns: int) -> None: + """Log one entry into the full-parse block of try_parse_as_type_expression. + + Active only when the MYPY_TYPEFORM_PROFILE_FULL_PARSE environment variable + is set to a file path. Each mypy process (worker) writes to its own file + named "." to avoid contention; concatenating those files yields + the complete profile. Aggregate with misc/analyze_typeform_full_parse_profile.py. + + Output is tab-separated with one row per full-parse attempt: + + outcome "OK" if as_type was set, "FAIL" if the full parse rejected + the expression (either by raising TypeTranslationError or by + emitting errors during analysis). + kind AST node kind: StrExpr | IndexExpr | OpExpr | (other). + subkind For StrExpr: "ident", "dotident", or "other" (based on the + string's shape). For IndexExpr: "Name" or "Member" (base + kind). For OpExpr: always "|" (no other op reaches here). 
+ descriptor Short, type-specific identifier for the expression: + StrExpr -> the string value, truncated to 80 chars + (with " (N)" suffix when truncated). + IndexExpr -> the full stringified expression (str(expr), + with tabs/newlines escaped). + OpExpr -> the full stringified expression (str(expr), + with tabs/newlines escaped). + dur_ns Wall-clock nanoseconds spent in the full-parse block for + this expression (measured around expr_to_analyzed_type + plus the surrounding isolated_error_analysis ctx). + + The first line of each file is the column header (same as above). + """ + global _typeform_full_parse_log_file + if _typeform_full_parse_log_file is None: + assert _TYPEFORM_PROFILE_FULL_PARSE_PATH is not None + _typeform_full_parse_log_file = open( + f"{_TYPEFORM_PROFILE_FULL_PARSE_PATH}.{os.getpid()}", "a", buffering=1 + ) + _typeform_full_parse_log_file.write(_TYPEFORM_PROFILE_FULL_PARSE_HEADER) + outcome = "OK" if ok else "FAIL" + if isinstance(expr, StrExpr): + raw = expr.value + val = ( + raw[:80] + .replace("\\", "\\\\") + .replace("\t", "\\t") + .replace("\n", "\\n") + .replace("\r", "\\r") + ) + if len(raw) > 80: + val += f" ({len(raw)})" + if _IDENTIFIER_RE.fullmatch(raw): + subkind = "ident" + elif _DOTTED_IDENTIFIER_RE.fullmatch(raw): + subkind = "dotident" + else: + subkind = "other" + line = f"{outcome}\tStrExpr\t{subkind}\t{val}\t{dur_ns}\n" + elif isinstance(expr, IndexExpr): + base = expr.base + if isinstance(base, NameExpr): + subkind = "Name" + elif isinstance(base, MemberExpr): + subkind = "Member" + else: + subkind = type(base).__name__ + desc = ( + str(expr) + .replace("\\", "\\\\") + .replace("\t", "\\t") + .replace("\n", "\\n") + .replace("\r", "\\r") + ) + line = f"{outcome}\tIndexExpr\t{subkind}\t{desc}\t{dur_ns}\n" + elif isinstance(expr, OpExpr): + desc = ( + str(expr) + .replace("\\", "\\\\") + .replace("\t", "\\t") + .replace("\n", "\\n") + .replace("\r", "\\r") + ) + line = f"{outcome}\tOpExpr\t|\t{desc}\t{dur_ns}\n" + else: + 
line = f"{outcome}\t{type(expr).__name__}\t\t\t{dur_ns}\n" + _typeform_full_parse_log_file.write(line) + @contextmanager def isolated_error_analysis(self) -> Iterator[None]: """ From 6fba90341f00e24185cab03c5748ba110b171775 Mon Sep 17 00:00:00 2001 From: David Foster Date: Wed, 3 Jun 2026 12:35:05 -0400 Subject: [PATCH 3/8] TypeForm: Add 7 more early-reject filters to semanal.py's try_parse_as_type_expression() These filters reduce the mypy's wall clock slowdown when checking the mypy codebase after the introduction of TypeForm from +2.03% to +1.21%, when using `misc/perf_compare.py` to profile. Co-Authored-By: Claude Opus 4.7 --- mypy/semanal.py | 131 +++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 130 insertions(+), 1 deletion(-) diff --git a/mypy/semanal.py b/mypy/semanal.py index c86f04efb581a..3f1d1cb7221bf 100644 --- a/mypy/semanal.py +++ b/mypy/semanal.py @@ -375,6 +375,52 @@ # string literal as a type expression. _MULTIPLE_WORDS_NONTYPE_RE = re.compile(r'\s*[^\s.\'"|\[]+\s+[^\s.\'"|\[]') +# Matches any valid Python identifier, including identifiers with Unicode characters. +# +# [^\d\W] = word character that is not a digit +# \w = word character +# \Z = match end of string; does not allow a trailing \n, unlike $ +_IDENTIFIER_RE = re.compile(r"^[^\d\W]\w*\Z", re.UNICODE) + +# Matches if the string contains at least one identifier-start character +# (letter or underscore). +_CONTAINS_IDENTIFIER_RE = re.compile(r"[^\W\d]", re.UNICODE) + +# Matches a dotted identifier (e.g. 'builtins.tuple', 'typing.Mapping', 'a.b.c'). +_DOTTED_IDENTIFIER_RE = re.compile(r"^[^\d\W]\w*(\.[^\d\W]\w*)+\Z", re.UNICODE) + +# Matches a dotted name (one or more identifier components joined by '.'). +# Accepts a bare identifier with zero dots. Used to extract every +# dotted identifier from inside a stringified type expression. 
+_CONTAINED_DOTTED_IDENTIFIER_RE = re.compile(r"[^\W\d]\w*(?:\.[^\W\d]\w*)*", re.UNICODE) + +# Matches several patterns that never appear in valid type expressions +# NOTE: Allows '*' for (PEP 646 Unpack) and '+' for (Literal[+N]) +_NONTYPE_PATTERN_RE = re.compile( + # Characters never valid in a type expression + r"[!:/<>@%$^?;&~`\\]|" + # '-' not directly preceded by '[' (which can occur in Literal[-N]) + # NOTE: Incorrectly rejects multi-element edge cases like Literal[-1, -2] + # which appear in stringified type expressions, which are expected + # to be rare in practice. + r"(? None: return elif isinstance(maybe_type_expr, StrExpr): str_value = maybe_type_expr.value # cache + # (TODO: Experiment with the ordering of all the following filters, + # to frontload those most efficient at rejecting early.) + # Filter out string literals with no identifier-start characters + # (pure punctuation/digits/whitespace) which cannot be type expressions + if not _CONTAINS_IDENTIFIER_RE.search(str_value): + maybe_type_expr.as_type = None + return + # Filter out string literals whose first non-whitespace character + # cannot start a valid type expression (a digit, or punctuation + # other than '*'). + if _NONTYPE_FIRST_CHAR_RE.match(str_value): + maybe_type_expr.as_type = None + return + # Filter out string literals with common patterns that could not + # possibly be in a type expression + if _MULTIPLE_WORDS_NONTYPE_RE.match(str_value): + # A common pattern in string literals containing a sentence. + # But cannot be a type expression. + maybe_type_expr.as_type = None + return # Filter out string literals which look like an identifier but # cannot be a type expression, for a few common reasons if str_value.isidentifier(): @@ -8116,7 +8182,40 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None: # 2. 
unbound_paramspec: f'ParamSpec "{name}" is unbound' [codes.VALID_TYPE] maybe_type_expr.as_type = None return - else: # does not look like an identifier + if ( + isinstance(node, Var) + and isinstance(get_proper_type(node.type), Instance) + and not self.var_is_typing_special_form(node) + ): + # Var whose declared type is a concrete instance: it is + # a value (local, parameter, module-level constant), + # not a type expression. + maybe_type_expr.as_type = None + return + if isinstance(node, (FuncDef, OverloadedFuncDef, MypyFile)): + # Functions and modules are never type expressions. + maybe_type_expr.as_type = None + return + elif _DOTTED_IDENTIFIER_RE.fullmatch(str_value): + # Dotted-name string (e.g. "builtins.tuple", "typing.Mapping"). + # Look up the leftmost component; if it can't possibly be a + # type prefix, bail. Mirrors the IndexExpr-with-MemberExpr-base + # filter logic below. + leftmost = str_value.split(".", 1)[0] + sym = self.lookup(leftmost, UnboundType(leftmost), suppress_errors=True) + if sym is None: + # Leftmost component does not refer to anything in scope + maybe_type_expr.as_type = None + return + node = sym.node # cache + if isinstance(node, PlaceholderNode) and not node.becomes_typeinfo: + maybe_type_expr.as_type = None + return + if isinstance(node, Var) and not self.var_is_typing_special_form(node): + # Leftmost component is a Var: cannot be a type prefix + maybe_type_expr.as_type = None + return + else: # does not look like an identifier or dotted identifier if '"' in str_value or "'" in str_value: # Only valid inside a Literal[...] or Annotated[..., ...] type if "[" not in str_value: @@ -8135,6 +8234,34 @@ def try_parse_as_type_expression(self, maybe_type_expr: Expression) -> None: # But cannot be a type expression. maybe_type_expr.as_type = None return + # Skip some checks when a non-zero even number of single or double quotes + # signals a possible Literal[...] 
component, whose quoted content + # could contain anything: symbols or identifiers that would be + # incorrectly processed by some checks. + sq = str_value.count("'") + dq = str_value.count('"') + if not ((sq > 0 and sq % 2 == 0) or (dq > 0 and dq % 2 == 0)): + # Filter out string literals containing characters or boundary + # patterns that never appear in valid type expressions + # (e.g. '/', ':', '<', '>', '@', leading/trailing '.'). + if _NONTYPE_PATTERN_RE.search(str_value): + maybe_type_expr.as_type = None + return + # A string that can spell a valid type must contain 1+ dotted names, + # all of whose leftmost identifiers must exist in the local scope. + found = False + for m in _CONTAINED_DOTTED_IDENTIFIER_RE.finditer(str_value): + found = True + leftmost = m.group().split(".", 1)[0] + if ( + self.lookup(leftmost, UnboundType(leftmost), suppress_errors=True) + is None + ): + maybe_type_expr.as_type = None + return + if not found: + maybe_type_expr.as_type = None + return elif isinstance(maybe_type_expr, IndexExpr): if isinstance(maybe_type_expr.base, NameExpr): if isinstance( @@ -8208,6 +8335,8 @@ def var_is_typing_special_form(var: Var) -> bool: "typing.Literal", "typing_extensions.Literal", "typing.Optional", + "typing.Self", + "typing_extensions.Self", "typing.TypeGuard", "typing_extensions.TypeGuard", "typing.TypeIs", From 978e9b49a300ceeb8df10124d0b4df82976e58ea Mon Sep 17 00:00:00 2001 From: David Foster Date: Wed, 3 Jun 2026 18:35:20 -0400 Subject: [PATCH 4/8] SQUISH -> misc/perf_compare.py -- Workaround inability to return Any type --- misc/perf_compare.py | 44 +++++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/misc/perf_compare.py b/misc/perf_compare.py index d6f140818f141..41939bce21a8a 100755 --- a/misc/perf_compare.py +++ b/misc/perf_compare.py @@ -31,7 +31,7 @@ import time from collections.abc import Callable from concurrent.futures import ThreadPoolExecutor, as_completed -from typing 
import Any +from resource import struct_rusage as rusage def winsorized_paired_stats( @@ -150,26 +150,36 @@ def run_benchmark( # Update a few files to force non-trivial incremental run edit_python_file(os.path.join(abschk, "mypy/__main__.py")) edit_python_file(os.path.join(abschk, "mypy/test/testcheck.py")) - stopwatch_func: Callable[[], Any] - delta_func: Callable[[Any, Any], Any] + + def run() -> None: + # Ignore errors, since some commits being measured may generate additional errors. + if foreign: + subprocess.run(cmd, cwd=check_dir, env=env) + else: + subprocess.run(cmd, cwd=compiled_dir, env=env) + if metric == "wall": - stopwatch_func = lambda: time.time() - delta_func = lambda t0, t1: t1 - t0 + stopwatch_func_w: Callable[[], float] = lambda: time.time() + delta_func_w: Callable[[float, float], float] = lambda t0, t1: t1 - t0 + + v0_w = stopwatch_func_w() # capture + run() + v1_w = stopwatch_func_w() # capture + return delta_func_w(v0_w, v1_w) elif metric == "cpu": - # NOTE: CPU time (user+sys) is far less sensitive than wall-clock to - # background interference - stopwatch_func = lambda: resource.getrusage(resource.RUSAGE_CHILDREN) - delta_func = lambda r0, r1: (r1.ru_utime - r0.ru_utime) + (r1.ru_stime - r0.ru_stime) + stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage( + resource.RUSAGE_CHILDREN + ) + delta_func_c: Callable[[rusage, rusage], float] = lambda r0, r1: ( + r1.ru_utime - r0.ru_utime + ) + (r1.ru_stime - r0.ru_stime) + + v0_c = stopwatch_func_c() # capture + run() + v1_c = stopwatch_func_c() # capture + return delta_func_c(v0_c, v1_c) else: raise AssertionError(f"Unrecognized metric: {metric!r}") - v0 = stopwatch_func() # capture - # Ignore errors, since some commits being measured may generate additional errors. 
- if foreign: - subprocess.run(cmd, cwd=check_dir, env=env) - else: - subprocess.run(cmd, cwd=compiled_dir, env=env) - v1 = stopwatch_func() # capture - return delta_func(v0, v1) def main() -> None: From 5a03b136514da99902ea0f6e2fba6c3f0d20a377 Mon Sep 17 00:00:00 2001 From: David Foster Date: Wed, 3 Jun 2026 18:57:33 -0400 Subject: [PATCH 5/8] SQUISH -> misc/perf_compare.py -- Workaround missing 'resource' module on Windows --- misc/perf_compare.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/misc/perf_compare.py b/misc/perf_compare.py index 41939bce21a8a..86b6023c03563 100755 --- a/misc/perf_compare.py +++ b/misc/perf_compare.py @@ -23,7 +23,6 @@ import glob import os import random -import resource import shutil import statistics import subprocess @@ -31,7 +30,6 @@ import time from collections.abc import Callable from concurrent.futures import ThreadPoolExecutor, as_completed -from resource import struct_rusage as rusage def winsorized_paired_stats( @@ -167,6 +165,10 @@ def run() -> None: v1_w = stopwatch_func_w() # capture return delta_func_w(v0_w, v1_w) elif metric == "cpu": + if sys.platform == 'win32': + raise NotImplementedError("--metric cpu is not implemented on Windows") + from resource import struct_rusage as rusage + import resource stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage( resource.RUSAGE_CHILDREN ) From 91052ef56f02becab27ab524bedfd3cb10a00ed0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Jun 2026 23:00:18 +0000 Subject: [PATCH 6/8] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- misc/perf_compare.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/misc/perf_compare.py b/misc/perf_compare.py index 86b6023c03563..b884fa038418e 100755 --- a/misc/perf_compare.py +++ b/misc/perf_compare.py @@ -165,10 +165,11 @@ def run() -> None: v1_w = 
stopwatch_func_w() # capture return delta_func_w(v0_w, v1_w) elif metric == "cpu": - if sys.platform == 'win32': + if sys.platform == "win32": raise NotImplementedError("--metric cpu is not implemented on Windows") - from resource import struct_rusage as rusage import resource + from resource import struct_rusage as rusage + stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage( resource.RUSAGE_CHILDREN ) From 2b804d68f99b9e792822a2355682c2f9ab95226d Mon Sep 17 00:00:00 2001 From: David Foster Date: Wed, 3 Jun 2026 19:10:29 -0400 Subject: [PATCH 7/8] SQUISH -> misc/perf_compare.py -- typechecker fixes --- misc/perf_compare.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/misc/perf_compare.py b/misc/perf_compare.py index b884fa038418e..9f480ed6d494b 100755 --- a/misc/perf_compare.py +++ b/misc/perf_compare.py @@ -167,8 +167,8 @@ def run() -> None: elif metric == "cpu": if sys.platform == "win32": raise NotImplementedError("--metric cpu is not implemented on Windows") - import resource - from resource import struct_rusage as rusage + import resource # type: ignore[unreachable] + from resource import struct_rusage as rusage # type: ignore[attr-defined] stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage( resource.RUSAGE_CHILDREN From 12f933518d94df3e5203731c9cc4f0136853496b Mon Sep 17 00:00:00 2001 From: David Foster Date: Wed, 3 Jun 2026 19:22:14 -0400 Subject: [PATCH 8/8] SQUISH -> misc/perf_compare.py -- typechecker fixes, take 2 --- misc/perf_compare.py | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/misc/perf_compare.py b/misc/perf_compare.py index 9f480ed6d494b..895e8c0ca2811 100755 --- a/misc/perf_compare.py +++ b/misc/perf_compare.py @@ -167,20 +167,21 @@ def run() -> None: elif metric == "cpu": if sys.platform == "win32": raise NotImplementedError("--metric cpu is not implemented on Windows") - import resource # type: ignore[unreachable] - from resource 
import struct_rusage as rusage # type: ignore[attr-defined] - - stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage( - resource.RUSAGE_CHILDREN - ) - delta_func_c: Callable[[rusage, rusage], float] = lambda r0, r1: ( - r1.ru_utime - r0.ru_utime - ) + (r1.ru_stime - r0.ru_stime) + else: + import resource + from resource import struct_rusage as rusage - v0_c = stopwatch_func_c() # capture - run() - v1_c = stopwatch_func_c() # capture - return delta_func_c(v0_c, v1_c) + stopwatch_func_c: Callable[[], rusage] = lambda: resource.getrusage( + resource.RUSAGE_CHILDREN + ) + delta_func_c: Callable[[rusage, rusage], float] = lambda r0, r1: ( + r1.ru_utime - r0.ru_utime + ) + (r1.ru_stime - r0.ru_stime) + + v0_c = stopwatch_func_c() # capture + run() + v1_c = stopwatch_func_c() # capture + return delta_func_c(v0_c, v1_c) else: raise AssertionError(f"Unrecognized metric: {metric!r}")