From cc3ef8322fc3f877b9e620f62ab2e22f9d3e75c2 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 18 Apr 2026 13:48:34 +0100 Subject: [PATCH 1/3] Use parallel parsing at all stages --- mypy/build.py | 101 ++++++++++++++++++---------------- mypy/build_worker/worker.py | 11 +++- mypy/checkstrformat.py | 3 +- mypy/nativeparse.py | 29 +++------- mypy/parse.py | 59 +++++++++++--------- mypy/stubgen.py | 5 +- mypy/test/test_nativeparse.py | 26 ++++++++- mypy/test/testparse.py | 4 +- 8 files changed, 135 insertions(+), 103 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index ef481ed8f444..121b261e28fd 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -121,7 +121,6 @@ ImportFrom, MypyFile, OverloadedFuncDef, - ParseError, SymbolTable, ) from mypy.options import OPTIONS_AFFECTING_CACHE_NO_PLATFORM @@ -168,7 +167,7 @@ from mypy.modules_state import modules_state from mypy.nodes import Expression from mypy.options import Options -from mypy.parse import load_from_raw, parse, report_parse_error +from mypy.parse import load_from_raw, parse from mypy.plugin import ChainedPlugin, Plugin, ReportConfigContext from mypy.plugins.default import DefaultPlugin from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor @@ -999,13 +998,18 @@ def dump_stats(self) -> None: # Call print once so that we don't get a mess in parallel mode. print("\n".join(lines) + "\n\n", end="") - def parse_all(self, states: list[State]) -> None: - """Parse multiple files in parallel (if possible) and compute dependencies.""" + def parse_all(self, states: list[State], post_parse: bool = True) -> None: + """Parse multiple files in parallel (if possible) and compute dependencies. + + If post_parse is False, skip the last step (used when parsing unchanged files + that need to be re-checked due to stale dependencies). + """ if not self.options.native_parser: # Old parser cannot be parallelized. 
for state in states: state.parse_file() - self.post_parse_all(states) + if post_parse: + self.post_parse_all(states) return sequential_states = [] @@ -1020,7 +1024,8 @@ def parse_all(self, states: list[State]) -> None: continue parallel_states.append(state) self.parse_parallel(sequential_states, parallel_states) - self.post_parse_all(states) + if post_parse: + self.post_parse_all(states) def parse_parallel(self, sequential_states: list[State], parallel_states: list[State]) -> None: """Perform parallel parsing of states. @@ -1030,7 +1035,7 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S parallelized efficiently. """ futures = [] - parallel_parsed_states = {} + parallel_parsed_states = [] # Use at least --num-workers if specified by user. available_threads = max(get_available_threads(), self.options.num_workers) # Overhead from trying to parallelize (small) blocking portion of @@ -1048,7 +1053,7 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S if ignore_errors: self.errors.ignored_files.add(state.xpath) futures.append(executor.submit(state.parse_file_inner, state.source or "")) - parallel_parsed_states[state.id] = state + parallel_parsed_states.append(state) else: self.log(f"Using cached AST for {state.xpath} ({state.id})") state.tree, state.early_errors = self.ast_cache[state.id] @@ -1058,21 +1063,27 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S state.parse_file() for fut in wait(futures).done: - state_id, parse_errors = fut.result() - # New parser reports errors lazily, add them if any. 
- if parse_errors: - state = parallel_parsed_states[state_id] - with state.wrap_context(): - self.errors.set_file(state.xpath, state.id, options=state.options) - for error in parse_errors: - report_parse_error(error, self.errors) - if self.errors.is_blockers(): - self.log("Bailing due to parse errors") - self.errors.raise_error() + fut.result() + for state in parallel_parsed_states: + # New parser returns serialized trees that need to be de-serialized. + with state.wrap_context(): + assert state.tree is not None + if state.tree.raw_data: + state.tree = load_from_raw( + state.xpath, + state.id, + state.tree.raw_data, + self.errors, + state.options, + imports_only=bool(self.workers), + ) + if self.errors.is_blockers(): + self.log("Bailing due to parse errors") + self.errors.raise_error() for state in parallel_states: assert state.tree is not None - if state.id in parallel_parsed_states: + if state in parallel_parsed_states: state.early_errors = list(self.errors.error_info_map.get(state.xpath, [])) state.semantic_analysis_pass1() self.ast_cache[state.id] = (state.tree, state.early_errors) @@ -1208,31 +1219,18 @@ def parse_file( source: str, options: Options, raw_data: FileRawData | None = None, - ) -> tuple[MypyFile, list[ParseError]]: + ) -> MypyFile: """Parse the source of a file with the given name. Raise CompileError if there is a parse error. """ - imports_only = False file_exists = self.fscache.exists(path) - if self.workers and file_exists: - # Currently, we can use the native parser only for actual files. - imports_only = True t0 = time.time() - parse_errors: list[ParseError] = [] if raw_data: # If possible, deserialize from known binary data instead of parsing from scratch. 
tree = load_from_raw(path, id, raw_data, self.errors, options) else: - tree, parse_errors = parse( - source, - path, - id, - self.errors, - options=options, - file_exists=file_exists, - imports_only=imports_only, - ) + tree = parse(source, path, id, self.errors, options=options, file_exists=file_exists) tree._fullname = id if self.stats_enabled: with self.stats_lock: @@ -1242,7 +1240,7 @@ def parse_file( stubs_parsed=int(tree.is_stub), parse_time=time.time() - t0, ) - return tree, parse_errors + return tree def load_fine_grained_deps(self, id: str) -> dict[str, set[str]]: t0 = time.time() @@ -3089,15 +3087,12 @@ def get_source(self) -> str: self.time_spent_us += time_spent_us(t0) return source - def parse_file_inner( - self, source: str, raw_data: FileRawData | None = None - ) -> tuple[str, list[ParseError]]: + def parse_file_inner(self, source: str, raw_data: FileRawData | None = None) -> None: t0 = time_ref() - self.tree, parse_errors = self.manager.parse_file( + self.tree = self.manager.parse_file( self.id, self.xpath, source, options=self.options, raw_data=raw_data ) self.time_spent_us += time_spent_us(t0) - return self.id, parse_errors def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = None) -> None: """Parse file and run first pass of semantic analysis. @@ -3120,10 +3115,20 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = self.manager.errors.ignored_files.add(self.xpath) with self.wrap_context(): manager.errors.set_file(self.xpath, self.id, options=self.options) - _, parse_errors = self.parse_file_inner(source, raw_data) - for error in parse_errors: - # New parser reports errors lazily. - report_parse_error(error, manager.errors) + self.parse_file_inner(source, raw_data) + tree: MypyFile | None = self.tree + assert tree is not None + # New parser returns serialized trees that need to be de-serialized. 
+ if tree.raw_data is not None: + assert raw_data is None + self.tree = load_from_raw( + self.xpath, + self.id, + tree.raw_data, + manager.errors, + self.options, + imports_only=bool(self.manager.workers), + ) if manager.errors.is_blockers(): manager.log("Bailing due to parse errors") manager.errors.raise_error() @@ -4631,9 +4636,9 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None: # Re-generate import errors in case this module was loaded from the cache. if graph[id].meta: graph[id].verify_dependencies(suppressed_only=True) - # We may already have parsed the module, or not. - # If the former, parse_file() is a no-op. - graph[id].parse_file() + # We may already have parsed the modules, or not. + # If the former, parse_file() is a no-op. + manager.parse_all([graph[id] for id in stale], post_parse=False) if "typing" in scc: # For historical reasons we need to manually add typing aliases # for built-in generic collections, see docstring of diff --git a/mypy/build_worker/worker.py b/mypy/build_worker/worker.py index 6742bd6fde6f..2139ad130637 100644 --- a/mypy/build_worker/worker.py +++ b/mypy/build_worker/worker.py @@ -239,14 +239,23 @@ def load_states( mod_data: dict[str, tuple[bytes, FileRawData | None]], ) -> None: """Re-create full state of an SCC as it would have been in coordinator.""" + needs_parse = [] for id in scc.mod_ids: state = graph[id] # Re-clone options since we don't send them, it is usually faster than deserializing. state.options = state.options.clone_for_module(state.id) suppressed_deps_opts, raw_data = mod_data[id] - state.parse_file(raw_data=raw_data) + if raw_data is not None: + state.parse_file(raw_data=raw_data) + else: + needs_parse.append(state) # Set data that is needed to be written to cache meta. state.known_suppressed_deps_opts = suppressed_deps_opts + # Perform actual parsing in parallel (but we don't need to compute dependencies). 
+ if needs_parse: + manager.parse_all(needs_parse, post_parse=False) + for id in scc.mod_ids: + state = graph[id] assert state.tree is not None import_lines = {imp.line for imp in state.tree.imports} state.imports_ignored = { diff --git a/mypy/checkstrformat.py b/mypy/checkstrformat.py index e985aa352abd..e96af007e29c 100644 --- a/mypy/checkstrformat.py +++ b/mypy/checkstrformat.py @@ -581,13 +581,14 @@ def apply_field_accessors( temp_errors = Errors(self.chk.options) dummy = DUMMY_FIELD_NAME + spec.field[len(spec.key) :] - temp_ast, _ = parse( + temp_ast = parse( dummy, fnam="", module=None, options=self.chk.options, errors=temp_errors, file_exists=False, + eager=True, ) if temp_errors.is_errors(): self.msg.fail( diff --git a/mypy/nativeparse.py b/mypy/nativeparse.py index f08268cfe1ca..fd90d85fa355 100644 --- a/mypy/nativeparse.py +++ b/mypy/nativeparse.py @@ -190,12 +190,15 @@ def add_error( def native_parse( - filename: str, options: Options, skip_function_bodies: bool = False, imports_only: bool = False + filename: str, options: Options, skip_function_bodies: bool = False ) -> tuple[MypyFile, list[ParseError], TypeIgnores]: """Parse a Python file using the native Rust-based parser. Return (MypyFile, errors, type_ignores). + The returned tree is empty with actual serialized data stored in `raw_data` + attribute. Use read_statements() and/or deserialize_imports() to de-serialize. 
+ The caller should set these additional attributes on the returned MypyFile: - ignored_lines: dict of type ignore comments (from the TypeIgnores return value) - is_stub: whether the file is a .pyi stub @@ -210,26 +213,12 @@ def native_parse( b, errors, ignores, import_bytes, is_partial_package, uses_template_strings = ( parse_to_binary_ast(filename, options, skip_function_bodies) ) - data = ReadBuffer(b) - n = read_int(data) - state = State(options) - if imports_only: - defs = [] - else: - defs = read_statements(state, data, n) - - imports = deserialize_imports(import_bytes) - - node = MypyFile(defs, imports) + node = MypyFile([], []) node.path = filename - node.is_partial_stub_package = is_partial_package - if imports_only: - node.raw_data = FileRawData( - b, import_bytes, errors, dict(ignores), is_partial_package, uses_template_strings - ) - node.uses_template_strings = uses_template_strings - all_errors = errors + state.errors - return node, all_errors, ignores + node.raw_data = FileRawData( + b, import_bytes, errors, dict(ignores), is_partial_package, uses_template_strings + ) + return node, errors, ignores def expect_end_tag(data: ReadBuffer) -> None: diff --git a/mypy/parse.py b/mypy/parse.py index bd8e4ad5dcd3..d2626737b8c4 100644 --- a/mypy/parse.py +++ b/mypy/parse.py @@ -18,14 +18,15 @@ def parse( errors: Errors, options: Options, file_exists: bool, - imports_only: bool = False, -) -> tuple[MypyFile, list[ParseError]]: + eager: bool = False, +) -> MypyFile: """Parse a source file, without doing any semantic analysis. - Return the parse tree. If errors is not provided, raise ParseError - on failure. Otherwise, use the errors object to report parse errors. - + Return the parse tree, use the errors object to report parse errors. The python_version (major, minor) option determines the Python syntax variant. + + New parser returns empty tree with serialized data. To get the full tree and + the parse errors, use eager=True. 
""" if options.native_parser: # Native parser only works with actual files on disk @@ -36,45 +37,43 @@ def parse( ignore_errors = options.ignore_errors or fnam in errors.ignored_files # If errors are ignored, we can drop many function bodies to speed up type checking. strip_function_bodies = ignore_errors and not options.preserve_asts - tree, parse_errors, type_ignores = mypy.nativeparse.native_parse( - fnam, - options, - skip_function_bodies=strip_function_bodies, - imports_only=imports_only, + tree, _, _ = mypy.nativeparse.native_parse( + fnam, options, skip_function_bodies=strip_function_bodies ) - # Convert type ignores list to dict - tree.ignored_lines = dict(type_ignores) # Set is_stub based on file extension tree.is_stub = fnam.endswith(".pyi") - # Note: tree.imports is populated directly by native_parse with deserialized + # Note: tree.imports is populated directly by load_from_raw() with deserialized # import metadata, so we don't need to collect imports via AST traversal - return tree, parse_errors + if eager and tree.raw_data is not None: + tree = load_from_raw(fnam, module, tree.raw_data, errors, options) + return tree # Fall through to fastparse for non-existent files - assert not imports_only if options.transform_source is not None: source = options.transform_source(source) import mypy.fastparse - tree = mypy.fastparse.parse(source, fnam=fnam, module=module, errors=errors, options=options) - return tree, [] + return mypy.fastparse.parse(source, fnam=fnam, module=module, errors=errors, options=options) def load_from_raw( - fnam: str, module: str | None, raw_data: FileRawData, errors: Errors, options: Options + fnam: str, + module: str | None, + raw_data: FileRawData, + errors: Errors, + options: Options, + imports_only: bool = False, ) -> MypyFile: - """Load AST from parsed binary data. - - This essentially replicates parse() above but expects FileRawData instead of actually - parsing the source code in the file. 
- """ + """Load AST from parsed binary data and report stored errors.""" from mypy.nativeparse import State, deserialize_imports, read_statements - # This part mimics the logic in native_parse(). - data = ReadBuffer(raw_data.defs) - n = read_int(data) state = State(options) - defs = read_statements(state, data, n) + if imports_only: + defs = [] + else: + data = ReadBuffer(raw_data.defs) + n = read_int(data) + defs = read_statements(state, data, n) imports = deserialize_imports(raw_data.imports) tree = MypyFile(defs, imports) @@ -83,6 +82,8 @@ def load_from_raw( tree.is_partial_stub_package = raw_data.is_partial_stub_package tree.uses_template_strings = raw_data.uses_template_strings tree.is_stub = fnam.endswith(".pyi") + if module is not None: + tree._fullname = module # Report parse errors, this replicates the logic in parse(). all_errors = raw_data.raw_errors + state.errors @@ -90,6 +91,10 @@ def load_from_raw( for error in all_errors: # Note we never raise in this function, so it should not be called in coordinator. report_parse_error(error, errors) + if imports_only: + # Preserve raw data when only de-serializing imports, it will be sent to + # the parallel workers. + tree.raw_data = raw_data return tree diff --git a/mypy/stubgen.py b/mypy/stubgen.py index 38bd1f228e6e..9c682ba4b820 100755 --- a/mypy/stubgen.py +++ b/mypy/stubgen.py @@ -1744,17 +1744,16 @@ def parse_source_file(mod: StubSource, mypy_options: MypyOptions) -> None: data = f.read() source = mypy.util.decode_python_encoding(data) errors = Errors(mypy_options) - mod.ast, errs = mypy.parse.parse( + mod.ast = mypy.parse.parse( source, fnam=mod.path, module=mod.module, errors=errors, options=mypy_options, file_exists=True, + eager=True, ) mod.ast._fullname = mod.module - for err in errs: - mypy.parse.report_parse_error(err, errors) if errors.is_blockers(): # Syntax error! 
for m in errors.new_messages(): diff --git a/mypy/test/test_nativeparse.py b/mypy/test/test_nativeparse.py index 94be60e328b7..f9a18ea992c2 100644 --- a/mypy/test/test_nativeparse.py +++ b/mypy/test/test_nativeparse.py @@ -12,6 +12,8 @@ import unittest from collections.abc import Iterator +from librt.internal import ReadBuffer + from mypy import defaults, nodes from mypy.cache import ( END_TAG, @@ -21,6 +23,7 @@ LITERAL_NONE, LITERAL_STR, LOCATION, + read_int, ) from mypy.config_parser import parse_mypy_comments from mypy.errors import CompileError @@ -33,7 +36,13 @@ # If the experimental ast_serialize module isn't installed, the following import will fail # and we won't run any native parser tests. try: - from mypy.nativeparse import native_parse, parse_to_binary_ast + from mypy.nativeparse import ( + State, + deserialize_imports, + native_parse, + parse_to_binary_ast, + read_statements, + ) has_nativeparse = True except ImportError: @@ -90,6 +99,7 @@ def test_parser(testcase: DataDrivenTestCase) -> None: try: with temp_source(source) as fnam: node, errors, type_ignores = native_parse(fnam, options, skip_function_bodies) + errors += load_tree(node, options) node.path = "main" a = node.str_with_options(options).split("\n") a = [format_error(err) for err in errors] + a @@ -113,6 +123,18 @@ def format_ignore(ignore: tuple[int, list[str]]) -> str: return f"ignore: {line} [{', '.join(codes)}]" +def load_tree(node: MypyFile, options: Options) -> list[ParseError]: + """Deserialize full AST from serialized raw data.""" + assert node.raw_data is not None + state = State(options) + data = ReadBuffer(node.raw_data.defs) + n = read_int(data) + node.defs = read_statements(state, data, n) + node.imports = deserialize_imports(node.raw_data.imports) + node.raw_data = None + return state.errors + + def test_parser_imports(testcase: DataDrivenTestCase) -> None: """Perform a single native parser imports test case. 
@@ -128,7 +150,7 @@ def test_parser_imports(testcase: DataDrivenTestCase) -> None: try: with temp_source(source) as fnam: node, errors, type_ignores = native_parse(fnam, options) - + errors += load_tree(node, options) # Extract and format reachable imports a = format_reachable_imports(node) a = [format_error(err) for err in errors] + a diff --git a/mypy/test/testparse.py b/mypy/test/testparse.py index 09177126426d..6d00f5b5710f 100644 --- a/mypy/test/testparse.py +++ b/mypy/test/testparse.py @@ -60,13 +60,14 @@ def test_parser(testcase: DataDrivenTestCase) -> None: try: errors = Errors(options) - n, _ = parse( + n = parse( bytes(source, "ascii"), fnam="main", module="__main__", errors=errors, options=options, file_exists=False, + eager=True, ) if errors.is_errors(): errors.raise_error() @@ -108,6 +109,7 @@ def test_parse_error(testcase: DataDrivenTestCase) -> None: errors=errors, options=options, file_exists=False, + eager=True, ) if errors.is_errors(): errors.raise_error() From 283d0e494d25fb5aaaf72115a34d176ac8466763 Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 18 Apr 2026 14:36:44 +0100 Subject: [PATCH 2/3] Fast path for single-file SCCs --- mypy/build.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/mypy/build.py b/mypy/build.py index 121b261e28fd..abc60d24d2ae 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -1023,7 +1023,12 @@ def parse_all(self, states: list[State], post_parse: bool = True) -> None: sequential_states.append(state) continue parallel_states.append(state) - self.parse_parallel(sequential_states, parallel_states) + if len(parallel_states) > 1: + self.parse_parallel(sequential_states, parallel_states) + else: + # Avoid using executor when there is no parallelism. 
+ for state in states: + state.parse_file() if post_parse: self.post_parse_all(states) From 002cbd9b314b7ea7fc923a042a1ef2694a48289a Mon Sep 17 00:00:00 2001 From: Ivan Levkivskyi Date: Sat, 18 Apr 2026 14:42:38 +0100 Subject: [PATCH 3/3] Work around mypyc --- mypy/build.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mypy/build.py b/mypy/build.py index abc60d24d2ae..9a478c549d4b 100644 --- a/mypy/build.py +++ b/mypy/build.py @@ -3106,7 +3106,8 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = modules in any way. Logic here should be kept in sync with BuildManager.parse_all(). """ self.needs_parse = False - if self.tree is not None: + tree = self.tree + if tree is not None: # The file was already parsed. return @@ -3121,15 +3122,14 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = with self.wrap_context(): manager.errors.set_file(self.xpath, self.id, options=self.options) self.parse_file_inner(source, raw_data) - tree: MypyFile | None = self.tree - assert tree is not None + assert self.tree is not None # New parser returns serialized trees that need to be de-serialized. - if tree.raw_data is not None: + if self.tree.raw_data is not None: assert raw_data is None self.tree = load_from_raw( self.xpath, self.id, - tree.raw_data, + self.tree.raw_data, manager.errors, self.options, imports_only=bool(self.manager.workers),