Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
110 changes: 60 additions & 50 deletions mypy/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@
ImportFrom,
MypyFile,
OverloadedFuncDef,
ParseError,
SymbolTable,
)
from mypy.options import OPTIONS_AFFECTING_CACHE_NO_PLATFORM
Expand Down Expand Up @@ -168,7 +167,7 @@
from mypy.modules_state import modules_state
from mypy.nodes import Expression
from mypy.options import Options
from mypy.parse import load_from_raw, parse, report_parse_error
from mypy.parse import load_from_raw, parse
from mypy.plugin import ChainedPlugin, Plugin, ReportConfigContext
from mypy.plugins.default import DefaultPlugin
from mypy.renaming import LimitedVariableRenameVisitor, VariableRenameVisitor
Expand Down Expand Up @@ -999,13 +998,18 @@ def dump_stats(self) -> None:
# Call print once so that we don't get a mess in parallel mode.
print("\n".join(lines) + "\n\n", end="")

def parse_all(self, states: list[State]) -> None:
"""Parse multiple files in parallel (if possible) and compute dependencies."""
def parse_all(self, states: list[State], post_parse: bool = True) -> None:
"""Parse multiple files in parallel (if possible) and compute dependencies.

If post_parse is False, skip the final post_parse_all() step (used when parsing
unchanged files that need to be re-checked due to stale dependencies).
"""
if not self.options.native_parser:
# Old parser cannot be parallelized.
for state in states:
state.parse_file()
self.post_parse_all(states)
if post_parse:
self.post_parse_all(states)
return

sequential_states = []
Expand All @@ -1019,8 +1023,14 @@ def parse_all(self, states: list[State]) -> None:
sequential_states.append(state)
continue
parallel_states.append(state)
self.parse_parallel(sequential_states, parallel_states)
self.post_parse_all(states)
if len(parallel_states) > 1:
self.parse_parallel(sequential_states, parallel_states)
else:
# Avoid using executor when there is no parallelism.
for state in states:
state.parse_file()
if post_parse:
self.post_parse_all(states)

def parse_parallel(self, sequential_states: list[State], parallel_states: list[State]) -> None:
"""Perform parallel parsing of states.
Expand All @@ -1030,7 +1040,7 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
parallelized efficiently.
"""
futures = []
parallel_parsed_states = {}
parallel_parsed_states = []
# Use at least --num-workers if specified by user.
available_threads = max(get_available_threads(), self.options.num_workers)
# Overhead from trying to parallelize (small) blocking portion of
Expand All @@ -1048,7 +1058,7 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
if ignore_errors:
self.errors.ignored_files.add(state.xpath)
futures.append(executor.submit(state.parse_file_inner, state.source or ""))
parallel_parsed_states[state.id] = state
parallel_parsed_states.append(state)
else:
self.log(f"Using cached AST for {state.xpath} ({state.id})")
state.tree, state.early_errors = self.ast_cache[state.id]
Expand All @@ -1058,21 +1068,27 @@ def parse_parallel(self, sequential_states: list[State], parallel_states: list[S
state.parse_file()

for fut in wait(futures).done:
state_id, parse_errors = fut.result()
# New parser reports errors lazily, add them if any.
if parse_errors:
state = parallel_parsed_states[state_id]
with state.wrap_context():
self.errors.set_file(state.xpath, state.id, options=state.options)
for error in parse_errors:
report_parse_error(error, self.errors)
if self.errors.is_blockers():
self.log("Bailing due to parse errors")
self.errors.raise_error()
fut.result()
for state in parallel_parsed_states:
# New parser returns serialized trees that need to be de-serialized.
with state.wrap_context():
assert state.tree is not None
if state.tree.raw_data:
state.tree = load_from_raw(
state.xpath,
state.id,
state.tree.raw_data,
self.errors,
state.options,
imports_only=bool(self.workers),
)
if self.errors.is_blockers():
self.log("Bailing due to parse errors")
self.errors.raise_error()

for state in parallel_states:
assert state.tree is not None
if state.id in parallel_parsed_states:
if state in parallel_parsed_states:
state.early_errors = list(self.errors.error_info_map.get(state.xpath, []))
state.semantic_analysis_pass1()
self.ast_cache[state.id] = (state.tree, state.early_errors)
Expand Down Expand Up @@ -1208,31 +1224,18 @@ def parse_file(
source: str,
options: Options,
raw_data: FileRawData | None = None,
) -> tuple[MypyFile, list[ParseError]]:
) -> MypyFile:
"""Parse the source of a file with the given name.

Raise CompileError if there is a parse error.
"""
imports_only = False
file_exists = self.fscache.exists(path)
if self.workers and file_exists:
# Currently, we can use the native parser only for actual files.
imports_only = True
t0 = time.time()
parse_errors: list[ParseError] = []
if raw_data:
# If possible, deserialize from known binary data instead of parsing from scratch.
tree = load_from_raw(path, id, raw_data, self.errors, options)
else:
tree, parse_errors = parse(
source,
path,
id,
self.errors,
options=options,
file_exists=file_exists,
imports_only=imports_only,
)
tree = parse(source, path, id, self.errors, options=options, file_exists=file_exists)
tree._fullname = id
if self.stats_enabled:
with self.stats_lock:
Expand All @@ -1242,7 +1245,7 @@ def parse_file(
stubs_parsed=int(tree.is_stub),
parse_time=time.time() - t0,
)
return tree, parse_errors
return tree

def load_fine_grained_deps(self, id: str) -> dict[str, set[str]]:
t0 = time.time()
Expand Down Expand Up @@ -3089,15 +3092,12 @@ def get_source(self) -> str:
self.time_spent_us += time_spent_us(t0)
return source

def parse_file_inner(
self, source: str, raw_data: FileRawData | None = None
) -> tuple[str, list[ParseError]]:
def parse_file_inner(self, source: str, raw_data: FileRawData | None = None) -> None:
t0 = time_ref()
self.tree, parse_errors = self.manager.parse_file(
self.tree = self.manager.parse_file(
self.id, self.xpath, source, options=self.options, raw_data=raw_data
)
self.time_spent_us += time_spent_us(t0)
return self.id, parse_errors

def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None = None) -> None:
"""Parse file and run first pass of semantic analysis.
Expand All @@ -3106,7 +3106,8 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None =
modules in any way. Logic here should be kept in sync with BuildManager.parse_all().
"""
self.needs_parse = False
if self.tree is not None:
tree = self.tree
if tree is not None:
# The file was already parsed.
return

Expand All @@ -3120,10 +3121,19 @@ def parse_file(self, *, temporary: bool = False, raw_data: FileRawData | None =
self.manager.errors.ignored_files.add(self.xpath)
with self.wrap_context():
manager.errors.set_file(self.xpath, self.id, options=self.options)
_, parse_errors = self.parse_file_inner(source, raw_data)
for error in parse_errors:
# New parser reports errors lazily.
report_parse_error(error, manager.errors)
self.parse_file_inner(source, raw_data)
assert self.tree is not None
# New parser returns serialized trees that need to be de-serialized.
if self.tree.raw_data is not None:
assert raw_data is None
self.tree = load_from_raw(
self.xpath,
self.id,
self.tree.raw_data,
manager.errors,
self.options,
imports_only=bool(self.manager.workers),
)
if manager.errors.is_blockers():
manager.log("Bailing due to parse errors")
manager.errors.raise_error()
Expand Down Expand Up @@ -4631,9 +4641,9 @@ def process_stale_scc(graph: Graph, ascc: SCC, manager: BuildManager) -> None:
# Re-generate import errors in case this module was loaded from the cache.
if graph[id].meta:
graph[id].verify_dependencies(suppressed_only=True)
# We may already have parsed the module, or not.
# If the former, parse_file() is a no-op.
graph[id].parse_file()
# We may already have parsed the modules, or not.
# If the former, parse_file() is a no-op.
manager.parse_all([graph[id] for id in stale], post_parse=False)
if "typing" in scc:
# For historical reasons we need to manually add typing aliases
# for built-in generic collections, see docstring of
Expand Down
11 changes: 10 additions & 1 deletion mypy/build_worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,14 +239,23 @@ def load_states(
mod_data: dict[str, tuple[bytes, FileRawData | None]],
) -> None:
"""Re-create full state of an SCC as it would have been in coordinator."""
needs_parse = []
for id in scc.mod_ids:
state = graph[id]
# Re-clone options since we don't send them, it is usually faster than deserializing.
state.options = state.options.clone_for_module(state.id)
suppressed_deps_opts, raw_data = mod_data[id]
state.parse_file(raw_data=raw_data)
if raw_data is not None:
state.parse_file(raw_data=raw_data)
else:
needs_parse.append(state)
# Set data that is needed to be written to cache meta.
state.known_suppressed_deps_opts = suppressed_deps_opts
# Perform actual parsing in parallel (but we don't need to compute dependencies).
if needs_parse:
manager.parse_all(needs_parse, post_parse=False)
for id in scc.mod_ids:
state = graph[id]
assert state.tree is not None
import_lines = {imp.line for imp in state.tree.imports}
state.imports_ignored = {
Expand Down
3 changes: 2 additions & 1 deletion mypy/checkstrformat.py
Original file line number Diff line number Diff line change
Expand Up @@ -581,13 +581,14 @@ def apply_field_accessors(

temp_errors = Errors(self.chk.options)
dummy = DUMMY_FIELD_NAME + spec.field[len(spec.key) :]
temp_ast, _ = parse(
temp_ast = parse(
dummy,
fnam="<format>",
module=None,
options=self.chk.options,
errors=temp_errors,
file_exists=False,
eager=True,
)
if temp_errors.is_errors():
self.msg.fail(
Expand Down
29 changes: 9 additions & 20 deletions mypy/nativeparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,12 +190,15 @@ def add_error(


def native_parse(
filename: str, options: Options, skip_function_bodies: bool = False, imports_only: bool = False
filename: str, options: Options, skip_function_bodies: bool = False
) -> tuple[MypyFile, list[ParseError], TypeIgnores]:
"""Parse a Python file using the native Rust-based parser.

Return (MypyFile, errors, type_ignores).

The returned tree is empty; the actual serialized data is stored in its `raw_data`
attribute. Use read_statements() and/or deserialize_imports() to de-serialize.

The caller should set these additional attributes on the returned MypyFile:
- ignored_lines: dict of type ignore comments (from the TypeIgnores return value)
- is_stub: whether the file is a .pyi stub
Expand All @@ -210,26 +213,12 @@ def native_parse(
b, errors, ignores, import_bytes, is_partial_package, uses_template_strings = (
parse_to_binary_ast(filename, options, skip_function_bodies)
)
data = ReadBuffer(b)
n = read_int(data)
state = State(options)
if imports_only:
defs = []
else:
defs = read_statements(state, data, n)

imports = deserialize_imports(import_bytes)

node = MypyFile(defs, imports)
node = MypyFile([], [])
node.path = filename
node.is_partial_stub_package = is_partial_package
if imports_only:
node.raw_data = FileRawData(
b, import_bytes, errors, dict(ignores), is_partial_package, uses_template_strings
)
node.uses_template_strings = uses_template_strings
all_errors = errors + state.errors
return node, all_errors, ignores
node.raw_data = FileRawData(
b, import_bytes, errors, dict(ignores), is_partial_package, uses_template_strings
)
return node, errors, ignores


def expect_end_tag(data: ReadBuffer) -> None:
Expand Down
Loading
Loading