From a453ccbd690543d35360a328cae9398c829f064c Mon Sep 17 00:00:00 2001 From: Farhan Date: Sun, 28 Jun 2026 20:32:25 +0500 Subject: [PATCH 01/18] feat(compiler): add incremental compile cache (REFLEX_COMPILE_CACHE) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add an experimental, flag-gated incremental frontend compile cache that recompiles only the pages whose source actually changed and reuses the rest. Two layers, both off by default and enabled by REFLEX_COMPILE_CACHE: - In-process per-page cache (page_cache.py): a Salsa-style dependency graph records the exact set of source files each page reads, so editing one file invalidates only the pages that depend on it. Pages are keyed by a small genuinely-global epoch (Reflex version + rxconfig + lockfile) plus the content hashes of their dependency set. Speeds up repeat compiles within a single process. - On-disk manifest (disk_cache.py): persists each page's serializable contribution and dependency hashes to .web/reflex_compile_cache.json so a fresh process — notably a `reflex run` hot-reload worker, which respawns on every edit — recompiles only changed pages and reuses the rest. Falls back to a full compile on any unsafe condition. REFLEX_COMPILE_CACHE_VERIFY runs a full compile alongside the cached one and asserts byte-identical output, falling back on mismatch — the backstop for gaps a static dependency graph cannot see (runtime importlib imports, data read at module-import time). Supporting changes required for safe page reuse: deterministic compile-time ref-name generation, and own-before-mutate page metadata injection. 
--- .../src/reflex_base/environment.py | 12 + .../src/reflex_base/plugins/compiler.py | 59 +- .../reflex-base/src/reflex_base/vars/base.py | 20 +- reflex/compiler/compiler.py | 235 +++++- reflex/compiler/disk_cache.py | 453 +++++++++++ reflex/compiler/page_cache.py | 726 ++++++++++++++++++ reflex/compiler/plugins/builtin.py | 2 +- reflex/compiler/utils.py | 12 +- tests/units/compiler/test_disk_cache.py | 287 +++++++ tests/units/compiler/test_page_cache.py | 115 +++ tests/units/vars/test_base.py | 22 + 11 files changed, 1914 insertions(+), 29 deletions(-) create mode 100644 reflex/compiler/disk_cache.py create mode 100644 reflex/compiler/page_cache.py create mode 100644 tests/units/compiler/test_disk_cache.py create mode 100644 tests/units/compiler/test_page_cache.py diff --git a/packages/reflex-base/src/reflex_base/environment.py b/packages/reflex-base/src/reflex_base/environment.py index 99521d1a339..cca5de00ba9 100644 --- a/packages/reflex-base/src/reflex_base/environment.py +++ b/packages/reflex-base/src/reflex_base/environment.py @@ -611,6 +611,18 @@ class EnvironmentVariables: # If this env var is set to "yes", App.compile will be a no-op REFLEX_SKIP_COMPILE: EnvVar[bool] = env_var(False, internal=True) + # Experimental: incremental compile cache. A fresh compile process (e.g. a + # reflex-run hot-reload worker) reuses each page's compiled output from an + # on-disk manifest and recompiles only the pages whose source changed, + # backed by an in-process per-page cache for repeat compiles in one process. + # See reflex/compiler/disk_cache.py and reflex/compiler/page_cache.py. + REFLEX_COMPILE_CACHE: EnvVar[bool] = env_var(False) + + # When the compile cache reuses pages, also run a full compile and assert + # byte-identical output, falling back to the full result on any mismatch. + # Doubles compile time; for validating the cache on an app. 
+ REFLEX_COMPILE_CACHE_VERIFY: EnvVar[bool] = env_var(False) + # Inherited by uvicorn/granian reload workers so the backend can distinguish # dev reload-capable worker boots from other backend starts. Never set in prod. REFLEX_DEV_BACKEND_RELOAD_ACTIVE: EnvVar[bool] = env_var(False, internal=True) diff --git a/packages/reflex-base/src/reflex_base/plugins/compiler.py b/packages/reflex-base/src/reflex_base/plugins/compiler.py index c8ffe6d2711..dd02fabda08 100644 --- a/packages/reflex-base/src/reflex_base/plugins/compiler.py +++ b/packages/reflex-base/src/reflex_base/plugins/compiler.py @@ -6,6 +6,7 @@ import dataclasses import inspect from collections.abc import Callable, Sequence +from contextlib import AbstractContextManager from contextvars import ContextVar, Token from types import TracebackType from typing import TYPE_CHECKING, Any, ClassVar, Protocol, TypeAlias, TypeVar, cast @@ -35,6 +36,15 @@ _BaseComponentT = TypeVar("_BaseComponentT", bound=BaseComponent) +#: Optional per-page source-read recorder, installed by an incremental compile +#: cache. When set, :meth:`CompileContext.compile` wraps each page's evaluation +#: in ``page_source_recorder()``; the context manager yields a set that collects +#: the source files (e.g. markdown/data) read during that page's eval, which is +#: then stored on the page's ``source_files``. ``None`` (default) disables +#: recording, so the compile path is unchanged when no cache is active. +page_source_recorder: Callable[[], AbstractContextManager[set[str]]] | None = None + + class PageDefinition(Protocol): """Protocol for page-like objects compiled by :class:`CompileContext`.""" @@ -690,6 +700,18 @@ class PageContext(BaseContext): output_path: str | None = None output_code: str | None = None source_module: str | None = None + # Source files (e.g. markdown/data) read while evaluating this page, when a + # per-page read recorder is installed (see ``page_source_recorder``). 
Lets an + # incremental cache depend on the exact non-import inputs a page consumed, so + # editing one page's data invalidates only that page. Empty when no recorder + # is active. + source_files: set[str] = dataclasses.field(default_factory=set) + # Auto-memo components first registered while compiling THIS page, keyed by + # ``(tag, source_module)``. Lets an incremental cache attribute memo + # contributions per page so a skipped page can re-register them. + memo_contributions: dict[tuple[str, str | None], Any] = dataclasses.field( + default_factory=dict + ) # Stack of ``id(component)`` for components whose subtree is # memoize-suppressed. Populated by ``MemoizeStatefulPlugin`` when it # encounters a ``MemoizationLeaf``-style snapshot boundary and popped on @@ -794,6 +816,7 @@ def compile( """ from reflex.compiler import compiler from reflex.state import all_base_state_classes + from reflex_base.vars.base import reset_unique_variable_names self.ensure_context_attached() self.compiled_pages.clear() @@ -803,15 +826,32 @@ def compile( self.memoize_wrappers.clear() self.auto_memo_components.clear() + # Reset the deterministic ref-name generator so a second in-process + # compile reproduces the same auto-generated names as the first (these + # names feed auto-memo content hashes, so drift breaks reproducibility). 
+ reset_unique_variable_names() + + recorder = page_source_recorder for page in self.pages: page_fn = page.component n_states_before = len(all_base_state_classes) - page_ctx = self.hooks.eval_page( - page_fn, - page=page, - compile_context=self, - **kwargs, - ) + if recorder is not None: + with recorder() as read_set: + page_ctx = self.hooks.eval_page( + page_fn, + page=page, + compile_context=self, + **kwargs, + ) + if page_ctx is not None: + page_ctx.source_files = read_set + else: + page_ctx = self.hooks.eval_page( + page_fn, + page=page, + compile_context=self, + **kwargs, + ) if page_ctx is None: page_name = getattr(page_fn, "__name__", repr(page_fn)) msg = ( @@ -836,6 +876,7 @@ def compile( self.compiled_pages.values(), strict=True, ): + memo_before = set(self.auto_memo_components) with page_ctx: page_ctx.root_component = self.hooks.compile_component( page_ctx.root_component, @@ -848,6 +889,12 @@ def compile( compile_context=self, **kwargs, ) + # Attribute newly-registered auto-memo components to this page. 
+ page_ctx.memo_contributions = { + key: value + for key, value in self.auto_memo_components.items() + if key not in memo_before + } page_ctx.frontend_imports = page_ctx.merged_imports(collapse=True) self.all_imports = merge_imports( diff --git a/packages/reflex-base/src/reflex_base/vars/base.py b/packages/reflex-base/src/reflex_base/vars/base.py index f7212bef821..15030b3e44a 100644 --- a/packages/reflex-base/src/reflex_base/vars/base.py +++ b/packages/reflex-base/src/reflex_base/vars/base.py @@ -9,6 +9,7 @@ import functools import inspect import json +import random import re import string import uuid @@ -44,7 +45,6 @@ from reflex_base.constants.state import FIELD_MARKER from reflex_base.utils import console, exceptions, imports, serializers, types from reflex_base.utils.compat import annotations_from_namespace -from reflex_base.utils.decorator import once from reflex_base.utils.exceptions import ( ComputedVarSignatureError, UntypedComputedVarError, @@ -3211,12 +3211,20 @@ def get_uuid_string_var() -> Var: # Set of unique variable names. USED_VARIABLES = set() +_UNIQUE_NAME_RNG = random.Random(42) -@once -def _rng(): - import random - return random.Random(42) +def reset_unique_variable_names() -> None: + """Reset the deterministic unique-name generator to its initial state. + + ``get_unique_variable_name`` draws from a seeded RNG and dedups against + ``USED_VARIABLES``; both persist process-wide, so a second in-process compile + would draw *different* ref names than the first (the RNG state and the used + set carry over). Resetting them before each compile makes the generated names + reproducible across compiles — names only need to be unique within a compile. + """ + USED_VARIABLES.clear() + _UNIQUE_NAME_RNG.seed(42) def get_unique_variable_name() -> str: @@ -3225,7 +3233,7 @@ def get_unique_variable_name() -> str: Returns: The unique variable name. 
""" - name = "".join([_rng().choice(string.ascii_lowercase) for _ in range(8)]) + name = "".join([_UNIQUE_NAME_RNG.choice(string.ascii_lowercase) for _ in range(8)]) if name not in USED_VARIABLES: USED_VARIABLES.add(name) return name diff --git a/reflex/compiler/compiler.py b/reflex/compiler/compiler.py index f0185d793d3..de5433d3697 100644 --- a/reflex/compiler/compiler.py +++ b/reflex/compiler/compiler.py @@ -63,6 +63,24 @@ def _set_progress_total( progress.update(task, total=total) +def make_compile_progress(use_rich: bool) -> Progress | console.PoorProgress: + """Build a compile progress bar. + + Args: + use_rich: Whether to use a rich progress bar (else a plain fallback). + + Returns: + A progress bar suitable for tracking a compile. + """ + if use_rich: + return Progress( + *Progress.get_default_columns()[:-1], + MofNCompleteColumn(), + TimeElapsedColumn(), + ) + return console.PoorProgress() + + def _apply_common_imports( imports: dict[str, list[ImportVar]], ): @@ -954,7 +972,7 @@ def compile_unevaluated_page( meta_args["description"] = page.description # Add meta information to the component. - utils.add_meta( + component = utils.add_meta( component, **meta_args, ) @@ -1090,6 +1108,91 @@ def _resolve_radix_themes_plugin( return plugin_chain, radix_plugin +def _normalize_imports_for_compare(all_imports: Any) -> dict[str, list[str]]: + """Render an import dict to a comparable, order-independent form. + + Args: + all_imports: The parsed import dict to normalize. + + Returns: + A mapping of library to its sorted, stringified import fields. + """ + return {lib: sorted(str(v) for v in fields) for lib, fields in all_imports.items()} + + +def _diff_compile_contexts( + incremental: CompileContext, full: CompileContext +) -> list[str]: + """Return the aggregate fields where the two compile contexts diverge. + + Args: + incremental: The cache-assisted compile context. + full: A no-cache full compile of the same app. 
+ + Returns: + A list of human-readable divergence labels (empty when identical). + """ + diffs: list[str] = [] + inc_pages = {r: pc.output_code for r, pc in incremental.compiled_pages.items()} + full_pages = {r: pc.output_code for r, pc in full.compiled_pages.items()} + if inc_pages.keys() != full_pages.keys(): + diffs.append(f"routes:{sorted(set(inc_pages) ^ set(full_pages))}") + diffs.extend( + f"page:{r}" + for r in inc_pages.keys() & full_pages.keys() + if inc_pages[r] != full_pages[r] + ) + if _normalize_imports_for_compare( + incremental.all_imports + ) != _normalize_imports_for_compare(full.all_imports): + diffs.append("all_imports") + if sorted(map(str, incremental.auto_memo_components)) != sorted( + map(str, full.auto_memo_components) + ): + diffs.append("auto_memo_components") + if sorted(incremental.stateful_routes) != sorted(full.stateful_routes): + diffs.append("stateful_routes") + if sorted(map(str, incremental.app_wrap_components)) != sorted( + map(str, full.app_wrap_components) + ): + diffs.append("app_wrap_components") + return diffs + + +def _full_compile_context( + app: App, compiler_plugins: Sequence[Plugin] +) -> CompileContext: + """Compile every page with no cache reuse (for verify-mode comparison). + + Resets the shared bundling/memo globals first so it starts from the same + clean state the primary compile did. + + Args: + app: The app to compile. + compiler_plugins: The resolved compiler plugins. + + Returns: + A fully compiled context over all pages. 
+ """ + from reflex_base.components.dynamic import bundle_library, reset_bundled_libraries + + reset_bundled_libraries() + reset_memo_component_classes() + for plugin in compiler_plugins: + for dependency in plugin.get_frontend_dependencies(): + bundle_library(dependency) + ctx = CompileContext( + app=app, + pages=list(app._unevaluated_pages.values()), + hooks=CompilerHooks( + plugins=default_page_plugins(style=app.style, plugins=compiler_plugins) + ), + ) + with ctx: + ctx.compile() + return ctx + + def compile_app( app: App, *, @@ -1138,20 +1241,30 @@ def compile_app( app._add_optional_endpoints() return False - progress = ( - Progress( - *Progress.get_default_columns()[:-1], - MofNCompleteColumn(), - TimeElapsedColumn(), - ) - if use_rich - else console.PoorProgress() - ) - fixed_steps = 7 + cache_on = not dry_run and environment.REFLEX_COMPILE_CACHE.get() + compiler_plugins, radix_themes_plugin = _resolve_radix_themes_plugin( app, config.plugins, ) + + # Experimental incremental compile cache: in a fresh process, recompile only + # the pages whose source changed and reuse the rest from the on-disk + # manifest. Falls back to a full compile on any unsafe condition. 
+ if cache_on: + from reflex.compiler import disk_cache, page_cache + + page_cache.enable_read_tracking() + if disk_cache.try_incremental_rebuild( + app, + compiler_plugins=compiler_plugins, + prerender_routes=prerender_routes, + use_rich=use_rich, + ): + return True + + progress = make_compile_progress(use_rich) + fixed_steps = 7 reset_bundled_libraries() # Drop cached memo wrapper classes so each compile recomputes a memo's # ``library`` from the current module layout (handles a module flipping to @@ -1163,9 +1276,36 @@ def compile_app( base_total = (len(app._unevaluated_pages) * 2) + fixed_steps + len(config.plugins) progress.start() task = progress.add_task("Compiling:", total=base_total) + all_pages = list(app._unevaluated_pages.values()) + + # In-process per-page cache with a Salsa-style dependency graph. Reuse the + # compiled context of pages whose recorded dependency set is byte-unchanged + # (and whose global epoch matches); compile only the rest, then re-merge + # cached pages' contributions into the app-wide aggregates. In-process only + # (contributions hold live objects), so it helps repeat compiles in one + # process; cross-process reuse is handled by the disk cache above. 
+ in_process_cache = cache_on + route_to_component = {p.route: p.component for p in all_pages} + state_index: dict[str, Any] = {} + epoch = "" + hasher: Any = None + hit_routes: list[str] = [] + if in_process_cache: + from reflex.compiler import page_cache + + state_index, _ = page_cache.state_dependency_index() + epoch = page_cache.global_epoch() + hasher = page_cache.make_hasher() + hit_routes = [ + page.route + for page in all_pages + if page_cache.validate_page(page.route, epoch, hasher) is not None + ] + + hit_set = set(hit_routes) compile_ctx = CompileContext( app=app, - pages=list(app._unevaluated_pages.values()), + pages=[p for p in all_pages if p.route not in hit_set], hooks=CompilerHooks( plugins=default_page_plugins(style=app.style, plugins=compiler_plugins) ), @@ -1177,6 +1317,72 @@ def compile_app( render_progress=lambda: progress.advance(task), ) + if in_process_cache: + from reflex.compiler import page_cache + + # Cache freshly-compiled (miss) pages with the dependency set they read. + for route in list(compile_ctx.compiled_pages): + page_ctx_fresh = compile_ctx.compiled_pages[route] + dep_hashes = page_cache.page_dependency_hashes( + page_ctx_fresh, + route_to_component[route], + state_index, + hasher, + ) + page_cache.store_page( + route, + epoch, + dep_hashes, + page_ctx_fresh, + route in compile_ctx.stateful_routes, + ) + # Re-merge cached (hit) pages' contributions into the aggregates. 
+ for route in hit_routes: + cached = page_cache.validate_page(route, epoch, hasher) + if cached is None: # defensive: invalidated concurrently + continue + page_ctx, is_stateful = cached + compile_ctx.compiled_pages[route] = page_ctx + compile_ctx.all_imports = utils.merge_imports( + compile_ctx.all_imports, page_ctx.frontend_imports + ) + compile_ctx.app_wrap_components.update(page_ctx.app_wrap_components) + compile_ctx.auto_memo_components.update(page_ctx.memo_contributions) + if is_stateful: + compile_ctx.stateful_routes[route] = None + # Restore deterministic page order (route order of the app). + compile_ctx.compiled_pages = { + p.route: compile_ctx.compiled_pages[p.route] + for p in all_pages + if p.route in compile_ctx.compiled_pages + } + if hit_routes: + console.info( + "Incremental compile: recompiled " + f"{len(all_pages) - len(hit_routes)} page(s)." + ) + + # Verify mode: prove the cache-assisted output matches a full compile, and + # fall back to the full result on any divergence so a cache bug can never + # ship. Doubles compile time; opt-in for validation. + if hit_routes and environment.REFLEX_COMPILE_CACHE_VERIFY.get(): + from reflex.compiler import page_cache + + full_ctx = _full_compile_context(app, compiler_plugins) + diffs = _diff_compile_contexts(compile_ctx, full_ctx) + if diffs: + console.warn( + "compile cache verify FAILED " + f"({len(diffs)} divergence(s): {diffs[:8]}); using the full " + "compile and clearing the page cache." 
+ ) + page_cache.clear_page_store() + compile_ctx = full_ctx + else: + console.debug( + "compile cache verify: incremental output matches full compile" + ) + for route, page_ctx in compile_ctx.compiled_pages.items(): app._check_routes_conflict(route) if not isinstance(page_ctx.root_component, Component): @@ -1407,4 +1613,9 @@ def add_save_task( for output_path, code in output_mapping.items(): utils.write_file(output_path, code) + if cache_on: + from reflex.compiler import disk_cache + + disk_cache.write_manifest(compile_ctx, all_pages, all_imports) + return True diff --git a/reflex/compiler/disk_cache.py b/reflex/compiler/disk_cache.py new file mode 100644 index 00000000000..4c4cbf3bc9d --- /dev/null +++ b/reflex/compiler/disk_cache.py @@ -0,0 +1,453 @@ +"""Experimental disk-persisted incremental compile cache (flag-gated). + +Enabled by ``REFLEX_COMPILE_CACHE``. When off (default), nothing changes. + +The in-process page cache (``page_cache._PAGE_STORE``) reuses compiled pages +within one process, so it never fires in the ``reflex run`` edit loop — the +reloader respawns a fresh worker subprocess on every ``.py`` change, starting +with an empty in-memory store. This cache persists each page's *serializable* +contribution to disk so the fresh worker can reuse it. + +**What is persisted** (``.web/reflex_compile_cache.json``): per page, the +``{path: hash}`` of its **dependency set** (the exact source files it read — +own module, markdown/data, component modules in its tree, and the state files it +uses; see ``page_cache.page_dependency_hashes``), its rendered ``output_code`` +and path, its ``frontend_imports`` (``ImportVar`` is a frozen dataclass of +primitives), its app-wrap key-set, and whether evaluating it registered new +state. App-wide: the genuinely-global ``epoch`` (reflex version + +config/lockfiles), the reflex version, and the merged imports. 
The manifest +deliberately stores **no rendered memo files**: a hit page's memo files are +already on disk from the prior compile, and a miss page re-renders its own on +recompile — so writing the manifest is just string/key serialization, never a +second memo render. + +**The fast path** (``try_incremental_rebuild``). On a fresh compile it reuses the +manifest when the global inputs match (reflex version, route set, and the global +epoch). Each page is then a hit iff **every file in its recorded dependency set +is byte-unchanged** — so editing one markdown doc or one shared view recompiles +exactly the pages that depend on it, not all of them. The on-disk app-wide files +(app root, contexts, theme, stylesheet, …) stay valid because the epoch and route +set are unchanged. Then: + +- A *stateless* hit page is skipped entirely (its frontend file is reused and + evaluating it would register nothing). +- A *stateful* hit page is re-evaluated for the backend only (to re-register its + state classes), reusing its frontend file. ``is_stateful`` is true exactly + when the page's first eval grew the state registry, so this is precisely the + set whose state would otherwise go missing. +- A *miss* page (source changed) is fully recompiled and its files rewritten. + +After recompiling misses, two guards must hold or the whole thing falls back to a +full compile (return False): each miss page's app-wrap key-set and stateful flag +must be unchanged (otherwise the reused on-disk app root would be wrong). A route +add/remove or global-epoch change also falls back; other edits only invalidate +their dependent pages. ``REFLEX_COMPILE_CACHE_VERIFY`` is the backstop for an app.
+""" + +from __future__ import annotations + +import dataclasses +import json +from typing import TYPE_CHECKING, Any + +from reflex_base.plugins import CompileContext, CompilerHooks +from reflex_base.utils.imports import ImportVar, merge_imports + +from reflex.compiler import page_cache +from reflex.compiler.plugins import default_page_plugins +from reflex.utils import console, prerequisites + +if TYPE_CHECKING: + from collections.abc import Callable, Sequence + from pathlib import Path + + from reflex_base.plugins import PageDefinition + from reflex_base.utils.imports import ParsedImportDict + + from reflex.app import App + +#: Bump when the manifest layout changes (old manifests are then ignored). +_SCHEMA = 2 +#: Manifest filename under the web directory. +_MANIFEST_FILE = "reflex_compile_cache.json" + + +def _manifest_path() -> Path: + return prerequisites.get_web_dir() / _MANIFEST_FILE + + +def _serialize_imports(imports: ParsedImportDict) -> dict[str, list[dict[str, Any]]]: + """Serialize a parsed import dict to JSON-able primitives. + + Args: + imports: The parsed import dict to serialize. + + Returns: + A JSON-serializable representation. + """ + return {lib: [dataclasses.asdict(iv) for iv in ivs] for lib, ivs in imports.items()} + + +def _deserialize_imports(data: dict[str, list[dict[str, Any]]]) -> ParsedImportDict: + """Rebuild a parsed import dict from its serialized form. + + Args: + data: The serialized import dict. + + Returns: + The reconstructed parsed import dict. + """ + return {lib: [ImportVar(**iv) for iv in ivs] for lib, ivs in data.items()} + + +def _wrap_key_strs(keys: Any) -> list[str]: + """Render app-wrap ``(priority, name)`` keys as sorted stable strings. + + Args: + keys: An iterable of ``(priority, name)`` app-wrap keys. + + Returns: + A sorted list of ``"priority:name"`` strings. 
+ """ + return sorted(f"{p}:{n}" for p, n in keys) + + +def load_manifest() -> dict[str, Any] | None: + """Load the persisted compile manifest, or None if absent/unusable. + + Returns: + The parsed manifest dict, or None. + """ + path = _manifest_path() + if not path.exists(): + return None + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, ValueError): + return None + if not isinstance(data, dict) or data.get("schema") != _SCHEMA: + return None + return data + + +def write_manifest( + compile_ctx: CompileContext, + pages: Sequence[PageDefinition], + install_imports: ParsedImportDict, + root: Path | None = None, +) -> None: + """Persist a manifest of the just-completed full compile. + + Best-effort: any failure leaves no manifest (the next compile is full), it + never breaks the build. + + Args: + compile_ctx: The completed compile context (all pages compiled). + pages: The full list of page definitions that were compiled. + install_imports: The **complete** frontend import set the full compile + installed — page imports merged with the app-root (app-wrap, e.g. + the Toaster/``sonner`` provider) and memo-component imports. An + incremental rebuild reuses the on-disk app-wide files, so it must + install from this complete set, not just the per-page union. + root: Project root for fingerprinting. Defaults to cwd. 
+ """ + try: + state_index, _ = page_cache.state_dependency_index(root) + hasher = page_cache.make_hasher() + epoch = page_cache.global_epoch(root) + + pages_data: dict[str, Any] = {} + for page in pages: + page_ctx = compile_ctx.compiled_pages.get(page.route) + if ( + page_ctx is None + or page_ctx.output_code is None + or page_ctx.output_path is None + ): + return # incomplete compile -> do not write a partial manifest + pages_data[page.route] = { + "dep_hashes": page_cache.page_dependency_hashes( + page_ctx, page.component, state_index, hasher, root + ), + "output_path": page_ctx.output_path, + "output_code": page_ctx.output_code, + "frontend_imports": _serialize_imports(page_ctx.frontend_imports), + "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), + "is_stateful": page.route in compile_ctx.stateful_routes, + } + + manifest = { + "schema": _SCHEMA, + "reflex_version": page_cache._reflex_version(), + "epoch": epoch, + "all_imports": _serialize_imports(install_imports), + "pages": pages_data, + } + path = _manifest_path() + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(manifest), encoding="utf-8") + except Exception as exc: # best-effort: never break the build + console.debug(f"disk compile cache: manifest write skipped ({exc!r})") + + +def globals_match( + manifest: dict[str, Any], + *, + routes: set[str], + epoch: str, +) -> bool: + """Whether the manifest's genuinely-global inputs match the current compile. + + The fast rebuild needs the route set unchanged (adding/removing a route + changes the shared nav on every page) and the global epoch unchanged (Reflex + version + config/lockfiles). Everything else is decided per page via its + dependency set, so a shared-component or markdown edit no longer blocks the + fast path — only the pages that actually depend on the changed file miss. + + Args: + manifest: The loaded manifest. + routes: The current set of page routes. 
+ epoch: The current global epoch (see :func:`page_cache.global_epoch`). + + Returns: + True iff the global inputs match. + """ + return ( + manifest.get("reflex_version") == page_cache._reflex_version() + and set(manifest.get("pages", {})) == routes + and manifest.get("epoch") == epoch + ) + + +def partition_pages( + pages: Sequence[PageDefinition], + manifest: dict[str, Any], + hasher: Callable[[str], str | None], +) -> list[PageDefinition]: + """Return the pages whose dependency set changed since the manifest. + + Globals are assumed already matched (see :func:`globals_match`), so a page is + a hit iff every file in its recorded dependency set is byte-unchanged. + + Args: + pages: The current page definitions. + manifest: The loaded manifest. + hasher: A memoized path -> content-hash function. + + Returns: + The list of miss pages (a dependency changed) to recompile. + """ + manifest_pages = manifest["pages"] + return [ + page + for page in pages + if not page_cache.deps_unchanged( + manifest_pages[page.route]["dep_hashes"], hasher + ) + ] + + +def try_incremental_rebuild( + app: App, + *, + compiler_plugins: Any, + prerender_routes: bool, + root: Path | None = None, + use_rich: bool = True, +) -> bool: + """Attempt a disk-cache-assisted partial rebuild; report whether it ran. + + Returns False (so the caller does a full compile) whenever anything is + unsafe to reuse: no/old manifest, a changed global input, a route change, or + a miss page that altered its app-wrap set or stateful flag. + + On success, reports (at info level) how many pages were recompiled vs reused + and, while recompiling, shows a progress bar over the changed pages so a hot + reload makes the incremental work visible. + + Args: + app: The app being compiled. + compiler_plugins: The resolved compiler plugins for this compile. + prerender_routes: Whether to prerender routes. + root: Project root for fingerprinting. Defaults to cwd. 
+ use_rich: Whether to use a rich progress bar (else a plain fallback). + + Returns: + True if the partial rebuild completed (the caller should return), else + False (the caller should run a full compile). + """ + manifest = load_manifest() + if manifest is None: + return False + + pages = list(app._unevaluated_pages.values()) + routes = {p.route for p in pages} + hasher = page_cache.make_hasher() + epoch = page_cache.global_epoch(root) + + if not globals_match(manifest, routes=routes, epoch=epoch): + return False + + miss_pages = partition_pages(pages, manifest, hasher) + miss_routes = {p.route for p in miss_pages} + + # Recompile only the source-changed pages. + miss_ctx = None + if miss_pages: + from reflex_base.components.dynamic import ( + bundle_library, + reset_bundled_libraries, + ) + from reflex_base.components.memo import reset_memo_component_classes + + from reflex.compiler.compiler import make_compile_progress + + # Match the full compile's clean bundling/memo state before compiling. + reset_bundled_libraries() + reset_memo_component_classes() + for plugin in compiler_plugins: + for dependency in plugin.get_frontend_dependencies(): + bundle_library(dependency) + miss_ctx = CompileContext( + app=app, + pages=miss_pages, + hooks=CompilerHooks( + plugins=default_page_plugins(style=app.style, plugins=compiler_plugins) + ), + ) + # Progress over the changed pages (evaluate + render each), so a hot + # reload shows how much is being recompiled. + progress = make_compile_progress(use_rich) + progress.start() + task = progress.add_task( + "Recompiling changed pages:", total=len(miss_pages) * 2 + ) + try: + with miss_ctx: + miss_ctx.compile( + evaluate_progress=lambda: progress.advance(task), + render_progress=lambda: progress.advance(task), + ) + finally: + progress.stop() + # Guard: a miss must not change the app-wrap set or its stateful flag, or + # the reused on-disk app root / state marker would be wrong. 
+ for page in miss_pages: + page_ctx = miss_ctx.compiled_pages.get(page.route) + if ( + page_ctx is None + or page_ctx.output_code is None + or page_ctx.output_path is None + ): + return False + entry = manifest["pages"][page.route] + if ( + _wrap_key_strs(page_ctx.app_wrap_components.keys()) + != entry["app_wrap_keys"] + ): + return False + if (page.route in miss_ctx.stateful_routes) != entry["is_stateful"]: + return False + + from reflex.compiler import compiler + + # Write changed pages + their memo files; reuse everything else on disk. + install_imports = _deserialize_imports(manifest["all_imports"]) + if miss_ctx is not None: + for page in miss_pages: + page_ctx = miss_ctx.compiled_pages[page.route] + # Both are guaranteed non-None by the guard loop above. + output_path = page_ctx.output_path + output_code = page_ctx.output_code + if output_path is None or output_code is None: + return False + compiler.utils.write_file( + compiler.utils.resolve_path_of_web_dir(output_path), + output_code, + ) + memo_defs = list(page_ctx.memo_contributions.values()) + memo_files, memo_imports = compiler.compile_memo_components(memo_defs) + for mpath, mcode in memo_files: + compiler.utils.write_file( + compiler.utils.resolve_path_of_web_dir(mpath), mcode + ) + install_imports = merge_imports( + install_imports, page_ctx.frontend_imports, memo_imports + ) + + # Re-register state for stateful hit pages (skipping eval would drop their + # state classes); stateless hits need nothing. 
def _update_manifest_for_misses(
    manifest: dict[str, Any],
    miss_ctx: CompileContext | None,
    miss_pages: Sequence[PageDefinition],
) -> None:
    """Rewrite the on-disk manifest with fresh entries for recompiled pages.

    Best-effort: any exception raised while hashing, serializing or writing is
    reported through ``console.debug`` and swallowed, so a failed refresh never
    fails the compile itself.

    Args:
        manifest: The loaded manifest (mutated and rewritten).
        miss_ctx: The compile context of the recompiled pages, if any.
        miss_pages: The recompiled page definitions.
    """
    nothing_recompiled = miss_ctx is None or not miss_pages
    if nothing_recompiled:
        return
    try:
        # Only the identifier -> file index is needed; the fine-file set is unused.
        state_index, _fine_files = page_cache.state_dependency_index()
        hash_path = page_cache.make_hasher()
        merged_imports = _deserialize_imports(manifest["all_imports"])
        for page in miss_pages:
            ctx = miss_ctx.compiled_pages[page.route]
            refreshed_entry = {
                "dep_hashes": page_cache.page_dependency_hashes(
                    ctx, page.component, state_index, hash_path
                ),
                "output_path": ctx.output_path,
                "output_code": ctx.output_code,
                "frontend_imports": _serialize_imports(ctx.frontend_imports),
                "app_wrap_keys": _wrap_key_strs(ctx.app_wrap_components.keys()),
                "is_stateful": page.route in miss_ctx.stateful_routes,
            }
            manifest["pages"][page.route] = refreshed_entry
            # Imports only ever accumulate here; nothing is removed.
            merged_imports = merge_imports(merged_imports, ctx.frontend_imports)
        manifest["all_imports"] = _serialize_imports(merged_imports)
        serialized = json.dumps(manifest)
        _manifest_path().write_text(serialized, encoding="utf-8")
    except Exception as exc:  # best-effort
        console.debug(f"disk compile cache: manifest refresh skipped ({exc!r})")
+ """ + if miss_ctx is None or not miss_pages: + return + try: + state_index, _ = page_cache.state_dependency_index() + hasher = page_cache.make_hasher() + all_imports = _deserialize_imports(manifest["all_imports"]) + for page in miss_pages: + page_ctx = miss_ctx.compiled_pages[page.route] + manifest["pages"][page.route] = { + "dep_hashes": page_cache.page_dependency_hashes( + page_ctx, page.component, state_index, hasher + ), + "output_path": page_ctx.output_path, + "output_code": page_ctx.output_code, + "frontend_imports": _serialize_imports(page_ctx.frontend_imports), + "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), + "is_stateful": page.route in miss_ctx.stateful_routes, + } + all_imports = merge_imports(all_imports, page_ctx.frontend_imports) + manifest["all_imports"] = _serialize_imports(all_imports) + _manifest_path().write_text(json.dumps(manifest), encoding="utf-8") + except Exception as exc: # best-effort + console.debug(f"disk compile cache: manifest refresh skipped ({exc!r})") diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py new file mode 100644 index 00000000000..97116337a7a --- /dev/null +++ b/reflex/compiler/page_cache.py @@ -0,0 +1,726 @@ +"""Experimental incremental compile cache (flag-gated). + +Enabled by ``REFLEX_COMPILE_CACHE``. When off (the default), the compiler +behaves exactly as before. + +**In-process per-page cache, with a Salsa-style dependency graph.** Each page +records the *exact set of source files it actually read*, so a change +invalidates only the pages that depend on it (not all pages). A page's +dependency set is the union of: + +- the **transitive first-party ``.py`` import closure** of its defining module + (``page_py_dependencies`` — captures function-based views and shared helpers + that never appear as nodes in the rendered tree, e.g. 
a ``def hero()`` view, so + editing one invalidates exactly the pages whose closure imports it), +- the **source files read while evaluating it** (markdown/data — captured by + ``record_reads`` via the per-page read recorder; this is what lets editing one + ``.md`` doc page recompile only that page), +- the **component modules in its rendered tree** (``component_module_files`` — + belt-and-suspenders for components injected at runtime rather than statically + imported), +- the **fine-grained state files** it references (``used_state_files``). + +A page is reused iff every file in its dependency set is byte-unchanged and the +small genuinely-global ``global_epoch`` (Reflex version + ``rxconfig`` + lockfile) +is unchanged; adding/removing a route is handled separately (it changes shared +nav). Per-page dependency sets also track files *outside* the project root (e.g. +the docs site reads markdown from a sibling directory), so an external-source +edit still invalidates exactly the dependent pages. Cached pages' contributions +are re-merged into the app-wide aggregates by ``compile_app``; the store holds +live objects (``PageContext``), so it is in-process only. The cross-process +counterpart that survives a hot-reload worker respawn lives in +``reflex/compiler/disk_cache.py``. ``REFLEX_COMPILE_CACHE_VERIFY`` proves the +cache-assisted output matches a full compile and falls back on any mismatch — +the backstop for the residual gaps the static graph cannot see: runtime +``importlib`` imports and data files read at *module-import* time (outside the +per-page eval window). 
+""" + +from __future__ import annotations + +import builtins +import contextlib +import hashlib +import re +from collections.abc import Callable +from contextvars import ContextVar +from importlib import metadata +from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from reflex_base.components.component import BaseComponent + from reflex_base.plugins import PageContext + +#: Directories never worth hashing (build artifacts, deps, caches). +_SKIP_DIRS = {".web", ".venv", "venv", "node_modules", "__pycache__", ".git", "assets"} + +#: Genuinely-global files: a change here can affect every page's output, so it +#: bumps ``global_epoch`` rather than any single page's dependency set. +_GLOBAL_FILES = ("rxconfig.py", "reflex.lock", "uv.lock", "package.json") + +#: Matches a JS state-context identifier in compiled page output. +STATE_CONTEXT_RE = re.compile(r"reflex___state[A-Za-z0-9_]+") + + +def _sha(*parts: bytes | str) -> str: + h = hashlib.sha256() + for p in parts: + h.update(p.encode() if isinstance(p, str) else p) + h.update(b"\x1f") + return h.hexdigest() + + +def _reflex_version() -> str: + try: + return metadata.version("reflex") + except Exception: + return "unknown" + + +# --- Per-page read tracking (the Salsa "input read" seam) ------------------- +# A page's markdown/data dependencies are read lazily while the page is +# evaluated (e.g. ``Path(doc).read_text()`` inside the page callable). We record +# those reads per page so the cache depends on the exact files consumed. Patches +# are installed once (idempotent) and only record while a recording set is active +# on the current task (set by ``record_reads``), so the overhead is a contextvar +# read when no cache is running. 
#: The set that reads are recorded into on the current task (None = not recording).
_active_reads: ContextVar[set[str] | None] = ContextVar("_active_reads", default=None)
#: Whether the read-recording patches have already been installed.
_patched = False
#: Project root used by ``_record_read`` to decide which reads are first-party.
_recorder_root: Path | None = None

#: Path parts that mark a dependency/build location whose reads are never a
#: page's own source dependency (a change there flows through the version/epoch).
_EXCLUDE_PARTS = _SKIP_DIRS | {"site-packages", "dist-packages"}

#: Suffixes of content files a page may read from *outside* the project root
#: (e.g. the docs app reads its markdown from a sibling directory). Reads under
#: the project root are tracked regardless of suffix; reads elsewhere only if
#: they look like content, so stdlib/site reads of other types are ignored.
_CONTENT_SUFFIXES = {
    ".md",
    ".mdx",
    ".py",
    ".json",
    ".yaml",
    ".yml",
    ".toml",
    ".txt",
    ".csv",
    ".html",
    ".rst",
}


def _record_read(path: object) -> None:
    """Add ``path`` to the active recording set if it is a trackable source file.

    A no-op unless a recording set is active on the current task. Paths that
    cannot be resolved, or that contain any ``_EXCLUDE_PARTS`` component, are
    ignored. Files under the recorder root are always recorded; files elsewhere
    are recorded only when their suffix is in ``_CONTENT_SUFFIXES``.

    Args:
        path: The path-like object that is about to be read.
    """
    target = _active_reads.get()
    if target is None:
        return
    try:
        resolved = Path(path).resolve()  # type: ignore[arg-type]
    except (OSError, TypeError, ValueError):
        # e.g. an integer file descriptor passed to ``open``.
        return
    if any(part in _EXCLUDE_PARTS for part in resolved.parts):
        return
    root = _recorder_root
    under_root = root is not None and root in resolved.parents
    if not under_root and resolved.suffix.lower() not in _CONTENT_SUFFIXES:
        return
    target.add(str(resolved))


def enable_read_tracking(root: Path | None = None) -> None:
    """Install per-page source-read tracking and register the recorder hook.

    Idempotent. Patches ``Path.read_text``/``read_bytes``/``open`` and the
    builtin ``open`` to record reads (only while a recording set is active) and
    points the reflex-base compile loop at :func:`record_reads`. Called when an
    incremental cache flag is on; a no-op to the compile otherwise.

    ``Path.open`` is patched explicitly because pathlib opens files through
    ``io.open`` rather than the ``builtins.open`` name, so replacing the builtin
    alone would miss ``with path.open() as f:`` reads.

    Args:
        root: Project root. Reads under it are always recorded; reads outside
            it are recorded only for content-like suffixes (see
            ``_CONTENT_SUFFIXES``). Defaults to cwd.
    """
    global _patched, _recorder_root
    # The root and the recorder hook are refreshed on every call; only the
    # monkeypatching below is one-shot.
    _recorder_root = (root or Path.cwd()).resolve()

    from reflex_base.plugins import compiler as _bc

    _bc.page_source_recorder = record_reads

    if _patched:
        return
    _patched = True

    orig_read_text = Path.read_text
    orig_read_bytes = Path.read_bytes
    orig_path_open = Path.open
    orig_open = builtins.open

    def is_read_mode(mode: str) -> bool:
        # Only pure read modes count; anything that writes/creates is not a
        # source dependency.
        return "r" in mode and not ("w" in mode or "a" in mode or "x" in mode)

    def read_text(self: Path, *args: object, **kwargs: object):
        _record_read(self)
        return orig_read_text(self, *args, **kwargs)  # type: ignore[arg-type]

    def read_bytes(self: Path):
        _record_read(self)
        return orig_read_bytes(self)

    def path_open(self: Path, mode: str = "r", *args: object, **kwargs: object):
        if is_read_mode(mode):
            _record_read(self)
        return orig_path_open(self, mode, *args, **kwargs)  # type: ignore[arg-type]

    def open_(file: object, mode: str = "r", *args: object, **kwargs: object):
        if is_read_mode(mode):
            _record_read(file)
        return orig_open(file, mode, *args, **kwargs)  # type: ignore[arg-type]

    Path.read_text = read_text  # type: ignore[method-assign,assignment]
    Path.read_bytes = read_bytes  # type: ignore[method-assign,assignment]
    Path.open = path_open  # type: ignore[method-assign,assignment]
    builtins.open = open_  # type: ignore[assignment]
@contextlib.contextmanager
def record_reads():
    """Collect the source files read on the current task while the block runs.

    Activates a fresh recording set for the duration of the ``with`` block and
    restores the previous recording state on exit, even if the block raises.

    Yields:
        The set of resolved source-file path strings read within the block.
    """
    captured: set[str] = set()
    reset_token = _active_reads.set(captured)
    try:
        yield captured
    finally:
        _active_reads.reset(reset_token)


def global_epoch(root: Path | None = None) -> str:
    """Fingerprint the genuinely-global inputs (Reflex version + config/lockfiles).

    These inputs can affect every page's output but belong to no single page,
    so they gate the whole cache rather than any one page's dependency set.

    Args:
        root: Project root. Defaults to cwd.

    Returns:
        A hex digest of the global inputs.
    """
    base_dir = (root or Path.cwd()).resolve()

    def fingerprint(name: str) -> str:
        # An unreadable or missing file contributes an empty hash, so creating
        # or deleting it still changes the epoch.
        try:
            content = (base_dir / name).read_bytes()
        except OSError:
            return f"{name}="
        return f"{name}={hashlib.sha256(content).hexdigest()}"

    version_part = f"reflex={_reflex_version()}"
    return _sha(version_part, *[fingerprint(name) for name in _GLOBAL_FILES])
def _module_file(component: object) -> Path | None:
    """Return the source file of the module that ``component`` claims to live in.

    Looks ``component.__module__`` up in ``sys.modules``; returns None when the
    module is not loaded or has no ``__file__``.
    """
    import sys

    module_name = getattr(component, "__module__", "") or ""
    module = sys.modules.get(module_name)
    location = getattr(module, "__file__", None)
    if not location:
        return None
    return Path(location)


def page_module_files(components: object) -> set[Path]:
    """Resolve the set of module files that define the given page components.

    Args:
        components: An iterable of page component callables/objects.

    Returns:
        The set of resolved module file paths.
    """
    return {
        found.resolve()
        for comp in components  # type: ignore[attr-defined]
        if (found := _module_file(comp)) is not None
    }


def component_module_files(
    root_component: object, root: Path | None = None
) -> set[Path]:
    """Resolve the first-party module files of every component in a tree.

    Walks the tree reachable through each node's ``children`` attribute and
    collects the defining module file of each node's class, keeping only files
    located under ``root``.

    Args:
        root_component: The page's root component (its rendered tree).
        root: Project root; only files under it are returned. Defaults to cwd.

    Returns:
        The set of resolved first-party module files the tree depends on.
    """
    project_root = (root or Path.cwd()).resolve()
    collected: set[Path] = set()
    visited_ids: set[int] = set()
    pending: list[object] = [root_component]
    while pending:
        node = pending.pop()
        node_id = id(node)
        if node_id in visited_ids:
            # Shared instances are only walked once.
            continue
        visited_ids.add(node_id)
        defining_file = _module_file(type(node))
        if defining_file is not None:
            resolved = defining_file.resolve()
            if project_root in resolved.parents:
                collected.add(resolved)
        kids = getattr(node, "children", None)
        if kids:
            pending.extend(kids)
    return collected
#: Cache of the first-party import graph, keyed by project root.
_import_graph_cache: dict[Path, dict[str, set[str]]] = {}


def _resolve_module_file(name: str) -> str | None:
    """Return the resolved file path of an already-imported module, if any.

    Args:
        name: A dotted module name.

    Returns:
        The resolved ``__file__`` of ``sys.modules[name]`` as a string, or None
        when the module is not loaded or has no file.
    """
    import sys

    mod = sys.modules.get(name)
    file = getattr(mod, "__file__", None)
    return str(Path(file).resolve()) if file else None


def _import_from_targets(
    node: object, modname: str, is_package: bool | None = None
) -> list[str]:
    """Resolve a ``from ... import ...`` node to candidate module names.

    Handles relative imports via the importing module's package. Returns the
    base module and each ``base.name`` (a name may be a submodule or an attribute
    — both candidates are resolved against ``sys.modules`` by the caller).

    Relative imports are anchored at the importing module's *package*: for a
    regular module ``a.b.c`` that is ``a.b``, but for a package ``a.b`` (its
    ``__init__.py``) it is ``a.b`` itself. Treating a package like a regular
    module would resolve ``from .views import x`` one level too high and drop
    barrel-file re-exports from the graph.

    Args:
        node: An ``ast.ImportFrom`` node.
        modname: The dotted name of the module containing the import.
        is_package: Whether ``modname`` is a package. When None, inferred from
            whether ``sys.modules[modname]`` has a ``__path__``.

    Returns:
        Candidate dotted module names to resolve (empty if the node is not an
        ``ImportFrom`` or the relative import climbs past the top-level package).
    """
    import ast
    import sys

    if not isinstance(node, ast.ImportFrom):
        return []
    if node.level:  # relative import: walk up from the importing package
        if is_package is None:
            is_package = hasattr(sys.modules.get(modname), "__path__")
        package = modname if is_package else modname.rpartition(".")[0]
        # Level 1 is the package itself; each extra level climbs one parent.
        bits = package.rsplit(".", node.level - 1)
        if not package or len(bits) < node.level:
            return []
        base_pkg = bits[0]
        base = f"{base_pkg}.{node.module}" if node.module else base_pkg
    else:
        base = node.module or ""
    if not base:
        return []
    return [base, *(f"{base}.{a.name}" for a in node.names)]


def build_import_graph(root: Path | None = None) -> dict[str, set[str]]:
    """Build the first-party import graph (file -> files it imports).

    Parses every already-imported first-party module's source for ``import`` and
    ``from`` statements and resolves them to files under ``root`` via
    ``sys.modules``. Cached per root for the duration of the process (see
    ``_import_graph_cache``). Modules whose source cannot be read or parsed get
    an empty dependency set.

    Args:
        root: Project root. Defaults to cwd.

    Returns:
        A mapping of resolved file path -> the set of first-party files it imports.
    """
    import ast
    import sys

    root = (root or Path.cwd()).resolve()
    cached = _import_graph_cache.get(root)
    if cached is not None:
        return cached

    # Index every loaded module that lives under the project root. Snapshot
    # sys.modules first: it may change while we iterate.
    file_to_mod: dict[str, str] = {}
    for name, mod in list(sys.modules.items()):
        file = getattr(mod, "__file__", None)
        if not file:
            continue
        rf = Path(file).resolve()
        if root in rf.parents:
            file_to_mod[str(rf)] = name

    graph: dict[str, set[str]] = {}
    for file, modname in file_to_mod.items():
        deps: set[str] = set()
        try:
            tree = ast.parse(Path(file).read_bytes())
        except (OSError, SyntaxError, ValueError):
            graph[file] = deps
            continue
        # ast.walk also visits imports nested in functions and conditionals.
        for node in ast.walk(tree):
            names: list[str] = []
            if isinstance(node, ast.Import):
                names = [a.name for a in node.names]
            elif isinstance(node, ast.ImportFrom):
                names = _import_from_targets(node, modname)
            for n in names:
                target = _resolve_module_file(n)
                # Keep only edges that land on another first-party file.
                if target is not None and target in file_to_mod:
                    deps.add(target)
        graph[file] = deps
    _import_graph_cache[root] = graph
    return graph
def clear_import_graph() -> None:
    """Drop the cached import graph (e.g. after modules are reloaded).

    Empties ``_import_graph_cache`` in place for every root, so the next
    ``build_import_graph`` call re-parses the loaded modules instead of
    returning the previously cached mapping.
    """
    _import_graph_cache.clear()
after modules are reloaded).""" + _import_graph_cache.clear() + + +def page_py_dependencies( + component: BaseComponent | object, root: Path | None = None +) -> set[str]: + """Return the transitive first-party ``.py`` files a page's code depends on. + + Starts from the page callable's *real* defining file (``__code__`` filename, + which is correct even when ``__module__`` was reassigned, as the docs app does + for generated doc pages) plus its module file, and walks the import graph. + Captures function-based views and shared helpers that the rendered-tree walk + cannot see. + + Args: + component: The page component or callable. + root: Project root. Defaults to cwd. + + Returns: + The set of resolved first-party ``.py`` dependency file paths. + """ + root = (root or Path.cwd()).resolve() + graph = build_import_graph(root) + + start: set[str] = set() + code = getattr(component, "__code__", None) + filename = getattr(code, "co_filename", None) + if filename: + rf = Path(filename).resolve() + if root in rf.parents: + start.add(str(rf)) + own = _module_file(component) + if own is not None: + rf = own.resolve() + if root in rf.parents: + start.add(str(rf)) + + seen: set[str] = set() + stack = list(start) + while stack: + cur = stack.pop() + if cur in seen: + continue + seen.add(cur) + stack.extend(graph.get(cur, ())) + return seen + + +def make_hasher() -> Callable[[str], str | None]: + """Return a content-hasher that memoizes each path within one compile. + + Shared component files appear in many pages' dependency sets; hashing each + file at most once keeps per-page validation cheap. + + Returns: + A function mapping a path string to its content hash (None if unreadable). 
def _subclasses(root_cls: type) -> list[type]:
    """Return ``root_cls`` and all of its transitive subclasses, each once.

    The traversal is depth-first using a LIFO stack, so the most recently
    registered direct subclass is visited first.
    """
    visited: set[type] = set()
    ordered: list[type] = []
    pending = [root_cls]
    while pending:
        current = pending.pop()
        if current in visited:
            continue
        visited.add(current)
        ordered.append(current)
        pending.extend(current.__subclasses__())
    return ordered


def state_dependency_index(
    root: Path | None = None,
) -> tuple[dict[str, Path], set[Path]]:
    """Build the state-context -> file index for fine-grained invalidation.

    Maps ``format_state_name(state.get_full_name())`` of every ``BaseState``
    subclass defined under ``root`` to its module file. Only files that define
    state but no ``Component`` subclass are kept ("fine" files); identifiers
    pointing at mixed state/component files are dropped from the index.

    Args:
        root: Project root. Defaults to cwd.

    Returns:
        ``(identifier_to_file, fine_state_files)``.
    """
    project_root = (root or Path.cwd()).resolve()
    from reflex_base.components.component import Component
    from reflex_base.utils import format as fmt

    from reflex.state import BaseState

    def first_party_file(obj: object) -> Path | None:
        location = _module_file(obj)
        if location is None:
            return None
        resolved = location.resolve()
        if project_root in resolved.parents:
            return resolved
        return None

    component_files: set[Path] = set()
    for component_cls in _subclasses(Component):
        located = first_party_file(component_cls)
        if located:
            component_files.add(located)

    ident_to_file: dict[str, Path] = {}
    state_files: set[Path] = set()
    for state_cls in _subclasses(BaseState):
        located = first_party_file(state_cls)
        if located is None:
            continue
        # A state whose name cannot be formatted is left out of the index
        # entirely (neither mapped nor counted as a state file).
        try:
            ident_to_file[fmt.format_state_name(state_cls.get_full_name())] = located
            state_files.add(located)
        except Exception:
            continue

    fine_files = state_files - component_files
    fine_index = {
        ident: path for ident, path in ident_to_file.items() if path in fine_files
    }
    return fine_index, fine_files
def file_hashes(files: set[Path]) -> dict[str, str]:
    """Map each file (as a string) to a hash of its current content.

    Files that cannot be read are silently left out of the result.

    Args:
        files: The files to hash.

    Returns:
        A mapping of file path string to content hash.
    """
    hashed: dict[str, str] = {}
    for file in files:
        try:
            content = file.read_bytes()
        except OSError:
            continue
        hashed[str(file)] = hashlib.sha256(content).hexdigest()
    return hashed


def used_state_files(
    output_code: str,
    memo_components: object,
    id_to_file: dict[str, Path],
) -> set[Path]:
    """Return the fine-grained state files a compiled page depends on.

    Scans the page's compiled output plus the rendered form of each of its memo
    components for state-context identifiers and maps the known ones to files.
    If rendering any memo component raises, every file in the index is returned
    instead (conservative: depend on all of them).

    Args:
        output_code: The page's compiled JS.
        memo_components: The page's own memo component subtrees (renderable).
        id_to_file: The state-context identifier -> file index.

    Returns:
        The set of fine-grained state files this page depends on.
    """
    texts = [output_code or ""]
    try:
        for memo in memo_components:  # type: ignore[attr-defined]
            texts.append(str(memo.render()))
    except Exception:
        return set(id_to_file.values())  # conservative: depend on all
    return {
        id_to_file[ident]
        for text in texts
        for ident in STATE_CONTEXT_RE.findall(text)
        if ident in id_to_file
    }
def page_dependency_files(
    page_ctx: PageContext,
    component: BaseComponent | object,
    state_index: dict[str, Path],
    root: Path | None = None,
) -> set[str]:
    """Return the full set of source files a compiled page depends on.

    The union of four sources: the first-party component modules in the page's
    rendered tree, the fine-grained state files referenced by its output and
    memo components, the transitive first-party ``.py`` import closure of the
    page callable, and the files recorded as read while the page was evaluated
    (``page_ctx.source_files``, added as recorded without a ``root`` filter).

    Args:
        page_ctx: The compiled page context (tree, output, recorded reads).
        component: The page's component/callable (for its defining module).
        state_index: The state-context identifier -> file index.
        root: Project root used to filter component modules and the import
            closure. Defaults to cwd.

    Returns:
        The set of dependency file path strings.
    """
    project_root = (root or Path.cwd()).resolve()
    tree_files = component_module_files(page_ctx.root_component, project_root)
    memo_subtrees = [memo.component for memo in page_ctx.memo_contributions.values()]
    state_files = used_state_files(
        page_ctx.output_code or "", memo_subtrees, state_index
    )
    dependencies = {str(path) for path in tree_files | state_files}
    # Transitive first-party .py imports (captures function-based views/helpers
    # that never appear as nodes in the rendered tree).
    dependencies.update(page_py_dependencies(component, project_root))
    # Files read while evaluating the page (markdown/data).
    dependencies.update(page_ctx.source_files)
    return dependencies
def page_dependency_hashes(
    page_ctx: PageContext,
    component: BaseComponent | object,
    state_index: dict[str, Path],
    hasher: Callable[[str], str | None],
    root: Path | None = None,
) -> dict[str, str]:
    """Hash a page's dependency set into a ``{path: hash}`` map.

    Dependencies for which ``hasher`` returns None are omitted.

    Args:
        page_ctx: The compiled page context.
        component: The page's component/callable.
        state_index: The state-context identifier -> file index.
        hasher: A memoized path -> content-hash function (see :func:`make_hasher`).
        root: Project root. Defaults to cwd.

    Returns:
        ``{path: content_hash}`` for every readable dependency file.
    """
    dependency_paths = page_dependency_files(page_ctx, component, state_index, root)
    return {
        path: digest
        for path in dependency_paths
        if (digest := hasher(path)) is not None
    }


def deps_unchanged(
    dep_hashes: dict[str, str], hasher: Callable[[str], str | None]
) -> bool:
    """Whether every file in a stored dependency set still has the same content.

    An empty dependency map counts as unchanged. Checking stops at the first
    mismatch.

    Args:
        dep_hashes: A page's stored ``{path: hash}`` dependency map.
        hasher: A memoized path -> content-hash function.

    Returns:
        True iff every dependency file is byte-unchanged.
    """
    for path, recorded_digest in dep_hashes.items():
        if hasher(path) != recorded_digest:
            return False
    return True
def page_source_fingerprint(component: BaseComponent | object) -> str:
    """Fingerprint a page from its own module source (no build required).

    Hashes the component's qualified name together with the content hash of its
    defining module's file. When the module has no existing file and the
    component is callable, the callable's own source text is used instead, if
    it can be retrieved.

    Args:
        component: The page component or factory callable.

    Returns:
        A hex digest of the page's defining module source.
    """
    import inspect
    import sys

    module_name = getattr(component, "__module__", None)
    fingerprint_parts = [repr(getattr(component, "__qualname__", repr(component)))]
    module = sys.modules.get(module_name) if module_name else None
    source_file = getattr(module, "__file__", None)
    if source_file and Path(source_file).exists():
        try:
            content = Path(source_file).read_bytes()
        except OSError:
            pass
        else:
            fingerprint_parts.append(hashlib.sha256(content).hexdigest())
    elif callable(component):
        try:
            fingerprint_parts.append(inspect.getsource(component))
        except (OSError, TypeError):
            pass
    return _sha(*fingerprint_parts)


#: route -> (global_epoch, dep_hashes, PageContext, is_stateful)
_PAGE_STORE: dict[str, tuple[str, dict[str, str], PageContext, bool]] = {}


def validate_page(
    route: str,
    epoch: str,
    hasher: Callable[[str], str | None],
) -> tuple[PageContext, bool] | None:
    """Return the cached page iff its dependency set and the global epoch match.

    The epoch is compared first; dependency files are only hashed when the
    stored epoch equals the current one.

    Args:
        route: The page route.
        epoch: The current global epoch (see :func:`global_epoch`).
        hasher: A memoized path -> content-hash function (see :func:`make_hasher`).

    Returns:
        ``(PageContext, is_stateful)`` on a valid hit, else None.
    """
    cached = _PAGE_STORE.get(route)
    if cached is None:
        return None
    stored_epoch, dep_hashes, page_ctx, is_stateful = cached
    if stored_epoch == epoch and deps_unchanged(dep_hashes, hasher):
        return page_ctx, is_stateful
    return None
def store_page(
    route: str,
    epoch: str,
    dep_hashes: dict[str, str],
    page_ctx: PageContext,
    is_stateful: bool,
) -> None:
    """Record a freshly-compiled page with the dependency set it relies on.

    Replaces any entry previously stored for the same route.

    Args:
        route: The page route.
        epoch: The global epoch it compiled under.
        dep_hashes: ``{path: hash}`` for the page's dependency set.
        page_ctx: The compiled PageContext to cache.
        is_stateful: Whether the page registered new state during evaluation.
    """
    entry = (epoch, dep_hashes, page_ctx, is_stateful)
    _PAGE_STORE[route] = entry


def clear_page_store() -> None:
    """Forget every page held by the in-process per-page cache."""
    _PAGE_STORE.clear()
@dataclasses.dataclass(slots=True)
class _FakePage:
    # Minimal stand-in for a page definition: carries just the attributes these
    # tests hand to the compiler (presumably mirroring the real page
    # definition's fields — verify against the compiler if that type changes).
    route: str
    component: Callable[[], Component]
    title: Any = None
    description: Any = None
    image: str = ""
    meta: tuple[dict[str, Any], ...] = ()
    _source_module: str | None = None
def _footer() -> Component:
    """Build the footer subtree shared by every test page."""
    return rx.el.footer(rx.el.span("© Reflex"), class_name="footer")


def _page_a() -> Component:
    """Static test page A."""
    return rx.el.div(rx.el.h1("Page A"), _footer())


def _page_b() -> Component:
    """Static test page B (original version)."""
    return rx.el.div(rx.el.h1("Page B"), _footer())


def _page_b_edited() -> Component:
    """Page B after a simulated source edit (different heading, extra body)."""
    return rx.el.div(rx.el.h1("Page B (edited)"), rx.el.p("new body"), _footer())


def _page_c() -> Component:
    """Static test page C."""
    return rx.el.div(rx.el.h1("Page C"), _footer())


def _compile(pages: list[_FakePage]) -> CompileContext:
    """Compile ``pages`` with the default page plugins and return the context."""
    ctx = CompileContext(
        pages=pages,
        hooks=CompilerHooks(plugins=default_page_plugins()),
    )
    with ctx:
        ctx.compile()
    return ctx


def test_imports_round_trip():
    """Serializing then deserializing an imports mapping must be lossless."""
    imports = {
        "react": [ImportVar("useEffect"), ImportVar("Fragment", is_default=False)],
        "@emotion/react": [ImportVar("jsx", alias="j", install=False)],
    }
    restored = disk_cache._deserialize_imports(disk_cache._serialize_imports(imports))
    assert restored == imports


def test_wrap_key_strs_is_sorted_and_stable():
    """App-wrap keys serialize to ``"priority:name"`` strings in sorted order."""
    keys = [(200, "StrictMode"), (0, "AppWrap"), (45, "ColorMode")]
    # The expected order is lexicographic on the strings ("200:" < "45:").
    assert disk_cache._wrap_key_strs(keys) == [
        "0:AppWrap",
        "200:StrictMode",
        "45:ColorMode",
    ]


def _manifest(pages: dict[str, dict], **overrides) -> dict:
    """Build a manifest dict with valid globals, optionally overriding keys."""
    base = {
        "schema": disk_cache._SCHEMA,
        "reflex_version": page_cache._reflex_version(),
        "epoch": "EPOCH",
        "all_imports": {},
        "pages": pages,
    }
    base.update(overrides)
    return base


def test_globals_match():
    """The manifest is usable only if routes, epoch and version all match."""
    m = _manifest({"/a": {}, "/b": {}})
    routes = {"/a", "/b"}
    assert disk_cache.globals_match(m, routes=routes, epoch="EPOCH")
    # a changed route set -> no match
    assert not disk_cache.globals_match(m, routes={"/a"}, epoch="EPOCH")
    # a changed global epoch -> no match
    assert not disk_cache.globals_match(m, routes=routes, epoch="OTHER")
    # a stale reflex version -> no match
    assert not disk_cache.globals_match(
        {**m, "reflex_version": "0.0.0-old"}, routes=routes, epoch="EPOCH"
    )
def test_partition_pages_detects_changed_source():
    """Only pages whose recorded dependency hashes differ are reported as misses."""
    pages = [
        _FakePage(route="/a", component=_page_a),
        _FakePage(route="/b", component=_page_b),
    ]
    # /a depends on x.py, /b depends on y.py (each at a recorded content hash).
    m = _manifest({
        "/a": {"dep_hashes": {"/proj/x.py": "h-x"}},
        "/b": {"dep_hashes": {"/proj/y.py": "h-y"}},
    })
    # The hasher reports /a's dep unchanged and /b's dep changed.
    current = {"/proj/x.py": "h-x", "/proj/y.py": "h-y-new"}
    miss = disk_cache.partition_pages(pages, m, lambda p: current.get(p))
    assert {p.route for p in miss} == {"/b"}


def test_write_and_load_manifest(tmp_path, monkeypatch):
    """A written manifest loads back with each page's compiled contribution."""
    web = tmp_path / ".web"
    web.mkdir()
    # Redirect the web dir the disk cache resolves to into the pytest tmp dir.
    monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web)

    pages = [
        _FakePage(route="/a", component=_page_a),
        _FakePage(route="/b", component=_page_b),
        _FakePage(route="/c", component=_page_c),
    ]
    ctx = _compile(pages)
    disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path)

    manifest = disk_cache.load_manifest()
    assert manifest is not None
    assert manifest["schema"] == disk_cache._SCHEMA
    assert set(manifest["pages"]) == {"/a", "/b", "/c"}
    # the cached output is exactly what the compile produced for each page
    for route in ("/a", "/b", "/c"):
        assert (
            manifest["pages"][route]["output_code"]
            == ctx.compiled_pages[route].output_code
        )
        # these static pages register no new state
        assert manifest["pages"][route]["is_stateful"] is False
    # imports round-trip cleanly
    restored = disk_cache._deserialize_imports(
        manifest["pages"]["/a"]["frontend_imports"]
    )
    assert restored == ctx.compiled_pages["/a"].frontend_imports


def test_unchanged_pages_compile_identically(tmp_path, monkeypatch):
    """The reuse correctness property: an unchanged page recompiles byte-for-byte.

    The disk cache reuses a hit page's stored ``output_code`` verbatim, so it is
    correct iff a fresh compile of that page yields identical output. Compile A,
    B, C; then compile A, B(edited), C; A and C must be byte-identical, and the
    manifest's cached A/C output must equal the fresh recompile.
    """
    web = tmp_path / ".web"
    web.mkdir()
    monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web)

    pages = [
        _FakePage(route="/a", component=_page_a),
        _FakePage(route="/b", component=_page_b),
        _FakePage(route="/c", component=_page_c),
    ]
    ctx1 = _compile(pages)
    disk_cache.write_manifest(ctx1, pages, ctx1.all_imports, root=tmp_path)
    manifest = disk_cache.load_manifest()
    assert manifest is not None

    # "Edit" page B; recompile the whole app cleanly.
    pages_edited = [
        _FakePage(route="/a", component=_page_a),
        _FakePage(route="/b", component=_page_b_edited),
        _FakePage(route="/c", component=_page_c),
    ]
    ctx2 = _compile(pages_edited)

    # Unchanged pages are byte-identical across compiles -> safe to reuse.
    assert (
        ctx2.compiled_pages["/a"].output_code == ctx1.compiled_pages["/a"].output_code
    )
    assert (
        ctx2.compiled_pages["/c"].output_code == ctx1.compiled_pages["/c"].output_code
    )
    # B changed.
    assert (
        ctx2.compiled_pages["/b"].output_code != ctx1.compiled_pages["/b"].output_code
    )
    # The cached output we'd reuse for A/C equals a clean recompile of them.
    assert (
        manifest["pages"]["/a"]["output_code"] == ctx2.compiled_pages["/a"].output_code
    )
    assert (
        manifest["pages"]["/c"]["output_code"] == ctx2.compiled_pages["/c"].output_code
    )
+ + The disk cache reuses a hit page's stored ``output_code`` verbatim, so it is + correct iff a fresh compile of that page yields identical output. Compile A, + B, C; then compile A, B(edited), C; A and C must be byte-identical, and the + manifest's cached A/C output must equal the fresh recompile. + """ + web = tmp_path / ".web" + web.mkdir() + monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + + pages = [ + _FakePage(route="/a", component=_page_a), + _FakePage(route="/b", component=_page_b), + _FakePage(route="/c", component=_page_c), + ] + ctx1 = _compile(pages) + disk_cache.write_manifest(ctx1, pages, ctx1.all_imports, root=tmp_path) + manifest = disk_cache.load_manifest() + assert manifest is not None + + # "Edit" page B; recompile the whole app cleanly. + pages_edited = [ + _FakePage(route="/a", component=_page_a), + _FakePage(route="/b", component=_page_b_edited), + _FakePage(route="/c", component=_page_c), + ] + ctx2 = _compile(pages_edited) + + # Unchanged pages are byte-identical across compiles -> safe to reuse. + assert ( + ctx2.compiled_pages["/a"].output_code == ctx1.compiled_pages["/a"].output_code + ) + assert ( + ctx2.compiled_pages["/c"].output_code == ctx1.compiled_pages["/c"].output_code + ) + # B changed. + assert ( + ctx2.compiled_pages["/b"].output_code != ctx1.compiled_pages["/b"].output_code + ) + # The cached output we'd reuse for A/C equals a clean recompile of them. 
+ assert ( + manifest["pages"]["/a"]["output_code"] == ctx2.compiled_pages["/a"].output_code + ) + assert ( + manifest["pages"]["/c"]["output_code"] == ctx2.compiled_pages["/c"].output_code + ) + + +def _stub_externals(app, monkeypatch): + """Stub the side-effecting steps the fast path runs on a real app.""" + import reflex.utils.frontend_skeleton as fs + + monkeypatch.setattr(app, "_get_frontend_packages", lambda *a, **k: None) + monkeypatch.setattr(app, "_add_optional_endpoints", lambda *a, **k: None) + monkeypatch.setattr(app, "_validate_var_dependencies", lambda *a, **k: None) + monkeypatch.setattr(app, "_write_stateful_pages_marker", lambda *a, **k: None) + monkeypatch.setattr(fs, "update_react_router_config", lambda **k: None) + monkeypatch.setattr(fs, "update_entry_client", lambda *a, **k: None) + + +def test_incremental_rebuild_all_hits(tmp_path, monkeypatch): + web = tmp_path / ".web" + web.mkdir() + monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + + app = rx.App() + app.add_page(_page_a, route="/a") + app.add_page(_page_c, route="/c") + pages = list(app._unevaluated_pages.values()) + ctx = _compile(pages) + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + _stub_externals(app, monkeypatch) + + # Nothing changed -> every page is a hit -> fast path runs. 
+ assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + + +def test_incremental_rebuild_one_miss_writes_only_that_page(tmp_path, monkeypatch): + import json + + from reflex.compiler import utils as compiler_utils + + web = tmp_path / ".web" + web.mkdir() + monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + + app = rx.App() + app.add_page(_page_a, route="/a") + app.add_page(_page_c, route="/c") + pages = list(app._unevaluated_pages.values()) + ctx = _compile(pages) + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + _stub_externals(app, monkeypatch) + + # Simulate an edit to the first page: rewrite its manifest dependency set to + # reference a file whose recorded hash no longer matches the current content, + # so partitioning sees its dependency set as changed -> a miss (only it). + edited_route = pages[0].route + manifest_path = web / disk_cache._MANIFEST_FILE + manifest = json.loads(manifest_path.read_text()) + manifest["pages"][edited_route]["dep_hashes"] = { + str(tmp_path / "view.py"): "stale-hash" + } + manifest_path.write_text(json.dumps(manifest)) + + assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + + # The edited page was recompiled and written; its content matches a clean + # compile of that page. 
+ out_path = compiler_utils.resolve_path_of_web_dir( + ctx.compiled_pages[edited_route].output_path + ) + assert out_path.exists() + assert ( + out_path.read_text(encoding="utf-8") + == ctx.compiled_pages[edited_route].output_code + ) + + +def test_load_manifest_rejects_wrong_schema(tmp_path, monkeypatch): + import json + + web = tmp_path / ".web" + web.mkdir() + monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + (web / disk_cache._MANIFEST_FILE).write_text(json.dumps({"schema": 999})) + assert disk_cache.load_manifest() is None diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py new file mode 100644 index 00000000000..0750224390c --- /dev/null +++ b/tests/units/compiler/test_page_cache.py @@ -0,0 +1,115 @@ +"""Tests for the experimental incremental compile cache (in-process page cache).""" + +from reflex.compiler import page_cache + + +def test_global_epoch_tracks_global_files(tmp_path): + (tmp_path / "rxconfig.py").write_text("config = 1\n") + (tmp_path / "other.py").write_text("x = 1\n") + epoch = page_cache.global_epoch(root=tmp_path) + # editing a non-global file does NOT change the epoch + (tmp_path / "other.py").write_text("x = 2\n") + assert page_cache.global_epoch(root=tmp_path) == epoch + # editing a genuinely-global file (rxconfig.py) DOES + (tmp_path / "rxconfig.py").write_text("config = 2\n") + assert page_cache.global_epoch(root=tmp_path) != epoch + + +def _dummy_page(): # a page-like callable defined in this module + return None + + +def test_page_module_files_resolves(tmp_path): + files = page_cache.page_module_files([_dummy_page]) + assert any(p.name == "test_page_cache.py" for p in files) + + +def test_used_state_files_from_output_and_memos(tmp_path): + from types import SimpleNamespace + + sfile = (tmp_path / "state.py").resolve() + mfile = (tmp_path / "mstate.py").resolve() + id_to_file = { + "reflex___state____state____app_____s": sfile, + 
"reflex___state____state____app_____m": mfile, + } + out = 'jsx("div",{},reflex___state____state____app_____s.x_rx_state_)' + assert page_cache.used_state_files(out, [], id_to_file) == {sfile} + assert page_cache.used_state_files("no state", [], id_to_file) == set() + # state hidden inside an auto-memoized component is still captured + memo = SimpleNamespace( + render=lambda: {"contents": "reflex___state____state____app_____m.y_rx_state_"} + ) + assert page_cache.used_state_files(out, [memo], id_to_file) == {sfile, mfile} + # un-introspectable memo -> conservative (all fine files) + boom = SimpleNamespace(render=lambda: (_ for _ in ()).throw(RuntimeError())) + assert page_cache.used_state_files(out, [boom], id_to_file) == {sfile, mfile} + + +def test_validate_page_fine_grained_deps(): + page_cache.clear_page_store() + ctx = object() + dep = "/proj/state.py" + # page depends on file `dep` at content hash H1, under global epoch "e1" + page_cache.store_page("/x", "e1", {dep: "H1"}, ctx, True) + # all deps unchanged + epoch matches -> hit + assert page_cache.validate_page("/x", "e1", lambda p: {dep: "H1"}.get(p)) == ( + ctx, + True, + ) + # a dependency file changed -> miss + assert page_cache.validate_page("/x", "e1", lambda p: {dep: "H2"}.get(p)) is None + # the genuinely-global epoch changed -> miss + assert page_cache.validate_page("/x", "e2", lambda p: {dep: "H1"}.get(p)) is None + + +def test_validate_page_with_no_deps_only_tracks_epoch(): + page_cache.clear_page_store() + ctx = object() + # page depends on NO files + page_cache.store_page("/x", "e1", {}, ctx, False) + # some unrelated file changed -> page is still a hit (it depends on nothing) + assert page_cache.validate_page("/x", "e1", lambda p: "Z") == (ctx, False) + # the global epoch changed -> miss + assert page_cache.validate_page("/x", "e2", lambda p: "Z") is None + # no stored entry -> miss + page_cache.clear_page_store() + assert page_cache.validate_page("/x", "e1", lambda p: None) is None + + +def 
_fake_ctx(pages, imports=None, memo=None, stateful=None, wraps=None): + from types import SimpleNamespace + + return SimpleNamespace( + compiled_pages={r: SimpleNamespace(output_code=c) for r, c in pages.items()}, + all_imports=imports or {}, + auto_memo_components=memo or {}, + stateful_routes=stateful or {}, + app_wrap_components=wraps or {}, + ) + + +def test_verify_diff_identical(): + from reflex.compiler import compiler + + a = _fake_ctx({"/": "CODE", "/x": "Y"}) + b = _fake_ctx({"/": "CODE", "/x": "Y"}) + assert compiler._diff_compile_contexts(a, b) == [] + + +def test_verify_diff_detects_page_change(): + from reflex.compiler import compiler + + a = _fake_ctx({"/": "OLD"}) + b = _fake_ctx({"/": "NEW"}) + assert "page:/" in compiler._diff_compile_contexts(a, b) + + +def test_verify_diff_detects_missing_route_and_memo(): + from reflex.compiler import compiler + + a = _fake_ctx({"/": "C"}, memo={("Memo", None): 1}) + b = _fake_ctx({"/": "C", "/x": "C"}, memo={}) + diffs = compiler._diff_compile_contexts(a, b) + assert any(d.startswith("routes:") for d in diffs) + assert "auto_memo_components" in diffs diff --git a/tests/units/vars/test_base.py b/tests/units/vars/test_base.py index a8075357fa7..72d34cc5ae2 100644 --- a/tests/units/vars/test_base.py +++ b/tests/units/vars/test_base.py @@ -53,3 +53,25 @@ def cv(self) -> int: replaced = cv._replace(_var_type=float) assert replaced._var_type is float + + +def test_reset_unique_variable_names_is_deterministic(): + """Resetting the unique-name generator reproduces the same name sequence. + + Auto-memo content hashes embed these names, so a second in-process compile + must regenerate identical ones (the RNG state and used-name set otherwise + persist process-wide and drift). 
+ """ + from reflex_base.vars.base import ( + get_unique_variable_name, + reset_unique_variable_names, + ) + + reset_unique_variable_names() + first = [get_unique_variable_name() for _ in range(8)] + # Without a reset, the sequence keeps advancing (and dedups against the + # already-used set), so it differs. + assert [get_unique_variable_name() for _ in range(8)] != first + # After a reset it reproduces the original sequence exactly. + reset_unique_variable_names() + assert [get_unique_variable_name() for _ in range(8)] == first From 8360450cb01990745d0afe9e2d8bd60e109b5ea8 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Sun, 28 Jun 2026 21:59:43 +0500 Subject: [PATCH 02/18] feat(compiler): warm fork-per-compile hot-reload daemon Add a persistent compile daemon (REFLEX_COMPILE_CACHE) that imports the world once and forks a throwaway child per source change, instead of the reloader respawning a worker that cold-imports on every edit. The child re-imports first-party code fresh and runs the incremental rebuild, so correctness matches a respawn while the cold import is paid once. Supporting changes that make the daemon safe and complete: - write_file now writes atomically (temp + os.replace) so a reader (vite, a concurrent compile) never sees a half-written file, even when a forked child is killed mid-compile. - _run_dev launches the daemon and sets REFLEX_SKIP_COMPILE on the backend so it only evaluates pages to register state. - The daemon watches what the compiler reads (incl. sibling-dir markdown from the manifest); uvicorn reload_includes also covers *.md/*.mdx, so markdown edits finally trigger a reload. - Drop the per-rebuild console.info now that progress is shown inline. 
--- reflex/compiler/disk_cache.py | 9 - reflex/compiler/utils.py | 12 +- reflex/reflex.py | 16 + reflex/utils/compile_daemon.py | 560 ++++++++++++++++++++ reflex/utils/exec.py | 3 + tests/units/compiler/test_compiler_utils.py | 30 +- tests/units/utils/test_compile_daemon.py | 225 ++++++++ 7 files changed, 844 insertions(+), 11 deletions(-) create mode 100644 reflex/utils/compile_daemon.py create mode 100644 tests/units/utils/test_compile_daemon.py diff --git a/reflex/compiler/disk_cache.py b/reflex/compiler/disk_cache.py index 4c4cbf3bc9d..7d6228fe11c 100644 --- a/reflex/compiler/disk_cache.py +++ b/reflex/compiler/disk_cache.py @@ -404,15 +404,6 @@ def try_incremental_rebuild( # Refresh the manifest for the next process. _update_manifest_for_misses(manifest, miss_ctx, miss_pages) - if miss_pages: - changed = ", ".join(sorted(p.route for p in miss_pages)[:8]) - if len(miss_pages) > 8: - changed += ", ..." - console.info( - f"Incremental compile: recompiled {len(miss_pages)} page(s) ({changed})." - ) - else: - console.info("Incremental compile: no page changed.") return True diff --git a/reflex/compiler/utils.py b/reflex/compiler/utils.py index 6a58d5a917f..9c38dcb1149 100644 --- a/reflex/compiler/utils.py +++ b/reflex/compiler/utils.py @@ -838,7 +838,17 @@ def write_file(path: str | Path, code: str): path.parent.mkdir(parents=True, exist_ok=True) if path.exists() and path.read_text(encoding="utf-8") == code: return - path.write_text(code, encoding="utf-8") + # Write atomically (temp file + os.replace) so a reader watching this tree — + # e.g. the vite dev server, or a concurrent compile — never observes a + # half-written file, even if the writing process is killed mid-write (the + # compile daemon forks throwaway children that may be terminated mid-compile). 
+ tmp = path.with_name(f"{path.name}.{os.getpid()}.tmp") + try: + tmp.write_text(code, encoding="utf-8") + tmp.replace(path) + finally: + if tmp.exists(): + tmp.unlink(missing_ok=True) _MEMO_MANIFEST_FILENAME = ".memo-manifest.json" diff --git a/reflex/reflex.py b/reflex/reflex.py index 3c340fccea2..61cc3c5415e 100644 --- a/reflex/reflex.py +++ b/reflex/reflex.py @@ -207,6 +207,22 @@ def _run_dev( running_mode.has_backend(), )) + # When the compile cache is enabled, a persistent warm compile daemon owns + # .web regeneration (fork-per-compile, no per-edit cold import) instead of + # the backend worker recompiling on every respawn. The backend then only + # evaluates pages to register state — REFLEX_SKIP_COMPILE makes it skip the + # frontend write, and is inherited by every respawned reload worker. + from reflex_base.environment import environment + + if running_mode.has_frontend() and environment.REFLEX_COMPILE_CACHE.get(): + from reflex.utils import compile_daemon + + commands.append(( + compile_daemon.run_compile_daemon, + exec.should_prerender_routes(), + )) + environment.REFLEX_SKIP_COMPILE.set(True) + # Start the frontend and backend. with processes.run_concurrently_context(*commands): # In dev mode, run the backend on the main thread. diff --git a/reflex/utils/compile_daemon.py b/reflex/utils/compile_daemon.py new file mode 100644 index 00000000000..603ebe6d6f0 --- /dev/null +++ b/reflex/utils/compile_daemon.py @@ -0,0 +1,560 @@ +"""Persistent warm compile daemon for fast dev hot reloads (fork-per-compile). + +Active in ``reflex run`` dev when ``REFLEX_COMPILE_CACHE`` is set. Instead of the +granian/uvicorn reloader respawning a worker that cold-imports reflex + +reflex-base + the app's heavy deps (pandas/plotly/…) on every ``.py`` change, +this daemon imports everything **once** and stays warm. On each source change it +``fork()``s a throwaway child that: + +1. 
purges the user's first-party modules from ``sys.modules`` and resets the + cross-module state/page registries (so it is, for first-party code, a clean + interpreter — no stale ``__subclasses__`` zombies, no duplicate-substate + shadowing, no ``add_page`` re-registration error); +2. re-imports the user app fresh (third-party deps stay warm, inherited via + copy-on-write fork) and runs ``compiler.compile_app`` with the cache on, which + recompiles only the dependency-changed pages via + ``disk_cache.try_incremental_rebuild`` — the child *is* the fresh worker that + path was built for, but warm; +3. writes only the changed ``.web`` files (atomically) and ``os._exit``s. + +Because every compile runs in a child that is discarded, reload corruption can +never accumulate: correctness is the same as today's respawn, but the multi-second +cold import is paid once instead of on every edit. + +The daemon owns the file watcher, so it watches exactly what the compiler reads — +``.py``/``.md``/``.mdx`` under the app source roots **plus** every external/sibling +content file recorded in the compile manifest's per-page dependency sets (where a +docs app's markdown lives) plus ``rxconfig``/lockfiles/``assets`` — fixing the +long-standing gap where markdown edits (and sibling-dir reads) never triggered a +reload because the reloaders watch only ``*.py`` under the app dir. + +The watcher is a single-threaded poll loop on purpose: ``fork()`` from a +multi-threaded process is unsafe, so the daemon must hold no background threads. + +On Windows (no ``os.fork``) each compile runs in a fresh spawned subprocess +instead — correct, but without copy-on-write warmth (parity with today's latency); +the watcher/markdown fix applies identically. 
+""" + +from __future__ import annotations + +import contextlib +import importlib +import os +import signal +import subprocess +import sys +import threading +import time +from pathlib import Path + +from reflex_base.environment import environment + +from reflex.utils import console + +#: Seconds between poll passes over the watched file set. +_POLL_INTERVAL = 0.25 +#: After a change is seen, wait this long and re-snapshot so a burst of saves +#: (e.g. format-on-save touching many files) collapses into a single compile. +_DEBOUNCE = 0.1 +#: Watchdog: kill a compile child that runs longer than this (a hung/deadlocked +#: child must never wedge the daemon). Generous enough for a real full compile. +_COMPILE_TIMEOUT = 300.0 +#: Source suffixes edited under the app roots that should trigger a recompile. +_WATCH_SUFFIXES = (".py", ".md", ".mdx") +#: Directories never worth walking while building the watch snapshot. +_SKIP_DIRS = {".web", ".venv", "venv", "node_modules", "__pycache__", ".git"} + + +def run_compile_daemon(prerender_routes: bool = False) -> None: + """Supervise the compile daemon as its own (fork-safe) subprocess. + + Runs on a ``reflex run`` worker thread alongside the frontend. Launching the + daemon as a separate process keeps it single-threaded so its per-edit + ``fork()`` is safe, and isolates its environment from the backend (which is + told to skip frontend compilation via ``REFLEX_SKIP_COMPILE``). + + Args: + prerender_routes: Whether the daemon should prerender routes when compiling. + """ + env = {**os.environ} + # The daemon DOES compile; ensure the cache is on and the skip flag is off. 
+ env.pop(environment.REFLEX_SKIP_COMPILE.name, None) + env[environment.REFLEX_COMPILE_CACHE.name] = "1" + if prerender_routes: + env["REFLEX_PRERENDER_ROUTES"] = "1" + proc = subprocess.Popen( + [sys.executable, "-m", "reflex.utils.compile_daemon"], env=env + ) + + def _terminate() -> None: + if proc.poll() is None: + proc.terminate() + try: + proc.wait(timeout=3) + except subprocess.TimeoutExpired: + proc.kill() + + # Backstop: the daemon must never outlive reflex-run, even if this thread is + # interrupted before its finally runs. The daemon also self-exits if its + # parent dies (see _serve), so the two together prevent an orphan. + import atexit + + atexit.register(_terminate) + try: + proc.wait() + finally: + _terminate() + + +def _reload_roots() -> list[Path]: + """Resolve the directories/files that hold the user's first-party source. + + Returns: + The resolved reload roots (the same set the backend reloader watches). + """ + from reflex.utils import exec as exec_utils + + return [Path(p).resolve() for p in exec_utils.get_reload_paths()] + + +def _under_roots(path: Path, roots: list[Path]) -> bool: + """Whether ``path`` is one of, or lives under, the reload roots. + + Args: + path: The resolved path to test. + roots: The resolved reload roots. + + Returns: + True if the path is covered by a reload root. + """ + return any(path == root or root in path.parents for root in roots) + + +def _iter_source_files(root: Path): + """Yield watchable source files under ``root`` (skipping build/dep dirs). + + Args: + root: A reload root directory (or file). + + Yields: + Resolved source file paths with a watched suffix. 
+ """ + if root.is_file(): + if root.suffix in _WATCH_SUFFIXES: + yield root.resolve() + return + for path in root.rglob("*"): + if path.is_dir(): + continue + rel_parts = path.relative_to(root).parts[:-1] + if any(part in _SKIP_DIRS or part.startswith(".") for part in rel_parts): + continue + if path.suffix in _WATCH_SUFFIXES: + yield path.resolve() + + +def _external_dependency_files(roots: list[Path]) -> set[Path]: + """External content files the compiler read, taken from the disk manifest. + + The manifest records each page's full dependency set (own module, markdown, + component/state modules). Any dependency that lives *outside* the reload + roots — e.g. a docs app's markdown in a sibling directory — is invisible to + ``get_reload_paths`` and must be watched explicitly so editing it rebuilds. + + Args: + roots: The resolved reload roots. + + Returns: + Resolved external dependency file paths to watch. + """ + from reflex.compiler import disk_cache + + manifest = disk_cache.load_manifest() + if not manifest: + return set() + out: set[Path] = set() + for page in manifest.get("pages", {}).values(): + for dep in page.get("dep_hashes", {}): + path = Path(dep) + if not _under_roots(path, roots): + out.add(path) + return out + + +def _global_files(root: Path) -> set[Path]: + """Genuinely-global files whose change forces a full rebuild / daemon restart. + + Args: + root: The project root. + + Returns: + Resolved paths of rxconfig + lockfiles + package.json that exist. + """ + from reflex.compiler import page_cache + + return { + (root / name).resolve() + for name in page_cache._GLOBAL_FILES + if (root / name).exists() + } + + +def _mtime_ns(path: Path) -> int | None: + """Return the file's modification time in ns, or None if it can't be read. + + Args: + path: The file to stat. + + Returns: + The ``st_mtime_ns`` value, or None on error. 
+ """ + try: + return path.stat().st_mtime_ns + except OSError: + return None + + +def _watch_paths(roots: list[Path], root: Path, external_files: set[Path]) -> set[Path]: + """Build the full set of files to watch this tick. + + ``external_files`` (the per-page external content deps from the manifest) is + passed in rather than re-read here, so the manifest is parsed once per compile + instead of on every poll tick. + + Args: + roots: The resolved reload roots. + root: The project root. + external_files: External content dependency files (from the manifest). + + Returns: + The set of paths to snapshot. + """ + paths: set[Path] = set(external_files) + for r in roots: + paths.update(_iter_source_files(r)) + paths.update(_global_files(root)) + assets = root / "assets" + if assets.is_dir(): + paths.update(p.resolve() for p in assets.rglob("*") if p.is_file()) + return paths + + +def _snapshot(paths: set[Path]) -> dict[Path, int]: + """Snapshot ``{path: mtime_ns}`` for the given files. + + Args: + paths: The files to stat. + + Returns: + A mapping of file path to its modification time (unreadable files omitted). + """ + return {p: m for p in paths if (m := _mtime_ns(p)) is not None} + + +def _first_party_module_names(roots: list[Path]) -> set[str]: + """Names of all loaded modules belonging to the user's first-party packages. + + First-party top-level package names are inferred from the *regular* modules + whose ``__file__`` resolves under a reload root (a plain attribute read — no + namespace-package ``__path__`` recalculation, which is lazy and would break + while ``sys.modules`` is being mutated). Every loaded module sharing one of + those top-level names is then first-party, which captures namespace packages + (they have no ``__file__``) purely by name string. + + Args: + roots: The resolved reload roots. + + Returns: + The set of ``sys.modules`` keys to purge. 
+ """ + top_level: set[str] = set() + for name, mod in list(sys.modules.items()): + file = getattr(mod, "__file__", None) + if not file: + continue + try: + rf = Path(file).resolve() + except OSError: + continue + if _under_roots(rf, roots): + top_level.add(name.partition(".")[0]) + if not top_level: + return set() + return {name for name in sys.modules if name.partition(".")[0] in top_level} + + +def _reset_first_party(roots: list[Path]) -> None: + """Make this interpreter clean w.r.t. first-party code before re-importing. + + Purges the user's first-party modules from ``sys.modules`` and clears the + cross-module registries/caches that would otherwise pin the old class + objects (the ``__subclasses__`` zombie / duplicate-substate / stale-page + hazards). Third-party modules (reflex, reflex-base, pandas, …) are left + imported and warm. + + Args: + roots: The resolved reload roots whose modules are first-party. + """ + for name in _first_party_module_names(roots): + sys.modules.pop(name, None) + # The import-system finder caches were inherited from the warm parent via + # fork and are now stale (they reference the purged modules); without this a + # re-import can resolve a stale spec for a since-changed module. + importlib.invalidate_caches() + + from reflex_base.registry import RegistrationContext + + from reflex.compiler import page_cache + from reflex.page import DECORATED_PAGES + from reflex.state import BaseState, all_base_state_classes + + ctx = RegistrationContext.ensure_context() + ctx.base_states.clear() + ctx.base_state_substates.clear() + ctx.event_handlers.clear() + all_base_state_classes.clear() + for cached in ( + BaseState.get_parent_state, + BaseState.get_root_state, + BaseState.get_name, + BaseState.get_full_name, + BaseState.get_class_substate, + ): + cached.cache_clear() + DECORATED_PAGES.clear() + # In-process caches hold live objects from the previous import; drop them so + # the fresh compile can't reuse a tree built from now-stale classes. 
Cross- + # compile reuse comes from the on-disk manifest, not these. + page_cache.clear_import_graph() + page_cache.clear_page_store() + _reset_model_metadata() + + +def _reset_model_metadata() -> None: + """Clear the SQLAlchemy/SQLModel table + model registries. + + ``rx.Model`` subclasses (including ones a docs demo ``exec``s) register their + table in a process-global ``MetaData`` that lives in the framework, which the + forked child inherits warm and populated. Re-evaluating a page that defines + such a model would then raise ``Table '...' is already defined``. A fresh + respawn never hits this (empty registry); resetting here restores that + fresh-process contract. Best-effort: apps without a DB layer have nothing to + clear. + """ + with contextlib.suppress(Exception): + import sqlmodel + + sqlmodel.SQLModel.metadata.clear() + with contextlib.suppress(Exception): + from reflex.model import Model + + Model.metadata.clear() + with contextlib.suppress(Exception): + from reflex.model import ModelRegistry + + ModelRegistry.models.clear() + ModelRegistry._metadata = None + + +def _child_compile(roots: list[Path], prerender_routes: bool) -> None: + """Reset first-party state, re-import the app fresh, and compile incrementally. + + Runs in a forked child (POSIX) or a one-shot subprocess (Windows). Must not + return normally on error — the caller maps the exit code to success/failure. + + Args: + roots: The resolved reload roots. + prerender_routes: Whether to prerender routes during compile. + """ + from reflex.utils import prerequisites + + _reset_first_party(roots) + prerequisites.get_compiled_app( + reload=False, + prerender_routes=prerender_routes, + use_rich=True, + trigger="hot_reload", + ) + + +def _await_child(pid: int) -> bool: + """Reap a forked compile child, killing it if it exceeds the watchdog timeout. + + Args: + pid: The forked child's pid. + + Returns: + True if it exited 0; False on failure, timeout, or signal (a + signal-killed child — e.g. 
Ctrl-C during shutdown — is a quiet False). + """ + deadline = time.monotonic() + _COMPILE_TIMEOUT + while True: + done, status = os.waitpid(pid, os.WNOHANG) + if done == pid: + return os.waitstatus_to_exitcode(status) == 0 + if time.monotonic() > deadline: + with contextlib.suppress(OSError): + os.kill(pid, signal.SIGKILL) + with contextlib.suppress(OSError): + os.waitpid(pid, 0) + console.error("Compile child timed out; killed it, keeping last build.") + return False + time.sleep(0.02) + + +def _can_fork() -> bool: + """Whether forking is safe right now (POSIX and the process is single-threaded). + + Forking a multi-threaded process and then running Python (not exec) inherits + locks held by threads that don't exist in the child — a classic deadlock. The + user app, imported warm in the parent, may have started a background thread + at import time, so this is checked per compile. + + Returns: + True if a per-compile ``fork()`` is safe. + """ + return hasattr(os, "fork") and threading.active_count() == 1 + + +def _compile_once(roots: list[Path], prerender_routes: bool) -> bool: + """Run one incremental compile in an isolated child; report success. + + Uses a copy-on-write ``fork()`` when safe (warm), else a fresh subprocess + (Windows, or when the warm parent is no longer single-threaded). + + Args: + roots: The resolved reload roots. + prerender_routes: Whether to prerender routes during compile. + + Returns: + True if the child compiled successfully, else False. + """ + if _can_fork(): + pid = os.fork() + if pid == 0: # child + code = 0 + try: + _child_compile(roots, prerender_routes) + except BaseException: # report any failure, never crash the daemon + import traceback + + traceback.print_exc() + code = 1 + finally: + os._exit(code) + return _await_child(pid) + + # No fork (Windows) or unsafe to fork: a fresh (cold) subprocess compiles. 
+ try: + proc = subprocess.run( + [sys.executable, "-m", "reflex.utils.compile_daemon", "--once"], + check=False, + timeout=_COMPILE_TIMEOUT, + ) + except subprocess.TimeoutExpired: + console.error("Compile subprocess timed out; keeping the last good build.") + return False + return proc.returncode == 0 + + +def _serve() -> None: + """Run the warm compile daemon: initial compile, then watch-and-recompile.""" + from reflex.utils import prerequisites + + root = Path.cwd() + prerender_routes = bool(os.environ.get("REFLEX_PRERENDER_ROUTES")) + roots = _reload_roots() + parent_pid = os.getppid() + + # Warm import + initial compile (writes .web + the manifest); keeps the app + # and its third-party deps resident for copy-on-write children. A failure + # here (e.g. the app is mid-edit and broken) must NOT kill the daemon — fall + # through to the watch loop so the next edit that fixes it recompiles. + try: + with console.timing("Compile daemon: initial compile"): + prerequisites.get_compiled_app( + reload=False, + prerender_routes=prerender_routes, + use_rich=True, + trigger="initial", + ) + except BaseException: # tolerate a broken initial state; keep watching + import traceback + + traceback.print_exc() + console.error("Initial compile failed; watching for a fix.") + + # External content deps (e.g. a docs app's sibling-dir markdown) come from + # the manifest; recompute only after a compile, not on every poll tick. + external = _external_dependency_files(roots) + global_files = _global_files(root) + snapshot = _snapshot(_watch_paths(roots, root, external)) + console.info("Compile daemon ready (warm); watching for changes.") + + while True: + time.sleep(_POLL_INTERVAL) + # Never outlive reflex-run: if our parent died we were reparented. 
+ if os.getppid() != parent_pid: + return + current = _snapshot(_watch_paths(roots, root, external)) + changed = { + p + for p in current.keys() | snapshot.keys() + if current.get(p) != snapshot.get(p) + } + if not changed: + continue + + # A change to a genuinely-global input (rxconfig/lockfiles, or a reflex + # upgrade) can't be applied to the warm parent (it imported the old + # version); re-exec the daemon so the new world is actually loaded. + if changed & global_files: + console.info("Global config changed; restarting compile daemon.") + os.execv( + sys.executable, [sys.executable, "-m", "reflex.utils.compile_daemon"] + ) + + time.sleep(_DEBOUNCE) # absorb the rest of a burst of saves + roots = _reload_roots() + ok = _compile_once(roots, prerender_routes) + if not ok: + console.error("Compile failed; keeping the last good build.") + # Re-snapshot AFTER compiling so writes the compile itself made are the + # new baseline; refresh external deps + globals from the new manifest so + # a newly-referenced content dir becomes watched. + external = _external_dependency_files(roots) + global_files = _global_files(root) + snapshot = _snapshot(_watch_paths(roots, root, external)) + + +def main(argv: list[str] | None = None) -> int: + """Entry point for ``python -m reflex.utils.compile_daemon``. + + Args: + argv: Command-line arguments (defaults to ``sys.argv[1:]``). + + Returns: + Process exit code. 
+ """ + argv = sys.argv[1:] if argv is None else argv + if "--once" in argv: + try: + _child_compile( + _reload_roots(), bool(os.environ.get("REFLEX_PRERENDER_ROUTES")) + ) + except BaseException: # report any failure, never crash + import traceback + + traceback.print_exc() + return 1 + return 0 + try: + _serve() + except KeyboardInterrupt: + return 0 # clean shutdown (Ctrl-C); no traceback + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/reflex/utils/exec.py b/reflex/utils/exec.py index 6e0543d93db..fbdef7d2499 100644 --- a/reflex/utils/exec.py +++ b/reflex/utils/exec.py @@ -601,6 +601,9 @@ def run_uvicorn_backend(host: str, port: int, loglevel: LogLevel): log_level=loglevel.value, reload=True, reload_dirs=list(map(str, get_reload_paths())), + # uvicorn's reload filter defaults to *.py only, so markdown/data edits + # would never trigger a reload; include the content suffixes too. + reload_includes=["*.py", "*.md", "*.mdx"], reload_delay=0.1, ) diff --git a/tests/units/compiler/test_compiler_utils.py b/tests/units/compiler/test_compiler_utils.py index db7eff1a495..7ffd7679f25 100644 --- a/tests/units/compiler/test_compiler_utils.py +++ b/tests/units/compiler/test_compiler_utils.py @@ -4,7 +4,7 @@ import pytest -from reflex.compiler.utils import compile_state +from reflex.compiler.utils import compile_state, write_file from reflex.constants.state import FIELD_MARKER from reflex.state import State from reflex.vars.base import computed_var @@ -48,3 +48,31 @@ async def test_compile_state_resolves_async_computed_vars_with_running_event_loo assert values[f"a{FIELD_MARKER}"] == 1 assert values[f"b{FIELD_MARKER}"] == 2 assert values[f"async_value{FIELD_MARKER}"] == "resolved" + + +def test_write_file_creates_and_updates(tmp_path): + path = tmp_path / "sub" / "page.jsx" + write_file(path, "v1") + assert path.read_text() == "v1" + write_file(path, "v2") + assert path.read_text() == "v2" + + +def 
test_write_file_atomic_leaves_no_temp_files(tmp_path): + path = tmp_path / "page.jsx" + write_file(path, "content") + # The temp file used for the atomic replace must not linger. + assert [p.name for p in tmp_path.iterdir()] == ["page.jsx"] + + +def test_write_file_skips_byte_identical_write(tmp_path): + """An identical write must not touch the file (so vite isn't told to HMR).""" + path = tmp_path / "page.jsx" + write_file(path, "same") + before = path.stat().st_mtime_ns + import os + + os.utime(path, ns=(before + 1_000_000_000, before + 1_000_000_000)) + bumped = path.stat().st_mtime_ns + write_file(path, "same") # identical -> no rewrite + assert path.stat().st_mtime_ns == bumped diff --git a/tests/units/utils/test_compile_daemon.py b/tests/units/utils/test_compile_daemon.py new file mode 100644 index 00000000000..9d7947a8236 --- /dev/null +++ b/tests/units/utils/test_compile_daemon.py @@ -0,0 +1,225 @@ +"""Tests for the warm fork-per-compile dev compile daemon.""" + +import os + +import pytest + +from reflex.compiler import disk_cache +from reflex.utils import compile_daemon + + +def test_iter_source_files_picks_content_skips_build_dirs(tmp_path): + (tmp_path / "page.py").write_text("x = 1\n") + (tmp_path / "doc.md").write_text("# doc\n") + (tmp_path / "guide.mdx").write_text("mdx\n") + (tmp_path / "data.txt").write_text("ignored\n") # not a watched suffix + web = tmp_path / ".web" + web.mkdir() + (web / "build.js").write_text("// artifact\n") + pycache = tmp_path / "__pycache__" + pycache.mkdir() + (pycache / "stale.py").write_text("# compiled\n") + + found = {p.name for p in compile_daemon._iter_source_files(tmp_path)} + assert found == {"page.py", "doc.md", "guide.mdx"} + + +def test_external_dependency_files_includes_sibling_markdown(tmp_path, monkeypatch): + """A page's markdown read from a sibling dir (outside the app root) is watched. 
+ + This is the regression for markdown edits never triggering a reload: such + files live outside ``get_reload_paths`` and are only known via the compile + manifest's per-page dependency sets. + """ + app_root = tmp_path / "app" + app_root.mkdir() + own_module = app_root / "page.py" + own_module.write_text("x = 1\n") + sibling_md = tmp_path / "docs" / "guide.md" + sibling_md.parent.mkdir() + sibling_md.write_text("# guide\n") + + manifest = { + "pages": { + "/g": { + "dep_hashes": { + str(own_module): "h1", # under the app root -> not external + str(sibling_md): "h2", # sibling dir -> must be watched + } + } + } + } + monkeypatch.setattr(disk_cache, "load_manifest", lambda: manifest) + + external = compile_daemon._external_dependency_files([app_root.resolve()]) + assert sibling_md.resolve() in external + assert own_module.resolve() not in external + + +def test_external_dependency_files_empty_without_manifest(tmp_path, monkeypatch): + monkeypatch.setattr(disk_cache, "load_manifest", lambda: None) + assert compile_daemon._external_dependency_files([tmp_path]) == set() + + +def test_snapshot_detects_external_markdown_change(tmp_path, monkeypatch): + """The watch snapshot includes sibling markdown and reflects its mtime change.""" + app_root = tmp_path / "app" + app_root.mkdir() + (app_root / "page.py").write_text("x = 1\n") + sibling_md = tmp_path / "docs" / "guide.md" + sibling_md.parent.mkdir() + sibling_md.write_text("# v1\n") + monkeypatch.setattr( + disk_cache, + "load_manifest", + lambda: {"pages": {"/g": {"dep_hashes": {str(sibling_md): "h"}}}}, + ) + + roots = [app_root.resolve()] + external = compile_daemon._external_dependency_files(roots) + snap1 = compile_daemon._snapshot( + compile_daemon._watch_paths(roots, tmp_path, external) + ) + assert sibling_md.resolve() in snap1 + assert (app_root / "page.py").resolve() in snap1 + + # Force a distinct mtime (deterministic, independent of fs mtime resolution). 
+ bumped = snap1[sibling_md.resolve()] + 1_000_000_000 + os.utime(sibling_md, ns=(bumped, bumped)) + snap2 = compile_daemon._snapshot( + compile_daemon._watch_paths(roots, tmp_path, external) + ) + assert snap2[sibling_md.resolve()] != snap1[sibling_md.resolve()] + + +def test_first_party_module_names_includes_namespace_packages(tmp_path, monkeypatch): + """Namespace packages (no ``__file__``) are captured for purge by name. + + Regression: a namespace package left in ``sys.modules`` after its regular + siblings were purged broke re-import with a ``KeyError`` on the parent path. + They are now identified by sharing a first-party top-level name (derived from + a regular sibling's ``__file__``), never by their lazy ``__path__``. + """ + import importlib + import sys + + nspkg = tmp_path / "ns_under_test" + nspkg.mkdir() # no __init__.py -> namespace package + (nspkg / "leaf.py").write_text("Y = 1\n") # regular submodule with __file__ + monkeypatch.syspath_prepend(str(tmp_path)) + try: + pkg = importlib.import_module("ns_under_test") + importlib.import_module("ns_under_test.leaf") + assert getattr(pkg, "__file__", None) is None # confirm it's a namespace pkg + + names = compile_daemon._first_party_module_names([tmp_path.resolve()]) + assert "ns_under_test" in names # namespace pkg captured by name + assert "ns_under_test.leaf" in names # regular module captured via __file__ + finally: + sys.modules.pop("ns_under_test", None) + sys.modules.pop("ns_under_test.leaf", None) + + +@pytest.mark.skipif(not hasattr(os, "fork"), reason="requires os.fork (POSIX)") +def test_reset_model_metadata_allows_table_redefinition(): + """After reset, an ``rx.Model`` table can be redefined without conflict. + + Regression: the forked child inherits the warm, populated SQLAlchemy + ``MetaData``; re-evaluating a page that defines a model raised + ``Table '...' is already defined``. Run in a fork so clearing the global + metadata can't affect the test process. 
+ """ + read_fd, write_fd = os.pipe() + pid = os.fork() + if pid == 0: # child + os.close(read_fd) + result = b"E" + try: + import warnings + + warnings.simplefilter("ignore") + import reflex as rx + + def _define(): + class DaemonResetUser(rx.Model, table=True): + name: str + + _define() + compile_daemon._reset_model_metadata() + _define() # must NOT raise "Table 'daemonresetuser' is already defined" + result = b"1" + except Exception: + import traceback + + traceback.print_exc() + finally: + os.write(write_fd, result) + os.close(write_fd) + os._exit(0) + + os.close(write_fd) + out = os.read(read_fd, 1) + os.close(read_fd) + os.waitpid(pid, 0) + assert out == b"1" + + +@pytest.mark.skipif(not hasattr(os, "fork"), reason="requires os.fork (POSIX)") +def test_reset_first_party_purges_modules_and_registries(tmp_path): + """``_reset_first_party`` purges first-party modules and clears registries. + + Runs in a forked child so the global-registry reset can't corrupt the test + process — exactly how the daemon uses it (a throwaway child per compile). 
+ """ + mod_file = tmp_path / "fp_module.py" + mod_file.write_text("VALUE = 1\n") + + read_fd, write_fd = os.pipe() + pid = os.fork() + if pid == 0: # child + os.close(read_fd) + result = b"E" + try: + import importlib.util + import sys + + spec = importlib.util.spec_from_file_location( + "fp_module_under_test", mod_file + ) + assert spec is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) # type: ignore[union-attr] + sys.modules["fp_module_under_test"] = module + + from reflex_base.registry import RegistrationContext + + from reflex.page import DECORATED_PAGES + from reflex.state import all_base_state_classes + + RegistrationContext.ensure_context().base_states["sentinel"] = object() # type: ignore[assignment] + all_base_state_classes["sentinel"] = None + DECORATED_PAGES["sentinel_app"].append((lambda: None, {})) + + compile_daemon._reset_first_party([tmp_path.resolve()]) + + purged = "fp_module_under_test" not in sys.modules + cleared = ( + not RegistrationContext.ensure_context().base_states + and not all_base_state_classes + and not DECORATED_PAGES + ) + result = b"1" if (purged and cleared) else b"0" + except Exception: + import traceback + + traceback.print_exc() + finally: + os.write(write_fd, result) + os.close(write_fd) + os._exit(0) + + os.close(write_fd) + out = os.read(read_fd, 1) + os.close(read_fd) + os.waitpid(pid, 0) + assert out == b"1" From 7078d9ead0a056f1cf4f8cca83222eda538fb305 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Mon, 29 Jun 2026 01:23:12 +0500 Subject: [PATCH 03/18] perf(docs): build component prop-docs lazily at page eval multi_docs built every component's prop tables at module import, so the whole library reference was reconstructed on every import of the docs tree -- re-run on every dev hot-reload reimport, cold start, and backend respawn. 
Move the build into the page render closures (matching the non-library doc path) so a page builds its prop tables only when it is actually compiled. Docs cold import 9.7s -> 1.9s; hot-reload reimport 8.3s -> 0.6s. --- docs/app/reflex_docs/pages/docs/component.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/docs/app/reflex_docs/pages/docs/component.py b/docs/app/reflex_docs/pages/docs/component.py index 31954f6bcf8..e2847d8fb5c 100644 --- a/docs/app/reflex_docs/pages/docs/component.py +++ b/docs/app/reflex_docs/pages/docs/component.py @@ -921,16 +921,9 @@ def multi_docs( title: str, ll_component_list: list | None = None, ): - components = [ - component_docs(component_tuple, previews) - for component_tuple in component_list[1:] - ] ll_actual_path = actual_path.replace(".md", "-ll.md") ll_doc_exists = os.path.exists(ll_actual_path) ll_list = ll_component_list if ll_component_list is not None else component_list - ll_components = [ - component_docs(component_tuple, previews) for component_tuple in ll_list[1:] - ] active_class_name = "font-small bg-secondary-2 p-2 text-secondary-11 rounded-xl shadow-large w-28 cursor-default border border-secondary-4 text-center" @@ -982,6 +975,13 @@ def links(current_page, ll_doc_exists, path): @docpage(set_path=path, t=title) def out(): + # Built here (page eval) rather than at module import, so importing the + # docs tree — and every hot-reload reimport — doesn't construct every + # component's prop tables up front; only a compiled page builds its own. 
+ components = [ + component_docs(component_tuple, previews) + for component_tuple in component_list[1:] + ] toc = get_docgen_toc(actual_path) doc_content = Path(actual_path).read_text(encoding="utf-8") # Append API Reference headings for the component list @@ -1010,6 +1010,9 @@ def out(): @docpage(set_path=path + "low", t=title + " (Low Level)") def ll(): + ll_components = [ + component_docs(component_tuple, previews) for component_tuple in ll_list[1:] + ] ll_virtual = virtual_path.replace(".md", "-ll.md") toc = get_docgen_toc(ll_actual_path) doc_content = Path(ll_actual_path).read_text(encoding="utf-8") From 5ba66143f4a85864e0287395d386074a0cbe15a4 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Mon, 29 Jun 2026 01:28:26 +0500 Subject: [PATCH 04/18] perf(compiler): trim compile-daemon hot reload - disk_cache: stop re-evaluating stateful HIT pages during an incremental rebuild. The compiling process never serves (the daemon, the initial compile, and CLI compiles all exit; the serving backend re-evaluates the marked stateful pages itself), so re-running their render pipeline was pure waste. The stateful-pages marker stays complete -- hits recorded from the manifest, misses from the fresh compile. - compile_daemon: poll faster (0.25s -> 0.05s) but cheaply -- stat the known file set each tick and rglob only every 1s for added/removed files, cutting detection latency without burning idle CPU. - compile_daemon: log per-edit timing (reset / reimport / compile). 
--- reflex/compiler/disk_cache.py | 22 +++++----- reflex/utils/compile_daemon.py | 44 +++++++++++++++----- tests/units/compiler/test_disk_cache.py | 53 ++++++++++++++++++++++--- 3 files changed, 93 insertions(+), 26 deletions(-) diff --git a/reflex/compiler/disk_cache.py b/reflex/compiler/disk_cache.py index 7d6228fe11c..0f505d0abfb 100644 --- a/reflex/compiler/disk_cache.py +++ b/reflex/compiler/disk_cache.py @@ -375,18 +375,20 @@ def try_incremental_rebuild( install_imports, page_ctx.frontend_imports, memo_imports ) - # Re-register state for stateful hit pages (skipping eval would drop their - # state classes); stateless hits need nothing. + # Record which routes are stateful — miss pages from this compile, hit pages + # from the manifest — so the stateful-pages marker is complete. We do NOT + # re-evaluate hit pages to register their state in this process: it only + # produces .web and exits (the daemon, the initial compile, and CLI compiles + # never serve), and the serving backend re-evaluates the marked stateful + # pages itself. Re-evaluating them here re-ran the full render pipeline for + # every unchanged stateful page on every edit, for nothing. 
stateful_routes: dict[str, None] = {} - with console.timing("Evaluate Pages (Backend)"): - for page in pages: - if page.route in miss_routes: - if miss_ctx is not None and page.route in miss_ctx.stateful_routes: - stateful_routes[page.route] = None - continue - if manifest["pages"][page.route]["is_stateful"]: - app._compile_page(page.route, save_page=False) + for page in pages: + if page.route in miss_routes: + if miss_ctx is not None and page.route in miss_ctx.stateful_routes: stateful_routes[page.route] = None + elif manifest["pages"][page.route]["is_stateful"]: + stateful_routes[page.route] = None app._stateful_pages.update(stateful_routes) app._write_stateful_pages_marker() diff --git a/reflex/utils/compile_daemon.py b/reflex/utils/compile_daemon.py index 603ebe6d6f0..6c5d46d097a 100644 --- a/reflex/utils/compile_daemon.py +++ b/reflex/utils/compile_daemon.py @@ -52,11 +52,16 @@ from reflex.utils import console -#: Seconds between poll passes over the watched file set. -_POLL_INTERVAL = 0.25 +#: Seconds between poll passes. Each pass only re-``stat``s the known file set +#: (cheap), so this can be small for snappy detection of edits to existing files. +_POLL_INTERVAL = 0.05 #: After a change is seen, wait this long and re-snapshot so a burst of saves #: (e.g. format-on-save touching many files) collapses into a single compile. -_DEBOUNCE = 0.1 +_DEBOUNCE = 0.05 +#: How often to re-walk the tree (``rglob``) to discover added/removed files. +#: Between rescans we only ``stat`` the known set; adds are rarer and tolerate a +#: little latency, so this keeps idle CPU low while polling fast for edits. +_RESCAN_INTERVAL = 1.0 #: Watchdog: kill a compile child that runs longer than this (a hung/deadlocked #: child must never wedge the daemon). Generous enough for a real full compile. 
_COMPILE_TIMEOUT = 300.0 @@ -369,12 +374,18 @@ def _child_compile(roots: list[Path], prerender_routes: bool) -> None: """ from reflex.utils import prerequisites + # Timed in three steps so every hot reload reports where it spent its time + # (resetting state vs re-importing first-party code vs compiling). + t0 = time.perf_counter() _reset_first_party(roots) - prerequisites.get_compiled_app( - reload=False, - prerender_routes=prerender_routes, - use_rich=True, - trigger="hot_reload", + t1 = time.perf_counter() + app, _ = prerequisites.get_and_validate_app(reload=False) + t2 = time.perf_counter() + app._compile(prerender_routes=prerender_routes, use_rich=True, trigger="hot_reload") + t3 = time.perf_counter() + console.info( + f"Hot reload {t3 - t0:.2f}s (reset {t1 - t0:.2f}s, " + f"reimport {t2 - t1:.2f}s, compile {t3 - t2:.2f}s)" ) @@ -489,7 +500,11 @@ def _serve() -> None: # the manifest; recompute only after a compile, not on every poll tick. external = _external_dependency_files(roots) global_files = _global_files(root) - snapshot = _snapshot(_watch_paths(roots, root, external)) + # `paths` is the watched set, refreshed by an rglob rescan; each tick only + # re-stats it (cheap), so the poll can be fast without burning idle CPU. + paths = _watch_paths(roots, root, external) + snapshot = _snapshot(paths) + last_rescan = time.monotonic() console.info("Compile daemon ready (warm); watching for changes.") while True: @@ -497,7 +512,12 @@ def _serve() -> None: # Never outlive reflex-run: if our parent died we were reparented. if os.getppid() != parent_pid: return - current = _snapshot(_watch_paths(roots, root, external)) + # Cheap stat of the known set every tick; re-walk the tree occasionally + # to discover added/removed files. 
+ if time.monotonic() - last_rescan >= _RESCAN_INTERVAL: + paths = _watch_paths(roots, root, external) + last_rescan = time.monotonic() + current = _snapshot(paths) changed = { p for p in current.keys() | snapshot.keys() @@ -525,7 +545,9 @@ def _serve() -> None: # a newly-referenced content dir becomes watched. external = _external_dependency_files(roots) global_files = _global_files(root) - snapshot = _snapshot(_watch_paths(roots, root, external)) + paths = _watch_paths(roots, root, external) + snapshot = _snapshot(paths) + last_rescan = time.monotonic() def main(argv: list[str] | None = None) -> int: diff --git a/tests/units/compiler/test_disk_cache.py b/tests/units/compiler/test_disk_cache.py index 62b3330a38d..ebd54be385e 100644 --- a/tests/units/compiler/test_disk_cache.py +++ b/tests/units/compiler/test_disk_cache.py @@ -1,7 +1,7 @@ """Tests for the experimental disk-persisted incremental compile cache.""" import dataclasses -from collections.abc import Callable +from collections.abc import Callable, Sequence from typing import Any from reflex_base.components.component import Component @@ -44,7 +44,7 @@ def _page_c() -> Component: return rx.el.div(rx.el.h1("Page C"), _footer()) -def _compile(pages: list[_FakePage]) -> CompileContext: +def _compile(pages: Sequence[Any]) -> CompileContext: ctx = CompileContext( pages=pages, hooks=CompilerHooks(plugins=default_page_plugins()), @@ -267,9 +267,9 @@ def test_incremental_rebuild_one_miss_writes_only_that_page(tmp_path, monkeypatc # The edited page was recompiled and written; its content matches a clean # compile of that page. 
- out_path = compiler_utils.resolve_path_of_web_dir( - ctx.compiled_pages[edited_route].output_path - ) + output_path = ctx.compiled_pages[edited_route].output_path + assert output_path is not None + out_path = compiler_utils.resolve_path_of_web_dir(output_path) assert out_path.exists() assert ( out_path.read_text(encoding="utf-8") @@ -277,6 +277,49 @@ def test_incremental_rebuild_one_miss_writes_only_that_page(tmp_path, monkeypatc ) +def test_stateful_hit_is_marked_but_not_reevaluated(tmp_path, monkeypatch): + """A stateful HIT page is recorded in the marker but never re-evaluated. + + The compile process only produces .web and exits; the serving backend + re-evaluates the marked stateful pages itself, so re-evaluating them during + the incremental rebuild was pure waste. + """ + import json + + web = tmp_path / ".web" + web.mkdir() + monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + + app = rx.App() + app.add_page(_page_a, route="/a") + pages = list(app._unevaluated_pages.values()) + route = pages[0].route + ctx = _compile(pages) + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + # Mark the page as a stateful HIT page in the manifest. + manifest_path = web / disk_cache._MANIFEST_FILE + manifest = json.loads(manifest_path.read_text()) + manifest["pages"][route]["is_stateful"] = True + manifest_path.write_text(json.dumps(manifest)) + _stub_externals(app, monkeypatch) + + reevaluated: list[str] = [] + monkeypatch.setattr( + app, "_compile_page", lambda route, **k: reevaluated.append(route) + ) + + assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + # Not re-evaluated... + assert reevaluated == [] + # ...but still recorded as stateful so the backend's marker is complete. 
+ assert route in app._stateful_pages + + def test_load_manifest_rejects_wrong_schema(tmp_path, monkeypatch): import json From fe060560e3c725bebabebeb8d332061b0384ed10 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Mon, 29 Jun 2026 22:17:43 +0500 Subject: [PATCH 05/18] perf(compiler): shrink compile-cache manifest, drop in-process cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Profiling the hot-reload compile on the docs app (418 pages) showed the disk-cache rebuild spent ~1.1s of its ~1.6s on manifest I/O: the manifest stored every page's rendered output_code (and output_path/frontend_imports), ballooning it to 46MB, yet those fields are never read back — only dep_hashes, app_wrap_keys, is_stateful, and the merged all_imports are consumed. Storing just the bookkeeping that is read drops the manifest to 14MB. The in-process page cache (_PAGE_STORE / validate_page / store_page) is never reached by the warm daemon (its fork child cleared it and the disk path returns first), and forcing it measured ~55x slower than the disk path because it re-runs the whole app-level pipeline (memo render, stylesheet, plugins) every edit. Remove it, the verify mode that only guarded it (REFLEX_COMPILE_CACHE_VERIFY), and the dead page_source_fingerprint. Manifest schema bumped 2->3 so stale fat manifests are ignored. 
--- .../src/reflex_base/environment.py | 8 +- reflex/compiler/compiler.py | 178 +----------------- reflex/compiler/disk_cache.py | 52 +++-- reflex/compiler/page_cache.py | 118 ++---------- reflex/utils/compile_daemon.py | 7 +- tests/units/compiler/test_disk_cache.py | 39 ++-- tests/units/compiler/test_page_cache.py | 71 +------ 7 files changed, 60 insertions(+), 413 deletions(-) diff --git a/packages/reflex-base/src/reflex_base/environment.py b/packages/reflex-base/src/reflex_base/environment.py index cca5de00ba9..169eac33176 100644 --- a/packages/reflex-base/src/reflex_base/environment.py +++ b/packages/reflex-base/src/reflex_base/environment.py @@ -613,16 +613,10 @@ class EnvironmentVariables: # Experimental: incremental compile cache. A fresh compile process (e.g. a # reflex-run hot-reload worker) reuses each page's compiled output from an - # on-disk manifest and recompiles only the pages whose source changed, - # backed by an in-process per-page cache for repeat compiles in one process. + # on-disk manifest and recompiles only the pages whose source changed. # See reflex/compiler/disk_cache.py and reflex/compiler/page_cache.py. REFLEX_COMPILE_CACHE: EnvVar[bool] = env_var(False) - # When the compile cache reuses pages, also run a full compile and assert - # byte-identical output, falling back to the full result on any mismatch. - # Doubles compile time; for validating the cache on an app. - REFLEX_COMPILE_CACHE_VERIFY: EnvVar[bool] = env_var(False) - # Inherited by uvicorn/granian reload workers so the backend can distinguish # dev reload-capable worker boots from other backend starts. Never set in prod. 
REFLEX_DEV_BACKEND_RELOAD_ACTIVE: EnvVar[bool] = env_var(False, internal=True) diff --git a/reflex/compiler/compiler.py b/reflex/compiler/compiler.py index de5433d3697..2b368cd5c7b 100644 --- a/reflex/compiler/compiler.py +++ b/reflex/compiler/compiler.py @@ -1108,91 +1108,6 @@ def _resolve_radix_themes_plugin( return plugin_chain, radix_plugin -def _normalize_imports_for_compare(all_imports: Any) -> dict[str, list[str]]: - """Render an import dict to a comparable, order-independent form. - - Args: - all_imports: The parsed import dict to normalize. - - Returns: - A mapping of library to its sorted, stringified import fields. - """ - return {lib: sorted(str(v) for v in fields) for lib, fields in all_imports.items()} - - -def _diff_compile_contexts( - incremental: CompileContext, full: CompileContext -) -> list[str]: - """Return the aggregate fields where the two compile contexts diverge. - - Args: - incremental: The cache-assisted compile context. - full: A no-cache full compile of the same app. - - Returns: - A list of human-readable divergence labels (empty when identical). 
- """ - diffs: list[str] = [] - inc_pages = {r: pc.output_code for r, pc in incremental.compiled_pages.items()} - full_pages = {r: pc.output_code for r, pc in full.compiled_pages.items()} - if inc_pages.keys() != full_pages.keys(): - diffs.append(f"routes:{sorted(set(inc_pages) ^ set(full_pages))}") - diffs.extend( - f"page:{r}" - for r in inc_pages.keys() & full_pages.keys() - if inc_pages[r] != full_pages[r] - ) - if _normalize_imports_for_compare( - incremental.all_imports - ) != _normalize_imports_for_compare(full.all_imports): - diffs.append("all_imports") - if sorted(map(str, incremental.auto_memo_components)) != sorted( - map(str, full.auto_memo_components) - ): - diffs.append("auto_memo_components") - if sorted(incremental.stateful_routes) != sorted(full.stateful_routes): - diffs.append("stateful_routes") - if sorted(map(str, incremental.app_wrap_components)) != sorted( - map(str, full.app_wrap_components) - ): - diffs.append("app_wrap_components") - return diffs - - -def _full_compile_context( - app: App, compiler_plugins: Sequence[Plugin] -) -> CompileContext: - """Compile every page with no cache reuse (for verify-mode comparison). - - Resets the shared bundling/memo globals first so it starts from the same - clean state the primary compile did. - - Args: - app: The app to compile. - compiler_plugins: The resolved compiler plugins. - - Returns: - A fully compiled context over all pages. 
- """ - from reflex_base.components.dynamic import bundle_library, reset_bundled_libraries - - reset_bundled_libraries() - reset_memo_component_classes() - for plugin in compiler_plugins: - for dependency in plugin.get_frontend_dependencies(): - bundle_library(dependency) - ctx = CompileContext( - app=app, - pages=list(app._unevaluated_pages.values()), - hooks=CompilerHooks( - plugins=default_page_plugins(style=app.style, plugins=compiler_plugins) - ), - ) - with ctx: - ctx.compile() - return ctx - - def compile_app( app: App, *, @@ -1278,34 +1193,9 @@ def compile_app( task = progress.add_task("Compiling:", total=base_total) all_pages = list(app._unevaluated_pages.values()) - # In-process per-page cache with a Salsa-style dependency graph. Reuse the - # compiled context of pages whose recorded dependency set is byte-unchanged - # (and whose global epoch matches); compile only the rest, then re-merge - # cached pages' contributions into the app-wide aggregates. In-process only - # (contributions hold live objects), so it helps repeat compiles in one - # process; cross-process reuse is handled by the disk cache above. 
- in_process_cache = cache_on - route_to_component = {p.route: p.component for p in all_pages} - state_index: dict[str, Any] = {} - epoch = "" - hasher: Any = None - hit_routes: list[str] = [] - if in_process_cache: - from reflex.compiler import page_cache - - state_index, _ = page_cache.state_dependency_index() - epoch = page_cache.global_epoch() - hasher = page_cache.make_hasher() - hit_routes = [ - page.route - for page in all_pages - if page_cache.validate_page(page.route, epoch, hasher) is not None - ] - - hit_set = set(hit_routes) compile_ctx = CompileContext( app=app, - pages=[p for p in all_pages if p.route not in hit_set], + pages=all_pages, hooks=CompilerHooks( plugins=default_page_plugins(style=app.style, plugins=compiler_plugins) ), @@ -1317,72 +1207,6 @@ def compile_app( render_progress=lambda: progress.advance(task), ) - if in_process_cache: - from reflex.compiler import page_cache - - # Cache freshly-compiled (miss) pages with the dependency set they read. - for route in list(compile_ctx.compiled_pages): - page_ctx_fresh = compile_ctx.compiled_pages[route] - dep_hashes = page_cache.page_dependency_hashes( - page_ctx_fresh, - route_to_component[route], - state_index, - hasher, - ) - page_cache.store_page( - route, - epoch, - dep_hashes, - page_ctx_fresh, - route in compile_ctx.stateful_routes, - ) - # Re-merge cached (hit) pages' contributions into the aggregates. 
- for route in hit_routes: - cached = page_cache.validate_page(route, epoch, hasher) - if cached is None: # defensive: invalidated concurrently - continue - page_ctx, is_stateful = cached - compile_ctx.compiled_pages[route] = page_ctx - compile_ctx.all_imports = utils.merge_imports( - compile_ctx.all_imports, page_ctx.frontend_imports - ) - compile_ctx.app_wrap_components.update(page_ctx.app_wrap_components) - compile_ctx.auto_memo_components.update(page_ctx.memo_contributions) - if is_stateful: - compile_ctx.stateful_routes[route] = None - # Restore deterministic page order (route order of the app). - compile_ctx.compiled_pages = { - p.route: compile_ctx.compiled_pages[p.route] - for p in all_pages - if p.route in compile_ctx.compiled_pages - } - if hit_routes: - console.info( - "Incremental compile: recompiled " - f"{len(all_pages) - len(hit_routes)} page(s)." - ) - - # Verify mode: prove the cache-assisted output matches a full compile, and - # fall back to the full result on any divergence so a cache bug can never - # ship. Doubles compile time; opt-in for validation. - if hit_routes and environment.REFLEX_COMPILE_CACHE_VERIFY.get(): - from reflex.compiler import page_cache - - full_ctx = _full_compile_context(app, compiler_plugins) - diffs = _diff_compile_contexts(compile_ctx, full_ctx) - if diffs: - console.warn( - "compile cache verify FAILED " - f"({len(diffs)} divergence(s): {diffs[:8]}); using the full " - "compile and clearing the page cache." 
- ) - page_cache.clear_page_store() - compile_ctx = full_ctx - else: - console.debug( - "compile cache verify: incremental output matches full compile" - ) - for route, page_ctx in compile_ctx.compiled_pages.items(): app._check_routes_conflict(route) if not isinstance(page_ctx.root_component, Component): diff --git a/reflex/compiler/disk_cache.py b/reflex/compiler/disk_cache.py index 0f505d0abfb..c69800ee1e7 100644 --- a/reflex/compiler/disk_cache.py +++ b/reflex/compiler/disk_cache.py @@ -2,24 +2,23 @@ Enabled by ``REFLEX_COMPILE_CACHE``. When off (default), nothing changes. -The in-process page cache (``page_cache._PAGE_STORE``) reuses compiled pages -within one process, so it never fires in the ``reflex run`` edit loop — the -reloader respawns a fresh worker subprocess on every ``.py`` change, starting -with an empty in-memory store. This cache persists each page's *serializable* -contribution to disk so the fresh worker can reuse it. +In the ``reflex run`` edit loop every compile runs in a fresh process (the warm +daemon forks a throwaway child per change), so there is no in-memory state to +reuse across edits. This cache persists the small amount of bookkeeping a fresh +process needs to recompile only what changed and reuse everything else already +on disk. **What is persisted** (``.web/reflex_compile_cache.json``): per page, the ``{path: hash}`` of its **dependency set** (the exact source files it read — own module, markdown/data, component modules in its tree, and the state files it -uses; see ``page_cache.page_dependency_hashes``), its rendered ``output_code`` -and path, its ``frontend_imports`` (``ImportVar`` is a frozen dataclass of -primitives), its app-wrap key-set, and whether evaluating it registered new -state. App-wide: the genuinely-global ``epoch`` (reflex version + -config/lockfiles), the reflex version, and the merged imports. 
The manifest -deliberately stores **no rendered memo files**: a hit page's memo files are -already on disk from the prior compile, and a miss page re-renders its own on -recompile — so writing the manifest is just string/key serialization, never a -second memo render. +uses; see ``page_cache.page_dependency_hashes``), its app-wrap key-set, and +whether evaluating it registered new state. App-wide: the genuinely-global +``epoch`` (reflex version + config/lockfiles), the reflex version, and the merged +frontend imports. The manifest stores **no rendered output**: a page's compiled +``.js`` (and its memo files) already live in ``.web`` from the prior compile — a +hit reuses those files untouched, a miss rewrites its own — so the manifest is +pure bookkeeping, never a second render. (This is why it stays small: storing the +rendered ``output_code`` would balloon it for no gain, as it is never read back.) **The fast path** (``try_incremental_rebuild``). On a fresh compile it reuses the manifest when the global inputs match (reflex version, route set, and the global @@ -29,19 +28,18 @@ (app root, contexts, theme, stylesheet, …) stay valid because the epoch and route set are unchanged. Then: -- A *stateless* hit page is skipped entirely (its frontend file is reused and - evaluating it would register nothing). -- A *stateful* hit page is re-evaluated for the backend only (to re-register its - state classes), reusing its frontend file. ``is_stateful`` is true exactly - when the page's first eval grew the state registry, so this is precisely the - set whose state would otherwise go missing. +- A hit page is left exactly as-is: its frontend ``.js`` is already on disk and + is neither rewritten nor re-evaluated. - A *miss* page (source changed) is fully recompiled and its files rewritten. 
+- Stateful pages are recorded in the stateful-pages marker — misses from this + compile, hits from the manifest's ``is_stateful`` flag — so the serving backend + (which re-evaluates them to register their state) sees the complete set. This + process only writes ``.web`` and exits; it never re-evaluates a hit page. After recompiling misses, two guards must hold or the whole thing falls back to a full compile (return False): each miss page's app-wrap key-set and stateful flag -must be unchanged (otherwise the reused on-disk app root would be wrong). Any -state edit, shared-file edit, route add/remove, or version change also falls back -to a full compile. ``REFLEX_COMPILE_CACHE_VERIFY`` is the backstop for an app. +must be unchanged (otherwise the reused on-disk app root would be wrong). A route +add/remove or a global/version change also falls back to a full compile. """ from __future__ import annotations @@ -67,7 +65,7 @@ from reflex.app import App #: Bump when the manifest layout changes (old manifests are then ignored). -_SCHEMA = 2 +_SCHEMA = 3 #: Manifest filename under the web directory. 
_MANIFEST_FILE = "reflex_compile_cache.json" @@ -169,9 +167,6 @@ def write_manifest( "dep_hashes": page_cache.page_dependency_hashes( page_ctx, page.component, state_index, hasher, root ), - "output_path": page_ctx.output_path, - "output_code": page_ctx.output_code, - "frontend_imports": _serialize_imports(page_ctx.frontend_imports), "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), "is_stateful": page.route in compile_ctx.stateful_routes, } @@ -433,9 +428,6 @@ def _update_manifest_for_misses( "dep_hashes": page_cache.page_dependency_hashes( page_ctx, page.component, state_index, hasher ), - "output_path": page_ctx.output_path, - "output_code": page_ctx.output_code, - "frontend_imports": _serialize_imports(page_ctx.frontend_imports), "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), "is_stateful": page.route in miss_ctx.stateful_routes, } diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 97116337a7a..4f78b4521e4 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -1,12 +1,11 @@ -"""Experimental incremental compile cache (flag-gated). +"""Per-page dependency graph for the incremental compile cache (flag-gated). Enabled by ``REFLEX_COMPILE_CACHE``. When off (the default), the compiler behaves exactly as before. -**In-process per-page cache, with a Salsa-style dependency graph.** Each page -records the *exact set of source files it actually read*, so a change -invalidates only the pages that depend on it (not all pages). A page's -dependency set is the union of: +**A Salsa-style dependency graph.** Each page records the *exact set of source +files it actually read*, so a change invalidates only the pages that depend on it +(not all pages). 
A page's dependency set is the union of: - the **transitive first-party ``.py`` import closure** of its defining module (``page_py_dependencies`` — captures function-based views and shared helpers @@ -20,20 +19,20 @@ imported), - the **fine-grained state files** it references (``used_state_files``). -A page is reused iff every file in its dependency set is byte-unchanged and the -small genuinely-global ``global_epoch`` (Reflex version + ``rxconfig`` + lockfile) -is unchanged; adding/removing a route is handled separately (it changes shared -nav). Per-page dependency sets also track files *outside* the project root (e.g. -the docs site reads markdown from a sibling directory), so an external-source -edit still invalidates exactly the dependent pages. Cached pages' contributions -are re-merged into the app-wide aggregates by ``compile_app``; the store holds -live objects (``PageContext``), so it is in-process only. The cross-process -counterpart that survives a hot-reload worker respawn lives in -``reflex/compiler/disk_cache.py``. ``REFLEX_COMPILE_CACHE_VERIFY`` proves the -cache-assisted output matches a full compile and falls back on any mismatch — -the backstop for the residual gaps the static graph cannot see: runtime -``importlib`` imports and data files read at *module-import* time (outside the -per-page eval window). +A page is unchanged iff every file in its dependency set is byte-unchanged and +the small genuinely-global ``global_epoch`` (Reflex version + ``rxconfig`` + +lockfile) is unchanged; adding/removing a route is handled separately (it changes +shared nav). Per-page dependency sets also track files *outside* the project root +(e.g. the docs site reads markdown from a sibling directory), so an +external-source edit still invalidates exactly the dependent pages. These content +hashes and dependency sets are what ``reflex/compiler/disk_cache.py`` persists and +compares each compile to recompile only the pages whose source changed. 
+ +Two residual gaps the static graph cannot see: runtime ``importlib`` imports and +data files read at *module-import* time (outside the per-page eval window). An +edit reached only through one of those would not invalidate its page; such +patterns are rare in page modules, and a global/version change still forces a +full recompile. """ from __future__ import annotations @@ -643,84 +642,3 @@ def deps_unchanged( True iff every dependency file is byte-unchanged. """ return all(hasher(path) == digest for path, digest in dep_hashes.items()) - - -def page_source_fingerprint(component: BaseComponent | object) -> str: - """Fingerprint a page from its own module source (no build required). - - Args: - component: The page component or factory callable. - - Returns: - A hex digest of the page's defining module source. - """ - import inspect - import sys - - module_name = getattr(component, "__module__", None) - parts = [repr(getattr(component, "__qualname__", repr(component)))] - mod = sys.modules.get(module_name) if module_name else None - file = getattr(mod, "__file__", None) - if file and Path(file).exists(): - with contextlib.suppress(OSError): - parts.append(hashlib.sha256(Path(file).read_bytes()).hexdigest()) - elif callable(component): - with contextlib.suppress(OSError, TypeError): - parts.append(inspect.getsource(component)) - return _sha(*parts) - - -#: route -> (global_epoch, dep_hashes, PageContext, is_stateful) -_PAGE_STORE: dict[str, tuple[str, dict[str, str], PageContext, bool]] = {} - - -def validate_page( - route: str, - epoch: str, - hasher: Callable[[str], str | None], -) -> tuple[PageContext, bool] | None: - """Return the cached page iff its dependency set and the global epoch match. - - A page is valid when the genuinely-global epoch is unchanged and every file - in its recorded dependency set is byte-unchanged. Editing a file invalidates - exactly the pages whose dependency set contains it. - - Args: - route: The page route. 
- epoch: The current global epoch (see :func:`global_epoch`). - hasher: A memoized path -> content-hash function (see :func:`make_hasher`). - - Returns: - ``(PageContext, is_stateful)`` on a valid hit, else None. - """ - entry = _PAGE_STORE.get(route) - if entry is None: - return None - stored_epoch, dep_hashes, page_ctx, is_stateful = entry - if stored_epoch != epoch or not deps_unchanged(dep_hashes, hasher): - return None - return page_ctx, is_stateful - - -def store_page( - route: str, - epoch: str, - dep_hashes: dict[str, str], - page_ctx: PageContext, - is_stateful: bool, -) -> None: - """Record a freshly-compiled page with the dependency set it relies on. - - Args: - route: The page route. - epoch: The global epoch it compiled under. - dep_hashes: ``{path: hash}`` for the page's dependency set. - page_ctx: The compiled PageContext to cache. - is_stateful: Whether the page registered new state during evaluation. - """ - _PAGE_STORE[route] = (epoch, dep_hashes, page_ctx, is_stateful) - - -def clear_page_store() -> None: - """Drop all in-process per-page cache entries.""" - _PAGE_STORE.clear() diff --git a/reflex/utils/compile_daemon.py b/reflex/utils/compile_daemon.py index 6c5d46d097a..6d18638a3fb 100644 --- a/reflex/utils/compile_daemon.py +++ b/reflex/utils/compile_daemon.py @@ -328,11 +328,10 @@ def _reset_first_party(roots: list[Path]) -> None: ): cached.cache_clear() DECORATED_PAGES.clear() - # In-process caches hold live objects from the previous import; drop them so - # the fresh compile can't reuse a tree built from now-stale classes. Cross- - # compile reuse comes from the on-disk manifest, not these. + # The import graph caches each module's parsed import edges; a changed file + # may import differently now, so drop it to force a re-parse. Cross-compile + # page reuse comes from the on-disk manifest. 
page_cache.clear_import_graph() - page_cache.clear_page_store() _reset_model_metadata() diff --git a/tests/units/compiler/test_disk_cache.py b/tests/units/compiler/test_disk_cache.py index ebd54be385e..660052df758 100644 --- a/tests/units/compiler/test_disk_cache.py +++ b/tests/units/compiler/test_disk_cache.py @@ -131,28 +131,27 @@ def test_write_and_load_manifest(tmp_path, monkeypatch): assert manifest is not None assert manifest["schema"] == disk_cache._SCHEMA assert set(manifest["pages"]) == {"/a", "/b", "/c"} - # the cached output is exactly what the compile produced for each page for route in ("/a", "/b", "/c"): - assert ( - manifest["pages"][route]["output_code"] - == ctx.compiled_pages[route].output_code - ) + entry = manifest["pages"][route] + # the manifest is pure bookkeeping: dep set + app-wrap keys + stateful flag + assert set(entry) == {"dep_hashes", "app_wrap_keys", "is_stateful"} # these static pages register no new state - assert manifest["pages"][route]["is_stateful"] is False - # imports round-trip cleanly - restored = disk_cache._deserialize_imports( - manifest["pages"]["/a"]["frontend_imports"] - ) - assert restored == ctx.compiled_pages["/a"].frontend_imports + assert entry["is_stateful"] is False + # rendered output is never persisted (it already lives in .web, and is + # never read back from the manifest) -> keeps the manifest small + assert "output_code" not in entry + assert "frontend_imports" not in entry + # the app-wide merged imports round-trip cleanly + restored = disk_cache._deserialize_imports(manifest["all_imports"]) + assert restored == ctx.all_imports def test_unchanged_pages_compile_identically(tmp_path, monkeypatch): """The reuse correctness property: an unchanged page recompiles byte-for-byte. - The disk cache reuses a hit page's stored ``output_code`` verbatim, so it is - correct iff a fresh compile of that page yields identical output. 
Compile A, - B, C; then compile A, B(edited), C; A and C must be byte-identical, and the - manifest's cached A/C output must equal the fresh recompile. + The disk cache leaves a hit page's already-on-disk ``.web`` file untouched, so + reuse is correct iff a fresh compile of that page yields identical output. + Compile A, B, C; then compile A, B(edited), C; A and C must be byte-identical. """ web = tmp_path / ".web" web.mkdir() @@ -164,9 +163,6 @@ def test_unchanged_pages_compile_identically(tmp_path, monkeypatch): _FakePage(route="/c", component=_page_c), ] ctx1 = _compile(pages) - disk_cache.write_manifest(ctx1, pages, ctx1.all_imports, root=tmp_path) - manifest = disk_cache.load_manifest() - assert manifest is not None # "Edit" page B; recompile the whole app cleanly. pages_edited = [ @@ -187,13 +183,6 @@ def test_unchanged_pages_compile_identically(tmp_path, monkeypatch): assert ( ctx2.compiled_pages["/b"].output_code != ctx1.compiled_pages["/b"].output_code ) - # The cached output we'd reuse for A/C equals a clean recompile of them. 
- assert ( - manifest["pages"]["/a"]["output_code"] == ctx2.compiled_pages["/a"].output_code - ) - assert ( - manifest["pages"]["/c"]["output_code"] == ctx2.compiled_pages["/c"].output_code - ) def _stub_externals(app, monkeypatch): diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index 0750224390c..62250c34a25 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -1,4 +1,4 @@ -"""Tests for the experimental incremental compile cache (in-process page cache).""" +"""Tests for the per-page dependency graph used by the incremental compile cache.""" from reflex.compiler import page_cache @@ -44,72 +44,3 @@ def test_used_state_files_from_output_and_memos(tmp_path): # un-introspectable memo -> conservative (all fine files) boom = SimpleNamespace(render=lambda: (_ for _ in ()).throw(RuntimeError())) assert page_cache.used_state_files(out, [boom], id_to_file) == {sfile, mfile} - - -def test_validate_page_fine_grained_deps(): - page_cache.clear_page_store() - ctx = object() - dep = "/proj/state.py" - # page depends on file `dep` at content hash H1, under global epoch "e1" - page_cache.store_page("/x", "e1", {dep: "H1"}, ctx, True) - # all deps unchanged + epoch matches -> hit - assert page_cache.validate_page("/x", "e1", lambda p: {dep: "H1"}.get(p)) == ( - ctx, - True, - ) - # a dependency file changed -> miss - assert page_cache.validate_page("/x", "e1", lambda p: {dep: "H2"}.get(p)) is None - # the genuinely-global epoch changed -> miss - assert page_cache.validate_page("/x", "e2", lambda p: {dep: "H1"}.get(p)) is None - - -def test_validate_page_with_no_deps_only_tracks_epoch(): - page_cache.clear_page_store() - ctx = object() - # page depends on NO files - page_cache.store_page("/x", "e1", {}, ctx, False) - # some unrelated file changed -> page is still a hit (it depends on nothing) - assert page_cache.validate_page("/x", "e1", lambda p: "Z") == (ctx, False) - # the global 
epoch changed -> miss - assert page_cache.validate_page("/x", "e2", lambda p: "Z") is None - # no stored entry -> miss - page_cache.clear_page_store() - assert page_cache.validate_page("/x", "e1", lambda p: None) is None - - -def _fake_ctx(pages, imports=None, memo=None, stateful=None, wraps=None): - from types import SimpleNamespace - - return SimpleNamespace( - compiled_pages={r: SimpleNamespace(output_code=c) for r, c in pages.items()}, - all_imports=imports or {}, - auto_memo_components=memo or {}, - stateful_routes=stateful or {}, - app_wrap_components=wraps or {}, - ) - - -def test_verify_diff_identical(): - from reflex.compiler import compiler - - a = _fake_ctx({"/": "CODE", "/x": "Y"}) - b = _fake_ctx({"/": "CODE", "/x": "Y"}) - assert compiler._diff_compile_contexts(a, b) == [] - - -def test_verify_diff_detects_page_change(): - from reflex.compiler import compiler - - a = _fake_ctx({"/": "OLD"}) - b = _fake_ctx({"/": "NEW"}) - assert "page:/" in compiler._diff_compile_contexts(a, b) - - -def test_verify_diff_detects_missing_route_and_memo(): - from reflex.compiler import compiler - - a = _fake_ctx({"/": "C"}, memo={("Memo", None): 1}) - b = _fake_ctx({"/": "C", "/x": "C"}, memo={}) - diffs = compiler._diff_compile_contexts(a, b) - assert any(d.startswith("routes:") for d in diffs) - assert "auto_memo_components" in diffs From 9f1b1c294d7bc53995336bb67fc5321db0b749e4 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Tue, 30 Jun 2026 00:28:20 +0500 Subject: [PATCH 06/18] refactor(compiler): drop unused page_cache helpers Remove page_module_files and file_hashes, now-dead after the compile-cache manifest rework, along with their tests. 
--- reflex/compiler/page_cache.py | 33 ------------------------- tests/units/compiler/test_page_cache.py | 9 ------- 2 files changed, 42 deletions(-) diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 4f78b4521e4..2bd3da8b466 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -227,23 +227,6 @@ def _module_file(component: object) -> Path | None: return Path(file) if file else None -def page_module_files(components: object) -> set[Path]: - """Resolve the set of module files that define the given page components. - - Args: - components: An iterable of page component callables/objects. - - Returns: - The set of resolved module file paths. - """ - files = set() - for comp in components: # type: ignore[attr-defined] - f = _module_file(comp) - if f is not None: - files.add(f.resolve()) - return files - - def component_module_files( root_component: object, root: Path | None = None ) -> set[Path]: @@ -509,22 +492,6 @@ def under_root(comp: object) -> Path | None: return {i: f for i, f in id_to_file.items() if f in fine}, fine -def file_hashes(files: set[Path]) -> dict[str, str]: - """Map each file (as a string) to a hash of its current content. - - Args: - files: The files to hash. - - Returns: - A mapping of file path string to content hash. 
- """ - out: dict[str, str] = {} - for f in files: - with contextlib.suppress(OSError): - out[str(f)] = hashlib.sha256(f.read_bytes()).hexdigest() - return out - - def used_state_files( output_code: str, memo_components: object, diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index 62250c34a25..b6b33f5e3ef 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -15,15 +15,6 @@ def test_global_epoch_tracks_global_files(tmp_path): assert page_cache.global_epoch(root=tmp_path) != epoch -def _dummy_page(): # a page-like callable defined in this module - return None - - -def test_page_module_files_resolves(tmp_path): - files = page_cache.page_module_files([_dummy_page]) - assert any(p.name == "test_page_cache.py" for p in files) - - def test_used_state_files_from_output_and_memos(tmp_path): from types import SimpleNamespace From 43d5351f1a5c86c3b589a631eb29d9a16d29d148 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Tue, 30 Jun 2026 01:12:44 +0500 Subject: [PATCH 07/18] fix(compiler): track app-level config and memo imports in compile cache Two correctness fixes for the incremental compile cache, plus a pass to trim verbose comments and docstrings across the cache modules. - global_epoch now fingerprints the app-level config files: the app entrypoint module plus the config-only modules it imports (theme, app-wraps, stylesheets), found by walking the import graph with page modules as barriers so per-page incrementality is preserved. An edit to app-wide config previously left every page a hit, so the reused on-disk app root / contexts / theme stayed stale. 
- The incremental manifest refresh now persists the complete install import set (page imports merged with the memo-component imports the rebuild generated, and threading root through dependency discovery), so a later all-hit compile installs every package the reused memo files need instead of dropping newly-introduced memo packages. --- docs/app/reflex_docs/pages/docs/component.py | 4 +- .../src/reflex_base/plugins/compiler.py | 21 +- .../reflex-base/src/reflex_base/vars/base.py | 7 +- reflex/compiler/disk_cache.py | 143 ++++---- reflex/compiler/page_cache.py | 335 +++++++++++------- reflex/compiler/utils.py | 11 +- reflex/reflex.py | 7 +- reflex/utils/compile_daemon.py | 83 ++--- tests/units/compiler/test_disk_cache.py | 39 +- tests/units/compiler/test_page_cache.py | 195 ++++++++++ tests/units/utils/test_compile_daemon.py | 3 +- 11 files changed, 551 insertions(+), 297 deletions(-) diff --git a/docs/app/reflex_docs/pages/docs/component.py b/docs/app/reflex_docs/pages/docs/component.py index e2847d8fb5c..7d0310fae52 100644 --- a/docs/app/reflex_docs/pages/docs/component.py +++ b/docs/app/reflex_docs/pages/docs/component.py @@ -975,9 +975,7 @@ def links(current_page, ll_doc_exists, path): @docpage(set_path=path, t=title) def out(): - # Built here (page eval) rather than at module import, so importing the - # docs tree — and every hot-reload reimport — doesn't construct every - # component's prop tables up front; only a compiled page builds its own. + # Build prop docs during page eval so imports stay cheap. 
components = [ component_docs(component_tuple, previews) for component_tuple in component_list[1:] diff --git a/packages/reflex-base/src/reflex_base/plugins/compiler.py b/packages/reflex-base/src/reflex_base/plugins/compiler.py index dd02fabda08..97e4f0d9e36 100644 --- a/packages/reflex-base/src/reflex_base/plugins/compiler.py +++ b/packages/reflex-base/src/reflex_base/plugins/compiler.py @@ -36,12 +36,7 @@ _BaseComponentT = TypeVar("_BaseComponentT", bound=BaseComponent) -#: Optional per-page source-read recorder, installed by an incremental compile -#: cache. When set, :meth:`CompileContext.compile` wraps each page's evaluation -#: in ``page_source_recorder()``; the context manager yields a set that collects -#: the source files (e.g. markdown/data) read during that page's eval, which is -#: then stored on the page's ``source_files``. ``None`` (default) disables -#: recording, so the compile path is unchanged when no cache is active. +#: Optional recorder for source files read during each page evaluation. page_source_recorder: Callable[[], AbstractContextManager[set[str]]] | None = None @@ -700,15 +695,9 @@ class PageContext(BaseContext): output_path: str | None = None output_code: str | None = None source_module: str | None = None - # Source files (e.g. markdown/data) read while evaluating this page, when a - # per-page read recorder is installed (see ``page_source_recorder``). Lets an - # incremental cache depend on the exact non-import inputs a page consumed, so - # editing one page's data invalidates only that page. Empty when no recorder - # is active. + # Source files read while evaluating this page, when a recorder is installed. source_files: set[str] = dataclasses.field(default_factory=set) - # Auto-memo components first registered while compiling THIS page, keyed by - # ``(tag, source_module)``. Lets an incremental cache attribute memo - # contributions per page so a skipped page can re-register them. 
+ # Auto-memo components first registered while compiling this page. memo_contributions: dict[tuple[str, str | None], Any] = dataclasses.field( default_factory=dict ) @@ -826,9 +815,7 @@ def compile( self.memoize_wrappers.clear() self.auto_memo_components.clear() - # Reset the deterministic ref-name generator so a second in-process - # compile reproduces the same auto-generated names as the first (these - # names feed auto-memo content hashes, so drift breaks reproducibility). + # Keep generated ref names stable across in-process compiles. reset_unique_variable_names() recorder = page_source_recorder diff --git a/packages/reflex-base/src/reflex_base/vars/base.py b/packages/reflex-base/src/reflex_base/vars/base.py index 15030b3e44a..ff33bd00edd 100644 --- a/packages/reflex-base/src/reflex_base/vars/base.py +++ b/packages/reflex-base/src/reflex_base/vars/base.py @@ -3217,11 +3217,8 @@ def get_uuid_string_var() -> Var: def reset_unique_variable_names() -> None: """Reset the deterministic unique-name generator to its initial state. - ``get_unique_variable_name`` draws from a seeded RNG and dedups against - ``USED_VARIABLES``; both persist process-wide, so a second in-process compile - would draw *different* ref names than the first (the RNG state and the used - set carry over). Resetting them before each compile makes the generated names - reproducible across compiles — names only need to be unique within a compile. + Names only need to be unique within one compile, so resetting before each + compile makes auto-generated ref names reproducible. """ USED_VARIABLES.clear() _UNIQUE_NAME_RNG.seed(42) diff --git a/reflex/compiler/disk_cache.py b/reflex/compiler/disk_cache.py index c69800ee1e7..b047afd0489 100644 --- a/reflex/compiler/disk_cache.py +++ b/reflex/compiler/disk_cache.py @@ -1,45 +1,9 @@ -"""Experimental disk-persisted incremental compile cache (flag-gated). - -Enabled by ``REFLEX_COMPILE_CACHE``. When off (default), nothing changes. 
- -In the ``reflex run`` edit loop every compile runs in a fresh process (the warm -daemon forks a throwaway child per change), so there is no in-memory state to -reuse across edits. This cache persists the small amount of bookkeeping a fresh -process needs to recompile only what changed and reuse everything else already -on disk. - -**What is persisted** (``.web/reflex_compile_cache.json``): per page, the -``{path: hash}`` of its **dependency set** (the exact source files it read — -own module, markdown/data, component modules in its tree, and the state files it -uses; see ``page_cache.page_dependency_hashes``), its app-wrap key-set, and -whether evaluating it registered new state. App-wide: the genuinely-global -``epoch`` (reflex version + config/lockfiles), the reflex version, and the merged -frontend imports. The manifest stores **no rendered output**: a page's compiled -``.js`` (and its memo files) already live in ``.web`` from the prior compile — a -hit reuses those files untouched, a miss rewrites its own — so the manifest is -pure bookkeeping, never a second render. (This is why it stays small: storing the -rendered ``output_code`` would balloon it for no gain, as it is never read back.) - -**The fast path** (``try_incremental_rebuild``). On a fresh compile it reuses the -manifest when the global inputs match (reflex version, route set, and the global -epoch). Each page is then a hit iff **every file in its recorded dependency set -is byte-unchanged** — so editing one markdown doc or one shared view recompiles -exactly the pages that depend on it, not all of them. The on-disk app-wide files -(app root, contexts, theme, stylesheet, …) stay valid because the epoch and route -set are unchanged. Then: - -- A hit page is left exactly as-is: its frontend ``.js`` is already on disk and - is neither rewritten nor re-evaluated. -- A *miss* page (source changed) is fully recompiled and its files rewritten. 
-- Stateful pages are recorded in the stateful-pages marker — misses from this - compile, hits from the manifest's ``is_stateful`` flag — so the serving backend - (which re-evaluates them to register their state) sees the complete set. This - process only writes ``.web`` and exits; it never re-evaluates a hit page. - -After recompiling misses, two guards must hold or the whole thing falls back to a -full compile (return False): each miss page's app-wrap key-set and stateful flag -must be unchanged (otherwise the reused on-disk app root would be wrong). A route -add/remove or a global/version change also falls back to a full compile. +"""Disk-persisted incremental compile cache for ``REFLEX_COMPILE_CACHE``. + +The manifest stores only bookkeeping: per-page dependency hashes, app-wrap keys, +statefulness, and app-wide frontend imports. Rendered files stay in ``.web``. +When global inputs and routes still match, unchanged pages are reused from disk +and only dependency-changed pages are recompiled. """ from __future__ import annotations @@ -59,7 +23,7 @@ from collections.abc import Callable, Sequence from pathlib import Path - from reflex_base.plugins import PageDefinition + from reflex_base.plugins import PageContext, PageDefinition from reflex_base.utils.imports import ParsedImportDict from reflex.app import App @@ -110,6 +74,37 @@ def _wrap_key_strs(keys: Any) -> list[str]: return sorted(f"{p}:{n}" for p, n in keys) +def _manifest_page_entry( + page_ctx: PageContext, + component: Any, + state_index: dict[str, Path], + hasher: Callable[[str], str | None], + *, + is_stateful: bool, + root: Path | None = None, +) -> dict[str, Any]: + """Build the manifest entry for one compiled page. + + Args: + page_ctx: The compiled page context. + component: The page component/callable used for dependency discovery. + state_index: The state-context identifier -> file index. + hasher: A memoized path -> content-hash function. 
+ is_stateful: Whether the page registered state during compile. + root: Project root for dependency discovery. Defaults to cwd. + + Returns: + The JSON-able manifest entry for the page. + """ + return { + "dep_hashes": page_cache.page_dependency_hashes( + page_ctx, component, state_index, hasher, root + ), + "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), + "is_stateful": is_stateful, + } + + def load_manifest() -> dict[str, Any] | None: """Load the persisted compile manifest, or None if absent/unusable. @@ -143,7 +138,7 @@ def write_manifest( compile_ctx: The completed compile context (all pages compiled). pages: The full list of page definitions that were compiled. install_imports: The **complete** frontend import set the full compile - installed — page imports merged with the app-root (app-wrap, e.g. + installed: page imports merged with the app-root (app-wrap, e.g. the Toaster/``sonner`` provider) and memo-component imports. An incremental rebuild reuses the on-disk app-wide files, so it must install from this complete set, not just the per-page union. 
@@ -152,7 +147,7 @@ def write_manifest( try: state_index, _ = page_cache.state_dependency_index(root) hasher = page_cache.make_hasher() - epoch = page_cache.global_epoch(root) + epoch = page_cache.global_epoch(root, pages=pages) pages_data: dict[str, Any] = {} for page in pages: @@ -163,13 +158,14 @@ def write_manifest( or page_ctx.output_path is None ): return # incomplete compile -> do not write a partial manifest - pages_data[page.route] = { - "dep_hashes": page_cache.page_dependency_hashes( - page_ctx, page.component, state_index, hasher, root - ), - "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), - "is_stateful": page.route in compile_ctx.stateful_routes, - } + pages_data[page.route] = _manifest_page_entry( + page_ctx, + page.component, + state_index, + hasher, + is_stateful=page.route in compile_ctx.stateful_routes, + root=root, + ) manifest = { "schema": _SCHEMA, @@ -195,9 +191,11 @@ def globals_match( The fast rebuild needs the route set unchanged (adding/removing a route changes the shared nav on every page) and the global epoch unchanged (Reflex - version + config/lockfiles). Everything else is decided per page via its - dependency set, so a shared-component or markdown edit no longer blocks the - fast path — only the pages that actually depend on the changed file miss. + version + config/lockfiles + the app-level config files: the entrypoint and + the theme/app-wrap/stylesheet modules it imports, which configure the app-wide + files reused on disk). Everything else is decided per page via its dependency + set, so a shared-component or markdown edit no longer blocks the fast path. + Only the pages that depend on the changed file miss. Args: manifest: The loaded manifest. @@ -205,7 +203,7 @@ def globals_match( epoch: The current global epoch (see :func:`page_cache.global_epoch`). Returns: - True iff the global inputs match. + True if the global inputs match. 
""" return ( manifest.get("reflex_version") == page_cache._reflex_version() @@ -222,7 +220,7 @@ def partition_pages( """Return the pages whose dependency set changed since the manifest. Globals are assumed already matched (see :func:`globals_match`), so a page is - a hit iff every file in its recorded dependency set is byte-unchanged. + a hit if every file in its recorded dependency set is byte-unchanged. Args: pages: The current page definitions. @@ -278,7 +276,7 @@ def try_incremental_rebuild( pages = list(app._unevaluated_pages.values()) routes = {p.route for p in pages} hasher = page_cache.make_hasher() - epoch = page_cache.global_epoch(root) + epoch = page_cache.global_epoch(root, pages=pages) if not globals_match(manifest, routes=routes, epoch=epoch): return False @@ -370,8 +368,8 @@ def try_incremental_rebuild( install_imports, page_ctx.frontend_imports, memo_imports ) - # Record which routes are stateful — miss pages from this compile, hit pages - # from the manifest — so the stateful-pages marker is complete. We do NOT + # Record which routes are stateful: miss pages from this compile, hit pages + # from the manifest, so the stateful-pages marker is complete. We do NOT # re-evaluate hit pages to register their state in this process: it only # produces .web and exits (the daemon, the initial compile, and CLI compiles # never serve), and the serving backend re-evaluates the marked stateful @@ -399,7 +397,7 @@ def try_incremental_rebuild( frontend_skeleton.update_entry_client() # Refresh the manifest for the next process. - _update_manifest_for_misses(manifest, miss_ctx, miss_pages) + _update_manifest_for_misses(manifest, miss_ctx, miss_pages, install_imports, root) return True @@ -408,6 +406,8 @@ def _update_manifest_for_misses( manifest: dict[str, Any], miss_ctx: CompileContext | None, miss_pages: Sequence[PageDefinition], + all_imports: ParsedImportDict, + root: Path | None = None, ) -> None: """Update the on-disk manifest entries for the recompiled pages. 
@@ -415,23 +415,24 @@ def _update_manifest_for_misses( manifest: The loaded manifest (mutated and rewritten). miss_ctx: The compile context of the recompiled pages, if any. miss_pages: The recompiled page definitions. + all_imports: The complete frontend import set after recompiling misses. + root: Project root for dependency discovery. Defaults to cwd. """ if miss_ctx is None or not miss_pages: return try: - state_index, _ = page_cache.state_dependency_index() + state_index, _ = page_cache.state_dependency_index(root) hasher = page_cache.make_hasher() - all_imports = _deserialize_imports(manifest["all_imports"]) for page in miss_pages: page_ctx = miss_ctx.compiled_pages[page.route] - manifest["pages"][page.route] = { - "dep_hashes": page_cache.page_dependency_hashes( - page_ctx, page.component, state_index, hasher - ), - "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), - "is_stateful": page.route in miss_ctx.stateful_routes, - } - all_imports = merge_imports(all_imports, page_ctx.frontend_imports) + manifest["pages"][page.route] = _manifest_page_entry( + page_ctx, + page.component, + state_index, + hasher, + is_stateful=page.route in miss_ctx.stateful_routes, + root=root, + ) manifest["all_imports"] = _serialize_imports(all_imports) _manifest_path().write_text(json.dumps(manifest), encoding="utf-8") except Exception as exc: # best-effort diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 2bd3da8b466..e9fdf234631 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -1,38 +1,9 @@ -"""Per-page dependency graph for the incremental compile cache (flag-gated). - -Enabled by ``REFLEX_COMPILE_CACHE``. When off (the default), the compiler -behaves exactly as before. - -**A Salsa-style dependency graph.** Each page records the *exact set of source -files it actually read*, so a change invalidates only the pages that depend on it -(not all pages). 
A page's dependency set is the union of: - -- the **transitive first-party ``.py`` import closure** of its defining module - (``page_py_dependencies`` — captures function-based views and shared helpers - that never appear as nodes in the rendered tree, e.g. a ``def hero()`` view, so - editing one invalidates exactly the pages whose closure imports it), -- the **source files read while evaluating it** (markdown/data — captured by - ``record_reads`` via the per-page read recorder; this is what lets editing one - ``.md`` doc page recompile only that page), -- the **component modules in its rendered tree** (``component_module_files`` — - belt-and-suspenders for components injected at runtime rather than statically - imported), -- the **fine-grained state files** it references (``used_state_files``). - -A page is unchanged iff every file in its dependency set is byte-unchanged and -the small genuinely-global ``global_epoch`` (Reflex version + ``rxconfig`` + -lockfile) is unchanged; adding/removing a route is handled separately (it changes -shared nav). Per-page dependency sets also track files *outside* the project root -(e.g. the docs site reads markdown from a sibling directory), so an -external-source edit still invalidates exactly the dependent pages. These content -hashes and dependency sets are what ``reflex/compiler/disk_cache.py`` persists and -compares each compile to recompile only the pages whose source changed. - -Two residual gaps the static graph cannot see: runtime ``importlib`` imports and -data files read at *module-import* time (outside the per-page eval window). An -edit reached only through one of those would not invalidate its page; such -patterns are rare in page modules, and a global/version change still forces a -full recompile. +"""Per-page dependency discovery for the incremental compile cache. 
+ +Each page records the first-party Python import closure of its page callable, +files read during page evaluation, component modules in its rendered tree, and +fine-grained state files it references. ``disk_cache`` persists hashes of that +set so only pages whose dependencies changed need to be recompiled. """ from __future__ import annotations @@ -41,7 +12,7 @@ import contextlib import hashlib import re -from collections.abc import Callable +from collections.abc import Callable, Sequence from contextvars import ContextVar from importlib import metadata from pathlib import Path @@ -77,13 +48,9 @@ def _reflex_version() -> str: return "unknown" -# --- Per-page read tracking (the Salsa "input read" seam) ------------------- # A page's markdown/data dependencies are read lazily while the page is -# evaluated (e.g. ``Path(doc).read_text()`` inside the page callable). We record -# those reads per page so the cache depends on the exact files consumed. Patches -# are installed once (idempotent) and only record while a recording set is active -# on the current task (set by ``record_reads``), so the overhead is a contextvar -# read when no cache is running. +# evaluated. Track those reads only while ``record_reads`` is active, so the +# idle overhead is one contextvar read. _active_reads: ContextVar[set[str] | None] = ContextVar("_active_reads", default=None) _patched = False @@ -188,15 +155,54 @@ def record_reads(): _active_reads.reset(token) -def global_epoch(root: Path | None = None) -> str: - """Fingerprint the genuinely-global inputs (Reflex version + config/lockfiles). +def _app_entrypoint_file(root: Path | None = None) -> Path | None: + """Resolve the user's app entrypoint module file (where ``rx.App`` is built). + + App-wide inputs like theme, app wraps, the toaster, stylesheets, and head + components are configured here, not in any single page's + dependency set. 
This is the root of :func:`app_dependency_files`, which walks + its imports (stopping at page modules) to find the config-only modules whose + change must invalidate the reused on-disk app-wide files. + + Args: + root: Project root; only a file under it is returned. Defaults to cwd. + + Returns: + The resolved entrypoint file path under ``root``, or None if it can't be + determined (no app module imported, or it lives outside ``root``). + """ + import sys + + try: + from reflex.config import get_config + + module = get_config().module + except Exception: + return None + file = getattr(sys.modules.get(module), "__file__", None) + if not file: + return None + rf = Path(file).resolve() + root = (root or Path.cwd()).resolve() + return rf if root in rf.parents else None + + +def global_epoch( + root: Path | None = None, *, pages: Sequence[object] | None = None +) -> str: + """Fingerprint the genuinely-global inputs. These can affect every page's output but belong to no single page, so they - gate the whole cache rather than any one page's dependency set. Kept small on - purpose — per-file edits flow through per-page dependency sets instead. + gate the whole cache rather than any one page's dependency set: the Reflex + version, the config/lockfiles, and the app-level config files: the app + entrypoint module plus the config-only modules it imports (theme, app-wraps, + stylesheets, head components; see :func:`app_dependency_files`). Kept small + on purpose; per-file edits flow through per-page dependency sets instead. Args: root: Project root. Defaults to cwd. + pages: The current page definitions, used as barriers so page modules + (tracked per page) are excluded from the app-level config files. Returns: A hex digest of the global inputs. 
@@ -209,16 +215,16 @@ def global_epoch(root: Path | None = None) -> str: parts.append(f"{name}={hashlib.sha256(path.read_bytes()).hexdigest()}") except OSError: parts.append(f"{name}=<absent>") + # Sorted for a deterministic digest regardless of set iteration order. + for path_str in sorted(app_dependency_files(pages, root)): + try: + digest = hashlib.sha256(Path(path_str).read_bytes()).hexdigest() + except OSError: + digest = "<absent>" + parts.append(f"app:{path_str}={digest}") return _sha(*parts) -# In-process per-page cache. Each page is keyed by the genuinely-global epoch -# plus the content hashes of its exact dependency set, so editing one file -# misses only the pages that depend on it. Contributions to the app-wide -# aggregates include live Python objects (root_component, memo defs), so the -# store is in-process only. - - def _module_file(component: object) -> Path | None: import sys @@ -232,10 +238,9 @@ def component_module_files( ) -> set[Path]: """Resolve the first-party module files of every component in a tree. - Walks the rendered component tree and collects the defining module file of - each component class under ``root``. This is the precise, barrel-immune way - (vs. static imports) to capture which shared views/templates a page renders: - editing one invalidates exactly the pages whose tree contains it. + Walks the rendered component tree and collects each component class module + under ``root``. This catches component dependencies that static imports may + miss. Args: root_component: The page's root component (its rendered tree). @@ -276,12 +281,36 @@ def _resolve_module_file(name: str) -> str | None: return str(Path(file).resolve()) if file else None +def _loaded_first_party_modules(root: Path) -> dict[str, str]: + """Map first-party module files to module names. + + Args: + root: Resolved project root. + + Returns: + A mapping of resolved file path -> module name for loaded modules under + ``root``. 
+ """ + import sys + + file_to_mod: dict[str, str] = {} + for name, mod in list(sys.modules.items()): + file = getattr(mod, "__file__", None) + if not file: + continue + rf = Path(file).resolve() + if root in rf.parents: + file_to_mod[str(rf)] = name + return file_to_mod + + def _import_from_targets(node: object, modname: str) -> list[str]: """Resolve a ``from ... import ...`` node to candidate module names. Handles relative imports via the importing module's package. Returns the - base module and each ``base.name`` (a name may be a submodule or an attribute - — both candidates are resolved against ``sys.modules`` by the caller). + base module and each ``base.name``. A name may be a submodule or an + attribute; both candidates are resolved against ``sys.modules`` by the + caller. Args: node: An ``ast.ImportFrom`` node. @@ -304,16 +333,45 @@ def _import_from_targets(node: object, modname: str) -> list[str]: return [base, *(f"{base}.{a.name}" for a in node.names)] +def _module_import_edges( + file: str, modname: str, file_to_mod: dict[str, str] +) -> set[str]: + """Return first-party files imported by one module. + + Args: + file: The resolved module file path. + modname: The module's dotted name. + file_to_mod: Loaded first-party module files. + + Returns: + The resolved first-party files imported by ``file``. + """ + import ast + + deps: set[str] = set() + try: + tree = ast.parse(Path(file).read_bytes()) + except (OSError, SyntaxError, ValueError): + return deps + for node in ast.walk(tree): + names: list[str] = [] + if isinstance(node, ast.Import): + names = [a.name for a in node.names] + elif isinstance(node, ast.ImportFrom): + names = _import_from_targets(node, modname) + for name in names: + target = _resolve_module_file(name) + if target is not None and target in file_to_mod: + deps.add(target) + return deps + + def build_import_graph(root: Path | None = None) -> dict[str, set[str]]: """Build the first-party import graph (file -> files it imports). 
- Parses every already-imported first-party module's source for ``import`` and - ``from`` statements and resolves them to files under ``root`` via - ``sys.modules``. Cached per root for the duration of the process. This is the - sound basis for per-page ``.py`` dependencies: a function (e.g. a view like - ``hero()``) can only affect a page if its module is transitively imported by - the page's module, so it appears in the page's import closure even though it - is never a node in the rendered tree. + Parses already-imported first-party modules and resolves their imports to + files under ``root`` via ``sys.modules``. Cached per root for the duration of + the process. Args: root: Project root. Defaults to cwd. @@ -321,42 +379,16 @@ def build_import_graph(root: Path | None = None) -> dict[str, set[str]]: Returns: A mapping of resolved file path -> the set of first-party files it imports. """ - import ast - import sys - root = (root or Path.cwd()).resolve() cached = _import_graph_cache.get(root) if cached is not None: return cached - file_to_mod: dict[str, str] = {} - for name, mod in list(sys.modules.items()): - file = getattr(mod, "__file__", None) - if not file: - continue - rf = Path(file).resolve() - if root in rf.parents: - file_to_mod[str(rf)] = name - - graph: dict[str, set[str]] = {} - for file, modname in file_to_mod.items(): - deps: set[str] = set() - try: - tree = ast.parse(Path(file).read_bytes()) - except (OSError, SyntaxError, ValueError): - graph[file] = deps - continue - for node in ast.walk(tree): - names: list[str] = [] - if isinstance(node, ast.Import): - names = [a.name for a in node.names] - elif isinstance(node, ast.ImportFrom): - names = _import_from_targets(node, modname) - for n in names: - target = _resolve_module_file(n) - if target is not None and target in file_to_mod: - deps.add(target) - graph[file] = deps + file_to_mod = _loaded_first_party_modules(root) + graph = { + file: _module_import_edges(file, modname, file_to_mod) + for file, 
modname in file_to_mod.items() + } _import_graph_cache[root] = graph return graph @@ -366,16 +398,41 @@ def clear_import_graph() -> None: _import_graph_cache.clear() +def _component_source_files(component: object, root: Path) -> set[str]: + """The component callable's own defining files under ``root``. + + The callable's *real* code filename (``__code__``, correct even when + ``__module__`` was reassigned, as the docs app does for generated pages) plus + its module file. These are the roots a page's import closure walks from, and + the barriers the app-config walk stops at. + + Args: + component: The page component or callable. + root: Resolved project root; only files under it are returned. + + Returns: + The set of resolved defining file path strings under ``root``. + """ + out: set[str] = set() + code = getattr(component, "__code__", None) + filename = getattr(code, "co_filename", None) + own = _module_file(component) + for path in (filename, own): + if path: + rf = Path(path).resolve() + if root in rf.parents: + out.add(str(rf)) + return out + + def page_py_dependencies( component: BaseComponent | object, root: Path | None = None ) -> set[str]: """Return the transitive first-party ``.py`` files a page's code depends on. - Starts from the page callable's *real* defining file (``__code__`` filename, - which is correct even when ``__module__`` was reassigned, as the docs app does - for generated doc pages) plus its module file, and walks the import graph. - Captures function-based views and shared helpers that the rendered-tree walk - cannot see. + Starts from the page callable's code filename plus its module file, then + walks the import graph. This captures function-based views and shared + helpers that the rendered-tree walk cannot see. Args: component: The page component or callable. 
@@ -387,21 +444,8 @@ def page_py_dependencies( root = (root or Path.cwd()).resolve() graph = build_import_graph(root) - start: set[str] = set() - code = getattr(component, "__code__", None) - filename = getattr(code, "co_filename", None) - if filename: - rf = Path(filename).resolve() - if root in rf.parents: - start.add(str(rf)) - own = _module_file(component) - if own is not None: - rf = own.resolve() - if root in rf.parents: - start.add(str(rf)) - seen: set[str] = set() - stack = list(start) + stack = list(_component_source_files(component, root)) while stack: cur = stack.pop() if cur in seen: @@ -411,6 +455,57 @@ def page_py_dependencies( return seen +def app_dependency_files( + pages: Sequence[object] | None = None, root: Path | None = None +) -> set[str]: + """First-party files whose change affects app-level config (not any page). + + Walks the first-party import graph from the app entrypoint + (:func:`_app_entrypoint_file`), treating each page-defining module as a + barrier (not entered), so the result is the entrypoint plus the config-only + modules it imports, such as theme, app-wraps, stylesheets, head components, and + never page modules or their deep dependencies, which are tracked per page. A + config module shared with a page is still captured (it is reached from the + entrypoint directly, not through the barrier). + + These configure the app-wide files an incremental rebuild reuses on disk + (app root, contexts, theme, stylesheet), so they are folded into + :func:`global_epoch`: editing one forces a full recompile instead of leaving + those files stale. + + Args: + pages: The current page definitions, used as traversal barriers. When + None (no page set available), no barriers apply. + root: Project root. Defaults to cwd. + + Returns: + The set of resolved app-config dependency file path strings, or empty if + the entrypoint can't be resolved. 
+ """ + root = (root or Path.cwd()).resolve() + entrypoint = _app_entrypoint_file(root) + if entrypoint is None: + return set() + file_to_mod = _loaded_first_party_modules(root) + barriers: set[str] = set() + for page in pages or (): + barriers |= _component_source_files(getattr(page, "component", None), root) + + start = str(entrypoint) + seen = {start} + stack = [start] + while stack: + cur = stack.pop() + modname = file_to_mod.get(cur) + if modname is None: + continue + for dep in _module_import_edges(cur, modname, file_to_mod): + if dep not in seen and dep not in barriers: + seen.add(dep) + stack.append(dep) + return seen + + def make_hasher() -> Callable[[str], str | None]: """Return a content-hasher that memoizes each path within one compile. @@ -500,12 +595,12 @@ def used_state_files( """Return the fine-grained state files a compiled page depends on. Stateful subtrees are auto-memoized into separate components, so a page's - own ``output_code`` may not name the state it uses — the state lives in the - memo components it *owns* (its ``memo_contributions``). Each stateful memo + own ``output_code`` may not name the state it uses; the state lives in the + memo components it owns (its ``memo_contributions``). Each stateful memo is owned by exactly one page, which regenerates it whenever it recompiles, so scanning ``output_code`` plus the page's own memo components captures the full dependency set. If a memo can't be introspected, depend on every fine - state file (conservative — never stale). + state file. Args: output_code: The page's compiled JS. @@ -606,6 +701,6 @@ def deps_unchanged( hasher: A memoized path -> content-hash function. Returns: - True iff every dependency file is byte-unchanged. + True if every dependency file is byte-unchanged. 
""" return all(hasher(path) == digest for path, digest in dep_hashes.items()) diff --git a/reflex/compiler/utils.py b/reflex/compiler/utils.py index 9c38dcb1149..bb768023fd5 100644 --- a/reflex/compiler/utils.py +++ b/reflex/compiler/utils.py @@ -801,10 +801,8 @@ def add_meta( children.append(Description.create(content=description)) children.append(Image.create(content=image)) - # Own-before-mutate: the page root may be a shared instance (the construction - # cache reuses identical static subtrees across pages), so build a fresh copy - # with the metadata appended instead of mutating ``children`` in place — - # otherwise repeated reuse accumulates duplicate <title>/<meta> tags. + # Page roots may be shared by the construction cache; copy before appending + # metadata so repeated reuse does not accumulate duplicate tags. new_page = copy.copy(page) new_page.children = [*page.children, *children, *meta_tags] new_page._clear_compile_caches() @@ -838,10 +836,7 @@ def write_file(path: str | Path, code: str): path.parent.mkdir(parents=True, exist_ok=True) if path.exists() and path.read_text(encoding="utf-8") == code: return - # Write atomically (temp file + os.replace) so a reader watching this tree — - # e.g. the vite dev server, or a concurrent compile — never observes a - # half-written file, even if the writing process is killed mid-write (the - # compile daemon forks throwaway children that may be terminated mid-compile). + # Write atomically so readers never observe a half-written file. 
tmp = path.with_name(f"{path.name}.{os.getpid()}.tmp") try: tmp.write_text(code, encoding="utf-8") diff --git a/reflex/reflex.py b/reflex/reflex.py index 61cc3c5415e..24389510982 100644 --- a/reflex/reflex.py +++ b/reflex/reflex.py @@ -207,11 +207,8 @@ def _run_dev( running_mode.has_backend(), )) - # When the compile cache is enabled, a persistent warm compile daemon owns - # .web regeneration (fork-per-compile, no per-edit cold import) instead of - # the backend worker recompiling on every respawn. The backend then only - # evaluates pages to register state — REFLEX_SKIP_COMPILE makes it skip the - # frontend write, and is inherited by every respawned reload worker. + # The compile daemon owns .web regeneration; backend reload workers only + # evaluate pages to register state. from reflex_base.environment import environment if running_mode.has_frontend() and environment.REFLEX_COMPILE_CACHE.get(): diff --git a/reflex/utils/compile_daemon.py b/reflex/utils/compile_daemon.py index 6d18638a3fb..8d1d20525db 100644 --- a/reflex/utils/compile_daemon.py +++ b/reflex/utils/compile_daemon.py @@ -1,39 +1,9 @@ -"""Persistent warm compile daemon for fast dev hot reloads (fork-per-compile). - -Active in ``reflex run`` dev when ``REFLEX_COMPILE_CACHE`` is set. Instead of the -granian/uvicorn reloader respawning a worker that cold-imports reflex + -reflex-base + the app's heavy deps (pandas/plotly/…) on every ``.py`` change, -this daemon imports everything **once** and stays warm. On each source change it -``fork()``s a throwaway child that: - -1. purges the user's first-party modules from ``sys.modules`` and resets the - cross-module state/page registries (so it is, for first-party code, a clean - interpreter — no stale ``__subclasses__`` zombies, no duplicate-substate - shadowing, no ``add_page`` re-registration error); -2. 
re-imports the user app fresh (third-party deps stay warm, inherited via - copy-on-write fork) and runs ``compiler.compile_app`` with the cache on, which - recompiles only the dependency-changed pages via - ``disk_cache.try_incremental_rebuild`` — the child *is* the fresh worker that - path was built for, but warm; -3. writes only the changed ``.web`` files (atomically) and ``os._exit``s. - -Because every compile runs in a child that is discarded, reload corruption can -never accumulate: correctness is the same as today's respawn, but the multi-second -cold import is paid once instead of on every edit. - -The daemon owns the file watcher, so it watches exactly what the compiler reads — -``.py``/``.md``/``.mdx`` under the app source roots **plus** every external/sibling -content file recorded in the compile manifest's per-page dependency sets (where a -docs app's markdown lives) plus ``rxconfig``/lockfiles/``assets`` — fixing the -long-standing gap where markdown edits (and sibling-dir reads) never triggered a -reload because the reloaders watch only ``*.py`` under the app dir. - -The watcher is a single-threaded poll loop on purpose: ``fork()`` from a -multi-threaded process is unsafe, so the daemon must hold no background threads. - -On Windows (no ``os.fork``) each compile runs in a fresh spawned subprocess -instead — correct, but without copy-on-write warmth (parity with today's latency); -the watcher/markdown fix applies identically. +"""Warm compile daemon for ``REFLEX_COMPILE_CACHE`` dev hot reloads. + +The daemon imports the app once, then compiles each change in an isolated child. +On POSIX it forks so third-party imports stay warm; otherwise it falls back to a +fresh subprocess. It also owns the watch loop so markdown and external content +dependencies recorded in the compile manifest trigger rebuilds. """ from __future__ import annotations @@ -58,7 +28,7 @@ #: After a change is seen, wait this long and re-snapshot so a burst of saves #: (e.g. 
format-on-save touching many files) collapses into a single compile. _DEBOUNCE = 0.05 -#: How often to re-walk the tree (``rglob``) to discover added/removed files. +#: How often to re-walk the tree to discover added/removed files. #: Between rescans we only ``stat`` the known set; adds are rarer and tolerate a #: little latency, so this keeps idle CPU low while polling fast for edits. _RESCAN_INTERVAL = 1.0 @@ -149,14 +119,17 @@ def _iter_source_files(root: Path): if root.suffix in _WATCH_SUFFIXES: yield root.resolve() return - for path in root.rglob("*"): - if path.is_dir(): - continue - rel_parts = path.relative_to(root).parts[:-1] - if any(part in _SKIP_DIRS or part.startswith(".") for part in rel_parts): - continue - if path.suffix in _WATCH_SUFFIXES: - yield path.resolve() + for dirpath, dirnames, filenames in os.walk(root): + dirnames[:] = [ + name + for name in dirnames + if name not in _SKIP_DIRS and not name.startswith(".") + ] + base = Path(dirpath) + for name in filenames: + path = base / name + if path.suffix in _WATCH_SUFFIXES: + yield path.resolve() def _external_dependency_files(roots: list[Path]) -> set[Path]: @@ -164,7 +137,7 @@ def _external_dependency_files(roots: list[Path]) -> set[Path]: The manifest records each page's full dependency set (own module, markdown, component/state modules). Any dependency that lives *outside* the reload - roots — e.g. a docs app's markdown in a sibling directory — is invisible to + roots, such as a docs app's markdown in a sibling directory, is invisible to ``get_reload_paths`` and must be watched explicitly so editing it rebuilds. Args: @@ -261,7 +234,7 @@ def _first_party_module_names(roots: list[Path]) -> set[str]: """Names of all loaded modules belonging to the user's first-party packages. 
First-party top-level package names are inferred from the *regular* modules - whose ``__file__`` resolves under a reload root (a plain attribute read — no + whose ``__file__`` resolves under a reload root (a plain attribute read, no namespace-package ``__path__`` recalculation, which is lazy and would break while ``sys.modules`` is being mutated). Every loaded module sharing one of those top-level names is then first-party, which captures namespace packages @@ -293,10 +266,8 @@ def _reset_first_party(roots: list[Path]) -> None: """Make this interpreter clean w.r.t. first-party code before re-importing. Purges the user's first-party modules from ``sys.modules`` and clears the - cross-module registries/caches that would otherwise pin the old class - objects (the ``__subclasses__`` zombie / duplicate-substate / stale-page - hazards). Third-party modules (reflex, reflex-base, pandas, …) are left - imported and warm. + cross-module registries/caches that would otherwise pin old class objects. + Third-party modules are left imported and warm. Args: roots: The resolved reload roots whose modules are first-party. @@ -365,7 +336,7 @@ def _child_compile(roots: list[Path], prerender_routes: bool) -> None: """Reset first-party state, re-import the app fresh, and compile incrementally. Runs in a forked child (POSIX) or a one-shot subprocess (Windows). Must not - return normally on error — the caller maps the exit code to success/failure. + return normally on error; the caller maps the exit code to success/failure. Args: roots: The resolved reload roots. @@ -396,7 +367,7 @@ def _await_child(pid: int) -> bool: Returns: True if it exited 0; False on failure, timeout, or signal (a - signal-killed child — e.g. Ctrl-C during shutdown — is a quiet False). + signal-killed child, such as Ctrl-C during shutdown, is a quiet False). 
""" deadline = time.monotonic() + _COMPILE_TIMEOUT while True: @@ -417,7 +388,7 @@ def _can_fork() -> bool: """Whether forking is safe right now (POSIX and the process is single-threaded). Forking a multi-threaded process and then running Python (not exec) inherits - locks held by threads that don't exist in the child — a classic deadlock. The + locks held by threads that don't exist in the child. The user app, imported warm in the parent, may have started a background thread at import time, so this is checked per compile. @@ -479,7 +450,7 @@ def _serve() -> None: # Warm import + initial compile (writes .web + the manifest); keeps the app # and its third-party deps resident for copy-on-write children. A failure - # here (e.g. the app is mid-edit and broken) must NOT kill the daemon — fall + # here (e.g. the app is mid-edit and broken) must NOT kill the daemon; fall # through to the watch loop so the next edit that fixes it recompiles. try: with console.timing("Compile daemon: initial compile"): @@ -499,7 +470,7 @@ def _serve() -> None: # the manifest; recompute only after a compile, not on every poll tick. external = _external_dependency_files(roots) global_files = _global_files(root) - # `paths` is the watched set, refreshed by an rglob rescan; each tick only + # `paths` is the watched set, refreshed by a tree rescan; each tick only # re-stats it (cheap), so the poll can be fast without burning idle CPU. 
paths = _watch_paths(roots, root, external) snapshot = _snapshot(paths) diff --git a/tests/units/compiler/test_disk_cache.py b/tests/units/compiler/test_disk_cache.py index 660052df758..aa9c4b80b24 100644 --- a/tests/units/compiler/test_disk_cache.py +++ b/tests/units/compiler/test_disk_cache.py @@ -1,8 +1,10 @@ """Tests for the experimental disk-persisted incremental compile cache.""" import dataclasses +import json from collections.abc import Callable, Sequence -from typing import Any +from types import SimpleNamespace +from typing import Any, cast from reflex_base.components.component import Component from reflex_base.plugins import CompileContext, CompilerHooks @@ -150,7 +152,7 @@ def test_unchanged_pages_compile_identically(tmp_path, monkeypatch): """The reuse correctness property: an unchanged page recompiles byte-for-byte. The disk cache leaves a hit page's already-on-disk ``.web`` file untouched, so - reuse is correct iff a fresh compile of that page yields identical output. + reuse is correct if a fresh compile of that page yields identical output. Compile A, B, C; then compile A, B(edited), C; A and C must be byte-identical. """ web = tmp_path / ".web" @@ -220,8 +222,6 @@ def test_incremental_rebuild_all_hits(tmp_path, monkeypatch): def test_incremental_rebuild_one_miss_writes_only_that_page(tmp_path, monkeypatch): - import json - from reflex.compiler import utils as compiler_utils web = tmp_path / ".web" @@ -273,8 +273,6 @@ def test_stateful_hit_is_marked_but_not_reevaluated(tmp_path, monkeypatch): re-evaluates the marked stateful pages itself, so re-evaluating them during the incremental rebuild was pure waste. """ - import json - web = tmp_path / ".web" web.mkdir() monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) @@ -303,17 +301,38 @@ def test_stateful_hit_is_marked_but_not_reevaluated(tmp_path, monkeypatch): ) is True ) - # Not re-evaluated... 
assert reevaluated == [] - # ...but still recorded as stateful so the backend's marker is complete. assert route in app._stateful_pages def test_load_manifest_rejects_wrong_schema(tmp_path, monkeypatch): - import json - web = tmp_path / ".web" web.mkdir() monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) (web / disk_cache._MANIFEST_FILE).write_text(json.dumps({"schema": 999})) assert disk_cache.load_manifest() is None + + +def test_update_manifest_for_misses_keeps_complete_imports(tmp_path, monkeypatch): + web = tmp_path / ".web" + web.mkdir() + monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + monkeypatch.setattr( + page_cache, "state_dependency_index", lambda root=None: ({}, set()) + ) + monkeypatch.setattr(page_cache, "page_dependency_hashes", lambda *a, **k: {}) + + page = _FakePage(route="/a", component=_page_a) + page_ctx = SimpleNamespace(app_wrap_components={}, frontend_imports={}) + miss_ctx = SimpleNamespace(compiled_pages={"/a": page_ctx}, stateful_routes=set()) + complete_imports = {"memo-lib": [ImportVar("MemoThing")]} + manifest = _manifest({ + "/a": {"dep_hashes": {}, "app_wrap_keys": [], "is_stateful": False} + }) + + disk_cache._update_manifest_for_misses( + manifest, cast(Any, miss_ctx), [page], complete_imports, root=tmp_path + ) + + written = json.loads((web / disk_cache._MANIFEST_FILE).read_text()) + assert disk_cache._deserialize_imports(written["all_imports"]) == complete_imports diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index b6b33f5e3ef..dcd908788d1 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -15,6 +15,201 @@ def test_global_epoch_tracks_global_files(tmp_path): assert page_cache.global_epoch(root=tmp_path) != epoch +def test_global_epoch_tracks_app_entrypoint(tmp_path, monkeypatch): + """Editing the app entrypoint (theme/app_wraps live there) bumps the epoch. 
+ + App-wide config is configured where ``rx.App`` is built, not in any page's + dependency set, so without this an edit to it would leave every page a hit + and the reused on-disk app root / contexts / theme stale. + """ + (tmp_path / "rxconfig.py").write_text("config = 1\n") + entrypoint = (tmp_path / "myapp.py").resolve() + entrypoint.write_text("app = rx.App(theme=light)\n") + monkeypatch.setattr( + page_cache, "_app_entrypoint_file", lambda root=None: entrypoint + ) + epoch = page_cache.global_epoch(root=tmp_path) + # editing the app entrypoint DOES change the epoch + entrypoint.write_text("app = rx.App(theme=dark)\n") + assert page_cache.global_epoch(root=tmp_path) != epoch + + +def test_app_entrypoint_file_resolution(tmp_path, monkeypatch): + import sys + from types import ModuleType, SimpleNamespace + + monkeypatch.setattr( + "reflex.config.get_config", lambda: SimpleNamespace(module="fake_entry_mod") + ) + mod = ModuleType("fake_entry_mod") + monkeypatch.setitem(sys.modules, "fake_entry_mod", mod) + + # no __file__ on the module -> None + assert page_cache._app_entrypoint_file(root=tmp_path) is None + + # a file under the project root -> resolved + entry = tmp_path / "myapp" / "myapp.py" + entry.parent.mkdir() + entry.write_text("app = 1\n") + mod.__file__ = str(entry) + assert page_cache._app_entrypoint_file(root=tmp_path) == entry.resolve() + + # a file outside the project root -> None (not a project input) + outside = tmp_path.parent / "elsewhere.py" + mod.__file__ = str(outside) + assert page_cache._app_entrypoint_file(root=tmp_path) is None + + +def test_app_dependency_files_keeps_config_excludes_pages(tmp_path, monkeypatch): + """The entrypoint's config closure, with page modules as traversal barriers. + + entry imports a config-only ``theme.py`` and a page; the page imports a view + and (also) theme. 
The result must keep entry + theme (theme is reached from + the entrypoint directly, even though a page imports it too) and exclude the + page module and its view (tracked per page). + """ + from types import SimpleNamespace + + entry = tmp_path / "myapp.py" + theme = tmp_path / "theme.py" + page = tmp_path / "pages" / "index.py" + view = tmp_path / "components" / "hero.py" + graph = { + str(entry): {str(theme), str(page)}, + str(page): {str(view), str(theme)}, + str(theme): set(), + str(view): set(), + } + monkeypatch.setattr(page_cache, "_app_entrypoint_file", lambda root=None: entry) + monkeypatch.setattr( + page_cache, + "_loaded_first_party_modules", + lambda root: { + str(entry): "myapp", + str(theme): "theme", + str(page): "pages.index", + str(view): "components.hero", + }, + ) + monkeypatch.setattr( + page_cache, + "_module_import_edges", + lambda file, modname, file_to_mod: graph[file], + ) + monkeypatch.setattr( + page_cache, "_component_source_files", lambda comp, root: {str(page)} + ) + + result = page_cache.app_dependency_files( + [SimpleNamespace(component=object())], root=tmp_path + ) + assert result == {str(entry), str(theme)} + + +def test_global_epoch_excludes_page_modules(tmp_path, monkeypatch): + """A page-module edit keeps the epoch (incremental); an app-config edit bumps it.""" + from types import SimpleNamespace + + (tmp_path / "rxconfig.py").write_text("c = 1\n") + entry = tmp_path / "myapp.py" + entry.write_text("app = 1\n") + theme = tmp_path / "theme.py" + theme.write_text("t = 1\n") + page = tmp_path / "index.py" + page.write_text("p = 1\n") + graph = {str(entry): {str(theme), str(page)}, str(page): set(), str(theme): set()} + monkeypatch.setattr(page_cache, "_app_entrypoint_file", lambda root=None: entry) + monkeypatch.setattr( + page_cache, + "_loaded_first_party_modules", + lambda root: { + str(entry): "myapp", + str(theme): "theme", + str(page): "index", + }, + ) + monkeypatch.setattr( + page_cache, + "_module_import_edges", + lambda 
file, modname, file_to_mod: graph[file], + ) + monkeypatch.setattr( + page_cache, "_component_source_files", lambda comp, root: {str(page)} + ) + pages = [SimpleNamespace(component=object())] + + epoch = page_cache.global_epoch(root=tmp_path, pages=pages) + # editing a page module does NOT change the epoch (tracked per page instead) + page.write_text("p = 2\n") + assert page_cache.global_epoch(root=tmp_path, pages=pages) == epoch + # editing app-level config the entrypoint imports (theme) DOES + theme.write_text("t = 2\n") + assert page_cache.global_epoch(root=tmp_path, pages=pages) != epoch + + +def test_global_epoch_hashes_only_app_dependency_closure(tmp_path, monkeypatch): + """Unrelated first-party modules do not make the app epoch coarse.""" + from types import SimpleNamespace + + entry = tmp_path / "myapp.py" + theme = tmp_path / "theme.py" + unrelated = tmp_path / "unrelated.py" + for path, code in ( + (entry, "import theme\n"), + (theme, "t = 1\n"), + (unrelated, "x = 1\n"), + ): + path.write_text(code) + graph = {str(entry): {str(theme)}, str(theme): set(), str(unrelated): set()} + monkeypatch.setattr(page_cache, "_app_entrypoint_file", lambda root=None: entry) + monkeypatch.setattr( + page_cache, + "_loaded_first_party_modules", + lambda root: { + str(entry): "myapp", + str(theme): "theme", + str(unrelated): "unrelated", + }, + ) + monkeypatch.setattr( + page_cache, + "_module_import_edges", + lambda file, modname, file_to_mod: graph[file], + ) + + epoch = page_cache.global_epoch( + root=tmp_path, pages=[SimpleNamespace(component=object())] + ) + unrelated.write_text("x = 2\n") + assert ( + page_cache.global_epoch( + root=tmp_path, pages=[SimpleNamespace(component=object())] + ) + == epoch + ) + theme.write_text("t = 2\n") + assert ( + page_cache.global_epoch( + root=tmp_path, pages=[SimpleNamespace(component=object())] + ) + != epoch + ) + + +def test_app_dependency_files_skips_graph_without_entrypoint(tmp_path, monkeypatch): + 
monkeypatch.setattr(page_cache, "_app_entrypoint_file", lambda root=None: None) + + def fail_loaded_first_party_modules(root): + msg = "module map should not be built without an entrypoint" + raise AssertionError(msg) + + monkeypatch.setattr( + page_cache, "_loaded_first_party_modules", fail_loaded_first_party_modules + ) + + assert page_cache.app_dependency_files(root=tmp_path) == set() + + def test_used_state_files_from_output_and_memos(tmp_path): from types import SimpleNamespace diff --git a/tests/units/utils/test_compile_daemon.py b/tests/units/utils/test_compile_daemon.py index 9d7947a8236..cb2bccaf3b6 100644 --- a/tests/units/utils/test_compile_daemon.py +++ b/tests/units/utils/test_compile_daemon.py @@ -168,8 +168,7 @@ class DaemonResetUser(rx.Model, table=True): def test_reset_first_party_purges_modules_and_registries(tmp_path): """``_reset_first_party`` purges first-party modules and clears registries. - Runs in a forked child so the global-registry reset can't corrupt the test - process — exactly how the daemon uses it (a throwaway child per compile). + Runs in a forked child so the registry reset cannot affect the test process. """ mod_file = tmp_path / "fp_module.py" mod_file.write_text("VALUE = 1\n") From 19b2df47d8d565105afd075a728fb9eefa83439a Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Tue, 30 Jun 2026 01:23:53 +0500 Subject: [PATCH 08/18] test(compiler): skip model-metadata daemon test without sqlmodel test_reset_model_metadata_allows_table_redefinition builds an rx.Model(table=True), which needs the SQLModel stack. The "without db dependencies" unit-test job uninstalls it, so the first model definition raised TypeError before the test could exercise the reset. Guard with pytest.importorskip("sqlmodel"), matching the repo's other db-dependent tests, so it skips there instead of failing. 
--- tests/units/utils/test_compile_daemon.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/units/utils/test_compile_daemon.py b/tests/units/utils/test_compile_daemon.py index cb2bccaf3b6..93e3e75e38a 100644 --- a/tests/units/utils/test_compile_daemon.py +++ b/tests/units/utils/test_compile_daemon.py @@ -129,6 +129,10 @@ def test_reset_model_metadata_allows_table_redefinition(): ``Table '...' is already defined``. Run in a fork so clearing the global metadata can't affect the test process. """ + # ``rx.Model(table=True)`` needs the SQLModel stack; the db-less unit-test + # job uninstalls it, so skip there rather than fail on the first definition. + pytest.importorskip("sqlmodel") + read_fd, write_fd = os.pipe() pid = os.fork() if pid == 0: # child From aac67b2d8a34b6c0d89d4d2a9da93449fb1cbfd8 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Tue, 30 Jun 2026 01:55:19 +0500 Subject: [PATCH 09/18] docs(changelog): add news fragments for the incremental compile cache Add the news fragments the changelog check requires for the packaged source touched by this PR: a feature entry for the reflex package's REFLEX_COMPILE_CACHE flag and a misc entry for the reflex-base recorder hook / reproducible ref names that support it. --- news/6688.feature.md | 1 + packages/reflex-base/news/6688.misc.md | 1 + 2 files changed, 2 insertions(+) create mode 100644 news/6688.feature.md create mode 100644 packages/reflex-base/news/6688.misc.md diff --git a/news/6688.feature.md b/news/6688.feature.md new file mode 100644 index 00000000000..5ae88d4e82b --- /dev/null +++ b/news/6688.feature.md @@ -0,0 +1 @@ +Added an experimental disk-persisted incremental compile cache, enabled by the `REFLEX_COMPILE_CACHE` environment variable. 
When on, a fresh compile reuses the previous build already on disk in `.web` and recompiles only the pages whose source changed, tracked via a per-page dependency graph (Python import closure, files read during page evaluation, component modules, and referenced state). App-wide inputs (Reflex version, config/lockfiles, and the app entrypoint's config modules such as theme/app-wraps/stylesheets) gate the whole cache, falling back to a full compile when they change. `reflex run` dev additionally gains a warm fork-per-compile daemon so hot reloads skip the cold reimport and rebuild only what changed. Off by default — the compile path is unchanged when the flag is unset. diff --git a/packages/reflex-base/news/6688.misc.md b/packages/reflex-base/news/6688.misc.md new file mode 100644 index 00000000000..d0cfb39144c --- /dev/null +++ b/packages/reflex-base/news/6688.misc.md @@ -0,0 +1 @@ +Added an optional per-page source-read recorder hook (`page_source_recorder` in the compiler plugin) used by the incremental compile cache to track the exact files each page reads during evaluation, and made auto-generated unique ref names reproducible across in-process compiles so memo content hashes stay stable. No behavior change unless the compile cache is enabled. From 519f885117f8b95325de7da0626fc01764085d01 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Tue, 30 Jun 2026 02:39:46 +0500 Subject: [PATCH 10/18] feat(compiler): track runtime module imports in compile cache Patch __import__ and importlib.import_module while a page recorder is active so modules imported during page eval are recorded as source dependencies. Resolves names (including relative imports), skips stdlib and out-of-project files, and caches module-file lookups. Lets the incremental compile cache invalidate pages that depend on first-party modules pulled in at import time. 
--- reflex/compiler/page_cache.py | 223 ++++++++++++++++++++++-- tests/units/compiler/test_page_cache.py | 110 ++++++++++++ 2 files changed, 323 insertions(+), 10 deletions(-) diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index e9fdf234631..058338bd38b 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -11,7 +11,11 @@ import builtins import contextlib import hashlib +import importlib +import importlib.util +import os import re +import sys from collections.abc import Callable, Sequence from contextvars import ContextVar from importlib import metadata @@ -55,6 +59,8 @@ def _reflex_version() -> str: _active_reads: ContextVar[set[str] | None] = ContextVar("_active_reads", default=None) _patched = False _recorder_root: Path | None = None +_recorder_root_str: str | None = None +_recorder_raw_root_str: str | None = None #: Path parts that mark a dependency/build location whose reads are never a #: page's own source dependency (a change there flows through the version/epoch). @@ -78,6 +84,14 @@ def _reflex_version() -> str: ".rst", } +_PYTHON_PREFIXES = tuple( + os.path.abspath(os.fsdecode(prefix)) # noqa: PTH100 + for prefix in {sys.base_exec_prefix, sys.base_prefix, sys.exec_prefix, sys.prefix} + if prefix +) +_MODULE_FILE_CACHE_MISSING: object = object() +_module_file_cache: dict[tuple[str, str], str | None] = {} + def _record_read(path: object) -> None: target = _active_reads.get() @@ -96,6 +110,168 @@ def _record_read(path: object) -> None: target.add(str(resolved)) +def _is_path_within(path: str, root: str) -> bool: + """Return whether ``path`` is contained within ``root`` without filesystem IO. + + Args: + path: Absolute path to test. + root: Absolute root path. + + Returns: + True when ``path`` is nested below ``root``. 
+ """ + normalized_path = os.path.normcase(path) + normalized_root = os.path.normcase(root) + if normalized_path == normalized_root: + return False + try: + return os.path.commonpath((normalized_path, normalized_root)) == normalized_root + except ValueError: + return False + + +def _is_python_install_path(path: str) -> bool: + """Return whether ``path`` is under this interpreter's install roots. + + Args: + path: Absolute path to test. + + Returns: + True when ``path`` is under the interpreter or virtualenv prefix. + """ + return any(_is_path_within(path, prefix) for prefix in _PYTHON_PREFIXES) + + +def _recordable_module_file(file: object) -> str | None: + """Resolve a module file only when it can be a first-party dependency. + + Args: + file: The imported module's ``__file__`` value. + + Returns: + The resolved module file path to record, or None when it is outside the + project root or otherwise not recordable. + """ + root = _recorder_root + root_str = _recorder_root_str + if root is None or root_str is None: + return None + if not isinstance(file, (str, bytes, os.PathLike)): + return None + try: + raw_path = os.path.abspath(os.fsdecode(file)) # noqa: PTH100 + except (OSError, TypeError, ValueError): + return None + + cache_key = (root_str, raw_path) + cached = _module_file_cache.get(cache_key, _MODULE_FILE_CACHE_MISSING) + if cached is not _MODULE_FILE_CACHE_MISSING: + return cached if isinstance(cached, str) else None + + path = Path(raw_path) + under_project_path = _is_path_within(raw_path, root_str) or ( + _recorder_raw_root_str is not None + and _is_path_within(raw_path, _recorder_raw_root_str) + ) + resolved_str = None + if not any(part in _EXCLUDE_PARTS for part in path.parts) and ( + under_project_path or not _is_python_install_path(raw_path) + ): + try: + resolved = path.resolve() + except (OSError, TypeError, ValueError): + pass + else: + if not any(part in _EXCLUDE_PARTS for part in resolved.parts): + resolved_str = str(resolved) if root in 
resolved.parents else None + _module_file_cache[cache_key] = resolved_str + return resolved_str + + +def _record_module_file(module: object, target: set[str] | None = None) -> None: + """Record the source file for an imported module, if it has one. + + Args: + module: The imported module object. + target: The active read set. Defaults to the current recorder context. + """ + if target is None: + target = _active_reads.get() + if target is None: + return + file = getattr(module, "__file__", None) + if not file: + return + if resolved_file := _recordable_module_file(file): + target.add(resolved_file) + + +def _absolute_import_name( + name: str, globals_: dict[str, object] | None, level: int +) -> str: + """Resolve an import name relative to the caller package. + + Args: + name: The import name passed to ``__import__``. + globals_: The caller globals passed to ``__import__``. + level: The relative import level. + + Returns: + The absolute module name when it can be resolved, else ``name``. + """ + if not level: + return name + package = None + if globals_ is not None: + package = globals_.get("__package__") + if not package and (module := globals_.get("__name__")): + package = ( + module + if isinstance(module, str) and globals_.get("__path__") is not None + else module.rpartition(".")[0] + if isinstance(module, str) + else None + ) + if not isinstance(package, str): + return name + with contextlib.suppress(Exception): + return importlib.util.resolve_name(f"{'.' * level}{name}", package) + return name + + +def _record_imported_modules( + name: str, + result: object, + fromlist: Sequence[str] | None = None, + target: set[str] | None = None, +) -> None: + """Record source files for modules imported while a recorder is active. + + Args: + name: The absolute requested module name. + result: The object returned by ``__import__`` or ``import_module``. + fromlist: The ``fromlist`` passed to ``__import__``. + target: The active read set. 
Defaults to the current recorder context. + """ + if target is None: + target = _active_reads.get() + if target is None: + return + _record_module_file(result, target) + result_id = id(result) + if (module := sys.modules.get(name)) and id(module) != result_id: + _record_module_file(module, target) + if not fromlist: + return + for item in fromlist: + if ( + item != "*" + and (module := sys.modules.get(f"{name}.{item}")) + and id(module) != result_id + ): + _record_module_file(module, target) + + def enable_read_tracking(root: Path | None = None) -> None: """Install per-page source-read tracking and register the recorder hook. @@ -107,8 +283,19 @@ def enable_read_tracking(root: Path | None = None) -> None: Args: root: Project root; only reads under it are recorded. Defaults to cwd. """ - global _patched, _recorder_root - _recorder_root = (root or Path.cwd()).resolve() + global _patched, _recorder_raw_root_str, _recorder_root, _recorder_root_str + raw_root = root or Path.cwd() + resolved_root = raw_root.resolve() + raw_root_str = os.path.abspath(os.fsdecode(raw_root)) # noqa: PTH100 + if ( + _recorder_root != resolved_root + or _recorder_raw_root_str != raw_root_str + or _recorder_root_str != str(resolved_root) + ): + _module_file_cache.clear() + _recorder_root = resolved_root + _recorder_root_str = str(resolved_root) + _recorder_raw_root_str = raw_root_str from reflex_base.plugins import compiler as _bc @@ -121,6 +308,8 @@ def enable_read_tracking(root: Path | None = None) -> None: orig_read_text = Path.read_text orig_read_bytes = Path.read_bytes orig_open = builtins.open + orig_import = builtins.__import__ + orig_import_module = importlib.import_module def read_text(self: Path, *args: object, **kwargs: object): _record_read(self) @@ -135,9 +324,31 @@ def open_(file: object, mode: str = "r", *args: object, **kwargs: object): _record_read(file) return orig_open(file, mode, *args, **kwargs) # type: ignore[arg-type] + def import_( + name: str, + globals_: dict[str, 
object] | None = None, + locals_: dict[str, object] | None = None, + fromlist: Sequence[str] | None = (), + level: int = 0, + ): + result = orig_import(name, globals_, locals_, fromlist, level) + if (target := _active_reads.get()) is not None: + _record_imported_modules( + _absolute_import_name(name, globals_, level), result, fromlist, target + ) + return result + + def import_module(name: str, package: str | None = None): + result = orig_import_module(name, package) + if (target := _active_reads.get()) is not None: + _record_imported_modules(result.__name__, result, target=target) + return result + Path.read_text = read_text # type: ignore[method-assign,assignment] Path.read_bytes = read_bytes # type: ignore[method-assign,assignment] builtins.open = open_ # type: ignore[assignment] + builtins.__import__ = import_ # type: ignore[assignment] + importlib.import_module = import_module @contextlib.contextmanager @@ -171,8 +382,6 @@ def _app_entrypoint_file(root: Path | None = None) -> Path | None: The resolved entrypoint file path under ``root``, or None if it can't be determined (no app module imported, or it lives outside ``root``). """ - import sys - try: from reflex.config import get_config @@ -226,8 +435,6 @@ def global_epoch( def _module_file(component: object) -> Path | None: - import sys - mod = sys.modules.get(getattr(component, "__module__", "") or "") file = getattr(mod, "__file__", None) return Path(file) if file else None @@ -274,8 +481,6 @@ def component_module_files( def _resolve_module_file(name: str) -> str | None: - import sys - mod = sys.modules.get(name) file = getattr(mod, "__file__", None) return str(Path(file).resolve()) if file else None @@ -291,8 +496,6 @@ def _loaded_first_party_modules(root: Path) -> dict[str, str]: A mapping of resolved file path -> module name for loaded modules under ``root``. 
""" - import sys - file_to_mod: dict[str, str] = {} for name, mod in list(sys.modules.items()): file = getattr(mod, "__file__", None) diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index dcd908788d1..e5ba80ed171 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -210,6 +210,116 @@ def fail_loaded_first_party_modules(root): assert page_cache.app_dependency_files(root=tmp_path) == set() +def test_record_reads_tracks_executed_importlib_import(tmp_path, monkeypatch): + import importlib + import sys + + module_name = "runtime_import_dep_for_page_cache" + module_file = tmp_path / f"{module_name}.py" + module_file.write_text("VALUE = 1\n") + monkeypatch.syspath_prepend(str(tmp_path)) + monkeypatch.delitem(sys.modules, module_name, raising=False) + importlib.invalidate_caches() + page_cache.enable_read_tracking(root=tmp_path) + + try: + with page_cache.record_reads() as reads: + importlib.import_module(module_name) + finally: + sys.modules.pop(module_name, None) + + assert str(module_file.resolve()) in reads + + +def test_record_reads_tracks_executed_builtin_import(tmp_path, monkeypatch): + import importlib + import sys + + package = tmp_path / "runtime_import_pkg" + package.mkdir() + (package / "__init__.py").write_text("") + child = package / "child.py" + child.write_text("VALUE = 1\n") + monkeypatch.syspath_prepend(str(tmp_path)) + monkeypatch.delitem(sys.modules, "runtime_import_pkg", raising=False) + monkeypatch.delitem(sys.modules, "runtime_import_pkg.child", raising=False) + importlib.invalidate_caches() + page_cache.enable_read_tracking(root=tmp_path) + + try: + with page_cache.record_reads() as reads: + __import__("runtime_import_pkg.child") + finally: + sys.modules.pop("runtime_import_pkg", None) + sys.modules.pop("runtime_import_pkg.child", None) + + assert str(child.resolve()) in reads + + +def test_record_reads_ignores_unexecuted_import(tmp_path, monkeypatch): + 
import importlib + import sys + + module_name = "uncalled_runtime_import_dep" + module_file = tmp_path / f"{module_name}.py" + module_file.write_text("VALUE = 1\n") + monkeypatch.syspath_prepend(str(tmp_path)) + monkeypatch.delitem(sys.modules, module_name, raising=False) + importlib.invalidate_caches() + page_cache.enable_read_tracking(root=tmp_path) + + def import_if_called(): + return importlib.import_module(module_name) + + with page_cache.record_reads() as reads: + pass + + assert import_if_called + assert str(module_file.resolve()) not in reads + assert module_name not in sys.modules + + +def test_record_reads_imports_only_project_modules(tmp_path): + import importlib + + page_cache.enable_read_tracking(root=tmp_path) + + with page_cache.record_reads() as reads: + importlib.import_module("json") + + assert reads == set() + + +def test_record_reads_tracks_symlinked_project_import(tmp_path, monkeypatch): + import importlib + import sys + + import pytest + + root = tmp_path / "app" + root.mkdir() + module_name = "symlinked_runtime_dep" + module_file = root / f"{module_name}.py" + module_file.write_text("VALUE = 1\n") + linked_root = tmp_path / "linked_app" + try: + linked_root.symlink_to(root, target_is_directory=True) + except OSError as exc: + pytest.skip(f"symlinks unavailable: {exc}") + monkeypatch.syspath_prepend(str(linked_root)) + monkeypatch.delitem(sys.modules, module_name, raising=False) + importlib.invalidate_caches() + page_cache.enable_read_tracking(root=root) + + try: + with page_cache.record_reads() as reads: + importlib.import_module(module_name) + finally: + sys.modules.pop(module_name, None) + + assert str(module_file.resolve()) in reads + + def test_used_state_files_from_output_and_memos(tmp_path): from types import SimpleNamespace From f79b9db7b4d76a191146bb647274bd43577b1ec1 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Tue, 30 Jun 2026 20:41:49 +0500 Subject: [PATCH 11/18] refactor(compiler): simplify page_cache 
module-file recording Replace the os.path string-comparison helpers used for runtime import tracking with pathlib.Path operations, dropping the redundant cached root-string globals (_recorder_root_str, _recorder_raw_root_str) and the sentinel-based module-file cache. Make the read-set target an explicit argument instead of falling back to the recorder context. --- reflex/compiler/page_cache.py | 91 ++++++++----------------- tests/units/compiler/test_page_cache.py | 73 ++++++++++---------- 2 files changed, 67 insertions(+), 97 deletions(-) diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 058338bd38b..85291c19e3d 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -13,7 +13,6 @@ import hashlib import importlib import importlib.util -import os import re import sys from collections.abc import Callable, Sequence @@ -59,8 +58,6 @@ def _reflex_version() -> str: _active_reads: ContextVar[set[str] | None] = ContextVar("_active_reads", default=None) _patched = False _recorder_root: Path | None = None -_recorder_root_str: str | None = None -_recorder_raw_root_str: str | None = None #: Path parts that mark a dependency/build location whose reads are never a #: page's own source dependency (a change there flows through the version/epoch). @@ -85,12 +82,11 @@ def _reflex_version() -> str: } _PYTHON_PREFIXES = tuple( - os.path.abspath(os.fsdecode(prefix)) # noqa: PTH100 + Path(prefix).resolve() for prefix in {sys.base_exec_prefix, sys.base_prefix, sys.exec_prefix, sys.prefix} if prefix ) -_MODULE_FILE_CACHE_MISSING: object = object() -_module_file_cache: dict[tuple[str, str], str | None] = {} +_module_file_cache: dict[tuple[Path, str], str | None] = {} def _record_read(path: object) -> None: @@ -110,8 +106,8 @@ def _record_read(path: object) -> None: target.add(str(resolved)) -def _is_path_within(path: str, root: str) -> bool: - """Return whether ``path`` is contained within ``root`` without filesystem IO. 
+def _is_inside(path: Path, root: Path) -> bool: + """Return whether ``path`` is nested below ``root``. Args: path: Absolute path to test. @@ -120,17 +116,10 @@ def _is_path_within(path: str, root: str) -> bool: Returns: True when ``path`` is nested below ``root``. """ - normalized_path = os.path.normcase(path) - normalized_root = os.path.normcase(root) - if normalized_path == normalized_root: - return False - try: - return os.path.commonpath((normalized_path, normalized_root)) == normalized_root - except ValueError: - return False + return root in path.parents -def _is_python_install_path(path: str) -> bool: +def _is_python_install_file(path: Path) -> bool: """Return whether ``path`` is under this interpreter's install roots. Args: @@ -139,7 +128,7 @@ def _is_python_install_path(path: str) -> bool: Returns: True when ``path`` is under the interpreter or virtualenv prefix. """ - return any(_is_path_within(path, prefix) for prefix in _PYTHON_PREFIXES) + return any(prefix in path.parents for prefix in _PYTHON_PREFIXES) def _recordable_module_file(file: object) -> str | None: @@ -152,30 +141,24 @@ def _recordable_module_file(file: object) -> str | None: The resolved module file path to record, or None when it is outside the project root or otherwise not recordable. 
""" - root = _recorder_root - root_str = _recorder_root_str - if root is None or root_str is None: - return None - if not isinstance(file, (str, bytes, os.PathLike)): - return None try: - raw_path = os.path.abspath(os.fsdecode(file)) # noqa: PTH100 + path = Path(file).absolute() # type: ignore[arg-type] except (OSError, TypeError, ValueError): return None - cache_key = (root_str, raw_path) - cached = _module_file_cache.get(cache_key, _MODULE_FILE_CACHE_MISSING) - if cached is not _MODULE_FILE_CACHE_MISSING: - return cached if isinstance(cached, str) else None - - path = Path(raw_path) - under_project_path = _is_path_within(raw_path, root_str) or ( - _recorder_raw_root_str is not None - and _is_path_within(raw_path, _recorder_raw_root_str) - ) resolved_str = None + root = _recorder_root + if root is None: + return None + + cache_key = (root, str(path)) + try: + return _module_file_cache[cache_key] + except KeyError: + pass + if not any(part in _EXCLUDE_PARTS for part in path.parts) and ( - under_project_path or not _is_python_install_path(raw_path) + _is_inside(path, root) or not _is_python_install_file(path) ): try: resolved = path.resolve() @@ -183,22 +166,18 @@ def _recordable_module_file(file: object) -> str | None: pass else: if not any(part in _EXCLUDE_PARTS for part in resolved.parts): - resolved_str = str(resolved) if root in resolved.parents else None + resolved_str = str(resolved) if _is_inside(resolved, root) else None _module_file_cache[cache_key] = resolved_str return resolved_str -def _record_module_file(module: object, target: set[str] | None = None) -> None: +def _record_module_file(module: object, target: set[str]) -> None: """Record the source file for an imported module, if it has one. Args: module: The imported module object. - target: The active read set. Defaults to the current recorder context. + target: The active read set. 
""" - if target is None: - target = _active_reads.get() - if target is None: - return file = getattr(module, "__file__", None) if not file: return @@ -242,21 +221,17 @@ def _absolute_import_name( def _record_imported_modules( name: str, result: object, + target: set[str], fromlist: Sequence[str] | None = None, - target: set[str] | None = None, ) -> None: """Record source files for modules imported while a recorder is active. Args: name: The absolute requested module name. result: The object returned by ``__import__`` or ``import_module``. + target: The active read set. fromlist: The ``fromlist`` passed to ``__import__``. - target: The active read set. Defaults to the current recorder context. """ - if target is None: - target = _active_reads.get() - if target is None: - return _record_module_file(result, target) result_id = id(result) if (module := sys.modules.get(name)) and id(module) != result_id: @@ -283,19 +258,11 @@ def enable_read_tracking(root: Path | None = None) -> None: Args: root: Project root; only reads under it are recorded. Defaults to cwd. 
""" - global _patched, _recorder_raw_root_str, _recorder_root, _recorder_root_str - raw_root = root or Path.cwd() - resolved_root = raw_root.resolve() - raw_root_str = os.path.abspath(os.fsdecode(raw_root)) # noqa: PTH100 - if ( - _recorder_root != resolved_root - or _recorder_raw_root_str != raw_root_str - or _recorder_root_str != str(resolved_root) - ): + global _patched, _recorder_root + resolved_root = (root or Path.cwd()).resolve() + if _recorder_root != resolved_root: _module_file_cache.clear() _recorder_root = resolved_root - _recorder_root_str = str(resolved_root) - _recorder_raw_root_str = raw_root_str from reflex_base.plugins import compiler as _bc @@ -334,14 +301,14 @@ def import_( result = orig_import(name, globals_, locals_, fromlist, level) if (target := _active_reads.get()) is not None: _record_imported_modules( - _absolute_import_name(name, globals_, level), result, fromlist, target + _absolute_import_name(name, globals_, level), result, target, fromlist ) return result def import_module(name: str, package: str | None = None): result = orig_import_module(name, package) if (target := _active_reads.get()) is not None: - _record_imported_modules(result.__name__, result, target=target) + _record_imported_modules(result.__name__, result, target) return result Path.read_text = read_text # type: ignore[method-assign,assignment] diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index e5ba80ed171..31e28d22e7e 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -1,8 +1,37 @@ """Tests for the per-page dependency graph used by the incremental compile cache.""" +import importlib +import sys + from reflex.compiler import page_cache +def _prepare_runtime_module(root, monkeypatch, module_name, *, import_root=None): + """Create a temporary module and make it importable. + + Args: + root: Directory where the module file is written. + monkeypatch: Pytest monkeypatch fixture. 
+ module_name: Name of the module to create. + import_root: Directory added to ``sys.path``. Defaults to ``root``. + + Returns: + The created module file path. + """ + module_file = root / f"{module_name}.py" + module_file.write_text("VALUE = 1\n") + monkeypatch.syspath_prepend(str(import_root or root)) + sys.modules.pop(module_name, None) + importlib.invalidate_caches() + return module_file + + +def _forget_modules(*module_names): + """Remove temporary modules imported by a test.""" + for module_name in module_names: + sys.modules.pop(module_name, None) + + def test_global_epoch_tracks_global_files(tmp_path): (tmp_path / "rxconfig.py").write_text("config = 1\n") (tmp_path / "other.py").write_text("x = 1\n") @@ -211,38 +240,27 @@ def fail_loaded_first_party_modules(root): def test_record_reads_tracks_executed_importlib_import(tmp_path, monkeypatch): - import importlib - import sys - module_name = "runtime_import_dep_for_page_cache" - module_file = tmp_path / f"{module_name}.py" - module_file.write_text("VALUE = 1\n") - monkeypatch.syspath_prepend(str(tmp_path)) - monkeypatch.delitem(sys.modules, module_name, raising=False) - importlib.invalidate_caches() + module_file = _prepare_runtime_module(tmp_path, monkeypatch, module_name) page_cache.enable_read_tracking(root=tmp_path) try: with page_cache.record_reads() as reads: importlib.import_module(module_name) finally: - sys.modules.pop(module_name, None) + _forget_modules(module_name) assert str(module_file.resolve()) in reads def test_record_reads_tracks_executed_builtin_import(tmp_path, monkeypatch): - import importlib - import sys - package = tmp_path / "runtime_import_pkg" package.mkdir() (package / "__init__.py").write_text("") child = package / "child.py" child.write_text("VALUE = 1\n") monkeypatch.syspath_prepend(str(tmp_path)) - monkeypatch.delitem(sys.modules, "runtime_import_pkg", raising=False) - monkeypatch.delitem(sys.modules, "runtime_import_pkg.child", raising=False) + 
_forget_modules("runtime_import_pkg", "runtime_import_pkg.child") importlib.invalidate_caches() page_cache.enable_read_tracking(root=tmp_path) @@ -250,22 +268,14 @@ def test_record_reads_tracks_executed_builtin_import(tmp_path, monkeypatch): with page_cache.record_reads() as reads: __import__("runtime_import_pkg.child") finally: - sys.modules.pop("runtime_import_pkg", None) - sys.modules.pop("runtime_import_pkg.child", None) + _forget_modules("runtime_import_pkg", "runtime_import_pkg.child") assert str(child.resolve()) in reads def test_record_reads_ignores_unexecuted_import(tmp_path, monkeypatch): - import importlib - import sys - module_name = "uncalled_runtime_import_dep" - module_file = tmp_path / f"{module_name}.py" - module_file.write_text("VALUE = 1\n") - monkeypatch.syspath_prepend(str(tmp_path)) - monkeypatch.delitem(sys.modules, module_name, raising=False) - importlib.invalidate_caches() + module_file = _prepare_runtime_module(tmp_path, monkeypatch, module_name) page_cache.enable_read_tracking(root=tmp_path) def import_if_called(): @@ -280,8 +290,6 @@ def import_if_called(): def test_record_reads_imports_only_project_modules(tmp_path): - import importlib - page_cache.enable_read_tracking(root=tmp_path) with page_cache.record_reads() as reads: @@ -291,31 +299,26 @@ def test_record_reads_imports_only_project_modules(tmp_path): def test_record_reads_tracks_symlinked_project_import(tmp_path, monkeypatch): - import importlib - import sys - import pytest root = tmp_path / "app" root.mkdir() module_name = "symlinked_runtime_dep" - module_file = root / f"{module_name}.py" - module_file.write_text("VALUE = 1\n") linked_root = tmp_path / "linked_app" try: linked_root.symlink_to(root, target_is_directory=True) except OSError as exc: pytest.skip(f"symlinks unavailable: {exc}") - monkeypatch.syspath_prepend(str(linked_root)) - monkeypatch.delitem(sys.modules, module_name, raising=False) - importlib.invalidate_caches() + module_file = _prepare_runtime_module( + root, 
monkeypatch, module_name, import_root=linked_root + ) page_cache.enable_read_tracking(root=root) try: with page_cache.record_reads() as reads: importlib.import_module(module_name) finally: - sys.modules.pop(module_name, None) + _forget_modules(module_name) assert str(module_file.resolve()) in reads From 451be1e96a1e1f19c86978fc5961608fcc8c6147 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Wed, 1 Jul 2026 21:35:44 +0500 Subject: [PATCH 12/18] fix(compiler): guard read-tracker recursion on pathlib lazy imports The compile-cache read tracker patches builtins.__import__ to record a page's runtime imports. While recording, _recordable_module_file resolves a module file via Path(...).absolute(), which on CPython 3.12 lazily runs `import ntpath` to compare path flavours. That import re-entered the patched hook -> resolved a path again -> imported again, recursing until the stack overflowed (32 unit tests failed only on ubuntu 3.12). Suspend read/import tracking (_suspend_tracking) while the tracker's own path/module bookkeeping runs, so imports and reads it triggers are neither recorded nor able to re-enter. Add a version-independent regression test that mimics the 3.12 lazy import. --- reflex/compiler/page_cache.py | 91 +++++++++++++++---------- tests/units/compiler/test_page_cache.py | 27 ++++++++ 2 files changed, 82 insertions(+), 36 deletions(-) diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 85291c19e3d..14a17036d63 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -89,21 +89,36 @@ def _reflex_version() -> str: _module_file_cache: dict[tuple[Path, str], str | None] = {} +@contextlib.contextmanager +def _suspend_tracking(): + """Ignore read/import hooks caused by dependency bookkeeping. + + Yields: + None. 
+ """ + token = _active_reads.set(None) + try: + yield + finally: + _active_reads.reset(token) + + def _record_read(path: object) -> None: target = _active_reads.get() if target is None: return - try: - resolved = Path(path).resolve() # type: ignore[arg-type] - except (OSError, TypeError, ValueError): - return - if any(part in _EXCLUDE_PARTS for part in resolved.parts): - return - root = _recorder_root - under_root = root is not None and root in resolved.parents - if not under_root and resolved.suffix.lower() not in _CONTENT_SUFFIXES: - return - target.add(str(resolved)) + with _suspend_tracking(): + try: + resolved = Path(path).resolve() # type: ignore[arg-type] + except (OSError, TypeError, ValueError): + return + if any(part in _EXCLUDE_PARTS for part in resolved.parts): + return + root = _recorder_root + under_root = root is not None and root in resolved.parents + if not under_root and resolved.suffix.lower() not in _CONTENT_SUFFIXES: + return + target.add(str(resolved)) def _is_inside(path: Path, root: Path) -> bool: @@ -141,34 +156,35 @@ def _recordable_module_file(file: object) -> str | None: The resolved module file path to record, or None when it is outside the project root or otherwise not recordable. 
""" - try: - path = Path(file).absolute() # type: ignore[arg-type] - except (OSError, TypeError, ValueError): - return None - - resolved_str = None - root = _recorder_root - if root is None: - return None + with _suspend_tracking(): + try: + path = Path(file).absolute() # type: ignore[arg-type] + except (OSError, TypeError, ValueError): + return None - cache_key = (root, str(path)) - try: - return _module_file_cache[cache_key] - except KeyError: - pass + resolved_str = None + root = _recorder_root + if root is None: + return None - if not any(part in _EXCLUDE_PARTS for part in path.parts) and ( - _is_inside(path, root) or not _is_python_install_file(path) - ): + cache_key = (root, str(path)) try: - resolved = path.resolve() - except (OSError, TypeError, ValueError): + return _module_file_cache[cache_key] + except KeyError: pass - else: - if not any(part in _EXCLUDE_PARTS for part in resolved.parts): - resolved_str = str(resolved) if _is_inside(resolved, root) else None - _module_file_cache[cache_key] = resolved_str - return resolved_str + + if not any(part in _EXCLUDE_PARTS for part in path.parts) and ( + _is_inside(path, root) or not _is_python_install_file(path) + ): + try: + resolved = path.resolve() + except (OSError, TypeError, ValueError): + pass + else: + if not any(part in _EXCLUDE_PARTS for part in resolved.parts): + resolved_str = str(resolved) if _is_inside(resolved, root) else None + _module_file_cache[cache_key] = resolved_str + return resolved_str def _record_module_file(module: object, target: set[str]) -> None: @@ -301,7 +317,10 @@ def import_( result = orig_import(name, globals_, locals_, fromlist, level) if (target := _active_reads.get()) is not None: _record_imported_modules( - _absolute_import_name(name, globals_, level), result, target, fromlist + _absolute_import_name(name, globals_, level), + result, + target, + fromlist, ) return result diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index 
31e28d22e7e..7dd872488a5 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -2,6 +2,7 @@ import importlib import sys +from pathlib import Path from reflex.compiler import page_cache @@ -289,6 +290,32 @@ def import_if_called(): assert module_name not in sys.modules +def test_record_reads_no_recursion_when_recorder_import_triggers_import( + tmp_path, monkeypatch +): + """Recorder-internal imports must not re-enter dependency tracking.""" + module_name = "recorder_reentry_dep" + module_file = _prepare_runtime_module(tmp_path, monkeypatch, module_name) + page_cache.enable_read_tracking(root=tmp_path) + + real_absolute = Path.absolute + + def absolute_with_lazy_import(self: Path): + __import__("ntpath") + return real_absolute(self) + + monkeypatch.setattr(Path, "absolute", absolute_with_lazy_import) + + try: + with page_cache.record_reads() as reads: + importlib.import_module(module_name) + finally: + _forget_modules(module_name) + + assert str(module_file.resolve()) in reads + assert not any("ntpath" in read for read in reads) + + def test_record_reads_imports_only_project_modules(tmp_path): page_cache.enable_read_tracking(root=tmp_path) From 724cd3de4bb4addbfaf0940ac0839f4ad12662e8 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Wed, 1 Jul 2026 21:58:36 +0500 Subject: [PATCH 13/18] feat(compiler): track dynamic app-import reads for the compile cache Config the app module pulls in dynamically (importlib calls, files read at import time) was invisible to app_dependency_files, which only walked the static import graph from the entrypoint. Editing such config could leave reused app-wide output stale. Record files read/imported while the app module loads (record_app_import, wired into prerequisites.get_app under REFLEX_COMPILE_CACHE) and fold that dynamic set into app_dependency_files, subtracting per-page static closures so ordinary page edits still invalidate per-page. 
Extract the shared graph walk into _walk_import_closure. --- reflex/compiler/page_cache.py | 97 +++++++++++++++++-------- reflex/utils/prerequisites.py | 34 +++++---- tests/units/compiler/test_page_cache.py | 78 ++++++++++++++++++++ 3 files changed, 166 insertions(+), 43 deletions(-) diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 14a17036d63..3a162112bb6 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -87,6 +87,7 @@ def _reflex_version() -> str: if prefix ) _module_file_cache: dict[tuple[Path, str], str | None] = {} +_app_import_reads: dict[Path, set[str]] = {} @contextlib.contextmanager @@ -352,6 +353,29 @@ def record_reads(): _active_reads.reset(token) +@contextlib.contextmanager +def record_app_import(root: Path | None = None): + """Record files read/imported while importing the app module. + + Args: + root: Project root. Defaults to cwd. + + Yields: + None. + """ + root = (root or Path.cwd()).resolve() + recorded: set[str] = set() + success = False + try: + with record_reads() as reads: + yield + recorded = set(reads) + success = True + finally: + if success: + _app_import_reads[root] = recorded + + def _app_entrypoint_file(root: Path | None = None) -> Path | None: """Resolve the user's app entrypoint module file (where ``rx.App`` is built). @@ -583,8 +607,36 @@ def build_import_graph(root: Path | None = None) -> dict[str, set[str]]: def clear_import_graph() -> None: - """Drop the cached import graph (e.g. after modules are reloaded).""" + """Drop cached import graphs (e.g. after modules are reloaded).""" _import_graph_cache.clear() + _app_import_reads.clear() + + +def _walk_import_closure( + graph: dict[str, set[str]], + starts: set[str], + barriers: set[str] | None = None, +) -> set[str]: + """Walk a first-party import graph from ``starts``. + + Args: + graph: Mapping of source file to imported source files. + starts: Source files where traversal begins. 
+ barriers: Files included elsewhere and not traversed into. + + Returns: + The reachable source files, excluding barriers. + """ + barriers = barriers or set() + seen: set[str] = set() + stack = list(starts - barriers) + while stack: + cur = stack.pop() + if cur in seen: + continue + seen.add(cur) + stack.extend(dep for dep in graph.get(cur, ()) if dep not in barriers) + return seen def _component_source_files(component: object, root: Path) -> set[str]: @@ -632,16 +684,7 @@ def page_py_dependencies( """ root = (root or Path.cwd()).resolve() graph = build_import_graph(root) - - seen: set[str] = set() - stack = list(_component_source_files(component, root)) - while stack: - cur = stack.pop() - if cur in seen: - continue - seen.add(cur) - stack.extend(graph.get(cur, ())) - return seen + return _walk_import_closure(graph, _component_source_files(component, root)) def app_dependency_files( @@ -651,11 +694,11 @@ def app_dependency_files( Walks the first-party import graph from the app entrypoint (:func:`_app_entrypoint_file`), treating each page-defining module as a - barrier (not entered), so the result is the entrypoint plus the config-only - modules it imports, such as theme, app-wraps, stylesheets, head components, and - never page modules or their deep dependencies, which are tracked per page. A - config module shared with a page is still captured (it is reached from the - entrypoint directly, not through the barrier). + barrier (not entered), then folds in files read/imported while the app module + loaded. Static page dependency closures are removed from that dynamic set so + regular page edits still invalidate per-page rather than globally. A config + module shared with a page is still captured when it is reached from the + entrypoint directly. 
These configure the app-wide files an incremental rebuild reuses on disk (app root, contexts, theme, stylesheet), so they are folded into @@ -675,24 +718,18 @@ def app_dependency_files( entrypoint = _app_entrypoint_file(root) if entrypoint is None: return set() - file_to_mod = _loaded_first_party_modules(root) + graph = build_import_graph(root) barriers: set[str] = set() + page_deps: set[str] = set() for page in pages or (): - barriers |= _component_source_files(getattr(page, "component", None), root) + starts = _component_source_files(getattr(page, "component", None), root) + barriers |= starts + page_deps |= _walk_import_closure(graph, starts) start = str(entrypoint) - seen = {start} - stack = [start] - while stack: - cur = stack.pop() - modname = file_to_mod.get(cur) - if modname is None: - continue - for dep in _module_import_edges(cur, modname, file_to_mod): - if dep not in seen and dep not in barriers: - seen.add(dep) - stack.append(dep) - return seen + static_deps = _walk_import_closure(graph, {start}, barriers) + dynamic_deps = _app_import_reads.get(root, set()) - page_deps + return static_deps | dynamic_deps def make_hasher() -> Callable[[str], str | None]: diff --git a/reflex/utils/prerequisites.py b/reflex/utils/prerequisites.py index a8c84da6aaa..df243182ec3 100644 --- a/reflex/utils/prerequisites.py +++ b/reflex/utils/prerequisites.py @@ -194,22 +194,30 @@ def get_app(reload: bool = False) -> ModuleType: module = config.module sys.path.insert(0, getcwd()) # noqa: PTH109 - app = ( - __import__(module, fromlist=(constants.CompileVars.APP,)) - if not config.app_module - else config.app_module - ) - if reload: - from reflex.page import DECORATED_PAGES - from reflex.state import reload_state_module + if environment.REFLEX_COMPILE_CACHE.get(): + from reflex.compiler import page_cache + + page_cache.enable_read_tracking() + recorder = page_cache.record_app_import() + else: + recorder = contextlib.nullcontext() + with recorder: + app = ( + __import__(module, 
fromlist=(constants.CompileVars.APP,)) + if not config.app_module + else config.app_module + ) + if reload: + from reflex.page import DECORATED_PAGES + from reflex.state import reload_state_module - # Reset rx.State subclasses to avoid conflict when reloading. - reload_state_module(module=module) + # Reset rx.State subclasses to avoid conflict when reloading. + reload_state_module(module=module) - DECORATED_PAGES.clear() + DECORATED_PAGES.clear() - # Reload the app module. - importlib.reload(app) + # Reload the app module. + importlib.reload(app) except Exception as ex: telemetry.send_error(ex, context="frontend") raise diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index 7dd872488a5..45d84e4e7c1 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -240,6 +240,84 @@ def fail_loaded_first_party_modules(root): assert page_cache.app_dependency_files(root=tmp_path) == set() +def test_app_dependency_files_tracks_dynamic_import_config_read(tmp_path, monkeypatch): + from types import SimpleNamespace + + from reflex.utils import prerequisites + + entry = tmp_path / "myapp.py" + dynamic = tmp_path / "dynamic_theme.py" + config_file = tmp_path / "theme.json" + entry.write_text( + "import importlib\ntheme = importlib.import_module('dynamic_theme').THEME\n" + ) + dynamic.write_text( + "from pathlib import Path\nTHEME = Path('theme.json').read_text()\n" + ) + config_file.write_text('"light"\n') + config = SimpleNamespace( + module="myapp", + app_module=None, + _app_name_is_valid=True, + ) + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(str(tmp_path)) + monkeypatch.setenv("REFLEX_COMPILE_CACHE", "1") + monkeypatch.setattr(prerequisites, "get_config", lambda: config) + monkeypatch.setattr("reflex.config.get_config", lambda: config) + _forget_modules("myapp", "dynamic_theme") + + try: + prerequisites.get_app() + deps = page_cache.app_dependency_files(root=tmp_path) + epoch 
= page_cache.global_epoch(root=tmp_path) + + config_file.write_text('"dark"\n') + finally: + _forget_modules("myapp", "dynamic_theme") + + assert str(dynamic.resolve()) in deps + assert str(config_file.resolve()) in deps + assert page_cache.global_epoch(root=tmp_path) != epoch + + +def test_app_dependency_files_subtracts_pages_from_app_import_reads( + tmp_path, monkeypatch +): + from types import SimpleNamespace + + from reflex.utils import prerequisites + + entry = tmp_path / "myapp.py" + theme = tmp_path / "theme.py" + page = tmp_path / "page.py" + view = tmp_path / "view.py" + entry.write_text("import theme\nfrom page import index\napp_theme = theme.THEME\n") + theme.write_text("THEME = 'light'\n") + page.write_text("from view import render\n\ndef index():\n return render()\n") + view.write_text("def render():\n return 'view'\n") + config = SimpleNamespace( + module="myapp", + app_module=None, + _app_name_is_valid=True, + ) + monkeypatch.chdir(tmp_path) + monkeypatch.syspath_prepend(str(tmp_path)) + monkeypatch.setenv("REFLEX_COMPILE_CACHE", "1") + monkeypatch.setattr(prerequisites, "get_config", lambda: config) + monkeypatch.setattr("reflex.config.get_config", lambda: config) + _forget_modules("myapp", "theme", "page", "view") + + try: + prerequisites.get_app() + pages = [SimpleNamespace(component=sys.modules["page"].index)] + deps = page_cache.app_dependency_files(pages, root=tmp_path) + finally: + _forget_modules("myapp", "theme", "page", "view") + + assert deps == {str(entry.resolve()), str(theme.resolve())} + + def test_record_reads_tracks_executed_importlib_import(tmp_path, monkeypatch): module_name = "runtime_import_dep_for_page_cache" module_file = _prepare_runtime_module(tmp_path, monkeypatch, module_name) From 21e7d3ea9ee99772ec457b7cf3809289fc7053fc Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Fri, 3 Jul 2026 05:01:05 +0500 Subject: [PATCH 14/18] fix(compiler): close staleness gaps in the incremental compile cache Four 
reviewer-confirmed ways the incremental path could serve stale .web output: - Path.open() calls io.open directly, bypassing the builtins.open patch, so data files read through it were never recorded as page dependencies. Patch Path.open itself. - The contexts file was never re-emitted, keeping old state defaults and client-storage config after a state-module edit. Re-emit it on every incremental rebuild (write_file already skips byte-identical writes). - assets/ was never copied (excluded from dependency tracking and the epoch), so an assets-only edit produced an all-hit rebuild with a stale .web/public. Run the same mtime-incremental copy as the full compile. - Memo output files are grouped one file per source module, but a miss rewrote them from only its own auto-memo contributions, dropping user @rx.memo exports and hit-sibling exports still imported by reused pages. Recompile memo-contributing same-module siblings together (tracked by a new has_memos manifest flag, schema 3 -> 4) and include user memos that share a rewritten file or whose module changed. 
--- reflex/compiler/disk_cache.py | 186 ++++++++++++++++- reflex/compiler/page_cache.py | 14 +- tests/units/compiler/test_disk_cache.py | 262 +++++++++++++++++++++--- tests/units/compiler/test_page_cache.py | 24 +++ 4 files changed, 449 insertions(+), 37 deletions(-) diff --git a/reflex/compiler/disk_cache.py b/reflex/compiler/disk_cache.py index b047afd0489..7e2cfc785c1 100644 --- a/reflex/compiler/disk_cache.py +++ b/reflex/compiler/disk_cache.py @@ -10,18 +10,20 @@ import dataclasses import json +import sys +from pathlib import Path from typing import TYPE_CHECKING, Any +from reflex_base import constants from reflex_base.plugins import CompileContext, CompilerHooks from reflex_base.utils.imports import ImportVar, merge_imports from reflex.compiler import page_cache from reflex.compiler.plugins import default_page_plugins -from reflex.utils import console, prerequisites +from reflex.utils import console, path_ops, prerequisites if TYPE_CHECKING: from collections.abc import Callable, Sequence - from pathlib import Path from reflex_base.plugins import PageContext, PageDefinition from reflex_base.utils.imports import ParsedImportDict @@ -29,7 +31,7 @@ from reflex.app import App #: Bump when the manifest layout changes (old manifests are then ignored). -_SCHEMA = 3 +_SCHEMA = 4 #: Manifest filename under the web directory. _MANIFEST_FILE = "reflex_compile_cache.json" @@ -102,6 +104,11 @@ def _manifest_page_entry( ), "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), "is_stateful": is_stateful, + # Whether the page contributed auto memos: pages sharing a source + # module share one memo output file, so a memo-contributing hit page + # must be recompiled alongside a same-module miss (see + # ``_with_module_siblings``). 
+ "has_memos": bool(page_ctx.memo_contributions), } @@ -240,6 +247,123 @@ def partition_pages( ] +def _with_module_siblings( + miss_pages: list[PageDefinition], + pages: Sequence[PageDefinition], + manifest: dict[str, Any], +) -> list[PageDefinition]: + """Expand the miss set with memo-contributing same-module hit pages. + + Auto-memo output is grouped into one file per source module, so rewriting + that file needs the contributions of *all* the module's pages that have + any. Hit pages that contributed no memos (per the manifest) have nothing + in that file and are left reused. + + Args: + miss_pages: The dependency-changed pages. + pages: All current page definitions (in compile order). + manifest: The loaded manifest. + + Returns: + The expanded miss list, in ``pages`` order. + """ + miss_modules = { + module + for page in miss_pages + if (module := getattr(page, "_source_module", None)) is not None + } + if not miss_modules: + return miss_pages + miss_routes = {page.route for page in miss_pages} + return [ + page + for page in pages + if page.route in miss_routes + or ( + getattr(page, "_source_module", None) in miss_modules + and manifest["pages"][page.route]["has_memos"] + ) + ] + + +def _changed_dependency_files( + manifest: dict[str, Any], hasher: Callable[[str], str | None] +) -> set[str]: + """Return every recorded dependency file whose content changed. + + Args: + manifest: The loaded manifest. + hasher: A memoized path -> content-hash function. + + Returns: + The set of changed dependency file paths. + """ + return { + path + for entry in manifest["pages"].values() + for path, digest in entry["dep_hashes"].items() + if hasher(path) != digest + } + + +def _module_source_file(module_name: str | None) -> str | None: + """Resolve a loaded module's source file path. + + Args: + module_name: The dotted module name. + + Returns: + The resolved file path string, or None. 
+ """ + file = getattr(sys.modules.get(module_name or ""), "__file__", None) + if not file: + return None + try: + return str(Path(file).resolve()) + except OSError: + return None + + +def _complete_memo_defs( + contributions: dict[tuple[str, str | None], Any], + changed_files: set[str], +) -> list[Any]: + """Return the full definition set for the memo files being rewritten. + + Memo output is grouped one file per source module, so a rewrite must carry + every definition landing in that file: user ``@rx.memo`` definitions from + the global registry that share a module with a recompiled contribution, + plus user memos whose own module file changed (an edited memo body must be + re-emitted even though only its importer pages missed). + + Args: + contributions: The recompiled pages' auto-memo contributions. + changed_files: The dependency files whose content changed. + + Returns: + The memo definitions to compile, user memos first (matching the full + compile's emit order). + """ + from reflex_base.components.memo import MEMOS + from reflex_base.utils import memo_paths + + dirty_segments = { + segments + for definition in contributions.values() + if ( + segments := memo_paths.module_to_mirrored_segments(definition.source_module) + ) + is not None + } + user_memos = [ + memo + for memo in MEMOS.values() + if memo_paths.module_to_mirrored_segments(memo.source_module) in dirty_segments + or _module_source_file(memo.source_module) in changed_files + ] + return [*user_memos, *contributions.values()] + + def try_incremental_rebuild( app: App, *, @@ -254,6 +378,12 @@ def try_incremental_rebuild( unsafe to reuse: no/old manifest, a changed global input, a route change, or a miss page that altered its app-wrap set or stateful flag. 
+ App-wide outputs that per-page dependency sets do not cover are always + re-emitted rather than tracked: the contexts file (state defaults and + client-storage config change with any state module) and the ``assets`` + copy (assets are excluded from dependency tracking). Both are cheap and + idempotent. + On success, reports (at info level) how many pages were recompiled vs reused and, while recompiling, shows a progress bar over the changed pages so a hot reload makes the incremental work visible. @@ -282,6 +412,8 @@ def try_incremental_rebuild( return False miss_pages = partition_pages(pages, manifest, hasher) + if miss_pages: + miss_pages = _with_module_siblings(miss_pages, pages, manifest) miss_routes = {p.route for p in miss_pages} # Recompile only the source-changed pages. @@ -347,6 +479,7 @@ def try_incremental_rebuild( # Write changed pages + their memo files; reuse everything else on disk. install_imports = _deserialize_imports(manifest["all_imports"]) if miss_ctx is not None: + memo_contributions: dict[tuple[str, str | None], Any] = {} for page in miss_pages: page_ctx = miss_ctx.compiled_pages[page.route] # Both are guaranteed non-None by the guard loop above. 
@@ -358,15 +491,21 @@ def try_incremental_rebuild( compiler.utils.resolve_path_of_web_dir(output_path), output_code, ) - memo_defs = list(page_ctx.memo_contributions.values()) - memo_files, memo_imports = compiler.compile_memo_components(memo_defs) - for mpath, mcode in memo_files: - compiler.utils.write_file( - compiler.utils.resolve_path_of_web_dir(mpath), mcode - ) - install_imports = merge_imports( - install_imports, page_ctx.frontend_imports, memo_imports + memo_contributions.update(page_ctx.memo_contributions) + install_imports = merge_imports(install_imports, page_ctx.frontend_imports) + # Memo output files are grouped per source module, so compile them once + # with the complete definition set (all recompiled pages' contributions + # plus the user memos sharing those files or whose module changed). + memo_files, memo_imports = compiler.compile_memo_components( + _complete_memo_defs( + memo_contributions, _changed_dependency_files(manifest, hasher) ) + ) + for mpath, mcode in memo_files: + compiler.utils.write_file( + compiler.utils.resolve_path_of_web_dir(mpath), mcode + ) + install_imports = merge_imports(install_imports, memo_imports) # Record which routes are stateful: miss pages from this compile, hit pages # from the manifest, so the stateful-pages marker is complete. We do NOT @@ -388,6 +527,31 @@ def try_incremental_rebuild( app._add_optional_endpoints() app._validate_var_dependencies() + # App-wide outputs that are cheap to re-emit and not gated by the epoch: + # contexts (state defaults/client-storage change with any state module, + # which per-page dependency sets do not force a regenerate of) and the + # assets copy (assets are excluded from dependency tracking entirely). 
+ from reflex_components_radix.plugin import RadixThemesPlugin + + theme = next( + ( + plugin.get_theme() + for plugin in compiler_plugins + if isinstance(plugin, RadixThemesPlugin) + ), + None, + ) + context_path, context_code = compiler.compile_contexts(app._state, theme) + compiler.utils.write_file( + compiler.utils.resolve_path_of_web_dir(context_path), context_code + ) + assets_src = (root or Path.cwd()) / constants.Dirs.APP_ASSETS + if assets_src.is_dir(): + path_ops.update_directory_tree( + src=assets_src, + dest=prerequisites.get_web_dir() / constants.Dirs.PUBLIC, + ) + # Frontend packages + routing scaffolding (cheap, idempotent). from reflex.utils import frontend_skeleton diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 3a162112bb6..38bcbb2b798 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -291,10 +291,14 @@ def enable_read_tracking(root: Path | None = None) -> None: orig_read_text = Path.read_text orig_read_bytes = Path.read_bytes + orig_path_open = Path.open orig_open = builtins.open orig_import = builtins.__import__ orig_import_module = importlib.import_module + def _is_read_mode(mode: str) -> bool: + return "r" in mode and not ("w" in mode or "a" in mode or "x" in mode) + def read_text(self: Path, *args: object, **kwargs: object): _record_read(self) return orig_read_text(self, *args, **kwargs) # type: ignore[arg-type] @@ -303,8 +307,15 @@ def read_bytes(self: Path): _record_read(self) return orig_read_bytes(self) + # ``Path.open`` calls ``io.open`` directly (not ``builtins.open``), so it + # needs its own patch for reads through it to be recorded. 
+ def path_open(self: Path, mode: str = "r", *args: object, **kwargs: object): + if _is_read_mode(mode): + _record_read(self) + return orig_path_open(self, mode, *args, **kwargs) # type: ignore[arg-type] + def open_(file: object, mode: str = "r", *args: object, **kwargs: object): - if "r" in mode and not ("w" in mode or "a" in mode or "x" in mode): + if _is_read_mode(mode): _record_read(file) return orig_open(file, mode, *args, **kwargs) # type: ignore[arg-type] @@ -333,6 +344,7 @@ def import_module(name: str, package: str | None = None): Path.read_text = read_text # type: ignore[method-assign,assignment] Path.read_bytes = read_bytes # type: ignore[method-assign,assignment] + Path.open = path_open # type: ignore[method-assign,assignment] builtins.open = open_ # type: ignore[assignment] builtins.__import__ = import_ # type: ignore[assignment] importlib.import_module = import_module diff --git a/tests/units/compiler/test_disk_cache.py b/tests/units/compiler/test_disk_cache.py index aa9c4b80b24..fb62ad25b2b 100644 --- a/tests/units/compiler/test_disk_cache.py +++ b/tests/units/compiler/test_disk_cache.py @@ -3,6 +3,7 @@ import dataclasses import json from collections.abc import Callable, Sequence +from pathlib import Path from types import SimpleNamespace from typing import Any, cast @@ -26,6 +27,23 @@ class _FakePage: _source_module: str | None = None +def _use_tmp_web_dir(tmp_path, monkeypatch): + """Point every ``get_web_dir`` binding (module attr + env) at a tmp web dir. + + Args: + tmp_path: The test's tmp directory. + monkeypatch: The pytest monkeypatch fixture. + + Returns: + The created web directory path. 
+ """ + web = tmp_path / ".web" + web.mkdir() + monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + monkeypatch.setenv("REFLEX_WEB_WORKDIR", str(web)) + return web + + def _footer() -> Component: return rx.el.footer(rx.el.span("© Reflex"), class_name="footer") @@ -117,9 +135,7 @@ def test_partition_pages_detects_changed_source(): def test_write_and_load_manifest(tmp_path, monkeypatch): - web = tmp_path / ".web" - web.mkdir() - monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + _use_tmp_web_dir(tmp_path, monkeypatch) pages = [ _FakePage(route="/a", component=_page_a), @@ -135,10 +151,16 @@ def test_write_and_load_manifest(tmp_path, monkeypatch): assert set(manifest["pages"]) == {"/a", "/b", "/c"} for route in ("/a", "/b", "/c"): entry = manifest["pages"][route] - # the manifest is pure bookkeeping: dep set + app-wrap keys + stateful flag - assert set(entry) == {"dep_hashes", "app_wrap_keys", "is_stateful"} - # these static pages register no new state + # the manifest is pure bookkeeping: dep set + app-wrap keys + flags + assert set(entry) == { + "dep_hashes", + "app_wrap_keys", + "is_stateful", + "has_memos", + } + # these static pages register no new state and contribute no memos assert entry["is_stateful"] is False + assert entry["has_memos"] is False # rendered output is never persisted (it already lives in .web, and is # never read back from the manifest) -> keeps the manifest small assert "output_code" not in entry @@ -155,9 +177,7 @@ def test_unchanged_pages_compile_identically(tmp_path, monkeypatch): reuse is correct if a fresh compile of that page yields identical output. Compile A, B, C; then compile A, B(edited), C; A and C must be byte-identical. 
""" - web = tmp_path / ".web" - web.mkdir() - monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + _use_tmp_web_dir(tmp_path, monkeypatch) pages = [ _FakePage(route="/a", component=_page_a), @@ -187,22 +207,30 @@ def test_unchanged_pages_compile_identically(tmp_path, monkeypatch): ) +_CONTEXTS_STUB = "// contexts stub" + + def _stub_externals(app, monkeypatch): """Stub the side-effecting steps the fast path runs on a real app.""" import reflex.utils.frontend_skeleton as fs + from reflex.compiler import utils as compiler_utils monkeypatch.setattr(app, "_get_frontend_packages", lambda *a, **k: None) monkeypatch.setattr(app, "_add_optional_endpoints", lambda *a, **k: None) monkeypatch.setattr(app, "_validate_var_dependencies", lambda *a, **k: None) monkeypatch.setattr(app, "_write_stateful_pages_marker", lambda *a, **k: None) + # Serializing the real root state tree would pick up unrelated state + # classes from other collected test modules. + monkeypatch.setattr( + "reflex.compiler.compiler.compile_contexts", + lambda state, theme: (compiler_utils.get_context_path(), _CONTEXTS_STUB), + ) monkeypatch.setattr(fs, "update_react_router_config", lambda **k: None) monkeypatch.setattr(fs, "update_entry_client", lambda *a, **k: None) def test_incremental_rebuild_all_hits(tmp_path, monkeypatch): - web = tmp_path / ".web" - web.mkdir() - monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + _use_tmp_web_dir(tmp_path, monkeypatch) app = rx.App() app.add_page(_page_a, route="/a") @@ -224,9 +252,7 @@ def test_incremental_rebuild_all_hits(tmp_path, monkeypatch): def test_incremental_rebuild_one_miss_writes_only_that_page(tmp_path, monkeypatch): from reflex.compiler import utils as compiler_utils - web = tmp_path / ".web" - web.mkdir() - monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + web = _use_tmp_web_dir(tmp_path, monkeypatch) app = rx.App() app.add_page(_page_a, route="/a") @@ -264,6 +290,11 @@ def 
test_incremental_rebuild_one_miss_writes_only_that_page(tmp_path, monkeypatc out_path.read_text(encoding="utf-8") == ctx.compiled_pages[edited_route].output_code ) + # The same-module hit page contributed no memos, so it is reused, not + # recompiled (its output was never written to this fresh web dir). + hit_output_path = ctx.compiled_pages[pages[1].route].output_path + assert hit_output_path is not None + assert not compiler_utils.resolve_path_of_web_dir(hit_output_path).exists() def test_stateful_hit_is_marked_but_not_reevaluated(tmp_path, monkeypatch): @@ -273,9 +304,7 @@ def test_stateful_hit_is_marked_but_not_reevaluated(tmp_path, monkeypatch): re-evaluates the marked stateful pages itself, so re-evaluating them during the incremental rebuild was pure waste. """ - web = tmp_path / ".web" - web.mkdir() - monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + web = _use_tmp_web_dir(tmp_path, monkeypatch) app = rx.App() app.add_page(_page_a, route="/a") @@ -305,25 +334,208 @@ def test_stateful_hit_is_marked_but_not_reevaluated(tmp_path, monkeypatch): assert route in app._stateful_pages +def test_incremental_rebuild_regenerates_contexts(tmp_path, monkeypatch): + """State defaults/client-storage are baked into the contexts file and a + state-module edit never bumps the epoch, so the incremental path must + always re-emit it. 
+ """ + from reflex.compiler import utils as compiler_utils + + _use_tmp_web_dir(tmp_path, monkeypatch) + + app = rx.App() + app.add_page(_page_a, route="/a") + pages = list(app._unevaluated_pages.values()) + ctx = _compile(pages) + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + _stub_externals(app, monkeypatch) + + assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + + out_path = compiler_utils.resolve_path_of_web_dir(compiler_utils.get_context_path()) + assert out_path.exists() + assert out_path.read_text(encoding="utf-8") == _CONTEXTS_STUB + + +def test_incremental_rebuild_copies_assets(tmp_path, monkeypatch): + """An assets-only edit is an all-hit rebuild, so the incremental path must + run the same assets -> public copy as the full compile. + """ + web = _use_tmp_web_dir(tmp_path, monkeypatch) + assets = tmp_path / "assets" + assets.mkdir() + (assets / "logo.svg").write_text("<svg/>") + + app = rx.App() + app.add_page(_page_a, route="/a") + pages = list(app._unevaluated_pages.values()) + ctx = _compile(pages) + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + _stub_externals(app, monkeypatch) + + assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + + assert (web / "public" / "logo.svg").read_text() == "<svg/>" + + +@rx.memo +def _badge(text: str) -> Component: + return rx.el.span(text, class_name="badge") + + +def _page_with_user_memo() -> Component: + return rx.el.div(rx.el.h1("Memo page"), _badge(text="hello")) + + +def test_incremental_rebuild_rewrites_changed_user_memo( + tmp_path, monkeypatch, preserve_memo_registries +): + """Editing a user ``@rx.memo`` module must rewrite its mirrored memo file. 
+ + The memo's module file is in the dep set of every page that imports it, so + those pages miss — but only auto-memo contributions were being written, + leaving the user memo's generated JS stale. + """ + from reflex.compiler import compiler + from reflex.compiler import utils as compiler_utils + + web = _use_tmp_web_dir(tmp_path, monkeypatch) + + app = rx.App() + app.add_page(_page_with_user_memo, route="/memo") + pages = list(app._unevaluated_pages.values()) + memo_route = pages[0].route + ctx = _compile(pages) + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + _stub_externals(app, monkeypatch) + + # Simulate an edit to this module (which defines the user memo): record a + # stale hash for its file so the page importing the memo misses. + module_file = str(Path(__file__).resolve()) + manifest_path = web / disk_cache._MANIFEST_FILE + manifest = json.loads(manifest_path.read_text()) + manifest["pages"][memo_route]["dep_hashes"] = {module_file: "stale-hash"} + manifest_path.write_text(json.dumps(manifest)) + + assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + + # The user memo's mirrored file was re-emitted with its export. 
+ from reflex_base.components.memo import MEMOS, MemoComponentDefinition + + badge_def = next(m for m in MEMOS.values() if m.source_module == __name__) + assert isinstance(badge_def, MemoComponentDefinition) + memo_files, _ = compiler.compile_memo_components([badge_def]) + assert memo_files + for mpath, _mcode in memo_files: + out_path = compiler_utils.resolve_path_of_web_dir(mpath) + assert out_path.exists() + assert badge_def.export_name in out_path.read_text(encoding="utf-8") + + +class _MemoCacheState(rx.State): + value: str = "x" + other: str = "y" + + +def _page_e() -> Component: + return rx.el.div(rx.el.p(_MemoCacheState.value, class_name="e"), rx.el.h1("E")) + + +def _page_f() -> Component: + return rx.el.div(rx.el.p(_MemoCacheState.other, class_name="f"), rx.el.h2("F")) + + +def test_incremental_miss_keeps_sibling_memo_exports( + tmp_path, monkeypatch, preserve_memo_registries +): + """A miss must not clobber memo exports owned by hit pages. + + Auto-memo output is grouped into one file per source module. Pages E and F + live in this module and each contributes a stateful auto memo, so both land + in the same mirrored file. When only E misses (e.g. a data-file edit), + rewriting that file from E's contributions alone drops F's export while + F's reused page module still imports it. 
+ """ + from reflex.compiler import compiler + from reflex.compiler import utils as compiler_utils + + web = _use_tmp_web_dir(tmp_path, monkeypatch) + + app = rx.App() + app.add_page(_page_e, route="/e") + app.add_page(_page_f, route="/f") + pages = list(app._unevaluated_pages.values()) + route_e, route_f = pages[0].route, pages[1].route + ctx = _compile(pages) + + e_memos = list(ctx.compiled_pages[route_e].memo_contributions.values()) + f_memos = list(ctx.compiled_pages[route_f].memo_contributions.values()) + assert e_memos, "page E must contribute an auto memo" + assert f_memos, "page F must contribute an auto memo" + + # Simulate the full compile's on-disk memo state (shared grouped file). + memo_files, _ = compiler.compile_memo_components([*e_memos, *f_memos]) + assert memo_files + for mpath, mcode in memo_files: + compiler_utils.write_file(compiler_utils.resolve_path_of_web_dir(mpath), mcode) + + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + _stub_externals(app, monkeypatch) + + # Make only page E miss (a dependency of E changed, e.g. a data file). + manifest_path = web / disk_cache._MANIFEST_FILE + manifest = json.loads(manifest_path.read_text()) + manifest["pages"][route_e]["dep_hashes"] = {str(tmp_path / "data.md"): "stale-hash"} + manifest_path.write_text(json.dumps(manifest)) + + assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + + # Every export (E's and F's) survives in the rewritten grouped file(s). 
+ for mpath, _mcode in memo_files: + content = compiler_utils.resolve_path_of_web_dir(mpath).read_text( + encoding="utf-8" + ) + for memo_def in (*e_memos, *f_memos): + assert memo_def.export_name in content + + def test_load_manifest_rejects_wrong_schema(tmp_path, monkeypatch): - web = tmp_path / ".web" - web.mkdir() - monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + web = _use_tmp_web_dir(tmp_path, monkeypatch) (web / disk_cache._MANIFEST_FILE).write_text(json.dumps({"schema": 999})) assert disk_cache.load_manifest() is None def test_update_manifest_for_misses_keeps_complete_imports(tmp_path, monkeypatch): - web = tmp_path / ".web" - web.mkdir() - monkeypatch.setattr(disk_cache.prerequisites, "get_web_dir", lambda: web) + web = _use_tmp_web_dir(tmp_path, monkeypatch) monkeypatch.setattr( page_cache, "state_dependency_index", lambda root=None: ({}, set()) ) monkeypatch.setattr(page_cache, "page_dependency_hashes", lambda *a, **k: {}) page = _FakePage(route="/a", component=_page_a) - page_ctx = SimpleNamespace(app_wrap_components={}, frontend_imports={}) + page_ctx = SimpleNamespace( + app_wrap_components={}, frontend_imports={}, memo_contributions={} + ) miss_ctx = SimpleNamespace(compiled_pages={"/a": page_ctx}, stateful_routes=set()) complete_imports = {"memo-lib": [ImportVar("MemoThing")]} manifest = _manifest({ diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index 45d84e4e7c1..aa4a4578093 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -352,6 +352,30 @@ def test_record_reads_tracks_executed_builtin_import(tmp_path, monkeypatch): assert str(child.resolve()) in reads +def test_record_reads_tracks_path_open(tmp_path): + """``Path.open`` calls ``io.open`` directly, bypassing the ``builtins.open`` + patch, so it must be patched itself for data reads to be recorded. 
+ """ + page_cache.enable_read_tracking(root=tmp_path) + data = tmp_path / "content.md" + data.write_text("hello") + + with page_cache.record_reads() as reads, data.open() as f: + f.read() + + assert str(data.resolve()) in reads + + +def test_record_reads_ignores_path_open_writes(tmp_path): + page_cache.enable_read_tracking(root=tmp_path) + out = tmp_path / "out.md" + + with page_cache.record_reads() as reads, out.open("w") as f: + f.write("x") + + assert str(out.resolve()) not in reads + + def test_record_reads_ignores_unexecuted_import(tmp_path, monkeypatch): module_name = "uncalled_runtime_import_dep" module_file = _prepare_runtime_module(tmp_path, monkeypatch, module_name) From 14e5a3cd98acac2fc3721ce7ebe6605174579f3b Mon Sep 17 00:00:00 2001 From: Farhan Ali Raza <62690310+FarhanAliRaza@users.noreply.github.com> Date: Fri, 3 Jul 2026 01:28:56 +0000 Subject: [PATCH 15/18] fix(compiler): make compile output deterministic so dev HMR stays granular --- .../src/reflex_base/utils/imports.py | 10 ++--- .../src/reflex_base/utils/serializers.py | 9 ++++- reflex/app.py | 8 +++- reflex/compiler/compiler.py | 19 +++++---- tests/units/compiler/test_compiler.py | 40 +++++++++++++++++++ tests/units/utils/test_imports.py | 22 ++++++++++ tests/units/utils/test_serializers.py | 12 ++++++ 7 files changed, 104 insertions(+), 16 deletions(-) diff --git a/packages/reflex-base/src/reflex_base/utils/imports.py b/packages/reflex-base/src/reflex_base/utils/imports.py index 7e614f99524..0ec426f78fe 100644 --- a/packages/reflex-base/src/reflex_base/utils/imports.py +++ b/packages/reflex-base/src/reflex_base/utils/imports.py @@ -91,6 +91,10 @@ def collapse_imports( ) -> ParsedImportDict: """Remove all duplicate ImportVar within an ImportDict. + Deduplication preserves first-occurrence order: compiled import statements + follow this order, and a hash-seed-dependent order would rewrite every + page/memo module on each dev reload, defeating granular HMR. 
+ Args: imports: The import dict to collapse. @@ -98,11 +102,7 @@ def collapse_imports( The collapsed import dict. """ return { - lib: ( - list(set(import_vars)) - if isinstance(import_vars, list) - else list(import_vars) - ) + lib: list(dict.fromkeys(import_vars)) for lib, import_vars in ( imports if isinstance(imports, tuple) else imports.items() ) diff --git a/packages/reflex-base/src/reflex_base/utils/serializers.py b/packages/reflex-base/src/reflex_base/utils/serializers.py index d20a29e3890..3a5be37ee53 100644 --- a/packages/reflex-base/src/reflex_base/utils/serializers.py +++ b/packages/reflex-base/src/reflex_base/utils/serializers.py @@ -292,13 +292,20 @@ def serialize_base_model(model: BaseModel) -> dict: def serialize_set(value: set) -> list: """Serialize a set to a JSON serializable list. + Sets have no meaningful order and their iteration order varies with the + per-process hash seed, so sort when possible to keep serialized output + (compiled JSX, config fingerprints) stable across processes. + Args: value: The set to serialize. Returns: The serialized list. 
""" - return list(value) + try: + return sorted(value) + except TypeError: + return list(value) @serializer diff --git a/reflex/app.py b/reflex/app.py index 31bb0341409..3a131c6728f 100644 --- a/reflex/app.py +++ b/reflex/app.py @@ -1346,8 +1346,12 @@ def _write_stateful_pages_marker(self): prerequisites.get_backend_dir() / constants.Dirs.STATEFUL_PAGES ) stateful_pages_marker.parent.mkdir(parents=True, exist_ok=True) - with stateful_pages_marker.open("w") as f: - json.dump(list(self._stateful_pages), f) + content = json.dumps(list(self._stateful_pages)) + if ( + not stateful_pages_marker.exists() + or stateful_pages_marker.read_text() != content + ): + stateful_pages_marker.write_text(content) def add_all_routes_endpoint(self): """Add an endpoint to the app that returns all the routes.""" diff --git a/reflex/compiler/compiler.py b/reflex/compiler/compiler.py index 2b368cd5c7b..98582b8df25 100644 --- a/reflex/compiler/compiler.py +++ b/reflex/compiler/compiler.py @@ -378,19 +378,22 @@ def _compile_root_stylesheet( target.parent.mkdir(parents=True, exist_ok=True) + # Skip rewriting an unchanged target: Vite watches .web/styles, and a + # rewrite-with-identical-content still fires an HMR update per reload. 
if stylesheet.suffix == ".css": - path_ops.cp(src=stylesheet, dest=target, overwrite=True) + data = stylesheet.read_bytes() + if not target.exists() or target.read_bytes() != data: + target.write_bytes(data) else: try: from sass import compile as sass_compile - target.write_text( - data=sass_compile( - filename=str(stylesheet), - output_style="compressed", - ), - encoding="utf8", - ) + compiled_css = sass_compile( + filename=str(stylesheet), + output_style="compressed", + ).encode("utf8") + if not target.exists() or target.read_bytes() != compiled_css: + target.write_bytes(compiled_css) except ImportError: failed_to_import_sass = True diff --git a/tests/units/compiler/test_compiler.py b/tests/units/compiler/test_compiler.py index 1dbb4ab27bc..47d6c636766 100644 --- a/tests/units/compiler/test_compiler.py +++ b/tests/units/compiler/test_compiler.py @@ -124,6 +124,46 @@ def test_compile_imports(import_dict: ParsedImportDict, test_dicts: list[dict]): ) +def test_compile_stylesheets_skips_unchanged_copy( + tmp_path: Path, mocker: MockerFixture +): + """An unchanged asset stylesheet is not rewritten into .web on recompile. + + Vite watches .web/styles, so a rewrite with identical content still fires + an HMR update on every dev reload. + + Args: + tmp_path: The test directory. + mocker: Pytest mocker object. 
+ """ + project = tmp_path / "test_project" + project.mkdir() + assets_dir = project / "assets" + assets_dir.mkdir() + (assets_dir / "style.css").write_text("button { color: blue }") + + mocker.patch("reflex.compiler.compiler.Path.cwd", return_value=project) + mocker.patch( + "reflex.compiler.compiler.get_web_dir", + return_value=project / constants.Dirs.WEB, + ) + mocker.patch( + "reflex.compiler.utils.get_web_dir", return_value=project / constants.Dirs.WEB + ) + + compiler.compile_root_stylesheet(["/style.css"]) + target = project / constants.Dirs.WEB / "styles" / "style.css" + stat_before = target.stat() + + compiler.compile_root_stylesheet(["/style.css"]) + stat_after = target.stat() + assert stat_after.st_mtime_ns == stat_before.st_mtime_ns + + (assets_dir / "style.css").write_text("button { color: red }") + compiler.compile_root_stylesheet(["/style.css"]) + assert target.read_text() == "button { color: red }" + + def test_compile_stylesheets(tmp_path: Path, mocker: MockerFixture): """Test that stylesheets compile correctly. diff --git a/tests/units/utils/test_imports.py b/tests/units/utils/test_imports.py index 7b456d892c9..86720222b58 100644 --- a/tests/units/utils/test_imports.py +++ b/tests/units/utils/test_imports.py @@ -3,6 +3,7 @@ ImportDict, ImportVar, ParsedImportDict, + collapse_imports, merge_imports, parse_imports, ) @@ -118,3 +119,24 @@ def test_merge_imports(input_1, input_2, output): ) def test_parse_imports(input: ImportDict, output: ParsedImportDict): assert parse_imports(input) == output + + +def test_collapse_imports_preserves_order(): + """Deduplication must preserve first-occurrence order. + + Compiled JSX import order follows this ordering; a hash-seed-dependent + order rewrites every page/memo file on each dev reload and breaks + granular HMR. 
+ """ + import_vars = [ + ImportVar(tag=f"Icon{i}", is_default=True, package_path=f"/Icon{i}") + for i in range(32) + ] + duplicated = [*import_vars, *import_vars[:5], import_vars[0]] + collapsed = collapse_imports({"@hugeicons/core-free-icons": duplicated}) + assert collapsed == {"@hugeicons/core-free-icons": import_vars} + # Tuple-valued entries (already-immutable parsed imports) keep order too. + collapsed_tuple = collapse_imports(( + ("@hugeicons/core-free-icons", tuple(duplicated)), + )) + assert collapsed_tuple == {"@hugeicons/core-free-icons": import_vars} diff --git a/tests/units/utils/test_serializers.py b/tests/units/utils/test_serializers.py index 01e77d64cc9..142966dbffc 100644 --- a/tests/units/utils/test_serializers.py +++ b/tests/units/utils/test_serializers.py @@ -207,6 +207,18 @@ def test_serialize(value: Any, expected: str): assert json.loads(json_dumps(value)) == json.loads(json_dumps(expected)) +def test_serialize_set_deterministic_order(): + """Sortable sets serialize in sorted order regardless of the hash seed. + + Compiled output and cache fingerprints embed serialized sets; a + hash-seed-dependent order churns files on every process restart. + """ + assert serializers.serialize_set({"b", "c", "a"}) == ["a", "b", "c"] + assert serializers.serialize_set({3, 1, 2}) == [1, 2, 3] + # Unsortable element mixes fall back to iteration order. + assert set(serializers.serialize_set({1, "a"})) == {1, "a"} + + @pytest.mark.parametrize( ("value", "expected", "exp_var_is_string"), [ From bf6a8bf919cd2a83dd9cd9a1ee1ef2528d06e5aa Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Fri, 3 Jul 2026 17:20:01 +0500 Subject: [PATCH 16/18] fix(compiler): keep contexts file and HMR intact across hot reloads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The incremental rebuild no longer re-emits the contexts file every time. 
It is rewritten only when a stateful page missed the cache and that page's
own state config actually changed (fingerprinted against the manifest);
otherwise the on-disk file is reused untouched. When a rewrite is needed,
the stateful cache-hit pages are evaluated first so the state registry —
and the frontend dispatch map compiled from it — stays complete.

Supporting changes:

- Reset the daemon's state registry surgically: states from modules that
  survive the purge (framework/installed/workspace packages) are
  re-registered in original order, while purged-module and
  reflex.istate.dynamic states are dropped so re-created local states get
  deterministic fresh-process names.
- Patch react-router's served HMR runtime so an edit to a
  not-currently-open route no longer throws and poisons HMR until a full
  reload.
- Store per-input epoch digests instead of one combined hash so a
  global-input mismatch can name the exact file that changed, and surface
  fallback reasons and detected file changes at info level.
- Write the vite config atomically and regenerate it on the incremental
  path.

--- .../src/reflex_base/compiler/templates.py | 27 ++ .../src/reflex_base/plugins/compiler.py | 11 +- reflex/compiler/compiler.py | 2 +- reflex/compiler/disk_cache.py | 319 +++++++++++++++--- reflex/compiler/page_cache.py | 83 ++++- reflex/utils/compile_daemon.py | 35 +- reflex/utils/frontend_skeleton.py | 6 +- tests/units/compiler/test_disk_cache.py | 309 ++++++++++++++++- tests/units/reflex_base/compiler/__init__.py | 0 .../reflex_base/compiler/test_templates.py | 53 +++ tests/units/utils/test_compile_daemon.py | 103 +++++- 11 files changed, 864 insertions(+), 84 deletions(-) create mode 100644 tests/units/reflex_base/compiler/__init__.py create mode 100644 tests/units/reflex_base/compiler/test_templates.py diff --git a/packages/reflex-base/src/reflex_base/compiler/templates.py b/packages/reflex-base/src/reflex_base/compiler/templates.py index 8e6e63a2bb3..5f3c06f021f 100644 --- a/packages/reflex-base/src/reflex_base/compiler/templates.py +++ b/packages/reflex-base/src/reflex_base/compiler/templates.py @@ -609,12 +609,39 @@ def vite_config_template( }}; }} +// react-router's HMR client (refresh-utils.mjs enqueueUpdate) throws when an +// update batch includes a route the browser hasn't loaded, and the throw skips +// the queue cleanup below it — one edit to any not-currently-open page then +// poisons HMR until a full page reload. Rewrite the served runtime so unloaded +// routes keep their manifest metadata update but stay lazy. 
+function patchReactRouterHmrRuntime() {{ + const unloadedRouteThrow = /if\s*\(!imported\)\s*\{{\s*throw\s+Error\([\s\S]*?\);\s*\}}/; + return {{ + name: "reflex-patch-react-router-hmr-runtime", + apply: "serve", + transform(code, id) {{ + if (id !== "\0virtual:react-router/hmr-runtime") return; + if (!unloadedRouteThrow.test(code)) {{ + this.warn( + "react-router hmr runtime changed; unloaded-route HMR patch skipped", + ); + return; + }} + return {{ + code: code.replace(unloadedRouteThrow, "if (!imported) continue;"), + map: null, + }}; + }}, + }}; +}} + export default defineConfig((config) => ({{ base: "{base}", plugins: [ alwaysUseReactDomServerNode(), reactRouter(), safariCacheBustPlugin(), + patchReactRouterHmrRuntime(), ].concat({"[fullReload()]" if force_full_reload else "[]"}), build: {{ sourcemap: {"true" if sourcemap is True else "false" if sourcemap is False else repr(sourcemap)}, diff --git a/packages/reflex-base/src/reflex_base/plugins/compiler.py b/packages/reflex-base/src/reflex_base/plugins/compiler.py index 97e4f0d9e36..813af943b1d 100644 --- a/packages/reflex-base/src/reflex_base/plugins/compiler.py +++ b/packages/reflex-base/src/reflex_base/plugins/compiler.py @@ -773,7 +773,9 @@ class CompileContext(BaseContext): app_wrap_components: dict[tuple[int, str], Component] = dataclasses.field( default_factory=dict ) - stateful_routes: dict[str, None] = dataclasses.field(default_factory=dict) + # Routes whose evaluation defined new state classes, mapped to the full + # names of the states each page defined. + stateful_routes: dict[str, list[str]] = dataclasses.field(default_factory=dict) # Auto-memoize wrapper tags seen during the tree walk (populated by # ``MemoizeStatefulPlugin``). 
memoize_wrappers: dict[str, None] = dataclasses.field(default_factory=dict) @@ -851,7 +853,12 @@ def compile( raise RuntimeError(msg) if len(all_base_state_classes) > n_states_before: - self.stateful_routes[page.route] = None + # Record which states this page defined (registration order is + # insertion order), so the compile cache can fingerprint the + # page's contribution to the contexts file. + self.stateful_routes[page.route] = list(all_base_state_classes)[ + n_states_before: + ] self.compiled_pages[page_ctx.route] = page_ctx diff --git a/reflex/compiler/compiler.py b/reflex/compiler/compiler.py index 98582b8df25..05908652f4f 100644 --- a/reflex/compiler/compiler.py +++ b/reflex/compiler/compiler.py @@ -1220,7 +1220,7 @@ def compile_app( raise TypeError(msg) app._pages[route] = page_ctx.root_component - app._stateful_pages.update(compile_ctx.stateful_routes) + app._stateful_pages.update(dict.fromkeys(compile_ctx.stateful_routes)) app._write_stateful_pages_marker() app._add_optional_endpoints() app._validate_var_dependencies() diff --git a/reflex/compiler/disk_cache.py b/reflex/compiler/disk_cache.py index 7e2cfc785c1..a1a966aba31 100644 --- a/reflex/compiler/disk_cache.py +++ b/reflex/compiler/disk_cache.py @@ -8,7 +8,9 @@ from __future__ import annotations +import contextlib import dataclasses +import hashlib import json import sys from pathlib import Path @@ -16,6 +18,7 @@ from reflex_base import constants from reflex_base.plugins import CompileContext, CompilerHooks +from reflex_base.utils.format import json_dumps from reflex_base.utils.imports import ImportVar, merge_imports from reflex.compiler import page_cache @@ -23,7 +26,7 @@ from reflex.utils import console, path_ops, prerequisites if TYPE_CHECKING: - from collections.abc import Callable, Sequence + from collections.abc import Callable, Iterable, Sequence from reflex_base.plugins import PageContext, PageDefinition from reflex_base.utils.imports import ParsedImportDict @@ -31,7 +34,7 @@ from 
reflex.app import App #: Bump when the manifest layout changes (old manifests are then ignored). -_SCHEMA = 4 +_SCHEMA = 6 #: Manifest filename under the web directory. _MANIFEST_FILE = "reflex_compile_cache.json" @@ -40,6 +43,40 @@ def _manifest_path() -> Path: return prerequisites.get_web_dir() / _MANIFEST_FILE +def format_path_list( + items: Iterable[str], root: Path | None = None, limit: int = 5 +) -> str: + """Render a bounded, root-relative summary of a path/label collection. + + Args: + items: The paths (or labels) to render. + root: When given, paths under it are shown relative to it. + limit: Maximum number of entries to show before truncating. + + Returns: + A comma-separated summary string, truncated with ``(+N more)``. + """ + + def rel(item: str) -> str: + if root is not None: + with contextlib.suppress(ValueError): + return str(Path(item).relative_to(root)) + return item + + shown = sorted(rel(item) for item in items) + extra = len(shown) - limit + return ", ".join(shown[:limit]) + (f" (+{extra} more)" if extra > 0 else "") + + +def _log_fallback(reason: str) -> None: + """Report why the incremental rebuild fell back to a full compile. + + Args: + reason: The human-readable fallback reason. + """ + console.info(f"Compile cache: falling back to a full compile — {reason}") + + def _serialize_imports(imports: ParsedImportDict) -> dict[str, list[dict[str, Any]]]: """Serialize a parsed import dict to JSON-able primitives. @@ -83,6 +120,7 @@ def _manifest_page_entry( hasher: Callable[[str], str | None], *, is_stateful: bool, + state_fingerprint: str | None = None, root: Path | None = None, ) -> dict[str, Any]: """Build the manifest entry for one compiled page. @@ -93,6 +131,8 @@ def _manifest_page_entry( state_index: The state-context identifier -> file index. hasher: A memoized path -> content-hash function. is_stateful: Whether the page registered state during compile. 
+ state_fingerprint: Fingerprint of the page's contexts contribution + (see ``_contexts_fingerprint``), or None for stateless pages. root: Project root for dependency discovery. Defaults to cwd. Returns: @@ -104,6 +144,7 @@ def _manifest_page_entry( ), "app_wrap_keys": _wrap_key_strs(page_ctx.app_wrap_components.keys()), "is_stateful": is_stateful, + "state_fingerprint": state_fingerprint, # Whether the page contributed auto memos: pages sharing a source # module share one memo output file, so a memo-contributing hit page # must be recompiled alongside a same-module miss (see @@ -112,6 +153,92 @@ def _manifest_page_entry( } +def _contexts_fingerprint( + state_names: Sequence[str], + initial_state: dict[str, Any], + client_storage: dict[str, dict[str, Any]], +) -> str: + """Fingerprint some states' contribution to the compiled contexts file. + + A state's contribution is exactly its initial-state slice plus its + client-storage entries (see ``templates.context_template``), so equal + fingerprints mean re-emitting the contexts file would leave these states' + entries unchanged. + + Args: + state_names: Full names of the states to fingerprint. + initial_state: The complete initial-state mapping (full name -> vars). + client_storage: The compiled client-storage mapping per storage kind. + + Returns: + A stable hash of the states' contexts contribution. + """ + payload = [] + for name in sorted(state_names): + prefix = f"{name}." + payload.append(( + name, + initial_state.get(name), + { + kind: {k: v for k, v in entries.items() if k.startswith(prefix)} + for kind, entries in client_storage.items() + }, + )) + return hashlib.sha256(json_dumps(payload).encode()).hexdigest() + + +def _contexts_snapshot( + app: App | None, +) -> tuple[dict[str, Any], dict[str, Any]] | None: + """Capture the state-tree inputs of the contexts file. + + Args: + app: The app being compiled (absent in bare compile contexts). 
+ + Returns: + The (initial state, client storage) mappings keyed by state full name, + or None when there is no state tree. + """ + if app is None or app._state is None: + return None + from reflex.compiler import utils as compiler_utils + + return ( + compiler_utils.compile_state(app._state), + compiler_utils.compile_client_storage(app._state), + ) + + +def _changed_state_config_route( + manifest: dict[str, Any], + miss_ctx: CompileContext, + snapshot: tuple[dict[str, Any], dict[str, Any]] | None, +) -> str | None: + """Find the first recompiled stateful route whose state config changed. + + Compares each stateful miss page's just-evaluated states against the + fingerprint recorded in the manifest. Only these pages' states can differ + from the on-disk contexts file: hit pages' definitions are unchanged by + construction (all their dependency files still match). + + Args: + manifest: The loaded compile manifest. + miss_ctx: The compile context of the recompiled pages. + snapshot: The ``_contexts_snapshot`` of the app, or None when there is + no state tree to fingerprint against. + + Returns: + The first route requiring a contexts rebuild, or None if none do. + """ + if snapshot is None: + return next(iter(miss_ctx.stateful_routes)) + for route, defined_states in miss_ctx.stateful_routes.items(): + stored = manifest["pages"].get(route, {}).get("state_fingerprint") + if stored != _contexts_fingerprint(defined_states, *snapshot): + return route + return None + + def load_manifest() -> dict[str, Any] | None: """Load the persisted compile manifest, or None if absent/unusable. 
@@ -154,7 +281,10 @@ def write_manifest( try: state_index, _ = page_cache.state_dependency_index(root) hasher = page_cache.make_hasher() - epoch = page_cache.global_epoch(root, pages=pages) + epoch_inputs = page_cache.global_epoch_inputs(root, pages=pages) + contexts_snapshot = ( + _contexts_snapshot(compile_ctx.app) if compile_ctx.stateful_routes else None + ) pages_data: dict[str, Any] = {} for page in pages: @@ -165,19 +295,27 @@ def write_manifest( or page_ctx.output_path is None ): return # incomplete compile -> do not write a partial manifest + defined_states = compile_ctx.stateful_routes.get(page.route) pages_data[page.route] = _manifest_page_entry( page_ctx, page.component, state_index, hasher, - is_stateful=page.route in compile_ctx.stateful_routes, + is_stateful=defined_states is not None, + state_fingerprint=( + _contexts_fingerprint(defined_states, *contexts_snapshot) + if defined_states and contexts_snapshot is not None + else None + ), root=root, ) manifest = { "schema": _SCHEMA, "reflex_version": page_cache._reflex_version(), - "epoch": epoch, + # Per-input digests (not one combined sha) so a later mismatch can + # name the exact global input that changed. + "epoch_inputs": epoch_inputs, "all_imports": _serialize_imports(install_imports), "pages": pages_data, } @@ -188,35 +326,54 @@ def write_manifest( console.debug(f"disk compile cache: manifest write skipped ({exc!r})") -def globals_match( +def globals_mismatch( manifest: dict[str, Any], *, routes: set[str], - epoch: str, -) -> bool: - """Whether the manifest's genuinely-global inputs match the current compile. + root: Path | None = None, +) -> str | None: + """Explain why the manifest's global inputs don't match, or None if they do. 
The fast rebuild needs the route set unchanged (adding/removing a route - changes the shared nav on every page) and the global epoch unchanged (Reflex + changes the shared nav on every page) and the global inputs unchanged (Reflex version + config/lockfiles + the app-level config files: the entrypoint and the theme/app-wrap/stylesheet modules it imports, which configure the app-wide files reused on disk). Everything else is decided per page via its dependency set, so a shared-component or markdown edit no longer blocks the fast path. Only the pages that depend on the changed file miss. + Global inputs are validated by re-hashing the manifest's *stored* input set + (see :func:`page_cache.changed_epoch_inputs`), never by recomputing the set + in this process — set membership is only decided when a full compile writes + the manifest. + Args: manifest: The loaded manifest. routes: The current set of page routes. - epoch: The current global epoch (see :func:`page_cache.global_epoch`). + root: Project root the stored inputs resolve against (also used to + shorten paths in the reason). Defaults to cwd. Returns: - True if the global inputs match. + A human-readable mismatch reason, or None when the global inputs match. 
""" - return ( - manifest.get("reflex_version") == page_cache._reflex_version() - and set(manifest.get("pages", {})) == routes - and manifest.get("epoch") == epoch - ) + old_version = manifest.get("reflex_version") + if old_version != page_cache._reflex_version(): + return ( + f"reflex version changed ({old_version} -> {page_cache._reflex_version()})" + ) + old_routes = set(manifest.get("pages", {})) + if old_routes != routes: + parts = [] + if added := routes - old_routes: + parts.append(f"added {format_path_list(added)}") + if removed := old_routes - routes: + parts.append(f"removed {format_path_list(removed)}") + return f"route set changed ({'; '.join(parts)})" + root = (root or Path.cwd()).resolve() + if stale := page_cache.changed_epoch_inputs(manifest.get("epoch_inputs", {}), root): + labels = {label.removeprefix("app:") for label in stale} + return f"global input(s) changed: {format_path_list(labels, root)}" + return None def partition_pages( @@ -378,11 +535,11 @@ def try_incremental_rebuild( unsafe to reuse: no/old manifest, a changed global input, a route change, or a miss page that altered its app-wrap set or stateful flag. - App-wide outputs that per-page dependency sets do not cover are always - re-emitted rather than tracked: the contexts file (state defaults and - client-storage config change with any state module) and the ``assets`` - copy (assets are excluded from dependency tracking). Both are cheap and - idempotent. + The ``assets`` copy is excluded from dependency tracking and always re-run + (cheap, idempotent). The contexts file is rewritten only when a stateful + page missed — and then only after evaluating the stateful hit pages, so the + state registry it is compiled from is complete (it must keep the states + that only a page's evaluation registers, e.g. exec'd docs demos). 
On success, reports (at info level) how many pages were recompiled vs reused and, while recompiling, shows a progress bar over the changed pages so a hot @@ -401,19 +558,33 @@ def try_incremental_rebuild( """ manifest = load_manifest() if manifest is None: + _log_fallback( + "no reusable manifest (first compile, unreadable, or schema changed)" + ) return False pages = list(app._unevaluated_pages.values()) routes = {p.route for p in pages} hasher = page_cache.make_hasher() - epoch = page_cache.global_epoch(root, pages=pages) - if not globals_match(manifest, routes=routes, epoch=epoch): + if (reason := globals_mismatch(manifest, routes=routes, root=root)) is not None: + _log_fallback(reason) return False + resolved_root = (root or Path.cwd()).resolve() miss_pages = partition_pages(pages, manifest, hasher) + changed_files: set[str] = set() if miss_pages: + # Nearly free: partition_pages already hashed every dependency file + # into the memoized hasher. + changed_files = _changed_dependency_files(manifest, hasher) miss_pages = _with_module_siblings(miss_pages, pages, manifest) + console.info( + f"Compile cache: recompiling {len(miss_pages)}/{len(pages)} pages; " + f"changed file(s): {format_path_list(changed_files, resolved_root)}" + ) + else: + console.info(f"Compile cache: reusing all {len(pages)} pages from disk") miss_routes = {p.route for p in miss_pages} # Recompile only the source-changed pages. 
@@ -464,14 +635,17 @@ def try_incremental_rebuild( or page_ctx.output_code is None or page_ctx.output_path is None ): + _log_fallback(f"page {page.route!r} produced no output") return False entry = manifest["pages"][page.route] if ( _wrap_key_strs(page_ctx.app_wrap_components.keys()) != entry["app_wrap_keys"] ): + _log_fallback(f"page {page.route!r} changed its app-wrap set") return False if (page.route in miss_ctx.stateful_routes) != entry["is_stateful"]: + _log_fallback(f"page {page.route!r} changed statefulness") return False from reflex.compiler import compiler @@ -486,6 +660,7 @@ def try_incremental_rebuild( output_path = page_ctx.output_path output_code = page_ctx.output_code if output_path is None or output_code is None: + _log_fallback(f"page {page.route!r} lost its output before write") return False compiler.utils.write_file( compiler.utils.resolve_path_of_web_dir(output_path), @@ -497,9 +672,7 @@ def try_incremental_rebuild( # with the complete definition set (all recompiled pages' contributions # plus the user memos sharing those files or whose module changed). memo_files, memo_imports = compiler.compile_memo_components( - _complete_memo_defs( - memo_contributions, _changed_dependency_files(manifest, hasher) - ) + _complete_memo_defs(memo_contributions, changed_files) ) for mpath, mcode in memo_files: compiler.utils.write_file( @@ -527,24 +700,58 @@ def try_incremental_rebuild( app._add_optional_endpoints() app._validate_var_dependencies() - # App-wide outputs that are cheap to re-emit and not gated by the epoch: - # contexts (state defaults/client-storage change with any state module, - # which per-page dependency sets do not force a regenerate of) and the - # assets copy (assets are excluded from dependency tracking entirely). 
- from reflex_components_radix.plugin import RadixThemesPlugin - - theme = next( - ( - plugin.get_theme() - for plugin in compiler_plugins - if isinstance(plugin, RadixThemesPlugin) - ), - None, - ) - context_path, context_code = compiler.compile_contexts(app._state, theme) - compiler.utils.write_file( - compiler.utils.resolve_path_of_web_dir(context_path), context_code - ) + # The contexts file holds EVERY state's defaults/dispatchers, including + # states only registered while their page evaluates (exec'd docs demos, + # dynamically imported modules). This process evaluated just the miss + # pages, so its registry is incomplete; rewriting contexts from it would + # drop the hit pages' states and break the frontend's dispatch map. Only a + # stateful miss can change state config — and only its OWN states can + # differ from the on-disk contexts file, so first fingerprint those + # against the manifest: a content-only edit leaves them identical and the + # contexts file is reused untouched. Otherwise evaluate the stateful hit + # pages (so the registry is complete) and rewrite contexts. 
+ contexts_snapshot: tuple[dict[str, Any], dict[str, Any]] | None = None + if miss_ctx is not None and miss_ctx.stateful_routes: + contexts_snapshot = _contexts_snapshot(app) + changed_route = _changed_state_config_route( + manifest, miss_ctx, contexts_snapshot + ) + if changed_route is None: + console.info( + "Compile cache: recompiled pages define unchanged states; " + "reusing contexts file" + ) + else: + console.info( + f"Compile cache: page {changed_route!r} changed its state " + "config; rebuilding contexts" + ) + stateful_hits = [ + route + for route, entry in manifest["pages"].items() + if entry["is_stateful"] and route not in miss_routes + ] + with console.timing("Evaluate stateful hit pages (contexts)"): + for route in stateful_hits: + app._compile_page(route, save_page=False) + + from reflex_components_radix.plugin import RadixThemesPlugin + + theme = next( + ( + plugin.get_theme() + for plugin in compiler_plugins + if isinstance(plugin, RadixThemesPlugin) + ), + None, + ) + context_path, context_code = compiler.compile_contexts(app._state, theme) + compiler.utils.write_file( + compiler.utils.resolve_path_of_web_dir(context_path), context_code + ) + + # The assets copy is cheap, idempotent, and excluded from dependency + # tracking entirely, so it is always re-run. assets_src = (root or Path.cwd()) / constants.Dirs.APP_ASSETS if assets_src.is_dir(): path_ops.update_directory_tree( @@ -559,9 +766,17 @@ def try_incremental_rebuild( app._get_frontend_packages(install_imports) frontend_skeleton.update_react_router_config(prerender_routes=prerender_routes) frontend_skeleton.update_entry_client() + frontend_skeleton.initialize_vite_config() # Refresh the manifest for the next process. 
- _update_manifest_for_misses(manifest, miss_ctx, miss_pages, install_imports, root) + _update_manifest_for_misses( + manifest, + miss_ctx, + miss_pages, + install_imports, + root, + contexts_snapshot=contexts_snapshot, + ) return True @@ -572,6 +787,8 @@ def _update_manifest_for_misses( miss_pages: Sequence[PageDefinition], all_imports: ParsedImportDict, root: Path | None = None, + *, + contexts_snapshot: tuple[dict[str, Any], dict[str, Any]] | None = None, ) -> None: """Update the on-disk manifest entries for the recompiled pages. @@ -581,6 +798,8 @@ def _update_manifest_for_misses( miss_pages: The recompiled page definitions. all_imports: The complete frontend import set after recompiling misses. root: Project root for dependency discovery. Defaults to cwd. + contexts_snapshot: The app's ``_contexts_snapshot`` for fingerprinting + stateful pages, or None when no miss page was stateful. """ if miss_ctx is None or not miss_pages: return @@ -589,12 +808,18 @@ def _update_manifest_for_misses( hasher = page_cache.make_hasher() for page in miss_pages: page_ctx = miss_ctx.compiled_pages[page.route] + defined_states = miss_ctx.stateful_routes.get(page.route) manifest["pages"][page.route] = _manifest_page_entry( page_ctx, page.component, state_index, hasher, - is_stateful=page.route in miss_ctx.stateful_routes, + is_stateful=defined_states is not None, + state_fingerprint=( + _contexts_fingerprint(defined_states, *contexts_snapshot) + if defined_states and contexts_snapshot is not None + else None + ), root=root, ) manifest["all_imports"] = _serialize_imports(all_imports) diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 38bcbb2b798..22cbde24da1 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -418,10 +418,10 @@ def _app_entrypoint_file(root: Path | None = None) -> Path | None: return rf if root in rf.parents else None -def global_epoch( +def global_epoch_inputs( root: Path | None = None, *, pages: 
Sequence[object] | None = None -) -> str: - """Fingerprint the genuinely-global inputs. +) -> dict[str, str]: + """Fingerprint each genuinely-global input individually. These can affect every page's output but belong to no single page, so they gate the whole cache rather than any one page's dependency set: the Reflex @@ -430,30 +430,91 @@ def global_epoch( stylesheets, head components; see :func:`app_dependency_files`). Kept small on purpose; per-file edits flow through per-page dependency sets instead. + Keeping each input's digest separate (rather than one combined sha) lets a + mismatch report *which* global input changed. + Args: root: Project root. Defaults to cwd. pages: The current page definitions, used as barriers so page modules (tracked per page) are excluded from the app-level config files. Returns: - A hex digest of the global inputs. + A mapping of input label (``reflex``, a global filename, or + ``app:<path>``) to its content digest (``<absent>`` if unreadable). """ root = (root or Path.cwd()).resolve() - parts: list[str] = [f"reflex={_reflex_version()}"] + inputs: dict[str, str] = {"reflex": _reflex_version()} for name in _GLOBAL_FILES: path = root / name try: - parts.append(f"{name}={hashlib.sha256(path.read_bytes()).hexdigest()}") + inputs[name] = hashlib.sha256(path.read_bytes()).hexdigest() except OSError: - parts.append(f"{name}=<absent>") - # Sorted for a deterministic digest regardless of set iteration order. - for path_str in sorted(app_dependency_files(pages, root)): + inputs[name] = "<absent>" + for path_str in app_dependency_files(pages, root): try: digest = hashlib.sha256(Path(path_str).read_bytes()).hexdigest() except OSError: digest = "<absent>" - parts.append(f"app:{path_str}={digest}") - return _sha(*parts) + inputs[f"app:{path_str}"] = digest + return inputs + + +def global_epoch( + root: Path | None = None, *, pages: Sequence[object] | None = None +) -> str: + """Fingerprint the genuinely-global inputs as one digest. 
+ + Args: + root: Project root. Defaults to cwd. + pages: The current page definitions (see :func:`global_epoch_inputs`). + + Returns: + A hex digest of the global inputs. + """ + # Sorted for a deterministic digest regardless of dict insertion order. + return _sha( + *(f"{k}={v}" for k, v in sorted(global_epoch_inputs(root, pages=pages).items())) + ) + + +def changed_epoch_inputs(stored: dict[str, str], root: Path | None = None) -> set[str]: + """Return the labels of stored global inputs whose current content differs. + + Validates against the *stored* input set (like :func:`deps_unchanged` does + for page deps): membership is decided once, when the manifest is written by + a full compile; checking only re-hashes those inputs. Recomputing the set + via :func:`global_epoch_inputs` at check time is wrong — it depends on what + the current process happened to read/import during app import, which + differs between a cold compile and a warm forked reload (unpurged module + caches skip re-reads), spuriously invalidating every hot reload. + + Args: + stored: The manifest's ``{label: digest}`` global-input map + (see :func:`global_epoch_inputs` for the label forms). + root: Project root the plain-filename labels resolve against. + Defaults to cwd. + + Returns: + The labels whose content digest no longer matches. 
+ """ + root = (root or Path.cwd()).resolve() + changed: set[str] = set() + for label, digest in stored.items(): + if label == "reflex": + current = _reflex_version() + else: + path = ( + Path(label.removeprefix("app:")) + if label.startswith("app:") + else root / label + ) + try: + current = hashlib.sha256(path.read_bytes()).hexdigest() + except OSError: + current = "<absent>" + if current != digest: + changed.add(label) + return changed def _module_file(component: object) -> Path | None: diff --git a/reflex/utils/compile_daemon.py b/reflex/utils/compile_daemon.py index 8d1d20525db..177d273d6a1 100644 --- a/reflex/utils/compile_daemon.py +++ b/reflex/utils/compile_daemon.py @@ -265,10 +265,18 @@ def _first_party_module_names(roots: list[Path]) -> set[str]: def _reset_first_party(roots: list[Path]) -> None: """Make this interpreter clean w.r.t. first-party code before re-importing. - Purges the user's first-party modules from ``sys.modules`` and clears the + Purges the user's first-party modules from ``sys.modules`` and resets the cross-module registries/caches that would otherwise pin old class objects. Third-party modules are left imported and warm. + The state registry is reset surgically, not blanket-cleared: a class body + in a module that survives the purge (framework internals, installed or + workspace packages) never re-executes in this process, so clearing its + registration would lose the state from the app's state tree — and from the + compiled contexts file — permanently. Those registrations are kept; states + from purged modules re-register on re-import, and runtime-created states in + ``reflex.istate.dynamic`` re-register when their page re-evaluates. + Args: roots: The resolved reload roots whose modules are first-party. 
""" @@ -281,11 +289,19 @@ def _reset_first_party(roots: list[Path]) -> None: from reflex_base.registry import RegistrationContext + import reflex.istate.dynamic as istate_dynamic from reflex.compiler import page_cache from reflex.page import DECORATED_PAGES from reflex.state import BaseState, all_base_state_classes ctx = RegistrationContext.ensure_context() + kept = [ + cls + for cls in ctx.base_states.values() + if (module_name := getattr(cls, "__module__", None)) is not None + and module_name != istate_dynamic.__name__ + and getattr(sys.modules.get(module_name), "__file__", None) is not None + ] ctx.base_states.clear() ctx.base_state_substates.clear() ctx.event_handlers.clear() @@ -298,6 +314,17 @@ def _reset_first_party(roots: list[Path]) -> None: BaseState.get_class_substate, ): cached.cache_clear() + # Locally-defined states are attached to ``reflex.istate.dynamic`` under + # collision-suffixed names; with the warm parent's attributes in place, + # every re-created state would drift to a new suffix and diverge from the + # names the (cold) backend computes. Reset the module so re-created states + # get their fresh-process names. + for attr in [name for name in vars(istate_dynamic) if not name.startswith("__")]: + delattr(istate_dynamic, attr) + # Original registration order, so parents always precede their children. + for cls in kept: + ctx._register_base_state(cls) + all_base_state_classes[cls.get_full_name()] = None DECORATED_PAGES.clear() # The import graph caches each module's parsed import edges; a changed file # may import differently now, so drop it to force a re-parse. 
Cross-compile @@ -496,6 +523,12 @@ def _serve() -> None: if not changed: continue + from reflex.compiler.disk_cache import format_path_list + + console.info( + f"Compile daemon: change detected in {format_path_list(map(str, changed), root)}" + ) + # A change to a genuinely-global input (rxconfig/lockfiles, or a reflex # upgrade) can't be applied to the warm parent (it imported the old # version); re-exec the daemon so the new world is actually loaded. diff --git a/reflex/utils/frontend_skeleton.py b/reflex/utils/frontend_skeleton.py index de9f29e7bbd..4f511703b9e 100644 --- a/reflex/utils/frontend_skeleton.py +++ b/reflex/utils/frontend_skeleton.py @@ -547,8 +547,10 @@ def _compile_vite_config(config: Config): def initialize_vite_config(): """Render and write in .web the vite.config.js file using Reflex config.""" - vite_config_file_path = get_web_dir() / constants.ReactRouter.VITE_CONFIG_FILE - vite_config_file_path.write_text(_compile_vite_config(get_config())) + write_file( + get_web_dir() / constants.ReactRouter.VITE_CONFIG_FILE, + _compile_vite_config(get_config()), + ) def initialize_bun_config(): diff --git a/tests/units/compiler/test_disk_cache.py b/tests/units/compiler/test_disk_cache.py index fb62ad25b2b..320d364e4b2 100644 --- a/tests/units/compiler/test_disk_cache.py +++ b/tests/units/compiler/test_disk_cache.py @@ -1,6 +1,8 @@ """Tests for the experimental disk-persisted incremental compile cache.""" import dataclasses +import hashlib +import itertools import json from collections.abc import Callable, Sequence from pathlib import Path @@ -64,8 +66,9 @@ def _page_c() -> Component: return rx.el.div(rx.el.h1("Page C"), _footer()) -def _compile(pages: Sequence[Any]) -> CompileContext: +def _compile(pages: Sequence[Any], app: Any = None) -> CompileContext: ctx = CompileContext( + app=app, pages=pages, hooks=CompilerHooks(plugins=default_page_plugins()), ) @@ -74,6 +77,28 @@ def _compile(pages: Sequence[Any]) -> CompileContext: return ctx +def 
_unregister_state(cls: type[rx.State]) -> None: + """Drop a state class from the registries, like the daemon registry reset. + + Lets a page evaluation re-define the same-named class in this process, + mirroring what happens in the fork child between hot reloads. + + Args: + cls: The state class to unregister. + """ + from reflex_base.registry import RegistrationContext + + from reflex.state import all_base_state_classes + + ctx = RegistrationContext.ensure_context() + full_name = cls.get_full_name() + ctx.base_states.pop(full_name, None) + parent = cls.get_parent_state() + if parent is not None: + ctx.base_state_substates.get(parent.get_full_name(), set()).discard(cls) + all_base_state_classes.pop(full_name, None) + + def test_imports_round_trip(): imports = { "react": [ImportVar("useEffect"), ImportVar("Fragment", is_default=False)], @@ -96,7 +121,7 @@ def _manifest(pages: dict[str, dict], **overrides) -> dict: base = { "schema": disk_cache._SCHEMA, "reflex_version": page_cache._reflex_version(), - "epoch": "EPOCH", + "epoch_inputs": {}, "all_imports": {}, "pages": pages, } @@ -104,18 +129,63 @@ def _manifest(pages: dict[str, dict], **overrides) -> dict: return base -def test_globals_match(): - m = _manifest({"/a": {}, "/b": {}}) +def test_globals_mismatch_names_the_changed_input(tmp_path): + m = _manifest({"/a": {}, "/b": {}}, epoch_inputs={"rxconfig.py": "<absent>"}) routes = {"/a", "/b"} - assert disk_cache.globals_match(m, routes=routes, epoch="EPOCH") - # a changed route set -> no match - assert not disk_cache.globals_match(m, routes={"/a"}, epoch="EPOCH") - # a changed global epoch -> no match - assert not disk_cache.globals_match(m, routes=routes, epoch="OTHER") - # a stale reflex version -> no match - assert not disk_cache.globals_match( - {**m, "reflex_version": "0.0.0-old"}, routes=routes, epoch="EPOCH" + assert disk_cache.globals_mismatch(m, routes=routes, root=tmp_path) is None + # a changed route set -> named added/removed routes + reason = 
disk_cache.globals_mismatch(m, routes={"/a", "/c"}, root=tmp_path) + assert reason is not None + assert "/c" in reason + assert "/b" in reason + # a stale reflex version -> named versions + reason = disk_cache.globals_mismatch( + {**m, "reflex_version": "0.0.0-old"}, routes=routes, root=tmp_path ) + assert reason is not None + assert "0.0.0-old" in reason + + +def test_globals_mismatch_validates_stored_inputs_only(tmp_path): + """Epoch validation re-hashes the *stored* input set, never a recomputed one. + + ``app_dependency_files`` depends on what the current process happened to + read/import during app import, which differs between a cold compile and a + warm forked reload (non-purged module caches skip re-reads). Comparing a + recomputed set against the stored one therefore mismatched on every hot + reload; only stored inputs whose *content* changed may invalidate. + """ + theme = tmp_path / "theme_config.py" + theme.write_text("PRIMARY = 'red'") + stored = { + "reflex": page_cache._reflex_version(), + # global files absent from this root at write time and still absent + "rxconfig.py": "<absent>", + f"app:{theme}": hashlib.sha256(theme.read_bytes()).hexdigest(), + } + m = _manifest({"/a": {}}, epoch_inputs=stored) + # nothing on disk changed -> match, regardless of what a re-recorded + # app-import read set would look like in this process + assert disk_cache.globals_mismatch(m, routes={"/a"}, root=tmp_path) is None + # a stored input's content changed -> mismatch naming that file + theme.write_text("PRIMARY = 'blue'") + reason = disk_cache.globals_mismatch(m, routes={"/a"}, root=tmp_path) + assert reason is not None + assert "theme_config.py" in reason + # a stored global file appearing counts as a change too + theme.write_text("PRIMARY = 'red'") + (tmp_path / "rxconfig.py").write_text("import reflex") + reason = disk_cache.globals_mismatch(m, routes={"/a"}, root=tmp_path) + assert reason is not None + assert "rxconfig.py" in reason + + +def 
test_format_path_list_relativizes_and_truncates(): + root = Path("/proj") + assert disk_cache.format_path_list({"/proj/a.py", "other"}, root) == "a.py, other" + many = {f"/proj/{i}.py" for i in range(8)} + out = disk_cache.format_path_list(many, root, limit=3) + assert out == "0.py, 1.py, 2.py (+5 more)" def test_partition_pages_detects_changed_source(): @@ -156,10 +226,12 @@ def test_write_and_load_manifest(tmp_path, monkeypatch): "dep_hashes", "app_wrap_keys", "is_stateful", + "state_fingerprint", "has_memos", } # these static pages register no new state and contribute no memos assert entry["is_stateful"] is False + assert entry["state_fingerprint"] is None assert entry["has_memos"] is False # rendered output is never persisted (it already lives in .web, and is # never read back from the manifest) -> keeps the manifest small @@ -227,6 +299,7 @@ def _stub_externals(app, monkeypatch): ) monkeypatch.setattr(fs, "update_react_router_config", lambda **k: None) monkeypatch.setattr(fs, "update_entry_client", lambda *a, **k: None) + monkeypatch.setattr(fs, "initialize_vite_config", lambda: None) def test_incremental_rebuild_all_hits(tmp_path, monkeypatch): @@ -334,10 +407,17 @@ def test_stateful_hit_is_marked_but_not_reevaluated(tmp_path, monkeypatch): assert route in app._stateful_pages -def test_incremental_rebuild_regenerates_contexts(tmp_path, monkeypatch): - """State defaults/client-storage are baked into the contexts file and a - state-module edit never bumps the epoch, so the incremental path must - always re-emit it. +def test_incremental_rebuild_preserves_contexts_without_stateful_miss( + tmp_path, monkeypatch +): + """The contexts file is reused unless a stateful page missed. + + The full compile's contexts file holds every state's defaults, including + states only registered while their page evaluates (exec'd demos, dynamic + imports). 
An incremental process that evaluated no stateful pages has an + incomplete registry; rewriting contexts from it drops those states and the + frontend's dispatch map with them (``dispatch is not a function``). With no + stateful miss, no state changed -> the on-disk file must stay untouched. """ from reflex.compiler import utils as compiler_utils @@ -350,18 +430,211 @@ def test_incremental_rebuild_regenerates_contexts(tmp_path, monkeypatch): disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) _stub_externals(app, monkeypatch) + full_contexts = "// complete contexts from the full compile" + out_path = compiler_utils.resolve_path_of_web_dir(compiler_utils.get_context_path()) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(full_contexts, encoding="utf-8") + assert ( disk_cache.try_incremental_rebuild( app, compiler_plugins=[], prerender_routes=False, root=tmp_path ) is True ) + assert out_path.read_text(encoding="utf-8") == full_contexts + + +_state_counter = itertools.count() + + +def _page_s() -> Component: + """A page that defines its state during evaluation (like exec'd docs demos). + + Each evaluation defines a fresh uniquely-named state class, so the page is + marked stateful and repeated evaluations in one test process don't collide. + + Returns: + The page component. + """ + name = f"_ContextsState{next(_state_counter)}" + state_cls: Any = type( + name, + (rx.State,), + {"__annotations__": {"value": str}, "value": "", "__module__": __name__}, + ) + return rx.el.div(rx.el.p(state_cls.value), _footer()) + +def test_stateful_miss_evaluates_stateful_hits_then_rewrites_contexts( + tmp_path, monkeypatch +): + """A stateful miss forces a contexts rewrite from a *complete* registry. + + The stateful hit pages must be evaluated first (registering their + evaluation-time states) so the rewritten contexts file keeps every state. 
+ """ + from reflex.compiler import utils as compiler_utils + + web = _use_tmp_web_dir(tmp_path, monkeypatch) + + app = rx.App() + app.add_page(_page_s, route="/s") + app.add_page(_page_c, route="/c") + pages = list(app._unevaluated_pages.values()) + stateful_route, hit_route = pages[0].route, pages[1].route + ctx = _compile(pages) + assert stateful_route in ctx.stateful_routes + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + _stub_externals(app, monkeypatch) + + # Make the stateful page a miss, and mark the hit page stateful so the + # rebuild must re-register its states before compiling contexts. + manifest_path = web / disk_cache._MANIFEST_FILE + manifest = json.loads(manifest_path.read_text()) + manifest["pages"][stateful_route]["dep_hashes"] = { + str(tmp_path / "view.py"): "stale-hash" + } + manifest["pages"][hit_route]["is_stateful"] = True + manifest_path.write_text(json.dumps(manifest)) + + reevaluated: list[str] = [] + monkeypatch.setattr( + app, "_compile_page", lambda route, **k: reevaluated.append(route) + ) + + assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + # Only the stateful HIT page is re-evaluated (the miss was compiled in the + # miss context); then contexts are rewritten from the complete registry. + assert reevaluated == [hit_route] out_path = compiler_utils.resolve_path_of_web_dir(compiler_utils.get_context_path()) - assert out_path.exists() assert out_path.read_text(encoding="utf-8") == _CONTEXTS_STUB +def test_contexts_fingerprint_sensitivity(): + initial = {"root.s_one": {"value": ""}, "root.s_two": {"count": 0}} + storage = { + "cookies": {"root.s_one.token_rx_field_": {"name": "t"}}, + "local_storage": {}, + "session_storage": {}, + } + base = disk_cache._contexts_fingerprint(["root.s_one"], initial, storage) + # Stable across equal inputs and insensitive to name order. 
+ assert disk_cache._contexts_fingerprint(["root.s_one"], dict(initial), storage) == ( + base + ) + both = disk_cache._contexts_fingerprint( + ["root.s_one", "root.s_two"], initial, storage + ) + assert ( + disk_cache._contexts_fingerprint(["root.s_two", "root.s_one"], initial, storage) + == both + ) + # Sensitive to which states, their initial values, and their client storage. + assert disk_cache._contexts_fingerprint(["root.s_two"], initial, storage) != base + assert ( + disk_cache._contexts_fingerprint( + ["root.s_one"], {"root.s_one": {"value": "x"}}, storage + ) + != base + ) + assert ( + disk_cache._contexts_fingerprint( + ["root.s_one"], + initial, + {"cookies": {}, "local_storage": {}, "session_storage": {}}, + ) + != base + ) + + +_FP_HOLDER: dict[str, Any] = {} + + +def _page_fp() -> Component: + """A stateful page that defines the SAME state class on every evaluation. + + Mirrors a docs page whose exec'd demo code is unchanged between reloads. + The previous definition must be unregistered before re-evaluation (the + daemon's registry reset does this between hot reloads). + + Returns: + The page component. + """ + state_cls: Any = type( + "_FixedFpState", + (rx.State,), + {"__annotations__": {"value": str}, "value": "", "__module__": "fp_mod_x"}, + ) + _FP_HOLDER["cls"] = state_cls + return rx.el.div(rx.el.p(state_cls.value), _footer()) + + +def test_stateful_miss_with_unchanged_states_reuses_contexts(tmp_path, monkeypatch): + """A stateful miss whose states are unchanged must not rebuild contexts. + + Most content edits leave the page's evaluation-time states identical, so + re-evaluating every stateful hit page just to rewrite an identical contexts + file would waste nearly the whole hot reload. 
+ """ + from reflex.compiler import utils as compiler_utils + + web = _use_tmp_web_dir(tmp_path, monkeypatch) + + app = rx.App() + app.add_page(_page_fp, route="/s") + app.add_page(_page_c, route="/c") + pages = list(app._unevaluated_pages.values()) + stateful_route, hit_route = pages[0].route, pages[1].route + ctx = _compile(pages, app=app) + assert stateful_route in ctx.stateful_routes + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + _stub_externals(app, monkeypatch) + + manifest_path = web / disk_cache._MANIFEST_FILE + manifest = json.loads(manifest_path.read_text()) + stored_fp = manifest["pages"][stateful_route]["state_fingerprint"] + assert stored_fp # the compile recorded the page's state config + # Make the stateful page a miss; mark the hit page stateful so a contexts + # rebuild (if wrongly triggered) would have to re-evaluate it. + manifest["pages"][stateful_route]["dep_hashes"] = { + str(tmp_path / "view.py"): "stale-hash" + } + manifest["pages"][hit_route]["is_stateful"] = True + manifest_path.write_text(json.dumps(manifest)) + + # The daemon child resets the registry before recompiling; drop the class + # so the page's re-evaluation re-defines it identically. + _unregister_state(_FP_HOLDER.pop("cls")) + + reevaluated: list[str] = [] + monkeypatch.setattr( + app, "_compile_page", lambda route, **k: reevaluated.append(route) + ) + sentinel = "// pre-existing contexts" + out_path = compiler_utils.resolve_path_of_web_dir(compiler_utils.get_context_path()) + out_path.parent.mkdir(parents=True, exist_ok=True) + out_path.write_text(sentinel, encoding="utf-8") + + assert ( + disk_cache.try_incremental_rebuild( + app, compiler_plugins=[], prerender_routes=False, root=tmp_path + ) + is True + ) + # Unchanged state config: no hit re-evaluation, contexts file untouched. + assert reevaluated == [] + assert out_path.read_text(encoding="utf-8") == sentinel + # The refreshed manifest records the same fingerprint for the miss page. 
+ refreshed = json.loads(manifest_path.read_text()) + assert refreshed["pages"][stateful_route]["state_fingerprint"] == stored_fp + _unregister_state(_FP_HOLDER.pop("cls")) + + def test_incremental_rebuild_copies_assets(tmp_path, monkeypatch): """An assets-only edit is an all-hit rebuild, so the incremental path must run the same assets -> public copy as the full compile. @@ -536,7 +809,7 @@ def test_update_manifest_for_misses_keeps_complete_imports(tmp_path, monkeypatch page_ctx = SimpleNamespace( app_wrap_components={}, frontend_imports={}, memo_contributions={} ) - miss_ctx = SimpleNamespace(compiled_pages={"/a": page_ctx}, stateful_routes=set()) + miss_ctx = SimpleNamespace(compiled_pages={"/a": page_ctx}, stateful_routes={}) complete_imports = {"memo-lib": [ImportVar("MemoThing")]} manifest = _manifest({ "/a": {"dep_hashes": {}, "app_wrap_keys": [], "is_stateful": False} diff --git a/tests/units/reflex_base/compiler/__init__.py b/tests/units/reflex_base/compiler/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/units/reflex_base/compiler/test_templates.py b/tests/units/reflex_base/compiler/test_templates.py new file mode 100644 index 00000000000..def7434a2d9 --- /dev/null +++ b/tests/units/reflex_base/compiler/test_templates.py @@ -0,0 +1,53 @@ +"""Tests for reflex_base.compiler.templates.""" + +import re + +from reflex_base.compiler import templates + +# The unloaded-route throw block exactly as shipped in the pinned +# @react-router/dev runtime (dist/static/refresh-utils.mjs, enqueueUpdate). 
+_REFRESH_UTILS_THROW = """\ + let imported = window.__reactRouterRouteModuleUpdates.get(route.id); + if (!imported) { + throw Error( + `[react-router:hmr] No module update found for route ${route.id}`, + ); + } + let routeModule = { +""" + + +def _render_vite_config() -> str: + return templates.vite_config_template( + base="/", + hmr=True, + force_full_reload=False, + experimental_hmr=False, + sourcemap=False, + ) + + +def test_vite_config_patches_react_router_hmr_runtime(): + """The generated vite config neutralizes react-router's unloaded-route throw. + + react-router's HMR client throws when an update batch includes a route the + browser hasn't loaded, and the throw aborts the batch before the update + queue is cleared — one edit to any not-currently-open page then poisons HMR + until a full page reload. The generated config must ship a plugin that + rewrites the served runtime to skip unloaded routes instead. + """ + config = _render_vite_config() + assert "patchReactRouterHmrRuntime()" in config + assert '"\\0virtual:react-router/hmr-runtime"' in config + + # The embedded regex must match the pinned runtime's throw block, and the + # replacement must drop the throw while keeping the update loop going. 
+ regex_match = re.search(r"const unloadedRouteThrow = /(.+)/;", config) + assert regex_match is not None + js_regex = regex_match.group(1) + patched, n_subs = re.subn( + js_regex, "if (!imported) continue;", _REFRESH_UTILS_THROW + ) + assert n_subs == 1 + assert "throw" not in patched + assert "if (!imported) continue;" in patched diff --git a/tests/units/utils/test_compile_daemon.py b/tests/units/utils/test_compile_daemon.py index 93e3e75e38a..27de3b76db3 100644 --- a/tests/units/utils/test_compile_daemon.py +++ b/tests/units/utils/test_compile_daemon.py @@ -4,10 +4,17 @@ import pytest +import reflex as rx from reflex.compiler import disk_cache from reflex.utils import compile_daemon +class _SurvivingModuleState(rx.State): + """Module-level state, like one from a non-purged installed package.""" + + b: int = 0 + + def test_iter_source_files_picks_content_skips_build_dirs(tmp_path): (tmp_path / "page.py").write_text("x = 1\n") (tmp_path / "doc.md").write_text("# doc\n") @@ -168,6 +175,96 @@ class DaemonResetUser(rx.Model, table=True): assert out == b"1" +@pytest.mark.skipif(not hasattr(os, "fork"), reason="requires os.fork (POSIX)") +def test_reset_first_party_keeps_surviving_module_states(tmp_path): + """States from modules that survive the purge stay registered. + + A class body in a non-purged module (framework internals, installed or + workspace packages like reflex_site_shared) never re-executes in the child, + so dropping its registration loses the state from the app's state tree — + and from the compiled contexts file — breaking the frontend's dispatch map. + Purged-module states and runtime-created ``reflex.istate.dynamic`` states + must still be dropped (they re-register on re-import/re-evaluation), and + the dynamic module's attributes must be reset so re-created local states + get their deterministic fresh-process names, matching the cold backend. 
+ """ + mod_file = tmp_path / "purged_state_mod.py" + mod_file.write_text( + "import reflex as rx\n\nclass PurgedState(rx.State):\n a: int = 0\n" + ) + + read_fd, write_fd = os.pipe() + pid = os.fork() + if pid == 0: # child + os.close(read_fd) + result = b"E" + try: + import importlib.util + import sys + + import reflex.istate.dynamic as istate_dynamic + from reflex.state import State, all_base_state_classes + + spec = importlib.util.spec_from_file_location("purged_state_mod", mod_file) + assert spec is not None + module = importlib.util.module_from_spec(spec) + spec.loader.exec_module(module) # type: ignore[union-attr] + sys.modules["purged_state_mod"] = module + + def _local_state() -> type[rx.State]: + class LocalState(rx.State): + c: int = 0 + + return LocalState + + local_cls = _local_state() + assert local_cls.__module__ == istate_dynamic.__name__ + local_name = local_cls.__name__ + + compile_daemon._reset_first_party([tmp_path.resolve()]) + + substate_names = {c.__name__ for c in State.get_substates()} + surviving_kept = ( + "_SurvivingModuleState" in substate_names + and _SurvivingModuleState.get_full_name() in all_base_state_classes + ) + purged_dropped = ( + "purged_state_mod" not in sys.modules + and "PurgedState" not in substate_names + ) + dynamic_dropped = local_name not in substate_names and not [ + n for n in vars(istate_dynamic) if not n.startswith("__") + ] + # A re-created local state gets its original (fresh-process) name, + # not a collision-suffixed drift. 
+ redefined_deterministic = _local_state().__name__ == local_name + + result = ( + b"1" + if ( + surviving_kept + and purged_dropped + and dynamic_dropped + and redefined_deterministic + ) + else b"0" + ) + except Exception: + import traceback + + traceback.print_exc() + finally: + os.write(write_fd, result) + os.close(write_fd) + os._exit(0) + + os.close(write_fd) + out = os.read(read_fd, 1) + os.close(read_fd) + os.waitpid(pid, 0) + assert out == b"1" + + @pytest.mark.skipif(not hasattr(os, "fork"), reason="requires os.fork (POSIX)") def test_reset_first_party_purges_modules_and_registries(tmp_path): """``_reset_first_party`` purges first-party modules and clears registries. @@ -206,9 +303,11 @@ def test_reset_first_party_purges_modules_and_registries(tmp_path): compile_daemon._reset_first_party([tmp_path.resolve()]) purged = "fp_module_under_test" not in sys.modules + # The sentinel (a bare object, no surviving module) must be dropped; + # real states from surviving modules are kept (see the test above). cleared = ( - not RegistrationContext.ensure_context().base_states - and not all_base_state_classes + "sentinel" not in RegistrationContext.ensure_context().base_states + and "sentinel" not in all_base_state_classes and not DECORATED_PAGES ) result = b"1" if (purged and cleared) else b"0" From 5cb52c5cee5022279ed5b117776a9e6865b0cb55 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Fri, 3 Jul 2026 18:03:09 +0500 Subject: [PATCH 17/18] perf(compiler): cut redundant work in the incremental compile path The hot paths of a warm rebuild did far more work than needed: - Collapse duplicate ImportVars when serializing the manifest and merge the miss pages' import sets once, instead of re-merging the ~100k-entry app-wide set per page. A full docs-app compile accumulates ~107k import entries of which only ~6k are unique. 
- Key the module-file cache by the raw __file__ value and check it before any Path construction, so every import statement under an active read recorder skips the resolve. Cleared when the recorder root changes. - Compare path strings in _under_roots instead of building a Path per ancestor per module, which dominated the daemon's reset phase on every hot reload. --- reflex/compiler/disk_cache.py | 23 +++++++++++++++--- reflex/compiler/page_cache.py | 30 ++++++++++++++---------- reflex/utils/compile_daemon.py | 13 +++++++++- tests/units/compiler/test_disk_cache.py | 25 ++++++++++++++++++-- tests/units/compiler/test_page_cache.py | 28 ++++++++++++++++++++++ tests/units/utils/test_compile_daemon.py | 12 ++++++++++ 6 files changed, 113 insertions(+), 18 deletions(-) diff --git a/reflex/compiler/disk_cache.py b/reflex/compiler/disk_cache.py index a1a966aba31..bf7b99baca0 100644 --- a/reflex/compiler/disk_cache.py +++ b/reflex/compiler/disk_cache.py @@ -77,16 +77,30 @@ def _log_fallback(reason: str) -> None: console.info(f"Compile cache: falling back to a full compile — {reason}") +_IMPORT_VAR_FIELDS = tuple(f.name for f in dataclasses.fields(ImportVar)) + + def _serialize_imports(imports: ParsedImportDict) -> dict[str, list[dict[str, Any]]]: """Serialize a parsed import dict to JSON-able primitives. + Duplicates are collapsed in first-seen order (a full docs-app compile + accumulates ~107k entries, ~6k unique): the manifest's import set only + feeds package installation and later merges, where only the unique set + matters, and duplicates bloat the manifest and every pass over it. + Args: imports: The parsed import dict to serialize. Returns: A JSON-serializable representation. 
""" - return {lib: [dataclasses.asdict(iv) for iv in ivs] for lib, ivs in imports.items()} + return { + lib: [ + {name: getattr(iv, name) for name in _IMPORT_VAR_FIELDS} + for iv in dict.fromkeys(ivs) + ] + for lib, ivs in imports.items() + } def _deserialize_imports(data: dict[str, list[dict[str, Any]]]) -> ParsedImportDict: @@ -654,6 +668,7 @@ def try_incremental_rebuild( install_imports = _deserialize_imports(manifest["all_imports"]) if miss_ctx is not None: memo_contributions: dict[tuple[str, str | None], Any] = {} + miss_imports = [] for page in miss_pages: page_ctx = miss_ctx.compiled_pages[page.route] # Both are guaranteed non-None by the guard loop above. @@ -667,7 +682,7 @@ def try_incremental_rebuild( output_code, ) memo_contributions.update(page_ctx.memo_contributions) - install_imports = merge_imports(install_imports, page_ctx.frontend_imports) + miss_imports.append(page_ctx.frontend_imports) # Memo output files are grouped per source module, so compile them once # with the complete definition set (all recompiled pages' contributions # plus the user memos sharing those files or whose module changed). @@ -678,7 +693,9 @@ def try_incremental_rebuild( compiler.utils.write_file( compiler.utils.resolve_path_of_web_dir(mpath), mcode ) - install_imports = merge_imports(install_imports, memo_imports) + # Merge once: re-merging the app-wide set per page re-walks its ~100k + # entries each time. + install_imports = merge_imports(install_imports, *miss_imports, memo_imports) # Record which routes are stateful: miss pages from this compile, hit pages # from the manifest, so the stateful-pages marker is complete. 
We do NOT diff --git a/reflex/compiler/page_cache.py b/reflex/compiler/page_cache.py index 22cbde24da1..0ad2e8cdeb7 100644 --- a/reflex/compiler/page_cache.py +++ b/reflex/compiler/page_cache.py @@ -86,7 +86,9 @@ def _reflex_version() -> str: for prefix in {sys.base_exec_prefix, sys.base_prefix, sys.exec_prefix, sys.prefix} if prefix ) -_module_file_cache: dict[tuple[Path, str], str | None] = {} +#: Maps a module's raw ``__file__`` value to its recordable resolved path (or +#: None). Cleared by ``enable_read_tracking`` when the recorder root changes. +_module_file_cache: dict[object, str | None] = {} _app_import_reads: dict[Path, set[str]] = {} @@ -157,6 +159,20 @@ def _recordable_module_file(file: object) -> str | None: The resolved module file path to record, or None when it is outside the project root or otherwise not recordable. """ + root = _recorder_root + if root is None: + return None + # Hot path: every import statement under an active recorder lands here, so + # look up the raw ``__file__`` value before any Path construction. Keying + # by the raw value is safe because ``enable_read_tracking`` clears the + # cache whenever the recorder root changes. 
+ try: + return _module_file_cache[file] + except KeyError: + pass + except TypeError: + return None + with _suspend_tracking(): try: path = Path(file).absolute() # type: ignore[arg-type] @@ -164,16 +180,6 @@ def _recordable_module_file(file: object) -> str | None: return None resolved_str = None - root = _recorder_root - if root is None: - return None - - cache_key = (root, str(path)) - try: - return _module_file_cache[cache_key] - except KeyError: - pass - if not any(part in _EXCLUDE_PARTS for part in path.parts) and ( _is_inside(path, root) or not _is_python_install_file(path) ): @@ -184,7 +190,7 @@ def _recordable_module_file(file: object) -> str | None: else: if not any(part in _EXCLUDE_PARTS for part in resolved.parts): resolved_str = str(resolved) if _is_inside(resolved, root) else None - _module_file_cache[cache_key] = resolved_str + _module_file_cache[file] = resolved_str return resolved_str diff --git a/reflex/utils/compile_daemon.py b/reflex/utils/compile_daemon.py index 177d273d6a1..69d1ffa07ac 100644 --- a/reflex/utils/compile_daemon.py +++ b/reflex/utils/compile_daemon.py @@ -96,6 +96,10 @@ def _reload_roots() -> list[Path]: def _under_roots(path: Path, roots: list[Path]) -> bool: """Whether ``path`` is one of, or lives under, the reload roots. + Compares path strings: this runs for every loaded module on every hot + reload, and ``root in path.parents`` constructs a Path object per ancestor + per check, which dominated the reset phase of a reload. + Args: path: The resolved path to test. roots: The resolved reload roots. @@ -103,7 +107,14 @@ def _under_roots(path: Path, roots: list[Path]) -> bool: Returns: True if the path is covered by a reload root. 
""" - return any(path == root or root in path.parents for root in roots) + path_str = str(path) + for root in roots: + root_str = str(root) + if path_str == root_str or path_str.startswith( + root_str if root_str.endswith(os.sep) else root_str + os.sep + ): + return True + return False def _iter_source_files(root: Path): diff --git a/tests/units/compiler/test_disk_cache.py b/tests/units/compiler/test_disk_cache.py index 320d364e4b2..5a1b3cdec92 100644 --- a/tests/units/compiler/test_disk_cache.py +++ b/tests/units/compiler/test_disk_cache.py @@ -108,6 +108,25 @@ def test_imports_round_trip(): assert restored == imports +def test_serialize_imports_collapses_duplicates(): + """The manifest only needs the unique import set, in first-seen order. + + A full docs-app compile accumulates ~107k entries of which ~6k are unique; + storing duplicates bloats the manifest and every later merge over it. + """ + use_effect = ImportVar("useEffect") + fragment = ImportVar("Fragment", is_default=False) + imports = { + "react": [use_effect, fragment, use_effect, use_effect, fragment], + "@emotion/react": [ImportVar("jsx"), ImportVar("jsx")], + } + restored = disk_cache._deserialize_imports(disk_cache._serialize_imports(imports)) + assert restored == { + "react": [use_effect, fragment], + "@emotion/react": [ImportVar("jsx")], + } + + def test_wrap_key_strs_is_sorted_and_stable(): keys = [(200, "StrictMode"), (0, "AppWrap"), (45, "ColorMode")] assert disk_cache._wrap_key_strs(keys) == [ @@ -237,9 +256,11 @@ def test_write_and_load_manifest(tmp_path, monkeypatch): # never read back from the manifest) -> keeps the manifest small assert "output_code" not in entry assert "frontend_imports" not in entry - # the app-wide merged imports round-trip cleanly + # the app-wide merged imports round-trip cleanly (duplicates collapsed) restored = disk_cache._deserialize_imports(manifest["all_imports"]) - assert restored == ctx.all_imports + assert restored == { + lib: list(dict.fromkeys(ivs)) for 
lib, ivs in ctx.all_imports.items() + } def test_unchanged_pages_compile_identically(tmp_path, monkeypatch): diff --git a/tests/units/compiler/test_page_cache.py b/tests/units/compiler/test_page_cache.py index aa4a4578093..a954167a6bb 100644 --- a/tests/units/compiler/test_page_cache.py +++ b/tests/units/compiler/test_page_cache.py @@ -352,6 +352,34 @@ def test_record_reads_tracks_executed_builtin_import(tmp_path, monkeypatch): assert str(child.resolve()) in reads +def test_read_tracking_root_change_invalidates_module_cache(tmp_path, monkeypatch): + """Recording a module under one root must not leak into another root. + + The module-file cache is keyed by the raw ``__file__`` value (the hot path + for every import statement), so switching recorder roots relies on + ``enable_read_tracking`` clearing it. + """ + root_a = tmp_path / "app_a" + root_a.mkdir() + root_b = tmp_path / "app_b" + root_b.mkdir() + module_name = "root_change_runtime_dep" + module_file = _prepare_runtime_module(root_a, monkeypatch, module_name) + page_cache.enable_read_tracking(root=root_a) + + try: + with page_cache.record_reads() as reads_a: + importlib.import_module(module_name) + assert str(module_file.resolve()) in reads_a + + page_cache.enable_read_tracking(root=root_b) + with page_cache.record_reads() as reads_b: + importlib.import_module(module_name) + assert str(module_file.resolve()) not in reads_b + finally: + _forget_modules(module_name) + + def test_record_reads_tracks_path_open(tmp_path): """``Path.open`` calls ``io.open`` directly, bypassing the ``builtins.open`` patch, so it must be patched itself for data reads to be recorded. 
diff --git a/tests/units/utils/test_compile_daemon.py b/tests/units/utils/test_compile_daemon.py index 27de3b76db3..ea9d858b552 100644 --- a/tests/units/utils/test_compile_daemon.py +++ b/tests/units/utils/test_compile_daemon.py @@ -31,6 +31,18 @@ def test_iter_source_files_picks_content_skips_build_dirs(tmp_path): assert found == {"page.py", "doc.md", "guide.mdx"} +def test_under_roots_matches_only_real_ancestors(tmp_path): + root = tmp_path / "app" + roots = [root] + assert compile_daemon._under_roots(root, roots) + assert compile_daemon._under_roots(root / "pages" / "index.py", roots) + # A sibling sharing the root's string prefix is NOT under it. + assert not compile_daemon._under_roots(tmp_path / "app_extra" / "m.py", roots) + assert not compile_daemon._under_roots(tmp_path / "other.py", roots) + # The root's own parent is not under it either. + assert not compile_daemon._under_roots(tmp_path, roots) + + def test_external_dependency_files_includes_sibling_markdown(tmp_path, monkeypatch): """A page's markdown read from a sibling dir (outside the app root) is watched. From 6dbfacec09ce3ae71a4a9d232042b9893914dcf2 Mon Sep 17 00:00:00 2001 From: Farhan <www.mfarvirus@gmail.com> Date: Fri, 3 Jul 2026 19:18:25 +0500 Subject: [PATCH 18/18] test(compiler): scope contexts snapshot to the test file's states MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The incremental-cache tests fingerprint state config via _contexts_snapshot, which walks the whole root state tree — picking up unrelated (and sometimes broken) state classes registered by other test modules. Stub it with a snapshot limited to the states this file defines, and stub externals before write_manifest so the fingerprint it records uses the same scoped snapshot. 
--- tests/units/compiler/test_disk_cache.py | 39 ++++++++++++++++++++++++- 1 file changed, 38 insertions(+), 1 deletion(-) diff --git a/tests/units/compiler/test_disk_cache.py b/tests/units/compiler/test_disk_cache.py index 5a1b3cdec92..a4d5f596d26 100644 --- a/tests/units/compiler/test_disk_cache.py +++ b/tests/units/compiler/test_disk_cache.py @@ -303,6 +303,41 @@ def test_unchanged_pages_compile_identically(tmp_path, monkeypatch): _CONTEXTS_STUB = "// contexts stub" +_TEST_STATE_MODULES = (__name__, "fp_mod_x") + + +def _scoped_contexts_snapshot(app) -> tuple[dict[str, Any], dict[str, Any]] | None: + """A ``_contexts_snapshot`` limited to the states this test file defines. + + Same shape and serialization as the real snapshot, but each test state is + compiled standalone instead of walking the whole root state tree, which + would pick up unrelated (and sometimes broken) state classes collected + from other test modules. + + Args: + app: The app being compiled. + + Returns: + The (initial state, client storage) mappings for this file's states, + or None when the app has no state tree. 
+ """ + if app is None or app._state is None: + return None + from reflex_base.registry import RegistrationContext + + from reflex.compiler import utils as compiler_utils + + initial: dict[str, Any] = {} + storage: dict[str, dict[str, Any]] = {} + ctx = RegistrationContext.ensure_context() + for cls in list(ctx.base_states.values()): + if cls.__module__ in _TEST_STATE_MODULES: + initial.update(compiler_utils.compile_state(cls)) + for kind, entries in compiler_utils.compile_client_storage(cls).items(): + storage.setdefault(kind, {}).update(entries) + return initial, storage + + def _stub_externals(app, monkeypatch): """Stub the side-effecting steps the fast path runs on a real app.""" import reflex.utils.frontend_skeleton as fs @@ -318,6 +353,7 @@ def _stub_externals(app, monkeypatch): "reflex.compiler.compiler.compile_contexts", lambda state, theme: (compiler_utils.get_context_path(), _CONTEXTS_STUB), ) + monkeypatch.setattr(disk_cache, "_contexts_snapshot", _scoped_contexts_snapshot) monkeypatch.setattr(fs, "update_react_router_config", lambda **k: None) monkeypatch.setattr(fs, "update_entry_client", lambda *a, **k: None) monkeypatch.setattr(fs, "initialize_vite_config", lambda: None) @@ -613,8 +649,9 @@ def test_stateful_miss_with_unchanged_states_reuses_contexts(tmp_path, monkeypat stateful_route, hit_route = pages[0].route, pages[1].route ctx = _compile(pages, app=app) assert stateful_route in ctx.stateful_routes - disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) + # Stub before write_manifest: it fingerprints via the contexts snapshot. _stub_externals(app, monkeypatch) + disk_cache.write_manifest(ctx, pages, ctx.all_imports, root=tmp_path) manifest_path = web / disk_cache._MANIFEST_FILE manifest = json.loads(manifest_path.read_text())