From ad040eb6d1c0daf2b45a7d1505855bc9e14843c5 Mon Sep 17 00:00:00 2001
From: Naveen Suda
Date: Thu, 23 Apr 2026 20:35:12 -0700
Subject: [PATCH] Cache flatc binary and schema extraction to fix 3x fbpkg export slowdown
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
When running from a standalone PAR file (e.g. via fbpkg), the `flatc` binary used for XNNPACK flatbuffer serialization is extracted from the PAR zip archive on every invocation via `importlib.resources.as_file()`. For one of the Llama transformer XNNPACK exports, this happened ~225 times (once per XNNPACK partition), adding ~8.5 seconds per extraction from the 3.4 GB PAR archive — a total of **~32 minutes** of pure I/O overhead.

## Changes

### `executorch/exir/_serialize/_flatbuffer.py`
- Added `_get_flatc_path()`, which caches the extracted `flatc` binary path using a module-level `contextlib.ExitStack`. The `ExitStack` keeps the `importlib.resources.as_file()` context manager alive for the process lifetime, preventing the temp file from being cleaned up between calls. When no bundled `flatc` resource exists, the `FLATC_EXECUTABLE`/`flatc` fallback is resolved once and cached as well.
- Simplified `_run_flatc()` to use the cached path directly.

### `executorch/backends/xnnpack/serialization/xnnpack_graph_serialize.py`
- Added a `_cached_schema_bytes` module-level cache for the `schema.fbs` content, avoiding repeated `read_bytes()` reads of that resource from the PAR archive.
Differential Revision: D102214303 --- .../serialization/xnnpack_graph_serialize.py | 17 ++++-- exir/_serialize/_flatbuffer.py | 59 ++++++++++++------- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/backends/xnnpack/serialization/xnnpack_graph_serialize.py b/backends/xnnpack/serialization/xnnpack_graph_serialize.py index e4e9a334692..c7fcc57c414 100644 --- a/backends/xnnpack/serialization/xnnpack_graph_serialize.py +++ b/backends/xnnpack/serialization/xnnpack_graph_serialize.py @@ -300,8 +300,11 @@ def pretty_print_xnngraph(xnnpack_graph_json: str, filename: Optional[str] = Non _delegate_instance_id = 0 +_cached_schema_bytes: Optional[bytes] = None + + def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes: - global _delegate_instance_id + global _delegate_instance_id, _cached_schema_bytes sanity_check_xnngraph_dataclass(xnnpack_graph) xnnpack_graph_json = json.dumps(xnnpack_graph, cls=_DataclassEncoder) @@ -316,11 +319,13 @@ def convert_to_flatbuffer(xnnpack_graph: XNNGraph) -> bytes: with tempfile.TemporaryDirectory() as d: schema_path = os.path.join(d, "schema.fbs") with open(schema_path, "wb") as schema_file: - schema_file.write( - _resources.files(serialization_package) - .joinpath("schema.fbs") - .read_bytes() - ) + if _cached_schema_bytes is None: + _cached_schema_bytes = ( + _resources.files(serialization_package) + .joinpath("schema.fbs") + .read_bytes() + ) + schema_file.write(_cached_schema_bytes) json_path = os.path.join(d, "schema.json") with open(json_path, "wb") as json_file: json_file.write(xnnpack_graph_json.encode("ascii")) diff --git a/exir/_serialize/_flatbuffer.py b/exir/_serialize/_flatbuffer.py index e201e4d5743..6d5c43dfd8a 100644 --- a/exir/_serialize/_flatbuffer.py +++ b/exir/_serialize/_flatbuffer.py @@ -7,6 +7,7 @@ # pyre-strict +import contextlib import importlib.resources import os import re @@ -240,35 +241,49 @@ class _FlatbufferResult: # Name of an optional resource containing the `flatc` executable. 
_FLATC_RESOURCE_NAME: str = "flatbuffers-flatc" +# Cached flatc binary path. In PAR files, importlib.resources.as_file() +# extracts the binary to a temp file on each call. With 200+ XNNPACK +# partitions this adds ~30 min of overhead. Caching avoids re-extraction. +_flatc_cached_path: Optional[str] = None +_flatc_exit_stack: Optional[contextlib.ExitStack] = None -def _run_flatc(args: Sequence[str]) -> None: - """Runs the `flatc` command with the provided args. - If a resource matching _FLATC_RESOURCE_NAME exists, uses that executable. - Otherwise, expects the `flatc` tool to be available on the system path. - """ +def _get_flatc_path() -> str: + """Returns the path to the flatc executable, caching the result.""" + global _flatc_cached_path, _flatc_exit_stack + if _flatc_cached_path is not None: + return _flatc_cached_path + flatc_resource = importlib.resources.files(__package__).joinpath( _FLATC_RESOURCE_NAME ) if flatc_resource.is_file(): - # Use the provided flatc binary. - with importlib.resources.as_file(flatc_resource) as flatc_path: - # Ensure the binary has execute permissions (needed for PAR files) - try: - current_mode = flatc_path.stat().st_mode - if not (current_mode & stat.S_IXUSR): - flatc_path.chmod( - current_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH - ) - except OSError: - pass - subprocess.run([flatc_path] + list(args), check=True) + _flatc_exit_stack = contextlib.ExitStack() + flatc_path = _flatc_exit_stack.enter_context( + importlib.resources.as_file(flatc_resource) + ) + try: + current_mode = flatc_path.stat().st_mode + if not (current_mode & stat.S_IXUSR): + flatc_path.chmod( + current_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH + ) + except OSError: + pass + _flatc_cached_path = str(flatc_path) else: - # Expect the `flatc` tool to be on the system path or set as an env var. 
- flatc_path = os.getenv("FLATC_EXECUTABLE") - if not flatc_path: - flatc_path = "flatc" - subprocess.run([flatc_path] + list(args), check=True) + _flatc_cached_path = os.getenv("FLATC_EXECUTABLE", "flatc") + + return _flatc_cached_path + + +def _run_flatc(args: Sequence[str]) -> None: + """Runs the `flatc` command with the provided args. + + If a resource matching _FLATC_RESOURCE_NAME exists, uses that executable. + Otherwise, expects the `flatc` tool to be available on the system path. + """ + subprocess.run([_get_flatc_path()] + list(args), check=True) def _flatc_compile(output_dir: str, schema_path: str, json_path: str) -> None: