From c008426d19a147550607c497cf5fc136dd290e25 Mon Sep 17 00:00:00 2001 From: mariaKt Date: Fri, 17 Apr 2026 17:00:01 -0500 Subject: [PATCH 01/10] Added lazy (generator based) show_iter. show unchanged for now. --- pyk/src/pyk/kcfg/show.py | 53 ++++++++++++++++++++++++++++++++++++++- pyk/src/pyk/proof/show.py | 22 +++++++++++++++- 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/pyk/src/pyk/kcfg/show.py b/pyk/src/pyk/kcfg/show.py index 818e45107c..42e93fba76 100644 --- a/pyk/src/pyk/kcfg/show.py +++ b/pyk/src/pyk/kcfg/show.py @@ -25,7 +25,7 @@ from .kcfg import KCFG if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Iterable, Iterator from pathlib import Path from typing import Final @@ -368,6 +368,57 @@ def show( return res_lines + def show_iter( + self, + cfg: KCFG, + nodes: Iterable[NodeIdLike] = (), + node_deltas: Iterable[tuple[NodeIdLike, NodeIdLike]] = (), + to_module: bool = False, + minimize: bool = True, + omit_cells: Iterable[str] = (), + module_name: str | None = None, + ) -> Iterator[str]: + """Yield proof show output line-by-line, avoiding memory accumulation.""" + yield from self.pretty(cfg, minimize=minimize) + + nodes_printed = False + + for node_id in nodes: + nodes_printed = True + kast = cfg.node(node_id).cterm.kast + kast = KCFGShow.hide_cells(kast, omit_cells) + if minimize: + kast = minimize_term(kast) + yield '' + yield '' + yield f'Node {node_id}:' + yield '' + yield self.pretty_printer.print(kast) + yield '' + + for node_id_1, node_id_2 in node_deltas: + nodes_printed = True + config_1 = KCFGShow.simplify_config(cfg.node(node_id_1).cterm.config, omit_cells) + config_2 = KCFGShow.simplify_config(cfg.node(node_id_2).cterm.config, omit_cells) + config_delta = push_down_rewrites(KRewrite(config_1, config_2)) + if minimize: + config_delta = minimize_term(config_delta) + yield '' + yield '' + yield f'State Delta {node_id_1} => {node_id_2}:' + yield '' + yield self.pretty_printer.print(config_delta) + yield '' + + if not nodes_printed: + yield '' + yield '' + yield '' + + if to_module: + module = self.to_module(cfg, module_name, omit_cells=omit_cells) + yield self.pretty_printer.print(module) + def dot(self, kcfg: KCFG) -> Digraph: def _short_label(label: str) -> str: return '\n'.join( diff --git a/pyk/src/pyk/proof/show.py b/pyk/src/pyk/proof/show.py index 9483fa1599..db0d81ec75 100644 --- a/pyk/src/pyk/proof/show.py +++ b/pyk/src/pyk/proof/show.py @@ -7,7 +7,7 @@ from ..utils import ensure_dir_path if TYPE_CHECKING: - from collections.abc import Iterable + from collections.abc import Iterable, Iterator from pathlib import Path from typing import Final @@ -87,6 +87,26 @@ def show( ) return res_lines + def show_iter( + self, + proof: APRProof, + nodes: Iterable[NodeIdLike] = (), + node_deltas: Iterable[tuple[NodeIdLike, NodeIdLike]] = (), + to_module: bool = False, + minimize: bool = True, + omit_cells: Iterable[str] = (), + ) -> Iterator[str]: + """Yield proof show output line-by-line, avoiding memory accumulation.""" + yield from self.kcfg_show.show_iter( + proof.kcfg, + nodes=nodes, + node_deltas=node_deltas, + to_module=to_module, + minimize=minimize, + omit_cells=omit_cells, + module_name=f'SUMMARY-{proof.id.upper().replace("_", "-")}', + ) + def dot(self, proof: APRProof) -> Digraph: graph = self.kcfg_show.dot(proof.kcfg) attrs = {'class': 'target', 'style': 'solid'} From cc8cb7e73df2b86dbd19991e9d58ccaa4e95452e Mon Sep 17 00:00:00 2001 From: mariaKt Date: Fri, 17 Apr 2026 18:45:39 -0500 Subject: [PATCH 02/10] show now use show_iter (materialize generator) --- pyk/src/pyk/kcfg/show.py | 51 +++++++-------------------------------- pyk/src/pyk/proof/show.py | 8 +++--- 2 files changed, 12 insertions(+), 47 deletions(-) diff --git a/pyk/src/pyk/kcfg/show.py b/pyk/src/pyk/kcfg/show.py index 42e93fba76..9344974b51 100644 --- a/pyk/src/pyk/kcfg/show.py +++ b/pyk/src/pyk/kcfg/show.py @@ -325,48 +325,15 @@ def show( omit_cells: Iterable[str] = (), module_name: str | None = None, ) -> list[str]: - res_lines: list[str] = [] - res_lines += self.pretty(cfg, minimize=minimize) - - nodes_printed = False - - for node_id in nodes: - nodes_printed = True - kast = cfg.node(node_id).cterm.kast - kast = KCFGShow.hide_cells(kast, omit_cells) - if minimize: - kast = minimize_term(kast) - res_lines.append('') - res_lines.append('') - res_lines.append(f'Node {node_id}:') - res_lines.append('') - res_lines.append(self.pretty_printer.print(kast)) - res_lines.append('') - - for node_id_1, node_id_2 in node_deltas: - nodes_printed = True - config_1 = KCFGShow.simplify_config(cfg.node(node_id_1).cterm.config, omit_cells) - config_2 = KCFGShow.simplify_config(cfg.node(node_id_2).cterm.config, omit_cells) - config_delta = push_down_rewrites(KRewrite(config_1, config_2)) - if minimize: - config_delta = minimize_term(config_delta) - res_lines.append('') - res_lines.append('') - res_lines.append(f'State Delta {node_id_1} => {node_id_2}:') - res_lines.append('') - res_lines.append(self.pretty_printer.print(config_delta)) - res_lines.append('') - - if not (nodes_printed): - res_lines.append('') - res_lines.append('') - res_lines.append('') - - if to_module: - module = self.to_module(cfg, module_name, omit_cells=omit_cells) - res_lines.append(self.pretty_printer.print(module)) - - return res_lines + return list(self.show_iter( + cfg, + nodes=nodes, + node_deltas=node_deltas, + to_module=to_module, + minimize=minimize, + omit_cells=omit_cells, + module_name=module_name, + )) def show_iter( self, diff --git a/pyk/src/pyk/proof/show.py b/pyk/src/pyk/proof/show.py index db0d81ec75..0485ff70c7 100644 --- a/pyk/src/pyk/proof/show.py +++ b/pyk/src/pyk/proof/show.py @@ -76,16 +76,14 @@ def show( minimize: bool = True, omit_cells: Iterable[str] = (), ) -> list[str]: - res_lines = self.kcfg_show.show( - proof.kcfg, + return list(self.show_iter( + proof, nodes=nodes, node_deltas=node_deltas, to_module=to_module, minimize=minimize, omit_cells=omit_cells, - module_name=f'SUMMARY-{proof.id.upper().replace("_", "-")}', - ) - return res_lines + )) def show_iter( self, From 0556f9044d051e60459cb9102c19f35172d4041b Mon Sep 17 00:00:00 2001 From: mariaKt Date: Fri, 17 Apr 2026 23:30:53 -0500 Subject: [PATCH 03/10] stubs for kcfg needed types --- pyk/src/pyk/kcfg/lazy.py | 150 ++++++++++++++++++++++++++++++++++ pyk/src/pyk/kcfg/lazy_load.py | 107 ++++++++++++++++++++++++ pyk/src/pyk/proof/show.py | 39 +++++++++ 3 files changed, 296 insertions(+) create mode 100644 pyk/src/pyk/kcfg/lazy.py create mode 100644 pyk/src/pyk/kcfg/lazy_load.py diff --git a/pyk/src/pyk/kcfg/lazy.py b/pyk/src/pyk/kcfg/lazy.py new file mode 100644 index 0000000000..90a8cca642 --- /dev/null +++ b/pyk/src/pyk/kcfg/lazy.py @@ -0,0 +1,150 @@ +"""Lazy loading stubs for memory-efficient proof display. + +These stubs duck-type the real KCFG classes, deferring heavy data loading +(node CTerms, cover/split CSubsts) until actually accessed for printing. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ..cterm import CSubst, CTerm + from ..kast.inner import KInner + from .kcfg import NodeAttr + + +class LazyNode: + """Duck-types KCFG.Node. Loads CTerm from disk on first .cterm access.""" + + id: int + attrs: frozenset + _node_path: Path + _cterm: CTerm | None + + def __init__(self, id: int, attrs: frozenset, node_path: Path) -> None: + self.id = id + self.attrs = attrs + self._node_path = node_path + self._cterm = None + + @property + def cterm(self) -> CTerm: + if self._cterm is None: + from ..cterm import CTerm + + node_dict = json.loads(self._node_path.read_text()) + self._cterm = CTerm.from_dict(node_dict['cterm']) + return self._cterm + + def evict(self) -> None: + """Release the loaded CTerm from memory.""" + self._cterm = None + + def __eq__(self, other: object) -> bool: + if isinstance(other, LazyNode): + return self.id == other.id + # Also compare with real KCFG.Node + return hasattr(other, 'id') and self.id == other.id + + def __hash__(self) -> int: + return hash(self.id) + + def __lt__(self, other: object) -> bool: + if hasattr(other, 'id'): + return self.id < other.id + return NotImplemented + + def __le__(self, other: object) -> bool: + if hasattr(other, 'id'): + return self.id <= other.id + return NotImplemented + + +class APRProofStub: + """Lightweight stub for APRProof — answers proof-level queries without loading the full proof. + + Duck-types enough of APRProof for APRProofNodePrinter.node_attrs() to work. + Uses proof.json metadata + the KCFG for graph queries. + """ + + def __init__(self, proof_dict: dict, kcfg: object) -> None: + self.init = int(proof_dict['init']) + self.target = int(proof_dict['target']) + self._terminal_ids = set(proof_dict.get('terminal') or []) + self._bounded_ids = set(proof_dict.get('bounded') or []) + self._refuted_ids = {int(k) for k in (proof_dict.get('node_refutations') or {}).keys()} + self.kcfg = kcfg + + def _resolve(self, node_id: int) -> int: + return node_id + + def is_init(self, node_id: int) -> bool: + return node_id == self.init + + def is_target(self, node_id: int) -> bool: + return node_id == self.target + + def is_terminal(self, node_id: int) -> bool: + return node_id in self._terminal_ids + + def is_explorable(self, node_id: int) -> bool: + return ( + self.kcfg.is_leaf(node_id) + and not self.is_terminal(node_id) + and not self.kcfg.is_stuck(node_id) + and not self.kcfg.is_vacuous(node_id) + ) + + def is_pending(self, node_id: int) -> bool: + return ( + self.is_explorable(node_id) + and not self.is_target(node_id) + and not self.is_refuted(node_id) + and not self.is_bounded(node_id) + ) + + def is_refuted(self, node_id: int) -> bool: + return node_id in self._refuted_ids + + def is_bounded(self, node_id: int) -> bool: + return node_id in self._bounded_ids + + +class LazyCSubst: + """Duck-types CSubst. Loads from a JSON dict on first access.""" + + _raw: dict | None + _csubst: CSubst | None + + def __init__(self, raw_dict: dict) -> None: + self._raw = raw_dict + self._csubst = None + + def _load(self) -> CSubst: + if self._csubst is None: + from ..cterm import CSubst + + self._csubst = CSubst.from_dict(self._raw) + self._raw = None # Release raw dict + return self._csubst + + @property + def constraints(self) -> tuple: + return self._load().constraints + + @property + def subst(self) -> object: + return self._load().subst + + def pred(self, *args, **kwargs) -> KInner: + return self._load().pred(*args, **kwargs) + + def to_dict(self) -> dict: + return self._load().to_dict() + + def evict(self) -> None: + """Release the loaded CSubst from memory, keep raw dict for reload.""" + self._csubst = None diff --git a/pyk/src/pyk/kcfg/lazy_load.py b/pyk/src/pyk/kcfg/lazy_load.py new file mode 100644 index 0000000000..318f815986 --- /dev/null +++ b/pyk/src/pyk/kcfg/lazy_load.py @@ -0,0 +1,107 @@ +"""Load a KCFG from disk with lazy node CTerms and cover CSubsts. + +Nodes are represented as LazyNode stubs that load CTerms on demand. +Cover CSubsts are wrapped as LazyCSubst to defer parsing. +Edges, splits, and ndbranches are constructed with the actual KCFG types +using stub nodes. +""" + +from __future__ import annotations + +import json +from pathlib import Path +from typing import TYPE_CHECKING + +from .kcfg import KCFG, KCFGNodeAttr +from .lazy import LazyCSubst, LazyNode + +if TYPE_CHECKING: + pass + + +def read_cfg_lazy(cfg_dir: Path) -> KCFG: + """Read a KCFG from disk without loading node CTerms or cover CSubsts eagerly. + + Returns a real KCFG object populated with LazyNode stubs and LazyCSubst wrappers. + The existing pretty_segments traversal works unchanged on this KCFG. + """ + from ..cterm import CSubst + + kcfg_json_path = cfg_dir / 'kcfg.json' + node_dir = cfg_dir / 'nodes' + + dct = json.loads(kcfg_json_path.read_text()) + + node_ids = dct.get('nodes') or [] + vacuous_ids = set(dct.get('vacuous') or []) + stuck_ids = set(dct.get('stuck') or []) + + # Build lazy nodes + nodes: dict[int, LazyNode] = {} + for nid in node_ids: + attrs = frozenset() + if nid in vacuous_ids: + attrs = frozenset([KCFGNodeAttr.VACUOUS]) + if nid in stuck_ids: + attrs = frozenset(attrs | {KCFGNodeAttr.STUCK}) + nodes[nid] = LazyNode(nid, attrs, node_dir / f'{nid}.json') + + # Build KCFG without optimize_memory (stubs aren't real KInner terms) + cfg = KCFG(optimize_memory=False) + + # Add nodes — KCFG.add_node expects KCFG.Node, but we pass LazyNode (duck typing) + for node in nodes.values(): + cfg._nodes[node.id] = node # type: ignore[assignment] + cfg._node_id = max(cfg._node_id, node.id + 1) + + cfg._node_id = dct.get('next', cfg._node_id) + + # Edges — lightweight (just depth + rules) + for edge_dict in dct.get('edges') or []: + src = nodes[edge_dict['source']] + tgt = nodes[edge_dict['target']] + edge = KCFG.Edge(src, tgt, edge_dict['depth'], tuple(edge_dict.get('rules') or [])) # type: ignore[arg-type] + cfg._edges[src.id] = edge + + # Merged edges + for me_dict in dct.get('merged_edges') or []: + src = nodes[me_dict['source']] + tgt = nodes[me_dict['target']] + sub_edges = tuple( + KCFG.Edge(nodes[e['source']], nodes[e['target']], e['depth'], tuple(e.get('rules') or [])) # type: ignore[arg-type] + for e in me_dict['edges'] + ) + merged = KCFG.MergedEdge(src, tgt, sub_edges) # type: ignore[arg-type] + cfg._merged_edges[src.id] = merged + + # Splits — CSubst per target loaded eagerly (32 MB total, acceptable) + for split_dict in dct.get('splits') or []: + src = nodes[split_dict['source']] + targets = [] + for t in split_dict['targets']: + tgt_node = nodes[t['target']] + csubst = CSubst.from_dict(t['csubst']) + targets.append((tgt_node, csubst)) + split = KCFG.Split(src, tuple(targets)) # type: ignore[arg-type] + cfg._splits[src.id] = split + + # Covers — CSubst wrapped as LazyCSubst (792 MB deferred) + for cover_dict in dct.get('covers') or []: + src = nodes[cover_dict['source']] + tgt = nodes[cover_dict['target']] + lazy_csubst = LazyCSubst(cover_dict['csubst']) + cover = KCFG.Cover(src, tgt, lazy_csubst) # type: ignore[arg-type] + cfg._covers[src.id] = cover + + # NDBranches + for nb_dict in dct.get('ndbranches') or []: + src = nodes[nb_dict['source']] + tgt_nodes = tuple(nodes[tid] for tid in nb_dict['targets']) + nb = KCFG.NDBranch(src, tgt_nodes, tuple(nb_dict.get('rules') or [])) # type: ignore[arg-type] + cfg._ndbranches[src.id] = nb + + # Aliases + for alias, node_id in dct.get('aliases', {}).items(): + cfg._aliases[alias] = node_id + + return cfg diff --git a/pyk/src/pyk/proof/show.py b/pyk/src/pyk/proof/show.py index 0485ff70c7..46ae45a415 100644 --- a/pyk/src/pyk/proof/show.py +++ b/pyk/src/pyk/proof/show.py @@ -105,6 +105,45 @@ def show_iter( module_name=f'SUMMARY-{proof.id.upper().replace("_", "-")}', ) + def show_iter_from_disk( + self, + proof_dir: Path, + proof_id: str, + minimize: bool = True, + ) -> Iterator[str]: + """Yield proof show output line-by-line, loading data from disk on demand. + + Unlike show/show_iter, this does not load the entire proof into memory. + Node CTerms and cover CSubsts are loaded lazily when accessed for printing. + """ + import json + + from ..kcfg.lazy import APRProofStub + from ..kcfg.lazy_load import read_cfg_lazy + + proof_subdir = proof_dir / proof_id + cfg_dir = proof_subdir / 'kcfg' + + # Load KCFG with lazy stubs + kcfg = read_cfg_lazy(cfg_dir) + + # If the node_printer is proof-aware, swap in a lightweight stub + _original_proof = getattr(self.kcfg_show.node_printer, 'proof', None) + if _original_proof is not None: + proof_dict = json.loads((proof_subdir / 'proof.json').read_text()) + proof_stub = APRProofStub(proof_dict, kcfg) + object.__setattr__(self.kcfg_show.node_printer, 'proof', proof_stub) + + # Use existing show_iter with the lazy KCFG + yield from self.kcfg_show.show_iter( + kcfg, + minimize=minimize, + ) + + # Restore original proof if swapped + if _original_proof is not None: + object.__setattr__(self.kcfg_show.node_printer, 'proof', _original_proof) + def dot(self, proof: APRProof) -> Digraph: graph = self.kcfg_show.dot(proof.kcfg) attrs = {'class': 'target', 'style': 'solid'} From 34fc00fb1b73f3d9c430ca14c96ee9389ea7ad2c Mon Sep 17 00:00:00 2001 From: mariaKt Date: Mon, 20 Apr 2026 14:16:08 -0500 Subject: [PATCH 04/10] Testing for show_iter_from_disc --- pyk/src/tests/integration/proof/test_imp.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/pyk/src/tests/integration/proof/test_imp.py b/pyk/src/tests/integration/proof/test_imp.py index 65be2ca7d4..2964792fcd 100644 --- a/pyk/src/tests/integration/proof/test_imp.py +++ b/pyk/src/tests/integration/proof/test_imp.py @@ -20,7 +20,7 @@ from pyk.proof import APRProver, ProofStatus from pyk.proof.proof import parallel_advance_proof from pyk.proof.reachability import APRFailureInfo, APRProof -from pyk.proof.show import APRProofNodePrinter +from pyk.proof.show import APRProofNodePrinter, APRProofShow from pyk.testing import KCFGExploreTest, KProveTest, ParallelTest from pyk.utils import single @@ -951,6 +951,11 @@ def test_all_path_reachability_prove( cfg_lines = kcfg_show.show(proof.kcfg) _LOGGER.info('\n'.join(cfg_lines)) + # Verify show_iter_from_disk produces identical output + apr_show = APRProofShow(kprove.definition, node_printer=kcfg_show.node_printer) + disk_lines = list(apr_show.show_iter_from_disk(proof_dir, proof.id)) + assert cfg_lines == disk_lines, 'show_iter_from_disk output differs from show' + assert proof.status == proof_status assert leaf_number(proof) == expected_leaf_number @@ -1005,6 +1010,11 @@ def test_all_path_reachability_prove_with_kcfg_optims( cfg_lines = kcfg_show.show(proof.kcfg) _LOGGER.info('\n'.join(cfg_lines)) + # Verify show_iter_from_disk produces identical output + apr_show = APRProofShow(kprove.definition, node_printer=kcfg_show.node_printer) + disk_lines = list(apr_show.show_iter_from_disk(proof_dir, proof.id)) + assert cfg_lines == disk_lines, 'show_iter_from_disk output differs from show' + assert proof.status == proof_status assert len(proof.kcfg._nodes) == expected_nodes @@ -1564,6 +1574,11 @@ def test_all_path_reachability_prove_parallel( cfg_lines = kcfg_show.show(proof.kcfg) _LOGGER.info('\n'.join(cfg_lines)) + # Verify show_iter_from_disk produces identical output + apr_show = APRProofShow(kprove.definition, node_printer=kcfg_show.node_printer) + disk_lines = list(apr_show.show_iter_from_disk(proof_dir, proof.id)) + assert cfg_lines == disk_lines, 'show_iter_from_disk output differs from show' + assert proof.status == proof_status assert leaf_number(proof) == expected_leaf_number From 0275f6aad3de7e7fffed6780501a4f73ba7f8fcc Mon Sep 17 00:00:00 2001 From: mariaKt Date: Mon, 20 Apr 2026 16:17:46 -0500 Subject: [PATCH 05/10] Removed duplication, instead edit from_dict with lazy param --- pyk/src/pyk/kcfg/kcfg.py | 64 +++++++++++++++++--- pyk/src/pyk/kcfg/lazy.py | 2 - pyk/src/pyk/kcfg/lazy_load.py | 107 ---------------------------------- pyk/src/pyk/proof/show.py | 4 +- 4 files changed, 59 insertions(+), 118 deletions(-) delete mode 100644 pyk/src/pyk/kcfg/lazy_load.py diff --git a/pyk/src/pyk/kcfg/kcfg.py b/pyk/src/pyk/kcfg/kcfg.py index 35482b149c..f34c22246a 100644 --- a/pyk/src/pyk/kcfg/kcfg.py +++ b/pyk/src/pyk/kcfg/kcfg.py @@ -106,6 +106,30 @@ def read_cfg_data(self) -> dict[str, Any]: return dct + def read_cfg_data_lazy(self) -> dict[str, Any]: + """Read kcfg.json without loading node CTerms. + + Node entries contain id, attrs, and the path to load the CTerm from, + but no 'cterm' key. Cover CSubsts are left as raw dicts. + """ + dct = json.loads(self.kcfg_json_path.read_text()) + + new_nodes = [] + for node_id in dct.get('nodes') or []: + attrs = [] + if node_id in dct['vacuous']: + attrs.append(KCFGNodeAttr.VACUOUS.value) + if node_id in dct['stuck']: + attrs.append(KCFGNodeAttr.STUCK.value) + new_nodes.append({'id': node_id, 'node_path': str(self.kcfg_node_path(node_id)), 'attrs': attrs}) + + dct['nodes'] = new_nodes + + del dct['vacuous'] + del dct['stuck'] + + return dct + def read_node_data(self, node_id: int) -> dict[str, Any]: return json.loads(self.kcfg_node_path(node_id).read_text()) @@ -664,14 +688,28 @@ def to_dict(self) -> dict[str, Any]: return {k: v for k, v in res.items() if v} @staticmethod - def from_dict(dct: Mapping[str, Any], optimize_memory: bool = True) -> KCFG: - cfg = KCFG(optimize_memory=optimize_memory) + def from_dict(dct: Mapping[str, Any], optimize_memory: bool = True, lazy: bool = False) -> KCFG: + cfg = KCFG(optimize_memory=(optimize_memory and not lazy)) + + if lazy: + from pathlib import Path - for node_dict in dct.get('nodes') or []: - node = KCFG.Node.from_dict(node_dict) - cfg.add_node(node) + from .lazy import LazyCSubst, LazyNode + + for node_dict in dct.get('nodes') or []: + node = LazyNode( + node_dict['id'], + frozenset(NodeAttr(a) for a in node_dict.get('attrs', [])), + Path(node_dict['node_path']), + ) + cfg._nodes[node.id] = node # type: ignore[assignment] + cfg._node_id = max(cfg._node_id, node.id + 1) + else: + for node_dict in dct.get('nodes') or []: + node = KCFG.Node.from_dict(node_dict) + cfg.add_node(node) - max_id = max([node.id for node in cfg.nodes], default=0) + max_id = max(cfg._nodes.keys(), default=0) cfg._node_id = dct.get('next', max_id + 1) for edge_dict in dct.get('edges') or []: @@ -683,7 +721,12 @@ def from_dict(dct: Mapping[str, Any], optimize_memory: bool = True) -> KCFG: cfg.add_successor(merged_edge) for cover_dict in dct.get('covers') or []: - cover = KCFG.Cover.from_dict(cover_dict, cfg._nodes) + if lazy: + src = cfg._nodes[cover_dict['source']] + tgt = cfg._nodes[cover_dict['target']] + cover = KCFG.Cover(src, tgt, LazyCSubst(cover_dict['csubst'])) # type: ignore[arg-type] + else: + cover = KCFG.Cover.from_dict(cover_dict, cfg._nodes) cfg.add_successor(cover) for split_dict in dct.get('splits') or []: @@ -1301,6 +1344,13 @@ def read_cfg_data(cfg_dir: Path) -> KCFG: cfg._kcfg_store = store return cfg + @staticmethod + def read_cfg_data_lazy(cfg_dir: Path) -> KCFG: + store = KCFGStore(cfg_dir) + cfg = KCFG.from_dict(store.read_cfg_data_lazy(), lazy=True) + cfg._kcfg_store = store + return cfg + @staticmethod def read_node_data(cfg_dir: Path, node_id: int) -> KCFG.Node: store = KCFGStore(cfg_dir) diff --git a/pyk/src/pyk/kcfg/lazy.py b/pyk/src/pyk/kcfg/lazy.py index 90a8cca642..7e21672fbd 100644 --- a/pyk/src/pyk/kcfg/lazy.py +++ b/pyk/src/pyk/kcfg/lazy.py @@ -13,7 +13,6 @@ if TYPE_CHECKING: from ..cterm import CSubst, CTerm from ..kast.inner import KInner - from .kcfg import NodeAttr class LazyNode: @@ -128,7 +127,6 @@ def _load(self) -> CSubst: from ..cterm import CSubst self._csubst = CSubst.from_dict(self._raw) - self._raw = None # Release raw dict return self._csubst @property diff --git a/pyk/src/pyk/kcfg/lazy_load.py b/pyk/src/pyk/kcfg/lazy_load.py deleted file mode 100644 index 318f815986..0000000000 --- a/pyk/src/pyk/kcfg/lazy_load.py +++ /dev/null @@ -1,107 +0,0 @@ -"""Load a KCFG from disk with lazy node CTerms and cover CSubsts. - -Nodes are represented as LazyNode stubs that load CTerms on demand. -Cover CSubsts are wrapped as LazyCSubst to defer parsing. -Edges, splits, and ndbranches are constructed with the actual KCFG types -using stub nodes. -""" - -from __future__ import annotations - -import json -from pathlib import Path -from typing import TYPE_CHECKING - -from .kcfg import KCFG, KCFGNodeAttr -from .lazy import LazyCSubst, LazyNode - -if TYPE_CHECKING: - pass - - -def read_cfg_lazy(cfg_dir: Path) -> KCFG: - """Read a KCFG from disk without loading node CTerms or cover CSubsts eagerly. - - Returns a real KCFG object populated with LazyNode stubs and LazyCSubst wrappers. - The existing pretty_segments traversal works unchanged on this KCFG. - """ - from ..cterm import CSubst - - kcfg_json_path = cfg_dir / 'kcfg.json' - node_dir = cfg_dir / 'nodes' - - dct = json.loads(kcfg_json_path.read_text()) - - node_ids = dct.get('nodes') or [] - vacuous_ids = set(dct.get('vacuous') or []) - stuck_ids = set(dct.get('stuck') or []) - - # Build lazy nodes - nodes: dict[int, LazyNode] = {} - for nid in node_ids: - attrs = frozenset() - if nid in vacuous_ids: - attrs = frozenset([KCFGNodeAttr.VACUOUS]) - if nid in stuck_ids: - attrs = frozenset(attrs | {KCFGNodeAttr.STUCK}) - nodes[nid] = LazyNode(nid, attrs, node_dir / f'{nid}.json') - - # Build KCFG without optimize_memory (stubs aren't real KInner terms) - cfg = KCFG(optimize_memory=False) - - # Add nodes — KCFG.add_node expects KCFG.Node, but we pass LazyNode (duck typing) - for node in nodes.values(): - cfg._nodes[node.id] = node # type: ignore[assignment] - cfg._node_id = max(cfg._node_id, node.id + 1) - - cfg._node_id = dct.get('next', cfg._node_id) - - # Edges — lightweight (just depth + rules) - for edge_dict in dct.get('edges') or []: - src = nodes[edge_dict['source']] - tgt = nodes[edge_dict['target']] - edge = KCFG.Edge(src, tgt, edge_dict['depth'], tuple(edge_dict.get('rules') or [])) # type: ignore[arg-type] - cfg._edges[src.id] = edge - - # Merged edges - for me_dict in dct.get('merged_edges') or []: - src = nodes[me_dict['source']] - tgt = nodes[me_dict['target']] - sub_edges = tuple( - KCFG.Edge(nodes[e['source']], nodes[e['target']], e['depth'], tuple(e.get('rules') or [])) # type: ignore[arg-type] - for e in me_dict['edges'] - ) - merged = KCFG.MergedEdge(src, tgt, sub_edges) # type: ignore[arg-type] - cfg._merged_edges[src.id] = merged - - # Splits — CSubst per target loaded eagerly (32 MB total, acceptable) - for split_dict in dct.get('splits') or []: - src = nodes[split_dict['source']] - targets = [] - for t in split_dict['targets']: - tgt_node = nodes[t['target']] - csubst = CSubst.from_dict(t['csubst']) - targets.append((tgt_node, csubst)) - split = KCFG.Split(src, tuple(targets)) # type: ignore[arg-type] - cfg._splits[src.id] = split - - # Covers — CSubst wrapped as LazyCSubst (792 MB deferred) - for cover_dict in dct.get('covers') or []: - src = nodes[cover_dict['source']] - tgt = nodes[cover_dict['target']] - lazy_csubst = LazyCSubst(cover_dict['csubst']) - cover = KCFG.Cover(src, tgt, lazy_csubst) # type: ignore[arg-type] - cfg._covers[src.id] = cover - - # NDBranches - for nb_dict in dct.get('ndbranches') or []: - src = nodes[nb_dict['source']] - tgt_nodes = tuple(nodes[tid] for tid in nb_dict['targets']) - nb = KCFG.NDBranch(src, tgt_nodes, tuple(nb_dict.get('rules') or [])) # type: ignore[arg-type] - cfg._ndbranches[src.id] = nb - - # Aliases - for alias, node_id in dct.get('aliases', {}).items(): - cfg._aliases[alias] = node_id - - return cfg diff --git a/pyk/src/pyk/proof/show.py b/pyk/src/pyk/proof/show.py index 46ae45a415..bdec2d1fa5 100644 --- a/pyk/src/pyk/proof/show.py +++ b/pyk/src/pyk/proof/show.py @@ -119,13 +119,13 @@ def show_iter_from_disk( import json from ..kcfg.lazy import APRProofStub - from ..kcfg.lazy_load import read_cfg_lazy + from ..kcfg.kcfg import KCFG proof_subdir = proof_dir / proof_id cfg_dir = proof_subdir / 'kcfg' # Load KCFG with lazy stubs - kcfg = read_cfg_lazy(cfg_dir) + kcfg = KCFG.read_cfg_data_lazy(cfg_dir) # If the node_printer is proof-aware, swap in a lightweight stub _original_proof = getattr(self.kcfg_show.node_printer, 'proof', None) From 57ddca4b8f979cd08878feeb951cecf177ad52ff Mon Sep 17 00:00:00 2001 From: mariaKt Date: Mon, 20 Apr 2026 19:13:35 -0500 Subject: [PATCH 06/10] Code quality --- pyk/src/pyk/kcfg/kcfg.py | 6 +++--- pyk/src/pyk/kcfg/lazy.py | 15 +++++++++------ pyk/src/pyk/kcfg/show.py | 20 +++++++++++--------- pyk/src/pyk/proof/show.py | 20 +++++++++++--------- 4 files changed, 34 insertions(+), 27 deletions(-) diff --git a/pyk/src/pyk/kcfg/kcfg.py b/pyk/src/pyk/kcfg/kcfg.py index f34c22246a..d8c9151d35 100644 --- a/pyk/src/pyk/kcfg/kcfg.py +++ b/pyk/src/pyk/kcfg/kcfg.py @@ -697,13 +697,13 @@ def from_dict(dct: Mapping[str, Any], optimize_memory: bool = True, lazy: bool = from .lazy import LazyCSubst, LazyNode for node_dict in dct.get('nodes') or []: - node = LazyNode( + lazy_node = LazyNode( node_dict['id'], frozenset(NodeAttr(a) for a in node_dict.get('attrs', [])), Path(node_dict['node_path']), ) - cfg._nodes[node.id] = node # type: ignore[assignment] - cfg._node_id = max(cfg._node_id, node.id + 1) + cfg._nodes[lazy_node.id] = lazy_node # type: ignore[assignment] + cfg._node_id = max(cfg._node_id, lazy_node.id + 1) else: for node_dict in dct.get('nodes') or []: node = KCFG.Node.from_dict(node_dict) diff --git a/pyk/src/pyk/kcfg/lazy.py b/pyk/src/pyk/kcfg/lazy.py index 7e21672fbd..bde2ac9cbd 100644 --- a/pyk/src/pyk/kcfg/lazy.py +++ b/pyk/src/pyk/kcfg/lazy.py @@ -7,12 +7,15 @@ from __future__ import annotations import json -from pathlib import Path from typing import TYPE_CHECKING if TYPE_CHECKING: + from pathlib import Path + from typing import Any + from ..cterm import CSubst, CTerm from ..kast.inner import KInner + from .kcfg import KCFG class LazyNode: @@ -69,7 +72,7 @@ class APRProofStub: Uses proof.json metadata + the KCFG for graph queries. """ - def __init__(self, proof_dict: dict, kcfg: object) -> None: + def __init__(self, proof_dict: dict[str, Any], kcfg: KCFG) -> None: self.init = int(proof_dict['init']) self.target = int(proof_dict['target']) self._terminal_ids = set(proof_dict.get('terminal') or []) @@ -115,11 +118,11 @@ def is_bounded(self, node_id: int) -> bool: class LazyCSubst: """Duck-types CSubst. Loads from a JSON dict on first access.""" - _raw: dict | None + _raw: dict[str, Any] _csubst: CSubst | None - def __init__(self, raw_dict: dict) -> None: - self._raw = raw_dict + def __init__(self, raw_dict: dict[str, Any]) -> None: + self._raw: dict[str, Any] = raw_dict self._csubst = None def _load(self) -> CSubst: @@ -137,7 +140,7 @@ def constraints(self) -> tuple: def subst(self) -> object: return self._load().subst - def pred(self, *args, **kwargs) -> KInner: + def pred(self, *args: Any, **kwargs: Any) -> KInner: return self._load().pred(*args, **kwargs) def to_dict(self) -> dict: diff --git a/pyk/src/pyk/kcfg/show.py b/pyk/src/pyk/kcfg/show.py index 9344974b51..46902ce271 100644 --- a/pyk/src/pyk/kcfg/show.py +++ b/pyk/src/pyk/kcfg/show.py @@ -325,15 +325,17 @@ def show( omit_cells: Iterable[str] = (), module_name: str | None = None, ) -> list[str]: - return list(self.show_iter( - cfg, - nodes=nodes, - node_deltas=node_deltas, - to_module=to_module, - minimize=minimize, - omit_cells=omit_cells, - module_name=module_name, - )) + return list( + self.show_iter( + cfg, + nodes=nodes, + node_deltas=node_deltas, + to_module=to_module, + minimize=minimize, + omit_cells=omit_cells, + module_name=module_name, + ) + ) def show_iter( self, diff --git a/pyk/src/pyk/proof/show.py b/pyk/src/pyk/proof/show.py index bdec2d1fa5..1551b4edbd 100644 --- a/pyk/src/pyk/proof/show.py +++ b/pyk/src/pyk/proof/show.py @@ -76,14 +76,16 @@ def show( minimize: bool = True, omit_cells: Iterable[str] = (), ) -> list[str]: - return list(self.show_iter( - proof, - nodes=nodes, - node_deltas=node_deltas, - to_module=to_module, - minimize=minimize, - omit_cells=omit_cells, - )) + return list( + self.show_iter( + proof, + nodes=nodes, + node_deltas=node_deltas, + to_module=to_module, + minimize=minimize, + omit_cells=omit_cells, + ) + ) def show_iter( self, @@ -118,8 +120,8 @@ def show_iter_from_disk( """ import json - from ..kcfg.lazy import APRProofStub from ..kcfg.kcfg import KCFG + from ..kcfg.lazy import APRProofStub proof_subdir = proof_dir / proof_id cfg_dir = proof_subdir / 'kcfg' From 44827c3d746f6d464a12c18612c4accb38313881 Mon Sep 17 00:00:00 2001 From: mariaKt Date: Wed, 22 Apr 2026 16:51:56 -0500 Subject: [PATCH 07/10] LazyNode (dyck-typing Node) removed, integrated into Node --- pyk/src/pyk/kcfg/kcfg.py | 65 ++++++++++++++++++++++++++++++++-------- pyk/src/pyk/kcfg/lazy.py | 57 +++-------------------------------- 2 files changed, 56 insertions(+), 66 deletions(-) diff --git a/pyk/src/pyk/kcfg/kcfg.py b/pyk/src/pyk/kcfg/kcfg.py index d8c9151d35..2b106a043e 100644 --- a/pyk/src/pyk/kcfg/kcfg.py +++ b/pyk/src/pyk/kcfg/kcfg.py @@ -136,16 +136,35 @@ def read_node_data(self, node_id: int) -> dict[str, Any]: class KCFG(Container[Union['KCFG.Node', 'KCFG.Successor']]): @final - @dataclass(frozen=True, order=True) class Node: id: int - cterm: CTerm + _cterm: CTerm | None + _node_path: Path | None attrs: frozenset[NodeAttr] - def __init__(self, id: int, cterm: CTerm, attrs: Iterable[NodeAttr] = ()) -> None: - object.__setattr__(self, 'id', id) - object.__setattr__(self, 'cterm', cterm) - object.__setattr__(self, 'attrs', frozenset(attrs)) + def __init__( + self, id: int, cterm: CTerm | None = None, attrs: Iterable[NodeAttr] = (), node_path: Path | None = None + ) -> None: + self.id = id + self._cterm = cterm + self._node_path = node_path + self.attrs = frozenset(attrs) + + @property + def cterm(self) -> CTerm: + if self._cterm is None: + if self._node_path is None: + raise ValueError(f'Node {self.id} has no CTerm and no path to load from') + import json + + node_dict = json.loads(self._node_path.read_text()) + self._cterm = CTerm.from_dict(node_dict['cterm']) + return self._cterm + + def evict(self) -> None: + """Release the loaded CTerm from memory. Reloads from disk on next .cterm access.""" + if self._node_path is not None: + self._cterm = None def to_dict(self) -> dict[str, Any]: return {'id': self.id, 'cterm': self.cterm.to_dict(), 'attrs': [attr.value for attr in self.attrs]} @@ -174,6 +193,27 @@ def let(self, cterm: CTerm | None = None, attrs: Iterable[KCFGNodeAttr] | None = def free_vars(self) -> frozenset[str]: return frozenset(self.cterm.free_vars) + def __eq__(self, other: object) -> bool: + if not isinstance(other, KCFG.Node): + return NotImplemented + return self.id == other.id + + def __hash__(self) -> int: + return hash(self.id) + + def __lt__(self, other: object) -> bool: + if not isinstance(other, KCFG.Node): + return NotImplemented + return self.id < other.id + + def __le__(self, other: object) -> bool: + if not isinstance(other, KCFG.Node): + return NotImplemented + return self.id <= other.id + + def __repr__(self) -> str: + return f'Node(id={self.id}, attrs={self.attrs})' + class Successor(ABC): source: KCFG.Node @@ -694,16 +734,15 @@ def from_dict(dct: Mapping[str, Any], optimize_memory: bool = True, lazy: bool = if lazy: from pathlib import Path - from .lazy import LazyCSubst, LazyNode + from .lazy import LazyCSubst for node_dict in dct.get('nodes') or []: - lazy_node = LazyNode( - node_dict['id'], - frozenset(NodeAttr(a) for a in node_dict.get('attrs', [])), - Path(node_dict['node_path']), + node = KCFG.Node( + id=node_dict['id'], + attrs=[NodeAttr(a) for a in node_dict.get('attrs', [])], + node_path=Path(node_dict['node_path']), ) - cfg._nodes[lazy_node.id] = lazy_node # type: ignore[assignment] - cfg._node_id = max(cfg._node_id, lazy_node.id + 1) + cfg.add_node(node) else: for node_dict in dct.get('nodes') or []: node = KCFG.Node.from_dict(node_dict) diff --git a/pyk/src/pyk/kcfg/lazy.py b/pyk/src/pyk/kcfg/lazy.py index bde2ac9cbd..4b82276d4d 100644 --- a/pyk/src/pyk/kcfg/lazy.py +++ b/pyk/src/pyk/kcfg/lazy.py @@ -1,70 +1,21 @@ -"""Lazy loading stubs for memory-efficient proof display. +"""Stubs for memory-efficient proof display. -These stubs duck-type the real KCFG classes, deferring heavy data loading -(node CTerms, cover/split CSubsts) until actually accessed for printing. +LazyCSubst defers CSubst parsing until accessed. +APRProofStub answers proof-level queries from proof.json metadata. """ from __future__ import annotations -import json from typing import TYPE_CHECKING if TYPE_CHECKING: - from pathlib import Path from typing import Any - from ..cterm import CSubst, CTerm + from ..cterm import CSubst from ..kast.inner import KInner from .kcfg import KCFG -class LazyNode: - """Duck-types KCFG.Node. Loads CTerm from disk on first .cterm access.""" - - id: int - attrs: frozenset - _node_path: Path - _cterm: CTerm | None - - def __init__(self, id: int, attrs: frozenset, node_path: Path) -> None: - self.id = id - self.attrs = attrs - self._node_path = node_path - self._cterm = None - - @property - def cterm(self) -> CTerm: - if self._cterm is None: - from ..cterm import CTerm - - node_dict = json.loads(self._node_path.read_text()) - self._cterm = CTerm.from_dict(node_dict['cterm']) - return self._cterm - - def evict(self) -> None: - """Release the loaded CTerm from memory.""" - self._cterm = None - - def __eq__(self, other: object) -> bool: - if isinstance(other, LazyNode): - return self.id == other.id - # Also compare with real KCFG.Node - return hasattr(other, 'id') and self.id == other.id - - def __hash__(self) -> int: - return hash(self.id) - - def __lt__(self, other: object) -> bool: - if hasattr(other, 'id'): - return self.id < other.id - return NotImplemented - - def __le__(self, other: object) -> bool: - if hasattr(other, 'id'): - return self.id <= other.id - return NotImplemented - - class APRProofStub: """Lightweight stub for APRProof — answers proof-level queries without loading the full proof. From de0582cab6352ee5a331bb8643a985b106ecf2d2 Mon Sep 17 00:00:00 2001 From: mariaKt Date: Wed, 22 Apr 2026 17:06:26 -0500 Subject: [PATCH 08/10] Write node path to node in write_cfg_data --- pyk/src/pyk/kcfg/kcfg.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pyk/src/pyk/kcfg/kcfg.py b/pyk/src/pyk/kcfg/kcfg.py index 2b106a043e..e0cb47c17e 100644 --- a/pyk/src/pyk/kcfg/kcfg.py +++ b/pyk/src/pyk/kcfg/kcfg.py @@ -82,7 +82,9 @@ def write_cfg_data( for node_id in deleted_nodes: self.kcfg_node_path(node_id).unlink(missing_ok=True) for node_id in created_nodes: - self.kcfg_node_path(node_id).write_text(json.dumps(kcfg._nodes[node_id].to_dict())) + node_path = self.kcfg_node_path(node_id) + node_path.write_text(json.dumps(kcfg._nodes[node_id].to_dict())) + kcfg._nodes[node_id]._node_path = node_path self.kcfg_json_path.write_text(json.dumps(dct)) def read_cfg_data(self) -> dict[str, Any]: From 17e384e1cb00d81214e9fa41431550531c506bee Mon Sep 17 00:00:00 2001 From: mariaKt Date: Wed, 22 Apr 2026 18:34:46 -0500 Subject: [PATCH 09/10] Allow evicting nodes duting a proof (for new proofs) --- pyk/src/pyk/kcfg/kcfg.py | 21 ++++++++++--- pyk/src/pyk/proof/reachability.py | 5 ++- pyk/src/tests/integration/proof/test_imp.py | 35 +++++++++++++++++++++ 3 files changed, 55 insertions(+), 6 deletions(-) diff --git a/pyk/src/pyk/kcfg/kcfg.py b/pyk/src/pyk/kcfg/kcfg.py index e0cb47c17e..5608bec0f9 100644 --- a/pyk/src/pyk/kcfg/kcfg.py +++ b/pyk/src/pyk/kcfg/kcfg.py @@ -507,8 +507,11 @@ def replace_target(self, node: KCFG.Node) -> KCFG.NDBranch: _kcfg_store: KCFGStore | None - def __init__(self, cfg_dir: Path | None = None, optimize_memory: bool = True) -> None: + _auto_evict: bool + + def __init__(self, cfg_dir: Path | None = None, optimize_memory: bool = True, auto_evict: bool = False) -> None: self._node_id = 1 + self._auto_evict = auto_evict if optimize_memory: from .store import OptimizedNodeStore @@ -587,9 +590,13 @@ def uncovered(self) -> list[KCFG.Node]: @staticmethod def from_claim( - defn: KDefinition, claim: KClaim, cfg_dir: Path | None = None, optimize_memory: bool = True + defn: KDefinition, + claim: KClaim, + cfg_dir: Path | None = None, + optimize_memory: bool = True, + auto_evict: bool = False, ) -> tuple[KCFG, NodeIdLike, NodeIdLike]: - cfg = KCFG(cfg_dir=cfg_dir, optimize_memory=optimize_memory) + cfg = KCFG(cfg_dir=cfg_dir, optimize_memory=optimize_memory, auto_evict=auto_evict) claim_body = claim.body claim_body = defn.instantiate_cell_vars(claim_body) claim_body = rename_generated_vars(claim_body) @@ -730,8 +737,10 @@ def to_dict(self) -> dict[str, Any]: return {k: v for k, v in res.items() if v} @staticmethod - def from_dict(dct: Mapping[str, Any], optimize_memory: bool = True, lazy: bool = False) -> KCFG: - cfg = KCFG(optimize_memory=(optimize_memory and not lazy)) + def from_dict( + dct: Mapping[str, Any], optimize_memory: bool = True, lazy: bool = False, auto_evict: bool = False + ) -> KCFG: + cfg = KCFG(optimize_memory=(optimize_memory and not lazy), auto_evict=auto_evict) if lazy: from pathlib import Path @@ -991,6 +1000,8 @@ def add_successor(self, succ: KCFG.Successor) -> None: self._splits[succ.source.id] = succ elif type(succ) is KCFG.NDBranch: self._ndbranches[succ.source.id] = succ + if self._auto_evict: + succ.source.evict() def edge(self, source_id: NodeIdLike, target_id: NodeIdLike) -> Edge | None: source_id = self._resolve(source_id) diff --git a/pyk/src/pyk/proof/reachability.py b/pyk/src/pyk/proof/reachability.py index 928fe5c25a..15db6c9bc7 100644 --- a/pyk/src/pyk/proof/reachability.py +++ b/pyk/src/pyk/proof/reachability.py @@ -422,11 +422,12 @@ def from_claim( logs: dict[int, tuple[LogEntry, ...]], proof_dir: Path | None = None, bmc_depth: int | None = None, + auto_evict: bool = False, **kwargs: Any, ) -> APRProof: kcfg_dir = proof_dir / claim.label / 'kcfg' if proof_dir is not None else None - kcfg, init_node, target_node = KCFG.from_claim(defn, claim, cfg_dir=kcfg_dir) + kcfg, init_node, target_node = KCFG.from_claim(defn, claim, cfg_dir=kcfg_dir, auto_evict=auto_evict) return APRProof( claim.label, kcfg, @@ -475,6 +476,7 @@ def from_spec_modules( logs: dict[int, tuple[LogEntry, ...]], proof_dir: Path | None = None, spec_labels: Iterable[str] | None = None, + auto_evict: bool = False, ) -> list[APRProof]: claim_index = ClaimIndex.from_module_list(spec_modules) spec_labels = claim_index.labels(include=spec_labels, with_depends=True, ordered=True) @@ -492,6 +494,7 @@ def from_spec_modules( claim, logs=logs, proof_dir=proof_dir, + auto_evict=auto_evict, ) apr_proof.write_proof_data() res.append(apr_proof) diff --git a/pyk/src/tests/integration/proof/test_imp.py b/pyk/src/tests/integration/proof/test_imp.py index 2964792fcd..6d1bbcef03 100644 --- a/pyk/src/tests/integration/proof/test_imp.py +++ b/pyk/src/tests/integration/proof/test_imp.py @@ -1315,6 +1315,41 @@ def test_aprbmc_prove_read_write_node_data( assert proof.dict == proof_from_disk.dict assert proof.kcfg.nodes == proof_from_disk.kcfg.nodes + def test_apr_prove_auto_evict( + self, + kprove: KProve, + kcfg_explore: KCFGExplore, + tmp_path_factory: TempPathFactory, + ) -> None: + claim = single( + kprove.get_claims( + Path(K_FILES / 'imp-simple-spec.k'), + spec_module_name='IMP-SIMPLE-SPEC', + claim_labels=['IMP-SIMPLE-SPEC.addition-1'], + ) + ) + + # Run without auto_evict + proof_dir_base = tmp_path_factory.mktemp('apr_auto_evict_base') + proof_base = APRProof.from_claim(kprove.definition, claim, logs={}, proof_dir=proof_dir_base) + kcfg_explore.simplify(proof_base.kcfg, {}) + prover_base = APRProver(kcfg_explore=kcfg_explore, execute_depth=1) + prover_base.advance_proof(proof_base) + + # Run with auto_evict + proof_dir_evict = tmp_path_factory.mktemp('apr_auto_evict_evict') + proof_evict = APRProof.from_claim(kprove.definition, claim, logs={}, proof_dir=proof_dir_evict, auto_evict=True) + kcfg_explore.simplify(proof_evict.kcfg, {}) + prover_evict = APRProver(kcfg_explore=kcfg_explore, execute_depth=1) + prover_evict.advance_proof(proof_evict) + + # Verify eviction actually happened + evicted = [n for n in proof_evict.kcfg.nodes if n._cterm is None] + assert len(evicted) > 0, 'Expected at least one evicted node' + + assert proof_base.status == proof_evict.status + assert proof_base.dict == proof_evict.dict + def test_fail_fast( self, kprove: KProve, From 8acd76e969d38697b60ee564d61a7b5c7b9a4872 Mon Sep 17 00:00:00 2001 From: mariaKt Date: Wed, 22 Apr 2026 18:40:55 -0500 Subject: [PATCH 10/10] Allow evicting nodes during a proof (for resumed proofs) --- pyk/src/pyk/kcfg/kcfg.py | 20 ++++++++-- pyk/src/pyk/proof/reachability.py | 6 +-- pyk/src/tests/integration/proof/test_imp.py | 41 +++++++++++++++++++++ 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/pyk/src/pyk/kcfg/kcfg.py b/pyk/src/pyk/kcfg/kcfg.py index 5608bec0f9..cc3626ded0 100644 --- a/pyk/src/pyk/kcfg/kcfg.py +++ b/pyk/src/pyk/kcfg/kcfg.py @@ -99,7 +99,14 @@ def read_cfg_data(self) -> dict[str, Any]: attrs.append(KCFGNodeAttr.VACUOUS.value) if node['id'] in dct['stuck']: attrs.append(KCFGNodeAttr.STUCK.value) - new_nodes.append({'id': node['id'], 'cterm': node['cterm'], 'attrs': attrs}) + new_nodes.append( + { + 'id': node['id'], + 'cterm': node['cterm'], + 'attrs': attrs, + 'node_path': str(self.kcfg_node_path(node['id'])), + } + ) dct['nodes'] = new_nodes @@ -173,7 +180,12 @@ def to_dict(self) -> dict[str, Any]: @staticmethod def from_dict(dct: dict[str, Any]) -> KCFG.Node: - return KCFG.Node(dct['id'], CTerm.from_dict(dct['cterm']), [NodeAttr(attr) for attr in dct['attrs']]) + from pathlib import Path + + node_path = Path(dct['node_path']) if 'node_path' in dct else None + return KCFG.Node( + dct['id'], CTerm.from_dict(dct['cterm']), [NodeAttr(attr) for attr in dct['attrs']], node_path=node_path + ) def add_attr(self, attr: NodeAttr) -> KCFG.Node: return KCFG.Node(self.id, self.cterm, list(self.attrs) + [attr]) @@ -1390,9 +1402,9 @@ def write_cfg_data(self) -> None: self._created_nodes.clear() @staticmethod - def read_cfg_data(cfg_dir: Path) -> KCFG: + def read_cfg_data(cfg_dir: Path, auto_evict: bool = False) -> KCFG: store = KCFGStore(cfg_dir) - cfg = KCFG.from_dict(store.read_cfg_data()) + cfg = KCFG.from_dict(store.read_cfg_data(), auto_evict=auto_evict) cfg._kcfg_store = store return cfg diff --git a/pyk/src/pyk/proof/reachability.py b/pyk/src/pyk/proof/reachability.py index 15db6c9bc7..c6a8d130d3 100644 --- a/pyk/src/pyk/proof/reachability.py +++ b/pyk/src/pyk/proof/reachability.py @@ -485,7 +485,7 @@ def from_spec_modules( for label in spec_labels: if proof_dir is not None and Proof.proof_data_exists(label, proof_dir): - apr_proof = APRProof.read_proof_data(proof_dir, label) + apr_proof = APRProof.read_proof_data(proof_dir, label, auto_evict=auto_evict) else: _LOGGER.info(f'Building APRProof for claim: {label}') claim = claim_index[label] @@ -578,12 +578,12 @@ def get_refutation_id(self, node_id: int) -> str: return f'{self.id}.node-infeasible-{node_id}' @staticmethod - def read_proof_data(proof_dir: Path, id: str) -> APRProof: + def read_proof_data(proof_dir: Path, id: str, auto_evict: bool = False) -> APRProof: proof_subdir = proof_dir / id proof_json = proof_subdir / 'proof.json' proof_dict = json.loads(proof_json.read_text()) cfg_dir = proof_subdir / 'kcfg' - kcfg = KCFG.read_cfg_data(cfg_dir) + kcfg = KCFG.read_cfg_data(cfg_dir, auto_evict=auto_evict) init = int(proof_dict['init']) target = int(proof_dict['target']) bounded = proof_dict['bounded'] diff --git a/pyk/src/tests/integration/proof/test_imp.py b/pyk/src/tests/integration/proof/test_imp.py index 6d1bbcef03..6080357c6f 100644 --- a/pyk/src/tests/integration/proof/test_imp.py +++ b/pyk/src/tests/integration/proof/test_imp.py @@ -1350,6 +1350,47 @@ def test_apr_prove_auto_evict( assert proof_base.status == proof_evict.status assert proof_base.dict == proof_evict.dict + def test_apr_prove_auto_evict_resume( + self, + kprove: KProve, + kcfg_explore: KCFGExplore, + tmp_path_factory: TempPathFactory, + ) -> None: + claim = single( + kprove.get_claims( + Path(K_FILES / 'imp-simple-spec.k'), + spec_module_name='IMP-SIMPLE-SPEC', + claim_labels=['IMP-SIMPLE-SPEC.addition-2'], + ) + ) + + # Run baseline to completion + proof_dir_base = tmp_path_factory.mktemp('apr_auto_evict_resume_base') + proof_base = APRProof.from_claim(kprove.definition, claim, logs={}, proof_dir=proof_dir_base) + kcfg_explore.simplify(proof_base.kcfg, {}) + prover_base = APRProver(kcfg_explore=kcfg_explore, execute_depth=1) + prover_base.advance_proof(proof_base) + + # Run partial proof (1 iteration), save to disk + proof_dir_resume = tmp_path_factory.mktemp('apr_auto_evict_resume') + proof_partial = APRProof.from_claim(kprove.definition, claim, logs={}, proof_dir=proof_dir_resume) + kcfg_explore.simplify(proof_partial.kcfg, {}) + prover_partial = APRProver(kcfg_explore=kcfg_explore, execute_depth=1) + prover_partial.advance_proof(proof_partial, max_iterations=1) + proof_partial.write_proof_data() + + # Resume from disk with auto_evict=True + proof_resumed = APRProof.read_proof_data(proof_dir_resume, proof_partial.id, auto_evict=True) + prover_resumed = APRProver(kcfg_explore=kcfg_explore, execute_depth=1) + prover_resumed.advance_proof(proof_resumed) + + # Verify eviction actually happened + evicted = [n for n in proof_resumed.kcfg.nodes if n._cterm is None] + assert len(evicted) > 0, 'Expected at least one evicted node' + + assert proof_base.status == proof_resumed.status + assert proof_base.dict == proof_resumed.dict + def test_fail_fast( self, kprove: KProve,