Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
137 commits
Select commit Hold shift + click to select a range
acd1380
Guard replay requires for planner input
AlexanderOnischenko Jan 23, 2026
6c6c7b5
Make ALL flag boolean in fixture target
AlexanderOnischenko Jan 23, 2026
509fbe4
Fix replay plan normalize imports
AlexanderOnischenko Jan 23, 2026
e60e312
Allow module-level skip for replay fixtures
AlexanderOnischenko Jan 23, 2026
b232f1a
Merge pull request #107 from AlexanderOnischenko/codex/add-replay-fix…
AlexanderOnischenko Jan 23, 2026
d73c690
version inc - tracer
AlexanderOnischenko Jan 23, 2026
809897d
Clarify fixture help defaults
AlexanderOnischenko Jan 23, 2026
7e1c909
Fix replay fixture typing
AlexanderOnischenko Jan 23, 2026
2d46fa2
Validate replay fixtures for relational selectors
AlexanderOnischenko Jan 23, 2026
5408b02
Add fixture management commands
AlexanderOnischenko Jan 23, 2026
17fc8ea
Decouple fixture commands from check
AlexanderOnischenko Jan 23, 2026
35c825e
Fix fixture tools git paths and case ids
AlexanderOnischenko Jan 23, 2026
a76f976
Remove unused replay bucket helper
AlexanderOnischenko Jan 23, 2026
b3eba81
Improve replay fixture debug output
AlexanderOnischenko Jan 23, 2026
344b301
новые трейсы с падениями (перемещаем)
AlexanderOnischenko Jan 23, 2026
c5bcb45
Precheck trace conflicts and rollback fixture moves
AlexanderOnischenko Jan 23, 2026
e1058c2
Merge branch 'codex/reorganize-fixtures-and-migrate-paths' of github.…
AlexanderOnischenko Jan 23, 2026
6b4cef0
Improve fixture tool discovery
AlexanderOnischenko Jan 23, 2026
868774a
Merge branch 'codex/reorganize-fixtures-and-migrate-paths' of github.…
AlexanderOnischenko Jan 23, 2026
456618d
Use git mv for tracked rollbacks
AlexanderOnischenko Jan 23, 2026
921de37
Allow unbucketed replay fixtures
AlexanderOnischenko Jan 23, 2026
fd18fb4
Report promoted fixture paths
AlexanderOnischenko Jan 23, 2026
55fda76
Merge branch 'codex/reorganize-fixtures-and-migrate-paths' of github.…
AlexanderOnischenko Jan 23, 2026
d50d110
Fix rerun hint for root fixtures
AlexanderOnischenko Jan 23, 2026
a6da9e3
Drop zip plan-trace loading
AlexanderOnischenko Jan 23, 2026
5c2a437
Merge pull request #108 from AlexanderOnischenko/codex/reorganize-fix…
AlexanderOnischenko Jan 23, 2026
b0d8f70
Merge branch 'codex/reorganize-fixtures-and-migrate-paths' of github.…
AlexanderOnischenko Jan 23, 2026
d9076ec
Merge pull request #112 from AlexanderOnischenko/codex/reorganize-fix…
AlexanderOnischenko Jan 23, 2026
4f3e6ae
ruff fixes
AlexanderOnischenko Jan 23, 2026
46284fb
fixture_tools: restore JSON on migrate rollback, add resource handlin…
AlexanderOnischenko Jan 25, 2026
d2cefbd
документация по трейсеру
AlexanderOnischenko Jan 25, 2026
9ea2865
Document replay case fixture regeneration
AlexanderOnischenko Jan 25, 2026
da9edfc
Fix replay case export filtering and tooling
AlexanderOnischenko Jan 25, 2026
d08c237
Remove legacy fixture tooling and harden exports
AlexanderOnischenko Jan 25, 2026
e477861
Fix resource path update on existing files
AlexanderOnischenko Jan 25, 2026
fbbc1ae
Restore fixture tools and rename replay id var
AlexanderOnischenko Jan 25, 2026
d9060b8
Add overwrite handling for replay exports
AlexanderOnischenko Jan 25, 2026
3a85b44
Optimize replay export indexing and errors
AlexanderOnischenko Jan 25, 2026
17cfeb8
Improve replay bundle idempotency and diagnostics
AlexanderOnischenko Jan 25, 2026
24dc042
Avoid mutating shared resources during export
AlexanderOnischenko Jan 25, 2026
0fe2f66
Tighten export validation and makefile checks
AlexanderOnischenko Jan 25, 2026
3a97075
Remove legacy fixture tooling
AlexanderOnischenko Jan 25, 2026
d0bc39e
Update tracer documentation for replay_case v2
AlexanderOnischenko Jan 25, 2026
0a88c73
Fix tracer export imports and expected-only tests
AlexanderOnischenko Jan 25, 2026
da12338
Add tracer fixture utility helpers
AlexanderOnischenko Jan 25, 2026
ba8a2e8
Refine tracer fixture tooling
AlexanderOnischenko Jan 25, 2026
702ca75
Guard tracer-export bucket usage
AlexanderOnischenko Jan 25, 2026
8decb64
Remove legacy replay fixture entrypoints
AlexanderOnischenko Jan 25, 2026
a92f651
Allow fixture-green shorthand case names
AlexanderOnischenko Jan 25, 2026
19a38e5
Add provider snapshot to replay inputs
AlexanderOnischenko Jan 25, 2026
22f8cfc
Refine tracer export and fixture behavior
AlexanderOnischenko Jan 25, 2026
cdc91c0
Add tracer auto-discovery for events.jsonl
AlexanderOnischenko Jan 25, 2026
b1bf05d
Improve replay case selection UX
AlexanderOnischenko Jan 25, 2026
93571a4
Move tracer auto-resolve module
AlexanderOnischenko Jan 25, 2026
08857c5
Fix fixture-rm bucket typing
AlexanderOnischenko Jan 25, 2026
36eca65
Finalize tracer workflow tests and DX
AlexanderOnischenko Jan 25, 2026
3c855b0
Simplify tracer export defaults and event resolution
AlexanderOnischenko Jan 25, 2026
6dd25de
Ensure events.jsonl on runner failures
AlexanderOnischenko Jan 25, 2026
57b9f8b
Improve tracer export run selection
AlexanderOnischenko Jan 25, 2026
7b462ab
Fix tracer selection typing
AlexanderOnischenko Jan 25, 2026
80dc7f5
Improve replay fixture pytest diagnostics
AlexanderOnischenko Jan 25, 2026
672129e
Refine tracer fixture diagnostics
AlexanderOnischenko Jan 25, 2026
a1673ea
Fix pytest parameter typing
AlexanderOnischenko Jan 25, 2026
170e94e
Add known_bad backlog suite
AlexanderOnischenko Jan 25, 2026
124a32e
Close tracer v2 spec gaps
AlexanderOnischenko Jan 25, 2026
410b84e
Fix replay log and known_bad typing
AlexanderOnischenko Jan 25, 2026
fff4279
Tighten deprecated replay log typing
AlexanderOnischenko Jan 25, 2026
35d5af1
Improve tracer export filters and fixture selection
AlexanderOnischenko Jan 25, 2026
4052bba
Fix fixture tools source typing
AlexanderOnischenko Jan 25, 2026
c3daa91
Extend fixture selectors and add demote
AlexanderOnischenko Jan 25, 2026
1b3ab9a
Prefer runs with replay events
AlexanderOnischenko Jan 25, 2026
9d66581
Add rollback for fixture moves
AlexanderOnischenko Jan 26, 2026
fd1c59c
Handle overwrite in fixture demote
AlexanderOnischenko Jan 26, 2026
be0ea0f
Merge pull request #118 from AlexanderOnischenko/codex/implement-trac…
AlexanderOnischenko Jan 26, 2026
c91a472
Respect events flag for case logging
AlexanderOnischenko Jan 26, 2026
764aa87
Merge pull request #121 from AlexanderOnischenko/codex/fix-event-logg…
AlexanderOnischenko Jan 26, 2026
bc96fac
Update fixture-green validation and diagnostics
AlexanderOnischenko Jan 26, 2026
0ac1dc7
Fix fixture-green observed hints
AlexanderOnischenko Jan 26, 2026
e248500
Deduplicate diff path helper
AlexanderOnischenko Jan 26, 2026
4ed2a7b
Guard fixture-rm path usage for BUCKET=all
AlexanderOnischenko Jan 26, 2026
0ba8f5b
Revert "Guard fixture-rm path usage for BUCKET=all"
AlexanderOnischenko Jan 26, 2026
bd409af
Expose diff utils and adjust fixture-rm heuristic
AlexanderOnischenko Jan 26, 2026
86c2b3b
Add fixture-green name heuristic
AlexanderOnischenko Jan 26, 2026
aaaadb7
Merge pull request #123 from AlexanderOnischenko/codex/fix-fixture-gr…
AlexanderOnischenko Jan 26, 2026
8129cfd
Default-enable event logging; allow explicit disable via None (#126)
AlexanderOnischenko Jan 26, 2026
28a483d
Fix relational selector validation (#125)
AlexanderOnischenko Jan 26, 2026
c3b6935
Add validator for resource_read replay (#127)
AlexanderOnischenko Jan 26, 2026
6ab3c51
обновлена документация
AlexanderOnischenko Jan 26, 2026
7091ab3
Fix type narrowing for replay utilities (#128)
AlexanderOnischenko Jan 26, 2026
45eb6be
Improve plan_normalize replay dependency handling (#129)
AlexanderOnischenko Jan 26, 2026
4ce2bab
Resolve tracer --run-id from history/run_meta and improve CLI semanti…
AlexanderOnischenko Jan 26, 2026
d3bf518
Fix optional path handling in tracer (#132)
AlexanderOnischenko Jan 27, 2026
2507210
сделал проходящие тесты в корзине known_bad зелеными
AlexanderOnischenko Jan 28, 2026
7b5597d
форматирование вывода
AlexanderOnischenko Jan 28, 2026
d48c4d1
Fix fixture migrate resource_id duplication
AlexanderOnischenko Feb 1, 2026
586f934
Normalize resource paths across platforms
AlexanderOnischenko Feb 1, 2026
18b02c3
Warn on missing demo config defaults
AlexanderOnischenko Feb 1, 2026
d9ea116
Add preflight warnings for tracer tooling
AlexanderOnischenko Feb 1, 2026
bea5e33
Merge pull request #133 from AlexanderOnischenko/codex/comment-on-lin…
AlexanderOnischenko Feb 1, 2026
a75580a
Handle schema snapshot extensions in case open
AlexanderOnischenko Feb 1, 2026
a6ddef9
Merge pull request #134 from AlexanderOnischenko/codex/comment-on-lin…
AlexanderOnischenko Feb 1, 2026
e1b2473
Improve replay_id discovery and CLI/Makefile UX for tracer export (#135)
AlexanderOnischenko Feb 1, 2026
de4ca5e
Normalize run/case layout paths
AlexanderOnischenko Feb 1, 2026
cfb265f
Relocate layout helpers to utils
AlexanderOnischenko Feb 1, 2026
71e0885
Harden run-relative path validation
AlexanderOnischenko Feb 1, 2026
273d08c
Clarify schema snapshot case dir
AlexanderOnischenko Feb 1, 2026
a3fe495
Align replay resource paths with layout contract
AlexanderOnischenko Feb 1, 2026
aa8f18c
Merge pull request #136 from AlexanderOnischenko/codex/establish-unif…
AlexanderOnischenko Feb 1, 2026
2003a60
Fix fixture resource path migration
AlexanderOnischenko Feb 1, 2026
21b1633
Merge pull request #137 from AlexanderOnischenko/codex/fix-test-failu…
AlexanderOnischenko Feb 1, 2026
aac85a5
Improve replay case selection UX
AlexanderOnischenko Feb 1, 2026
3e3085c
Merge pull request #138 from AlexanderOnischenko/codex/update-input-h…
AlexanderOnischenko Feb 1, 2026
766b30a
Fix pyright typing in replay export tests
AlexanderOnischenko Feb 1, 2026
22b3513
Merge pull request #139 from AlexanderOnischenko/codex/fix-attribute-…
AlexanderOnischenko Feb 1, 2026
8275d3d
Remove legacy fixture handling
AlexanderOnischenko Feb 1, 2026
2c0cb72
Merge pull request #140 from AlexanderOnischenko/codex/find-and-migra…
AlexanderOnischenko Feb 1, 2026
c1a9803
Fix meta typing in replay export
AlexanderOnischenko Feb 1, 2026
fee257e
Merge pull request #141 from AlexanderOnischenko/codex/fix-get-attrib…
AlexanderOnischenko Feb 1, 2026
c209456
Update tracer documentation to match implementation
AlexanderOnischenko Feb 1, 2026
48d73ad
Merge pull request #142 from AlexanderOnischenko/codex/-fetchgraph_tr…
AlexanderOnischenko Feb 1, 2026
072f9e7
Clarify fixture commands in help
AlexanderOnischenko Feb 1, 2026
7e8ce4f
Merge pull request #143 from AlexanderOnischenko/codex/fix-duplicate-…
AlexanderOnischenko Feb 1, 2026
fceea10
Clarify latest_non_missed behavior
AlexanderOnischenko Feb 1, 2026
25aa83c
Unify run/case layout for runtime and tracer
AlexanderOnischenko Feb 1, 2026
f665152
Tidy tracer docs formatting
AlexanderOnischenko Feb 1, 2026
2dbdd16
Fix test runner typing
AlexanderOnischenko Feb 1, 2026
28c6fc5
Update tracer-matches make test
AlexanderOnischenko Feb 1, 2026
85e8cd2
Shorten replay match timestamps
AlexanderOnischenko Feb 1, 2026
f1403ad
Fix batch run_id and tracer help
AlexanderOnischenko Feb 1, 2026
1b3f8ed
Add tracer resolve parity and replay discovery tests (#146)
AlexanderOnischenko Feb 1, 2026
163c4d7
Unify run dir enumeration for tracer listing and resolve (#147)
AlexanderOnischenko Feb 1, 2026
159d980
Include non-timestamp run dirs in scan (#148)
AlexanderOnischenko Feb 1, 2026
40fb363
Fix replay-id discovery: list modes accept latest run with any replay…
AlexanderOnischenko Feb 1, 2026
58c1058
Merge pull request #144 from AlexanderOnischenko/codex/refactor-makef…
AlexanderOnischenko Feb 1, 2026
154918c
все тесты видны в vscode
AlexanderOnischenko Feb 2, 2026
afa4b88
Merge branch 'dev' into feature/tracer
AlexanderOnischenko Feb 2, 2026
b6a58fc
Harden replay path resolution (#150)
AlexanderOnischenko Feb 8, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
402 changes: 384 additions & 18 deletions Makefile

Large diffs are not rendered by default.

67 changes: 50 additions & 17 deletions examples/demo_qa/batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
from .term import fmt_num, fmt_pct, should_use_color, truncate
from .term import render_table as render_text_table
from .utils import dump_json
from fetchgraph.utils.path_layout import LayoutConfig, RunLayout, ensure_dirs, find_case_dirs, make_case_dir, runs_root


def write_summary(out_path: Path, summary: dict) -> Path:
Expand Down Expand Up @@ -416,13 +417,8 @@ def _git_sha() -> Optional[str]:


def _find_case_artifact(run_path: Path, case_id: str) -> Optional[Path]:
cases_dir = run_path / "cases"
if not cases_dir.exists():
return None
matches = sorted(cases_dir.glob(f"{case_id}_*"))
if matches:
return matches[-1]
return None
matches = find_case_dirs(run_path, case_id, LayoutConfig())
return matches[0] if matches else None


def _resolve_run_path(path: Path | None, artifacts_dir: Path) -> Optional[Path]:
Expand Down Expand Up @@ -984,9 +980,17 @@ def handle_batch(args) -> int:
selected_case_ids = [case.id for case in cases]

timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
run_folder = artifacts_dir / "runs" / f"{timestamp}_{cases_path.stem}"
cfg = LayoutConfig()
default_artifacts_dir = data_dir / ".runs"
if artifacts_dir == default_artifacts_dir:
runs_root_path = runs_root(data_dir, cfg)
else:
runs_root_path = artifacts_dir / "runs"
run_dir_name = f"{timestamp}_{cases_path.stem}_{run_id}"
run_folder = runs_root_path / run_dir_name
run_layout = RunLayout(data_dir=data_dir, run_root=run_folder, run_dir_name=run_dir_name, run_id=run_id)
ensure_dirs(run_layout, cfg=cfg)
results_path = Path(args.out) if args.out else (run_folder / "results.jsonl")
artifacts_root = run_folder / "cases"
results_path.parent.mkdir(parents=True, exist_ok=True)
summary_path = results_path.with_name("summary.json")
artifacts_dir.mkdir(parents=True, exist_ok=True)
Expand Down Expand Up @@ -1018,20 +1022,29 @@ def handle_batch(args) -> int:
for case in cases:
current_case_id = case.id
try:
result = run_one(case, runner, artifacts_root, plan_only=args.plan_only, event_logger=event_logger)
result = run_one(
case,
runner,
runs_root_path,
plan_only=args.plan_only,
event_logger=event_logger,
run_dir=run_folder,
run_dir_name=run_dir_name,
schema_path=schema_path,
)
except KeyboardInterrupt:
interrupted = True
interrupted_at_case_id = current_case_id
run_dir = artifacts_root / f"{case.id}_{uuid.uuid4().hex[:8]}"
run_dir.mkdir(parents=True, exist_ok=True)
case_layout = make_case_dir(run=run_layout, case_id=case.id, suffix=None, cfg=cfg)
ensure_dirs(run_layout, case_layout, cfg=cfg)
stub = RunResult(
id=case.id,
question=case.question,
status="error",
checked=case.has_asserts,
reason="KeyboardInterrupt",
details={"error": "KeyboardInterrupt"},
artifacts_dir=str(run_dir),
artifacts_dir=str(case_layout.case_dir),
duration_ms=0,
tags=list(case.tags),
answer=None,
Expand Down Expand Up @@ -1379,8 +1392,16 @@ def handle_case_run(args) -> int:
artifacts_dir = args.artifacts_dir or (args.data / ".runs")
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
run_id = uuid.uuid4().hex[:8]
run_folder = artifacts_dir / "runs" / f"{timestamp}_{args.cases.stem}_{run_id}"
artifacts_root = run_folder / "cases"
cfg = LayoutConfig()
default_artifacts_dir = args.data / ".runs"
if artifacts_dir == default_artifacts_dir:
runs_root_path = runs_root(args.data, cfg)
else:
runs_root_path = artifacts_dir / "runs"
run_dir_name = f"{timestamp}_{args.cases.stem}_{run_id}"
run_folder = runs_root_path / run_dir_name
run_layout = RunLayout(data_dir=args.data, run_root=run_folder, run_dir_name=run_dir_name, run_id=run_id)
ensure_dirs(run_layout, cfg=cfg)
results_path = run_folder / "results.jsonl"

log_dir = artifacts_dir / "logs"
Expand All @@ -1390,7 +1411,14 @@ def handle_case_run(args) -> int:
llm = build_llm(settings)
runner = build_agent(llm, provider)

result = run_one(cases[args.case_id], runner, artifacts_root, plan_only=args.plan_only)
result = run_one(
cases[args.case_id],
runner,
runs_root_path,
plan_only=args.plan_only,
run_dir=run_folder,
schema_path=args.schema,
)
write_results(results_path, [result])
counts = summarize([result])
bad = bad_statuses("bad", False)
Expand Down Expand Up @@ -1437,7 +1465,12 @@ def handle_case_open(args) -> int:
plan = case_dir / "plan.json"
answer = case_dir / "answer.txt"
status = case_dir / "status.json"
for path in [plan, answer, status]:
events = case_dir / "events.jsonl"
error = case_dir / "error.txt"
# Schema snapshots may use the schema file extension (json/yaml), so match all.
schema_snapshots = sorted(case_dir.glob("schema_snapshot.*"))
artifacts = [plan, answer, status, events, error, *schema_snapshots]
for path in artifacts:
if path.exists():
print(f"- {path}")
return 0
Expand Down
22 changes: 19 additions & 3 deletions examples/demo_qa/chat_repl.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,13 @@
import json
import readline
import sys
import datetime
import uuid
from pathlib import Path
from typing import Optional, Sequence

from fetchgraph.utils.path_layout import LayoutConfig, RunLayout, ensure_dirs, make_case_dir

from .provider_factory import build_provider
from .runner import (
Case,
Expand Down Expand Up @@ -106,13 +109,21 @@ def start_repl(
continue

run_id = uuid.uuid4().hex[:8]
run_dir = runs_root / f"{run_id}_{uuid.uuid4().hex[:8]}"
run_dir_name = f"{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}_{run_id}"
event_logger = EventLogger(path=None, run_id=run_id)

artifacts: RunArtifacts | None = None
try:
case = Case(id=run_id, question=line, tags=[])
result = run_one(case, runner, runs_root, plan_only=False, event_logger=event_logger, run_dir=run_dir)
result = run_one(
case,
runner,
runs_root,
plan_only=False,
event_logger=event_logger,
run_dir_name=run_dir_name,
schema_path=None,
)
plan_obj = _load_json(Path(result.artifacts_dir) / "plan.json")
ctx_obj = _load_json(Path(result.artifacts_dir) / "context.json") or {}
artifacts = RunArtifacts(
Expand All @@ -132,7 +143,12 @@ def start_repl(
print(result.answer or "")
print(f"Events: {Path(result.artifacts_dir) / 'events.jsonl'}")
except Exception as exc: # pragma: no cover - REPL resilience
error_artifacts = artifacts or RunArtifacts(run_id=run_id, run_dir=run_dir, question=line)
run_root = runs_root / run_dir_name
cfg = LayoutConfig()
run_layout = RunLayout(data_dir=data_dir, run_root=run_root, run_dir_name=run_dir_name, run_id=run_id)
case_layout = make_case_dir(run=run_layout, case_id=run_id, suffix=None, cfg=cfg)
ensure_dirs(run_layout, case_layout, cfg=cfg)
error_artifacts = artifacts or RunArtifacts(run_id=run_id, run_dir=case_layout.case_dir, question=line)
error_artifacts.error = error_artifacts.error or str(exc)
last_artifacts = error_artifacts
save_artifacts(error_artifacts)
Expand Down
Loading