diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index abf43a9..53dc20c 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -6,10 +6,7 @@ "./commands/" ], "agents": [ - "./agents/browser-explorer.md", - "./agents/amphibious-generator.md", - "./agents/amphibious-verify.md" - + "./agents/amphibious-code.md" ], "skills": [] } diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index a4297ac..38051ed 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -83,6 +83,7 @@ jobs: uses: ludeeus/action-shellcheck@master with: scandir: scripts + severity: warning file-references: name: File Reference Check diff --git a/CLAUDE.md b/CLAUDE.md index 76d88e1..89a5492 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -8,7 +8,8 @@ Agent skill & knowledge corpus for the Bridgic ecosystem — providing skills, a AmphiLoop/ ├── CLAUDE.md ← this file ├── .claude-plugin/ -│ └── plugin.json ← Claude Code plugin registration +│ ├── plugin.json ← Claude Code plugin registration +│ └── marketplace.json ← marketplace metadata ├── skills/ ← domain knowledge: "what it is, how to use it" │ ├── manifest.ini ← skill source registry (repo, ref, paths) │ ├── README.md ← manifest docs + auto-generated skill table @@ -16,24 +17,26 @@ AmphiLoop/ │ ├── bridgic-amphibious/ ← dual-mode agent framework │ └── bridgic-llms/ ← LLM providers and initialization ├── agents/ ← execution methodology: "how to do it well" -│ ├── browser-explorer.md ← CLI exploration expertise -│ ├── amphibious-generator.md ← code generation expertise +│ ├── amphibious-config.md ← inline-loaded by /build Phase 2 (interactive; NOT a subagent) +│ ├── amphibious-explore.md ← abstract exploration methodology +│ ├── amphibious-code.md ← code generation expertise │ └── amphibious-verify.md ← project verification expertise ├── commands/ ← user-invocable workflows (thin orchestrators) -│ └── build-browser.md ← /build-browser pipeline -├── examples/ ← 
static example docs (not auto-scanned by Claude Code) -│ ├── build-browser-code-patterns.md ← browser-specific code patterns -│ └── build-browser-task-template.md ← TASK.md template for /build-browser Phase 1 +│ └── build.md ← /build pipeline (domain-agnostic; accepts --) +├── domain-context/ ← pre-distilled per-domain context injected by /build +│ └── browser/ ← intent.md, config.md, explore.md, code.md, verify.md +│ └── script/ ← domain-only helpers (e.g. browser-observe.sh) +├── templates/ ← static templates read by commands (not auto-scanned by Claude Code) +│ └── build-task-template.md ← unified TASK.md template (used by /build Phase 1) ├── hooks/ ← auto-loaded by Claude Code -│ ├── hooks.json ← hook definitions -│ └── README.md ← hook system documentation +│ └── hooks.json ← hook definitions └── scripts/ ├── hook/ ← hook script implementations │ └── inject-command-paths.sh ← injects PLUGIN_ROOT + PROJECT_ROOT when a bridgic command loads ├── run/ ← runtime scripts used by agents - │ ├── setup-env.sh ← auto-install uv + uv init --bare + │ ├── setup-env.sh ← verify uv toolchain (auto-installs if missing) and run `uv init --bare` in PROJECT_ROOT │ ├── check-dotenv.sh ← .env LLM configuration validation - │ └── monitor.sh ← process monitor for amphibious-verify agent + │ └── monitor.sh ← run-and-monitor for amphibious-verify agent └── maintenance/ ← plugin maintenance scripts (manual) └── sync-skills.sh ← sync skills from source repos via manifest.ini ``` @@ -43,8 +46,9 @@ AmphiLoop/ | Type | Purpose | Example | |------|---------|---------| | **Skill** | Domain knowledge reference — loaded on-demand by agents; synced from source repos via `manifest.ini` | bridgic-browser, bridgic-amphibious, bridgic-llms | -| **Agent** | Deep execution methodology — delegated by commands | browser-explorer, amphibious-generator, amphibious-verify | -| **Command** | Multi-step orchestrator invoked by user | /build-browser | +| **Agent** | Deep execution methodology — delegated by 
commands | amphibious-explore, amphibious-code, amphibious-verify | -| **Command** | Multi-step orchestrator invoked by user | /build-browser | +| **Command** | Multi-step orchestrator invoked by user | /build | +| **Domain Context** | Pre-distilled per-domain rules (`intent.md`, `config.md`, `explore.md`, `code.md`, `verify.md`) injected by `/build` when a domain is selected explicitly via `--<domain>` or auto-detected from `TASK.md` | domain-context/browser | ## Installation @@ -66,12 +70,12 @@ claude plugin install AmphiLoop | Agent | When to Use | |-------|-------------| -| **browser-explorer** | Systematically explore a website via CLI, produce structured exploration report | -| **amphibious-generator** | Generate a complete bridgic-amphibious project from a task description with optional domain context | +| **amphibious-explore** | Systematically explore a target environment via a domain toolset, produce an executable plan with stability-annotated operations | +| **amphibious-code** | Generate a complete bridgic-amphibious project from a task description with optional domain context | | **amphibious-verify** | Verify a generated amphibious project: inject debug instrumentation, run with monitoring, validate results, clean up | ## Commands | Command | When to Use | |---------|-------------| -| **/build-browser** | Turn a browser task into a working bridgic-amphibious project (parse → explore → generate → verify) | +| **/build** | Unified entry point. Turn any task into a working bridgic-amphibious project. Accepts an optional domain flag (`/build --browser`) to inject pre-distilled context from `domain-context/<domain>/`. Without a flag, auto-detects the domain from `TASK.md` (or falls back to a generic flow). Users may additionally supply their own domain references in `TASK.md`. 
| diff --git a/README-zh.md b/README-zh.md index 7c40d84..4ee1d4d 100644 --- a/README-zh.md +++ b/README-zh.md @@ -26,36 +26,53 @@ claude plugin marketplace add bitsky-tech/AmphiLoop claude plugin install AmphiLoop ``` -或从本地仓库直接安装: +或从本地仓库安装(把 `marketplace add` 指向本地目录即可——本仓库自带 `.claude-plugin/marketplace.json`,会被识别为 marketplace): ```bash git clone https://github.com/bitsky-tech/AmphiLoop.git -claude plugin install /path/to/AmphiLoop + +claude plugin marketplace add /path/to/AmphiLoop +claude plugin install AmphiLoop ``` -安装后,skills、agents 和 commands(如 `/build-browser`)会自动在 Claude Code 中可用。 +安装后,skills、agents 和 commands(如 `/build`)会自动在 Claude Code 中可用。 ## 使用 ### Commands -Commands 是用户可直接调用的工作流: +Commands 是用户可直接调用的工作流,使用 `/` 前缀触发: + +#### `/AmphiLoop:build` -#### `/build-browser` +统一流水线。描述任意任务,列出 agent 应读取的领域参考(SKILL、CLI 帮助、SDK 文档、风格指南),然后要求生成一个可运行项目: ``` -/build-browser +/AmphiLoop:build -Task: Go to https://example.com, search for "product", and extract the first 5 results +我想把 ~/data/inputs 下所有 `orders_*.csv` 汇总成一个 summary.csv — +按 customer 聚合出每个客户的总额。 ``` +**领域标志(可选)** — 在命令后追加 `--`,即可注入 `domain-context//` 下预先蒸馏好的领域上下文。当前已支持:`--browser`。 + +``` +/AmphiLoop:build --browser + +打开 https://example.com,搜索 "product",提取前 5 条结果。 +我需要一个能稳定运行的项目。 +``` + +不带标志时,`/build` 会根据 `TASK.md` 自动识别领域(若没有匹配则回退到通用流程)。用户可随时在 `TASK.md` 中补充额外领域参考。 + **执行流程:** -1. **Parse** — 从任务描述中提取 URL、目标和预期输出 -2. **Setup** — 检查环境(uv、依赖、`.env`) -3. **Explore** — 委派 `browser-explorer` agent 通过 CLI 系统性探索目标网站 -4. **Generate** — 委派 `amphibious-generator` agent 生成完整项目及所有源文件 -5. **Verify** — 委派 `amphibious-verify` agent 注入调试插桩、运行项目、验证结果 +1. **Initialize Task** — 生成 `TASK.md` 模板,用户填写目标、预期输出、**领域参考**;若未带标志则自动识别领域 +2. **Configure Pipeline** — 项目模式(Workflow / Amphiflow)、按需的 LLM 配置,以及任何领域特定配置(例如 `--browser` 模式下询问浏览器环境模式) +3. **Setup Environment** — 检查 `uv`,执行 `uv init` +4. **Explore** — 委派 `amphibious-explore` agent 读取用户提供的领域参考并探索环境 +5. **Generate** — 委派 `amphibious-code` agent 生成完整项目及所有源文件 +6. 
**Verify** — 委派 `amphibious-verify` agent 注入调试插桩、运行项目、验证结果 ### Agents @@ -63,8 +80,8 @@ Agents 是由 commands 调度的执行专家,不由用户直接调用: | Agent | 功能 | |-------|------| -| **browser-explorer** | 通过 CLI 系统性探索网站,生成结构化的探索报告和快照 | -| **amphibious-generator** | 根据任务描述和探索报告生成完整的 bridgic-amphibious 项目 | +| **amphibious-explore** | 通过领域工具集系统性探索目标环境,生成带稳定性标注的可执行操作序列与关键快照 | +| **amphibious-code** | 根据任务描述和探索报告生成完整的 bridgic-amphibious 项目 | | **amphibious-verify** | 注入调试插桩、监控运行、验证结果、清理环境 | ### Skills @@ -73,9 +90,7 @@ Skills 是领域知识参考,agent 会根据对话上下文自动加载,无 | Skill | 触发场景 | |-------|---------| -| **bridgic-basic** | 使用 Bridgic 核心框架(Worker、Automa、GraphAutoma、ASL) | | **bridgic-browser** | 使用浏览器自动化 CLI(`bridgic-browser ...`)或 Python SDK(`from bridgic.browser`) | -| **bridgic-browser-agent** | 构建浏览器自动化 Agent(OOP 模式 + 动态 ref 解析) | | **bridgic-amphibious** | 使用双模框架(`AmphibiousAutoma`、`CognitiveWorker`、`on_agent`/`on_workflow`) | | **bridgic-llms** | 初始化 LLM 提供商(`OpenAILlm`、`OpenAILikeLlm`、`VllmServerLlm`) | @@ -84,30 +99,35 @@ Skills 是领域知识参考,agent 会根据对话上下文自动加载,无 ``` AmphiLoop/ ├── .claude-plugin/ -│ └── plugin.json # 插件注册 -├── skills/ # 领域知识(5 个 skills) -│ ├── bridgic-basic/ # 核心框架概念 +│ ├── plugin.json # 插件注册 +│ └── marketplace.json # Marketplace 元数据 +├── skills/ # 领域知识(3 个 skills) +│ ├── manifest.ini # Skill 来源注册表(repo、ref、paths) +│ ├── README.md # Manifest 文档 + 自动生成的 skill 表格 │ ├── bridgic-browser/ # 浏览器自动化 CLI + SDK -│ ├── bridgic-browser-agent/ # 浏览器 Agent 模式 │ ├── bridgic-amphibious/ # 双模 Agent 框架 │ └── bridgic-llms/ # LLM 提供商集成 ├── agents/ # 执行方法论(3 个 agents) -│ ├── browser-explorer.md # CLI 探索专家 -│ ├── amphibious-generator.md # 代码生成专家 +│ ├── amphibious-explore.md # 抽象探索方法论 +│ ├── amphibious-code.md # 代码生成专家 │ └── amphibious-verify.md # 项目验证专家 ├── commands/ # 用户可调用的工作流 -│ └── build-browser.md # 端到端流水线 -├── examples/ # 静态示例文档(不会被自动扫描) -│ └── build-browser-code-patterns.md +│ └── build.md # 统一流水线(可选 -- 标志) +├── domain-context/ # /build 注入的预蒸馏领域上下文 +│ └── browser/ # intent.md / config.md / 
explore.md / code.md / verify.md(含 script/) +├── templates/ # 命令使用的静态模板(不会被自动扫描) +│ └── build-task-template.md # /build 使用的统一 TASK.md 模板 ├── hooks/ # 自动加载的事件处理器 │ └── hooks.json └── scripts/ # Hook 与工具脚本 ├── hook/ │ └── inject-command-paths.sh - └── run/ - ├── setup-env.sh # 环境配置(uv、依赖、playwright) - ├── check-dotenv.sh # LLM 模型配置校验 - └── monitor.sh + ├── run/ + │ ├── setup-env.sh # 校验 uv 工具链;在 PROJECT_ROOT 执行 uv init --bare + │ ├── check-dotenv.sh # LLM 模型配置校验 + │ └── monitor.sh # amphibious-verify 的 run-and-monitor 脚本 + └── maintenance/ + └── sync-skills.sh # 从源仓库同步 skills(基于 manifest.ini) ``` ### 各层如何协作 diff --git a/README.md b/README.md index f12c9d6..5c5533e 100644 --- a/README.md +++ b/README.md @@ -26,36 +26,52 @@ claude plugin marketplace add bitsky-tech/AmphiLoop claude plugin install AmphiLoop ``` -Or install directly from a local checkout: +Or install from a local checkout (point `marketplace add` at the local directory — it's read as a marketplace because the repo ships `.claude-plugin/marketplace.json`): ```bash git clone https://github.com/bitsky-tech/AmphiLoop.git -claude plugin install /path/to/AmphiLoop +claude plugin marketplace add /path/to/AmphiLoop +claude plugin install AmphiLoop ``` -After installation, skills, agents, and commands (e.g. `/build-browser`) are automatically available in Claude Code. +After installation, skills, agents, and commands (e.g. `/build`) are automatically available in Claude Code. ## Usage ### Commands -Commands are user-invocable workflows. Type them directly: +Commands are user-invocable workflows. Invoke them with the `/` prefix: + +#### `/AmphiLoop:build` + +Unified pipeline. Describe any task, list the domain references the agents should read (SKILLs, CLI help, SDK docs, style guides), and ask to generate a runnable project: + +``` +/AmphiLoop:build + +I want to aggregate all `orders_*.csv` files under ~/data/inputs into a single +summary.csv — one row per customer with totals. 
+``` -#### `/build-browser` +**Domain flag (optional)** — append `--<domain>` to inject pre-distilled domain context from `domain-context/<domain>/`. Currently supported: `--browser`. ``` -/build-browser +/AmphiLoop:build --browser -Task: Go to https://example.com, search for "product", and extract the first 5 results +Go to https://example.com, search for "product", and extract the first 5 results. +I want a project that can run this reliably. ``` +Without a flag, `/build` auto-detects the domain from `TASK.md` (and falls back to a generic flow if none matches). Users can always supply additional domain references in `TASK.md`. + **What happens under the hood:** -1. **Parse** — Extracts URL, goal, and expected output from your task description -2. **Setup** — Checks environment (uv, dependencies, `.env`) -3. **Explore** — Delegates to `browser-explorer` agent to systematically explore the target website via CLI -4. **Generate** — Delegates to `amphibious-generator` agent to produce a complete project with all source files -5. **Verify** — Delegates to `amphibious-verify` agent to inject debug instrumentation, run the project, and validate results +1. **Initialize Task** — Writes a `TASK.md` template where you fill in goal, expected output, and **Domain References**; auto-detects the domain if no flag was given +2. **Configure Pipeline** — Project mode (Workflow vs Amphiflow), LLM config if needed, plus any domain-specific configuration (e.g. browser environment mode when `--browser` is active) +3. **Setup Environment** — Checks `uv`, runs `uv init` +4. **Explore** — Delegates to `amphibious-explore` agent, which reads your domain references and probes the environment +5. **Generate** — Delegates to `amphibious-code` agent to produce a complete project with all source files +6. 
**Verify** — Delegates to `amphibious-verify` agent to inject debug instrumentation, run the project, and validate results ### Agents @@ -63,8 +79,8 @@ Agents are execution specialists delegated by commands. They are not called dire | Agent | What It Does | |-------|-------------| -| **browser-explorer** | Systematically explores a website via CLI, produces a structured exploration report with snapshots | -| **amphibious-generator** | Generates a complete bridgic-amphibious project from a task description and exploration report | +| **amphibious-explore** | Systematically explores a target environment via a domain-supplied toolset, produces an executable plan with stability-annotated operations and supporting snapshots | +| **amphibious-code** | Generates a complete bridgic-amphibious project from a task description and exploration report | | **amphibious-verify** | Injects debug instrumentation, runs the project with monitoring, validates results, and cleans up | ### Skills @@ -73,9 +89,7 @@ Skills are domain knowledge references that agents and Claude load automatically | Skill | Activates When | |-------|---------------| -| **bridgic-basic** | Working with Bridgic core framework (Worker, Automa, GraphAutoma, ASL) | | **bridgic-browser** | Using browser automation via CLI (`bridgic-browser ...`) or Python SDK (`from bridgic.browser`) | -| **bridgic-browser-agent** | Building browser automation agents with OOP patterns and dynamic ref resolution | | **bridgic-amphibious** | Building dual-mode agents with `AmphibiousAutoma`, `CognitiveWorker`, `on_agent`/`on_workflow` | | **bridgic-llms** | Initializing LLM providers (`OpenAILlm`, `OpenAILikeLlm`, `VllmServerLlm`) | @@ -84,30 +98,35 @@ Skills are domain knowledge references that agents and Claude load automatically ``` AmphiLoop/ ├── .claude-plugin/ -│ └── plugin.json # Plugin registration -├── skills/ # Domain knowledge (5 skills) -│ ├── bridgic-basic/ # Core framework concepts +│ ├── plugin.json # Plugin 
registration +│ └── marketplace.json # Marketplace metadata +├── skills/ # Domain knowledge (3 skills) +│ ├── manifest.ini # Skill source registry (repo, ref, paths) +│ ├── README.md # Manifest docs + auto-generated skill table │ ├── bridgic-browser/ # Browser automation CLI + SDK -│ ├── bridgic-browser-agent/ # Browser agent patterns │ ├── bridgic-amphibious/ # Dual-mode agent framework │ └── bridgic-llms/ # LLM provider integration ├── agents/ # Execution methodology (3 agents) -│ ├── browser-explorer.md # CLI exploration expert -│ ├── amphibious-generator.md # Code generation expert +│ ├── amphibious-explore.md # Abstract exploration methodology +│ ├── amphibious-code.md # Code generation expert │ └── amphibious-verify.md # Project verification expert ├── commands/ # User-invocable workflows -│ └── build-browser.md # End-to-end pipeline -├── examples/ # Static example docs (not auto-scanned) -│ └── build-browser-code-patterns.md +│ └── build.md # Unified pipeline (accepts optional -- flag) +├── domain-context/ # Pre-distilled per-domain context injected by /build +│ └── browser/ # intent.md, config.md, explore.md, code.md, verify.md (+ script/) +├── templates/ # Static templates read by commands (not auto-scanned) +│ └── build-task-template.md # Unified TASK.md template used by /build ├── hooks/ # Auto-loaded event handlers │ └── hooks.json └── scripts/ # Hook & utility implementations ├── hook/ │ └── inject-command-paths.sh - └── run/ - ├── setup-env.sh # Environment setup (uv, deps, playwright) - ├── check-dotenv.sh # LLM configuration validation - └── monitor.sh + ├── run/ + │ ├── setup-env.sh # Verify uv toolchain; uv init --bare in PROJECT_ROOT + │ ├── check-dotenv.sh # LLM configuration validation + │ └── monitor.sh # Run-and-monitor for amphibious-verify + └── maintenance/ + └── sync-skills.sh # Sync skills from source repos via manifest.ini ``` ### How the Layers Connect diff --git a/agents/amphibious-code.md b/agents/amphibious-code.md new file mode 
100644 index 0000000..e1fc912 --- /dev/null +++ b/agents/amphibious-code.md @@ -0,0 +1,323 @@ +--- +name: amphibious-code +description: >- + Code generation specialist for bridgic-amphibious projects. Takes a task + description with optional domain context and produces a complete, runnable + project at //: scaffold via CLI, then adapt + the generated amphi.py and write main.py + supporting files following + framework best practices. +tools: ["Bash", "Read", "Grep", "Glob", "Write", "Edit"] +model: opus +--- + +# Amphibious Code Agent + +You are a bridgic-amphibious code generation specialist. You receive a task description with optional domain context and produce a complete, working bridgic-amphibious project. + +## Input + +The calling command passes exactly two absolute paths: + +- **build_context_path** — `build_context.md` (schema in `amphibious-config.md` Step 5). Read once. For this agent: `## Task → file` (task brief), `## Pipeline` (mode / llm_configured / domain_config — these drive what code to generate), `## References`, and `## Outputs → exploration_report` (the spine of the code). The references and exploration report carry every fact you need; open them on demand, not upfront. +- **domain_context_path** — a `domain-context//code.md` path, or the literal `none`. **Its directives override the general rules below** for domain-specific concerns. + +## Bootstrap + +Before any other work, batch-load the required startup files. Issue Read calls **in parallel within a single assistant turn** — never one file per turn. + +- **Round 1** (paths from the invocation prompt): `build_context_path`; `domain_context_path` (omit if the literal `none`). +- **Round 2** (paths discovered in `build_context.md`, issued as one second turn): the file under `## Task → file`; the file under `## Outputs → exploration_report`. + +Skill files (see Skill References below) and `## References` stay on-demand — do not batch them here. 
+ +## Skill References (read on demand) + +- `{PLUGIN_ROOT}/skills/bridgic-amphibious/SKILL.md` — framework usage patterns, code examples, best practices. +- `{PLUGIN_ROOT}/skills/bridgic-llms/SKILL.md` — LLM provider initialization (read only when `llm_configured = yes`). + +## Output Layout + +The agent installs its runtime dependencies into PROJECT_ROOT's uv env (creating it if absent) and produces a code-only `/` subdirectory. The structure inside `/` may follow the pattern below: + +``` +/ +├── pyproject.toml # uv project manifest +├── uv.lock # resolution lockfile +├── .venv/ # uv-managed virtualenv +├── .env # only when llm_configured = yes +└── / # this agent's generator_project — code only + ├── amphi.py # scaffold-created; this agent edits it + ├── main.py # this agent creates: entry point (LLM init + agent.arun) + ├── README.md # short, operational + ├── log/ # runtime logs land here (configured in main.py) + └── result/ # task outputs land here +``` + +--- + +## Phase 1: Initialize Project Skeleton + +### 1.1 Pick a project name + +Derive a short snake_case slug from the task description (≤30 chars, `[a-z0-9_]+`). If `//` already exists, append `_2`, `_3`, … until free. Fallback when no good slug derives: `amphi_project`. + +### 1.2 Install runtime dependencies + +Run the bridgic-amphibious installer against PROJECT_ROOT. It creates `pyproject.toml` if absent and `uv add`s the runtime packages (`bridgic-core`, `bridgic-amphibious`, `bridgic-llms-openai`, `python-dotenv`); idempotent if PROJECT_ROOT is already a uv project: + +```bash +mkdir -p "/" +bash "{PLUGIN_ROOT}/skills/bridgic-amphibious/scripts/install-deps.sh" \ + "" +``` + +### 1.3 Scaffold `amphi.py` + +```bash +cd "/" +uv run bridgic-amphibious create --task "" +``` + +### 1.4 Create runtime directories + +```bash +mkdir -p "//log" \ + "//result" +``` + +- `log/` receives runtime logs (wired in main.py). 
`result/` receives task outputs — every output file the project produces lands here as `result/`, so downstream orchestration finds outputs uniformly. +- `.env` stays at PROJECT_ROOT; `main.py` reads it via `load_dotenv(Path(__file__).parent.parent / ".env")`. No relocation. + +--- + +## Phase 2: Implement `amphi.py` + +Open the scaffolded `amphi.py` and adapt every section. The order below matches dependency direction — context first, hooks/tools/helpers next, then orchestration methods. + +### 2.1 Context (`CognitiveContext` subclass) + +Add fields the agent needs at runtime. Two visibility rules: + +- **Non-serializable resources** (browser session, db client, http client) — mark with `json_schema_extra={"display": False}`. They are meaningless to the LLM and serializing them wastes tokens and may crash JSON encoding. +- **State-tracking fields** (processed item set, counters, progress markers) — leave visible. The LLM uses them to reason about progress during agent fallback. + +```python +from typing import Any +from pydantic import Field +from bridgic.amphibious import CognitiveContext + +class AmphiContext(CognitiveContext): + # Non-serializable resource — hidden from LLM + browser: Any = Field(default=None, json_schema_extra={"display": False}) + # State-tracking — visible to LLM + processed_ids: set[str] = Field(default_factory=set) +``` + +### 2.2 Hooks (override only what you need) + +Skip a hook entirely if your task doesn't need it — don't override an empty method. + +| Hook | When called | Use for | +|------|-------------|---------| +| `observation(self, ctx)` | Before each OTC cycle and each `yield` in workflow | Fetch live state (read page snapshot, query DB, GET /status). Return value populates `ctx.observation`. | +| `before_action(self, decision_result, ctx)` | Before each tool execution | Track items being processed, sanitize tool args (fix LLM formatting), gate actions. 
| +| `after_action(self, step_result, ctx)` | After each tool execution | Refresh `ctx.observation` after a state-changing action, accumulate results, side effects, cleanup. | + +Domain-specific hook patterns (e.g. browser's `after_action` refreshing observation on `wait_for` completion) come from the domain-context file. + +### 2.3 `on_workflow` — only for `WORKFLOW` or `AMPHIFLOW` + +An async generator that yields `ActionCall` / `AgentCall` / `HumanCall`. Translate the exploration report's "Operation Sequence" into yields, preserving order, parameters, and stability annotations. + +**Best practices**: + +1. **Every `ActionCall` includes `description="..."`.** The description doubles as debug-log text *and* — critically — as the context the LLM receives when a step fails and triggers agent fallback. Without it, the fallback agent has no idea what the failed step was trying to do. + +2. **Operation sequence lives in `on_workflow` itself.** The explore report's "Operation Sequence" maps **one-to-one** to yields inside `on_workflow`. Do not push the yield sequence into helper functions or sibling `async def` methods that just yield through — that turns the workflow into hide-and-seek and makes verify/fallback harder. Sub-generators are only justified when the **same** yielded sub-sequence repeats with parameter variation (e.g. per-row processing called from a `for` loop); a sub-generator called once is bloat — inline it. + +3. **Stable identifiers hardcoded; volatile identifiers extracted from `ctx.observation`.** The exploration report records STABLE values (like browser refs) verbatim — `# ref=5dc3463e STABLE`. **Use those literals directly.** Hardcode them as module-level constants near the top of `amphi.py` and reference them inline at the yield site. **Never write a `find__ref(observation)` parser for a STABLE element** — the value is already known; re-deriving it by regex is pure token waste and breaks the moment the snapshot text format shifts. 
Helpers (see 2.7) exist only for VOLATILE values. + + ```python + # ❌ Wrong — re-discovering a STABLE ref by parsing the snapshot + def find_search_button_ref(observation: str) -> Optional[str]: + match = re.search(r'button\s+"Search"\s+\[ref=([0-9a-f]+)\]', observation) + return match.group(1) if match else None + + # ✅ Right — recorded once during exploration, hardcoded once in code + SEARCH_BUTTON_REF = "4084c4ad" # STABLE per exploration_report.md §2 step 5 + yield ActionCall("click_element_by_ref", description="Click Search", ref=SEARCH_BUTTON_REF) + ``` + +4. **Workflow-first principle — prefer `ActionCall` over `AgentCall`.** Use `AgentCall` only for genuinely semantic sub-tasks (analyze, categorize, summarize). Use `HumanCall` only for confirmations the user must resolve. + + ```python + yield ActionCall("save_record", description="Persist row to DB", **row) # Deterministic + yield AgentCall(goal="Categorize the record", tools=["tag_record"], max_attempts=3) # Semantic + yield HumanCall(prompt="Confirm before deleting?") # Human-only + ``` + +5. **Compute dynamic values at runtime.** Relative phrases in the task description ("past 7 days", "today", "last 30 days") must be computed inside the generator with `datetime` etc., not hardcoded at write time. + +6. **Keep generator-internal logic minimal.** Code between yields runs in the generator body. **If it raises, the generator is unrecoverable** — `asend()` cannot resume past an exception, so AMPHIFLOW skips per-step retry and jumps directly to full `on_agent` fallback. Keep inline code to variable assignment and pure helpers; push risky operations (network calls, parsing untrusted input) into `ActionCall`-wrapped tools where they can be retried. + +### 2.4 `on_agent` — only for `AGENT` or `AMPHIFLOW` + +Declare `think_unit`s as class attributes; await them in `on_agent`. Each `think_unit` wraps a `CognitiveWorker` that runs an OTC loop until completion or `max_attempts` exhausts. 
+ +```python +from bridgic.amphibious import CognitiveWorker, think_unit + +class Amphi(AmphibiousAutoma[AmphiContext]): + planner = think_unit( + CognitiveWorker.inline("Look up X then summarise the result."), + max_attempts=5, + ) + + async def on_agent(self, ctx): + await self.planner +``` + +**Best practices**: + +- **One `think_unit` = one cohesive sub-task.** Multi-phase work splits into multiple think_units chained in `on_agent`. +- **`max_attempts` budget**: 3–5 for narrow tasks, up to 10 for open-ended exploration. Higher budgets only help if the worker actually converges. +- **`request_human` is auto-injected.** The framework adds `request_human` to every agent's tool list automatically — the LLM can call it without you listing it in `tools=[...]`. Don't double-register unless you want to be explicit. + +### 2.5 Mode → method mapping (which methods to override) + +| Mode (`build_context.md → ## Pipeline → mode`) | Override `on_workflow` | Override `on_agent` | +|---|:-:|:-:| +| `workflow` | required | omit (no fallback path) | +| `amphiflow` | required | required (fallback target) | + +`AGENT` and `AUTO` are not surfaced by /build — they aren't relevant to this agent. + +### 2.6 Task tools (functions registered with `FunctionToolSpec`) + +Inline in `amphi.py` by default. Split into a sibling `tools.py` only when there are >5 tools or >300 lines of tool code. + +```python +from bridgic.core.agentic.tool_specs import FunctionToolSpec + +async def save_record(item_id: str, title: str, detail: str) -> str: + """Persist an extracted record to result/records.jsonl. + + Parameters + ---------- + item_id : str + Stable unique identifier from the source page. + title : str + Display title. + detail : str + Free-text body. + """ + ... + +TASK_TOOLS = [FunctionToolSpec.from_raw(save_record)] +``` + +The docstring becomes the description the LLM sees — make it precise and parameter-accurate. 
+ +### 2.7 Helpers (pure functions for parsing/transformation) + +Inline in `amphi.py` as module-level functions. Split into `helpers.py` only when extraction logic is large or shared across modules. + +**Hard constraints**: + +- **Pure.** No I/O, network, SDK calls, `await`, or `yield`. Side-effecting actions are *task tools* (2.6), not helpers. +- **VOLATILE-only.** Helpers extract values re-observed at runtime; STABLE values are hardcoded constants (see 2.3 #3). +- **No yielding sub-routines.** The operation sequence stays in `on_workflow` (see 2.3 #2). +- **One helper per concern.** When several VOLATILE values come out of the same observation block, return them together (`dict` / `tuple` / dataclass) — don't write a separate finder per field. + +**Base every helper on actual sample data** from `/.bridgic/explore/` artifacts — never guess data shape. Helpers that look reasonable but don't match real data are the most common verification failure. + +--- + +## Phase 3: Validate Helpers + +After `amphi.py` is written, validate each helper against real exploration samples: + +```bash +cd "/" +uv run python -c " +from amphi import extract_items +sample = open('/.bridgic/explore/snapshot_xxx.txt').read() +print(extract_items(sample)) +" +``` + +If output is empty or wrong-shape, fix the helper and re-test. Helpers are the most fragile layer — get them right before main.py. + +--- + +## Phase 4: Create `main.py` + +The entry point. Write `main.py` at `//main.py`: + +```python +import asyncio +import logging +import os +from pathlib import Path + +from dotenv import load_dotenv +from bridgic.amphibious import RunMode + +# Only when llm_configured = yes: +# from bridgic.llms.openai import OpenAILlm, OpenAIConfiguration + +from amphi import Amphi, TASK_TOOLS + +LOG_DIR = Path(__file__).parent / "log" + + +async def main(): + # .env lives at PROJECT_ROOT (one level above this file's directory). 
+ load_dotenv(Path(__file__).parent.parent / ".env") + + LOG_DIR.mkdir(exist_ok=True) + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", + handlers=[ + logging.FileHandler(LOG_DIR / "run.log"), + logging.StreamHandler(), + ], + ) + + # llm_configured = no: + llm = None + # llm_configured = yes: + # llm = OpenAILlm( + # api_key=os.getenv("LLM_API_KEY"), + # api_base=os.getenv("LLM_API_BASE"), + # configuration=OpenAIConfiguration( + # model=os.getenv("LLM_MODEL"), + # temperature=0.0, + # max_tokens=16384, + # ), + # timeout=180.0, + # ) + + agent = Amphi(llm=llm, verbose=True) + await agent.arun( + goal="", + tools=TASK_TOOLS, + mode=RunMode.WORKFLOW, # or RunMode.AMPHIFLOW per build_context.md + ) + + +if __name__ == "__main__": + asyncio.run(main()) +``` + +**Best practices**: + +1. **Args parsing only when the task requires runtime parameters.** Don't add `argparse` for its own sake. +2. **LLM block conditional on `llm_configured`.** When `no`, pass `llm=None` and omit the imports — explicit beats implicit. When `yes`, instantiate `OpenAILlm` from env vars (loaded by `load_dotenv()`). +3. **Tool assembly**: combine domain tools (e.g. browser tools from a `BrowserToolSetBuilder`) with `TASK_TOOLS` from `amphi.py` into one list passed to `agent.arun(tools=...)`. The framework distributes them to both `on_workflow` steps and `on_agent` think units. +4. **Mode**: pass `mode=RunMode.WORKFLOW` or `mode=RunMode.AMPHIFLOW` explicitly per `build_context.md → ## Pipeline → mode`. Don't rely on `AUTO` — explicit mode keeps verify behavior stable. +5. **Logging wired only here** — keep `amphi.py` free of `logging.basicConfig`. Logs land in `log/run.log` so monitor.sh and CI can aggregate uniformly. +6. **No `config.py` by default.** Inline `os.getenv` in main.py. Split into a `config.py` only if env loading grows complex (many vars, validation, defaults). 
+ +--- diff --git a/agents/amphibious-config.md b/agents/amphibious-config.md new file mode 100644 index 0000000..33b4213 --- /dev/null +++ b/agents/amphibious-config.md @@ -0,0 +1,140 @@ +--- +name: amphibious-config +description: >- + Configuration specialist for the bridgic-amphibious build pipeline. Drives + interactive selection of project mode (Workflow / Amphiflow) and LLM + configuration, applies any domain-specific configuration from + domain-context//config.md, runs the uv environment setup script, + and writes the consolidated build_context.md that every later phase reads. + Interactive — runs inline in the calling command's thread (needs + AskUserQuestion), not as a subagent. +tools: ["AskUserQuestion", "Bash", "Read", "Write"] +--- + +# Amphibious Config Agent + +You are a build-pipeline configuration specialist. Your job is to interactively determine project-mode / LLM / domain-specific settings, run environment setup, and write the consolidated `build_context.md` that every later agent reads. + +## Input + +The calling command passes the inputs already established in Phase 1 of `/build`: + +- **PLUGIN_ROOT / PROJECT_ROOT** — absolute path placeholders used throughout this document. +- **SELECTED_DOMAIN** — resolved domain name (e.g. `browser`), or unresolved if the user opted into the generic flow. +- **TASK.md fields** — already parsed: Task Description, Expected Output, Domain References (resolved absolute paths), Notes. + +Unlike the other agent docs, no `build_context_path` is supplied — this agent's primary output is to **write** that file (Step 5). + +## Bootstrap + +This agent runs interactively from the very first step; there are no startup files to batch-load. Each Step below opens whatever it needs on demand. + +--- + +## Step 1: Project Mode + +Present via `AskUserQuestion`: + +> Choose project mode: +> +> **1. Workflow** — Every step runs deterministically. Best for stable, predictable tasks. +> +> **2. 
Amphiflow** — Every step runs normally, but switches to AI when something unexpected happens (unclear state, unrecoverable error, ambiguous branch). Requires LLM config.
+
+Record the chosen `project_mode` (`workflow` or `amphiflow`). It will determine the `mode=` argument passed to `agent.arun()` during code generation (Phase 4 of `/build`).
+
+## Step 2: LLM Configuration
+
+Decide whether to set up LLM — set `llm_configured` to `yes` or `no`.
+
+- **If `project_mode == amphiflow`**: LLM is required. Run
+
+  ```bash
+  bash "{PLUGIN_ROOT}/scripts/run/check-dotenv.sh"
+  ```
+
+  - Exit 0: variables present — proceed.
+  - Exit 1: list missing variables; create `.env`, ask the user to fill it, re-run the check; do not proceed until exit 0.
+
+  Set `llm_configured = yes`.
+
+- **If `project_mode == workflow`**: analyze the task description.
+
+  - **If task contains AI-suggestive operations** (e.g. "extract key information", "analyze content", "generate a report"), ask via `AskUserQuestion`:
+
+    > Your task description mentions operations that may benefit from AI/LLM capabilities (e.g. content analysis, intelligent extraction). Configure an LLM?
+    >
+    > **1. Yes** — configure LLM for AI-powered processing.
+    > **2. No** — run purely with deterministic scripts, no AI.
+
+    On **1** → run `check-dotenv.sh` (same exit-handling as above), then `llm_configured = yes`.
+    On **2** → `llm_configured = no`.
+
+  - **If task is purely mechanical** (deterministic file operations, fixed-shape API calls, scripted transformations) → set `llm_configured = no` without asking.
+
+## Step 3: Domain-specific Configuration
+
+If `SELECTED_DOMAIN` is resolved AND `{PLUGIN_ROOT}/domain-context/{SELECTED_DOMAIN}/config.md` exists, read that file and follow its instructions verbatim — it tells you which questions to ask the user (still via `AskUserQuestion`) and which keys to record. Capture each answer as `domain_config[<key>] = <answer>`.
+
+If no `config.md` exists, skip this step and treat `domain_config` as empty. 
+ + +## Step 4: Environment Setup + +### 4.1 uv toolchain + PROJECT_ROOT uv project + +```bash +bash "{PLUGIN_ROOT}/scripts/run/setup-env.sh" "{PROJECT_ROOT}" +``` + +The script verifies `uv` is on PATH (auto-installs if missing) and runs `uv init --bare` in `PROJECT_ROOT` if no `pyproject.toml` is present. After it exits 0, `PROJECT_ROOT` is a uv project — every later phase (`install-deps.sh`, `amphibious-code` Phase 1.2, etc.) `uv add`s into this same env. + +- **Exit 0**: capture the `ENV_READY` block from stdout — it goes into `build_context.md` below. +- **Exit non-zero**: surface the error and **stop the entire pipeline**. + +### 4.2 Domain-specific tool installation + +**By Reference**. The `amphibious-explore` agent handles it during its own **Analyse Task** phase, using the user-supplied references (which typically include installation instructions). + +## Step 5: Write Build Context + +Write the consolidated context to `{PROJECT_ROOT}/.bridgic/build_context.md`. This file is the **single index** for the explore / code / verify agents — it tells them *what was decided* in Phases 1–2 and *where to find* every other artifact (TASK.md, user-supplied references, env, prior phase outputs). Agents open the heavier files (TASK.md, references, SKILL.md) only when the work demands it. + +Use this exact structure (omit any section whose body would be empty): + +```markdown +# Build Context + +## Task +- file: {PROJECT_ROOT}/TASK.md +- domain: + +## Pipeline +- mode: +- llm_configured: +- domain_config: + : + +## References +- + +## Environment +- plugin_root: {PLUGIN_ROOT} +- project_root: {PROJECT_ROOT} +- env_ready: | + + +## Outputs +- exploration_report: (filled by Phase 3) +- generator_project: (filled by Phase 4) +``` + +Section semantics: + +- **Task** — *what* this build is. `file:` points to the user-authored TASK.md (read on demand for description / expected_output / notes); `domain:` is the resolved selection from Phase 1. 
+- **Pipeline** — *how* the generated project should run. `domain_config:` holds the answers from Step 3; if Step 3 captured nothing, omit the `domain_config` line entirely. +- **References** — absolute paths to user-supplied reference material (resolved in Phase 1 from TASK.md "Domain References"). Read on demand. Omit the section if the user supplied none. +- **Environment** — toolchain anchors. `env_ready:` is the verbatim block printed by `setup-env.sh` — it confirms `uv` is available and includes the current `pyproject.toml` so later agents see which packages and dependencies the shared uv env already has. +- **Outputs** — placeholders that later phases fill in. Phase 3 replaces `(filled by Phase 3)` with the resolved exploration_report path; Phase 4 replaces `(filled by Phase 4)` with the generator_project path. + +After writing the file, return control to the calling command — the next phase is Exploration. diff --git a/agents/amphibious-explore.md b/agents/amphibious-explore.md new file mode 100644 index 0000000..1ea373f --- /dev/null +++ b/agents/amphibious-explore.md @@ -0,0 +1,208 @@ +--- +name: amphibious-explore +description: >- + Abstract exploration methodology. Decomposes a task + into an executable plan by probing the target environment with a + domain-supplied toolset, and classifies every parameter as stable (known + during exploration and reusable across runs) or volatile (must be + re-observed each time the plan is carried out). Produces a pseudocode + operation sequence with inline stability annotations plus any key-artifact + files capturing the observed states the plan references. +tools: ["Bash", "Read", "Grep", "Write", "Edit"] +--- + +# Amphibious Explore Agent + +You are an exploration specialist. Your job is to produce a precise, concise, and self-contained report. + +## Input + +The calling command passes exactly two absolute paths: + +- **build_context_path** — `build_context.md` (schema in `amphibious-config.md` Step 5). Read once. 
For this agent: `## Task → file` (open for the task brief), `## References` (user-supplied material — SKILLs, CLI dumps, SDK docs, style guides; open each on demand in **Analyse Task**, not upfront), `## Environment` (toolchain paths). +- **domain_context_path** — a `domain-context//explore.md` path, or the literal `none`. **Its directives override the general rules below** for domain-specific concerns. + +## Bootstrap + +Before any other work, batch-load the required startup files. Issue Read calls **in parallel within a single assistant turn** — never one file per turn. + +- **Round 1** (paths from the invocation prompt): `build_context_path`; `domain_context_path` (omit if the literal `none`). +- **Round 2** (paths discovered in `build_context.md`, issued as one second turn): the file under `## Task → file`. + +References (`## References`) stay on-demand — do not batch them here. + +## Analyse Task + +### Distill cited external references in the task description + +Read each reference through **two lenses** (the same reference may carry both — apply each in turn; when multiple references are in play, cite the source so conflicts can be reconciled later): + +#### Operational / tool-based material + +Material that teaches *how to act on the environment* (framework manuals, CLI help, SDK docs). + +- Read entry points (SKILL.md, `--help`, SDK index). +- **Derive the observation mechanism** — which command/call returns the *current* environment state. The *Core Loop* requires a fresh observation before every action because every action may have changed the state the next decision depends on. Identify the concrete command + the trigger conditions under which it must re-run. +- Run the observation command once to see the actual output shape and how identifiers appear. +- Identify the cleanup command(s) that release resources at end-of-run. 
+
+#### Guidance-based material
+
+Material that prescribes *rules, patterns, or requirements* (style guides, architectural constraints, domain DOs and DON'Ts).
+
+- Skim for what must be done, must be avoided, output shape constraints, edge cases the plan must handle.
+- Discard non-actionable background.
+- Preserve directives verbatim or near-verbatim — do not paraphrase away their specificity.
+
+#### Distilling the findings
+
+Fold everything above into the report's **Domain Guidance** section (shape in Generate Report §1). Terse — record only what a future executor needs to act correctly.
+
+
+## Explore Task
+
+With the domain context understood, decompose the task itself. This produces the pseudocode operation sequence plus any supporting artifacts.
+
+### The Core Loop
+
+For every step, follow the loop:
+
+1. **Observe** — enter every iteration holding a **fresh view** of the environment's current state, so that Decide reasons about reality rather than memory. There are two ways to satisfy this:
+   - *Default — run the observation command.* Invoke the observation command(s) derived in **Analyse Task** at the start of the iteration. This is the safe path and is the expected behavior unless the shortcut below clearly applies.
+   - *Shortcut — reuse the prior Act's return.* If the previous iteration's Act already returned a value that fully describes the post-action state, you are already holding a fresh view and may proceed directly to Decide without a separate observation call.
+2. **Decide** — compare observed state against the task goal; pick the next action from the tool's action vocabulary (consult SKILL.md / `--help` / SDK docs as needed). Respect any guidance-based directives extracted in **Analyse Task**.
+3. **Act** — execute the chosen action.
+4. **Record** — capture the operation, its parameters, and each parameter's stability classification (see below).
+
+Do not advance the plan without observing first. 
Classify a parameter **only when its value varies across iterations or runs** — constants, verbatim literals, and values fully determined by the task description need no STABLE tag (their stability is implicit). Reserve annotations for genuine choice points where a future executor must decide between "reuse the recorded value" and "re-observe at runtime". + +### What to Record + +#### 1. Critical Operation Sequence + +This is the primary deliverable — **the complete task structure expressed as an executable flow**. + +Firstly, Capture every structural element needed to reproduce the task end-to-end: + +- **Order** — the exact sequence of operations from first to last. +- **Loops** — collection-driven iteration (`FOR`) and condition-driven repetition (`WHILE`), together with what the loop body does. +- **Branches** — divergence on observed state (`IF` / `ELSE`), together with what each side does. + +To record loops and branches faithfully, you must **probe their boundaries and alternate paths during exploration** — not only the happy path. Walk at least one full iteration of every loop and check its termination condition (last item, empty collection, exit signal); observe both sides of every branch (success and error, present and absent). Without this, the control flow in your pseudocode will be guesswork. + +**Scope the probing**: only probe branches whose outcome changes the **recorded output or the next operation chosen**. Cosmetic variations that the plan would handle identically (e.g. styling differences, optional UI hints, alternative phrasings of the same success message) need not be probed — note them as "(cosmetic, ignored)" if at all. The goal is faithful control flow, not exhaustive enumeration. + +Secondly, mark **human handoffs** — points where the task requires intervention that automation cannot resolve alone (authentication wall, CAPTCHA, destructive-confirm dialog, permissions you lack, ambiguous UI, unexpected error state). 
Record each as a `HUMAN:` step in the plan, describing what the human must do and the signal to resume.
+
+When you encounter a handoff during exploration, you **MUST** request human intervention:
+
+- **Request** specific human intervention.
+- **Resume** exploration from the same point once the human confirms the obstacle is cleared.
+
+Finally, record only the **minimal chain of operations** needed to achieve the goal. Exclude:
+
+- Observation commands (they happen on every step; they are not part of the plan).
+- Waiting, timing, and intermediate file reads.
+- Exploratory dead-ends you backed out of.
+
+#### 2. Parameter Stability Classification
+
+For each parameter of each recorded operation, decide and annotate:
+
+- **Stable** — the value is known now during exploration and remains the same on future runs — it can be recorded verbatim in the plan. Examples: a URL, an element identifier that survives reloads, a constant query string, a known file path.
+- **Volatile** — the value is only determinable by inspecting the environment when the plan is carried out, and must be re-observed on every run. Examples: a list-item identifier that regenerates every page load, an ID returned by a prior step at run time, a filename chosen from a glob match, a session-scoped token.
+
+Use the domain context's stability vocabulary if supplied; otherwise default to `STABLE|VOLATILE`. Attach the classification **inline** on the parameter — it is never a separate section.
+
+#### 3. Save Key Artifacts
+
+**Skip this section entirely if the Operation Sequence contains no `VOLATILE`-tagged parameter.** Artifacts exist solely to ground volatile references in inspectable samples — without volatile data, there is nothing to ground.
+
+Otherwise, save the raw observation output of any state that contains **volatile parameters or fields**. 
These artifacts preserve the exact structure where those volatile values appear, grounding every `VOLATILE` reference in the plan in a concrete, inspectable sample. + +Save only states that contain extractable volatile data, not every intermediate observation. Use descriptive filenames (e.g., `list_state.txt`, `detail_state.txt`). + +### Cleanup + +After exploration, run the cleanup protocol recorded in the Domain Guidance to release any resources held. This is a process step, not part of the report. + +## Generate Report + +Write `exploration_report.md` plus all saved artifact files. The report has **up to three sections** — §1 is optional, §3 is omitted when no volatile data was captured. + +### 1. Domain Guidance + +Based on the results of the Analyse Task, relevant insights have been obtained through analysis. +- If there is any, add this section to the report and explain. Keep each entry to a few lines: + - **Observation protocol** — the concrete command(s) that surface the current environment state. + - **Cleanup protocol** — command(s) to release resources when a run ends. + - **Applicable directives** — rules, patterns, and constraints the plan must respect (near-verbatim; do not paraphrase away specificity). Cite the source reference when multiple are in play. +Otherwise, this section is not necessary. + +### 2. Operation Sequence + +A pseudocode-style list. Use indentation and control-flow keywords (`FOR`, `WHILE`, `IF` / `ELSE`) to express loops, conditions, and nesting. + +**Format**: each step line carries **only the action** (verb + brief target name). All parameters, identifiers, stability tags, and behavioral notes go on `#` comment lines directly below the step. This keeps the action skeleton scannable on its own and pushes detail into a uniform sub-block. + +**Example (browser domain)**: + +``` +1. open + # url= + # mode=headed +2. IF login page detected: + 2.1 HUMAN: log in manually + # resume signal: dashboard is visible +3. 
fill start_date + # ref=5dc3463e STABLE + # "开始日期" textbox, YYYY-MM-DD +4. fill end_date + # ref=a9cca048 STABLE + # "结束日期" textbox, YYYY-MM-DD +5. click search + # ref=4084c4ad STABLE + # results refresh in-place +6. WHILE next_page not disabled: + 6.1 FOR each row in current_page: + # row refs VOLATILE + 6.1.1 extract detail_url + # source: row's link + # URL pattern: /detail?order_id=... + 6.1.2 open detail_url in new tab + 6.1.3 extract detail fields + # fields: order_no, amount, ... + 6.1.4 close tab + 6.2 click next_page + # ref=cbac3327 STABLE +``` + +**Example (filesystem domain, hypothetical)**: + +``` +1. list entries + # path=/input + # glob=*.csv STABLE +2. FOR each file in matched: + # paths VOLATILE + 2.1 read file + 2.2 parse rows + 2.3 write result + # path=/output/.json + # VOLATILE — derived from each matched file +``` + +**Rules**: + +- **Only critical operations**: the minimal sequence needed to achieve the task. Do not include observation, waiting, cleanup, or internal file reads — those are implicit in the loop, not part of the plan. +- **Action-only step lines**: the step line is `. ` (or a control-flow keyword). No values, refs, stability tags, or notes on the step line. +- **Parameters and notes as `#` comments**: every parameter (`key=value`), stability tag (` STABLE` / `VOLATILE`), and behavioral note goes on its own `#` line directly below the step. Indent the `#` **three spaces deeper than the step's leading indent**. One fact per line. Never place comments at line-end; never align comments across lines by column. +- **Control flow**: indent to show nesting; use explicit keywords: + - `WHILE :` — condition-driven repetition: repeat until a termination signal is observed (total iterations unknown upfront). + - `FOR each in :` — collection-driven iteration: enumerate a known/visible set. + - `IF :` / `ELSE:` — branch on observed state. `ELSE:` sits at the same indent as `IF`; sub-numbers continue sequentially under the same parent. 
+- **Human handoffs**: `HUMAN:` is a special marker. Describe what the human must do on the step line; put the resume signal on a `#` line below. + +### 3. Artifact Files + +List saved artifact paths. Each entry annotates **what extractable content** the file contains — enough for a reader to know which file documents which volatile data without opening every one. diff --git a/agents/amphibious-generator.md b/agents/amphibious-generator.md deleted file mode 100644 index d6e59ec..0000000 --- a/agents/amphibious-generator.md +++ /dev/null @@ -1,114 +0,0 @@ ---- -name: amphibious-generator -description: >- - Code generation specialist for bridgic-amphibious projects. Takes a task - description with optional domain context and produces a complete working - project: scaffold via CLI, then write agents.py, tools.py, workers.py, - helpers.py, config.py, main.py following framework best practices. -tools: ["Bash", "Read", "Grep", "Glob", "Write", "Edit"] -model: opus ---- - -# Amphibious Generator Agent - -You are a bridgic-amphibious code generation specialist. You receive a task description with optional domain context and produce a complete, working bridgic-amphibious project. - -## Dependent Skills - -Before starting, read and load all dependent skills listed below. - -- **bridgic-amphibious** — `skills/bridgic-amphibious/SKILL.md` -- **bridgic-llms** — `skills/bridgic-llms/SKILL.md` - -## Input - -You receive from the calling command: -- **Task description**: Goal, expected output, constraints -- **Domain context** (optional): Domain-specific instructions provided by the command — tool setup patterns, observation patterns, state tracking patterns, per-file overrides, and reference files to read. When provided, domain context takes precedence over the general rules below for domain-specific concerns. 
-- **Auxiliary context** (optional): Auxiliary information about the target system that can guide code generation (e.g., operation sequences, identifier stability, edge cases) - -## Phase 1: Scaffold via CLI (MANDATORY) - -**You MUST run this command before writing any code.** Do not create files manually. - -```bash -bridgic-amphibious create -n --task "" -``` - -This generates the project skeleton: `task.md`, `config.py`, `tools.py`, `workers.py`, `agents.py`, `skills/`, `result/`, `log/`. - -**After the scaffold is created**, adapt each generated file based on the task description, domain context, and auxiliary context. - -## Phase 2: Generate Code (Per-File Rules) - -### agents.py - -The agent class is an `AmphibiousAutoma` subclass. The framework provides several template methods (hooks), each with a clear responsibility boundary. Understanding these boundaries is essential for generating correct code. - -#### Template Methods Overview - -| Method | When Called | Responsibility | -|--------|------------|----------------| -| `observation(self, ctx)` | Before each OTC cycle and before each `yield` in workflow | **State acquisition.** Fetch and return the current environment state as a string. The return value populates `ctx.observation`. All domain-specific state fetching (reading pages, querying APIs, checking status) belongs here. | -| `before_action(self, decision_result, ctx)` | Before each tool execution | **Pre-action processing.** Track state changes (e.g., record items being processed), sanitize tool arguments (e.g., fix LLM formatting mistakes), or gate actions. | -| `after_action(self, step_result, ctx)` | After each tool execution | **Post-action processing.** React to the result of a tool call — update derived state (e.g., refresh `ctx.observation` to reflect the new environment), accumulate results, trigger side effects (logging, notifications), or perform cleanup. 
| -| `on_workflow(self, ctx)` | When running in WORKFLOW or AMPHIFLOW mode | **Deterministic orchestration.** An async generator that yields `ActionCall`, `AgentCall`, or `HumanCall` to express the step sequence. This method should only contain **action logic**.| -| `on_agent(self, ctx)` | When running in AGENT mode, or as fallback when a workflow step fails | **LLM-driven execution.** Awaits `think_unit` workers that use the LLM to observe-think-act. Must be defined even in workflow-centric agents, otherwise fallback has nowhere to go. | - -#### on_workflow Best Practices - -1. **Every `ActionCall` must include `description="..."`.** The description serves two purposes: human-readable debug logs, and — critically — it becomes the context the LLM receives when a step fails and triggers agent fallback. Without it, the fallback agent has no idea what the failed step was trying to accomplish. - -2. **Linear steps: use stable identifiers directly.** For sequential deterministic operations where the target identifier is known and stable (confirmed in pre-analysis), hardcode the value. Do NOT write dynamic lookup helpers for stable identifiers — a helper adds unnecessary fragility. - -3. **Loop/conditional steps: extract identifiers dynamically from `ctx.observation`.** Inside loops or conditional branches, data changes on each iteration. Re-extract from the current `ctx.observation` (kept fresh by hooks) using task-specific extraction functions in `helpers.py`. - -4. **Workflow-first principle:** Translate known operations directly to `yield` statements. Only use `AgentCall` for semantic tasks that cannot be deterministic: - ``` - Deterministic step: - yield ActionCall("tool_name", description="...", arg1="value") - - Semantic step (cannot be deterministic): - yield AgentCall(goal="Analyze and categorize items", tools=["save_record"], max_attempts=3) - - Human interaction step: - yield HumanCall(prompt="Please confirm this action") - ``` - -### tools.py - -1. 
**Task tools: async functions registered via `FunctionToolSpec.from_raw()`.** For task-specific operations (saving data, computation, external API calls), write standard async Python functions with typed parameters and docstrings, then register with `FunctionToolSpec.from_raw()` (imported from `bridgic.core.agentic.tool_specs`). The docstring becomes the tool description the LLM sees, so make it precise. - -### workers.py - -1. **`CognitiveContext` subclass with proper field visibility.** Fields that hold non-serializable resources (connections, clients, sessions) must be marked `json_schema_extra={"display": False}` because serializing them into the LLM prompt is meaningless and wastes tokens. State-tracking fields (e.g., processed item sets, counters, progress indicators) should remain visible so the LLM can reason about progress during `on_agent` fallback. - -### helpers.py - -1. **Standalone functions only.** Helpers are pure functions that extract or transform domain-specific data. Putting them on the agent class couples parsing logic to the agent lifecycle and makes testing harder. Keep them in `helpers.py` as importable utilities. - -2. **Base extraction logic on actual data formats from pre-analysis.** Do not guess data formats. Use the real data structures or samples from the pre-analysis report to write precise extraction logic. Data formats vary between domains and applications, so every helper must be task-specific. - -### config.py - -1. **Fixed template — load from environment only.** Use `dotenv` to load `LLM_API_BASE`, `LLM_API_KEY`, and `LLM_MODEL` from `.env`. Do not hardcode API keys or model names. This file should contain no logic beyond environment variable loading. Add additional domain-specific environment variables as needed. - -### main.py - -1. 
**Use `OpenAILlm` + `OpenAIConfiguration` for LLM initialization.** The initialization pattern is fixed: import from `bridgic.llms.openai`, pass config values from `config.py`, set `temperature=0.0` for deterministic workflows. - -2. **Resource lifecycle via async context managers.** Create domain-specific resources (connections, clients, sessions) in `main.py` using async context managers for proper cleanup even on exceptions. Store resources in the custom context. Resources must not be created inside the agent class. - -3. **Tool assembly: combine domain tools + task tools into a single list.** Build domain-specific tools (from SDK or library), collect task tools from `tools.py`, merge them into a single list, and pass to `agent.arun(tools=all_tools)`. The agent framework distributes tools to both `on_workflow` steps and `on_agent` think units. - -## Phase 3: Validate Helpers - -After all code is generated, validate each helper function against the real snapshot files from the auxiliary context (e.g., `.bridgic/explore/`). Use Python to call each function and verify the output is non-empty and structurally correct. Fix and re-test if needed. - -```bash -uv run python -c " -from helpers import extract_items -snapshot = open('.bridgic/explore/snapshot_xxx.txt').read() -print(extract_items(snapshot)) -" -``` diff --git a/agents/amphibious-verify.md b/agents/amphibious-verify.md index 9923f7d..446fcd6 100644 --- a/agents/amphibious-verify.md +++ b/agents/amphibious-verify.md @@ -6,26 +6,26 @@ description: >- loop slicing), runs the program with log monitoring, handles human-in-the-loop interactions, validates results, and cleans up all debug code on success. Scene-agnostic — domain-specific verification rules arrive via domain context. -tools: ["Bash", "Read", "Grep", "Glob", "Edit", "Write"] -model: opus +tools: ["Bash", "Read", "Grep", "Glob", "Write", "Edit"] --- # Amphibious Verify Agent You are a verification specialist for bridgic-amphibious projects. 
Your job is to take an already-generated project, verify it runs correctly end-to-end, and return clean production code. -## Dependent Skills +## Input -Before starting, read and load all dependent skills listed below. +The calling command passes exactly two absolute paths: -- **bridgic-amphibious** — `skills/bridgic-amphibious/SKILL.md` (for `RunMode`, `AmphibiousAutoma` class structure) +- **build_context_path** — `build_context.md` (schema in `amphibious-config.md` Step 5). Read once. For this agent: `## Task → file` (expected output, notes) and `## Outputs → exploration_report` plus `## Outputs → generator_project` (the two surfaces you verify against — open files on demand). Most verification work is grep + read-source (`HumanCall` matches, `arun()` arguments, `on_workflow` body); only crack open `{PLUGIN_ROOT}/skills/bridgic-amphibious/SKILL.md` (or `bridgic-llms/SKILL.md`) when an API question can't be answered from the generated code itself. +- **domain_context_path** — a `domain-context//verify.md` path, or the literal `none`. **Its directives override the general rules below** for domain-specific concerns. -## Input +## Bootstrap -You receive from the calling command: -- **Task description**: Goal, expected output, constraints -- **Domain context** (optional): Domain-specific verification rules — helper check methods, expected output indicators, domain-specific error patterns. When provided, domain context takes precedence over the general rules below for domain-specific concerns. -- **Auxiliary context** (optional): Supporting information for verification (e.g., pre-analysis reports, sample data, expected output indicators) +Before any other work, batch-load the required startup files. Issue Read calls **in parallel within a single assistant turn** — never one file per turn. + +- **Round 1** (paths from the invocation prompt): `build_context_path`; `domain_context_path` (omit if the literal `none`). 
+- **Round 2** (paths discovered in `build_context.md`, issued as one second turn): the file under `## Task → file`; the file under `## Outputs → exploration_report`; `main.py` and `amphi.py` under `## Outputs → generator_project` (sibling modules like `tools.py` / `helpers.py` stay on-demand — only Glob for them when actually needed). --- @@ -33,16 +33,26 @@ You receive from the calling command: Insert temporary verification instrumentation into the generated code. **Every insertion** must be wrapped in `# --- VERIFY_ONLY_BEGIN ---` / `# --- VERIFY_ONLY_END ---` markers. +Each sub-step below opens with a **precondition probe** (grep or AST inspect). If the probe says the change is unnecessary, **skip the sub-step entirely** — don't insert dead instrumentation. + ### 1.1 Force Workflow Mode -Override the `mode` parameter in `main.py`'s `arun()` as `mode=RunMode.WORKFLOW` call to force pure workflow execution. This prevents the amphibious/auto fallback from masking workflow errors — any failure in `on_workflow` will surface immediately instead of silently degrading to agent mode. +**Precondition**: + +```bash +grep -nE "mode\s*=\s*RunMode\.WORKFLOW" {generator_project}/main.py +``` + +A match means `main.py` is already pinned to `RunMode.WORKFLOW` — skip 1.1. + +Override `arun()`'s `mode` to `RunMode.WORKFLOW` to force pure workflow execution. This prevents amphibious/auto fallback from masking workflow errors — any failure in `on_workflow` surfaces immediately instead of silently degrading to agent mode. **Where to insert**: In `main.py`, at the `arun()` call site. 
**Implementation pattern**: ```python -# Add import (use the same package as AmphibiousAutoma — check agents.py for the path): +# Add import (use the same package as AmphibiousAutoma — check amphi.py for the path): # --- VERIFY_ONLY_BEGIN --- from bridgic.amphibious import RunMode # --- VERIFY_ONLY_END --- @@ -57,14 +67,22 @@ result = await agent.arun( ``` **Rules**: -- Import `RunMode` from the same module as `AmphibiousAutoma` — check existing imports in `agents.py` for the correct path +- Import `RunMode` from the same module as `AmphibiousAutoma` — check existing imports in `amphi.py` for the correct path - If `RunMode` is already imported, skip the import injection - If `arun()` already has a `mode=` parameter, replace its value with `RunMode.WORKFLOW` - The marker lines inside the function call are valid: when removed in Phase 4, the surrounding arguments remain syntactically correct ### 1.2 Human Input Signal-File Override -If there are any points in the workflow that require human interaction, insert a `human_input` method override into the agent class (in `agents.py`). This replaces the default stdin-based input with a file-based communication channel that the monitoring loop can interact with. +**Precondition**: + +```bash +grep -rnE "\bHumanCall\b" {generator_project}/ +``` + +No match → no human-interaction points in the workflow → skip 1.2. + +Insert a `human_input` method override into the agent class (in `amphi.py`). It replaces the default stdin-based input with a file-based channel the monitoring loop can drive. **Where to insert**: As a method of the `AmphibiousAutoma` subclass, after the class definition line. 
@@ -76,7 +94,10 @@ If there are any points in the workflow that require human interaction, insert a
         """Signal-file human input for verification mode."""
         import json, asyncio
         from pathlib import Path
-        verify_dir = Path(".bridgic/verify")
+        # Verify artifacts live under PROJECT_ROOT (amphi.py's parent's parent),
+        # alongside build_context.md and explore/ — not inside the generator
+        # project. Stays consistent with monitor.sh.
+        verify_dir = Path(__file__).resolve().parent.parent / ".bridgic" / "verify"
         verify_dir.mkdir(parents=True, exist_ok=True)
         prompt = data.get("prompt", "Human input required:")
         request_file = verify_dir / "human_request.json"
@@ -94,7 +115,9 @@ If there are any points in the workflow that require human interaction, insert a
 
 ### 1.3 Loop Slicing
 
-For each dynamic list loop in `on_workflow`, insert a slice immediately before the `for` statement to limit iterations during verification.
+**Precondition**: Open `amphi.py`'s `on_workflow` and identify each `for ... in <iterable>:` whose `<iterable>` comes from a runtime source — `ctx.observation` (directly or via an extract helper), a tool/SDK return value, or an `await` on an API response. No such dynamic loop → skip 1.3. Loops over fixed/literal collections (`for url in ["...", "..."]`) are deterministic and **must not** be sliced.
+
+For each qualifying dynamic loop, insert a slice immediately before the `for` statement to bound iterations during verification.
+
+**Pattern**:
+
@@ -108,57 +131,33 @@ for item in items:
 ```
 
 **Rules**:
-- Only slice **dynamic** loops (lists extracted at runtime from observation, API responses, etc.)
-- Do NOT slice deterministic step sequences (stable ref clicks, navigation chains) +- Only slice the dynamic loops identified above +- Do NOT slice deterministic step sequences (stable ref clicks, navigation chains, fixed-list iteration) - The slice size `[:3]` is the default — adjust if the domain context specifies otherwise --- ## Phase 2: Run & Monitor -### 2.1 Start Program - -Execute `main.py` and record the PID from the output and output to a log file. - -### 2.2 Monitor via Script - -Start monitoring using a script: - -Execute `monitor.sh` with the PID, log path, verify directory, and a timeout. The timeout must not exceed **5 minutes (300 seconds)** — verification is not a full production run, it only needs to confirm the code works correctly. To fit within this budget: -- Loop slicing should be small (Phase 1.3) -- Pagination should be limited to 1–2 pages -- Any batch or iteration count should use the minimum needed to exercise the code path -The script watches the process, captures logs, and detects actionable events (completion, errors, human input requests). - -The script **only returns control to the agent when an actionable event occurs**. The agent reads the exit code and stdout to decide the next action: - -| Exit Code | Meaning | Agent Action | -|-----------|---------|--------------| -| **0** | Program finished successfully | Proceed to Phase 3 | -| **1** | Program finished with errors | Read the log excerpt from stdout, diagnose, fix code, restart (go to 2.1) | -| **2** | Human intervention required | Read `human_request.json` from stdout, ask user, write `human_response.json`, re-run monitor | -| **3** | Timeout | Report to user, investigate | +### 2.1 Run & Monitor via Script -#### On exit code 2 (Human Intervention) +A single script handles both launch and monitoring: -1. The script stdout contains the content of `human_request.json` — read the prompt -2. 
After the user confirms, create `.bridgic/verify/human_response.json`:
-   ```json
-   {"response": ""}
-   ```
-3. The program detects the response file and continues automatically
-4. Re-run `monitor.sh` with the same PID to continue watching
-
-#### On exit code 1 (Error)
+```bash
+bash {PLUGIN_ROOT}/scripts/run/monitor.sh {generator_project} [TIMEOUT]
+```
-1. The script stdout contains the last 50 lines of the log — read the error context
-2. Read the source code file where the error occurred
-3. Diagnose and fix the root cause
-4. Restart the program (go to 2.1)
+| Exit | Meaning | Agent action |
+|------|---------|--------------|
+| **0** | Finished cleanly | Proceed to Phase 3 |
+| **1** | Finished with errors | Diagnose from stdout (last 50 log lines of `run.log`), fix code, re-run `monitor.sh` |
+| **2** | Human intervention required | Read the prompt from stdout, ask the user, write the answer to the `human_response` path printed in stdout as `{"response": "<answer>"}`, re-run `monitor.sh` |
+| **3** | Timeout | Report to user and investigate |
-#### Maximum retries
+The script calls `uv run python main.py`; the script returns only when an actionable event occurs. Re-invoke with the **same arguments** to resume — it auto-detects the existing PID after human intervention, or starts fresh after a terminal exit. The script owns every runtime artifact (`run.log`, `pid`, `human_request.json`, `human_response.json`) and prints the resolved absolute paths to stdout on every exit, so that the agent can interact with them to reason about next steps or communicate with the user.
-If the same error occurs 3 times after fixes, stop and report the issue to the user.
+- **If the same error recurs 3 times after fixes, you must stop and report to the user that *the task cannot be completed*.**
+- The timeout period should be dynamically set based on the complexity of the task, but **it must not exceed 300 seconds**. 
To stay within budget: keep loop slices small (Phase 1.3), limit pagination to 1–2 pages, use minimum iteration counts. --- @@ -167,9 +166,9 @@ If the same error occurs 3 times after fixes, stop and report the issue to the u 1. **Exit code**: Confirm the process exited with code 0 2. **Error-free logs**: Grep the full log for `ERROR`, `Traceback`, `Exception` — there should be none 3. **Expected output**: Check that the task's expected output was produced, based on: - - Task description's "expected output" field - - Domain context's "expected output indicators" (if provided) - - Log content showing successful completion messages + - the `expected_output` field in `build_context.md` + - the domain-context file's "expected output indicators" (if `domain_context_path` was provided) + - log content showing successful completion messages 4. **If validation fails**: Diagnose → fix → return to Phase 2.1 --- @@ -182,13 +181,7 @@ After verification passes: Search all `.py` files in the project for `# --- VERIFY_ONLY_BEGIN ---` and `# --- VERIFY_ONLY_END ---`. Remove everything between each marker pair, including the markers themselves. -### 4.2 Clean Up Verification Artifacts - -```bash -rm -rf /.bridgic/verify/ -``` - -### 4.3 Final Syntax Check +### 4.2 Final Syntax Check ```bash find -name "*.py" -exec python -m py_compile {} + diff --git a/agents/browser-explorer.md b/agents/browser-explorer.md deleted file mode 100644 index b6c657a..0000000 --- a/agents/browser-explorer.md +++ /dev/null @@ -1,152 +0,0 @@ ---- -name: browser-explorer -description: >- - Browser exploration specialist. Drives bridgic-browser CLI to systematically - explore websites, recording critical operations, ref stability, and edge cases. Produces a pseudocode operation sequence and saves key snapshots to disk. -tools: ["Bash", "Read", "Grep", "Write"] -model: opus ---- - -# Browser Explorer Agent - -You are a browser exploration specialist. 
Your job is to systematically explore a website using `bridgic-browser` CLI commands and produce a compact exploration report. - -## Dependent Skills - -Before starting, read and load all dependent skills listed below. - -- **bridgic-browser** — `skills/bridgic-browser/SKILL.md` (Dependencies + installation, CLI guide reference) - -## Input - -You receive from the calling command: -- **Task description**: Goal, expected output, constraints -- **Domain context** (optional): Domain-specific instructions provided by the command — tool setup patterns, observation patterns, state tracking patterns, per-file overrides, and reference files to read. When provided, domain context takes precedence over the general rules below for domain-specific concerns. -- **Auxiliary context** (optional): Auxiliary information that can guide exploration — environment details, operation sequences, identifier stability, edge cases, etc. - -At the beginning of task, determine whether the current task is completely new or a continuation of a previously interrupted one: -- If new, start exploring. -- If continuation, read the previous exploration report. - -## Explore - -### Observation Methodology - -**Before every action** (click, fill, navigate, etc.), always run **both** commands together: - -```bash -uv run bridgic-browser snapshot # current tab's page state -uv run bridgic-browser tabs # all open tabs + which is active -``` - -#### Understanding Observation Output - -1. **Tab Management**: Use the output of `bridgic-browser tabs` to track open tabs and identify the active tab, ensuring subsequent actions target the correct page context. - -2. **Content Display Behavior**: The `bridgic-browser snapshot` command has two output modes depending on page content volume: - - **Minimal content**: The CLI prints the full page snapshot directly to stdout. - - **Substantial content**: The CLI automatically saves the snapshot to a file and prints only the file path. 
- You do not control which mode is used — it is determined by the CLI automatically. - -3. **Find Element**: After obtaining the Observation, find the key elements needed for interaction: - a. When the Observation was printed to stdout: Analyze the content directly in the terminal output to locate target elements. - b. When the Observation was saved to a file: use the printed file path to access the content — - - **Search for keywords** related to the task description within that file - - **Read the entire file** to locate the target elements and their refs - -### Exploration Recording - -During exploration, carefully record the following: - -#### 1. Critical Operation Sequence - -Identify the minimal chain of tool calls that achieves the goal. For example, if the full sequence is `navigate → snapshot → click_search → snapshot → click_result → snapshot → extract_data`, the critical operations are `navigate → click_search → click_result → extract_data`. Intermediate observations (snapshots, tab checks, file reads) are **not** part of the critical chain — they are supporting steps. Record only the essential action sequence that must be reproduced in code. - -#### 2. Ref Stability Analysis - -For each critical operation's parameters (especially `[ref=...]` values), determine whether the value is **stable** or **volatile**: - -- **Stable refs** — elements that don't change between visits: navigation buttons, search buttons, fixed menu items, form fields with static IDs. -- **Volatile refs** — elements whose ref changes on every page load or snapshot: list items, dynamically generated rows, tab IDs, pagination-dependent elements, search results. -- Record each ref with its stability classification (e.g., `[ref=a3f2] search button → STABLE`, `[ref=c8e1] first result row → VOLATILE`). - -#### 3. 
Human-in-the-Loop Interruption - -When encountering situations you cannot resolve independently (e.g., CAPTCHA, two-factor authentication, unexpected error dialogs, login walls, ambiguous UI states, or pages that deviate significantly from expectations): -- **Stop** the current exploration immediately. -- **Describe** the exact situation: what you see, what you attempted, and why you're blocked. -- **Request** specific human intervention (e.g., "Please complete the CAPTCHA and tell me when the page has loaded"). -- **Resume** exploration from the same point after the human confirms the obstacle is cleared. - -#### 4. Exhaustive Interaction Coverage - -Explore **every** task-related interactive element on the page, not just the happy path: -- **Pagination** — click through Next/Previous, page numbers; understand the pattern (does ref change? does URL change?). -- **Buttons and links** — all clickable elements relevant to the task (filters, sorts, tabs, dropdowns, expand/collapse). -- **Form interactions** — input fields, checkboxes, radio buttons, date pickers, dropdowns. -- **Tab/window management** — actions that open new tabs or redirect; confirm target tab content. -- **Edge cases** — empty states, last page of pagination, no-results scenarios, loading states. -- Record the behavior of each interaction so the generated code handles all paths. - -#### 5. Save Key Snapshots - -Save snapshots of pages containing **dynamic elements that need extraction helpers**. These files are **reference material for code generation** — downstream agents read them to write precise a11y tree parsing helpers. - -For each key page (list pages with items to iterate, detail pages with fields to extract, data tables), save the snapshot with a descriptive filename (e.g., `list_page.txt`, `detail_page.txt`). Only save pages where volatile data must be extracted — do not save every intermediate snapshot. - -#### 6. 
Close Browser - -After exploration, ensure all browser processes started by `bridgic-browser` are closed to prevent resource leaks and interference with subsequent steps. - -```bash -uv run bridgic-browser close -``` - -## Generate Report - -Write `exploration_report.md` and all snapshot files — the report reflects all progress made so far. The report contains **exactly two sections — no additional sections**. All observations gathered during exploration (ref stability, edge cases, behavioral quirks) go into **inline `#` comments** within the Operation Sequence. - -### 1. Operation Sequence - -A pseudocode-style operation list. Use indentation and control-flow keywords (`FOR`, `WHILE`, `IF`) to express loops, conditions, and nesting. - -**Example**: - -``` -1. open --headed # uses default user_data_dir for session persistence -2. IF login page detected: - 2.1 HUMAN: Please log in manually and tell me when the dashboard is visible -3. fill start_date [ref=5dc3463e STABLE] # "开始日期" textbox, YYYY-MM-DD -4. fill end_date [ref=a9cca048 STABLE] # "结束日期" textbox, YYYY-MM-DD -5. click status_dropdown [ref=063b563b STABLE] # "投诉状态" dropdown -6. click search [ref=4084c4ad STABLE] # results refresh in-place, URL unchanged -7. WHILE next_page not disabled: - 7.1 FOR each order_row in current_page (VOLATILE refs) - 7.1.1 extract detail_url from link # URL pattern: /detail?order_id=... - 7.1.2 new-tab → open detail_url - 7.1.3 extract detail fields # 订单号, 交易时间, 金额, ... - 7.1.4 extract 协商历史 table # columns: time, role, action, content (variable rows) - 7.1.5 close-tab # returns to list page - 7.2 click next_page [ref=cbac3327 STABLE] # disabled attr on last page -``` - -**Rules**: -- **Critical Operation Sequence Record**: - - Record only the critical operations needed to achieve the task. Do not include every single CLI command or observation step — focus on the essential interaction chain. 
Intermediate observations (snapshots, tab checks, file reads) are **not** part of the critical chain - - **Do not write `snapshot`, `tabs`, `wait`, `close` operations in pseudocode-style operation list**, file reads, and any other observation/timing steps that are implicit in every action cycle. **Focus on the critical interaction chain and its parameters**. -- **Behavioral notes as `#` comments**: edge cases, timing notes, component quirks go in trailing comments. When a note is too long for one line, continue on the next line at the same indent with another `#` -- **Ref and stability inline**: append `[ref= STABLE|VOLATILE]` after the operation target -- **Control flow**: use indentation to indicate nesting; use explicit keywords for loops and conditions: - - `WHILE :` — condition-driven repetition: repeat until a termination signal is observed (total iterations unknown upfront) - - `FOR each in :` — collection-driven iteration: enumerate a known/visible set of elements on the current page - - `IF :` / `ELSE:` — branching on observed page state; `ELSE:` sits at the same indent as the IF body, sub-numbers continue sequentially under the same parent -- **Human intervention**: `HUMAN:` is a special marker indicating the operation requires human interaction to proceed. Describe what the human must do and the signal to resume. - -### 2. Snapshot Files - -List saved snapshot file paths. 
Each entry includes a brief annotation of **what extractable content** the file contains — enough for the downstream code generator to know which file to read for which data, without opening every file: - -``` -- `/_list.txt` — results table: order link elements with detail URLs (volatile per page), pagination controls -- `/_detail.txt` — order detail: 微信支付订单号/商户订单号/交易商品 fields + 协商历史 table (time, role, action, content columns) -``` diff --git a/commands/build-browser.md b/commands/build-browser.md deleted file mode 100644 index 6aca7fa..0000000 --- a/commands/build-browser.md +++ /dev/null @@ -1,248 +0,0 @@ ---- -description: >- - End-to-end pipeline that turns a browser automation task into a working - bridgic-amphibious project. TRIGGER when the user says like: "provides a browser - task and wants to generate an amphibious project from it"; or "generate a project - from this browser workflow". The pipeline covers: task - initialization → pipeline configuration → environment setup → CLI - exploration → SDK code generation → verification. ---- - -# Build Browser Pipeline - -Turn a browser task into a working bridgic-amphibious project. - -## Pipeline Workflow - -``` -1. Initialize Task (this command — generate TASK.md template, user fills in task details) -2. Configure Pipeline (this command — project mode, LLM config if needed, browser mode) -3. Setup Environment (this command, runs setup-env.sh) -4. CLI Exploration (→ browser-explorer agent) -5. Generate Amphibious Code (→ amphibious-generator agent) -6. Verify (→ amphibious-verify agent) -``` - -> **Path variables**: `{PLUGIN_ROOT}` and `{PROJECT_ROOT}` are the paths below use these prefixes. If either is missing, the plugin was not loaded correctly — do not proceed. - ---- - -## Phase 1: Initialize Task - -Generate a `TASK.md` template file in `{PROJECT_ROOT}` for the user to describe their browser automation task. Write the following template: `{PLUGIN_ROOT}/examples/build-browser-task-template.md`. 
The template includes instructions and sections for the user to fill in. After writing the file, tell the user: A task template has been created at `TASK.md`. Please fill in it. - -Wait for the user to confirm they have filled in the template. Then read `{PROJECT_ROOT}/TASK.md` and extract the *Task Description*: -- Goal -- Expected Output -- Other optional details (starting URL, notes, constraints, special instructions, etc.) - -If any required section (Goal, Expected Output) is empty, ask the user to complete it before proceeding. - ---- - -## Phase 2: Configure Pipeline - -Present the following configuration questions **in order** as numbered choices. The user selects by entering the number (e.g., `1` or `2`). Wait for each answer before proceeding. All interactions with user to confirm use `AskUserQuestion` tool to present the question and capture the answer. - -### 2a. Project Mode - -Present the options as: - -> Choose project mode: -> -> **1. Workflow** — Pure script, no AI. Every step runs deterministically. Best for stable, predictable tasks. -> -> **2. Amphiflow** — Script + AI fallback. Runs the script normally, but switches to AI when something unexpected happens (CAPTCHA, layout change, etc.). Requires LLM config. -> - -Record the chosen **project mode** — it affects code generation in Phase 5. - -#### LLM configuration - -**If the user chose Amphiflow**, immediately validate LLM configuration: - -```bash -bash "{PLUGIN_ROOT}/scripts/run/check-dotenv.sh" -``` - -- **Exit 0**: LLM variables present — proceed. -- **Exit 1**: missing variables listed in output. Create `.env` file and ask the user to set them in it, then re-run the script. Do not proceed until it exits 0. - -**If the user chose Workflow**, analyze the task description from TASK.md to determine whether LLM is still needed: - -1. 
**Task clearly requires LLM** — The task description contains explicit AI/model demands such as: intelligent summarization, AI-based classification, natural language generation, semantic analysis, content understanding that cannot be achieved with deterministic rules, or the user explicitly mentions using AI/LLM/model. In this case, inform the user that their task involves AI-powered operations and LLM configuration is needed, then run the same `.env` validation check above. Do not ask — proceed directly. - -2. **Task is ambiguous** — The task description contains operations that *could* involve AI but are not explicitly stated (e.g., "extract key information", "analyze content", "generate a report"). Present the question: - - > Your task description mentions operations that may benefit from AI/LLM capabilities (e.g., content analysis, intelligent extraction). Would you like to configure an LLM? - > - > **1. Yes** — Configure LLM for AI-powered processing. - > - > **2. No** — Run purely with deterministic scripts, no AI. - > - > Enter **1** or **2**: - - If the user chose **1**, run the `.env` validation check above. If **2**, skip. - -3. **Task clearly does not require LLM** — The task is purely mechanical (page navigation, clicking, form-filling, data scraping with fixed selectors, file download). Skip LLM configuration entirely. - -### 2b. Browser Environment Mode - -Present the options as: - -> Choose browser environment: -> -> **1. Default** — Shared browser state across phases (login sessions carry over). -> -> **2. Isolated** — Each phase gets a clean browser profile, auto-cleaned after use. Ensures reproducible runs. -> -> Enter **1** or **2** (default: 1): - -Record the chosen **browser mode** — it affects Phases 4, 5, and 6. - -Confirm understanding with the user (task summary from TASK.md + project mode + browser mode) before proceeding. - ---- - -## Phase 3: Setup Environment - -Initialize an **empty uv project** in the working directory. 
- -```bash -bash "{PLUGIN_ROOT}/scripts/run/setup-env.sh" -``` - -Checks that `uv` is on PATH and runs `uv init` if `pyproject.toml` is absent. - -- **Exit 0**: Capture the `ENV_READY` block from stdout as the environment details passed to later phases. -- **Exit non-zero**: `uv` is not installed or init failed. Surface the error to the user and **stop the entire pipeline**. - -Do not proceed until the script exits 0. - ---- - -## Phase 4: CLI Exploration - -**Delegate to the `browser-explorer` agent.** - -Pass to the agent: -- **Task description** from Phase 1 (`TASK.md`) -- **Auxiliary context**: - - `PLUGIN_ROOT` and `PROJECT_ROOT` values - - Output directory `{PROJECT_ROOT}/.bridgic/explore/` - - Please initialize the required execution environment based on the skill. - - **Browser environment mode** from Phase 2: if **Isolated** mode is selected, pass `user-data-dir` = `{PROJECT_ROOT}/.bridgic/browser/`. The agent must create this directory before launching the browser, and **delete the entire `{PROJECT_ROOT}/.bridgic/browser/` directory** after exploration is complete and resources are cleaned up, so that subsequent phases start with a clean browser state. - -**Do not proceed to Phase 5 until complete.** - ---- - -## Phase 5: Generate Amphibious Code - -**Delegate to the `amphibious-generator` agent.** - -Pass to the agent: -- **Task description** from Phase 1 (`TASK.md`) -- **Project mode** from Phase 2 — **Workflow** or **Amphiflow** -- **Auxiliary context**: - - `PLUGIN_ROOT` and `PROJECT_ROOT` values - - **LLM configured** from Phase 2 — whether LLM environment was validated (yes/no). - - **Browser environment mode** from Phase 2: if **Isolated** mode is selected, pass `user-data-dir` = `{PROJECT_ROOT}/.bridgic/browser/` - - Please initialize the required execution environment based on the skill. 
- - The exploration report path: `{PROJECT_ROOT}/.bridgic/explore/exploration_report.md` from Phase 4 -- **Domain context** (browser automation): Include the following browser-specific instructions in the delegation prompt: - -### Domain Context to Pass - -**Domain reference files to read**: -- `bridgic-browser` skill — `{PLUGIN_ROOT}/skills/bridgic-browser/references/sdk-guide.md` and `{PLUGIN_ROOT}/skills/bridgic-browser/references/cli-sdk-api-mapping.md` for SDK tool names and usage -- `{PLUGIN_ROOT}/examples/build-browser-code-patterns.md` — browser-specific code patterns for all project files - -**Browser-specific per-file rules** (override or supplement the agent's general rules): - -#### task.md - -- Copy the user's `{PROJECT_ROOT}/TASK.md` content verbatim into the generated project's `task.md`. - -#### agents.py - -**Project mode affects code generation** - -- **Workflow mode**: Generate only `on_workflow` with deterministic step-by-step actions with: `ActionCall`, `AgentCall` (with LLM configured = yes), and `HumanCall`. -- **Amphiflow mode**: Generate both `on_workflow` (primary path) and `on_agent` (final fallback handler). - -**Element references** - -- **Stable refs**: hardcode directly in `ActionCall` (e.g., `ref="4084c4ad"`). These are element identifiers from the exploration report that don't change between page visits. -- **Volatile refs** (list items, dynamic rows, search results): re-extract from `ctx.observation` at runtime using helpers. - -**Interaction principles** - -- **Simulate human interaction — NEVER use JavaScript to modify page state.** Do not use `evaluate_javascript_on_ref` (or any JS execution) to set form values, trigger clicks, or manipulate DOM elements. JS-based DOM changes bypass the frontend framework's event bindings — the page appears to change but internal state remains stale. `evaluate_javascript_on_ref` is only acceptable for **reading** data from the page, never for writing. 
-- **Dynamic parameters must be computed at runtime.** When the task description contains relative or context-dependent values (e.g., "past week", "today", "last 30 days"), compute them in `on_workflow` using Python's `datetime` module. Never hardcode dates, counts, or any value that depends on when the program runs. - -**Tool call conventions** - -- `ActionCall` tool names must match SDK method names (not CLI command names). See `cli-sdk-api-mapping.md`. -- **Explicit `wait_for` after every browser action.** Every browser operation (`navigate_to`, `click_element_by_ref`, `input_text_by_ref`, etc.) must be immediately followed by a `yield ActionCall("wait_for", ...)` call. Recommended durations by action type: - - | Action type | Wait (seconds) | - |---|---| - | Navigation / full page load | 3–5 | - | Click that triggers content loading (search, filter, tab switch) | 3–5 | - | Click that opens dropdown / toggles UI element | 1–2 | - | Text input / form fill | 1–2 | - | Close tab / minor UI action | 1–2 | - - Adjust based on actual observed response times during exploration. - -**Observation management** - -- **Max snapshot limit**: `observation()` must call `get_snapshot_text(limit=1000000)` to ensure the full snapshot is captured. -- **Do NOT call `get_snapshot_text` in `on_workflow`** to read page state. The `observation()` hook keeps `ctx.observation` up-to-date — read it directly. The only exception is when `on_workflow` needs a snapshot before hooks have run (e.g., the very first state check after navigation). -- **`after_action` hook (MUST override)**: refresh `ctx.observation` after `wait_for` completes. Without this, inline code between a `wait_for` yield and the next yield sees stale (pre-wait) page state. See `build-browser-code-patterns.md` for the mandatory code pattern and optional additional uses. - -#### workers.py - -- The `browser` field must be marked `json_schema_extra={"display": False}` — serializing a browser instance is meaningless. 
-- State-tracking fields (e.g., scraped item sets, counters) should remain visible. - -#### helpers.py - -- Extraction functions parse live `ctx.observation` at runtime. To **write** these helpers, read the snapshot files in `{PROJECT_ROOT}/.bridgic/explore/` (referenced in the exploration report) for the real a11y tree structure. Do not guess the format. - -#### main.py - -- **Run mode**: set `mode=RunMode.AMPHIFLOW` if project mode is *Amphiflow*, otherwise `mode=RunMode.WORKFLOW` if project mode is *Workflow*. -- **Browser lifecycle**: `async with Browser() as browser` — create in main.py, store in context. - - **If Isolated mode**: set `user_data_dir` to `{PROJECT_ROOT}/.bridgic/browser/` so the generated project runs in its own clean browser profile. - - **If Default mode**: omit `user_data_dir` (use the browser's default profile). -- **Browser tools**: `BrowserToolSetBuilder.for_tool_names(browser, ...)` selecting only the SDK methods used in the exploration. -- **Tool assembly**: `[*browser_tools, *task_tools]` → pass to `agent.arun(tools=all_tools)`. -- **LLM initialization** (based on the **LLM configured** flag from Phase 2, not the project mode): - - **LLM configured = yes** : initialize `OpenAILlm` from `.env` / environment variables and pass `llm=llm` to the agent constructor. - - **LLM configured = no** : pass `llm=None` to the agent constructor. Do not import or initialize any LLM classes. -- At runtime, read the project's `task.md` file and pass its full content as the `goal` parameter to `agent.arun()`. Load it from `task.md`. - -The agent will: -1. Scaffold the project via `bridgic-amphibious create` -2. Load framework references from `bridgic-amphibious` skill + browser domain references from above -3. Complete all project files based on the scaffold created by `bridgic-amphibious create` - -**Proceed directly to Phase 6**. 
Code quality issues are the sole responsibility of the amphibious-verify agent — it will run the project, detect errors from actual execution, and fix them with proper diagnosis. - ---- - -## Phase 6: Verify - -**Immediately delegate to the `amphibious-verify` agent.** - -Pass to the agent: -- **Task description** from Phase 1 (`TASK.md`) -- **Project mode** from Phase 2 — **Workflow** or **Amphiflow** -- **Auxiliary context**: - - `PLUGIN_ROOT` and `PROJECT_ROOT` values - - Please initialize the required execution environment based on the skill. - - Exploration report and snapshot files from `{PROJECT_ROOT}/.bridgic/explore/` - - Work directory of the generated project from Phase 5 - - **Browser environment mode** from Phase 2: if **Isolated** mode is selected, pass `user-data-dir` = `{PROJECT_ROOT}/.bridgic/browser/`. The agent must override `user_data_dir` in the debug-instrumented code to this path. After verification is complete and all resources are cleaned up, **delete the entire `{PROJECT_ROOT}/.bridgic/browser/` directory** to leave a clean state. diff --git a/commands/build.md b/commands/build.md new file mode 100644 index 0000000..8b8b85b --- /dev/null +++ b/commands/build.md @@ -0,0 +1,119 @@ +--- +description: >- + End-to-end pipeline that turns any task into a working bridgic-amphibious + project. TRIGGER when the user says like: "generate an amphibious project + from this task" or "build a bridgic project". Optionally accepts a domain + flag (e.g., `--browser`) to inject pre-distilled domain context. Without a + flag, the model auto-detects the domain from TASK.md, falling back to a + generic flow when no domain matches. Users may also supply additional + domain references (SKILLs, CLIs, SDK docs, style guides) in TASK.md. The + pipeline orchestrates: task initialization → configure & setup → + exploration → code generation → verification. +argument-hint: "[--] e.g. 
--browser"
+---
+
+# Build Pipeline
+
+Turn any task into a working bridgic-amphibious project. The pipeline is **domain-agnostic by default**, with optional **pre-distilled domain context** injected per supported domain (browser, ...).
+
+## Argument parsing
+
+`$ARGUMENTS` may be empty, or contain a single domain flag in the form `--<domain>` (e.g., `--browser`). Trim whitespace; ignore case.
+
+- **Flag present** → set `SELECTED_DOMAIN = <domain>` and skip auto-detection. Validate that `{PLUGIN_ROOT}/domain-context/<domain>/` exists. If it does not, list the available domains (subdirectories of `{PLUGIN_ROOT}/domain-context/`) and ask the user to pick one or rerun without a flag.
+- **No flag** → leave `SELECTED_DOMAIN` unresolved; resolve it during Phase 1's auto-detection step.
+
+Anything else in `$ARGUMENTS` (extra tokens, multiple flags) → stop and ask the user to clarify.
+
+## Pipeline Workflow
+
+```
+1. Initialize Task (this command — generate TASK.md template, user fills in; then auto-detect domain if not flagged)
+2. Configure & Setup (this command — inline methodology; user interactions; writes build_context.md)
+3. Exploration (this command — inline methodology; may invoke human-in-the-loop)
+4. Generate Amphibious Code (→ amphibious-code subagent)
+5. Verify (this command — inline methodology; may invoke human-in-the-loop)
+```
+
+> **Path variables**: `{PLUGIN_ROOT}` and `{PROJECT_ROOT}` are path placeholders — all paths below use these prefixes. If either is missing, the plugin was not loaded correctly — do not proceed.
+> **build_context_path** — always `{PROJECT_ROOT}/.bridgic/build_context.md`.
+> **domain_context_path** — `{PLUGIN_ROOT}/domain-context/<domain>/<phase-file>.md` when `SELECTED_DOMAIN` is resolved, otherwise the literal `none` (generic flow). `<phase-file>` is `explore.md` for Phase 3, `code.md` for Phase 4, `verify.md` for Phase 5.
+
+After Phases 3 and 4, refresh `build_context.md` in two places:
+
+1. 
**Outputs** — replace the matching `(filled by Phase N)` placeholder with the phase's primary output path. +2. **env_ready** — read `{PROJECT_ROOT}/pyproject.toml` and update the dump under `--- pyproject.toml ---` inside the `env_ready:` block with its current contents. + +--- + +## Phase 1: Initialize Task + +Generate a `TASK.md` template file in `{PROJECT_ROOT}` for the user to describe their task. Read the template from `{PLUGIN_ROOT}/templates/build-task-template.md` and write its contents verbatim to `{PROJECT_ROOT}/TASK.md`. The template includes sections for *Task Description*, *Expected Output*, *Domain References*, and *Notes*. After writing the file, tell the user: A task template has been created at `TASK.md`. Please fill it in. + +Wait for the user to confirm. Then read `{PROJECT_ROOT}/TASK.md` and understand: + +- **Task Description** — goal of the project. +- **Expected Output** — what indicates success. +- **Domain References** — list of paths to domain reference files (may be empty). Each entry may be a SKILL.md, CLI help dump, SDK doc, style guide, or any other material that teaches the agents *how to act* or *what rules to follow*. Resolve each path (relative paths resolve against `{PROJECT_ROOT}`) and confirm it exists. Any missing path is a validation error — ask the user to correct it before proceeding. +- **Notes** — optional additional constraints. + +If Task Description or Expected Output is empty, ask the user to complete it before proceeding. + +### Domain auto-detection (only if `SELECTED_DOMAIN` is unresolved) + +1. List the subdirectories under `{PLUGIN_ROOT}/domain-context/`. Each subdirectory is a candidate domain. +2. For each candidate, read its `intent.md` (the matching criteria for that domain). +3. Compare the Task Description + Expected Output + Notes against each domain's `intent.md`. Pick the **single best match**, or `none` if no domain has strong signals. +4. 
If a candidate matches, present the decision via `AskUserQuestion`: + + > Detected domain: **<domain>**. Use the pre-distilled `<domain>` context for exploration, code generation, and verification? + > + > **1. Yes** — use `<domain>` context. + > **2. No** — proceed with the generic (domain-agnostic) flow. + > **3. Other** — let me name a different domain explicitly. + + On **1** set `SELECTED_DOMAIN = <domain>`. On **2** leave unresolved (generic flow). On **3** ask which domain (must match an existing subdirectory) and set accordingly. + +5. If no candidate matches, do not ask — silently proceed with the generic flow (`SELECTED_DOMAIN` stays unresolved). + +After this step `SELECTED_DOMAIN` is either a valid domain name or unresolved (generic). + +--- + +## Phase 2: Configure & Setup + +**Execute** by reading `{PLUGIN_ROOT}/agents/amphibious-config.md` and following its steps in order. + +The methodology document covers, in this order: + +1. Project Mode (Workflow | Amphiflow) +2. LLM Configuration (`check-dotenv.sh`) +3. Domain-specific Configuration (`domain-context/<domain>/config.md`, if any) +4. Environment Setup (`setup-env.sh` — verifies the `uv` toolchain and runs `uv init --bare` in `PROJECT_ROOT` so later phases share one uv env) +5. Write `{PROJECT_ROOT}/.bridgic/build_context.md` (the single source of truth for Phases 3–5) + +If `setup-env.sh` exits non-zero, the methodology doc says to **stop the entire pipeline** — respect that and do not enter Phase 3. + +On successful completion, `{PROJECT_ROOT}/.bridgic/build_context.md` exists and is the only artifact later phases need to read for context. + +--- + +## Phase 3: Exploration + +**Execute** by reading `{PLUGIN_ROOT}/agents/amphibious-explore.md` and following its steps in order. + +Do not start Phase 4 until exploration is complete — the report and artifact files under `{PROJECT_ROOT}/.bridgic/explore/` are the sole bridge between Phase 3 and Phase 4.
After exploration finishes, fill `## Outputs → exploration_report` in `build_context.md`. + +--- + +## Phase 4: Generate Amphibious Code + +Delegate to the **`amphibious-code`** subagent, passing `build_context_path` and `domain_context_path` per the Phase context contract above. The agent reads `## Pipeline → mode` and `llm_configured` from `build_context.md` and applies its own mode-/LLM-mapping rules. + +After the agent returns, fill `## Outputs → generator_project` (the `//` subdirectory the agent created and populated) in `build_context.md`. + +--- + +## Phase 5: Verify + +**Execute** by reading `{PLUGIN_ROOT}/agents/amphibious-verify.md` and following its steps in order. diff --git a/domain-context/browser/code.md b/domain-context/browser/code.md new file mode 100644 index 0000000..94c9247 --- /dev/null +++ b/domain-context/browser/code.md @@ -0,0 +1,218 @@ +# Browser Domain — Code Generation Context + +## Domain reference files to read + +**MUST** read `{PLUGIN_ROOT}/skills/bridgic-browser/references/sdk-guide.md` and `{PLUGIN_ROOT}/skills/bridgic-browser/references/cli-sdk-api-mapping.md` — SDK tool names and usage. + +## Faithful to exploration report + +`on_workflow` in `amphi.py` must implement **every numbered step (and sub-step)** from the report's "Operation Sequence" — same order, same refs, same values. + +## Action principle — never modify page state via JavaScript + +**Do not use `evaluate_javascript_on_ref` (or any JS execution) to set form values, trigger clicks, or manipulate DOM elements.** JS-based DOM changes bypass the frontend framework's event bindings — the page appears to change but internal state remains stale. `evaluate_javascript_on_ref` is only acceptable for **reading** data from the page, never for writing. + +## Action conventions + +- `ActionCall` tool names must match SDK method names (not CLI command names). See `cli-sdk-api-mapping.md`. 
+- **Explicit `wait_for` after every browser action.** Every state-mutating call is followed by `yield ActionCall("wait_for", time_seconds=, description="...")`. Condition-based waits use `text=` / `text_gone=` / `selector=` (see `cli-sdk-api-mapping.md`). Recommended durations: + + | Action type | `time_seconds` | + |---|---| + | Navigation / full page load | 3–5 | + | Click that triggers content loading | 3–5 | + | Click that opens dropdown / toggles UI | 1–2 | + | Text input / form fill | 1–2 | + | Close tab / minor UI action | 1–2 | + + Adjust to actual observed response times. + +## Observation management + +**Do NOT call `get_snapshot_text` inside `on_workflow` to read page state.** The `observation()` hook keeps `ctx.observation` up-to-date — read it directly. The only exception is when `on_workflow` needs a snapshot before hooks have run (e.g., the very first state check after navigation). + +--- + +## `amphi.py` — browser-specific implementation + +### Context (`CognitiveContext` subclass) + +Add a `browser` field — the SDK `Browser` instance — and mark it `json_schema_extra={"display": False}`. It is a non-serializable resource and must not be serialized into the LLM prompt. + +```python +from typing import Any +from pydantic import Field +from bridgic.amphibious import CognitiveContext + +class AmphiContext(CognitiveContext): + browser: Any = Field(default=None, json_schema_extra={"display": False}) +``` + +### Hooks — `observation` and `after_action` + +**`observation` — live browser state before each step.** Called automatically before each `yield` in `on_workflow` and each OTC cycle. Returns the current browser state (open tabs + page snapshot) for `ctx.observation`: + +```python +async def observation(self, ctx) -> Optional[str]: + if ctx.browser is None: + return "No browser available." 
+ + parts = [] + tabs = await ctx.browser.get_tabs() + if tabs: + parts.append(f"[Open tabs]\n{tabs}") + snapshot = await ctx.browser.get_snapshot_text(limit=1000000) + if snapshot: + parts.append(f"[Snapshot]\n{snapshot}") + return "\n\n".join(parts) if parts else "No page loaded." +``` + +**`after_action` — mandatory override for observation refresh.** Called automatically after each tool call. Refreshes `ctx.observation` once `wait_for` completes. Critical for browser projects — without it, inline code between a `wait_for` yield and the next yield sees stale page state. + +```python +async def after_action(self, step_result, ctx): + action_result = step_result.result + if hasattr(action_result, "results"): + for step in action_result.results: + if step.tool_name == "wait_for" and step.success: + ctx.observation = await self.observation(ctx) + break +``` + +### Ref handling — STABLE vs VOLATILE + +Browser refs are **deterministic per element**: the same DOM element on the same page yields the same ref string across observations and across runs (until that page navigates or its DOM is mutated). This is what makes `STABLE` annotations in the exploration report meaningful — those refs were captured once during exploration and remain valid at runtime. + +**Mirror that distinction directly in `amphi.py`:** + +- **STABLE refs → module-level constants.** For every ref tagged STABLE in the exploration report (header buttons, fixed dropdowns, pagination Next, search controls, etc.), declare a constant near the top of `amphi.py` and reference it inline at the yield site. **No `find__ref(observation)` parser** — the value is already known and re-deriving it by regex is pure waste. + + ```python + # Top of amphi.py — copy these from exploration_report.md §2 (STABLE-tagged steps). 
+ STATUS_DROPDOWN_REF = "5dc3463e" + SEARCH_BUTTON_REF = "4084c4ad" + NEXT_BUTTON_REF = "cbac3327" + + # In on_workflow: + yield ActionCall("click_element_by_ref", + description="Open the status filter dropdown", + ref=STATUS_DROPDOWN_REF) + ``` + +- **VOLATILE refs → extracted per-iteration.** Per-row buttons, dynamically generated list items, and any element whose ref regenerates on each page load go in `ctx.observation` and must be parsed at runtime — see Helpers below. + +If the exploration report doesn't list a ref for an element your `on_workflow` needs, that's an exploration gap — go look in `{PROJECT_ROOT}/.bridgic/explore/` artifacts and copy the literal hex ref out of the snapshot. Do not add a regex parser to "auto-discover" it. + +### Helpers — extraction from `ctx.observation` + +Helpers exist **only for VOLATILE data** — values that change per page-load, per row, or per run. Parsers for STABLE elements do not belong here (see "Ref handling" above). Base every helper on the actual a11y tree text under `{PROJECT_ROOT}/.bridgic/explore/`. + +```python +import re +from typing import Optional + +def find_active_tab(observation: str) -> Optional[str]: + """Active tab's page_id. VOLATILE — regenerated per browser session.""" + if not observation: + return None + match = re.search(r'(page_\d+)\s*\(active\)', observation) + return match.group(1) if match else None + +def extract_list_rows(observation: str) -> list[dict[str, str]]: + """Per-row data from the filtered list. Refs and ids are VOLATILE.""" + ... +``` + +Keep helpers as module-level functions in `amphi.py` (split into a sibling `helpers.py` only if extraction logic grows large). When several VOLATILE values come out of the same observation block, return them together from one helper — don't write a separate finder per field. + +--- + +## `main.py` — browser lifecycle, run mode, LLM init, tool assembly + +- **Browser lifecycle**: `async with Browser(...) 
as browser` — create in `main.py`, store in context. + - **Isolated mode**: set `user_data_dir` to `{PROJECT_ROOT}/.bridgic/browser/` (resolved at runtime as `Path(__file__).parent.parent / ".bridgic" / "browser"`). Matches the path used by Phase 3 exploration and Phase 5 verification, so the same isolated profile chain carries through every phase. + - **Default mode**: omit `user_data_dir` (use the browser's default profile). + - All other launch parameters (headless, channel, args, viewport, etc.) must mirror those recorded in the exploration report — otherwise, under Default mode the shared browser state observed during exploration may not be reachable at runtime. +- **Browser tools**: `BrowserToolSetBuilder.for_tool_names(browser, ...)` selecting only the SDK methods used in the exploration. +- **Goal**: hardcode the task description as a string in `main.py`. Multi-line descriptions go into a triple-quoted constant. Do not read it from a sibling file — the project should be runnable as-is from its own directory. + +Run-mode (`RunMode.WORKFLOW` / `RunMode.AMPHIFLOW`) and LLM initialization (`llm=llm` vs `llm=None`) follow the generic rules in `amphibious-code.md` — no browser-specific override. + +```python +import asyncio +import logging +import os +from pathlib import Path + +from dotenv import load_dotenv +from bridgic.amphibious import RunMode +from bridgic.browser.session import Browser +from bridgic.browser.tools import BrowserToolSetBuilder + +# Only when llm_configured = yes: +# from bridgic.llms.openai import OpenAILlm, OpenAIConfiguration + +from amphi import Amphi, AmphiContext, TASK_TOOLS + +LOG_DIR = Path(__file__).parent / "log" + +GOAL = """ + +""".strip() + + +async def main(): + # .env lives at PROJECT_ROOT (one level above this file's directory). 
+ load_dotenv(Path(__file__).parent.parent / ".env") + + LOG_DIR.mkdir(exist_ok=True) + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", + handlers=[ + logging.FileHandler(LOG_DIR / "run.log"), + logging.StreamHandler(), + ], + ) + + # llm_configured = no: + llm = None + # llm_configured = yes: + # llm = OpenAILlm( + # api_key=os.getenv("LLM_API_KEY"), + # api_base=os.getenv("LLM_API_BASE"), + # configuration=OpenAIConfiguration( + # model=os.getenv("LLM_MODEL"), + # temperature=0.0, + # max_tokens=16384, + # ), + # timeout=180.0, + # ) + + # Launch parameters below mirror the exploration report's recorded values. + # Under Isolated mode also pass + # user_data_dir=Path(__file__).parent.parent / ".bridgic" / "browser" + # (i.e. {PROJECT_ROOT}/.bridgic/browser/). + async with Browser(headless=False) as browser: + builder = BrowserToolSetBuilder.for_tool_names( + browser, + "navigate_to", + # ...others based on exploration's Operation Sequence + strict=True, + ) + browser_tools = builder.build()["tool_specs"] + all_tools = [*browser_tools, *TASK_TOOLS] + + agent = Amphi(llm=llm, verbose=True) + # context carries `browser` (non-serializable) and `goal`; `tools` MUST + # go to arun() — context.tools is `CognitiveTools`, not a list. + await agent.arun( + context=AmphiContext(browser=browser, goal=GOAL), + tools=all_tools, + mode=RunMode.WORKFLOW, # or RunMode.AMPHIFLOW + ) + + +if __name__ == "__main__": + asyncio.run(main()) +``` diff --git a/domain-context/browser/config.md b/domain-context/browser/config.md new file mode 100644 index 0000000..bb06212 --- /dev/null +++ b/domain-context/browser/config.md @@ -0,0 +1,22 @@ +# Browser Domain — Pipeline Configuration + +Additional Phase 2 configuration questions for the browser domain. Ask after the generic Project Mode / LLM questions, using `AskUserQuestion`. + +## Browser Environment Mode + +Present the options as: + +> Choose browser environment: +> +> **1. 
Default** — Shared browser state across phases (login sessions carry over). +> +> **2. Isolated** — Each phase gets a clean browser profile, auto-cleaned after use. Ensures reproducible runs. +> +> Enter **1** or **2** (default: 1): + +Record the chosen **browser mode** — it must be forwarded to Phases 3, 4, and 5 as part of the auxiliary context passed to each sub-agent. Specifically: + +- **Isolated mode** → pass `user-data-dir = {PROJECT_ROOT}/.bridgic/browser/` to every sub-agent that launches a browser. +- **Default mode** → omit `user-data-dir`; the browser uses its default profile and shared state. + +Do **not** perform a final summary confirmation here — the caller (Phase 2 in `commands/build.md`) owns the single end-of-Phase-2 summary and will include `browser mode` in it. diff --git a/domain-context/browser/explore.md b/domain-context/browser/explore.md new file mode 100644 index 0000000..a8cdee7 --- /dev/null +++ b/domain-context/browser/explore.md @@ -0,0 +1,66 @@ +# Browser Domain — Phase 3 Exploration Context + +## Domain reference files to read + +- `{PLUGIN_ROOT}/skills/bridgic-browser/SKILL.md` — browser skill definitions and usage. + +## Setup protocol + +Install the skill into PROJECT_ROOT — the single shared uv env reused by Phase 4: + +```bash +bash {PLUGIN_ROOT}/skills/bridgic-browser/scripts/install-deps.sh {PROJECT_ROOT} +``` + +## Observation protocol + +Pick the call form by **command kind**: + +| Command | How to invoke | +|---|---| +| State-mutating **CLI action** (`open`, `click`, `wait`, …) | `bash {PLUGIN_ROOT}/domain-context/browser/script/browser-observe.sh [--wait <seconds>] -- <action args>` — runs the action, waits, then prints `=== ACTION ===` / `=== POST-ACTION TABS ===` / `=== POST-ACTION SNAPSHOT ===`. | +| Observation (`snapshot`, `tabs`) or lifecycle (`close`) | `uv run bridgic-browser <command>` **directly**. | + +`--wait`: navigation / content-loading click **3–5s**; dropdown / text input **1–2s**; otherwise omit. + +**Hard rules:** + +1.
**The wrapper REFUSES `snapshot`, `tabs`, `close`** — they are not actions; wrapping self-includes or runs on a dead browser. `bash browser-observe.sh -- tabs` fails with `refusing to wrap 'tabs'` and burns a turn. Always call them via `uv run bridgic-browser <command>` directly. +2. **Do not re-fetch `snapshot` / `tabs` after each action** — the wrapper already printed both. Re-fetching is the most common waste pattern. Direct calls are reserved for genuinely insufficient wrapper output (snapshot truncated, late render, tab-focus confirmation). + +Snapshot output (wrapper or direct) is either inline (minimal) or a file path (substantial — grep or read it). + +## Ref classification — STABLE vs VOLATILE + +Browser refs are **deterministic per element**: the same DOM element on the same page yields the same ref string across snapshots and runs (until that page navigates or its DOM is mutated). Use this property to classify every ref recorded in the operation sequence — the code agent will hardcode STABLE refs as constants and only write extraction helpers for VOLATILE ones. + +| Class | Ref behaviour | Typical examples | What to record | +|---|---|---|---| +| **STABLE** | Same element on the same page reload → same ref | Header / sidebar buttons, fixed search/filter controls, pagination Next, persistent dropdowns, top-level tabs | The literal hex ref value, e.g. `# ref=5dc3463e STABLE` | +| **VOLATILE** | Ref regenerates per page load, per row, or per session | List rows, grid cells, items inside a re-fetched feed, dynamically rendered cards, popover/portal items mounted on demand | Tag the **shape**, not the value: `# row refs VOLATILE`. Save the snapshot artifact (see "Save Key Artifacts" in the framework rules) so the code agent can write a parser against real text. | + +Decision rule: a ref is STABLE only if you have **observed it twice** — once in the initial snapshot and once after at least one page reload or unrelated state change — and the value matched.
If you have not double-checked, default to VOLATILE; over-tagging STABLE causes runtime breakage when the assumption fails. STABLE is the privileged case (literal value travels into code), VOLATILE is the safe default. + +When recording a STABLE ref, copy the **exact** hex string from the snapshot — do not abbreviate, do not paraphrase, do not try to "name" the element instead. The code agent reads this value verbatim. + +## Browser launch parameters + +Record the **full browser launch parameters** used in this phase (headless, channel, args, viewport, etc., **excluding `user-data-dir`**) into the exploration report. Phase 4 must mirror these values in `main.py` so runtime behavior matches what was observed. + +Parameter Setting Guide: +1. If the task requires login, please launch the browser in non-headless mode to facilitate authentication. + +## Browser environment mode + +The auxiliary context will include a **browser mode** value (`Default` or `Isolated`): + +- **Isolated** → use `user-data-dir = {PROJECT_ROOT}/.bridgic/browser/`. Create this directory before launching the browser, and **delete the entire `{PROJECT_ROOT}/.bridgic/browser/` directory** after exploration is complete and resources are cleaned up, so subsequent phases start with a clean browser state. +- **Default** → omit `user-data-dir`; the browser uses its default profile. + +## Cleanup protocol + +Run once at the end of exploration to release all browser processes started by `bridgic-browser`: + +```bash +uv run bridgic-browser close +``` diff --git a/domain-context/browser/intent.md b/domain-context/browser/intent.md new file mode 100644 index 0000000..fea1025 --- /dev/null +++ b/domain-context/browser/intent.md @@ -0,0 +1,13 @@ +# Browser Domain — Intent + +Match this domain when the task is **browser automation**: driving a real web browser to navigate pages, click elements, fill forms, scrape DOM/a11y content, manage tabs/sessions, or interact with web UIs. 
+ +**Strong signals** (any one is enough): +- Mentions URLs, websites, web pages, login flows, forms, buttons, search results, pagination, listings. +- Mentions browsers/Chromium/Chrome/Playwright, headless mode, cookies/sessions, CAPTCHAs. +- Goal requires "open ... in a browser", "navigate to ...", "click ...", "extract ... from a site", "log in to ...", "scrape ...". + +**Negative signals** (do NOT match): +- Pure HTTP/REST API calls without a browser. +- File-system, database, or local CLI orchestration with no web UI. +- LLM-only text processing. diff --git a/domain-context/browser/script/browser-observe.sh b/domain-context/browser/script/browser-observe.sh new file mode 100755 index 0000000..55774db --- /dev/null +++ b/domain-context/browser/script/browser-observe.sh @@ -0,0 +1,89 @@ +#!/bin/bash +# browser-observe.sh — Run one bridgic-browser action, optionally wait for the +# UI to settle, then capture tabs + snapshot in a single tool result. Bundles +# act-and-observe into one shell call so the explore agent halves its per- +# iteration tool turns. +# +# Usage: +# bash browser-observe.sh [--wait ] -- +# (the '--' separator may be omitted when the first action arg does not +# start with '-') +# +# Examples: +# browser-observe.sh --wait 3 -- navigate_to --url https://example.com +# browser-observe.sh -- click_element_by_ref --ref 5dc3463e +# browser-observe.sh --wait 0.5 input_text_by_ref --ref a9cca048 --text foo +# +# Refused (call directly with `uv run bridgic-browser ` instead): +# snapshot, tabs already observations — wrapping would self-include +# close no useful post-state on a closed browser +# +# Output (plain stdout, three labelled sections): +# === ACTION === wrapped command's stdout +# === POST-ACTION TABS === bridgic-browser tabs +# === POST-ACTION SNAPSHOT === bridgic-browser snapshot +# +# Exit code mirrors the wrapped action's exit code. Failures in the trailing +# tabs/snapshot print "(... 
failed)" inline but do not change the exit code, +# so the agent can still read the action result and decide what to do. + +set -u + +WAIT=0 + +while [[ $# -gt 0 ]]; do + case "$1" in + --wait) + shift + [[ $# -gt 0 ]] || { echo "browser-observe.sh: --wait requires a value" >&2; exit 64; } + WAIT="$1" + shift + ;; + --) + shift + break + ;; + -h|--help) + sed -n '2,30p' "$0" + exit 0 + ;; + --*) + echo "browser-observe.sh: unknown flag '$1' (forgot '--' before bridgic-browser args?)" >&2 + exit 64 + ;; + *) + break + ;; + esac +done + +if [[ $# -eq 0 ]]; then + echo "browser-observe.sh: no bridgic-browser arguments provided" >&2 + exit 64 +fi + +case "$1" in + snapshot|tabs|close) + echo "browser-observe.sh: refusing to wrap '$1' — call 'uv run bridgic-browser $1' directly." >&2 + exit 64 + ;; +esac + +echo "=== ACTION ===" +uv run bridgic-browser "$@" +ACTION_EXIT=$? + +case "$WAIT" in + 0|0.0|0.00) ;; + *) sleep "$WAIT" ;; +esac + +echo "" +echo "=== POST-ACTION TABS ===" +uv run bridgic-browser tabs || echo "(tabs failed)" + +echo "" +echo "=== POST-ACTION SNAPSHOT ===" +uv run bridgic-browser snapshot || echo "(snapshot failed)" + +exit $ACTION_EXIT diff --git a/domain-context/browser/verify.md b/domain-context/browser/verify.md new file mode 100644 index 0000000..9e72016 --- /dev/null +++ b/domain-context/browser/verify.md @@ -0,0 +1,16 @@ +# Browser Domain — Phase 5 Verification Context + +## Launch-parameter parity (Default mode) + +If **browser mode = Default**, verify that the generated `main.py`'s browser launch parameters (headless, channel, args, viewport, etc.) match those recorded in the exploration report from Phase 3. Mismatches under Default mode break shared-state assumptions (the runtime browser may not see the cookies / login session captured during exploration) and must be fixed before declaring success. 
+ +## Isolated-mode `user-data-dir` override + +If **browser mode = Isolated**, the auxiliary context will include `user-data-dir = {PROJECT_ROOT}/.bridgic/browser/`. The agent must: + +1. **Override** `user_data_dir` in the debug-instrumented code to this exact path so verification runs in the same isolated profile chain. +2. After verification is complete and all resources are cleaned up, **delete the entire `{PROJECT_ROOT}/.bridgic/browser/` directory** to leave a clean state for the next run. + +## Cross-check `on_workflow` against the exploration report + +Treat the report's "Operation Sequence" as the source of truth. Any numbered step (or sub-step) missing from `on_workflow` is a bug — fix it, do not work around it. diff --git a/examples/build-browser-code-patterns.md b/examples/build-browser-code-patterns.md deleted file mode 100644 index a666c77..0000000 --- a/examples/build-browser-code-patterns.md +++ /dev/null @@ -1,304 +0,0 @@ -# Code Patterns for Build Browser - -## Table of Contents - -- [Browser tools via BrowserToolSetBuilder](#browser-tools-via-browsertoolsetbuilder) -- [Task-specific tools](#task-specific-tools) -- [Hooks: observation, after_action, before_action](#hooks-observation-after_action-before_action) -- [Helpers: extracting data from ctx.observation](#helpers-extracting-data-from-ctxobservation) -- [on_workflow patterns](#on_workflow-patterns) -- [on_agent pattern](#on_agent-pattern) -- [config: Project config](#config-project-config) -- [main: Browser lifecycle and tool assembly](#main-browser-lifecycle-and-tool-assembly) - ---- - -## Browser Tools via BrowserToolSetBuilder - -The bridgic-browser SDK provides `BrowserToolSetBuilder` to create `FunctionToolSpec` objects directly compatible with bridgic-amphibious. Tool names match SDK method names. 
- -### Select by tool names (preferred) - -Pick only the SDK methods that correspond to CLI commands used during exploration: - -```python -from bridgic.browser.tools import BrowserToolSetBuilder - -builder = BrowserToolSetBuilder.for_tool_names( - browser, - "navigate_to", - "get_snapshot_text", - "click_element_by_ref", - "input_text_by_ref", - "select_dropdown_option_by_ref", - "wait_for", - strict=True, -) -browser_tools = builder.build()["tool_specs"] -``` - -### Select by category - -```python -from bridgic.browser.tools import BrowserToolSetBuilder, ToolCategory - -builder = BrowserToolSetBuilder.for_categories( - browser, - ToolCategory.NAVIGATION, - ToolCategory.ELEMENT_INTERACTION, -) -browser_tools = builder.build()["tool_specs"] -``` - ---- - -## Task-Specific Tools - -For operations beyond browser interaction (saving data, computing results, human intervention), write standard async tools registered via `FunctionToolSpec.from_raw()`: - -```python -from bridgic.core.agentic.tool_specs import FunctionToolSpec - -async def save_record(item_id: str, title: str, detail: str) -> str: - """Save an extracted record. - - Parameters - ---------- - item_id : str - Unique identifier. - title : str - Item title. - detail : str - Extracted content. - """ - # Replace with actual persistence - ... -``` - ---- - -## Hooks: observation, after_action, before_action - -### observation — live browser state before each step - -Called automatically **before** each `yield` in `on_workflow` and each OTC cycle. Returns the current information for `ctx.observation`. Including tabs and snapshot. - -```python -async def observation(self, ctx) -> Optional[str]: - if ctx.browser is None: - return "No browser available." 
- - parts = [] - tabs = await ctx.browser.get_tabs() - if tabs: - parts.append(f"[Open tabs]\n{tabs}") - snapshot = await ctx.browser.get_snapshot_text(limit=1000000) - if snapshot: - parts.append(f"[Snapshot]\n{snapshot}") - return "\n\n".join(parts) if parts else "No page loaded." -``` - -### before_action — track state and sanitize - -Use for state tracking and argument sanitization. Receives `List[Tuple[ToolCall, ToolSpec]]`: - -```python -async def before_action(self, decision_result, ctx): - for tool_call, _ in decision_result: - name = tool_call.name - args = tool_call.arguments - - ... # Some logic - - return decision_result -``` - -### after_action — mandatory override for observation refresh - -Called automatically after each tool call. Refresh to get the newest page state after `wait_for` completes. This is critical for browser projects — without it, the agent sees stale page state between a `wait_for` yield and the next yield. - -```python -async def after_action(self, step_result, ctx): - action_result = step_result.result - if hasattr(action_result, "results"): - for step in action_result.results: - if step.tool_name == "wait_for" and step.success: - ctx.observation = await self.observation(ctx) - break -``` - -Additional optional uses (add alongside the mandatory refresh as needed): -- **Result accumulation**: aggregate tool call results -- **Logging / notifications**: log step outcomes for debugging -- **Conditional recovery**: detect failure patterns in `step_result` and set flags for `on_workflow` - ---- - -## Helpers: Extracting Data from ctx.observation - -Helpers are **only needed for dynamic extraction** — NOT for deterministic steps. They parse the accessibility tree text in `ctx.observation` and must be written based on the actual a11y tree structure observed. 
- -### Common pattern: extract tab info - -```python -def find_active_tab(observation: str) -> Optional[str]: - """Find the active tab's page_id.""" - if not observation: - return None - match = re.search(r'(page_\d+)\s*\(active\)', observation) - return match.group(1) if match else None - - -def extract_ref(observation: str) -> Optional[str]: - ... -``` - ---- - -## on_workflow Patterns - -### Deterministic steps - -For deterministic steps where the target elements are stable (e.g., navigation buttons, filters, search controls), use the ref values directly from CLI exploration: - -```python -async def on_workflow(self, ctx): - # Every browser action is followed by wait_for with a duration tuned to the action type - yield ActionCall("navigate_to", description="Open target page", url=ctx.target_url) - yield ActionCall("wait_for", description="Wait for page to load", time_seconds=4) - - yield ActionCall("click_element_by_ref", description="Click status dropdown", ref="063b563b") - yield ActionCall("wait_for", description="Wait for dropdown to open", time_seconds=1) - - yield ActionCall("click_element_by_ref", description="Select 'Pending' option", ref="05d0a863") - yield ActionCall("wait_for", description="Wait for selection to apply", time_seconds=1) - - yield ActionCall("click_element_by_ref", description="Click search button", ref="4084c4ad") - yield ActionCall("wait_for", description="Wait for results to load", time_seconds=3) -``` - -### Dynamic steps - -For dynamic steps where the target elements are volatile (list items, search results, paginated rows), extract identifiers from `ctx.observation` at runtime using helpers: - -```python -async def on_workflow(self, ctx): - # ... deterministic steps above ... 
- - # Dynamic: process each item from the current page - items = extract_items(ctx.observation) # helper from helpers.py - for item in items: - yield ActionCall("navigate_to", description=f"Open detail: {item['title']}", url=item["url"]) - yield ActionCall("wait_for", description="Wait for detail page", time_seconds=3) - - # ctx.observation is refreshed by the observation() hook before each yield - detail = extract_detail(ctx.observation) # helper from helpers.py - yield ActionCall("save_record", description="Save extracted data", **detail) - - yield ActionCall("go_back", description="Return to list page") - yield ActionCall("wait_for", description="Wait for list reload", time_seconds=2) -``` - -Key differences from deterministic steps: -- Refs and identifiers come from `ctx.observation`, not hardcoded -- Extraction functions in `helpers.py` parse the live accessibility tree -- `observation()` hook keeps `ctx.observation` fresh before each `yield` - ---- - -## on_agent Pattern - -Full LLM-driven. The LLM sees `ctx.observation` (live page state) and decides what tool to call each round: - -```python -from bridgic.amphibious import ( - AmphibiousAutoma, CognitiveWorker, think_unit, ErrorStrategy, -) - -class MyAgent(AmphibiousAutoma[MyContext]): - main_think = think_unit( - CognitiveWorker.inline( - "You are a browser automation executor. Executing exactly one action per round.\n\n" - "# Critical Rules\n" - "1. **ONE ACTION AT A TIME**: Your step_content and tool call MUST describe the same " - "single action.\n" - "2. 
**COMPLETION**: Set finish=True ONLY when ALL steps of the current phase are fully " - "accomplished and verified.\n\n" - "**Respond with a JSON object.**" - ), - max_attempts=50, - on_error=ErrorStrategy.RAISE, - ) - - async def on_agent(self, ctx): - await self.main_think -``` - ---- - -## config: Project config - -The LLM config or other variables - -```python -import os - -from dotenv import load_dotenv - -load_dotenv() - -LLM_API_BASE = os.getenv("LLM_API_BASE") -LLM_API_KEY = os.getenv("LLM_API_KEY") -LLM_MODEL = os.getenv("LLM_MODEL") - -``` - ---- - -## main: Browser Lifecycle and Tool Assembly - -The `Browser` instance is created in main.py, stored in context, and passed to `BrowserToolSetBuilder`: - -```python -import asyncio -from bridgic.llms.openai import OpenAILlm, OpenAIConfiguration -from bridgic.browser.session import Browser -from bridgic.browser.tools import BrowserToolSetBuilder - -from config import LLM_API_KEY, LLM_API_BASE, LLM_MODEL - -async def main(): - llm = OpenAILlm( - api_key=LLM_API_KEY, - api_base=LLM_API_BASE, - configuration=OpenAIConfiguration( - model=LLM_MODEL, - temperature=0.0, - max_tokens=16384, - ), - timeout=180 - ) - - async with Browser(headless=False) as browser: - # Browser tools from SDK - builder = BrowserToolSetBuilder.for_tool_names( - browser, - "navigate_to", "get_snapshot_text", "click_element_by_ref", - "input_text_by_ref", "switch_tab", "close_tab", "wait_for", - strict=True, - ) - browser_tools = builder.build()["tool_specs"] - all_tools = [*browser_tools, *get_task_tools()] - - goal = "..." 
# From task.md - - agent = MyAgent(llm=llm, verbose=True) - await agent.arun( - goal=goal, - browser=browser, - tools=all_tools - ) - -if __name__ == "__main__": - asyncio.run(main()) -``` diff --git a/examples/build-browser-task-template.md b/examples/build-browser-task-template.md deleted file mode 100644 index 57b0f5b..0000000 --- a/examples/build-browser-task-template.md +++ /dev/null @@ -1,29 +0,0 @@ -# Browser Automation Task - - -## Task Description - - - - -## Expected Output - - - - -## Notes (optional) - - - diff --git a/scripts/hook/inject-command-paths.sh b/scripts/hook/inject-command-paths.sh index b2c3e49..a6d5293 100755 --- a/scripts/hook/inject-command-paths.sh +++ b/scripts/hook/inject-command-paths.sh @@ -3,7 +3,7 @@ # # A bridgic command can reach Claude via two independent paths: # -# (1) User types `/AmphiLoop:build-browser ...` directly. +# (1) User types `/AmphiLoop:build ...` directly. # Claude Code expands the command inline as a tag in the # user turn. No Skill tool call happens — per the Skill tool's own rule, # "if you see a tag the skill has ALREADY been loaded, @@ -12,7 +12,7 @@ # for a `/command` token matching one of our commands/*.md files. # # (2) Claude auto-matches the user's natural-language task to a registered -# skill and invokes `Skill("build-browser")` itself. +# skill and invokes `Skill("build")` itself. # → Hook point: PreToolUse with matcher "Skill". Detect by reading # `tool_input.skill` from the stdin JSON. # @@ -28,7 +28,7 @@ ROOT="${CLAUDE_PLUGIN_ROOT:-}" # Authoritative plugin name. MUST match .claude-plugin/plugin.json `name`. # Used to reject cross-plugin invocations that happen to share a bare command -# name with us (e.g. another plugin exposing `OtherPlugin:build-browser`). +# name with us (e.g. another plugin exposing `OtherPlugin:build`). # Without this gate, the hook would run globally and pollute unrelated flows. 
PLUGIN_NAME="AmphiLoop" @@ -53,7 +53,7 @@ case "$HOOK_EVENT" in ;; esac -# Resolve BARE_NAME (the unqualified command name, e.g. "build-browser") from +# Resolve BARE_NAME (the unqualified command name, e.g. "build") from # whichever input shape applies to the current event. BARE_NAME="" @@ -80,9 +80,9 @@ else # UserPromptSubmit: scan commands/ and look for a `/command` or # `/AmphiLoop:command` token in the `"prompt":"..."` JSON field. We don't # fully parse the prompt string — we just require the slash-command form to - # appear so natural-language prompts mentioning `/build-browser` in passing + # appear so natural-language prompts mentioning `/build` in passing # don't false-positive, AND we require any namespace prefix to be exactly - # `AmphiLoop:` so `/OtherPlugin:build-browser` is rejected. + # `AmphiLoop:` so `/OtherPlugin:build` is rejected. if [ -d "$ROOT/commands" ]; then for f in "$ROOT"/commands/*.md; do [ -f "$f" ] || continue @@ -146,5 +146,5 @@ PWD_ESC=$(json_escape "$PWD") # newlines when rendering the additionalContext value back into the context. # permissionDecision is deliberately omitted so the PreToolUse branch only # adds context and does not override the permission flow of other hooks. 
-printf '{"hookSpecificOutput":{"hookEventName":"%s","additionalContext":"---\\nPLUGIN_ROOT=%s\\nPROJECT_ROOT=%s\\nUse these as path prefixes: {PLUGIN_ROOT}/scripts/..., {PLUGIN_ROOT}/skills/..., {PLUGIN_ROOT}/examples/..., {PROJECT_ROOT}/.bridgic/...\\n---"}}' "$HOOK_EVENT" "$ROOT_ESC" "$PWD_ESC" +printf '{"hookSpecificOutput":{"hookEventName":"%s","additionalContext":"---\\nPLUGIN_ROOT=%s\\nPROJECT_ROOT=%s\\nUse these as path prefixes: {PLUGIN_ROOT}/scripts/..., {PLUGIN_ROOT}/skills/..., {PLUGIN_ROOT}/templates/..., {PROJECT_ROOT}/.bridgic/...\\n---"}}' "$HOOK_EVENT" "$ROOT_ESC" "$PWD_ESC" exit 0 diff --git a/scripts/maintenance/sync-skills.sh b/scripts/maintenance/sync-skills.sh index 7337193..74e7524 100755 --- a/scripts/maintenance/sync-skills.sh +++ b/scripts/maintenance/sync-skills.sh @@ -193,16 +193,14 @@ def fetch_description(repo): return '' rows = [ - '| Skill | Source Repo | Ref | Description |', - '|-------|------------|-----|-------------|', + '| Skill | Source Repo | Description |', + '|-------|------------|-------------|', ] for skill in c.sections(): repo = c[skill]['repo'] - ref = c[skill]['ref'] desc = fetch_description(repo) repo_link = f'[\`{repo}\`](https://github.com/{repo})' - ref_display = f'\`{ref}\`' - rows.append(f'| {skill} | {repo_link} | {ref_display} | {desc} |') + rows.append(f'| {skill} | {repo_link} | {desc} |') print('\n'.join(rows)) " "$MANIFEST") diff --git a/scripts/run/monitor.sh b/scripts/run/monitor.sh index 53203bf..5281633 100755 --- a/scripts/run/monitor.sh +++ b/scripts/run/monitor.sh @@ -1,34 +1,94 @@ #!/bin/bash -# monitor.sh — Lightweight process monitor for amphibious-verify agent. +# monitor.sh — Run-and-monitor for amphibious-verify agent. # -# Polls a running program's PID and log file. Returns control to the -# calling agent only when an actionable event occurs, keeping LLM -# inference costs at zero during normal execution. 
+# On first call: launches `uv run python main.py` in , captures the +# PID, then enters the monitor loop. On subsequent calls (e.g., after the agent +# handled a human-intervention pause): detects the existing PID and resumes +# monitoring without restarting the program. +# +# Returns control to the caller only when an actionable event occurs, keeping +# LLM inference cost at zero during normal execution. # # Usage: -# monitor.sh [TIMEOUT_SECONDS] +# monitor.sh [TIMEOUT_SECONDS] +# +# The script owns all runtime artifacts under /.bridgic/verify/ +# (PROJECT_ROOT = parent of WORK_DIR — the generator project lives at +# //), so verify state sits next to build_context.md +# and explore/ instead of polluting the generator project: +# run.log — captured stdout/stderr of the launched program +# pid — PID of the running program (removed on exit) +# human_request.json — written by the program when it needs human input +# human_response.json — written by the agent to answer a human request +# +# Every exit prints the resolved paths to stdout so the caller never has to +# guess where files live. # # Exit codes: # 0 Program finished successfully (no errors in log) # 1 Program finished with errors (traceback/ERROR in log) # 2 Human intervention required (human_request.json appeared) # 3 Timeout — program exceeded allowed runtime -# -# On exit, the last N lines of the log are printed to stdout for -# the agent to read without needing a separate file read. set -euo pipefail -PID="${1:?Usage: monitor.sh [TIMEOUT]}" -LOG_FILE="${2:?Usage: monitor.sh [TIMEOUT]}" -VERIFY_DIR="${3:?Usage: monitor.sh [TIMEOUT]}" +WORK_DIR="${1:?Usage: monitor.sh [TIMEOUT]}" MAX_TIMEOUT=300 -TIMEOUT="${4:-300}" +TIMEOUT="${2:-300}" if [ "$TIMEOUT" -gt "$MAX_TIMEOUT" ]; then TIMEOUT="$MAX_TIMEOUT" fi +# Derived paths — caller should never need to know these. +# Verify artifacts live under PROJECT_ROOT (= parent of WORK_DIR), not under +# the generator project itself. 
+PROJECT_ROOT="$(dirname "${WORK_DIR%/}")" +VERIFY_DIR="${PROJECT_ROOT}/.bridgic/verify" +LOG_FILE="${VERIFY_DIR}/run.log" +PID_FILE="${VERIFY_DIR}/pid" + POLL_INTERVAL=3 +mkdir -p "$VERIFY_DIR" + +print_paths() { + echo "--- Paths ---" + echo "work_dir: $WORK_DIR" + echo "verify_dir: $VERIFY_DIR" + echo "log_file: $LOG_FILE" + echo "human_request: ${VERIFY_DIR}/human_request.json" + echo "human_response: ${VERIFY_DIR}/human_response.json" +} + +# Recursively terminate $1 and all of its descendants. +kill_tree() { + local parent=$1 + for child in $(pgrep -P "$parent" 2>/dev/null); do + kill_tree "$child" + done + kill "$parent" 2>/dev/null || true +} + +# --- Determine PID: resume existing or start fresh --- +PID="" +if [ -f "$PID_FILE" ]; then + EXISTING_PID=$(cat "$PID_FILE" 2>/dev/null || echo "") + if [ -n "$EXISTING_PID" ] && ps -p "$EXISTING_PID" > /dev/null 2>&1; then + PID="$EXISTING_PID" + echo "=== MONITOR: RESUMING PID=$PID ===" + else + rm -f "$PID_FILE" + fi +fi + +if [ -z "$PID" ]; then + rm -f "${VERIFY_DIR}/human_request.json" "${VERIFY_DIR}/human_response.json" + : > "$LOG_FILE" + nohup bash -c "cd '$WORK_DIR' && uv run python main.py" >> "$LOG_FILE" 2>&1 & + PID=$! + echo "$PID" > "$PID_FILE" + echo "=== MONITOR: STARTED PID=$PID ===" +fi + START_TIME=$(date +%s) while true; do @@ -36,31 +96,41 @@ while true; do NOW=$(date +%s) ELAPSED=$(( NOW - START_TIME )) if [ "$ELAPSED" -ge "$TIMEOUT" ]; then + kill_tree "$PID" + rm -f "$PID_FILE" echo "=== MONITOR: TIMEOUT after ${TIMEOUT}s ===" + print_paths echo "--- Last 30 lines of log ---" tail -30 "$LOG_FILE" 2>/dev/null || echo "(log file not found)" exit 3 fi # --- Human intervention check --- + # Delete the request file before exiting so a quick re-invoke doesn't + # re-trip on the stale request before the program consumes the response. 
if [ -f "${VERIFY_DIR}/human_request.json" ]; then echo "=== MONITOR: HUMAN_INTERVENTION_REQUIRED ===" + print_paths + echo "--- human_request.json ---" cat "${VERIFY_DIR}/human_request.json" + rm -f "${VERIFY_DIR}/human_request.json" exit 2 fi # --- Process liveness check --- if ! ps -p "$PID" > /dev/null 2>&1; then - # Process ended — check log for errors + rm -f "$PID_FILE" if grep -qE "Traceback|ERROR|Exception" "$LOG_FILE" 2>/dev/null; then echo "=== MONITOR: PROGRAM_ERROR ===" + print_paths echo "--- Last 50 lines of log ---" tail -50 "$LOG_FILE" 2>/dev/null || echo "(log file not found)" exit 1 else echo "=== MONITOR: PROGRAM_FINISHED ===" - echo "--- Last 30 lines of log ---" - tail -30 "$LOG_FILE" 2>/dev/null || echo "(log file not found)" + print_paths + echo "--- Last 10 lines of log ---" + tail -10 "$LOG_FILE" 2>/dev/null || echo "(log file not found)" exit 0 fi fi diff --git a/scripts/run/setup-env.sh b/scripts/run/setup-env.sh index 4bd8c45..9671ec1 100755 --- a/scripts/run/setup-env.sh +++ b/scripts/run/setup-env.sh @@ -1,8 +1,16 @@ #!/bin/bash -# setup-env.sh — Ensure uv is installed and initialize a uv project. +# setup-env.sh — Ensure uv is available and PROJECT_ROOT is a uv project. # -# 1. Checks if uv is on PATH. If not, auto-installs it (macOS/Linux/Windows). -# 2. Runs `uv init --bare` in the working directory if pyproject.toml is absent. +# 1. Checks uv is on PATH; auto-installs it on macOS/Linux/Windows if missing. +# 2. Runs `uv init --bare` in PROJECT_DIR if no pyproject.toml is present. +# 3. Prints an ENV_READY block followed by the verbatim pyproject.toml so +# callers see exactly which packages and dependencies the shared uv env +# currently has. +# +# After this script exits 0, PROJECT_DIR is a uv project (pyproject.toml + +# .venv ready to grow). Per-skill `install-deps.sh` scripts and the +# amphibious-code agent then `uv add` their packages into this same env — +# the project-level uv env is shared across all later phases. 
# # Usage: # setup-env.sh [PROJECT_DIR] (defaults to current directory) @@ -50,7 +58,7 @@ fi echo "uv: $(uv --version 2>&1)" # ────────────────────────────────────────────── -# 2. Initialize uv project (bare — no main.py, README, etc.) +# 2. Initialize uv project (bare — no main.py / README scaffolding) # ────────────────────────────────────────────── if [ ! -f pyproject.toml ]; then echo "No pyproject.toml found — running uv init --bare ..." @@ -64,3 +72,5 @@ echo "" echo "=== ENV_READY ===" echo "uv: $(uv --version 2>&1)" echo "project_dir: $(pwd)" +echo "--- pyproject.toml ---" +cat pyproject.toml diff --git a/skills/bridgic-amphibious/SKILL.md b/skills/bridgic-amphibious/SKILL.md index dc8da1a..d1a22a4 100644 --- a/skills/bridgic-amphibious/SKILL.md +++ b/skills/bridgic-amphibious/SKILL.md @@ -13,7 +13,7 @@ A bridgic-amphibious project requires the following packages: | Package | Description | |---------|-------------| -| `bridgic-core` | Core framework (Worker, Automa, GraphAutoma, ASL) | +| `bridgic-core` | Core framework (Worker, Automa, GraphAutoma) | | `bridgic-amphibious` | Dual-mode agent framework | | `bridgic-llms-openai` | LLM provider (only required for `AGENT` / `AMPHIFLOW` modes) | | `python-dotenv` | `.env` file loading | @@ -74,12 +74,12 @@ result = await agent.arun( Use the CLI to bootstrap a new project: ```bash -bridgic-amphibious create -n my_project -bridgic-amphibious create -n my_project --task "Navigate to example.com and extract data" -bridgic-amphibious create -n my_project --base-dir /path/to/projects +bridgic-amphibious create +bridgic-amphibious create --task "Navigate to example.com and extract data" +bridgic-amphibious create --base-dir /path/to/project ``` -Creates: `task.md`, `config.py`, `tools.py`, `workers.py`, `agents.py`, `skills/`, `result/`, `log/`. +Creates a single `amphi.py` in the target directory (default: cwd). 
The template includes a custom `CognitiveContext` subclass, an `AmphibiousAutoma` subclass with a `think_unit` declaration, and stubs for both `on_agent` and `on_workflow`. Runtime concerns (LLM credentials, entry script) are intentionally left to the caller. ## Core Concepts diff --git a/skills/bridgic-amphibious/references/api-reference.md b/skills/bridgic-amphibious/references/api-reference.md index 694bf9c..c03faa2 100644 --- a/skills/bridgic-amphibious/references/api-reference.md +++ b/skills/bridgic-amphibious/references/api-reference.md @@ -86,30 +86,24 @@ from bridgic.core.model.types import Message Bootstrap a new amphibious project: ```bash -bridgic-amphibious create -n [--base-dir ] [--task ] +bridgic-amphibious create [--base-dir ] [--task ] ``` | Flag | Default | Description | |------|---------|-------------| -| `-n, --name` | (required) | Project directory name | -| `--base-dir` | Current directory | Parent directory for the project | -| `--task` | "Describe your task here." | Initial task description for `task.md` | +| `--base-dir` | Current directory | Target directory for the generated file | +| `--task` | (omitted) | Injected as a top-level `# Task: ...` comment in `amphi.py` | -Generated structure: +Generated file: ``` -/ -├── task.md # Task description (input) -├── config.py # LLM configuration (API base, key, model) -├── tools/ # Tool definitions -│ └── __init__.py -├── workers.py # Context and data models (ProjectContext) -├── agents.py # AmphibiousAutoma subclass (TODO template) -├── skills/ # Amphibious skills -├── result/ # Trace and analysis results -└── log/ # Runtime logs +amphi.py # AmphibiousAutoma stub: AmphiContext + Amphi class with think_unit, on_agent, on_workflow ``` +The scaffold writes only this single file in the target directory. It does not create subdirectories and does not emit runtime configuration (e.g. `.env`) — those concerns belong to the caller's environment, not the scaffold. 
+ +Python API: `create_project(base_dir: Optional[str] = None, task: Optional[str] = None) -> Path`. Raises `FileExistsError` if `amphi.py` already exists in the target directory. + ## AmphibiousAutoma ```python diff --git a/skills/bridgic-amphibious/references/architecture.md b/skills/bridgic-amphibious/references/architecture.md index 291e75d..56b3769 100644 --- a/skills/bridgic-amphibious/references/architecture.md +++ b/skills/bridgic-amphibious/references/architecture.md @@ -186,12 +186,22 @@ async with self.snapshot(goal="Sub-task A"): ## Workflow Fallback Mechanism -In AMPHIFLOW mode: +Two distinct failure sources are handled in AMPHIFLOW mode: -1. Deterministic step fails → check `consecutive_failures < max_consecutive_fallbacks` -2. If within limit: agent fixes the specific step (scoped goal via `snapshot`) -3. If exceeded: abandon workflow → call `on_agent()` for full agent mode -4. `AgentCall` yield explicitly delegates a sub-task to agent mode (with a clean context snapshot) +**ActionCall tool failure** (a yielded tool raises during execution): + +1. Step fails → check `consecutive_failures < max_consecutive_fallbacks` +2. Within limit: agent fixes the specific step (scoped goal via `snapshot`); generator resumes +3. Limit exceeded: abandon workflow → call `on_agent()` for full agent mode + +**Generator-internal exception** (helper / inline logic between yields raises): + +- The generator is unrecoverable after a raise — `asend()` cannot resume it — so step-level fallback is impossible. The framework jumps directly to full fallback: `on_agent(ctx)` takes over the remaining task. +- Pure WORKFLOW mode (`will_fallback=False`): the original exception is re-raised — no fallback. +- AMPHIFLOW with `on_agent` overridden: hand off to `on_agent(ctx)`. +- AMPHIFLOW forced via `mode=` without an `on_agent` override: a `RuntimeError` is raised, tagged with the failing step index. 
+ +`AgentCall` yield is orthogonal to fallback — it explicitly delegates a sub-task to agent mode (with a clean context snapshot) regardless of failure state. ## Human-in-the-Loop diff --git a/skills/manifest.ini b/skills/manifest.ini index 1a58d3d..2087968 100644 --- a/skills/manifest.ini +++ b/skills/manifest.ini @@ -16,7 +16,7 @@ path = skills/bridgic-browser [bridgic-amphibious] repo = bitsky-tech/bridgic -ref = v0.4.0 +ref = v0.4.1 path = skills/bridgic-amphibious [bridgic-llms] diff --git a/templates/build-task-template.md b/templates/build-task-template.md new file mode 100644 index 0000000..53ca540 --- /dev/null +++ b/templates/build-task-template.md @@ -0,0 +1,52 @@ +# Amphibious Task + + +## Task Description + + + + +## Expected Output + + + + +## Domain References + + + + +## Notes + + + +