diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 53dc20c..3ef507c 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -6,7 +6,9 @@ "./commands/" ], "agents": [ - "./agents/amphibious-code.md" + "./agents/amphibious-explore.md", + "./agents/amphibious-code.md", + "./agents/amphibious-verify.md" ], "skills": [] } diff --git a/.gitignore b/.gitignore index 7a7e2df..362627e 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,8 @@ __pycache__/ .env # Node (if npx skills is used locally) -node_modules/ \ No newline at end of file +node_modules/ + + +# superpowers +docs/superpowers/ \ No newline at end of file diff --git a/CLAUDE.md b/CLAUDE.md index 89a5492..2078de3 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -79,3 +79,18 @@ claude plugin install AmphiLoop | Command | When to Use | |---------|-------------| | **/build** | Unified entry point. Turn any task into a working bridgic-amphibious project. Accepts an optional domain flag (`/build --browser`) to inject pre-distilled context from `domain-context//`. Without a flag, auto-detects the domain from `TASK.md` (or falls back to a generic flow). Users may additionally supply their own domain references in `TASK.md`. | + +## OpenClaw Integration + +The AmphiLoop repository **is** an OpenClaw native plugin. Installing it (`openclaw plugins install --link`) auto-registers a bundled skill that exposes `/amphiloop_build ""` in any OpenClaw chat surface. + +| Aspect | How it works | +|--------|--------------| +| **Plugin install** | `openclaw plugins install /abs/path/AmphiLoop-02 --link` then `openclaw gateway restart`. Setup + verification details in `extensions/openclaw-skill/README.md`. 
| +| **Native classification** | Three small files at repo root — `openclaw.plugin.json` (manifest), `package.json` (with `openclaw.extensions: ["./openclaw-entry.mjs"]`), and `openclaw-entry.mjs` (no-op entry) — make OpenClaw classify AmphiLoop as **native** (`Format: openclaw`) instead of falling back to Claude Code bundle detection from `.claude-plugin/plugin.json`. | +| **Bundled skill** | The plugin manifest declares `"skills": ["./extensions/openclaw-skill"]`; OpenClaw auto-discovers `amphiloop-build/SKILL.md` under that directory. | +| **Orchestration** | The OpenClaw host model drives Phases 2–3 (config + explore) directly, reading the methodology from `agents/amphibious-*.md` via the `{baseDir}/../..` path resolution. | +| **Code generation (host ↔ coding-agent)** | Host writes `/.amphiloop/AGENT_BRIEF.md` (lists the bridgic-* SKILL.md files the worker MUST read for correct API usage) + `/.amphiloop/TODOS.md` (task list). Then opens **one** long-lived OpenClaw `coding-agent` session and sends a tiny pointer prompt. Worker reads brief, reads TODOs, completes them, ticks `[ ]` to `[x]`. | +| **Verify-fix loop** | Phase 5 verify failures get **appended** to the same `TODOS.md` as new `[ ] FIX-N: ...` entries; host then sends a one-line "continue" to the same long-lived worker session. Up to 3 fix rounds. | +| **Worker choice** | The skill asks the user at run start which worker to dispatch to (`claude` recommended, plus `codex` / `opencode` / `pi`). One worker per run, reused throughout. | +| **Existing files** | All Claude Code-only artifacts (`hooks/`, `.claude-plugin/`, `commands/build.md`, `scripts/hook/`) remain in place. The new `package.json` + `openclaw-entry.mjs` + `openclaw.plugin.json` are the only repo-root additions; they coexist with the Claude Code manifest without conflict. 
| diff --git a/agents/amphibious-code.md b/agents/amphibious-code.md index e1fc912..a87bbef 100644 --- a/agents/amphibious-code.md +++ b/agents/amphibious-code.md @@ -37,7 +37,7 @@ Skill files (see Skill References below) and `## References` stay on-demand — ## Output Layout -The agent installs its runtime dependencies into PROJECT_ROOT's uv env (creating it if absent) and produces a code-only `/` subdirectory. The structure inside `/` may follow the pattern below: +The agent installs its runtime dependencies into PROJECT_ROOT's uv env (creating it if absent) and produces a code-only `/` subdirectory. The structure inside `/` **MUST match this layout exactly** — deviations break Phase 5 verify and downstream orchestration: ``` / @@ -45,14 +45,26 @@ The agent installs its runtime dependencies into PROJECT_ROOT's uv env (creating ├── uv.lock # resolution lockfile ├── .venv/ # uv-managed virtualenv ├── .env # only when llm_configured = yes -└── / # this agent's generator_project — code only +├── .bridgic/ # orchestrator workspace (build_context.md, explore/, verify/) — DO NOT write code here +└── / # this agent's generator_project — ALL generated code lives inside here ├── amphi.py # scaffold-created; this agent edits it ├── main.py # this agent creates: entry point (LLM init + agent.arun) ├── README.md # short, operational ├── log/ # runtime logs land here (configured in main.py) - └── result/ # task outputs land here + ├── result/ # task outputs land here + └── .py # any extra helpers/modules go here too — never at PROJECT_ROOT ``` +**`/` is the project's own directory, not a Python import package.** The entry points (`amphi.py`, `main.py`) and the support modules all live **inside** `/`. PROJECT_ROOT only carries uv metadata (`pyproject.toml`, `uv.lock`, `.venv/`, `.env`) and the orchestrator's `.bridgic/` workspace — never code. 
+ +### Layout anti-patterns — never produce these + +- ❌ `amphi.py` / `main.py` placed at `/` (sibling of `pyproject.toml`) instead of inside `/`. The entry points must be reached as `//main.py`. +- ❌ Treating `/` as a Python import package (adding `__init__.py`, importing it from a sibling `main.py` at PROJECT_ROOT). `/` is the project root, not a package alongside other code. +- ❌ Writing project code (`*.py`, `log/`, `result/`) under `/.bridgic/`. That directory is the orchestrator's workspace and is exclusive to `build_context.md`, `explore/`, `verify/`. +- ❌ Creating `log/` or `result/` at PROJECT_ROOT instead of inside `/`. They must sit next to `main.py` so `Path(__file__).parent / "log"` resolves correctly. +- ❌ Splitting support modules (`config.py`, `tools.py`, helper files) out to PROJECT_ROOT. Any module imported by `amphi.py` or `main.py` must live inside `/`. + --- ## Phase 1: Initialize Project Skeleton @@ -73,11 +85,15 @@ bash "{PLUGIN_ROOT}/skills/bridgic-amphibious/scripts/install-deps.sh" \ ### 1.3 Scaffold `amphi.py` +`cd` into `/` **first**, then run the scaffolder. The cwd at the moment of `bridgic-amphibious create` is what determines where `amphi.py` lands — running it from `/` will drop the file at PROJECT_ROOT and violate the Output Layout. + ```bash cd "/" uv run bridgic-amphibious create --task "" ``` +After the command returns, verify with `ls "//amphi.py"` — if `amphi.py` is missing or sitting at `/amphi.py` instead, stop and move it before continuing. + ### 1.4 Create runtime directories ```bash @@ -156,9 +172,16 @@ An async generator that yields `ActionCall` / `AgentCall` / `HumanCall`. Transla yield HumanCall(prompt="Confirm before deleting?") # Human-only ``` -5. **Compute dynamic values at runtime.** Relative phrases in the task description ("past 7 days", "today", "last 30 days") must be computed inside the generator with `datetime` etc., not hardcoded at write time. +5. 
**`HumanCall` vs `wait_for` — strict separation.** Two different waits exist; do not confuse them. + + - **Waiting for the UI to settle** (page render, click reaction, animation): use `yield ActionCall("wait_for", time_seconds=N)` or condition-based `wait_for(text=..., text_gone=..., selector=...)`. Time-bounded. + - **Waiting for the user to act** (login, QR-code scan, CAPTCHA solve, destructive-action confirmation): use `yield HumanCall(prompt="...")`. The bridgic framework **truly blocks** that yield until a human response arrives. You do not — and must not — guess how long the user will take. -6. **Keep generator-internal logic minimal.** Code between yields runs in the generator body. **If it raises, the generator is unrecoverable** — `asend()` cannot resume past an exception, so AMPHIFLOW skips per-step retry and jumps directly to full `on_agent` fallback. Keep inline code to variable assignment and pure helpers; push risky operations (network calls, parsing untrusted input) into `ActionCall`-wrapped tools where they can be retried. + **Forbidden**: using `wait_for(time_seconds=N)` (any N) to wait for a user action. User logins can take 5 seconds or 5 minutes; a fixed timer either fails too fast or wastes time. Any exploration step tagged `HUMAN:` MUST map to `HumanCall` in the generated code, never to `wait_for`. + +6. **Compute dynamic values at runtime.** Relative phrases in the task description ("past 7 days", "today", "last 30 days") must be computed inside the generator with `datetime` etc., not hardcoded at write time. + +7. **Keep generator-internal logic minimal.** Code between yields runs in the generator body. **If it raises, the generator is unrecoverable** — `asend()` cannot resume past an exception, so AMPHIFLOW skips per-step retry and jumps directly to full `on_agent` fallback. 
Keep inline code to variable assignment and pure helpers; push risky operations (network calls, parsing untrusted input) into `ActionCall`-wrapped tools where they can be retried. ### 2.4 `on_agent` — only for `AGENT` or `AMPHIFLOW` @@ -321,3 +344,11 @@ if __name__ == "__main__": 6. **No `config.py` by default.** Inline `os.getenv` in main.py. Split into a `config.py` only if env loading grows complex (many vars, validation, defaults). --- + +## Update build_context.md + +After Phase 4 completes, edit `/.bridgic/build_context.md`: +1. Replace the `## Outputs → generator_project` placeholder line `generator_project: (filled by Phase 4)` with the absolute path to `//`. +2. Refresh the `env_ready:` block: read `/pyproject.toml` and replace the content under `--- pyproject.toml ---` with its current text. This keeps Phase 5 (verify) accurate about which packages are installed. + +--- diff --git a/agents/amphibious-config.md b/agents/amphibious-config.md index 33b4213..7021ad5 100644 --- a/agents/amphibious-config.md +++ b/agents/amphibious-config.md @@ -13,8 +13,12 @@ tools: ["AskUserQuestion", "Bash", "Read", "Write"] # Amphibious Config Agent +> **Not a dispatchable subagent.** This agent is interactive (uses `AskUserQuestion` / equivalent ask-the-user mechanism) and runs **inline** in the calling command's thread. Do not register it under `.claude-plugin/plugin.json` `agents:` — only `amphibious-explore`, `amphibious-code`, and `amphibious-verify` are dispatchable. + You are a build-pipeline configuration specialist. Your job is to interactively determine project-mode / LLM / domain-specific settings, run environment setup, and write the consolidated `build_context.md` that every later agent reads. +Every user-facing prompt in this document follows `{PLUGIN_ROOT}/agents/human-interaction-protocol.md`. 
Inside Claude Code you are running inline in `/build`'s thread (Tier 1 — use `AskUserQuestion`); inside OpenClaw the host follows this same methodology in Tier 2 (chat message + await textual reply). The question content below is identical across both; only the transport differs. + ## Input The calling command passes the inputs already established in Phase 1 of `/build`: @@ -33,7 +37,9 @@ This agent runs interactively from the very first step; there are no startup fil ## Step 1: Project Mode -Present via `AskUserQuestion`: +**Ask the user** with these exact options. Use the platform's structured-question tool if one is available (e.g. Claude Code's `AskUserQuestion`); otherwise send the question as a single message and wait for the user's reply. **Do not also emit the question as chat text alongside the tool call** — the question is sent once. + +Question: > Choose project mode: > @@ -54,13 +60,13 @@ Decide whether to set up LLM — set `llm_configured` to `yes` or `no`. ``` - Exit 0: variables present — proceed. - - Exit 1: list missing variables; create `.env`, ask the user to fill it, re-run the check; do not proceed until exit 0. + - Exit 1: use the same ask-the-user mechanism as Step 1. Tell the user the missing variables and ask whether to (a) write a `.env` skeleton for them to fill, or (b) wait while they `export` the vars in their shell. Then re-run `check-dotenv.sh` until exit 0; do not proceed until exit 0. Set `llm_configured = yes`. - **If `project_mode == workflow`**: analyze the task description. - - **If task contains AI-suggestive operations** (e.g. "extract key information", "analyze content", "generate a report"), ask via `AskUserQuestion`: + - **If task contains AI-suggestive operations** (e.g. "extract key information", "analyze content", "generate a report"), ask using the same mechanism as Step 1: > Your task description mentions operations that may benefit from AI/LLM capabilities (e.g. content analysis, intelligent extraction). 
Configure an LLM? > @@ -74,13 +80,28 @@ Decide whether to set up LLM — set `llm_configured` to `yes` or `no`. ## Step 3: Domain-specific Configuration -If `SELECTED_DOMAIN` is resolved AND `{PLUGIN_ROOT}/domain-context//config.md` exists, read that file and follow its instructions verbatim — it tells you which questions to ask the user (still via `AskUserQuestion`) and which keys to record. Capture each answer as `domain_config[] = `. +If `SELECTED_DOMAIN` is resolved AND `{PLUGIN_ROOT}/domain-context//config.md` exists, read that file and follow its instructions verbatim — it tells you which questions to ask the user (using the same ask-the-user mechanism as Step 1) and which keys to record. Capture each answer as `domain_config[] = `. If no `config.md` exists, skip this step and treat `domain_config` as empty. ## Step 4: Environment Setup +### 4.0 Side-effect checkpoint (before running any setup script) + +Steps 1–3 only collected decisions; nothing on disk has been mutated yet beyond the `.env` skeleton (if Step 2 wrote one). Step 4.1 is the **first script that touches the user's toolchain** — it may install `uv` to PATH, create `pyproject.toml`, and otherwise alter PROJECT_ROOT in ways the user might want to see coming. + +Before invoking `setup-env.sh`, ask the user via the Human Interaction Protocol (Tier 1 in Claude Code, Tier 2 in OpenClaw — same content, different transport): + +> About to run environment setup against `{PROJECT_ROOT}`: +> - verify the `uv` toolchain is on PATH (auto-install if missing) +> - run `uv init --bare` if no `pyproject.toml` exists yet +> +> **1. Run setup now** — proceed with `setup-env.sh`. +> **2. Pause** — I want to inspect or change something first. + +On **1** continue to 4.1. On **2** wait for the user's follow-up, then re-prompt. 
+ ### 4.1 uv toolchain + PROJECT_ROOT uv project ```bash diff --git a/agents/amphibious-explore.md b/agents/amphibious-explore.md index 1ea373f..2e98897 100644 --- a/agents/amphibious-explore.md +++ b/agents/amphibious-explore.md @@ -8,7 +8,7 @@ description: >- re-observed each time the plan is carried out). Produces a pseudocode operation sequence with inline stability annotations plus any key-artifact files capturing the observed states the plan references. -tools: ["Bash", "Read", "Grep", "Write", "Edit"] +tools: ["AskUserQuestion", "Bash", "Read", "Grep", "Write", "Edit"] --- # Amphibious Explore Agent @@ -94,10 +94,16 @@ To record loops and branches faithfully, you must **probe their boundaries and a Secondly, mark **human handoffs** — points where the task requires intervention that automation cannot resolve alone (authentication wall, CAPTCHA, destructive-confirm dialog, permissions you lack, ambiguous UI, unexpected error state). Record each as a `HUMAN:` step in the plan, describing what the human must do and the signal to resume. -When you encounter a handoff during exploration, you **MUST** request human: +When you encounter a handoff *during exploration itself* (e.g. the target site shows a login wall and you cannot probe further until the user logs in), you **MUST** ask the user following `{PLUGIN_ROOT}/agents/human-interaction-protocol.md`. Pick the highest tier your current runtime supports: -- **Request** specific human intervention. -- **Resume** exploration from the same point once the human confirms the obstacle is cleared. +- **Claude Code subagent (Tier 1).** This agent's `tools:` declares `AskUserQuestion`. Use it: ask one focused question that names exactly what the user must do (e.g. "Please log into in the open browser, then choose 1. `done` / 2. `cancel exploration`."). Wait for the structured reply before resuming. 
+- **OpenClaw host running this methodology directly (Tier 2).** No `AskUserQuestion` here, but you have the chat / message channel captured by the host (`notifyChannel` / `notifyTarget`). Send a clearly formatted chat message that begins with `[USER ACTION REQUIRED]`, states exactly what to do (open which URL, click what, paste which token), and tells the user how to reply (`reply "done" once login completes`, or `reply with the token`). Wait for the user's textual reply before resuming. +- **Subagent without `AskUserQuestion` and no chat channel (Tier 3 — fallback).** Stop work and return a structured "human input needed" status to the calling command — include the prompt text and the resume signal. The calling command runs in Tier 1 or Tier 2 and asks on your behalf, then re-dispatches you with the answer. +- **Forbidden anti-pattern (all tiers).** Do **not** fall back to `echo "please do X" + until [ -f /tmp/flag ]; do sleep 3; done`. The user sees only a silent "Running" indicator and has no idea what is being asked. This is the canonical violation called out in the protocol. + +Once the user confirms the obstacle is cleared, **resume** exploration from the same point. + +Each `HUMAN:` step in the Operation Sequence will map **one-to-one** to a `HumanCall` yield in the generated code (see `amphibious-code.md` Phase 2.3). The framework blocks at that yield until the user responds — you do not need to estimate how long the user will take, and you must **not** record a fallback `wait_for(time_seconds=...)` to "give the user time". Finally, record only the **minimal chain of operations** needed to achieve the goal. Exclude: @@ -128,7 +134,13 @@ After exploration, run the cleanup protocol recorded in the Domain Guidance to r ## Generate Report -Write `exploration_report.md` plus all saved artifact files. The report has **up to three sections** — §1 is optional, §3 is omitted when no volatile data was captured. 
+Write all outputs into `<PROJECT_ROOT>/.bridgic/explore/`: +- `exploration_report.md` — the report itself +- artifact files (e.g. `list_state.txt`, `detail_state.txt`) at the same directory level + +Do not nest under any further subdirectory. The path `<PROJECT_ROOT>/.bridgic/explore/` is the canonical location read by `amphibious-code.md` Phase 3 and `amphibious-verify.md`. + +The report has **up to three sections** — §1 is optional, §3 is omitted when no volatile data was captured. ### 1. Domain Guidance @@ -206,3 +218,8 @@ A pseudocode-style list. Use indentation and control-flow keywords (`FOR`, `WHIL ### 3. Artifact Files List saved artifact paths. Each entry annotates **what extractable content** the file contains — enough for a reader to know which file documents which volatile data without opening every one. + +## Update build_context.md + +After writing the report and artifacts, edit `<PROJECT_ROOT>/.bridgic/build_context.md`: +1. Replace the `## Outputs → exploration_report` placeholder line `exploration_report: (filled by Phase 3)` with the absolute path to `exploration_report.md` (e.g. `exploration_report: /abs/path/.bridgic/explore/exploration_report.md`). diff --git a/agents/amphibious-verify.md b/agents/amphibious-verify.md index 446fcd6..b64433c 100644 --- a/agents/amphibious-verify.md +++ b/agents/amphibious-verify.md @@ -13,6 +13,8 @@ tools: ["Bash", "Read", "Grep", "Glob", "Write", "Edit"] You are a verification specialist for bridgic-amphibious projects. Your job is to take an already-generated project, verify it runs correctly end-to-end, and return clean production code. +Verify is the **last construction-phase step** — the orchestrator's first instrumented test-run of the freshly frozen code, before it is handed off to the user for production runs. Every user-facing prompt in this document (relaying a runtime `HumanCall`, surfacing a fatal error, deciding whether to retry) follows `{PLUGIN_ROOT}/agents/human-interaction-protocol.md`. 
The runtime file-bridge in Phase 1.2 is a *transport* that delivers prompts from the running program up to the orchestrator; once the prompt arrives here, this agent (or its OpenClaw-side host counterpart) is responsible for the Tier 1 / Tier 2 ask the protocol mandates. + ## Input The calling command passes exactly two absolute paths: @@ -151,7 +153,7 @@ bash {PLUGIN_ROOT}/scripts/run/monitor.sh {generator_project} [TIMEOUT] |------|---------|--------------| | **0** | Finished cleanly | Proceed to Phase 3 | | **1** | Finished with errors | Diagnose from stdout (last 50 log lines of `run.log`), fix code, re-run `monitor.sh` | -| **2** | Human intervention required | Read the prompt from stdout, ask the user, write the answer to the `human_response` path printed in stdout as `{"response": ""}`, re-run `monitor.sh` | +| **2** | Human intervention required | Read the prompt from stdout, **ask the user via the Human Interaction Protocol** (Tier 1 `AskUserQuestion` if available; otherwise Tier 2 / escalate per the protocol — never silent polling), write the answer to the `human_response` path printed in stdout as `{"response": ""}`, re-run `monitor.sh` | | **3** | Timeout | Report to user and investigate | The script calls `uv run python main.py`; the script returns only when an actionable event occurs. Re-invoke with the **same arguments** to resume — it auto-detects the existing PID after human intervention, or starts fresh after a terminal exit. The script owns every runtime artifact (`run.log`, `pid`, `human_request.json`, `human_response.json`) and prints the resolved absolute paths to stdout on every exit, so that the agent can interact with them to reason next steps or communicate with the user. 
@@ -184,9 +186,11 @@ Search all `.py` files in the project for `# --- VERIFY_ONLY_BEGIN ---` and `# - ### 4.2 Final Syntax Check ```bash -find -name "*.py" -exec python -m py_compile {} + +find <generator_project> -name "*.py" -exec uv run --project "<PROJECT_ROOT>" python -m py_compile {} + ``` +`<PROJECT_ROOT>` is the parent uv workspace (the directory holding `pyproject.toml`); `<generator_project>` is the generator project directory under it. Using `uv run --project` ensures the syntax check runs against the same Python interpreter the project's uv env was set up with — bare `python` may pick up a different version and yield false positives. + Confirm all files still compile after marker removal. --- @@ -198,3 +202,23 @@ Report back to the calling command: - **Summary**: What was verified and how - **Issues found and fixed**: Any code fixes applied during verification - **Human interventions**: Any points where human action was required + +--- + +## OpenClaw addendum — host-side human-intervention flow + +Under OpenClaw, **the host orchestrator is the one running `monitor.sh`** (per `extensions/openclaw-skill/amphiloop-build/SKILL.md` Step F.2) and is therefore the natural Tier 2 endpoint per the Human Interaction Protocol. The host has a chat / message channel to the user; the worker (coding-agent) does not. + +When `monitor.sh` exits with code 2: + +1. Host reads `<PROJECT_ROOT>/.bridgic/verify/human_request.json` to obtain the prompt text. +2. Host asks the user via **Tier 2** (free-text chat message): begin with a `[USER ACTION REQUIRED]` marker, paste the prompt text, state how to reply (`reply with the answer text, or "done" once you've completed the action`). +3. Host waits for the user's textual reply. +4. Host writes `{"response": "<answer>"}` to `<PROJECT_ROOT>/.bridgic/verify/human_response.json`. +5. Host re-invokes `monitor.sh` with the same arguments — it auto-resumes the still-running PID. + +This is re-entrant: a single Phase 5 run may hit multiple cycles (login, then CAPTCHA, then a confirmation dialog). 
Repeat the loop each time exit code 2 reappears. + +### Under Claude Code (no OpenClaw) + +When this agent runs as a Claude Code subagent (not as worker code), the OpenClaw addendum does not apply — the agent uses `AskUserQuestion` (or escalates per the protocol's Tier 3) and writes `human_response.json` directly. diff --git a/agents/human-interaction-protocol.md b/agents/human-interaction-protocol.md new file mode 100644 index 0000000..379d0c4 --- /dev/null +++ b/agents/human-interaction-protocol.md @@ -0,0 +1,214 @@ +--- +name: human-interaction-protocol +description: >- + Shared methodology for any AmphiLoop orchestrator or agent that needs to + pause and ask the human user during the build pipeline (Phases 1–5 of + /build, equivalently Steps B–F of the OpenClaw amphiloop-build skill). + Defines a capability-tiered fallback (structured ask tool → free-text + channel → escalate to parent), the phase-checkpoint pattern that keeps the + user in control between major pipeline steps, and the anti-patterns + (notably silent Bash polling) that violate the contract. +--- + +# Human Interaction Protocol + +Every AmphiLoop pipeline shares one non-negotiable contract: + +> The user must always **see** what is being asked, must always **explicitly +> reply** before the run advances, and must always be able to **redirect** at +> phase boundaries. No silent waiting. No timeout-driven auto-continue. + +This document is the single source of truth. `commands/build.md`, every +`agents/amphibious-*.md`, and `extensions/openclaw-skill/amphiloop-build/SKILL.md` +all defer to the rules below. + +## Scope — what this protocol governs + +The protocol governs **orchestrator-driven, build-pipeline user interaction**: + +- Claude Code: the `/build` command and every agent it dispatches — + Phase 1 (Init) → Phase 2 (Config) → Phase 3 (Explore) → Phase 4 (Code) → + Phase 5 (Verify, including the first instrumented test-run of the freshly + generated code). 
+- OpenClaw: the `amphiloop-build` skill and its host orchestration — + Steps B → C → D → E0/E → F → G. + +All five phases are construction-phase: the orchestrator is driving, the user +is a *co-designer*, and the user must be able to see/steer at every meaningful +moment. + +**Out of scope**: once the build finishes and the user takes the generated +project and runs `uv run python main.py` themselves (or wires it into their +own CI/cron/etc.), any human interaction the program performs at that point +is **runtime business logic** owned by the bridgic framework's default +`HumanCall` mechanism. The protocol does not govern that channel. + +### Note on Phase 5's runtime file-bridge + +Phase 5 (verify) injects a `human_input` override and runs the program under +`monitor.sh`. When the program yields a `HumanCall` mid-execution, a file +bridge transports the prompt from the running program back up to the +orchestrator. **That bridge is a transport mechanism, not a protocol tier.** +Once the prompt arrives at the orchestrator (the verify agent in Claude Code, +or the OpenClaw host), the orchestrator is now in Tier 1 or Tier 2 and asks +the user using the rules below — the bridge does not absolve the orchestrator +of applying the protocol. + +## Capability tiers — pick the first that applies in your runtime + +### Tier 1 — Structured ask tool available + +If the runtime exposes a structured-question tool (Claude Code's +`AskUserQuestion`, or any platform equivalent that pops a labeled-options UI), +**use it directly**. Phrase the question with explicit numbered options. + +Do **not** also emit the same question as plain chat text alongside the tool +call — the question is sent once, through the tool. + +**Option-construction rules** (apply to every Tier 1 ask): + +- **Header is hard-capped — keep it ≤12 characters.** Claude Code's + `AskUserQuestion` UI truncates long headers and the truncation tail can + surface as garbled output (e.g. `Phase 1→2876…`). 
Treat the header as a + tab label, not a sentence: `Phase 1→2`, `Continue?`, `LLM mode`, + `Domain?`. If you can't fit the meaning in 12 characters, the meaning + belongs in the question body, not the header. +- **Don't duplicate the always-available free-text channel as an explicit + option.** `AskUserQuestion` (and most equivalents) already render a + permanent free-text input row beneath the structured options ("Chat about + this" in Claude Code). Adding a structured option whose action is "type + something" / "free input" / "describe in chat" is pure noise — that + channel is open by default. Reserve structured options for the *distinct + branches the orchestrator must commit to*. +- **Use however many options the decision actually has — don't pad.** If + there are only two real branches, ship two options. If you find yourself + adding a third option just to reach a round number and its content is + vague ("Type something", "Other", "Anything else"), delete it. A + meaningful third option looks like an escape hatch (`Cancel build`, + `Skip this check`, `Abort and revisit later`), not a filler. +- **Each option's description must add information beyond its label** — + what concretely happens next, what the user should reply, what side + effect kicks in. Do not paraphrase the label in slightly different words; + if you can delete the description without losing meaning, you wrote it + wrong. +- **Option descriptions must NOT re-state the pre-question summary.** The + summary above the AskUserQuestion already established context (what just + finished, what's coming, side effects). The option description's job is + to say what is **unique** to choosing *this* branch — typically one short + clause: `→ runs setup-env.sh now`, `→ stays on the menu`, `→ aborts the + build`. If two branches' descriptions both rehearse the same upcoming + pipeline outline, you wrote the summary into the options. Delete the + duplication; trust the summary. 
+ +### Tier 2 — Free-text reply channel available + +The runtime can send the user a normal chat / message and receive a free-text +reply (e.g. an OpenClaw host conversation, a chat surface that holds a +`notifyChannel` / `notifyTarget` route). Send a clearly formatted message and +**wait for the user's explicit textual reply** before continuing. + +The message MUST: + +- Begin with a visible marker — e.g. `[USER ACTION REQUIRED]` or `[CHECKPOINT]`. +- State concretely what the user must do or decide. No paraphrase. +- State exactly how to reply — what word, what file, what click. Examples: + - "Reply `yes` to continue, or describe what you'd like changed." + - "Once you finish login in the open browser, reply `done`." +- Stay terse. One screenful max. + +### Tier 3 — No direct user channel → escalate to parent + +You are running where the user cannot be reached directly (typical case: a +Claude Code dispatchable subagent whose `tools:` list omits `AskUserQuestion`). +In this tier you **MUST** escalate; you **MUST NOT** poll a signal file in +silence and call that "asking the user". + +Stop work and return a structured "human input needed" status to the calling +command — include the prompt text, the resume signal the parent should hand +back, and any context the parent needs to ask coherently. The parent runs in a +higher tier (1 or 2) and asks on your behalf, then re-dispatches you with the +answer. + +If the agent's job genuinely requires interactive user input mid-task and +escalation is impractical, the cleaner fix is to add `AskUserQuestion` (or +the platform equivalent) to the agent's `tools:` list so it operates in Tier +1 — not to invent a polling workaround. + +## Phase checkpoint pattern + +At every checkpoint the orchestrator must: + +1. Send a short pre-question summary that combines (a) what just finished + (artifacts written, decisions made) and (b) what the next phase will do + plus any side effect worth a veto. 
**Total length cap: 3 visible lines + maximum across (a)+(b) combined** (not 3 lines per item). If you can't + fit it in 3 lines, you're describing too much — the user can read + `build_context.md` if they want detail. +2. The list of "things worth flagging as side effects" — files written, + env mutated, scripts run, processes spawned, real browsers opened, real + money / API quota spent, the generated program executed for the first + time — is a **selection menu, not an enumeration template**. Pick the + **one or two** items the user is most likely to want to veto on this + transition; ignore the rest. Do not produce a flowing prose paragraph + that lists every applicable category. +3. Ask "Continue to <next phase>?" via Tier 1 or Tier 2. The pre-question + summary lives **above** the question, not duplicated inside option + descriptions (see Tier 1 option-construction rules). +4. Wait for an explicit affirmative reply (`yes`, `y`, `go`, `continue`) + before advancing. Anything else (silence, `wait`, `let me look`, a + counter-question) means **do not advance** — answer the user's intervention + first and re-prompt the checkpoint when ready. + +Checkpoints are cheap when terse — one short summary plus a one-tap question. +Their value is the *option* to redirect, not the friction. A checkpoint that +overflows the visible area defeats its own purpose: the user cannot see the +question they are being asked. + +### Length self-check (run mentally before sending) + +- Pre-question summary: ≤3 lines? If no → trim. +- AskUserQuestion header: ≤12 chars? If no → trim. +- Each option description: 1 short clause that is **not** in the summary? + If no → rewrite or delete. +- Total surface (summary + question + 2–3 options): does it fit one screen + without scrolling? If no → cut the summary first, then the option + descriptions; the question text itself stays. 
+ +**Where to place checkpoints**: at the boundary between major pipeline phases, +and additionally before any single sub-step that has a meaningful side effect +the user might want to veto or adjust — running a setup script that mutates +the toolchain, writing `.env`, spawning a worker process, kicking off a real +run of the generated program, or starting a multi-attempt fix-and-retry loop. +Skip checkpoints inside tight inner loops or for purely informational reads. + +## Anti-patterns — never do these + +- ❌ `echo "Please do X" && until [ -f /tmp/flag ]; do sleep 3; done` — the + user only sees a quiet "Running" indicator. They have no idea what is being + asked or that they are the bottleneck. This is the canonical violation. +- ❌ Auto-continuing after a fixed silence. Silence means the user is busy or + hasn't seen the request — not consent. +- ❌ Burying the question in a long log dump. The user will scroll past it + and the run stalls. +- ❌ Asking via Tier 1/2 *and* echoing the same question to a Bash polling + loop. Pick one channel; the second is noise that masks the real one. +- ❌ Treating the Phase 5 file-bridge as if it were the user channel. The + bridge ends at the orchestrator; the orchestrator still owes the user a + Tier 1/2 ask. +- ❌ Padding a Tier 1 ask with a "Type something" / "Other / free input" + structured option. The free-text input row is always rendered; an + explicit option pointing to it is redundant and signals to the user that + the structured options are not load-bearing. +- ❌ Writing an option description that just rephrases its label. Either + add concrete information (what happens next, how to reply) or drop the + description. + +## Quick decision flowchart + +``` +Need to ask the user something? +├── Have AskUserQuestion (or equivalent)? → Tier 1: ask directly. +├── Have a chat / message channel? → Tier 2: send + await reply. +└── Neither (subagent boundary, no user channel) + → Tier 3: escalate to parent. 
+``` diff --git a/commands/build.md b/commands/build.md index 8b8b85b..a20b777 100644 --- a/commands/build.md +++ b/commands/build.md @@ -39,10 +39,20 @@ Anything else in `$ARGUMENTS` (extra tokens, multiple flags) → stop and ask th > **build_context_path** — always `{PROJECT_ROOT}/.bridgic/build_context.md`. > **domain_context_path** — `{PLUGIN_ROOT}/domain-context//.md` when `SELECTED_DOMAIN` is resolved, otherwise the literal `none` (generic flow). `` is `explore.md` for Phase 3, `code.md` for Phase 4, `verify.md` for Phase 5. -After Phases 3 and 4, refresh `build_context.md` in two places: +## Human Interaction & Phase Checkpoints -1. **Outputs** — replace the matching `(filled by Phase N)` placeholder with the phase's primary output path. -2. **env_ready** — read `{PROJECT_ROOT}/pyproject.toml` and update the dump under `--- pyproject.toml ---` inside the `env_ready:` block with its current contents. +Every prompt to the user — including the gates between phases below — MUST follow `{PLUGIN_ROOT}/agents/human-interaction-protocol.md`. Read it once before starting Phase 1. Inside Claude Code you are in **Tier 1**: use `AskUserQuestion` for every checkpoint and every decision the user must make. + +**Mandatory checkpoint between every phase transition** (Phase 1→2, 2→3, 3→4, 4→5): + +1. Send a 1–3 line summary of what just finished (artifacts written, decisions made). +2. Name what the next phase is about to do, and call out side effects the user might want to veto: scripts that mutate the env (`setup-env.sh`), files about to be written (`.env`, `build_context.md`), real-environment probes, code generation, real runs of the generated program. +3. Ask via `AskUserQuestion`: "Continue to Phase N?" with options like `1. Yes, continue` / `2. Pause — I want to adjust something`. +4. Only advance on an explicit affirmative reply. On `2`, take whatever follow-up the user requests, then re-prompt the checkpoint. + +Do **not** chain phases automatically. 
The user must always have the option to interrupt at every boundary. + +Individual sub-steps inside a phase that have their own meaningful side effect (notably Phase 2 → `setup-env.sh`, which mutates the project's uv toolchain and writes `pyproject.toml`) follow the same rule — `amphibious-config.md` enumerates those mid-phase gates. --- diff --git a/extensions/openclaw-skill/README.md b/extensions/openclaw-skill/README.md new file mode 100644 index 0000000..6e9a49c --- /dev/null +++ b/extensions/openclaw-skill/README.md @@ -0,0 +1,106 @@ +# AmphiLoop OpenClaw Skill + +Drop-in OpenClaw skill that exposes AmphiLoop as the slash command `/amphiloop_build`. The skill orchestrates AmphiLoop's 5-phase pipeline inside OpenClaw and delegates every code-writing step to OpenClaw's built-in `coding-agent` skill (a worker CLI of your choice — Claude Code, Codex, OpenCode, or Pi). Host and worker communicate via shared files in the working directory (`.amphiloop/AGENT_BRIEF.md` + `.amphiloop/TODOS.md`), not by stuffing a giant prompt. + +## Install model + +The AmphiLoop repository **is** the OpenClaw plugin. Mounting the repository as a plugin (one command) automatically registers this bundled skill: + +1. Clone the AmphiLoop repo somewhere on disk. +2. `openclaw plugins install --link` — see Install below. +3. The skill resolves the AmphiLoop repo root automatically using the OpenClaw `{baseDir}` macro (`{baseDir}/../..`). Users do not need to provide an AmphiLoop path. + +There is intentionally no auto-download / clawhub install path — the skill is colocated with the AmphiLoop methodology files (`agents/amphibious-*.md`, `scripts/run/*.sh`, `domain-context/*`, and the bridgic-* SDK skills under `skills/`) it needs at runtime, so they always travel together with the repo clone. 
+ +## Dependencies + +You must have at least one of the following coding-agent worker CLIs installed and reachable on `PATH`: + +- **Claude Code** *(recommended)* — `npm install -g @anthropic-ai/claude-code` +- Codex — `npm install -g @openai/codex` +- OpenCode — see project docs +- Pi — `npm install -g @mariozechner/pi-coding-agent` + +OpenClaw's `coding-agent` skill must also be enabled in your OpenClaw config (`skills.entries.coding-agent.enabled: true`). + +## Install (recommended: as an OpenClaw plugin) + +```bash +# 1. Enable the built-in coding-agent skill we delegate to +openclaw config set skills.entries.coding-agent.enabled true --strict-json + +# 2. Install the AmphiLoop repo as a linked openclaw plugin +# (--link points at your local clone instead of copying — edits to +# SKILL.md / agents/* / scripts/* are picked up live) +openclaw plugins install /abs/path/to/AmphiLoop-02 --link + +# 3. Restart so the gateway loads the plugin +openclaw gateway restart +``` + +That registers the bundled skill `amphiloop-build` automatically — no `skills.load.extraDirs` entry needed. + +> **Note on plugin classification.** AmphiLoop ships both `openclaw.plugin.json` (OpenClaw native manifest) and `.claude-plugin/plugin.json` (the original Claude Code marker). For OpenClaw to classify the repo as a **native** plugin (not as a Claude Code bundle), it also needs `package.json` with `openclaw.extensions: ["./openclaw-entry.mjs"]` plus the tiny `openclaw-entry.mjs` no-op entry. Both files live at the AmphiLoop repo root and are committed. + +### Fallback: mount only this skill via `extraDirs` (no plugin) + +If you don't want a plugin install, you can mount just this skill directory: + +```bash +openclaw config set skills.load.extraDirs \ + '["/abs/path/to/AmphiLoop-02/extensions/openclaw-skill"]' \ + --strict-json --merge +openclaw gateway restart +``` + +The skill works the same; you just lose the `openclaw plugins enable/disable/inspect/list` lifecycle controls. 
+ +## Verification + +```bash +# Plugin should be Format: openclaw, Status: loaded +openclaw plugins inspect amphiloop + +# Skill should be ✓ Ready +openclaw skills info amphiloop-build + +# Cross-check that coding-agent itself is also Ready +openclaw skills check 2>&1 | grep coding-agent +``` + +After both are ready, the slash command `/amphiloop_build` is live in any OpenClaw chat surface. + +## Usage + +In an OpenClaw chat: + +``` +/amphiloop_build "" +``` + +What happens next: + +1. The skill asks you to pick the coding worker for this run (`claude` / `codex` / `opencode` / `pi`). Reply with one word. +2. The skill asks for `` (where the generated project will live; offers a sensible default). +3. The skill drives Phases 2–3 (config + explore) directly using the OpenClaw host model. Outputs land at `/.bridgic/build_context.md` and `/.bridgic/explore/exploration_report.md`. +4. **The skill writes the worker brief and TODO list** to `/.amphiloop/AGENT_BRIEF.md` and `/.amphiloop/TODOS.md`. The brief tells the worker which bridgic-* SKILL.md files to read so the API surface is correct; the TODO list is the work plan. +5. The skill opens **one** long-lived `coding-agent` session with the worker you picked, sends a tiny pointer prompt ("read AGENT_BRIEF.md, read TODOS.md, work through them"), and watches the worker tick TODOs to `[x]`. +6. Phase 5 verifies the generated project. If verification fails because of a code defect, the skill **appends new FIX entries to TODOS.md** and tells the worker (in the same long-lived session) to continue. Up to 3 fix rounds. +7. The skill closes the worker session and sends you a summary message with the project path and pass/fail status. + +## Design notes + +- **Communication channel is the working directory, not the prompt.** The kickoff prompt is ~200 chars and only points at `.amphiloop/AGENT_BRIEF.md` + `.amphiloop/TODOS.md`. Methodology, API references, and bug reports all flow through files. 
Benefit: worker isn't drowned in a giant context blob; host can monitor progress by re-reading TODOS.md; bug fixes are appended TODOs instead of fresh fix prompts. +- **Worker is forced to read the bridgic-* SKILL.md files.** The brief lists them as STEP 1 mandatory reads. Without this, the worker hallucinates APIs that don't exist in the bridgic-amphibious / bridgic-llms / bridgic-browser SDK. +- **Why a single long-lived session?** So the worker carries context from the initial generation into any follow-up fix. Restarting the worker per call would force it to re-derive everything from disk and risks stylistic drift. +- **Why ask the user for the worker?** Worker quality varies by task. Claude Code is the closest fit to AmphiLoop's coding methodology and is the recommended default; the others are available for users who prefer them. +- **Why does the skill never write code itself?** The host model (default Pi) is good at orchestration but weaker at sustained coding. All code production is routed to a worker that is purpose-built for it. +- **No write-conflict on TODOS.md.** Host writes to it only while the worker is sentinel-waiting (between turns); worker writes to it only while actively working. The sequential prompt/sentinel cycle enforces this. 
+ +## Reference + +- AmphiLoop skill source: `extensions/openclaw-skill/amphiloop-build/SKILL.md` +- OpenClaw native plugin manifest: `/openclaw.plugin.json`, `/package.json`, `/openclaw-entry.mjs` +- OpenClaw built-in skill we delegate to: `/skills/coding-agent/SKILL.md` +- OpenClaw slash-command docs: `/docs/tools/slash-commands.md` +- OpenClaw plugin CLI: `openclaw plugins --help` diff --git a/extensions/openclaw-skill/amphiloop-build/SKILL.md b/extensions/openclaw-skill/amphiloop-build/SKILL.md new file mode 100644 index 0000000..2d128af --- /dev/null +++ b/extensions/openclaw-skill/amphiloop-build/SKILL.md @@ -0,0 +1,362 @@ +--- +name: amphiloop-build +description: Drive AmphiLoop's 5-phase pipeline inside OpenClaw. Host orchestrates and verifies; the built-in `coding-agent` skill writes all code. Host and coding-agent communicate via shared files in the working directory (`.amphiloop/AGENT_BRIEF.md` + `.amphiloop/TODOS.md`), not by stuffing big prompts. ONE long-lived worker session for the whole run, sequential. Worker (claude/codex/opencode/pi) is chosen by the user at run start. +user-invocable: true +metadata: + openclaw: + emoji: 🌊 + requires: + anyBins: ["claude", "codex", "opencode", "pi"] + config: ["skills.entries.coding-agent.enabled"] +--- + +# AmphiLoop Build (OpenClaw) + +Turn a task description into a runnable bridgic-amphibious Python project. + +## Architecture + +- **Host (you)** — the brain: reasoning, planning, verifying. You prepare a working directory with `.amphiloop/AGENT_BRIEF.md` (what the worker must read) + `.amphiloop/TODOS.md` (what the worker must do). When verify finds bugs, you append them as new TODO entries. +- **`coding-agent` skill** — the hands: a worker (`claude`/`codex`/`opencode`/`pi`) reads the brief, reads the bridgic-* SKILL.md files it points to so the API is correct, then works through TODOS.md ticking items off as it goes. + +**Communication channel** is the working directory, not the prompt. 
The kickoff prompt stays short (~200 chars: "read AGENT_BRIEF.md, read TODOS.md, work through them"). Methodology, API references, and bug reports all flow through files. This avoids context overflow, lets the host monitor progress by re-reading TODOS.md, and forces the worker to actually read the bridgic skill SKILL.md files. + +**Single long-lived worker session** for the whole run (strictly sequential) so the worker carries context from initial generation into any follow-up fix. + +## Argument parsing + +`/amphiloop_build [--]` + +- **`--` present** (e.g. `--browser`) → set `SELECTED_DOMAIN = ` and skip Phase 1 auto-detection. Validate that `{baseDir}/../../domain-context//` exists. If it does not, list available domains and ask the user to pick one or rerun without a flag. +- **No flag** → leave `SELECTED_DOMAIN` unresolved; resolve it during Phase 1's auto-detection step. +- **Anything else** (extra tokens, multiple flags, free-form text) → stop and ask the user to clarify. Do not silently treat free-form text as TASK.md content — Phase 1 owns TASK.md construction. + +## Pipeline overview + +``` +A0. Parse arguments ── argument handling (see above) +A. Pick coding worker ── user picks claude / codex / opencode / pi +B. Prepare working directory ── confirm , capture notification route +B'. Initialize Task (Phase 1) ── seed TASK.md template, user fills in, validate, domain auto-detect +C. Configure & Setup (Phase 2) ── project mode, LLM config, env setup → build_context.md +D. Explore (Phase 3) ── probe target environment → .bridgic/explore/exploration_report.md +E0. Prepare work template ── write AGENT_BRIEF.md + TODOS.md +E. Generate Code (Phase 4) ── open coding-agent session, worker completes TODOs +F. Verify (Phase 5) ── run monitor.sh, fix-attempt loop via TODOS.md +G. 
Cleanup & report ── kill session, send summary +``` + +> **Path variables** — used throughout this document: +> +> | Variable | Resolves to | +> |---|---| +> | `{baseDir}` | Directory containing this SKILL.md | +> | `{baseDir}/../..` | AmphiLoop repository root (agents, skills, templates, scripts, domain-context) | +> | `` | User-confirmed working directory for the generated project (set in Step B) | +> | `build_context_path` | `/.bridgic/build_context.md` | +> | `domain_context_path` | `{baseDir}/../../domain-context//.md` when resolved; `none` otherwise | +> +> `build_context.md` is the single source of truth for Phase 3→5 — every later step reads it for context, and Phase 3 / Phase 4 each fill their `## Outputs` placeholder after completing. + +This skill reads methodology files from `{baseDir}/../../agents/`, the task template from `{baseDir}/../../templates/build-task-template.md`, domain-context from `{baseDir}/../../domain-context//`, and helper scripts from `{baseDir}/../../scripts/run/` — **do not ask the user for an AmphiLoop path**. + +## Human interaction & step checkpoints + +Every prompt to the user — including the gates between steps and in-step side-effect gates — MUST follow `{baseDir}/../../agents/human-interaction-protocol.md`. Read it once before starting Step A0. The host operates at **Tier 2**: every gate is a clearly formatted chat message that waits for the user's explicit textual reply. + +**Mandatory step-transition gates** (send a 1–3 line summary + "Continue?" question; wait for `yes` / `y` / `continue` before advancing): + +| Boundary | Why gate here | +|---|---| +| Step B' → Step C | Phase 1 finished — TASK.md validated, `SELECTED_DOMAIN` resolved. About to enter Phase 2 which collects pipeline mode, LLM credentials, and runs `setup-env.sh`. | +| Step C → Step D | Config decisions recorded. About to probe the target environment (may open browsers, hit external sites, mutate user data). | +| Step D → Step E0 | Exploration finished. 
About to spawn the long-lived worker session and burn LLM tokens for code generation. | +| Step E → Step F | Code frozen. About to run the generated program for the first time under `monitor.sh` (real side effects, real API calls). | +| Each fix attempt in Step F | Before appending FIX-N and re-engaging the worker, give the user a chance to inspect the failure or stop. | + +**Mandatory in-step gates**: + +- Step C → before `setup-env.sh` runs (surfaced by `amphibious-config.md` Step 4.0). +- Step D → any `HUMAN:` handoff during exploration (login wall, CAPTCHA, etc.) — ask via chat; never echo + poll. +- Step F → on `monitor.sh` exit code 2 — relay the runtime prompt to the user per the verify methodology's OpenClaw addendum. + +The user must always have the option to interrupt and redirect at every gate. Silence is **not** consent. + +--- + +### Step A0. Parse arguments + +See [Argument parsing](#argument-parsing) above. After this step, `SELECTED_DOMAIN` is either a valid domain name or unresolved. + +### Step A. Pick the coding worker + +Send the user exactly: + +> About to start AmphiLoop build. Pick the coding worker for this run: +> `claude` (recommended) | `codex` | `opencode` | `pi` (not recommended). +> Reply with one word. + +Wait for the reply. Record it as ``. Reuse `` for the entire build run; do not switch mid-run. + +If the user replies with anything other than the four valid options, ask again rather than guessing. + +### Step B. Prepare the working directory + +1. Confirm `` with the user; offer a sensible default (e.g., a fresh `mktemp -d`) if they have not specified one. The AmphiLoop repo path does **not** need to be asked — it is `{baseDir}/../..` by construction. +2. Capture the OpenClaw notification route of the current conversation: `notifyChannel`, `notifyTarget`, `notifyAccount`, `notifyReplyTo`, `notifyThreadId`. You will need them later for Step G. + +This step does not write code and does not write `TASK.md`. Do it directly. 
+ +--- + +### Step B'. Phase 1 — Initialize Task + +1. **Seed the template.** `read` `{baseDir}/../../templates/build-task-template.md`, then `write` its contents **verbatim** to `/TASK.md`. Do not modify, summarize, or pre-fill any section. + +2. **Tell the user to fill it in.** Send a chat message: + + > A task template has been created at `/TASK.md`. Please open it, fill in the four sections (`Task Description` / `Expected Output` / `Domain References` / `Notes`), save, and reply `done` to continue (or `cancel` to abort). + +3. **Wait for an explicit `done` reply.** Silence is **not** consent. Do not poll a flag file; do not auto-advance after a fixed wait. Any other reply (counter-question, "wait", silence) is handled before re-prompting. + +4. **Read TASK.md back** and parse the four sections: + - **Task Description** — goal of the project. + - **Expected Output** — what indicates success. + - **Domain References** — list of paths to domain reference files (may be empty). Each entry may be a SKILL.md, CLI help dump, SDK doc, style guide, or any other material that teaches the agents *how to act* or *what rules to follow*. + - **Notes** — optional additional constraints. + +5. **Validate.** + - `Task Description` must be non-empty. + - `Expected Output` must be non-empty. + - For every `Domain References` entry that is not a comment / example / blank line: resolve relative paths against ``, use absolute paths as-is, and confirm the file exists on disk. **Any missing path is a hard validation error.** + - On any failure: send a chat message naming the specific field / path that failed, ask the user to fix `TASK.md` and reply `done` again. Loop until validation passes (or the user replies `cancel`). + +6. **Domain auto-detection** — execute **only** if `SELECTED_DOMAIN` is still unresolved after Step A0: + 1. List subdirectories under `{baseDir}/../../domain-context/`. Each subdirectory is a candidate domain. + 2. 
For each candidate, `read` its `intent.md` (the matching criteria for that domain). + 3. Compare `Task Description + Expected Output + Notes` against each candidate's `intent.md`. Pick the **single best match**, or `none` if no candidate has strong signals. + 4. **If a candidate matches**, present the decision: + + > Detected domain: **``**. Use the pre-distilled `` context for exploration, code generation, and verification? + > + > Reply `1` / `yes` — use `` context. + > Reply `2` / `no` — proceed with the generic (domain-agnostic) flow. + > Reply `3 ` — specify a different domain explicitly. + + On `1` set `SELECTED_DOMAIN = `. On `2` leave unresolved (generic flow). On `3 ` validate that `{baseDir}/../../domain-context//` exists and set `SELECTED_DOMAIN = `; otherwise re-prompt. + 5. **If no candidate matches**, do not ask — silently proceed with the generic flow (`SELECTED_DOMAIN` stays unresolved). + + After this step `SELECTED_DOMAIN` is either a valid domain name or unresolved (generic). + +7. **Step B' → Step C gate**: summarize in 1–3 lines what just landed (`TASK.md` validated, `SELECTED_DOMAIN = `, count of resolved Domain References) and ask: + + > Proceed to Phase 2 (Configure & Setup)? This phase will collect pipeline mode and LLM configuration, then run `setup-env.sh` (which modifies the uv toolchain and writes `pyproject.toml`). Reply `yes` to continue, or describe what you want adjusted first. + + Wait for the explicit affirmative before continuing. + +This step does not write code; do it directly. + +--- + +### Step C. Phase 2 — Configure & Setup + +Inputs from Step B' (already established): parsed `TASK.md` fields (`Task Description`, `Expected Output`, `Domain References` with resolved absolute paths, `Notes`) and `SELECTED_DOMAIN` (a valid domain name or unresolved/generic). **Step C does not re-decide the domain** — that is Phase 1's responsibility. + +1. 
`read` the file `{baseDir}/../../agents/amphibious-config.md` to load the Phase 2 methodology. +2. Following that methodology — and feeding it the pre-resolved inputs above — drive Project Mode selection (Workflow / Amphiflow), LLM Configuration (`check-dotenv.sh`), Domain-specific Configuration (only when `SELECTED_DOMAIN` is resolved and `{baseDir}/../../domain-context//config.md` exists), and Environment Setup (`setup-env.sh`); then `write` the consolidated decision record to `/.bridgic/build_context.md`. Present every question from the methodology as a clearly formatted chat message and wait for the user's explicit textual reply. + +This produces a markdown decision record, not code. Do it directly. + +If `setup-env.sh` exits non-zero, the methodology doc says to **stop the entire pipeline** — respect that and do not enter Step D. + +On successful completion, `/.bridgic/build_context.md` exists and is the only artifact later steps need to read for context. + +**Step C → D gate**: summarize the recorded decisions (mode, llm_configured, domain) in 1–3 lines and ask: "Proceed to Phase 3 (Explore)? This phase will probe the target environment described in TASK.md — depending on the task it may open browsers, hit external sites, or read local files. Reply `yes` to continue, or describe what you want changed first." + +--- + +### Step D. Phase 3 — Explore + +1. `read` the file `{baseDir}/../../agents/amphibious-explore.md` to load the Phase 3 methodology. +2. Following that methodology, use `bash` to observe the target environment (running existing tools, taking notes, capturing samples). `write` the consolidated observations to `/.bridgic/explore/exploration_report.md`. + +Do not start Phase 4 until exploration is complete — the report and artifact files under `/.bridgic/explore/` are the sole bridge between Phase 3 and Phase 4. After exploration finishes, fill `## Outputs → exploration_report` in `build_context.md`. + +Writing notes is not coding — do it directly. 
+ +**HUMAN handoff during exploration** (login wall, CAPTCHA, manual confirmation, providing a token, etc.): the methodology already enumerates the tiers and anti-patterns; in this context the **Tier 2 case** applies — use the chat channel captured in Step B. + +**Exception**: if the exploration genuinely needs a probe script to be authored, treat that as the **first** code-writing action of the run and jump to Step E0/E (use the probe-script as the first TODO). + +**Step D → E0 gate**: summarize what exploration found — operation sequence sketch, any HUMAN steps in the plan, artifacts captured. Ask: "Exploration complete (`/.bridgic/explore/exploration_report.md`). Proceed to Phase 4 (Code Generation)? This will spawn a long-lived `` session and burn LLM tokens to write the project. Reply `yes` to continue, or describe what you want adjusted in the plan first." + +--- + +### Step E0. Prepare the work template + +Before opening any worker session, host must write two communication files into `/.amphiloop/`. These are the entire interface between host and worker for this run. + +1. **Write `/.amphiloop/AGENT_BRIEF.md`** — a static reference brief. Use the `write` tool. Recommended structure: + + ```markdown + # Worker brief + + You are doing the coding for an AmphiLoop bridgic-amphibious project build. + Working directory: + + ## STEP 1 — read the bridgic API surface FIRST (mandatory before any code) + + Use your file-read tool on each of these files in order. Do not skip any. 
+ + - {baseDir}/../../skills/bridgic-amphibious/SKILL.md + - {baseDir}/../../skills/bridgic-llms/SKILL.md + - {baseDir}/../../skills/bridgic-browser/SKILL.md ← only if the task involves browser automation; skip otherwise + - {baseDir}/../../agents/amphibious-code.md ← the coding methodology you must follow + - {baseDir}/../../domain-context//code.md ← only if a matching domain context exists + + The bridgic-* SKILL.md files define the actual class names, method signatures, and APIs you must use. Inventing API surface that is not in those files will fail. + + ## STEP 2 — read this run's context + + - /TASK.md + - /.bridgic/build_context.md + - /.bridgic/explore/exploration_report.md + + ## STEP 3 — work through TODOS.md + + Open /.amphiloop/TODOS.md. Pick the topmost open `[ ]` item, complete it, then EDIT TODOS.md in place to change its `[ ]` to `[x]`. Save. Move to the next open item. Repeat until no open items remain. + + ## STEP 4 — when all TODOs are done + + Print exactly this line on stdout and then wait for further input. DO NOT exit: + `### AMPHI-TASK-DONE ###` + + The orchestrator may append new `[ ]` items to TODOS.md later (e.g. fixes after verification). When you receive a "continue" instruction, re-open TODOS.md, find the new open items, and resume from STEP 3. + + ## Output layout — MANDATORY + + Final deliverable lives **inside** `//`. amphi.py / main.py / log/ / result/ and every support module MUST be inside `/`. Dropping them at `/` directly is a hard error — the orchestrator will reject the run. + + `//` layout: + amphi.py ← entry, scaffold-created here + main.py ← entry, you write here + log/ ← runtime logs + result/ ← task outputs + .py ← any extra helpers go here too, never at + + `/` only carries uv metadata (`pyproject.toml`, `uv.lock`, `.venv/`, `.env`), the AmphiLoop workspace (`.bridgic/`, `.amphiloop/`), and `TASK.md`. Never write code into `/.bridgic/` — that is the orchestrator's workspace. 
+ + Anti-patterns to avoid: + - ❌ `amphi.py` / `main.py` at `/` (sibling of `pyproject.toml`) + - ❌ Treating `/` as a Python import package (adding `__init__.py`, importing it from a sibling main.py at `/`) + - ❌ `log/` or `result/` at `/` instead of inside `/` + - ❌ Any `.py` file written under `/.bridgic/` + ``` + + When writing this file, substitute real absolute paths for `{baseDir}/../..` and ``. For the `domain-context//code.md` line: if `SELECTED_DOMAIN` is unresolved (the generic flow), **delete that line entirely**; otherwise replace `` with the resolved domain name (and confirm the file exists — drop the line if it does not). Same drop-if-missing rule applies to the optional `bridgic-browser` line. + +2. **Write `/.amphiloop/TODOS.md`** — the initial Phase 4 task list. Use the `write` tool. Derive 5–10 items by mapping the sections of `{baseDir}/../../agents/amphibious-code.md` into checkboxes. Tailor wording to the current task. A typical seed: + + ```markdown + # AmphiLoop build TODOs + + - [ ] T1: Scaffold inside `/`. Run `mkdir -p / && cd / && uv run bridgic-amphibious create --task ""`. The `cd` is REQUIRED — running the CLI from `` drops `amphi.py` at the wrong level. After it returns, verify `//amphi.py` exists; if it landed at `/amphi.py` instead, move it inside `/` and fix. + - [ ] T2: Create empty `//log/` and `//result/` dirs (NOT at `/log/` or `/result/`). + - [ ] T3: In `/amphi.py`, define the CognitiveContext for this task following build_context.md. + - [ ] T4: In `/amphi.py`, implement on_workflow yielding ActionCalls that mirror the Operation Sequence in exploration_report.md. + - [ ] T5: In `/amphi.py`, implement on_agent think_units for AMPHIFLOW fallback per the methodology. + - [ ] T6: Register task tools (FunctionToolSpec) for any domain-specific operations the workflow needs. Inline in `/amphi.py` (or split into `/tools.py` per the methodology — never at `/`). 
+ - [ ] T7: Implement helper functions for parsing VOLATILE refs from ctx.observation. Same placement rule — inside `/`. + - [ ] T8: Write `/main.py` with LLM init (per skills/bridgic-llms/SKILL.md), tools assembly, and the agent.arun(...) call. + - [ ] T9: Run `cd && uv run python /main.py` once dry to confirm it boots without import or syntax errors. + - [ ] T10: Final layout check — `ls ` should show `pyproject.toml`, `uv.lock`, `.venv/`, `.env`, `.bridgic/`, `.amphiloop/`, `TASK.md`, `/` and NOTHING ELSE. Any `.py` file at `/` is a violation. + ``` + +3. Send a short progress note to the user: "Worker brief and TODO list written to `/.amphiloop/`. Opening coding-agent session next." + +--- + +### Step E. Phase 4 — Generate Code + +This is the first code-writing action of the run (unless Step D opened the session for a probe). The goal: open one worker session, capture ``, and submit a tiny pointer prompt that hands the worker over to AGENT_BRIEF.md + TODOS.md. + +1. **Invoke the `coding-agent` skill.** Tell it: + - `Worker: ` + - `Workdir: ` (so the worker starts in the right place; `cd` into it via the spawn config) + - `Mode: INTERACTIVE` — launch the worker in REPL/interactive mode, **not** a one-shot. Concretely: `claude` must be launched **without** `--print`; `codex` **without** `exec`; `pi` and `opencode` in their REPL form. PTY rules and exact spawn flags are coding-agent's responsibility — do not hand-roll bash here. + - `Background: yes` (coding-agent's hard rule). + - `This is a long-lived orchestrated session.` Tell coding-agent: do **not** require the worker to self-notify the user via `openclaw message send` per task. The orchestrator (this skill) will summarize at Step G. + - `Capture the OpenClaw process sessionId returned by bash background:true and report it back so the orchestrator can remember it as .` + +2. Once you have ``, submit the **kickoff prompt** via `process action:submit sessionId: data:`. 
The prompt is short and is the SAME shape every time: + + > Working directory is ``. First, read `.amphiloop/AGENT_BRIEF.md` end-to-end and follow it (it tells you which SKILL.md files to read so you know the bridgic API surface, and which context files to read for this task). Then read `.amphiloop/TODOS.md` and work through every open `[ ]` item top-to-bottom, editing TODOS.md to change `[ ]` to `[x]` as you finish each one. When all items are `[x]`, print exactly `### AMPHI-TASK-DONE ###` on its own line and wait for further input. DO NOT exit or terminate. + + Do NOT paste methodology, build_context, or exploration data into the prompt — they are reachable from AGENT_BRIEF.md. + +3. Send a short progress note to the user before submitting ("Phase 4: handing TODOs to the worker — read TODOS.md to follow along"). + +4. Monitor with `process action:log sessionId:` until the sentinel `### AMPHI-TASK-DONE ###` appears. Optionally `read` `/.amphiloop/TODOS.md` periodically to watch `[x]` count rise. + +5. **Do NOT kill the session.** + +6. After the worker completes (sentinel appears and all TODOS are `[x]`), fill `## Outputs → generator_project` in `build_context.md` with the path to `//`. + +7. **Step E → F gate**: summarize the worker's output — list the files now under `//` (`amphi.py`, `main.py`, etc.), confirm `[x]` count on TODOS.md. Ask: "Code generation complete. Proceed to Phase 5 (Verify)? This will run the generated program for the first time under `monitor.sh` — it will execute against the real target environment, may make real API calls, and may surface runtime `HumanCall` prompts you'll need to answer. Reply `yes` to continue, or `pause` to inspect the generated code first." Wait for the explicit affirmative reply. + +--- + +### Step F. Phase 5 — Verify + +1. `read` the file `{baseDir}/../../agents/amphibious-verify.md` to load the Phase 5 methodology. +2. 
Run `{baseDir}/../../scripts/run/monitor.sh` against the generated project via `bash` (or follow whatever execution recipe the methodology prescribes for this run). Collect the output. + + **If `monitor.sh` exits with code 2** (the running program hit a `HumanCall`), follow the verify methodology's **OpenClaw addendum** — host reads `/.bridgic/verify/human_request.json`, relays the prompt to the user via chat, writes the user's reply into `human_response.json`, and re-invokes `monitor.sh`. **Never** invent a polling loop here; the protocol forbids it. + +3. Decide based on the exit: + - **Pass** — proceed to Step G. + - **Fail, root cause is in the generated code** (logic error, missing import, wrong API call, etc.) — apply a **fix-attempt gate** before re-engaging the worker: + - Send the user: `[CHECKPOINT]` Phase 5 verify failed (attempt N/3). One-line root cause: ``. Proposed fix: ``. Reply `yes` to append FIX-N to TODOS.md and ask the worker to retry; reply `stop` to abort and inspect manually; or reply with edits to the proposed fix wording. + - Wait for the user's explicit reply. + - On `yes` (or an edited fix description): **append** one or more FIX entries to `/.amphiloop/TODOS.md` (use `read` then `write` the full new content; the worker is sentinel-waiting and not touching the file right now). Format each entry as: + ```markdown + - [ ] FIX-N: : + + ``` + Use a stable monotonic N across attempts (FIX-1, FIX-2, ...). + - Submit a one-line continue prompt to the **same** `` via `process action:submit sessionId: data:`: + > New FIX entries appended to `.amphiloop/TODOS.md`. Re-read TODOS.md and resume from the first open `[ ]` item. Same rules as before: tick each item to `[x]` as you finish, then print `### AMPHI-TASK-DONE ###` and wait. DO NOT exit. + - Monitor with `process action:log` until the sentinel reappears. + - Re-run verification (return to Step F.2). + - On `stop` → proceed to Step G with `fail` status and the user-aborted reason. 
+ - **Fail, root cause is NOT code** (missing env var, missing credential, network issue, missing input data): + - Send the user: `[USER ACTION REQUIRED]` Phase 5 failed for a non-code reason: ``. Reply with the missing value (e.g. an env var assignment), or `cancel` to stop the run. + - On `cancel` → proceed to Step G with `fail` status and the user-cancelled reason. Otherwise, apply the user's instructions yourself with `bash` / `write` (do not append a FIX TODO and do not submit to the worker). + - Re-run verification. + +4. Cap fix attempts at 3. After 3 consecutive code-fix attempts that still fail, stop the loop and proceed to Step G with a `fail` status. + +--- + +### Step G. Cleanup and report + +1. Kill the long-lived worker session: `process action:kill sessionId:`. +2. Send a final summary to the user with `openclaw message send` (use the notification route captured in Step B). Include: + - Pass / fail status + - Path to the generated project (`//`) + - Number of coding-agent turns used (1 for the Phase 4 prompt, plus N for fix attempts) + - If `fail`: the last failure summary so the user knows what to investigate + +--- + +## Common constraints + +- **Never write code yourself.** All code-writing — Phase 4 generation, Phase 5 fixes, Phase 3 probe scripts, anything else — must go through `process:submit` to `` and the TODO list. Do not edit `.py` / `.ts` / `.sh` files with the host's `write` or `edit` tools. (`/.amphiloop/AGENT_BRIEF.md` and `TODOS.md` are written by the host — those are protocol files, not code.) +- **All worker direction flows through TODOS.md.** Methodology, API references, and bug reports go into `/.amphiloop/AGENT_BRIEF.md` and `/.amphiloop/TODOS.md`, not into the prompt. The kickoff prompt and continue prompt are deliberately tiny pointers to those files. +- **One worker, one sessionId, for the whole run.** `` is chosen once in Step A; `` is opened once in Step E (or earlier in a Step D probe) and reused throughout. +- **Strictly sequential, no concurrent file writes.** The worker handles one prompt at a time. 
The host writes to TODOS.md only while the worker is sentinel-waiting; the worker writes to TODOS.md only while it is actively working. This is enforced by the sequential prompt/sentinel cycle. +- **Sentinel discipline.** Every prompt you submit ends with the requirement to print `### AMPHI-TASK-DONE ###` so you have a deterministic completion signal. If after a generous wait the sentinel has not appeared but the expected files exist and the worker output has been quiet, treat that as completion (sentinel missed) and proceed. +- **Verify the worker actually read the brief.** After the kickoff prompt, scan `process:log` for evidence the worker called its file-read tool on the bridgic-* SKILL.md files listed in AGENT_BRIEF.md. If it skipped them (jumped straight to coding), inject one corrective `process:submit`: "You skipped the brief. STOP and read `.amphiloop/AGENT_BRIEF.md` STEP 1 files now before any further code." +- **Do not re-implement coding-agent.** Do not write `claude --print '...'` / `codex exec '...'` style bash here — coding-agent's SKILL.md owns spawn details (PTY, background, flags). This skill only tells coding-agent **what** to launch and **how** to drive it via `process:submit`. +- **Progress visibility.** Send a one-line progress note before each `process:submit` so the user can follow the run. +- **Notification deviation.** Tell coding-agent up front this is a long-lived orchestrated session and the orchestrator will summarize at Step G. Do not have the worker self-notify per task. diff --git a/openclaw-entry.mjs b/openclaw-entry.mjs new file mode 100644 index 0000000..e8bf44b --- /dev/null +++ b/openclaw-entry.mjs @@ -0,0 +1,12 @@ +import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"; + +export default definePluginEntry({ + id: "amphiloop", + register: () => { + // No-op: AmphiLoop's behavior is entirely in the bundled skill at + // extensions/openclaw-skill/amphiloop-build/SKILL.md. 
This entry exists only + // so OpenClaw classifies AmphiLoop as a native plugin (via package.json's + // openclaw.extensions) instead of falling through to Claude Code bundle + // detection from .claude-plugin/plugin.json. + }, +}); diff --git a/openclaw.plugin.json b/openclaw.plugin.json new file mode 100644 index 0000000..3b79ad8 --- /dev/null +++ b/openclaw.plugin.json @@ -0,0 +1,15 @@ +{ + "id": "amphiloop", + "name": "AmphiLoop", + "description": "Drive AmphiLoop's 5-phase pipeline inside OpenClaw via /amphiloop_build; delegates every code-writing step to the built-in coding-agent skill via a TODO-protocol working directory.", + "version": "1.0.0", + "activation": { + "onStartup": false + }, + "skills": ["./extensions/openclaw-skill"], + "configSchema": { + "type": "object", + "additionalProperties": false, + "properties": {} + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..1a34dfe --- /dev/null +++ b/package.json @@ -0,0 +1,10 @@ +{ + "name": "amphiloop-openclaw-plugin", + "version": "1.0.0", + "private": true, + "type": "module", + "description": "OpenClaw plugin manifest for AmphiLoop. Required so OpenClaw classifies AmphiLoop as a native plugin instead of falling back to Claude Code bundle detection (.claude-plugin/plugin.json). All actual logic lives in the bundled skill at extensions/openclaw-skill/amphiloop-build/.", + "openclaw": { + "extensions": ["./openclaw-entry.mjs"] + } +}