From 3a21a281f939ca5bfc387ebe2d8e7b848a9e101d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?batuhan=20i=C3=A7=C3=B6z?= Date: Sun, 22 Mar 2026 19:30:04 +0100 Subject: [PATCH 1/8] wip --- README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 3690013c..e6355dd5 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,23 @@ -# AgentRemote + + + + + + +
+
+ + + +
+
+

AgentRemote

+
+ +
+ ⚠️ This project is heavily in development and not ready for use.
+ Try at your own risk +
AgentRemote brings all your agents into one app. @@ -8,6 +27,8 @@ Connect agent runtimes to Beeper with full history, live streaming, tool approva Run the bridge next to your agent, then talk to it from Beeper on your phone or desktop. + + ## Why Use It - Keep agents on your own machine, server, or private network From d80e9f8281f786d5c62efa29fc48e9e29f98a069 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?batuhan=20i=C3=A7=C3=B6z?= Date: Sun, 22 Mar 2026 19:35:25 +0100 Subject: [PATCH 2/8] wip --- README.md | 283 ++----------- bridges/codex/README.md | 55 +-- bridges/openclaw/README.md | 44 +- bridges/opencode/README.md | 47 +-- docs/bridge-orchestrator.md | 75 ++-- docs/matrix-ai-matrix-spec-v1.md | 507 +++-------------------- docs/msc/com.beeper.mscXXXX-commands.md | 102 +---- docs/msc/com.beeper.mscXXXX-ephemeral.md | 140 +------ docs/msc/com.beeper.mscXXXX-streaming.md | 266 ++---------- 9 files changed, 243 insertions(+), 1276 deletions(-) diff --git a/README.md b/README.md index e6355dd5..f06b437b 100644 --- a/README.md +++ b/README.md @@ -1,277 +1,50 @@ - - - - - - -
-
- - - -
-
-

AgentRemote

-
+# AgentRemote -
- ⚠️ This project is heavily in development and not ready for use.
- Try at your own risk -
+AgentRemote connects Beeper to self-hosted agent runtimes. -AgentRemote brings all your agents into one app. +It gives Matrix/Beeper chats a bridge layer for full history, live streaming, approvals, and remote access, while the actual runtime stays on your machine or network. -Beeper becomes the universal remote for agents. +This repository is still experimental. -Connect agent runtimes to Beeper with full history, live streaming, tool approvals, and encrypted delivery. +## Included bridges -Run the bridge next to your agent, then talk to it from Beeper on your phone or desktop. - - - -## Why Use It - -- Keep agents on your own machine, server, or private network -- Use Beeper instead of building a separate web UI -- Stream responses and approve tool calls in the same chat -- Reach your agents from anywhere Beeper runs - -## Open Source Focus - -This repository is centered on the self-hosted path. - -That means: - -- local developer machines -- homelabs -- office servers -- runtimes behind a firewall -- private deployments that still want a polished remote interface - -There is a broader product direction around richer AI chats and more opinionated agent experiences. Open source here is focused on making the bridge layer for private deployments easy to run and hard to break. - -## AgentRemote SDK - -If you want to build your own bridge, start with the SDK in [`sdk/`](./sdk). - -The SDK handles the Matrix and Beeper side of the bridge for you: - -- bridge bootstrapping and registration -- room and conversation wrappers -- streaming turn lifecycle -- tool approval UI -- agent identity and capability metadata - -The main entrypoint is `sdk.New(sdk.Config{...})`. 
- -In practice, most custom bridges only need three things: - -- an `sdk.Agent` that represents the remote assistant in Beeper -- an `OnConnect` hook that builds whatever runtime client you need -- an `OnMessage` hook that turns an incoming Beeper message into model output - -### Minimal SDK Shape - -This is the smallest useful shape of a bridge: - -```go -bridge := sdk.New(sdk.Config{ - Name: "my-bridge", - Agent: &sdk.Agent{ - ID: "my-agent", - Name: "My Agent", - Description: "A custom agent exposed through Beeper", - ModelKey: "openai/gpt-5-mini", - Capabilities: sdk.BaseAgentCapabilities(), - }, - OnConnect: func(ctx context.Context, login *sdk.LoginInfo) (any, error) { - return newRuntimeClient(), nil - }, - OnMessage: func(session any, conv *sdk.Conversation, msg *sdk.Message, turn *sdk.Turn) error { - turn.WriteText("hello from my bridge") - turn.End("stop") - return nil - }, -}) - -bridge.Run() -``` - -`turn` is the important piece here. You can write text and reasoning deltas into it, request approvals, attach sources/files, and then finalize the message with `turn.End(...)` or `turn.EndWithError(...)`. - -### Simple OpenAI SDK Bridge - -The example below is intentionally minimal. It uses the Go OpenAI SDK directly and lets AgentRemote handle the chat room, sender identity, and message lifecycle. 
- -```go -package main - -import ( - "context" - "fmt" - "log" - "os" - - "github.com/beeper/agentremote/sdk" - "github.com/openai/openai-go/v3" - "github.com/openai/openai-go/v3/option" -) - -func main() { - if os.Getenv("OPENAI_API_KEY") == "" { - log.Fatal("OPENAI_API_KEY is required") - } - - bridge := sdk.New(sdk.Config{ - Name: "openai-simple", - Description: "A minimal OpenAI-backed AgentRemote bridge", - Agent: &sdk.Agent{ - ID: "openai-simple-agent", - Name: "OpenAI Simple", - Description: "Minimal bridge example using openai-go", - ModelKey: "openai/gpt-4o-mini", - Capabilities: sdk.BaseAgentCapabilities(), - }, - OnConnect: func(ctx context.Context, login *sdk.LoginInfo) (any, error) { - return openai.NewClient(option.WithAPIKey(os.Getenv("OPENAI_API_KEY"))), nil - }, - OnMessage: func(session any, conv *sdk.Conversation, msg *sdk.Message, turn *sdk.Turn) error { - client := session.(*openai.Client) - - resp, err := client.Chat.Completions.New(turn.Context(), openai.ChatCompletionNewParams{ - Model: "gpt-4o-mini", - Messages: []openai.ChatCompletionMessageParamUnion{ - openai.SystemMessage("You are a helpful assistant replying through Beeper."), - openai.UserMessage(msg.Text), - }, - }) - if err != nil { - turn.EndWithError(err.Error()) - return err - } - if len(resp.Choices) == 0 { - err := fmt.Errorf("openai returned no choices") - turn.EndWithError(err.Error()) - return err - } - - turn.WriteText(resp.Choices[0].Message.Content) - turn.End(resp.Choices[0].FinishReason) - return nil - }, - }) - - bridge.Run() -} -``` - -Useful details from that example: - -- `OnConnect` returns the session object that will be passed back into every `OnMessage` call. -- `sdk.Message` already gives you the normalized incoming Beeper message text. -- `sdk.Turn` is where you stream or finalize the assistant reply. 
-- If you want live token streaming later, switch the OpenAI call to `client.Chat.Completions.NewStreaming(...)` or `client.Responses.NewStreaming(...)` and forward deltas with `turn.WriteText(...)`. - -## Included Bridges - -Each bridge has its own README with setup details and scope: - -| Bridge | Purpose | +| Bridge | What it connects | | --- | --- | -| `ai` | AI Chats bridge surface used by the project | -| [`codex`](./bridges/codex/README.md) | Connect the Codex CLI app-server to Beeper | -| [`openclaw`](./bridges/openclaw/README.md) | Connect a self-hosted OpenClaw gateway to Beeper | -| [`opencode`](./bridges/opencode/README.md) | Connect a self-hosted OpenCode server to Beeper | - -## Quick Start +| `ai` | The built-in Beeper AI chat surface in this repo | +| [`codex`](./bridges/codex/README.md) | A local `codex app-server` runtime | +| [`opencode`](./bridges/opencode/README.md) | A remote OpenCode server or a bridge-managed local OpenCode process | +| [`openclaw`](./bridges/openclaw/README.md) | A self-hosted OpenClaw gateway | -Log into Beeper and start a bridge: +## Quick start ```bash ./tools/bridges login --env prod +./tools/bridges list ./tools/bridges run codex ``` -Then open Beeper and use the connected bridge from chat. - -For a local Beeper environment: - -```bash -./tools/bridges login --env local -./tools/bridges whoami -./tools/bridges run codex -``` - -Configured instances live under `~/.config/agentremote/profiles/<profile>/instances/`: +Useful commands: -- `ai` -- `codex` -- `openclaw` -- `opencode` +- `./tools/bridges up <instance>` starts a bridge in the background +- `./tools/bridges status` shows local and remote bridge state +- `./tools/bridges logs <instance> --follow` tails logs +- `./tools/bridges stop <instance>` stops a running instance -Run any of them directly: +Instance state lives under `~/.config/agentremote/profiles/<profile>/instances/`. 
-```bash -./tools/bridges run ai -./tools/bridges run codex -./tools/bridges run openclaw -./tools/bridges run opencode -``` +## SDK -Or use the wrapper: +Custom bridges in this repo are built on [`sdk/`](./sdk), using: -```bash -./run.sh ai -./run.sh codex -./run.sh openclaw -./run.sh opencode -``` - -## Bridge Manager +- `bridgesdk.NewStandardConnectorConfig(...)` +- `bridgesdk.NewConnectorBase(...)` +- `sdk.Config`, `sdk.Agent`, `sdk.Conversation`, and `sdk.Turn` -Common commands: - -```bash -./tools/bridges list -./tools/bridges status -./tools/bridges logs codex --follow -./tools/bridges restart codex -./tools/bridges down codex -./tools/bridges whoami -``` - -Reset all local bridge state and registrations: - -```bash -./tools/bridges delete ai -./tools/bridges delete codex -./tools/bridges delete openclaw -./tools/bridges delete opencode -./tools/bridges logout -``` +See [`bridges/dummybridge`](./bridges/dummybridge) for a minimal bridge example. ## Docs -- [`docs/bridge-orchestrator.md`](./docs/bridge-orchestrator.md): local bridge management workflow -- [`docs/matrix-ai-matrix-spec-v1.md`](./docs/matrix-ai-matrix-spec-v1.md): Matrix transport profile for streaming, approvals, state, and AI payloads -- [`bridges/codex/README.md`](./bridges/codex/README.md): Codex bridge details -- [`bridges/openclaw/README.md`](./bridges/openclaw/README.md): OpenClaw bridge details -- [`bridges/opencode/README.md`](./bridges/opencode/README.md): OpenCode bridge details - -## Status - -Experimental and evolving quickly. The transport and bridge surfaces are real, but the project is still early. - -## Build - -Requires `libolm` for encryption support. - -```bash -./build.sh -``` - -Or with Docker: - -```bash -docker build -t agentremote . 
-``` +- CLI reference: [`docs/bridge-orchestrator.md`](./docs/bridge-orchestrator.md) +- Matrix transport surface: [`docs/matrix-ai-matrix-spec-v1.md`](./docs/matrix-ai-matrix-spec-v1.md) +- Streaming note: [`docs/msc/com.beeper.mscXXXX-streaming.md`](./docs/msc/com.beeper.mscXXXX-streaming.md) +- Command profile: [`docs/msc/com.beeper.mscXXXX-commands.md`](./docs/msc/com.beeper.mscXXXX-commands.md) diff --git a/bridges/codex/README.md b/bridges/codex/README.md index 50bbd97f..df8cf6f3 100644 --- a/bridges/codex/README.md +++ b/bridges/codex/README.md @@ -1,38 +1,28 @@ -# Codex Companion +# Codex Bridge -The Codex Companion bridge connects a local Codex CLI runtime to Beeper through AgentRemote. +The Codex bridge connects Beeper to a local Codex CLI runtime. -This is the bridge for people who want to run Codex on a workstation, laptop, or remote machine and use Beeper as the chat client. It exposes Codex conversations in Beeper with streaming responses, history, and tool approval flows, while keeping the actual runtime close to the code and credentials it needs. +It fits setups where Codex stays on the machine that already has the checkout, credentials, and tools. 
-## What It Does +## What it does -- Starts or connects to a local `codex app-server` process -- Bridges Codex threads into Beeper rooms -- Streams assistant output into chat as it is generated -- Preserves conversation history -- Surfaces tool calls and approval requests in Beeper +- starts or connects to `codex app-server` +- maps Codex conversations into Beeper rooms +- streams replies into chat +- carries approvals and tool activity through the same room -## Login Model +## Login modes -The bridge supports Codex-backed logins through: +The bridge supports: -- ChatGPT-based auth -- OpenAI API key auth -- Externally managed ChatGPT tokens +- ChatGPT login +- OpenAI API key login +- externally managed ChatGPT tokens +- host-auth auto-detection when Codex is already logged in on the machine -If Codex is already authenticated on the host, the bridge can auto-provision a login from the existing local Codex state. +Managed logins use an isolated `CODEX_HOME` per login. Host-auth uses the machine's existing Codex auth state. -## Best Fit - -Use this bridge when: - -- Your agent already runs through the Codex CLI -- You want a phone-friendly interface for coding agents -- You want to keep execution on your own machine or behind your own network boundary - -## Run It - -From the repo root: +## Run ```bash ./tools/bridges run codex @@ -43,16 +33,3 @@ Or: ```bash ./run.sh codex ``` - -For local Beeper environments: - -```bash -./tools/bridges login --env local -./tools/bridges run codex -``` - -## Notes - -- The bridge uses a dedicated Codex surface rather than the generic AI connector. -- Auth tokens are managed by Codex itself when using the local Codex home flow. -- This bridge is part of the self-hosted AgentRemote story: Beeper is the remote control, Codex stays where the work happens. 
diff --git a/bridges/openclaw/README.md b/bridges/openclaw/README.md index c38f18c2..b1cff907 100644 --- a/bridges/openclaw/README.md +++ b/bridges/openclaw/README.md @@ -1,39 +1,24 @@ -# OpenClaw Gateway +# OpenClaw Bridge -The OpenClaw Gateway bridge connects a self-hosted OpenClaw gateway to Beeper through AgentRemote. +The OpenClaw bridge connects Beeper to a self-hosted OpenClaw gateway. -This is the most direct way to expose OpenClaw sessions in Beeper while keeping the agent runtime on infrastructure you control. Run the gateway on a local machine, server, or private network, then use Beeper from mobile or desktop to talk to those agents remotely. +## What it does -## What It Does +- connects to a gateway over `ws`, `wss`, `http`, or `https` +- syncs OpenClaw sessions into Beeper rooms +- streams replies, approvals, and session updates into chat -- Connects to an OpenClaw gateway over `ws`, `wss`, `http`, or `https` -- Syncs OpenClaw sessions into Beeper rooms -- Streams responses and updates live -- Carries tool calls, approvals, and agent state into chat -- Preserves per-session metadata, usage, and history context - -## Login Model +## Login flow The bridge asks for: -- Gateway URL -- Optional gateway token -- Optional gateway password -- Optional label for distinguishing multiple gateways - -That makes it a good fit for private deployments where the gateway is reachable only on a LAN, VPN, Tailscale network, or internal hostname. - -## Best Fit - -Use this bridge when: +- gateway URL +- auth mode: none, token, or password +- optional label -- You already run OpenClaw and want Beeper as the client -- Your agents live behind a firewall and should stay there -- You want streaming and approvals without building a separate mobile UI +If the gateway requires device pairing, the login waits for approval and surfaces the request ID. 
-## Run It - -From the repo root: +## Run ```bash ./tools/bridges run openclaw @@ -44,8 +29,3 @@ Or: ```bash ./run.sh openclaw ``` - -## Notes - -- The bridge is intentionally focused on OpenClaw as a remote runtime, not a hosted SaaS workflow. -- It is a core example of the AgentRemote model: keep the gateway private, use Beeper as the interface. diff --git a/bridges/opencode/README.md b/bridges/opencode/README.md index 01878609..d21867ab 100644 --- a/bridges/opencode/README.md +++ b/bridges/opencode/README.md @@ -1,38 +1,32 @@ -# OpenCode Companion +# OpenCode Bridge -The OpenCode Companion bridge connects a self-hosted OpenCode server to Beeper through AgentRemote. +The OpenCode bridge connects Beeper to OpenCode. -It is built for setups where OpenCode is already running on a machine you trust and you want Beeper to become the front end. That can be a local development machine, a lab box, or an office server that you reach from your phone. +It supports two modes: -## What It Does +- remote: connect to an existing OpenCode server over HTTP +- managed: let the bridge launch `opencode` locally and keep a default working directory -- Connects to an OpenCode server over HTTP -- Subscribes to the OpenCode event stream for live updates -- Maps OpenCode sessions into Beeper rooms -- Streams responses, titles, and session events into chat -- Keeps the bridge usable even when the remote instance temporarily disconnects +## What it does -## Login Model +- maps OpenCode sessions into Beeper rooms +- streams replies and session updates into chat +- keeps reconnect logic inside the bridge instead of requiring a separate UI -The bridge asks for: +## Login flow -- Server URL -- Optional username -- Optional password for HTTP basic auth +Remote mode asks for: -Multiple OpenCode instances can be tracked per login, which is useful if you talk to different machines or environments. 
+- server URL +- optional basic-auth username +- optional basic-auth password -## Best Fit +Managed mode asks for: -Use this bridge when: +- path to the `opencode` binary +- default working directory -- You run OpenCode yourself and want Beeper access from anywhere -- You want a simple remote interface for agent sessions without exposing a separate UI -- You want to keep the runtime and credentials on the host machine - -## Run It - -From the repo root: +## Run ```bash ./tools/bridges run opencode @@ -43,8 +37,3 @@ Or: ```bash ./run.sh opencode ``` - -## Notes - -- OpenCode uses an HTTP API plus event streaming rather than the local Codex app-server flow. -- In AgentRemote terms, this is the bridge for turning a private OpenCode deployment into a Beeper-accessible agent endpoint. diff --git a/docs/bridge-orchestrator.md b/docs/bridge-orchestrator.md index 20717c11..aea2a1ae 100644 --- a/docs/bridge-orchestrator.md +++ b/docs/bridge-orchestrator.md @@ -1,53 +1,60 @@ -# Bridge Orchestrator +# AgentRemote CLI -`tools/bridges` is a thin wrapper around `agentremote`, which manages isolated bridgev2 instances for Beeper from this repo. +`./tools/bridges` is the local entrypoint for `agentremote`. -## Auth - -Use one of: - -- `./tools/bridges login --env prod` for the email and code flow -- `./tools/bridges auth set-token --token syt_... --env prod` -- Environment variables: `BEEPER_ACCESS_TOKEN`, optional `BEEPER_ENV`, `BEEPER_USERNAME` - -## One-command startup +It wraps: ```bash -./tools/bridges up ai +go run ./cmd/agentremote ... ``` -This will: +## Authentication + +Use one of: -1. Create instance state under `~/.config/agentremote/profiles/default/instances//` -2. Generate config from the bridge binary with `-e` if needed -3. Ensure Beeper appservice registration and sync config tokens -4. Start the bridge process and write PID and log files +- `./tools/bridges login --env prod` +- `./tools/bridges auth set-token --token syt_... 
--env prod` +- `./tools/bridges whoami` -## Core commands +Profiles default to `default`. + +## Bridge lifecycle - `./tools/bridges list` -- `./tools/bridges login` -- `./tools/bridges logout` -- `./tools/bridges whoami [--output json]` -- `./tools/bridges profiles` +- `./tools/bridges run ` - `./tools/bridges up ` - `./tools/bridges start ` -- `./tools/bridges run ` -- `./tools/bridges init ` -- `./tools/bridges register ` -- `./tools/bridges status [instance]` -- `./tools/bridges instances` -- `./tools/bridges logs [--follow]` -- `./tools/bridges down ` - `./tools/bridges stop ` -- `./tools/bridges stop-all` +- `./tools/bridges down ` - `./tools/bridges restart ` - `./tools/bridges delete [instance]` + +`up` is an alias of `start`. `down` is an alias of `stop`. + +## Inspection + +- `./tools/bridges status [instance...]` +- `./tools/bridges instances` +- `./tools/bridges logs --follow` - `./tools/bridges doctor` + +## Setup helpers + +- `./tools/bridges init ` +- `./tools/bridges register ` - `./tools/bridges completion ` -Shortcut wrapper: +## Quick examples -- `./run.sh ai|codex|opencode|openclaw` - - checks login and prompts with `login` if needed - - then runs the selected bridge instance +```bash +./tools/bridges login --env prod +./tools/bridges up codex --wait +./tools/bridges status codex +./tools/bridges logs codex --follow +``` + +Local instance data is stored under: + +```text +~/.config/agentremote/profiles//instances// +``` diff --git a/docs/matrix-ai-matrix-spec-v1.md b/docs/matrix-ai-matrix-spec-v1.md index 4cd3cd21..389efc1b 100644 --- a/docs/matrix-ai-matrix-spec-v1.md +++ b/docs/matrix-ai-matrix-spec-v1.md @@ -1,491 +1,106 @@ -# Real-time AI with Matrix? +# Matrix AI Transport v1 -## Matrix AI Transport Spec v1 +Status: experimental and unstable. -> [!WARNING] -> Status: *Draft* (unreleased), proposed v1. -> This is a highly experimental profile. -> It relies on homeserver/client support for custom event types and rendering/consumption. 
-> Streaming transport is message-anchored: a placeholder `m.room.message` advertises `com.beeper.stream`, live deltas flow over `to_device`, and completion is signaled by a final timeline edit. -> This repo contains one experimental implementation, but the transport profile is not bridge-specific: any Matrix bot/client/bridge can emit and consume these events. +## What the code emits -## Contents -- [Scope](#scope) -- [Compatibility](#compatibility) -- [Terminology](#terminology) -- [Inventory](#inventory) -- [Canonical Assistant Message](#canonical) -- [Streaming](#streaming) -- [Timeline Projections](#projections) -- [State Events](#state) -- [Tool Approvals](#approvals) -- [Other Matrix Keys](#other-keys) -- [Implementation Notes](#impl-notes) -- [Forward Compatibility](#forward-compat) +### 1. Canonical assistant messages - -## Scope -This document specifies a Matrix transport profile for real-time AI: -- Canonical assistant content in `m.room.message` (`com.beeper.ai` as AI SDK-compatible `UIMessage`). -- Streaming deltas via message-anchored transport: - - placeholder `m.room.message` carrying `com.beeper.stream` - - `to_device` subscription and update events (`com.beeper.stream.subscribe`, `com.beeper.stream.update`) - - final `m.replace` timeline edit with canonical content -- `com.beeper.ai.*` timeline projection events (tool call/result, compaction status, etc). -- standard Matrix room features for capability advertising. -- Tool approvals (MCP approvals + selected builtin tools). -- Auxiliary `com.beeper.ai*` keys used for routing/metadata. +Assistant turns are stored as normal `m.room.message` events with: -This spec is intended to be usable by any Matrix bot/client/bridge. Where this document references "the bridge", it refers to the producing implementation (for this repo, `AI Chats`). 
+- standard Matrix fallback fields such as `msgtype` and `body` +- `com.beeper.ai`, which carries an AI SDK-style `UIMessage` -Upstream reference (AI SDK): -- Normative message model target: Vercel AI SDK `ai@6.0.121`. -- Core types: - - `packages/ai/src/ui/ui-messages.ts` - - `packages/ai/src/ui-message-stream/ui-message-chunks.ts` - - `packages/ai/src/ui-message-stream/json-to-sse-transform-stream.ts` +Current shape: -Reference implementation in this repo (AI Chats): -- Event type identifiers: `pkg/matrixevents/matrixevents.go` -- Event payload structs (where defined): `bridges/ai/events.go` -- Streaming envelope and emission: `pkg/matrixevents/matrixevents.go`, `turns/session.go`, `sdk/turn.go` -- Tool call/result projections: `bridges/ai/tool_execution.go` -- Compaction status emission: `bridges/ai/response_retry.go` -- State broadcast: `bridges/ai/chat.go` -- Approvals: `bridges/ai/tool_approvals*.go`, `bridges/ai/handlematrix.go`, `bridges/ai/handler_interfaces.go`, `bridges/ai/streaming_ui_tools.go` -- Shared approval manager and reaction handling: `approval_manager.go`, `approval_decision.go` - - -## Compatibility -- Homeserver support for custom event types is required. -- Clients that want live streaming must implement `com.beeper.stream` descriptor handling plus `to_device` subscribe/update flows. -- Non-supporting clients still interoperate through placeholder fallback text and the final timeline edit. -- Non-supporting clients should fall back to `m.room.message.body` where available. - - -## Terminology -- `turn_id`: Unique ID for a single assistant response "turn". -- `seq`: Per-turn monotonic sequence number for streamed deltas. -- `call_id` / `toolCallId`: Tool invocation identifier. -- `timeline`: persisted Matrix events. -- `stream descriptor`: `com.beeper.stream` object attached to the placeholder timeline event. -- `subscription`: A short-lived `to_device` request to receive live updates for one placeholder event. 
-- `m.reference`: relation used to link events to a target event ID. -- `m.replace`: relation used to edit/replace an earlier timeline message. - - -## Inventory -Authoritative identifiers are defined in `pkg/matrixevents/matrixevents.go`. - -### Event Types -| Event type | Class | Persistence | Primary purpose | Spec section | -| --- | --- | --- | --- | --- | -| `m.room.message` | message | timeline | Canonical assistant message carrier; placeholder also carries `com.beeper.stream` | [Canonical](#canonical) | -| `com.beeper.stream.subscribe` | to-device | transient | Subscribe one device to a placeholder-backed live stream | [Streaming](#streaming) | -| `com.beeper.stream.update` | to-device | transient | Deliver buffered or incremental stream deltas | [Streaming](#streaming) | -| `com.beeper.ai.compaction_status` | message | timeline | Context compaction lifecycle/status | [Projections](#projection-compaction) | -| `com.beeper.ai.agents` | state | state | Agent definitions for the room | — | - -### Content Keys (Inside Standard Events) -| Key | Where it appears | Purpose | Spec section | -| --- | --- | --- | --- | -| `com.beeper.ai` | `m.room.message` | Canonical assistant `UIMessage` | [Canonical](#canonical) | -| `com.beeper.stream` | `m.room.message` | Active live-stream descriptor for a placeholder message | [Streaming](#streaming) | -| `com.beeper.ai.model_id` | `m.room.message` | Routing/display hint | [Other keys](#other-keys-routing) | -| `com.beeper.ai.agent` | `m.room.message`, `m.room.member` | Routing hint or agent definition | [Other keys](#other-keys-agent) | -| `com.beeper.ai.image_generation` | `m.room.message` (image) | Generated-image tag/metadata | [Other keys](#other-keys-media) | -| `com.beeper.ai.tts` | `m.room.message` (audio) | Generated-audio tag/metadata | [Other keys](#other-keys-media) | - - -## Canonical Assistant Message -Canonical assistant content is carried in a standard `m.room.message` event. 
- -Requirements: -- MUST include standard Matrix fallback fields (`msgtype`, `body`) for non-AI clients. -- MUST include `com.beeper.ai` and it MUST be an AI SDK-compatible `UIMessage`. - -### UIMessage Shape -`com.beeper.ai`: -- `id: string` -- `role: "assistant"` -- `metadata?: object` -- `parts: UIMessagePart[]` - -Recommended `metadata` keys: -- `turn_id`, `agent_id`, `model`, `finish_reason` -- `usage` (`prompt_tokens`, `completion_tokens`, `reasoning_tokens`, `total_tokens?`) -- `timing` (`started_at`, `first_token_at`, `completed_at`, unix ms) - -Example: ```json { "msgtype": "m.text", - "body": "Thinking...", + "body": "...", "com.beeper.ai": { "id": "turn_123", "role": "assistant", - "metadata": { "turn_id": "turn_123" }, + "metadata": { + "turn_id": "turn_123" + }, "parts": [] } } ``` -### Assistant Turn Encoding -Send assistant turns as standard `m.room.message` events: -- `msgtype` and `body` for Matrix fallback. -- Full AI payload in `com.beeper.ai` as `UIMessage`. -- Turn-level metadata in `com.beeper.ai.metadata` (for example: `turn_id`, `agent_id`, `model`, `finish_reason`, `usage`, `timing`). +The final edit keeps `com.beeper.ai` as the canonical payload. Streaming-only UI parts are compacted before final persistence. - -## Streaming -Streaming uses a placeholder timeline event plus `to_device` subscribe/update traffic. +### 2. Message-anchored live streaming -### Placeholder Descriptor -The sender starts the turn by sending a placeholder `m.room.message`. While the turn is live, that message carries `com.beeper.stream`: +When live streaming is available, the placeholder message also carries `com.beeper.stream`. 
-```json -{ - "msgtype": "m.text", - "body": "Thinking...", - "com.beeper.ai": { - "id": "turn_123", - "role": "assistant", - "metadata": { "turn_id": "turn_123" }, - "parts": [] - }, - "com.beeper.stream": { - "user_id": "@aibot:beeper.local", - "device_id": "ABCD1234", - "type": "com.beeper.llm", - "expiry_ms": 1800000 - } -} -``` - -Descriptor fields: -- `user_id: string` (REQUIRED) -- `device_id: string` (REQUIRED) -- `type: string` (REQUIRED, currently `com.beeper.llm`) -- `expiry_ms?: integer` (milliseconds; clients SHOULD stop subscribing after this age) -- `encryption?: object` (OPTIONAL custom symmetric encryption descriptor; see MSC doc) - -If the most recent assistant placeholder in a room still contains `com.beeper.stream`, clients MAY render a preview such as "Generating response...". +The bridge code does not hardcode the transport backend. It asks a `BeeperStreamPublisher` for a descriptor, registers the placeholder event, and emits live deltas against that target. -### Subscription -Clients subscribe with `to_device` event type `com.beeper.stream.subscribe`: +Live delta payloads use the stable `com.beeper.llm` envelope: ```json { - "type": "com.beeper.stream.subscribe", - "content": { - "room_id": "!meow", - "event_id": "$foobar", - "device_id": "4321EFGH", - "expiry_ms": 300000 + "turn_id": "turn_123", + "seq": 7, + "part": { + "type": "text-delta", + "id": "text-turn_123", + "delta": "hello" + }, + "m.relates_to": { + "rel_type": "m.reference", + "event_id": "$placeholder" } } ``` -Content: -- `room_id: string` (REQUIRED) -- `event_id: string` (REQUIRED; placeholder event ID) -- `device_id: string` (REQUIRED; subscriber device) -- `expiry_ms?: integer` (OPTIONAL requested subscription lifetime in milliseconds) +Envelope fields: -### Update Delivery -The sender replies with `to_device` event type `com.beeper.stream.update`. 
+- `turn_id` +- `seq` +- `part` +- `m.relates_to` +- optional `agent_id` -Content: -- `room_id: string` (REQUIRED) -- `event_id: string` (REQUIRED) -- `com.beeper.llm.deltas: object[]` (REQUIRED for `type = "com.beeper.llm"`) +`part` follows the AI SDK `UIMessageChunk` model. -The sender MUST first send buffered state accumulated so far to the new subscriber, then MAY continue with incremental updates while the subscription is active. +### 3. Finalization -### `com.beeper.llm` Delta Envelope -Each entry in `com.beeper.llm.deltas` is: -- `turn_id: string` (REQUIRED) -- `seq: integer` (REQUIRED, starts at 1, strictly increasing per `turn_id`) -- `part: UIMessageChunk` (REQUIRED) -- `m.relates_to: { rel_type: "m.reference", event_id: string }` (REQUIRED) -- `agent_id?: string` (OPTIONAL) +When a turn completes, the placeholder is edited with the final assistant content. The final event is authoritative. The stream descriptor is no longer present after finalization. -### SSE Mapping -AI SDK UI streams emit SSE frames: -- `data: ` -- terminal sentinel `data: [DONE]` +### 4. Compaction status events -Mapping: -1. For each SSE JSON chunk, append one entry to `com.beeper.llm.deltas` with `part = `. -2. `data: [DONE]` is transport-level termination and does not require a Matrix event. +The AI bridge may emit `com.beeper.ai.compaction_status` timeline events while retrying after context compaction. -Implications: -- Producers MUST NOT remap chunk payload schemas. -- Consumers MUST process each delta `part` as AI SDK `UIMessageChunk`. 
+Current fields are: -### Chunk Compatibility -Producers MAY emit any valid AI SDK `UIMessageChunk` type: -- `start` -- `start-step` -- `finish-step` -- `message-metadata` -- `text-start` -- `text-delta` -- `text-end` -- `reasoning-start` -- `reasoning-delta` -- `reasoning-end` -- `tool-input-start` -- `tool-input-delta` -- `tool-input-available` -- `tool-input-error` -- `tool-approval-request` -- `tool-approval-response` -- `tool-output-available` -- `tool-output-error` -- `tool-output-denied` -- `source-url` -- `source-document` -- `file` -- `data-*` -- `finish` -- `abort` +- `type` +- `session_id` +- `messages_before` +- `messages_after` +- `tokens_before` +- `tokens_after` +- `summary` +- `will_retry` - `error` -Consumer requirements: -- MUST accept and safely handle all valid AI SDK chunk types. -- MUST ignore unknown future chunk types. -- MUST NOT persist `data-*` chunks with `transient: true`. -- MUST treat `start`, `finish`, `abort`, and `message-metadata` as stream-only events, not persisted parts. -- MUST merge payload data from stream-only terminal and metadata chunks into the final canonical `UIMessage.metadata` during finalization or replay assembly. This includes fields such as `finish_reason`, `usage`, and `timing`. -- MUST persist `start-step` as a `step-start` part in the canonical `UIMessage`. - -### Bridge-specific `data-*` chunks -This bridge emits some `data-*` chunks in `part` for UI coordination. Clients that do not recognize them SHOULD ignore them. - -| Chunk type | Transient | Payload | -| --- | --- | --- | -| `data-tool-progress` | yes | `data.call_id`, `data.tool_name`, `data.status`, `data.progress` | -| `data-image_generation_partial` | yes | `data.item_id`, `data.index`, `data.image_b64` | -| `data-annotation` | yes | `data.annotation`, `data.index` | - -### Ordering and Lifecycle -Per turn: -- `seq` MUST be strictly increasing. -- Duplicate/stale deltas (`seq <= last_applied_seq`) MUST be ignored. 
-- Out-of-order deltas SHOULD be buffered briefly and applied in `seq` order. -- Producers MUST NOT advertise or publish a live stream until the canonical assistant placeholder has a concrete Matrix event ID. -- Producers MUST buffer the final timeline edit until the placeholder's Matrix event ID is resolved, because `m.replace` requires `m.relates_to.event_id`. -- If neither a bridge-side message ID nor a Matrix event ID exists, producers MUST buffer or fail the turn and MUST NOT start live delivery. +### 5. Command descriptions -Required lifecycle: -1. Send initial placeholder `m.room.message` with seed `com.beeper.ai` and `com.beeper.stream`. -2. Resolve/store the placeholder's Matrix event ID. -3. Accept `com.beeper.stream.subscribe` requests for that placeholder. -4. Send buffered `com.beeper.stream.update` state, then incremental updates (monotonic `seq`) while subscribed. -5. Emit final timeline edit (`m.replace`) containing final fallback text + full final `com.beeper.ai`, and remove `com.beeper.stream`. - -Terminal chunks: -- The stream SHOULD end with one of: `finish`, `abort`, `error`. 
- -Mermaid (conceptual): -```mermaid -sequenceDiagram - participant C as Client - participant H as Homeserver - participant B as Bridge - - B->>H: m.room.message (placeholder + com.beeper.ai + com.beeper.stream) - H->>C: timeline placeholder - C->>B: to_device com.beeper.stream.subscribe - loop subscribed updates - B->>C: to_device com.beeper.stream.update (com.beeper.llm.deltas) - end - B->>H: m.room.message (m.replace final + com.beeper.ai final) - H->>C: timeline edit -``` - -### Streaming Example -```json -{ - "room_id": "!meow", - "event_id": "$foobar", - "com.beeper.llm.deltas": [ - { - "turn_id": "turn_123", - "seq": 7, - "m.relates_to": { "rel_type": "m.reference", "event_id": "$foobar" }, - "part": { "type": "text-delta", "id": "text-turn_123", "delta": "hello" } - } - ] -} -``` - - -## Additional Timeline Status - - -### `com.beeper.ai.compaction_status` -Status events emitted during context compaction/retry. - -Schema (event content): -- `type: "compaction_start"|"compaction_end"` (required) -- `session_id?: string` -- `messages_before?: number` -- `messages_after?: number` -- `tokens_before?: number` -- `tokens_after?: number` -- `summary?: string` -- `will_retry?: boolean` -- `error?: string` -- `duration_ms?: number` - -Example: -```json -{ - "type": "compaction_end", - "session_id": "main", - "messages_before": 50, - "messages_after": 20, - "tokens_before": 80000, - "tokens_after": 30000, - "summary": "...", - "will_retry": true, - "duration_ms": 742 -} -``` - - -## State Events -This bridge no longer uses custom room state for editable AI configuration. Room target selection is determined by ghost identity and membership, while room-level capability advertising uses standard Matrix room features. - - -## Tool Approvals -Approvals are an owner-only gate for: -- MCP approvals (OpenAI Responses `mcp_approval_request` items). -- Selected builtin tool actions, configured via `network.tool_approvals.require_for_tools`. 
- -Config (see `config.example.yaml` and `bridges/ai/integrations_config.go`): -- `network.tool_approvals.enabled` (default true) -- `network.tool_approvals.ttl_seconds` (default 600) -- `network.tool_approvals.require_for_mcp` (default true) -- `network.tool_approvals.require_for_tools` (default list in code) - -### Approval Request Emission -When approval is needed, the bridge emits: -1. A live stream delta delivered in `com.beeper.stream.update`, where one `com.beeper.llm.deltas[*].part.type = "tool-approval-request"` and contains: - - `approvalId: string` - - `toolCallId: string` -2. A timeline-visible canonical approval notice. - - The notice is an `m.room.message` with `msgtype = "m.notice"`, SHOULD reply to the originating assistant turn via `m.relates_to.m.in_reply_to`, and includes a complete `com.beeper.ai` `UIMessage` using the canonical shape defined above (`id`, `role`, optional `metadata`, `parts`). - - The notice body MUST list the canonical reaction keys for the available options. - - The bridge MUST send bridge-authored placeholder `m.reaction` events on the notice, one for each allowed option key, using `m.annotation` as the relation type. - - `UIMessage.metadata.approval` SHOULD include: - - `id: string` - - `options: [{ id, key, label, approved, always?, reason? }]` - - `presentation` - - `expiresAt` when known - - The `dynamic-tool` part contains: - - `state = "approval-requested"` - - `toolCallId: string` - - `toolName: string` - - `approval: { id: string }` - -Canonical approval data in persisted `dynamic-tool` parts follows the AI SDK: -- pending approval: `approval: { id: string }` -- responded approval: `approval: { id: string, approved: boolean, reason?: string }` - - -### Approving / Denying -Approvals are resolved through reactions on the canonical approval notice: - -1. **Bridge sends** the canonical approval notice and placeholder reactions for the allowed option keys. -2. 
**Owner reacts** to that notice using one of the advertised option keys: - -```json -{ - "type": "m.reaction", - "content": { - "m.relates_to": { - "rel_type": "m.annotation", - "event_id": "$approval_notice", - "key": "approval.allow_once" - } - } -} -``` - -Rules: -- The approval notice is the canonical Matrix artifact. Rich clients MAY also observe mirrored `tool-approval-request` and `tool-approval-response` stream parts inside `com.beeper.stream.update`. A `tool-approval-response` chunk carries `approvalId`, `toolCallId`, `approved`, and optional `reason`. -- Clients MUST NOT send legacy timeline approval decision payloads such as `com.beeper.ai.approval_decision`; owner reactions on the approval notice are the only Matrix approval action. -- Only owner reactions with an advertised option key can resolve the approval. -- Non-owner reactions and invalid keys MUST be rejected and SHOULD be redacted. -- On terminal completion, the bridge MUST edit the approval notice into its final state and redact all bridge-authored placeholder reactions. -- The resolving owner reaction MUST remain visible. -- If the approval was resolved outside Matrix, the bridge SHOULD mirror the owner's chosen reaction into Matrix before terminal cleanup so the notice stays in sync. -- Approval notices and their terminal edits remain excluded from provider replay history. - -Always-allow: -- Reacting with the `allow always` option persists an allow rule in login metadata, scoped to the current login/account for the current bridge implementation. -- A stored rule matches on the approval target identity emitted by the bridge for that login: at minimum `toolName`, plus any bridge-emitted qualifier needed to distinguish separate approval surfaces for that login (for example agent/model or room-scoped tool routing). -- Rules are allow-only. If multiple stored rules match, the most specific rule for the current login wins; otherwise any matching allow rule MAY be applied. 
-- Approval events themselves remain the audit record for the concrete `approvalId`; persisted allow rules are derived from those events and do not change canonical replay history. - -TTL: -- Pending approvals expire after `ttl_seconds`. - - -## Other Matrix Keys - - -### Routing/Display Hints on `m.room.message` -The bridge may set: -- `com.beeper.ai.model_id: string` -- `com.beeper.ai.agent: string` - - -### Agent Definitions in `m.room.member` (Builder room) -Agent definitions can be stored in member state (see `AgentMemberContent` in `bridges/ai/events.go`): -- `com.beeper.ai.agent: AgentDefinitionContent` - -Example: -```json -{ - "membership": "join", - "displayname": "Researcher", - "avatar_url": "mxc://example.org/abc", - "com.beeper.ai.agent": { - "id": "researcher", - "name": "Researcher", - "model": "openai/gpt-5", - "created_at": 1738970000000, - "updated_at": 1738970000000 - } -} -``` +AI rooms broadcast `org.matrix.msc4391.command_description` state events for the user-facing commands implemented by the bridge. See [`docs/msc/com.beeper.mscXXXX-commands.md`](./msc/com.beeper.mscXXXX-commands.md). - -### AI-Generated Media Tags -Generated media messages may include minimal metadata: -- `com.beeper.ai.image_generation: { "turn_id": "..." }` -- `com.beeper.ai.tts: { "turn_id": "..." 
}` +## Extra keys -### Unstable HTTP Namespace -For the Beeper provider, base URLs may be formed with: -- `/_matrix/client/unstable/com.beeper.ai` +These keys appear as metadata or rendering hints on Matrix events: -Examples: -- `https:///_matrix/client/unstable/com.beeper.ai/openrouter/v1` -- `https:///_matrix/client/unstable/com.beeper.ai/openai/v1` -- `https:///_matrix/client/unstable/com.beeper.ai/exa` +- `com.beeper.ai` +- `com.beeper.stream` +- `com.beeper.ai.model_id` +- `com.beeper.ai.agent` +- `com.beeper.ai.image_generation` +- `com.beeper.ai.tts` - -## Implementation Notes -- Desktop consumes `com.beeper.llm.deltas[*].part` as an AI SDK `UIMessageChunk` and reconstructs a live `UIMessage`. -- Matrix envelope concerns (`turn_id`, `seq`, `m.relates_to`) remain bridge/client responsibilities inside each delta entry. -- Consumers should prefer AI SDK-compatible chunk semantics (metadata merge, tool partial JSON handling, step boundaries). +## Notes - -## Forward Compatibility -- Clients MUST ignore unknown `com.beeper.ai.*` event types and unknown fields. -- Clients MUST ignore unknown future streaming chunk types. +- Custom agents are stored in login metadata, not published as room state events. +- `com.beeper.ai.info` is registered as a known state type, but it is not actively broadcast. +- Room capability state is sent through standard Beeper room-feature state, not a custom AI state event. diff --git a/docs/msc/com.beeper.mscXXXX-commands.md b/docs/msc/com.beeper.mscXXXX-commands.md index a09019d5..74565927 100644 --- a/docs/msc/com.beeper.mscXXXX-commands.md +++ b/docs/msc/com.beeper.mscXXXX-commands.md @@ -1,98 +1,34 @@ -# MSC: AI Chats MSC4391 Command Profile +# MSC: AI Command Profile -## Summary +Status: implemented for the AI bridge in this repo. -This document defines the specific command set that AI Chats advertises via [MSC4391] bot command descriptions. 
Rather than introducing a custom `com.beeper.*` command system, AI Chats adopts MSC4391 directly — broadcasting `org.matrix.msc4391.command_description` state events so that supporting clients can render slash commands with autocomplete and typed parameters. +## Transport -This is a profile document, not a new MSC. It specifies which commands AI Chats publishes via MSC4391. +Room state: -## Motivation +- `org.matrix.msc4391.command_description` -Text-based bot commands (`!ai status`, `!ai reset`) have several problems: +Structured invocation: -- **Undiscoverable:** Users must read documentation or type `!ai help` to learn available commands. There is no in-client autocomplete or parameter hinting. -- **Fragile parsing:** Free-text command parsing leads to ambiguous inputs and poor error messages. Typed parameters eliminate this class of bugs. -- **No validation:** Without structured schemas, clients cannot validate arguments before sending. Invalid commands waste a round-trip. +- `org.matrix.msc4391.command` inside `m.room.message` -[MSC4391] solves these problems by letting bots advertise commands as room state events. Clients that support MSC4391 render them as slash commands with autocomplete. AI Chats adopts this directly. +When both structured data and plain text are present, the structured command wins. -## Proposal +## Built-in user-facing commands -### State Event +The AI bridge currently publishes these stable user-facing commands: -Type: `org.matrix.msc4391.command_description` +| Command | Meaning | +| --- | --- | +| `new` | Create a new chat of the same type, optionally targeting an agent | +| `status` | Show current session status | +| `reset` | Start a new session or thread in the current room | +| `stop` | Abort the active run and clear the pending queue | -The bot MUST broadcast one state event per command when it joins a room. The `state_key` is the command name. 
- -```json -{ - "type": "org.matrix.msc4391.command_description", - "state_key": "status", - "content": { - "description": "Show current session status", - "arguments": {} - } -} -``` - -```json -### Structured Invocation - -When a client sends a command, it MUST include the `org.matrix.msc4391.command` field in the message content: - -```json -{ - "type": "m.room.message", - "content": { - "msgtype": "m.text", - "body": "!ai status", - "org.matrix.msc4391.command": { - "command": "status", - "arguments": {} - } - } -} -``` - -The `body` field MUST contain a text fallback for clients without MSC4391 support. When `org.matrix.msc4391.command` is present, the bot MUST use the structured field and ignore the `body` for command parsing. - -### Command List - -Commands broadcast by AI Chats: - -| Command | Description | Arguments | -|---------|-------------|-----------| -| `new` | Create a new chat of the same type | `agent?: string` | -| `status` | Show current session status | — | -| `reset` | Start a new session/thread | — | -| `stop` | Abort current run and clear queue | — | - -Dynamic commands from integrations and modules are also broadcast as state events. +Integration modules may register more commands at runtime. Those are also broadcast through MSC4391 when available. ## Fallback -Clients without MSC4391 support MAY send commands as `!ai ` text messages. The bot MUST parse `!ai` prefixed text as a fallback when the `org.matrix.msc4391.command` field is absent. - -When both are present, the structured `org.matrix.msc4391.command` field takes precedence over the text `body`. - -## Security Considerations - -- **Command authorization:** The bot SHOULD check room power levels before executing commands that modify room or session state. -- **Argument validation:** The bot MUST validate structured arguments against the published schema before execution. Malformed arguments MUST be rejected with an error message. 
- -## Unstable Prefix - -This profile uses the MSC4391 unstable prefix directly: - -| Unstable | Stable (future) | -|----------|----------------| -| `org.matrix.msc4391.command_description` | `m.command_description` | -| `org.matrix.msc4391.command` | `m.command` | - -No `com.beeper.*` variant is needed — MSC4391 is adopted as-is. - -## Dependencies - -- [MSC4391]: Bot command descriptions — the underlying protocol this profile builds on. +Clients without MSC4391 support can still send plain-text commands using the room command prefix. -[MSC4391]: https://github.com/matrix-org/matrix-spec-proposals/pull/4391 +The default command prefix is `!ai`. diff --git a/docs/msc/com.beeper.mscXXXX-ephemeral.md b/docs/msc/com.beeper.mscXXXX-ephemeral.md index ee1e2fd6..6b3c6f73 100644 --- a/docs/msc/com.beeper.mscXXXX-ephemeral.md +++ b/docs/msc/com.beeper.mscXXXX-ephemeral.md @@ -1,139 +1,9 @@ # MSC: Custom Room Ephemeral Events -## Summary +Current status: -`com.beeper.ephemeral` provides a transport for custom ephemeral events in Matrix rooms. This is an implementation of [MSC2477] with a `com.beeper` unstable prefix, plus transparent E2EE support following the [MSC3673] pattern. +- no bridge here implements `com.beeper.ephemeral` +- live AI output uses the message-anchored streaming model in [`com.beeper.mscXXXX-streaming.md`](./com.beeper.mscXXXX-streaming.md) +- timeline state remains the source of truth -Ephemeral events are short-lived, non-persisted events delivered via `/sync` to joined room members. They are useful for real-time features like room-scoped AI telemetry, live indicators, and collaborative cursors. - -## Motivation - -Matrix currently provides only a limited set of built-in ephemeral events — primarily typing indicators (`m.typing`) and read receipts (`m.receipt`). Applications that need real-time, non-persisted data delivery within a room have no standard mechanism available. 
- -Use cases that require custom ephemeral events include: - -- **Transient room-scoped AI telemetry:** Implementations that want every joined client to observe non-persisted AI status can use ephemeral events. The AI streaming profile in this repo now prefers message-anchored `to_device` delivery instead of room ephemerals. -- **Collaborative cursors:** Real-time cursor position sharing in shared editing contexts. -- **Custom presence:** Application-specific presence or activity indicators beyond `m.presence`. - -[MSC2477] proposes user-defined ephemeral events but has not yet been merged into the Matrix specification. This proposal implements the same concept with a `com.beeper` unstable prefix to unblock real-time features today. - -## Proposal - -### Differences from MSC2477 - -| Aspect | MSC2477 | com.beeper.ephemeral | -|--------|---------|---------------------| -| Unstable prefix | `org.matrix.msc2477` | `com.beeper.ephemeral` | -| Endpoint | `PUT /_matrix/client/unstable/org.matrix.msc2477/rooms/{roomId}/ephemeral/{type}/{txnId}` | `PUT /_matrix/client/unstable/com.beeper.ephemeral/rooms/{roomId}/ephemeral/{type}/{txnId}` | -| Power levels key | `ephemeral` + `ephemeral_default` (default 50) | Same concept — checked via power levels | -| TTL | Not specified | Servers SHOULD expire events. Recommended TTL: 2 minutes. | -| Timestamp | `origin_server_ts` on event | `?ts=` query param on PUT, stored as `origin_server_ts` | -| Response | `{}` | `{}` (empty body) | -| Built-in type blocking | Rejects `m.*` types | Rejects built-in `m.*` ephemeral types except `m.room.encrypted` | -| Sync delivery | `ephemeral` section of `/sync` rooms | Same — delivered in `rooms.join.{roomId}.ephemeral.events[]` | - -### Client-Server API - -#### Sending - -``` -PUT /_matrix/client/unstable/com.beeper.ephemeral/rooms/{roomId}/ephemeral/{eventType}/{txnId} -``` - -**Request body:** Arbitrary JSON content. 
- -**Query parameters:** - -| Parameter | Type | Required | Description | -|-----------|------|----------|-------------| -| `ts` | integer | no | Unix millisecond timestamp for `origin_server_ts`. If omitted, the server MUST use the current time. | - -**Authentication:** Standard Matrix access token. The sender MUST be joined to the room. - -**Power levels:** The server MUST check the sender's power level against the room's ephemeral event power level for the given `eventType`. - -**Constraints:** -- Maximum content size: 64KB. Servers MUST reject requests exceeding this limit with `M_TOO_LARGE`. -- Event types: Servers MUST accept `m.room.encrypted` and custom non-`m.*` event types. Servers MUST reject other built-in `m.*` ephemeral event types. -- Deduplication: Servers MUST deduplicate on the composite key `(room_id, sender, event_type, txn_id)`. Duplicate sends MUST be silently accepted and return `200 OK`. - -**Response:** `200 OK` -```json -{} -``` - -#### Receiving via /sync - -Ephemeral events appear in the `/sync` response under `rooms.join.{roomId}.ephemeral.events[]`: - -```json -{ - "type": "com.example.custom", - "sender": "@user:server", - "origin_server_ts": 1709123456000, - "room_id": "!room:server", - "content": { ... } -} -``` - -Servers MUST only deliver ephemeral events to users with `membership: join` in the room. - -#### TTL and Expiry - -Servers SHOULD expire ephemeral events after a configured TTL. The recommended TTL is 2 minutes. Servers SHOULD run periodic cleanup to remove expired events. The `/sync` endpoint MUST NOT deliver expired events. - -### E2EE - -When a room is encrypted, clients MUST encrypt ephemeral event content using the room's Megolm session before sending: - -1. The client checks whether the room is encrypted. -2. If encrypted: the client wraps the content with Megolm encryption and sets `eventType` to `m.room.encrypted`. -3. The encrypted event is sent via `PUT .../ephemeral/m.room.encrypted/{txnId}`. -4. 
The server stores the event content-agnostically. -5. `/sync` delivers the encrypted event. Receiving clients decrypt with shared Megolm room keys. - -This reuses existing room Megolm sessions — no separate key management is required. This follows the [MSC3673] pattern for encrypted ephemeral data units. - -## Potential Issues - -- **No delivery guarantee:** Ephemeral events are best-effort. Clients MUST NOT rely on ephemeral events as the sole delivery mechanism for critical data. Applications SHOULD provide a persisted fallback (e.g. timeline edits for streaming). -- **TTL semantics are server-defined:** The TTL is a server implementation detail, not a client-controlled parameter. Different servers MAY use different TTL values, which could affect applications that assume a specific event lifetime. -- **Dedup key constraints:** The composite dedup key `(room_id, sender, event_type, txn_id)` means that two different senders MAY use the same `txn_id` for the same `event_type` without conflict, but a single sender reusing a `txn_id` will have the second event silently dropped. - -## Alternatives - -### `to_device` events - -`to_device` events provide direct device-to-device messaging but bypass room semantics entirely. They require the sender to enumerate target devices, do not benefit from server-side room membership filtering, and cannot be delivered to all room members via a single API call. - -### Reusing `m.typing` - -The existing `m.typing` mechanism is limited to a single boolean per user per room. It cannot carry arbitrary payloads, custom types, or per-event content. Extending `m.typing` to support custom data would be a breaking change to a well-established API. - -### MSC2477 directly - -Adopting [MSC2477] with its `org.matrix.msc2477` prefix is the eventual goal. The `com.beeper.ephemeral` prefix is used in the interim because MSC2477 has not yet been merged, and we need to ship real-time features today. 
The protocol semantics are intentionally aligned to make migration straightforward. - -## Security Considerations - -- **Power level enforcement:** Servers MUST check the sender's power level before accepting ephemeral events. Without power level checks, any joined user could flood a room with ephemeral events. -- **Content size limits:** Servers MUST enforce the 64KB content size limit. Unbounded content could be used for denial-of-service attacks on the `/sync` pipeline. -- **E2EE requirement for sensitive data:** Applications sending sensitive data (e.g. tool call parameters, user input) via ephemeral events in encrypted rooms MUST encrypt the content per the E2EE section above. Sending plaintext ephemeral events in encrypted rooms leaks data to the server. -- **Rate limiting:** Servers SHOULD apply rate limits to the ephemeral event endpoint. High-frequency streaming use cases (e.g. AI token-by-token output) can generate significant load. - -## Unstable Prefix - -While this proposal is not yet part of the Matrix specification, implementations MUST use the following unstable prefix: - -| Unstable | Stable (future) | -|----------|----------------| -| `com.beeper.ephemeral` (endpoint path) | Aligned with [MSC2477] — `org.matrix.msc2477` or future `m.ephemeral` | - -## Dependencies - -- [MSC2477]: User-defined ephemeral events — the upstream proposal this implementation is based on. -- [MSC3673]: Encrypted ephemeral data units — the pattern for E2EE ephemeral events. - -[MSC2477]: https://github.com/matrix-org/matrix-spec-proposals/pull/2477 -[MSC3673]: https://github.com/matrix-org/matrix-spec-proposals/pull/3673 +If room-scoped custom ephemerals are added later, they should be documented separately from the current bridge surface. 
diff --git a/docs/msc/com.beeper.mscXXXX-streaming.md b/docs/msc/com.beeper.mscXXXX-streaming.md index bfe24ef1..322b4a68 100644 --- a/docs/msc/com.beeper.mscXXXX-streaming.md +++ b/docs/msc/com.beeper.mscXXXX-streaming.md @@ -1,255 +1,75 @@ # MSC: Message-Anchored AI Streaming -## Summary +Status: experimental. -This proposal defines an application-level streaming profile for real-time AI output in Matrix rooms. +## Current model -Instead of broadcasting every token into room-scoped ephemeral events, the sender publishes a normal placeholder `m.room.message` that carries a `com.beeper.stream` descriptor. Clients that care about live progress subscribe to that descriptor over `to_device`, and the sender delivers buffered and incremental updates directly to those devices. The final assistant message still lands in the room timeline as a normal edit of the placeholder. +The bridge starts a turn with a normal placeholder `m.room.message`. -The profile covers transport, subscription, completion, and optional custom encryption. The authoritative chunk catalog for `com.beeper.llm` remains in the [AI Matrix Spec](../matrix-ai-matrix-spec-v1.md#streaming). +That placeholder may include: -## Motivation +- `com.beeper.ai` for canonical assistant state +- `com.beeper.stream` for live-stream attachment -AI model responses are generated token-by-token and can take tens of seconds to complete. Users should see progress quickly, but room-wide streaming transport has a few practical problems: +While the turn is active, the bridge emits `com.beeper.llm` delta envelopes anchored to the placeholder event. -- **Unnecessary fanout:** Most joined devices are not actively viewing the room. -- **Server support burden:** Custom room-ephemeral support is not universally available. -- **Per-room delivery overhead:** High-frequency token traffic does not need to be delivered to every client. 
+When the turn finishes, the placeholder is replaced by a final edit and the live stream is considered complete. -Anchoring the stream in a timeline placeholder solves those problems: - -- **Timeline-first UX:** Clients can render a room preview such as "Generating response..." from the placeholder alone. -- **Opt-in live delivery:** Only actively viewing devices subscribe. -- **Strong completion signal:** The final `m.replace` edit removes the stream descriptor, so even non-subscribed clients can tell the stream ended. - -## Proposal - -### Placeholder Descriptor - -The sender starts by sending a placeholder `m.room.message` in the room timeline. The message includes a `com.beeper.stream` object: - -```json -{ - "type": "m.room.message", - "room_id": "!meow", - "event_id": "$foobar", - "sender": "@ai_chatgpt:beeper.local", - "content": { - "msgtype": "m.text", - "body": "Pondering...", - "com.beeper.stream": { - "user_id": "@aibot:beeper.local", - "device_id": "ABCD1234", - "type": "com.beeper.llm", - "expiry_ms": 1800000 - } - } -} -``` - -Fields: - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `user_id` | string | yes | Matrix user that accepts subscriptions and publishes updates. This may differ from the placeholder message sender when bridge bot/device identities differ. | -| `device_id` | string | yes | Device that accepts subscriptions and sends updates. | -| `type` | string | yes | Stream payload family. This proposal currently defines `com.beeper.llm`. | -| `expiry_ms` | integer | no | Maximum age in milliseconds for treating the descriptor as live. Clients SHOULD ignore stale descriptors after this window. | -| `encryption` | object | no | Optional custom symmetric encryption parameters. See [Custom encryption](#custom-encryption). | - -If a message containing `com.beeper.stream` is the latest relevant event in a room, clients MAY show a room-list or timeline preview such as "Generating response...". 
- -### Subscription Request - -When a client opens the room and sees an unexpired stream descriptor, it subscribes with a `to_device` event: - -```json -{ - "type": "com.beeper.stream.subscribe", - "sender": "@you:beeper.com", - "to_user_id": "@aibot:beeper.local", - "to_device_id": "ABCD1234", - "content": { - "room_id": "!meow", - "event_id": "$foobar", - "device_id": "4321EFGH", - "expiry_ms": 300000 - } -} -``` - -Fields: - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `room_id` | string | yes | Room containing the placeholder message. | -| `event_id` | string | yes | Placeholder event ID being subscribed to. | -| `device_id` | string | yes | Subscriber device that should receive updates. | -| `expiry_ms` | integer | no | Requested subscription lifetime in milliseconds. Clients SHOULD renew before expiry if still viewing the stream. | - -The sender SHOULD verify that the subscription targets a live placeholder message it controls and SHOULD clamp the granted expiry to a sender-defined maximum. - -### Stream Update Delivery - -After receiving a valid subscription, the sender sends a buffered snapshot of stream state so far to the subscribing device, then continues sending incremental updates while the subscription is active: - -```json -{ - "type": "com.beeper.stream.update", - "sender": "@aibot:beeper.local", - "to_user_id": "@you:beeper.com", - "to_device_id": "4321EFGH", - "content": { - "room_id": "!meow", - "event_id": "$foobar", - "com.beeper.llm.deltas": [ - { - "turn_id": "turn_123", - "seq": 7, - "part": { - "type": "text-delta", - "id": "text-turn_123", - "delta": "hello" - }, - "m.relates_to": { - "rel_type": "m.reference", - "event_id": "$foobar" - } - } - ] - } -} -``` - -For a descriptor with `type = X`, update content uses the field `X + ".deltas"`. This proposal defines `com.beeper.llm.deltas` for AI SDK-compatible streaming chunks. 
- -Each entry in `com.beeper.llm.deltas` uses the stable envelope defined by the AI profile: - -| Field | Type | Required | Description | -|-------|------|----------|-------------| -| `turn_id` | string | yes | Identifier for the assistant turn. | -| `seq` | integer | yes | Monotonically increasing per `turn_id`. | -| `part` | object | yes | AI SDK-compatible streaming chunk. | -| `m.relates_to` | object | yes | `m.reference` pointing at the placeholder event. | -| `agent_id` | string | no | Multi-agent routing hint. | - -For `com.beeper.llm`, producers SHOULD send buffered deltas in-order and receivers SHOULD ignore duplicates where `seq <= last_applied_seq`. - -### Completion - -When the stream is complete, the sender edits the original message: +## Placeholder shape ```json { - "type": "m.room.message", - "room_id": "!meow", - "sender": "@ai_chatgpt:beeper.local", - "content": { - "m.relates_to": { - "rel_type": "m.replace", - "event_id": "$foobar" + "msgtype": "m.text", + "body": "...", + "com.beeper.ai": { + "id": "turn_123", + "role": "assistant", + "metadata": { + "turn_id": "turn_123" }, - "m.new_content": { - "msgtype": "m.text", - "body": "Result of pondering is here" - } + "parts": [] + }, + "com.beeper.stream": { + "...": "publisher-defined descriptor" } } ``` -The terminal edit is authoritative. It SHOULD remove `com.beeper.stream` from the message content and include the finalized assistant state. Clients MUST treat the removal of `com.beeper.stream`, or the arrival of the final edit, as the end of the live stream. - -### Client Behavior - -1. Observe placeholder `m.room.message` events for `com.beeper.stream`. -2. If the descriptor is unexpired and the room is actively viewed, send `com.beeper.stream.subscribe` to the advertised `user_id` and `device_id`. -3. Apply the initial buffered `com.beeper.stream.update`, then subsequent incremental updates. -4. Re-subscribe before subscription expiry if the room remains active. -5. 
Stop rendering the stream when the placeholder is edited to remove `com.beeper.stream`, when the descriptor has expired, or when the client leaves the room. - -## Custom Encryption +The descriptor comes from the active `BeeperStreamPublisher`. Transport details are publisher-defined. -`to_device` updates can use normal Olm encryption. In encrypted rooms, that is the default and recommended transport. - -As an optional optimization, the placeholder descriptor MAY expose a symmetric key: - -```json -{ - "com.beeper.stream": { - "user_id": "@aibot:beeper.local", - "device_id": "ABCD1234", - "type": "com.beeper.llm", - "expiry_ms": 1800000, - "encryption": { - "algorithm": "com.beeper.stream.v1.aes-gcm", - "key": "57v+6jXy1NOiFzkrrg+nga0VN7+RURdrCEbm+8OrCDA" - } - } -} -``` +## Delta envelope -When using this mode, the sender encrypts the `com.beeper.stream.update` payload once and sends the same ciphertext to every subscriber: +Each streamed delta is wrapped as: ```json { - "type": "m.room.encrypted", - "content": { - "algorithm": "com.beeper.stream.v1.aes-gcm", - "room_id": "!meow", - "event_id": "$foobar", - "iv": "svNAxzmSqyRdMU3O", - "ciphertext": "vrKgF7jsQyd9CKnXLqVjAI9mSLH1okmtu0Puu4Tl4uh+HjrR4JhhD0DhT2ioxiUZMaqgYuERuXThAkpebpFFs0kwT0Bp8sC+NyCXHw8apLWxbUxMZ1FMUvyV5fIR6l6RXS50gA" + "turn_id": "turn_123", + "seq": 7, + "part": { + "type": "text-delta", + "delta": "hello" + }, + "m.relates_to": { + "rel_type": "m.reference", + "event_id": "$placeholder" } } ``` -Requirements: - -- `key` is 32 random bytes encoded as unpadded standard base64. -- `room_id` and `event_id` are included in the encrypted event envelope so receivers can route the payload to the correct stream key without trial-decrypting every active stream. -- `iv` is 12 random bytes encoded as unpadded standard base64. -- `ciphertext` is AES-GCM ciphertext followed by the 16-byte authentication tag, encoded as unpadded standard base64. - -This is an optimization, not the baseline transport. 
- -## Potential Issues - -- **Sender-side subscriber tracking:** The sender must keep short-lived subscriber state per placeholder event. -- **Metadata exposure:** The placeholder reveals that a stream exists and identifies the serving device. -- **Late subscribers:** Clients may receive only buffered state retained by the sender, not an authoritative replay log. -- **Descriptor staleness:** If the sender crashes and never edits the placeholder, clients rely on `expiry_ms` to stop subscribing. - -## Alternatives - -### Room ephemerals - -Room-scoped ephemeral events can broadcast updates to all joined clients, but they require homeserver support and deliver high-frequency traffic to devices that may not be viewing the room. - -### Timeline edits only - -Streaming entirely through `m.replace` edits would persist every intermediate state and create unnecessary room traffic. The placeholder-plus-subscription model keeps the timeline authoritative without persisting every token. - -## Security Considerations +Envelope rules: -- **Authorization:** Senders SHOULD only honor subscriptions from users who are entitled to view the placeholder message. -- **Validation:** `room_id` and `event_id` in subscriptions and updates MUST match the anchored placeholder. -- **Expiry enforcement:** Senders SHOULD cap subscription lifetimes and discard expired subscribers. -- **Custom AES mode:** Anyone who can read the placeholder descriptor can decrypt stream updates when the symmetric key mode is used. This is acceptable only because anyone who can read the placeholder is also allowed to subscribe. -- **Key/IV reuse:** AES-GCM senders MUST generate a fresh random IV for every encrypted update. Implementations that approach AES-GCM limits for a single key MUST rotate keys. 
+- `turn_id` is required +- `seq` is strictly positive and monotonic per turn +- `part` is required +- `m.relates_to.event_id` must point at the placeholder event +- `agent_id` may be included when the sender wants multi-agent routing hints -## Unstable Prefix +## Final message -While this proposal is not yet part of the Matrix specification, implementations MUST use the following unstable identifiers: +The final timeline edit is the canonical result. -| Unstable | Stable (future) | -|----------|----------------| -| `com.beeper.stream` | `m.stream` | -| `com.beeper.stream.subscribe` | `m.stream.subscribe` | -| `com.beeper.stream.update` | `m.stream.update` | -| `com.beeper.stream.v1.aes-gcm` | `m.stream.v1.aes-gcm` | +The final `com.beeper.ai` payload is compacted before it is attached to the edit, dropping live-only parts that are useful during streaming but not in the stored message. -## Dependencies +## Out of scope -- Matrix timeline messaging (`m.room.message`, `m.replace`) for the placeholder and final state. -- Matrix `to_device` delivery for subscriptions and live updates. -- Standard Olm `to_device` encryption, or the optional AES-GCM mode defined above. +This document does not define the wire protocol behind the stream publisher abstraction. For the broader Matrix event surface, see [`docs/matrix-ai-matrix-spec-v1.md`](../matrix-ai-matrix-spec-v1.md). 
From fb779b49ef0c239bfa71eb30b340176c87ed109a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?batuhan=20i=C3=A7=C3=B6z?= Date: Sun, 22 Mar 2026 21:03:42 +0100 Subject: [PATCH 3/8] sync --- .github/workflows/docker-agentremote.yml | 112 ++++++++++ .github/workflows/go.yml | 23 +++ .github/workflows/publish-release.yml | 163 +++++++++++++++ .goreleaser.yml | 49 ----- README.md | 49 ++++- bridges/openclaw/catalog.go | 23 ++- bridges/openclaw/gateway_client.go | 253 +++++++++++++++++++++-- bridges/openclaw/gateway_client_test.go | 112 ++++++++++ bridges/openclaw/manager.go | 211 +++++++++++++++---- bridges/openclaw/manager_test.go | 180 +++++++++++++++- bridges/openclaw/status.go | 14 +- bridges/openclaw/stream_test.go | 56 +++++ docker/agentremote/Dockerfile | 36 ++++ docker/agentremote/README.md | 21 ++ install.sh | 187 +++++++++++++++++ sdk/part_apply.go | 11 +- sdk/part_apply_test.go | 88 ++++++++ sdk/turn_test.go | 11 + tools/generate-homebrew-cask.sh | 69 +++++++ 19 files changed, 1546 insertions(+), 122 deletions(-) create mode 100644 .github/workflows/docker-agentremote.yml create mode 100644 .github/workflows/publish-release.yml delete mode 100644 .goreleaser.yml create mode 100644 docker/agentremote/Dockerfile create mode 100644 docker/agentremote/README.md create mode 100755 install.sh create mode 100644 sdk/part_apply_test.go create mode 100755 tools/generate-homebrew-cask.sh diff --git a/.github/workflows/docker-agentremote.yml b/.github/workflows/docker-agentremote.yml new file mode 100644 index 00000000..16ca47ac --- /dev/null +++ b/.github/workflows/docker-agentremote.yml @@ -0,0 +1,112 @@ +name: Publish AgentRemote Docker + +on: + push: + branches: + - main + tags: + - "v*" + pull_request: + branches: + - main + +env: + GHCR_REGISTRY: ghcr.io + GHCR_REGISTRY_IMAGE: ghcr.io/${{ github.repository }} + +jobs: + build-docker: + runs-on: ${{ matrix.runs_on }} + strategy: + fail-fast: false + matrix: + include: + - runs_on: ubuntu-latest + target: amd64 + - 
runs_on: ubuntu-arm64 + target: arm64 + name: build-agentremote-docker (${{ matrix.target }}) + + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Prepare build metadata + id: meta + run: | + set -euo pipefail + if [ "${GITHUB_REF_TYPE}" = "tag" ]; then + version="${GITHUB_REF_NAME}" + else + version="${GITHUB_SHA}" + fi + echo "version=${version}" >> "$GITHUB_OUTPUT" + echo "build_time=$(date -u +%Y-%m-%dT%H:%M:%SZ)" >> "$GITHUB_OUTPUT" + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to ghcr + if: ${{ github.event_name != 'pull_request' }} + uses: docker/login-action@v3 + with: + registry: ${{ env.GHCR_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Build agentremote image + uses: docker/build-push-action@v6 + with: + context: . + file: ./docker/agentremote/Dockerfile + pull: true + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-${{ matrix.target }} + build-args: | + VERSION=${{ steps.meta.outputs.version }} + COMMIT=${{ github.sha }} + BUILD_TIME=${{ steps.meta.outputs.build_time }} + provenance: false + sbom: false + + publish-manifests: + runs-on: ubuntu-latest + needs: + - build-docker + if: ${{ github.event_name != 'pull_request' }} + + steps: + - name: Log in to ghcr + uses: docker/login-action@v3 + with: + registry: ${{ env.GHCR_REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + + - name: Create sha manifest + run: | + set -euo pipefail + docker pull ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-amd64 + docker pull ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-arm64 + docker manifest create ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }} \ + ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-amd64 \ + ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-arm64 + docker manifest push ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }} + + - name: Create version 
manifest + if: ${{ github.ref_type == 'tag' }} + run: | + set -euo pipefail + docker manifest create ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.ref_name }} \ + ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-amd64 \ + ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-arm64 + docker manifest push ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.ref_name }} + + - name: Create latest manifest + if: ${{ github.ref == 'refs/heads/main' }} + run: | + set -euo pipefail + docker manifest create ${{ env.GHCR_REGISTRY_IMAGE }}:latest \ + ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-amd64 \ + ${{ env.GHCR_REGISTRY_IMAGE }}:${{ github.sha }}-arm64 + docker manifest push ${{ env.GHCR_REGISTRY_IMAGE }}:latest diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index d1061c26..15eb67c6 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -30,3 +30,26 @@ jobs: - name: Run pre-commit uses: pre-commit/action@2c7b3805fd2a0fd8c1884dcaebf91fc102a13ecd # v3.0.1 + + package-smoke: + runs-on: ubuntu-latest + name: Package Smoke + + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + + - name: Set up Go + uses: actions/setup-go@7a3fe6cf4cb3a834922a1244abfce67bcef6a0c5 # v6.2.0 + with: + go-version: "1.25" + cache: true + + - name: Build agentremote release binary + env: + CGO_ENABLED: "1" + run: go build -tags goolm -trimpath -o "$RUNNER_TEMP/agentremote" ./cmd/agentremote + + - name: Validate install scripts + run: | + sh -n ./install.sh + sh -n ./tools/generate-homebrew-cask.sh diff --git a/.github/workflows/publish-release.yml b/.github/workflows/publish-release.yml new file mode 100644 index 00000000..6a3061d4 --- /dev/null +++ b/.github/workflows/publish-release.yml @@ -0,0 +1,163 @@ +name: Publish Release + +permissions: + contents: write + +concurrency: + group: publish-release-${{ github.ref }} + +on: + push: + tags: + - "v*" + +env: + GOTOOLCHAIN: local + +jobs: + build-binaries: + name: build (${{ matrix.goos }}/${{ 
matrix.goarch }}) + runs-on: ${{ matrix.runs_on }} + strategy: + fail-fast: false + matrix: + include: + - runs_on: ubuntu-latest + goos: linux + goarch: amd64 + - runs_on: ubuntu-arm64 + goos: linux + goarch: arm64 + - runs_on: macos-13 + goos: darwin + goarch: amd64 + - runs_on: macos-14 + goos: darwin + goarch: arm64 + + steps: + - name: Checkout + uses: actions/checkout@v6 + + - name: Set up Go + uses: actions/setup-go@v6 + with: + go-version-file: go.mod + cache: true + + - name: Build release archive + env: + CGO_ENABLED: "1" + GOOS: ${{ matrix.goos }} + GOARCH: ${{ matrix.goarch }} + VERSION: ${{ github.ref_name }} + COMMIT: ${{ github.sha }} + run: | + set -euo pipefail + version_no_v="${VERSION#v}" + archive_name="agentremote_v${version_no_v}_${GOOS}_${GOARCH}.tar.gz" + build_time="$(date -u +%Y-%m-%dT%H:%M:%SZ)" + dist_dir="$RUNNER_TEMP/dist" + stage_dir="$RUNNER_TEMP/package" + + mkdir -p "$dist_dir" "$stage_dir" + + go build \ + -tags goolm \ + -trimpath \ + -ldflags "-s -w -X main.Tag=${VERSION} -X main.Commit=${COMMIT} -X main.BuildTime=${build_time}" \ + -o "$stage_dir/agentremote" \ + ./cmd/agentremote + + cp LICENSE "$stage_dir/LICENSE" + cp README.md "$stage_dir/README.md" + + tar -C "$stage_dir" -czf "$dist_dir/$archive_name" agentremote LICENSE README.md + shasum -a 256 "$dist_dir/$archive_name" | awk -v name="$archive_name" '{ print $1 " " name }' > "$dist_dir/$archive_name.sha256" + + - name: Upload release artifact + uses: actions/upload-artifact@v4 + with: + name: release-${{ matrix.goos }}-${{ matrix.goarch }} + path: | + ${{ runner.temp }}/dist/*.tar.gz + ${{ runner.temp }}/dist/*.sha256 + if-no-files-found: error + + publish-release: + runs-on: ubuntu-latest + needs: + - build-binaries + + steps: + - name: Download release artifacts + uses: actions/download-artifact@v4 + with: + path: dist + pattern: release-* + merge-multiple: true + + - name: Assemble checksums + run: | + set -euo pipefail + cat dist/*.sha256 | sort -k2 > 
dist/checksums.txt + rm -f dist/*.sha256 + ls -1 dist + + - name: Publish GitHub release + uses: softprops/action-gh-release@v2 + with: + files: | + dist/*.tar.gz + dist/checksums.txt + generate_release_notes: true + + update-homebrew-tap: + runs-on: ubuntu-latest + needs: + - publish-release + if: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN != '' }} + + steps: + - name: Checkout source repo + uses: actions/checkout@v6 + + - name: Download release artifacts + uses: actions/download-artifact@v4 + with: + path: dist + pattern: release-* + merge-multiple: true + + - name: Assemble checksums + run: | + set -euo pipefail + cat dist/*.sha256 | sort -k2 > dist/checksums.txt + + - name: Checkout homebrew tap + uses: actions/checkout@v6 + with: + repository: beeper/homebrew-tap + token: ${{ secrets.HOMEBREW_TAP_GITHUB_TOKEN }} + path: homebrew-tap + + - name: Update agentremote cask + env: + VERSION: ${{ github.ref_name }} + run: | + set -euo pipefail + mkdir -p homebrew-tap/Casks + ./tools/generate-homebrew-cask.sh "$VERSION" dist/checksums.txt > homebrew-tap/Casks/agentremote.rb + + - name: Commit cask update + run: | + set -euo pipefail + cd homebrew-tap + if git diff --quiet -- Casks/agentremote.rb; then + exit 0 + fi + git config user.name "github-actions[bot]" + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git add Casks/agentremote.rb + git commit -m "Update agentremote cask for ${{ github.ref_name }}" + git push diff --git a/.goreleaser.yml b/.goreleaser.yml deleted file mode 100644 index 22efad86..00000000 --- a/.goreleaser.yml +++ /dev/null @@ -1,49 +0,0 @@ -version: 2 - -builds: - - id: agentremote - main: ./cmd/agentremote - binary: agentremote - env: - - CGO_ENABLED=1 - goos: - - darwin - - linux - goarch: - - amd64 - - arm64 - ldflags: - - -s -w - - -X main.Tag={{.Tag}} - - -X main.Commit={{.Commit}} - - -X main.BuildTime={{.Date}} - -archives: - - id: agentremote - builds: - - agentremote - format: tar.gz - name_template: 
"agentremote_{{ .Os }}_{{ .Arch }}" - -brews: - - name: agentremote - ids: - - agentremote - repository: - owner: beeper - name: homebrew-tap - homepage: https://github.com/beeper/agentremote - description: Unified AI bridge manager for Beeper - license: Apache-2.0 - install: | - bin.install "agentremote" - -checksum: - name_template: "checksums.txt" - -changelog: - sort: asc - filters: - exclude: - - "^docs:" - - "^test:" diff --git a/README.md b/README.md index f06b437b..5b0093a9 100644 --- a/README.md +++ b/README.md @@ -6,6 +6,27 @@ It gives Matrix/Beeper chats a bridge layer for full history, live streaming, ap This repository is still experimental. +## Install + +Install the latest release with the one-liner: + +```bash +curl -fsSL https://raw.githubusercontent.com/beeper/agentremote/main/install.sh | sh +``` + +Other supported install paths: + +- Download a release archive from [GitHub Releases](https://github.com/beeper/agentremote/releases) +- Install via Homebrew: `brew install --cask beeper/tap/agentremote` + +To pin a version or choose the install directory: + +```bash +curl -fsSL https://raw.githubusercontent.com/beeper/agentremote/main/install.sh | VERSION=v0.1.0 BINDIR="$HOME/.local/bin" sh +``` + +The installed CLI stores profile state under `~/.config/agentremote/`. + ## Included bridges | Bridge | What it connects | @@ -18,20 +39,34 @@ This repository is still experimental. 
## Quick start ```bash -./tools/bridges login --env prod -./tools/bridges list -./tools/bridges run codex +agentremote login --env prod +agentremote list +agentremote run codex ``` Useful commands: -- `./tools/bridges up ` starts a bridge in the background -- `./tools/bridges status` shows local and remote bridge state -- `./tools/bridges logs --follow` tails logs -- `./tools/bridges stop ` stops a running instance +- `agentremote up ` starts a bridge in the background +- `agentremote status` shows local and remote bridge state +- `agentremote logs --follow` tails logs +- `agentremote stop ` stops a running instance + +For local development from a checkout, `./tools/bridges ...` remains a thin wrapper around `go run ./cmd/agentremote`. Instance state lives under `~/.config/agentremote/profiles//instances/`. +## Docker + +The CLI is also published as a multi-arch Linux container image: + +```bash +docker run --rm -it \ + -v "$(pwd):/data" \ + ghcr.io/beeper/agentremote:latest help +``` + +The container sets `HOME=/data`, so mounted state is persisted under `/data/.config/agentremote/`. See [`docker/agentremote/README.md`](./docker/agentremote/README.md) for usage details. 
+ ## SDK Custom bridges in this repo are built on [`sdk/`](./sdk), using: diff --git a/bridges/openclaw/catalog.go b/bridges/openclaw/catalog.go index 5bed7b81..49849f22 100644 --- a/bridges/openclaw/catalog.go +++ b/bridges/openclaw/catalog.go @@ -6,6 +6,7 @@ import ( "time" "github.com/beeper/agentremote/pkg/shared/cachedvalue" + "github.com/beeper/agentremote/pkg/shared/openclawconv" "github.com/beeper/agentremote/pkg/shared/stringutil" ) @@ -118,10 +119,16 @@ func (oc *OpenClawClient) previewSessionSnippet(ctx context.Context, sessionKey return "" } resp, err := gateway.PreviewSessions(ctx, []string{sessionKey}, 6, 240) - if err != nil || resp == nil { + if err == nil && resp != nil { + if snippet := previewSnippetForSession(*resp, sessionKey); snippet != "" { + return snippet + } + } + history, err := gateway.SessionHistory(ctx, sessionKey, 6, "") + if err != nil || history == nil { return "" } - return previewSnippetForSession(*resp, sessionKey) + return previewSnippetFromHistory(history.Messages) } func previewSnippetForSession(resp gatewaySessionsPreviewResponse, sessionKey string) string { @@ -142,6 +149,18 @@ func previewSnippetForSession(resp gatewaySessionsPreviewResponse, sessionKey st return "" } +func previewSnippetFromHistory(messages []map[string]any) string { + var parts []string + for _, message := range messages { + text := strings.TrimSpace(openclawconv.ExtractMessageText(message)) + if text == "" { + continue + } + parts = append(parts, text) + } + return strings.TrimSpace(strings.Join(parts, " ")) +} + func summarizeToolsCatalog(resp gatewayToolsCatalogResponse) (int, string) { count := 0 for _, group := range resp.Groups { diff --git a/bridges/openclaw/gateway_client.go b/bridges/openclaw/gateway_client.go index 49bb475d..548286b3 100644 --- a/bridges/openclaw/gateway_client.go +++ b/bridges/openclaw/gateway_client.go @@ -60,16 +60,18 @@ type gatewayHello struct { } type openClawGatewayCompatibilityReport struct { - ServerVersion string - 
MissingMethods []string - MissingEvents []string - HistoryEndpointOK bool - HistoryEndpointCode int - HistoryEndpointError string + ServerVersion string + MissingMethods []string + MissingEvents []string + RequiredMissingMethods []string + RequiredMissingEvents []string + HistoryEndpointOK bool + HistoryEndpointCode int + HistoryEndpointError string } func (r openClawGatewayCompatibilityReport) Compatible() bool { - return len(r.MissingMethods) == 0 && len(r.MissingEvents) == 0 && r.HistoryEndpointOK + return len(r.RequiredMissingMethods) == 0 && len(r.RequiredMissingEvents) == 0 } type gatewaySessionRow struct { @@ -416,6 +418,7 @@ type gatewayWSClient struct { writeMu sync.Mutex pendingMu sync.Mutex pending map[string]chan gatewayResponseFrame + requestFn func(ctx context.Context, method string, params map[string]any, out any) error conn *websocket.Conn events chan gatewayEvent @@ -427,8 +430,15 @@ type gatewayWSClient struct { lastErr error helloMu sync.RWMutex hello *gatewayHello + historyMode atomic.Int32 } +const ( + openClawHistoryModeUnknown int32 = iota + openClawHistoryModeHTTP + openClawHistoryModeRPC +) + func newGatewayWSClient(cfg gatewayConnectConfig) *gatewayWSClient { return &gatewayWSClient{ cfg: cfg, @@ -533,6 +543,40 @@ func (c *gatewayWSClient) Hello() *gatewayHello { return &clone } +func (c *gatewayWSClient) SupportsMethod(method string) bool { + method = strings.ToLower(strings.TrimSpace(method)) + if method == "" { + return false + } + hello := c.Hello() + if hello == nil { + return false + } + for _, candidate := range hello.Features.Methods { + if strings.EqualFold(strings.TrimSpace(candidate), method) { + return true + } + } + return false +} + +func (c *gatewayWSClient) SupportsEvent(evt string) bool { + evt = strings.ToLower(strings.TrimSpace(evt)) + if evt == "" { + return false + } + hello := c.Hello() + if hello == nil { + return false + } + for _, candidate := range hello.Features.Events { + if 
strings.EqualFold(strings.TrimSpace(candidate), evt) { + return true + } + } + return false +} + func (c *gatewayWSClient) setLastError(err error) { c.lastErrMu.Lock() defer c.lastErrMu.Unlock() @@ -573,15 +617,37 @@ func (c *gatewayWSClient) ListSessions(ctx context.Context, limit int) ([]gatewa } func (c *gatewayWSClient) SessionHistory(ctx context.Context, sessionKey string, limit int, cursor string) (*gatewaySessionHistoryResponse, error) { - base, err := c.sessionHistoryURL(sessionKey, limit, cursor) - if err != nil { - return nil, err + var httpErr error + if c.historyMode.Load() != openClawHistoryModeRPC { + base, err := c.sessionHistoryURL(sessionKey, limit, cursor) + if err != nil { + httpErr = err + } else { + req, reqErr := http.NewRequestWithContext(ctx, http.MethodGet, base.String(), nil) + if reqErr != nil { + httpErr = fmt.Errorf("build session history request: %w", reqErr) + } else { + history, historyErr := c.doSessionHistoryRequest(req) + if historyErr == nil { + c.historyMode.Store(openClawHistoryModeHTTP) + return history, nil + } + httpErr = historyErr + } + } } - req, err := http.NewRequestWithContext(ctx, http.MethodGet, base.String(), nil) - if err != nil { - return nil, fmt.Errorf("build session history request: %w", err) + if !c.SupportsMethod("chat.history") { + return nil, httpErr + } + history, rpcErr := c.sessionHistoryViaRPC(ctx, sessionKey, limit, cursor) + if rpcErr == nil { + c.historyMode.Store(openClawHistoryModeRPC) + return history, nil + } + if httpErr != nil { + return nil, fmt.Errorf("http history failed: %v; chat.history fallback failed: %w", httpErr, rpcErr) } - return c.doSessionHistoryRequest(req) + return nil, rpcErr } func (c *gatewayWSClient) ProbeSessionHistory(ctx context.Context) openClawGatewayCompatibilityReport { @@ -604,6 +670,7 @@ func (c *gatewayWSClient) ProbeSessionHistory(ctx context.Context) openClawGatew } if reqErr == nil { report.HistoryEndpointOK = true + c.historyMode.Store(openClawHistoryModeHTTP) 
return report } report.HistoryEndpointError = reqErr.Error() @@ -612,13 +679,21 @@ func (c *gatewayWSClient) ProbeSessionHistory(ctx context.Context) openClawGatew // treat the endpoint as compatible when the semantic error type matches. if history != nil && strings.EqualFold(strings.TrimSpace(history.Error.Type), "not_found") { report.HistoryEndpointOK = true + c.historyMode.Store(openClawHistoryModeHTTP) return report } } + if c.SupportsMethod("chat.history") { + report.HistoryEndpointOK = true + c.historyMode.Store(openClawHistoryModeRPC) + } return report } func (c *gatewayWSClient) ListPendingApprovals(ctx context.Context) ([]gatewayApprovalRequestEvent, error) { + if !c.SupportsMethod("exec.approval.list") { + return nil, nil + } var resp gatewayApprovalListResponse if err := c.Request(ctx, "exec.approval.list", map[string]any{}, &resp); err != nil { return nil, err @@ -689,6 +764,9 @@ func (c *gatewayWSClient) doSessionHistoryRequestWithStatus(req *http.Request) ( } func (c *gatewayWSClient) PreviewSessions(ctx context.Context, keys []string, limit, maxChars int) (*gatewaySessionsPreviewResponse, error) { + if !c.SupportsMethod("sessions.preview") { + return nil, nil + } filtered := make([]string, 0, len(keys)) for _, key := range keys { if trimmed := strings.TrimSpace(key); trimmed != "" { @@ -716,6 +794,9 @@ func (c *gatewayWSClient) PreviewSessions(ctx context.Context, keys []string, li } func (c *gatewayWSClient) ResolveSessionKey(ctx context.Context, key string) (string, error) { + if !c.SupportsMethod("sessions.resolve") { + return strings.TrimSpace(key), nil + } var resp gatewayResolveSessionResponse if err := c.Request(ctx, "sessions.resolve", map[string]any{ "key": strings.TrimSpace(key), @@ -746,6 +827,9 @@ func (c *gatewayWSClient) DeleteSession(ctx context.Context, key string, deleteT } func (c *gatewayWSClient) ListAgents(ctx context.Context) (*gatewayAgentsListResponse, error) { + if !c.SupportsMethod("agents.list") { + return 
&gatewayAgentsListResponse{}, nil + } var resp gatewayAgentsListResponse if err := c.Request(ctx, "agents.list", map[string]any{}, &resp); err != nil { return nil, err @@ -762,6 +846,9 @@ func (c *gatewayWSClient) ListAgents(ctx context.Context) (*gatewayAgentsListRes } func (c *gatewayWSClient) ListModels(ctx context.Context) (*gatewayModelsListResponse, error) { + if !c.SupportsMethod("models.list") { + return &gatewayModelsListResponse{}, nil + } var resp gatewayModelsListResponse if err := c.Request(ctx, "models.list", map[string]any{}, &resp); err != nil { return nil, err @@ -783,6 +870,9 @@ func (c *gatewayWSClient) ListModels(ctx context.Context) (*gatewayModelsListRes } func (c *gatewayWSClient) GetToolsCatalog(ctx context.Context, agentID string) (*gatewayToolsCatalogResponse, error) { + if !c.SupportsMethod("tools.catalog") { + return &gatewayToolsCatalogResponse{}, nil + } params := map[string]any{} if trimmed := strings.TrimSpace(agentID); trimmed != "" { params["agentId"] = trimmed @@ -834,6 +924,9 @@ func (c *gatewayWSClient) ResolveApproval(ctx context.Context, approvalID, decis } func (c *gatewayWSClient) GetAgentIdentity(ctx context.Context, agentID, sessionKey string) (*gatewayAgentIdentity, error) { + if !c.SupportsMethod("agent.identity.get") { + return nil, nil + } params := map[string]any{} if strings.TrimSpace(agentID) != "" { params["agentId"] = strings.TrimSpace(agentID) @@ -852,6 +945,9 @@ func (c *gatewayWSClient) GetAgentIdentity(ctx context.Context, agentID, session } func (c *gatewayWSClient) WaitForRun(ctx context.Context, runID string, timeout time.Duration) (*gatewayWaitRunResponse, error) { + if !c.SupportsMethod("agent.wait") { + return nil, nil + } runID = strings.TrimSpace(runID) if runID == "" { return nil, errors.New("run id is required") @@ -884,6 +980,9 @@ func normalizeGatewayAgentIdentity(identity *gatewayAgentIdentity) *gatewayAgent } func (c *gatewayWSClient) Request(ctx context.Context, method string, params 
map[string]any, out any) error { + if c.requestFn != nil { + return c.requestFn(ctx, method, params, out) + } if ctx == nil { ctx = context.Background() } @@ -928,6 +1027,132 @@ func (c *gatewayWSClient) Request(ctx context.Context, method string, params map } } +func (c *gatewayWSClient) sessionHistoryViaRPC(ctx context.Context, sessionKey string, limit int, cursor string) (*gatewaySessionHistoryResponse, error) { + sessionKey = strings.TrimSpace(sessionKey) + if sessionKey == "" { + return nil, errors.New("session key is required") + } + var resp gatewaySessionHistoryResponse + if err := c.Request(ctx, "chat.history", map[string]any{ + "sessionKey": sessionKey, + "limit": openClawMaxHistoryPageLimit, + }, &resp); err != nil { + return nil, err + } + if len(resp.Messages) == 0 && len(resp.Items) > 0 { + resp.Messages = resp.Items + } + resp.SessionKey = sessionKey + return paginateGatewayHistoryResponse(&resp, limit, cursor), nil +} + +func paginateGatewayHistoryResponse(history *gatewaySessionHistoryResponse, limit int, cursor string) *gatewaySessionHistoryResponse { + if history == nil { + return nil + } + limit = normalizeGatewayHistoryLimit(limit) + cursorSeq := parseGatewayHistoryCursor(cursor) + messages := history.Messages + endExclusive := len(messages) + if cursorSeq > 0 { + endExclusive = 0 + for idx, message := range messages { + if gatewayHistoryMessageSeq(message, idx) >= cursorSeq { + endExclusive = idx + break + } + endExclusive = idx + 1 + } + } + start := 0 + if limit > 0 && endExclusive > limit { + start = endExclusive - limit + } + paged := &gatewaySessionHistoryResponse{ + SessionKey: strings.TrimSpace(history.SessionKey), + Messages: cloneGatewayHistorySlice(messages[start:endExclusive]), + HasMore: start > 0, + } + paged.Items = cloneGatewayHistorySlice(paged.Messages) + if paged.HasMore && start < len(messages) { + paged.NextCursor = fmt.Sprintf("%d", gatewayHistoryMessageSeq(messages[start], start)) + } + return paged +} + +func 
normalizeGatewayHistoryLimit(limit int) int { + if limit <= 0 || limit > openClawMaxHistoryPageLimit { + return openClawMaxHistoryPageLimit + } + return limit +} + +func parseGatewayHistoryCursor(cursor string) int64 { + cursor = strings.TrimSpace(cursor) + cursor = strings.TrimPrefix(cursor, "seq:") + if cursor == "" { + return 0 + } + var value int64 + _, _ = fmt.Sscanf(cursor, "%d", &value) + if value < 0 { + return 0 + } + return value +} + +func gatewayHistoryMessageSeq(message map[string]any, idx int) int64 { + meta, _ := message["__openclaw"].(map[string]any) + switch seq := meta["seq"].(type) { + case float64: + if seq > 0 { + return int64(seq) + } + case int64: + if seq > 0 { + return seq + } + case int: + if seq > 0 { + return int64(seq) + } + } + return int64(idx + 1) +} + +func cloneGatewayHistorySlice(messages []map[string]any) []map[string]any { + if len(messages) == 0 { + return nil + } + cloned := make([]map[string]any, len(messages)) + for i, message := range messages { + cloned[i] = cloneGatewayHistoryMap(message) + } + return cloned +} + +func cloneGatewayHistoryMap(message map[string]any) map[string]any { + if message == nil { + return nil + } + data, err := json.Marshal(message) + if err != nil { + cloned := make(map[string]any, len(message)) + for key, value := range message { + cloned[key] = value + } + return cloned + } + var cloned map[string]any + if err = json.Unmarshal(data, &cloned); err != nil { + cloned = make(map[string]any, len(message)) + for key, value := range message { + cloned[key] = value + } + } + return cloned +} + func (c *gatewayWSClient) writeJSON(ctx context.Context, value any) error { c.writeMu.Lock() defer c.writeMu.Unlock() diff --git a/bridges/openclaw/gateway_client_test.go b/bridges/openclaw/gateway_client_test.go index 35b3c3ab..ef3b6bb8 100644 --- a/bridges/openclaw/gateway_client_test.go +++ b/bridges/openclaw/gateway_client_test.go @@ -6,6 +6,7 @@ import ( "crypto/rand" "encoding/base64" "encoding/json" + 
"errors" "net/http" "net/http/httptest" "runtime" @@ -188,6 +189,53 @@ func TestSessionHistoryFallsBackToItemsArray(t *testing.T) { } } +func TestSessionHistoryFallsBackToChatHistoryRPC(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + _, _ = w.Write([]byte("control-ui")) + })) + defer server.Close() + + client := newGatewayWSClient(gatewayConnectConfig{URL: server.URL}) + client.hello = &gatewayHello{ + Features: gatewayHelloFeatures{Methods: []string{"chat.history"}}, + } + client.requestFn = func(ctx context.Context, method string, params map[string]any, out any) error { + if method != "chat.history" { + t.Fatalf("unexpected method %q", method) + } + resp, ok := out.(*gatewaySessionHistoryResponse) + if !ok { + t.Fatalf("unexpected response type %T", out) + } + *resp = gatewaySessionHistoryResponse{ + Messages: []map[string]any{ + {"role": "assistant", "text": "one", "__openclaw": map[string]any{"seq": 1}}, + {"role": "assistant", "text": "two", "__openclaw": map[string]any{"seq": 2}}, + {"role": "assistant", "text": "three", "__openclaw": map[string]any{"seq": 3}}, + }, + } + return nil + } + + history, err := client.SessionHistory(context.Background(), "agent:main:test", 2, "4") + if err != nil { + t.Fatalf("SessionHistory returned error: %v", err) + } + if history == nil || len(history.Messages) != 2 { + t.Fatalf("expected paginated rpc fallback history, got %#v", history) + } + if got := history.Messages[0]["text"]; got != "two" { + t.Fatalf("unexpected first fallback message: %v", got) + } + if got := history.Messages[1]["text"]; got != "three" { + t.Fatalf("unexpected second fallback message: %v", got) + } + if !history.HasMore || history.NextCursor != "2" { + t.Fatalf("expected local pagination markers, got %#v", history) + } +} + func TestProbeSessionHistoryAcceptsSemanticNotFound(t *testing.T) { server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { w.Header().Set("Content-Type", "application/json") @@ -223,3 +271,67 @@ func TestProbeSessionHistoryRejectsGeneric404(t *testing.T) { t.Fatalf("unexpected history probe status: %d", report.HistoryEndpointCode) } } + +func TestProbeSessionHistoryAcceptsRPCFallback(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + _, _ = w.Write([]byte("control-ui")) + })) + defer server.Close() + + client := newGatewayWSClient(gatewayConnectConfig{URL: server.URL}) + client.hello = &gatewayHello{ + Features: gatewayHelloFeatures{Methods: []string{"chat.history"}}, + } + report := client.ProbeSessionHistory(context.Background()) + if !report.HistoryEndpointOK { + t.Fatalf("expected rpc fallback probe to be accepted, got %#v", report) + } + if !strings.Contains(report.HistoryEndpointError, "invalid character '<'") { + t.Fatalf("expected original http failure to be preserved, got %#v", report) + } +} + +func TestRequestUsesOverrideWhenProvided(t *testing.T) { + client := newGatewayWSClient(gatewayConnectConfig{}) + client.requestFn = func(ctx context.Context, method string, params map[string]any, out any) error { + if method != "models.list" { + t.Fatalf("unexpected method %q", method) + } + resp, ok := out.(*gatewayModelsListResponse) + if !ok { + t.Fatalf("unexpected out type %T", out) + } + resp.Models = []gatewayModelChoice{{ID: "model-1"}} + return nil + } + + var resp gatewayModelsListResponse + if err := client.Request(context.Background(), "models.list", nil, &resp); err != nil { + t.Fatalf("Request returned error: %v", err) + } + if len(resp.Models) != 1 || resp.Models[0].ID != "model-1" { + t.Fatalf("unexpected request override response: %#v", resp) + } +} + +func TestSessionHistoryReturnsCombinedFallbackErrors(t *testing.T) { + server := 
httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + _, _ = w.Write([]byte("control-ui")) + })) + defer server.Close() + + client := newGatewayWSClient(gatewayConnectConfig{URL: server.URL}) + client.hello = &gatewayHello{ + Features: gatewayHelloFeatures{Methods: []string{"chat.history"}}, + } + client.requestFn = func(ctx context.Context, method string, params map[string]any, out any) error { + return errors.New("rpc unavailable") + } + + _, err := client.SessionHistory(context.Background(), "agent:main:test", 10, "") + if err == nil || !strings.Contains(err.Error(), "chat.history fallback failed") { + t.Fatalf("expected combined fallback error, got %v", err) + } +} diff --git a/bridges/openclaw/manager.go b/bridges/openclaw/manager.go index 05dd75c6..55e2227f 100644 --- a/bridges/openclaw/manager.go +++ b/bridges/openclaw/manager.go @@ -127,6 +127,10 @@ func newOpenClawManager(client *OpenClawClient) *openClawManager { var ( openClawRequiredGatewayMethods = []string{ + "sessions.list", + "chat.send", + } + openClawPreferredGatewayMethods = []string{ "sessions.list", "sessions.patch", "sessions.resolve", @@ -141,6 +145,9 @@ var ( } openClawRequiredGatewayEvents = []string{ "chat", + } + openClawPreferredGatewayEvents = []string{ + "chat", "agent", "exec.approval.requested", "exec.approval.resolved", @@ -202,6 +209,16 @@ func (m *openClawManager) Start(ctx context.Context) (bool, error) { if compatErr != nil { return false, compatErr } + if report != nil && (!report.HistoryEndpointOK || len(report.MissingMethods) > 0 || len(report.MissingEvents) > 0) { + m.client.Log().Warn(). + Str("server_version", report.ServerVersion). + Strs("missing_methods", report.MissingMethods). + Strs("missing_events", report.MissingEvents). + Bool("history_endpoint_ok", report.HistoryEndpointOK). + Int("history_endpoint_code", report.HistoryEndpointCode). 
+ Str("history_endpoint_error", report.HistoryEndpointError). + Msg("OpenClaw gateway connected with compatibility fallbacks") + } if err = m.syncSessions(ctx); err != nil { return false, err } @@ -287,8 +304,10 @@ func (m *openClawManager) validateGatewayCompatibility(ctx context.Context, gate if version := strings.TrimSpace(stringValue(hello.Server["version"])); version != "" { report.ServerVersion = version } - report.MissingMethods = findMissingGatewayFeatures(hello.Features.Methods, openClawRequiredGatewayMethods) - report.MissingEvents = findMissingGatewayFeatures(hello.Features.Events, openClawRequiredGatewayEvents) + report.RequiredMissingMethods = findMissingGatewayFeatures(hello.Features.Methods, openClawRequiredGatewayMethods) + report.RequiredMissingEvents = findMissingGatewayFeatures(hello.Features.Events, openClawRequiredGatewayEvents) + report.MissingMethods = findMissingGatewayFeatures(hello.Features.Methods, openClawPreferredGatewayMethods) + report.MissingEvents = findMissingGatewayFeatures(hello.Features.Events, openClawPreferredGatewayEvents) historyProbe := gateway.ProbeSessionHistory(ctx) report.HistoryEndpointOK = historyProbe.HistoryEndpointOK report.HistoryEndpointCode = historyProbe.HistoryEndpointCode @@ -2167,6 +2186,17 @@ func (m *openClawManager) handleAgentEvent(ctx context.Context, payload gatewayA m.startRunRecovery(ctx, portal, meta, turnID, payload.RunID, agentID) stream := strings.ToLower(strings.TrimSpace(payload.Stream)) switch stream { + case "assistant": + if !shouldEmitOpenClawRawAgentData(stream, payload.Data) { + return + } + m.client.EmitStreamPart(ctx, portal, turnID, agentID, payload.SessionKey, map[string]any{ + "timestamp": eventTS.UnixMilli(), + "type": "data-openclaw-" + stream, + "id": fmt.Sprintf("openclaw-%s-%d", stream, payload.Seq), + "data": map[string]any{"stream": payload.Stream, "data": payload.Data}, + }) + return case "reasoning": if text := stringutil.TrimDefault(stringValue(payload.Data["text"]), 
stringValue(payload.Data["delta"])); text != "" { m.client.EmitStreamPart(ctx, portal, turnID, agentID, payload.SessionKey, map[string]any{ @@ -2180,17 +2210,13 @@ func (m *openClawManager) handleAgentEvent(ctx context.Context, payload gatewayA toolCallID := stringutil.TrimDefault(stringValue(payload.Data["toolCallId"]), stringutil.TrimDefault(stringValue(payload.Data["toolUseId"]), stringValue(payload.Data["id"]))) toolName := stringutil.TrimDefault(stringValue(payload.Data["toolName"]), stringutil.TrimDefault(stringValue(payload.Data["name"]), "tool")) if toolCallID != "" { - if input, ok := payload.Data["input"]; ok { - m.client.EmitStreamPart(ctx, portal, turnID, agentID, payload.SessionKey, map[string]any{ - "timestamp": eventTS.UnixMilli(), - "type": "tool-input-available", - "toolCallId": toolCallID, - "toolName": toolName, - "input": input, - "providerExecuted": true, - }) + update := openClawBuildToolStreamUpdate(eventTS, payload.Data) + emitted := false + for _, part := range update.Parts { + m.client.EmitStreamPart(ctx, portal, turnID, agentID, payload.SessionKey, part) + emitted = true } - if approvalID := strings.TrimSpace(stringValue(payload.Data["approvalId"])); approvalID != "" { + if approvalID := strings.TrimSpace(stringutil.TrimDefault(stringValue(payload.Data["approvalId"]), stringValue(jsonutil.ToMap(payload.Data["approval"])["id"]))); approvalID != "" { m.attachApprovalContext(approvalID, payload.SessionKey, agentID, turnID, toolCallID, toolName) m.client.EmitStreamPart(ctx, portal, turnID, agentID, payload.SessionKey, map[string]any{ "timestamp": eventTS.UnixMilli(), @@ -2198,36 +2224,14 @@ func (m *openClawManager) handleAgentEvent(ctx context.Context, payload gatewayA "approvalId": approvalID, "toolCallId": toolCallID, }) + emitted = true } - if output, ok := payload.Data["output"]; ok { - m.client.EmitStreamPart(ctx, portal, turnID, agentID, payload.SessionKey, map[string]any{ - "timestamp": eventTS.UnixMilli(), - "type": 
"tool-output-available", - "toolCallId": toolCallID, - "output": output, - "providerExecuted": true, - }) - m.ensureSpawnedSessionPortal(ctx, openClawSpawnedSessionKeyFromToolResult(toolName, output)) - } else if result, ok := payload.Data["result"]; ok { - m.client.EmitStreamPart(ctx, portal, turnID, agentID, payload.SessionKey, map[string]any{ - "timestamp": eventTS.UnixMilli(), - "type": "tool-output-available", - "toolCallId": toolCallID, - "output": result, - "providerExecuted": true, - }) - m.ensureSpawnedSessionPortal(ctx, openClawSpawnedSessionKeyFromToolResult(toolName, result)) + if update.HasFinalOutput { + m.ensureSpawnedSessionPortal(ctx, openClawSpawnedSessionKeyFromToolResult(toolName, update.FinalOutput)) } - if errText := strings.TrimSpace(stringValue(payload.Data["error"])); errText != "" { - m.client.EmitStreamPart(ctx, portal, turnID, agentID, payload.SessionKey, map[string]any{ - "timestamp": eventTS.UnixMilli(), - "type": "tool-output-error", - "toolCallId": toolCallID, - "errorText": errText, - "providerExecuted": true, - }) + if emitted { + return } - return } fallthrough default: @@ -2240,6 +2244,14 @@ func (m *openClawManager) handleAgentEvent(ctx context.Context, payload gatewayA } } +func shouldEmitOpenClawRawAgentData(stream string, data map[string]any) bool { + stream = strings.ToLower(strings.TrimSpace(stream)) + if stream != "assistant" { + return true + } + return strings.TrimSpace(stringutil.TrimDefault(stringValue(data["text"]), stringValue(data["delta"]))) == "" +} + func (m *openClawManager) ensureSpawnedSessionPortal(ctx context.Context, sessionKey string) { sessionKey = strings.TrimSpace(sessionKey) if sessionKey == "" { @@ -2293,6 +2305,127 @@ func openClawExtractSpawnedSessionKey(value any) string { return "" } +type openClawToolStreamUpdate struct { + Parts []map[string]any + FinalOutput any + HasFinalOutput bool +} + +func openClawBuildToolStreamUpdate(eventTS time.Time, data map[string]any) openClawToolStreamUpdate { + 
toolCallID := strings.TrimSpace(stringutil.TrimDefault(stringValue(data["toolCallId"]), stringutil.TrimDefault(stringValue(data["toolUseId"]), stringValue(data["id"])))) + if toolCallID == "" { + return openClawToolStreamUpdate{} + } + toolName := strings.TrimSpace(stringutil.TrimDefault(stringValue(data["toolName"]), stringutil.TrimDefault(stringValue(data["name"]), "tool"))) + if toolName == "" { + toolName = "tool" + } + base := map[string]any{ + "timestamp": eventTS.UnixMilli(), + "toolCallId": toolCallID, + "toolName": toolName, + "providerExecuted": true, + } + partWithBase := func(partType string) map[string]any { + part := jsonutil.DeepCloneMap(base) + part["type"] = partType + return part + } + + update := openClawToolStreamUpdate{} + switch strings.ToLower(strings.TrimSpace(stringValue(data["phase"]))) { + case "start": + part := partWithBase("tool-input-start") + if input, ok := openClawToolEventInput(data); ok { + part["type"] = "tool-input-available" + part["input"] = input + } + update.Parts = append(update.Parts, part) + case "update": + if output, ok := openClawToolEventPartialOutput(data); ok { + part := partWithBase("tool-output-available") + part["output"] = output + part["preliminary"] = true + update.Parts = append(update.Parts, part) + } + case "result": + if errText := openClawToolEventErrorText(data); errText != "" { + part := partWithBase("tool-output-error") + part["errorText"] = errText + update.Parts = append(update.Parts, part) + return update + } + if output, ok := openClawToolEventFinalOutput(data); ok { + part := partWithBase("tool-output-available") + part["output"] = output + update.Parts = append(update.Parts, part) + update.FinalOutput = output + update.HasFinalOutput = true + } + } + return update +} + +func openClawToolEventInput(data map[string]any) (any, bool) { + input, ok := data["args"] + if !ok || input == nil { + return nil, false + } + return jsonutil.DeepCloneAny(input), true +} + +func 
openClawToolEventPartialOutput(data map[string]any) (any, bool) { + output, ok := data["partialResult"] + if !ok || output == nil { + return nil, false + } + return jsonutil.DeepCloneAny(output), true +} + +func openClawToolEventFinalOutput(data map[string]any) (any, bool) { + output, ok := data["result"] + if !ok || output == nil { + return nil, false + } + return jsonutil.DeepCloneAny(output), true +} + +func openClawToolEventErrorText(data map[string]any) string { + isError, _ := data["isError"].(bool) + if !isError { + return "" + } + if text := openClawToolResultErrorText(data["result"]); text != "" { + return text + } + if text := strings.TrimSpace(stringValue(data["error"])); text != "" { + return text + } + return "OpenClaw tool failed" +} + +func openClawToolResultErrorText(result any) string { + switch typed := result.(type) { + case map[string]any: + if text := strings.TrimSpace(openclawconv.ExtractMessageText(typed)); text != "" { + return text + } + for _, key := range []string{"error", "message"} { + if text := strings.TrimSpace(stringValue(typed[key])); text != "" { + return text + } + } + for _, key := range []string{"details", "result", "output"} { + if nested := openClawToolResultErrorText(typed[key]); nested != "" { + return nested + } + } + case string: + return strings.TrimSpace(typed) + } + return "" +} + func isOpenClawSpawnedSessionKey(sessionKey string) bool { sessionKey = strings.TrimSpace(sessionKey) if sessionKey == "" { diff --git a/bridges/openclaw/manager_test.go b/bridges/openclaw/manager_test.go index aaadbda6..8f8f1e1c 100644 --- a/bridges/openclaw/manager_test.go +++ b/bridges/openclaw/manager_test.go @@ -211,18 +211,185 @@ func TestAttachApprovalContextKeepsHintsAndPendingData(t *testing.T) { _ = agentremote.ErrApprovalUnknown } -func TestOpenClawRequiredGatewayMethodsIncludeRuntimeRPCs(t *testing.T) { +func TestOpenClawRequiredGatewayMethodsCoverCoreChatSessionFlow(t *testing.T) { required := make(map[string]struct{}, 
len(openClawRequiredGatewayMethods)) for _, method := range openClawRequiredGatewayMethods { required[method] = struct{}{} } - for _, method := range []string{"exec.approval.resolve", "agent.wait"} { + for _, method := range []string{"sessions.list", "chat.send"} { if _, ok := required[method]; !ok { t.Fatalf("expected required gateway methods to include %q", method) } } } +func TestShouldEmitOpenClawRawAgentDataSuppressesAssistantTextSnapshots(t *testing.T) { + if shouldEmitOpenClawRawAgentData("assistant", map[string]any{"text": "pretty good"}) { + t.Fatal("expected assistant text snapshots to be suppressed") + } + if shouldEmitOpenClawRawAgentData("assistant", map[string]any{"delta": " good"}) { + t.Fatal("expected assistant delta snapshots to be suppressed") + } + if !shouldEmitOpenClawRawAgentData("assistant", map[string]any{"phase": "start"}) { + t.Fatal("expected non-text assistant payloads to remain available as raw data") + } + if !shouldEmitOpenClawRawAgentData("lifecycle", map[string]any{"phase": "start"}) { + t.Fatal("expected non-assistant streams to keep raw data") + } +} + +func TestValidateGatewayCompatibilityAllowsOptionalGaps(t *testing.T) { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html; charset=utf-8") + _, _ = w.Write([]byte("control-ui")) + })) + defer server.Close() + + mgr := newOpenClawManager(&OpenClawClient{}) + gateway := newGatewayWSClient(gatewayConnectConfig{URL: server.URL}) + gateway.hello = &gatewayHello{ + Server: map[string]any{"version": "test"}, + Features: gatewayHelloFeatures{ + Methods: []string{"sessions.list", "chat.send", "chat.history"}, + Events: []string{"chat"}, + }, + } + + report, err := mgr.validateGatewayCompatibility(context.Background(), gateway) + if err != nil { + t.Fatalf("validateGatewayCompatibility returned error: %v", err) + } + if report == nil || !report.Compatible() { + t.Fatalf("expected compatibility report to 
accept optional gaps, got %#v", report) + } + if !containsString(report.MissingMethods, "agents.list") { + t.Fatalf("expected optional missing methods to be reported, got %#v", report) + } + if !containsString(report.MissingEvents, "agent") { + t.Fatalf("expected optional missing events to be reported, got %#v", report) + } +} + +func TestOpenClawBuildToolStreamUpdateFromStartArgs(t *testing.T) { + update := openClawBuildToolStreamUpdate(time.UnixMilli(1_700_000_000_000), map[string]any{ + "phase": "start", + "toolCallId": "tool-1", + "name": "read", + "args": map[string]any{"path": "/tmp/example.txt"}, + }) + + if len(update.Parts) != 1 { + t.Fatalf("expected 1 part, got %#v", update.Parts) + } + part := update.Parts[0] + if part["type"] != "tool-input-available" { + t.Fatalf("unexpected part type: %#v", part) + } + if part["toolName"] != "read" || part["toolCallId"] != "tool-1" { + t.Fatalf("unexpected tool identity: %#v", part) + } + input, _ := part["input"].(map[string]any) + if input["path"] != "/tmp/example.txt" { + t.Fatalf("unexpected tool input: %#v", input) + } +} + +func TestOpenClawBuildToolStreamUpdateFromStartWithoutArgs(t *testing.T) { + update := openClawBuildToolStreamUpdate(time.UnixMilli(1_700_000_000_000), map[string]any{ + "phase": "start", + "toolCallId": "tool-2", + "name": "exec", + }) + + if len(update.Parts) != 1 { + t.Fatalf("expected 1 part, got %#v", update.Parts) + } + part := update.Parts[0] + if part["type"] != "tool-input-start" { + t.Fatalf("unexpected part type: %#v", part) + } + if part["toolName"] != "exec" || part["toolCallId"] != "tool-2" { + t.Fatalf("unexpected tool identity: %#v", part) + } +} + +func TestOpenClawBuildToolStreamUpdateFromPartialResult(t *testing.T) { + update := openClawBuildToolStreamUpdate(time.UnixMilli(1_700_000_000_000), map[string]any{ + "phase": "update", + "toolCallId": "tool-3", + "name": "fetch", + "partialResult": map[string]any{"status": "running"}, + }) + + if len(update.Parts) != 1 { + 
t.Fatalf("expected 1 part, got %#v", update.Parts) + } + part := update.Parts[0] + if part["type"] != "tool-output-available" { + t.Fatalf("unexpected part type: %#v", part) + } + if preliminary, _ := part["preliminary"].(bool); !preliminary { + t.Fatalf("expected preliminary output, got %#v", part) + } + output, _ := part["output"].(map[string]any) + if output["status"] != "running" { + t.Fatalf("unexpected partial output: %#v", output) + } +} + +func TestOpenClawBuildToolStreamUpdateFromFinalResult(t *testing.T) { + update := openClawBuildToolStreamUpdate(time.UnixMilli(1_700_000_000_000), map[string]any{ + "phase": "result", + "toolCallId": "tool-4", + "name": "fetch", + "result": map[string]any{"status": 200}, + }) + + if len(update.Parts) != 1 { + t.Fatalf("expected 1 part, got %#v", update.Parts) + } + part := update.Parts[0] + if part["type"] != "tool-output-available" { + t.Fatalf("unexpected part type: %#v", part) + } + if preliminary, _ := part["preliminary"].(bool); preliminary { + t.Fatalf("did not expect final output to be preliminary: %#v", part) + } + if !update.HasFinalOutput { + t.Fatalf("expected final output marker, got %#v", update) + } + output, _ := update.FinalOutput.(map[string]any) + if output["status"] != 200 { + t.Fatalf("unexpected final output: %#v", output) + } +} + +func TestOpenClawBuildToolStreamUpdateFromErrorResult(t *testing.T) { + update := openClawBuildToolStreamUpdate(time.UnixMilli(1_700_000_000_000), map[string]any{ + "phase": "result", + "toolCallId": "tool-5", + "name": "exec", + "isError": true, + "result": map[string]any{ + "error": "permission denied", + }, + }) + + if len(update.Parts) != 1 { + t.Fatalf("expected 1 part, got %#v", update.Parts) + } + part := update.Parts[0] + if part["type"] != "tool-output-error" { + t.Fatalf("unexpected part type: %#v", part) + } + if part["errorText"] != "permission denied" { + t.Fatalf("unexpected error text: %#v", part) + } + if update.HasFinalOutput { + t.Fatalf("did not expect 
final output on error: %#v", update) + } +} + func TestLoadAllHistoryMessagesStopsWhenCursorRepeats(t *testing.T) { var calls int server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { @@ -252,3 +419,12 @@ func TestLoadAllHistoryMessagesStopsWhenCursorRepeats(t *testing.T) { t.Fatalf("expected both fetched pages before loop exit, got %#v", messages) } } + +func containsString(values []string, needle string) bool { + for _, value := range values { + if value == needle { + return true + } + } + return false +} diff --git a/bridges/openclaw/status.go b/bridges/openclaw/status.go index 69c3f3af..38347fb1 100644 --- a/bridges/openclaw/status.go +++ b/bridges/openclaw/status.go @@ -81,12 +81,14 @@ func classifyOpenClawConnectionError(err error, retryDelay time.Duration) (statu state.UserAction = status.UserActionRestart if compatErr != nil { state.Info = map[string]any{ - "server_version": compatErr.Report.ServerVersion, - "missing_methods": compatErr.Report.MissingMethods, - "missing_events": compatErr.Report.MissingEvents, - "history_endpoint_ok": compatErr.Report.HistoryEndpointOK, - "history_endpoint_code": compatErr.Report.HistoryEndpointCode, - "history_endpoint_err": compatErr.Report.HistoryEndpointError, + "server_version": compatErr.Report.ServerVersion, + "missing_methods": compatErr.Report.MissingMethods, + "missing_events": compatErr.Report.MissingEvents, + "required_missing_methods": compatErr.Report.RequiredMissingMethods, + "required_missing_events": compatErr.Report.RequiredMissingEvents, + "history_endpoint_ok": compatErr.Report.HistoryEndpointOK, + "history_endpoint_code": compatErr.Report.HistoryEndpointCode, + "history_endpoint_err": compatErr.Report.HistoryEndpointError, } } return state, false diff --git a/bridges/openclaw/stream_test.go b/bridges/openclaw/stream_test.go index 1964dcd1..c03ccfcb 100644 --- a/bridges/openclaw/stream_test.go +++ b/bridges/openclaw/stream_test.go @@ -216,6 +216,62 @@ func 
TestApplyStreamPartStateLockedUpdatesLifecycleFields(t *testing.T) { } } +func TestBuildStreamDBMetadataFinalizesPreliminaryToolOutput(t *testing.T) { + turn := newOpenClawTestTurn("turn-tool-seq") + parts := []map[string]any{ + { + "type": "tool-input-available", + "toolCallId": "call-2", + "toolName": "fetch", + "input": map[string]any{"url": "https://example.com"}, + "providerExecuted": true, + }, + { + "type": "tool-output-available", + "toolCallId": "call-2", + "output": map[string]any{"status": "running"}, + "providerExecuted": true, + "preliminary": true, + }, + { + "type": "tool-output-available", + "toolCallId": "call-2", + "output": map[string]any{"status": 200}, + "providerExecuted": true, + }, + } + for _, part := range parts { + bridgesdk.ApplyStreamPart(turn, part, bridgesdk.PartApplyOptions{}) + } + + oc := &OpenClawClient{} + state := &openClawStreamState{ + turnID: "turn-tool-seq", + agentID: "main", + sessionID: "sess-1", + sessionKey: "agent:main:matrix-dm", + role: "assistant", + turn: turn, + } + meta := oc.buildStreamDBMetadata(state) + if meta == nil { + t.Fatal("expected metadata") + } + if len(meta.ToolCalls) != 1 { + t.Fatalf("expected 1 tool call, got %#v", meta.ToolCalls) + } + call := meta.ToolCalls[0] + if call.ToolName != "fetch" || call.CallID != "call-2" { + t.Fatalf("unexpected tool identity: %#v", call) + } + if call.Status != "output-available" || call.ResultStatus != "completed" { + t.Fatalf("unexpected final tool state: %#v", call) + } + if call.Output["status"] != 200 { + t.Fatalf("unexpected final tool output: %#v", call.Output) + } +} + func TestDrainAndAbortResetsMap(t *testing.T) { // Use states without real turns to avoid nil-cancel panics in unit tests. 
oc := newOpenClawTestClient(map[string]*openClawStreamState{ diff --git a/docker/agentremote/Dockerfile b/docker/agentremote/Dockerfile new file mode 100644 index 00000000..54086a69 --- /dev/null +++ b/docker/agentremote/Dockerfile @@ -0,0 +1,36 @@ +FROM golang:1.25-alpine3.23 AS builder + +ARG VERSION=dev +ARG COMMIT=unknown +ARG BUILD_TIME=unknown + +RUN apk add --no-cache ca-certificates git build-base + +WORKDIR /build + +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . + +RUN CGO_ENABLED=1 GOOS=linux GOARCH=$TARGETARCH \ + go build \ + -tags goolm \ + -trimpath \ + -ldflags "-s -w -X main.Tag=${VERSION} -X main.Commit=${COMMIT} -X main.BuildTime=${BUILD_TIME}" \ + -o /out/agentremote \ + ./cmd/agentremote + +FROM alpine:3.23 + +RUN apk add --no-cache bash ca-certificates curl jq yq-go + +ENV HOME=/data + +WORKDIR /data +VOLUME /data + +COPY --from=builder /out/agentremote /usr/local/bin/agentremote + +ENTRYPOINT ["/usr/local/bin/agentremote"] +CMD ["help"] diff --git a/docker/agentremote/README.md b/docker/agentremote/README.md new file mode 100644 index 00000000..c2d38221 --- /dev/null +++ b/docker/agentremote/README.md @@ -0,0 +1,21 @@ +# AgentRemote Docker Image + +The AgentRemote container packages the `agentremote` CLI for Linux `amd64` and `arm64`. + +The image stores CLI state under `/data` by setting `HOME=/data`, so mounting a host directory preserves profiles, auth, and bridge instance state. 
+ +## Usage + +```sh +docker run --rm -it \ + -v "$(pwd):/data" \ + ghcr.io/beeper/agentremote:latest help +``` + +Run a bridge command with persisted state: + +```sh +docker run --rm -it \ + -v "$(pwd):/data" \ + ghcr.io/beeper/agentremote:latest run ai +``` diff --git a/install.sh b/install.sh new file mode 100755 index 00000000..6c4a5bdd --- /dev/null +++ b/install.sh @@ -0,0 +1,187 @@ +#!/bin/sh +set -eu + +OWNER="${OWNER:-beeper}" +REPO="${REPO:-agentremote}" +VERSION="${VERSION:-latest}" +BINDIR="${BINDIR:-}" + +need_cmd() { + command -v "$1" >/dev/null 2>&1 +} + +fail() { + printf '%s\n' "error: $*" >&2 + exit 1 +} + +detect_os() { + case "$(uname -s)" in + Linux) printf '%s\n' "linux" ;; + Darwin) printf '%s\n' "darwin" ;; + *) fail "unsupported operating system: $(uname -s)" ;; + esac +} + +detect_arch() { + case "$(uname -m)" in + x86_64|amd64) printf '%s\n' "amd64" ;; + arm64|aarch64) printf '%s\n' "arm64" ;; + *) fail "unsupported architecture: $(uname -m)" ;; + esac +} + +normalize_version() { + if [ "$VERSION" = "latest" ]; then + printf '%s\n' "latest" + return + fi + case "$VERSION" in + v*) printf '%s\n' "$VERSION" ;; + *) printf 'v%s\n' "$VERSION" ;; + esac +} + +asset_name() { + version_no_v="$1" + os="$2" + arch="$3" + printf 'agentremote_v%s_%s_%s.tar.gz\n' "$version_no_v" "$os" "$arch" +} + +download() { + url="$1" + dest="$2" + if need_cmd curl; then + curl -fsSL "$url" -o "$dest" + return + fi + if need_cmd wget; then + wget -qO "$dest" "$url" + return + fi + fail "curl or wget is required" +} + +pick_bindir() { + home_dir="${HOME:-}" + + if [ -n "$BINDIR" ]; then + mkdir -p "$BINDIR" + printf '%s\n' "$BINDIR" + return + fi + + if [ -n "$home_dir" ]; then + for candidate in "$home_dir/.local/bin" "$home_dir/bin"; do + mkdir -p "$candidate" + if [ -w "$candidate" ]; then + printf '%s\n' "$candidate" + return + fi + done + fi + + if [ -d /usr/local/bin ] && [ -w /usr/local/bin ]; then + printf '%s\n' "/usr/local/bin" + return + fi + + fail 
"could not find a writable install directory; set BINDIR=/path/to/bin" +} + +checksum_for() { + file_name="$1" + checksums_file="$2" + awk -v target="$file_name" '$2 == target { print $1; exit }' "$checksums_file" +} + +verify_checksum() { + file_path="$1" + expected="$2" + if [ -z "$expected" ]; then + fail "missing checksum for $(basename "$file_path")" + fi + if need_cmd shasum; then + actual="$(shasum -a 256 "$file_path" | awk '{print $1}')" + elif need_cmd sha256sum; then + actual="$(sha256sum "$file_path" | awk '{print $1}')" + else + fail "shasum or sha256sum is required" + fi + if [ "$actual" != "$expected" ]; then + fail "checksum mismatch for $(basename "$file_path")" + fi +} + +install_binary() { + archive_path="$1" + dest_dir="$2" + tmp_extract="$3" + + tar -xzf "$archive_path" -C "$tmp_extract" + if [ ! -f "$tmp_extract/agentremote" ]; then + fail "release archive did not contain agentremote" + fi + + dest_path="$dest_dir/agentremote" + if need_cmd install; then + install -m 0755 "$tmp_extract/agentremote" "$dest_path" + else + cp "$tmp_extract/agentremote" "$dest_path" + chmod 0755 "$dest_path" + fi + + printf '%s\n' "$dest_path" +} + +path_hint() { + bin_dir="$1" + case ":${PATH:-}:" in + *:"$bin_dir":*) return 0 ;; + esac + printf '%s\n' "warning: $bin_dir is not on PATH" >&2 + printf '%s\n' "add this to your shell profile:" >&2 + printf '%s\n' " export PATH=\"$bin_dir:\$PATH\"" >&2 +} + +main() { + os="$(detect_os)" + arch="$(detect_arch)" + version="$(normalize_version)" + version_no_v="${version#v}" + bin_dir="$(pick_bindir)" + asset="$(asset_name "$version_no_v" "$os" "$arch")" + + tmp_dir="$(mktemp -d)" + trap 'rm -rf "$tmp_dir"' EXIT INT TERM + + case "$version" in + latest) + base_url="https://github.com/$OWNER/$REPO/releases/latest/download" + ;; + *) + base_url="https://github.com/$OWNER/$REPO/releases/download/$version" + ;; + esac + + archive_path="$tmp_dir/$asset" + checksums_path="$tmp_dir/checksums.txt" + 
extract_dir="$tmp_dir/extracted" + mkdir -p "$extract_dir" + + printf '%s\n' "Downloading $asset" + download "$base_url/$asset" "$archive_path" + download "$base_url/checksums.txt" "$checksums_path" + + expected_checksum="$(checksum_for "$asset" "$checksums_path")" + verify_checksum "$archive_path" "$expected_checksum" + + dest_path="$(install_binary "$archive_path" "$bin_dir" "$extract_dir")" + path_hint "$bin_dir" + + printf '%s\n' "Installed $dest_path" + "$dest_path" --version +} + +main "$@" diff --git a/sdk/part_apply.go b/sdk/part_apply.go index 5e80236b..1463f9ab 100644 --- a/sdk/part_apply.go +++ b/sdk/part_apply.go @@ -53,12 +53,13 @@ func ApplyStreamPart(turn *Turn, part map[string]any, opts PartApplyOptions) boo ProviderExecuted: app.b("providerExecuted"), }) case "tool-input-delta": - app.tools.InputDelta(app.ctx, app.s("toolCallId"), "", app.s("inputTextDelta"), app.b("providerExecuted")) + app.tools.InputDelta(app.ctx, app.s("toolCallId"), "", app.raw("inputTextDelta"), app.b("providerExecuted")) case "tool-input-available": app.tools.Input(app.ctx, app.s("toolCallId"), app.s("toolName"), app.part["input"], app.b("providerExecuted")) case "tool-output-available": app.tools.Output(app.ctx, app.s("toolCallId"), app.part["output"], ToolOutputOptions{ ProviderExecuted: app.b("providerExecuted"), + Streaming: app.b("preliminary"), }) case "tool-output-error": app.tools.OutputError(app.ctx, app.s("toolCallId"), app.s("errorText"), app.b("providerExecuted")) @@ -136,6 +137,10 @@ func (a partApplicator) s(key string) string { return strings.TrimSpace(stringValue(a.part[key])) } +func (a partApplicator) raw(key string) string { + return stringValue(a.part[key]) +} + func (a partApplicator) b(key string) bool { value, _ := a.part[key].(bool) return value @@ -157,7 +162,7 @@ func (a partApplicator) messageMetadata() { } func (a partApplicator) textDelta() { - if delta := a.s("delta"); delta != "" { + if delta := a.raw("delta"); delta != "" { 
a.writer.TextDelta(a.ctx, delta) return } @@ -165,7 +170,7 @@ func (a partApplicator) textDelta() { } func (a partApplicator) reasoningDelta() { - if delta := a.s("delta"); delta != "" { + if delta := a.raw("delta"); delta != "" { a.writer.ReasoningDelta(a.ctx, delta) return } diff --git a/sdk/part_apply_test.go b/sdk/part_apply_test.go new file mode 100644 index 00000000..3ccf9a1c --- /dev/null +++ b/sdk/part_apply_test.go @@ -0,0 +1,88 @@ +package sdk + +import ( + "context" + "testing" + + "maunium.net/go/mautrix/bridgev2" +) + +func newPartApplyTestTurn() *Turn { + conv := NewConversation(context.Background(), nil, nil, bridgev2.EventSender{}, &Config{}, nil) + return conv.StartTurn(context.Background(), &Agent{ID: "agent"}, nil) +} + +func TestApplyStreamPartPreservesPreliminaryToolOutput(t *testing.T) { + turn := newPartApplyTestTurn() + + ApplyStreamPart(turn, map[string]any{ + "type": "tool-input-available", + "toolCallId": "call-1", + "toolName": "fetch", + "input": map[string]any{"url": "https://example.com"}, + "providerExecuted": true, + }, PartApplyOptions{}) + ApplyStreamPart(turn, map[string]any{ + "type": "tool-output-available", + "toolCallId": "call-1", + "output": map[string]any{"status": "running"}, + "providerExecuted": true, + "preliminary": true, + }, PartApplyOptions{}) + + ui := turn.UIState().UIMessage + parts, _ := ui["parts"].([]any) + if len(parts) != 1 { + t.Fatalf("expected 1 UI part, got %#v", parts) + } + part, _ := parts[0].(map[string]any) + if part["state"] != "output-available" { + t.Fatalf("unexpected tool state: %#v", part) + } + if preliminary, _ := part["preliminary"].(bool); !preliminary { + t.Fatalf("expected preliminary flag, got %#v", part) + } + output, _ := part["output"].(map[string]any) + if output["status"] != "running" { + t.Fatalf("unexpected preliminary output: %#v", output) + } +} + +func TestApplyStreamPartFinalOutputClearsPreliminaryFlag(t *testing.T) { + turn := newPartApplyTestTurn() + + 
ApplyStreamPart(turn, map[string]any{ + "type": "tool-input-available", + "toolCallId": "call-2", + "toolName": "fetch", + "input": map[string]any{"url": "https://example.com"}, + "providerExecuted": true, + }, PartApplyOptions{}) + ApplyStreamPart(turn, map[string]any{ + "type": "tool-output-available", + "toolCallId": "call-2", + "output": map[string]any{"status": "running"}, + "providerExecuted": true, + "preliminary": true, + }, PartApplyOptions{}) + ApplyStreamPart(turn, map[string]any{ + "type": "tool-output-available", + "toolCallId": "call-2", + "output": map[string]any{"status": 200}, + "providerExecuted": true, + }, PartApplyOptions{}) + + ui := turn.UIState().UIMessage + parts, _ := ui["parts"].([]any) + if len(parts) != 1 { + t.Fatalf("expected 1 UI part, got %#v", parts) + } + part, _ := parts[0].(map[string]any) + if preliminary, ok := part["preliminary"].(bool); ok && preliminary { + t.Fatalf("did not expect preliminary flag after final output: %#v", part) + } + output, _ := part["output"].(map[string]any) + if output["status"] != 200 { + t.Fatalf("unexpected final output: %#v", output) + } +} diff --git a/sdk/turn_test.go b/sdk/turn_test.go index dee3e5b8..30da7951 100644 --- a/sdk/turn_test.go +++ b/sdk/turn_test.go @@ -537,6 +537,17 @@ func TestTurnBuildFinalEditDefaultsToGenericBodyForArtifacts(t *testing.T) { } } +func TestApplyStreamPartPreservesWhitespaceTextDelta(t *testing.T) { + turn := newTurn(context.Background(), nil, nil, nil) + + ApplyStreamPart(turn, map[string]any{"type": "text-delta", "delta": "pretty"}, PartApplyOptions{}) + ApplyStreamPart(turn, map[string]any{"type": "text-delta", "delta": " good"}, PartApplyOptions{}) + + if got := turn.VisibleText(); got != "pretty good" { + t.Fatalf("expected visible text to preserve leading whitespace in deltas, got %q", got) + } +} + func TestTurnSuppressFinalEditSkipsAutomaticPayload(t *testing.T) { turn := newTurn(context.Background(), nil, nil, nil) turn.initialEventID = 
id.EventID("$event-suppressed") diff --git a/tools/generate-homebrew-cask.sh b/tools/generate-homebrew-cask.sh new file mode 100755 index 00000000..5cb60692 --- /dev/null +++ b/tools/generate-homebrew-cask.sh @@ -0,0 +1,69 @@ +#!/bin/sh +set -eu + +VERSION="${1:?usage: tools/generate-homebrew-cask.sh }" +CHECKSUMS_FILE="${2:?usage: tools/generate-homebrew-cask.sh }" + +version_no_v="${VERSION#v}" + +checksum_for() { + target="$1" + awk -v file_name="$target" '$2 == file_name { print $1; exit }' "$CHECKSUMS_FILE" +} + +darwin_amd64_asset="agentremote_v${version_no_v}_darwin_amd64.tar.gz" +darwin_arm64_asset="agentremote_v${version_no_v}_darwin_arm64.tar.gz" +linux_amd64_asset="agentremote_v${version_no_v}_linux_amd64.tar.gz" +linux_arm64_asset="agentremote_v${version_no_v}_linux_arm64.tar.gz" + +darwin_amd64_sha="$(checksum_for "$darwin_amd64_asset")" +darwin_arm64_sha="$(checksum_for "$darwin_arm64_asset")" +linux_amd64_sha="$(checksum_for "$linux_amd64_asset")" +linux_arm64_sha="$(checksum_for "$linux_arm64_asset")" + +for value in "$darwin_amd64_sha" "$darwin_arm64_sha" "$linux_amd64_sha" "$linux_arm64_sha"; do + if [ -z "$value" ]; then + printf '%s\n' "error: missing checksum entry in $CHECKSUMS_FILE" >&2 + exit 1 + fi +done + +cat < Date: Sun, 22 Mar 2026 21:19:47 +0100 Subject: [PATCH 4/8] sync --- bridges/openclaw/config.go | 10 +- bridges/openclaw/connector.go | 38 +- bridges/openclaw/discovery.go | 422 +++++++++++++++++++++ bridges/openclaw/discovery_provisioning.go | 151 ++++++++ bridges/openclaw/discovery_test.go | 118 ++++++ bridges/openclaw/example-config.yaml | 7 + bridges/openclaw/login.go | 177 +++------ bridges/openclaw/login_prefill.go | 71 ++++ bridges/openclaw/login_test.go | 152 ++++---- bridges/openclaw/metadata.go | 1 - 10 files changed, 934 insertions(+), 213 deletions(-) create mode 100644 bridges/openclaw/discovery.go create mode 100644 bridges/openclaw/discovery_provisioning.go create mode 100644 bridges/openclaw/discovery_test.go create 
mode 100644 bridges/openclaw/login_prefill.go diff --git a/bridges/openclaw/config.go b/bridges/openclaw/config.go index 93ea440d..23901357 100644 --- a/bridges/openclaw/config.go +++ b/bridges/openclaw/config.go @@ -19,7 +19,15 @@ type Config struct { } type OpenClawConfig struct { - Enabled *bool `yaml:"enabled"` + Enabled *bool `yaml:"enabled"` + Discovery OpenClawDiscoveryConfig `yaml:"discovery"` +} + +type OpenClawDiscoveryConfig struct { + Enabled *bool `yaml:"enabled"` + TimeoutMS int `yaml:"timeout_ms"` + WideAreaDomain string `yaml:"wide_area_domain"` + PrefillTTLSeconds int `yaml:"prefill_ttl_seconds"` } func upgradeConfig(_ configupgrade.Helper) {} diff --git a/bridges/openclaw/connector.go b/bridges/openclaw/connector.go index 77f3143e..a497397d 100644 --- a/bridges/openclaw/connector.go +++ b/bridges/openclaw/connector.go @@ -3,10 +3,12 @@ package openclaw import ( "context" "sync" + "time" "go.mau.fi/util/configupgrade" "maunium.net/go/mautrix/bridgev2" "maunium.net/go/mautrix/bridgev2/networkid" + "maunium.net/go/mautrix/id" "github.com/beeper/agentremote" bridgesdk "github.com/beeper/agentremote/sdk" @@ -25,6 +27,16 @@ type OpenClawConnector struct { clientsMu sync.Mutex clients map[networkid.UserLoginID]bridgev2.NetworkAPI + + prefillsMu sync.Mutex + prefills map[string]openClawLoginPrefill +} + +type openClawLoginPrefill struct { + UserMXID id.UserID + URL string + Label string + ExpiresAt time.Time } func NewConnector() *OpenClawConnector { @@ -42,6 +54,14 @@ func NewConnector() *OpenClawConnector { StartConnector: func(_ context.Context, _ *bridgev2.Bridge) error { bridgesdk.ApplyDefaultCommandPrefix(&oc.Config.Bridge.CommandPrefix, "!openclaw") bridgesdk.ApplyBoolDefault(&oc.Config.OpenClaw.Enabled, true) + bridgesdk.ApplyBoolDefault(&oc.Config.OpenClaw.Discovery.Enabled, true) + if oc.Config.OpenClaw.Discovery.TimeoutMS <= 0 { + oc.Config.OpenClaw.Discovery.TimeoutMS = 2000 + } + if oc.Config.OpenClaw.Discovery.PrefillTTLSeconds <= 0 { + 
oc.Config.OpenClaw.Discovery.PrefillTTLSeconds = 300 + } + oc.initProvisioning() return nil }, DisplayName: "OpenClaw Bridge", @@ -79,10 +99,22 @@ func NewConnector() *OpenClawConnector { Description: "Create a login for an OpenClaw gateway.", }), CreateLogin: func(_ context.Context, user *bridgev2.User, flowID string) (bridgev2.LoginProcess, error) { - if err := agentremote.ValidateSingleLoginFlow(flowID, ProviderOpenClaw, oc.openClawEnabled()); err != nil { - return nil, err + if !oc.openClawEnabled() { + return nil, bridgev2.ErrInvalidLoginFlowID + } + if flowID == ProviderOpenClaw { + return &OpenClawLogin{User: user, Connector: oc}, nil + } + prefill, ok := oc.loginPrefill(flowID, user) + if !ok { + return nil, bridgev2.ErrInvalidLoginFlowID } - return &OpenClawLogin{User: user, Connector: oc}, nil + return &OpenClawLogin{ + User: user, + Connector: oc, + prefillURL: prefill.URL, + prefillLabel: prefill.Label, + }, nil }, }) oc.ConnectorBase = bridgesdk.NewConnectorBase(oc.sdkConfig) diff --git a/bridges/openclaw/discovery.go b/bridges/openclaw/discovery.go new file mode 100644 index 00000000..80952a1f --- /dev/null +++ b/bridges/openclaw/discovery.go @@ -0,0 +1,422 @@ +package openclaw + +import ( + "bytes" + "context" + "errors" + "fmt" + "os/exec" + "regexp" + "runtime" + "slices" + "strconv" + "strings" + "time" +) + +const openClawGatewayServiceType = "_openclaw-gw._tcp" + +type openClawDiscoveredGateway struct { + StableID string + Source string + Domain string + InstanceName string + DisplayName string + GatewayURL string + ServiceHost string + ServicePort int + LanHost string + TailnetDNS string + GatewayTLS bool + GatewayTLSFingerprintSHA256 string + SSHPort int + CLIPath string +} + +type openClawDiscoveryOptions struct { + Timeout time.Duration + WideAreaEnabled bool + WideAreaDomain string +} + +type gatewayBonjourBeacon struct { + InstanceName string + Domain string + DisplayName string + Host string + Port int + LanHost string + TailnetDNS string 
+ GatewayPort int + SSHPort int + GatewayTLS bool + GatewayTLSFingerprintSHA256 string + CLIPath string +} + +type discoveryCommandRunner func(ctx context.Context, name string, args ...string) (stdout string, stderr string, err error) + +func defaultDiscoveryCommandRunner(ctx context.Context, name string, args ...string) (string, string, error) { + cmd := exec.CommandContext(ctx, name, args...) + var stdout bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + err := cmd.Run() + return stdout.String(), stderr.String(), err +} + +func normalizeDiscoveryTimeout(timeout time.Duration) time.Duration { + if timeout <= 0 { + return 2 * time.Second + } + return timeout +} + +func normalizeServiceDomain(raw string) string { + trimmed := strings.ToLower(strings.TrimSpace(raw)) + if trimmed == "" || trimmed == "local" || trimmed == "local." { + return "local." + } + if strings.HasSuffix(trimmed, ".") { + return trimmed + } + return trimmed + "." +} + +func discoveryDomains(opts openClawDiscoveryOptions) []string { + domains := []string{"local."} + if opts.WideAreaEnabled { + if wide := normalizeServiceDomain(opts.WideAreaDomain); wide != "local." 
{ + domains = append(domains, wide) + } + } + return domains +} + +func discoverOpenClawGateways(ctx context.Context, opts openClawDiscoveryOptions) ([]openClawDiscoveredGateway, error) { + return discoverOpenClawGatewaysWithRunner(ctx, opts, defaultDiscoveryCommandRunner) +} + +func discoverOpenClawGatewaysWithRunner(ctx context.Context, opts openClawDiscoveryOptions, run discoveryCommandRunner) ([]openClawDiscoveredGateway, error) { + timeout := normalizeDiscoveryTimeout(opts.Timeout) + if ctx == nil { + ctx = context.Background() + } + var ( + beacons []gatewayBonjourBeacon + firstErr error + ) + for _, domain := range discoveryDomains(opts) { + discoverCtx, cancel := context.WithTimeout(ctx, timeout) + var domainBeacons []gatewayBonjourBeacon + var err error + switch runtime.GOOS { + case "darwin": + domainBeacons, err = discoverViaDNSSD(discoverCtx, domain, run) + case "linux": + domainBeacons, err = discoverViaAvahi(discoverCtx, domain, run) + default: + cancel() + return nil, nil + } + cancel() + if err != nil && firstErr == nil { + firstErr = err + } + beacons = append(beacons, domainBeacons...) + } + results := dedupeDiscoveredGateways(mapDiscoveredGateways(beacons)) + if len(results) == 0 { + return nil, firstErr + } + return results, nil +} + +func mapDiscoveredGateways(beacons []gatewayBonjourBeacon) []openClawDiscoveredGateway { + out := make([]openClawDiscoveredGateway, 0, len(beacons)) + for _, beacon := range beacons { + host := strings.TrimSpace(beacon.Host) + if host == "" { + host = strings.TrimSpace(beacon.TailnetDNS) + } + if host == "" { + host = strings.TrimSpace(beacon.LanHost) + } + port := beacon.Port + if port <= 0 { + port = beacon.GatewayPort + } + if host == "" || port <= 0 { + continue + } + scheme := "ws" + if beacon.GatewayTLS { + scheme = "wss" + } + domain := normalizeServiceDomain(beacon.Domain) + source := "mdns" + if domain != "local." 
{ + source = "wide_area" + } + displayName := strings.TrimSpace(beacon.DisplayName) + if displayName == "" { + displayName = strings.TrimSpace(beacon.InstanceName) + } + stableID := fmt.Sprintf("%s|%s|%s|%s|%d", source, domain, strings.TrimSpace(beacon.InstanceName), host, port) + out = append(out, openClawDiscoveredGateway{ + StableID: stableID, + Source: source, + Domain: domain, + InstanceName: strings.TrimSpace(beacon.InstanceName), + DisplayName: displayName, + GatewayURL: fmt.Sprintf("%s://%s:%d", scheme, host, port), + ServiceHost: strings.TrimSpace(beacon.Host), + ServicePort: beacon.Port, + LanHost: strings.TrimSpace(beacon.LanHost), + TailnetDNS: strings.TrimSpace(beacon.TailnetDNS), + GatewayTLS: beacon.GatewayTLS, + GatewayTLSFingerprintSHA256: strings.TrimSpace(beacon.GatewayTLSFingerprintSHA256), + SSHPort: beacon.SSHPort, + CLIPath: strings.TrimSpace(beacon.CLIPath), + }) + } + return out +} + +func dedupeDiscoveredGateways(gateways []openClawDiscoveredGateway) []openClawDiscoveredGateway { + if len(gateways) == 0 { + return nil + } + seen := make(map[string]struct{}, len(gateways)) + out := make([]openClawDiscoveredGateway, 0, len(gateways)) + for _, gateway := range gateways { + if gateway.StableID == "" { + continue + } + if _, ok := seen[gateway.StableID]; ok { + continue + } + seen[gateway.StableID] = struct{}{} + out = append(out, gateway) + } + slices.SortFunc(out, func(a, b openClawDiscoveredGateway) int { + if cmp := strings.Compare(strings.ToLower(a.DisplayName), strings.ToLower(b.DisplayName)); cmp != 0 { + return cmp + } + return strings.Compare(a.GatewayURL, b.GatewayURL) + }) + return out +} + +func discoverViaDNSSD(ctx context.Context, domain string, run discoveryCommandRunner) ([]gatewayBonjourBeacon, error) { + if _, err := exec.LookPath("dns-sd"); err != nil { + return nil, nil + } + stdout, _, browseErr := run(ctx, "dns-sd", "-B", openClawGatewayServiceType, domain) + instances := parseDnsSdBrowse(stdout) + if len(instances) == 0 { 
+ return nil, browseErr + } + results := make([]gatewayBonjourBeacon, 0, len(instances)) + for _, instance := range instances { + resolveCtx, cancel := context.WithTimeout(ctx, time.Second) + resolveStdout, _, err := run(resolveCtx, "dns-sd", "-L", instance, openClawGatewayServiceType, domain) + cancel() + if err != nil && strings.TrimSpace(resolveStdout) == "" { + continue + } + beacon, ok := parseDnsSdResolve(resolveStdout, instance, domain) + if ok { + results = append(results, beacon) + } + } + if len(results) == 0 { + return nil, browseErr + } + return results, nil +} + +func discoverViaAvahi(ctx context.Context, domain string, run discoveryCommandRunner) ([]gatewayBonjourBeacon, error) { + if _, err := exec.LookPath("avahi-browse"); err != nil { + return nil, nil + } + args := []string{"-rt", openClawGatewayServiceType} + if domain != "" && domain != "local." { + args = append(args, "-d", strings.TrimSuffix(domain, ".")) + } + stdout, _, err := run(ctx, "avahi-browse", args...) + results := parseAvahiBrowse(stdout, domain) + if len(results) == 0 { + return nil, err + } + return results, nil +} + +func decodeDnsSdEscapes(value string) string { + var out strings.Builder + for i := 0; i < len(value); i++ { + if value[i] == '\\' && i+3 < len(value) { + escaped := value[i+1 : i+4] + if escaped[0] >= '0' && escaped[0] <= '9' && escaped[1] >= '0' && escaped[1] <= '9' && escaped[2] >= '0' && escaped[2] <= '9' { + if b, err := strconv.Atoi(escaped); err == nil && b >= 0 && b <= 255 { + out.WriteByte(byte(b)) + i += 3 + continue + } + } + } + out.WriteByte(value[i]) + } + return out.String() +} + +func parseTxtTokens(tokens []string) map[string]string { + txt := make(map[string]string, len(tokens)) + for _, token := range tokens { + idx := strings.Index(token, "=") + if idx <= 0 { + continue + } + key := strings.TrimSpace(token[:idx]) + value := decodeDnsSdEscapes(strings.TrimSpace(token[idx+1:])) + if key == "" { + continue + } + txt[key] = value + } + return txt +} + 
+func parseDnsSdBrowse(stdout string) []string { + instances := make([]string, 0, 4) + seen := make(map[string]struct{}) + re := regexp.MustCompile(`_openclaw-gw\._tcp\.?\s+(.+)$`) + for _, raw := range strings.Split(stdout, "\n") { + line := strings.TrimSpace(raw) + if line == "" || !strings.Contains(line, openClawGatewayServiceType) || !strings.Contains(line, "Add") { + continue + } + match := re.FindStringSubmatch(line) + if len(match) < 2 { + continue + } + instance := decodeDnsSdEscapes(strings.TrimSpace(match[1])) + if instance == "" { + continue + } + if _, ok := seen[instance]; ok { + continue + } + seen[instance] = struct{}{} + instances = append(instances, instance) + } + return instances +} + +func parseDnsSdResolve(stdout, instanceName, domain string) (gatewayBonjourBeacon, bool) { + beacon := gatewayBonjourBeacon{ + InstanceName: decodeDnsSdEscapes(strings.TrimSpace(instanceName)), + Domain: domain, + } + var txt map[string]string + reachability := regexp.MustCompile(`can be reached at\s+([^\s:]+):(\d+)`) + for _, raw := range strings.Split(stdout, "\n") { + line := strings.TrimSpace(raw) + if line == "" { + continue + } + if match := reachability.FindStringSubmatch(line); len(match) == 3 { + beacon.Host = strings.TrimSuffix(strings.TrimSpace(match[1]), ".") + beacon.Port, _ = strconv.Atoi(match[2]) + continue + } + if strings.HasPrefix(line, "txt") || strings.Contains(line, "txtvers=") { + txt = parseTxtTokens(strings.Fields(line)) + } + } + applyTxtToBeacon(&beacon, txt) + if beacon.DisplayName == "" { + beacon.DisplayName = beacon.InstanceName + } + return beacon, beacon.DisplayName != "" || beacon.Host != "" +} + +func parseAvahiBrowse(stdout, domain string) []gatewayBonjourBeacon { + results := make([]gatewayBonjourBeacon, 0, 4) + var current *gatewayBonjourBeacon + for _, raw := range strings.Split(stdout, "\n") { + line := strings.TrimRight(raw, "\r") + if strings.TrimSpace(line) == "" { + continue + } + if strings.HasPrefix(line, "=") && 
strings.Contains(line, openClawGatewayServiceType) { + if current != nil { + results = append(results, *current) + } + idx := strings.Index(line, " "+openClawGatewayServiceType) + left := strings.TrimSpace(line) + if idx >= 0 { + left = strings.TrimSpace(line[:idx]) + } + parts := strings.Fields(left) + instanceName := left + if len(parts) > 3 { + instanceName = strings.Join(parts[3:], " ") + } + current = &gatewayBonjourBeacon{ + InstanceName: strings.TrimSpace(instanceName), + DisplayName: strings.TrimSpace(instanceName), + Domain: domain, + } + continue + } + if current == nil { + continue + } + trimmed := strings.TrimSpace(line) + switch { + case strings.HasPrefix(trimmed, "hostname ="): + if match := regexp.MustCompile(`hostname\s*=\s*\[([^\]]+)\]`).FindStringSubmatch(trimmed); len(match) == 2 { + current.Host = strings.TrimSpace(match[1]) + } + case strings.HasPrefix(trimmed, "port ="): + if match := regexp.MustCompile(`port\s*=\s*\[(\d+)\]`).FindStringSubmatch(trimmed); len(match) == 2 { + current.Port, _ = strconv.Atoi(match[1]) + } + case strings.HasPrefix(trimmed, "txt ="): + matches := regexp.MustCompile(`"([^"]*)"`).FindAllStringSubmatch(trimmed, -1) + tokens := make([]string, 0, len(matches)) + for _, match := range matches { + if len(match) == 2 { + tokens = append(tokens, match[1]) + } + } + applyTxtToBeacon(current, parseTxtTokens(tokens)) + } + } + if current != nil { + results = append(results, *current) + } + return results +} + +func applyTxtToBeacon(beacon *gatewayBonjourBeacon, txt map[string]string) { + if beacon == nil || len(txt) == 0 { + return + } + if value := strings.TrimSpace(txt["displayName"]); value != "" { + beacon.DisplayName = value + } + beacon.LanHost = strings.TrimSpace(txt["lanHost"]) + beacon.TailnetDNS = strings.TrimSpace(txt["tailnetDns"]) + beacon.CLIPath = strings.TrimSpace(txt["cliPath"]) + beacon.GatewayPort, _ = strconv.Atoi(strings.TrimSpace(txt["gatewayPort"])) + beacon.SSHPort, _ = 
strconv.Atoi(strings.TrimSpace(txt["sshPort"])) + if raw := strings.ToLower(strings.TrimSpace(txt["gatewayTls"])); raw == "1" || raw == "true" || raw == "yes" { + beacon.GatewayTLS = true + } + beacon.GatewayTLSFingerprintSHA256 = strings.TrimSpace(txt["gatewayTlsSha256"]) +} + +var errWideAreaDomainRequired = errors.New("wide-area discovery requested but no wide-area domain is configured") diff --git a/bridges/openclaw/discovery_provisioning.go b/bridges/openclaw/discovery_provisioning.go new file mode 100644 index 00000000..e925a0f3 --- /dev/null +++ b/bridges/openclaw/discovery_provisioning.go @@ -0,0 +1,151 @@ +package openclaw + +import ( + "errors" + "net/http" + "strconv" + "strings" + "time" + + "github.com/rs/zerolog" + "go.mau.fi/util/exhttp" + "maunium.net/go/mautrix" + "maunium.net/go/mautrix/bridgev2" +) + +type openClawDiscoveryProvisioningAPI struct { + log zerolog.Logger + connector *OpenClawConnector + prov bridgev2.IProvisioningAPI +} + +type openClawDiscoveryGatewayResponse struct { + StableID string `json:"stable_id"` + Source string `json:"source"` + Domain string `json:"domain"` + DisplayName string `json:"display_name"` + GatewayURL string `json:"gateway_url"` + ServiceHost string `json:"service_host,omitempty"` + ServicePort int `json:"service_port,omitempty"` + LanHost string `json:"lan_host,omitempty"` + TailnetDNS string `json:"tailnet_dns,omitempty"` + GatewayTLS bool `json:"gateway_tls,omitempty"` + GatewayTLSFingerprintSHA256 string `json:"gateway_tls_fingerprint_sha256,omitempty"` + SSHPort int `json:"ssh_port,omitempty"` + CLIPath string `json:"cli_path,omitempty"` + FlowID string `json:"flow_id"` + FlowExpiresAtMS int64 `json:"flow_expires_at_ms"` + LoginPrefill openClawDiscoveryLoginPrefill `json:"login_prefill"` +} + +type openClawDiscoveryLoginPrefill struct { + URL string `json:"url"` + Label string `json:"label,omitempty"` +} + +func (oc *OpenClawConnector) initProvisioning() { + c, ok := 
oc.br.Matrix.(bridgev2.MatrixConnectorWithProvisioning) + if !ok { + return + } + prov := c.GetProvisioning() + r := prov.GetRouter() + if r == nil { + return + } + api := &openClawDiscoveryProvisioningAPI{ + log: oc.br.Log.With().Str("component", "provisioning").Str("bridge", "openclaw").Logger(), + connector: oc, + prov: prov, + } + r.HandleFunc("GET /v1/discovery/gateways", api.handleListDiscoveredGateways) +} + +func (oc *OpenClawConnector) discoveryEnabled() bool { + return oc == nil || oc.Config.OpenClaw.Discovery.Enabled == nil || *oc.Config.OpenClaw.Discovery.Enabled +} + +func (api *openClawDiscoveryProvisioningAPI) handleListDiscoveredGateways(w http.ResponseWriter, r *http.Request) { + if api == nil || api.connector == nil || !api.connector.discoveryEnabled() { + mautrix.MForbidden.WithMessage("OpenClaw discovery is disabled.").Write(w) + return + } + user := api.prov.GetUser(r) + if user == nil { + mautrix.MForbidden.WithMessage("Missing provisioning user context.").Write(w) + return + } + opts, err := api.discoveryOptions(r) + if err != nil { + mautrix.MInvalidParam.WithMessage("%s", err).Write(w) + return + } + gateways, err := discoverOpenClawGateways(r.Context(), opts) + if err != nil { + mautrix.MUnknown.WithMessage("Couldn't discover gateways: %v.", err).Write(w) + return + } + items := make([]openClawDiscoveryGatewayResponse, 0, len(gateways)) + for _, gateway := range gateways { + flowID, expiresAt := api.connector.registerLoginPrefill(user, gateway.GatewayURL, gateway.DisplayName) + items = append(items, openClawDiscoveryGatewayResponse{ + StableID: gateway.StableID, + Source: gateway.Source, + Domain: gateway.Domain, + DisplayName: gateway.DisplayName, + GatewayURL: gateway.GatewayURL, + ServiceHost: gateway.ServiceHost, + ServicePort: gateway.ServicePort, + LanHost: gateway.LanHost, + TailnetDNS: gateway.TailnetDNS, + GatewayTLS: gateway.GatewayTLS, + GatewayTLSFingerprintSHA256: gateway.GatewayTLSFingerprintSHA256, + SSHPort: 
gateway.SSHPort, + CLIPath: gateway.CLIPath, + FlowID: flowID, + FlowExpiresAtMS: expiresAt.UnixMilli(), + LoginPrefill: openClawDiscoveryLoginPrefill{ + URL: gateway.GatewayURL, + Label: gateway.DisplayName, + }, + }) + } + exhttp.WriteJSONResponse(w, http.StatusOK, map[string]any{"gateways": items}) +} + +func (api *openClawDiscoveryProvisioningAPI) discoveryOptions(r *http.Request) (openClawDiscoveryOptions, error) { + timeout := time.Duration(api.connector.Config.OpenClaw.Discovery.TimeoutMS) * time.Millisecond + if raw := strings.TrimSpace(r.URL.Query().Get("timeout_ms")); raw != "" { + value, err := strconv.Atoi(raw) + if err != nil || value <= 0 { + return openClawDiscoveryOptions{}, errors.New("timeout_ms must be a positive integer") + } + if value > 10_000 { + value = 10_000 + } + timeout = time.Duration(value) * time.Millisecond + } + mode := strings.ToLower(strings.TrimSpace(r.URL.Query().Get("wide_area"))) + wideAreaDomain := strings.TrimSpace(api.connector.Config.OpenClaw.Discovery.WideAreaDomain) + switch mode { + case "", "auto": + return openClawDiscoveryOptions{ + Timeout: timeout, + WideAreaEnabled: wideAreaDomain != "", + WideAreaDomain: wideAreaDomain, + }, nil + case "off", "false", "0": + return openClawDiscoveryOptions{Timeout: timeout}, nil + case "on", "true", "1": + if wideAreaDomain == "" { + return openClawDiscoveryOptions{}, errWideAreaDomainRequired + } + return openClawDiscoveryOptions{ + Timeout: timeout, + WideAreaEnabled: true, + WideAreaDomain: wideAreaDomain, + }, nil + default: + return openClawDiscoveryOptions{}, errors.New("invalid wide_area mode") + } +} diff --git a/bridges/openclaw/discovery_test.go b/bridges/openclaw/discovery_test.go new file mode 100644 index 00000000..69212335 --- /dev/null +++ b/bridges/openclaw/discovery_test.go @@ -0,0 +1,118 @@ +package openclaw + +import ( + "net/http/httptest" + "testing" + "time" + + "maunium.net/go/mautrix/bridgev2" + "maunium.net/go/mautrix/bridgev2/database" + 
"maunium.net/go/mautrix/id" +) + +func TestRegisterLoginPrefillIsUserScopedAndExpires(t *testing.T) { + connector := &OpenClawConnector{ + Config: Config{ + OpenClaw: OpenClawConfig{ + Discovery: OpenClawDiscoveryConfig{ + PrefillTTLSeconds: 1, + }, + }, + }, + } + user := &bridgev2.User{User: &database.User{MXID: id.UserID("@alice:example.com")}} + otherUser := &bridgev2.User{User: &database.User{MXID: id.UserID("@bob:example.com")}} + + flowID, expiresAt := connector.registerLoginPrefill(user, "wss://gateway.local:443", "Studio") + if flowID == "" { + t.Fatal("expected a generated flow id") + } + if expiresAt.IsZero() { + t.Fatal("expected a non-zero expiry") + } + + prefill, ok := connector.loginPrefill(flowID, user) + if !ok { + t.Fatal("expected prefill to be available for original user") + } + if prefill.URL != "wss://gateway.local:443" || prefill.Label != "Studio" { + t.Fatalf("unexpected prefill: %#v", prefill) + } + if _, ok := connector.loginPrefill(flowID, otherUser); ok { + t.Fatal("expected prefill lookup for another user to fail") + } + + connector.prefillsMu.Lock() + connector.prefills[flowID] = openClawLoginPrefill{ + UserMXID: user.MXID, + URL: prefill.URL, + Label: prefill.Label, + ExpiresAt: time.Now().Add(-time.Second), + } + connector.prefillsMu.Unlock() + if _, ok := connector.loginPrefill(flowID, user); ok { + t.Fatal("expected expired prefill to be pruned") + } +} + +func TestMapDiscoveredGatewaysPrefersResolvedEndpointAndTLS(t *testing.T) { + results := mapDiscoveredGateways([]gatewayBonjourBeacon{ + { + InstanceName: "Office", + Domain: "local.", + DisplayName: "Office", + Host: "gateway.local", + Port: 443, + LanHost: "192.168.1.22", + TailnetDNS: "gateway.tailnet.ts.net", + GatewayTLS: true, + }, + }) + if len(results) != 1 { + t.Fatalf("unexpected discovery result count: %d", len(results)) + } + if results[0].GatewayURL != "wss://gateway.local:443" { + t.Fatalf("unexpected gateway url: %q", results[0].GatewayURL) + } + if 
results[0].Source != "mdns" { + t.Fatalf("unexpected source: %q", results[0].Source) + } +} + +func TestProvisioningDiscoveryOptions(t *testing.T) { + api := &openClawDiscoveryProvisioningAPI{ + connector: &OpenClawConnector{ + Config: Config{ + OpenClaw: OpenClawConfig{ + Discovery: OpenClawDiscoveryConfig{ + TimeoutMS: 2000, + WideAreaDomain: "tail.example.com", + }, + }, + }, + }, + } + + req := httptest.NewRequest("GET", "/v1/discovery/gateways?timeout_ms=1500&wide_area=on", nil) + opts, err := api.discoveryOptions(req) + if err != nil { + t.Fatalf("discoveryOptions returned error: %v", err) + } + if opts.Timeout != 1500*time.Millisecond { + t.Fatalf("unexpected timeout: %v", opts.Timeout) + } + if !opts.WideAreaEnabled || opts.WideAreaDomain != "tail.example.com" { + t.Fatalf("unexpected wide-area options: %#v", opts) + } + + req = httptest.NewRequest("GET", "/v1/discovery/gateways?timeout_ms=0", nil) + if _, err := api.discoveryOptions(req); err == nil { + t.Fatal("expected invalid timeout to fail") + } + + api.connector.Config.OpenClaw.Discovery.WideAreaDomain = "" + req = httptest.NewRequest("GET", "/v1/discovery/gateways?wide_area=on", nil) + if _, err := api.discoveryOptions(req); err == nil { + t.Fatal("expected wide_area=on without configured domain to fail") + } +} diff --git a/bridges/openclaw/example-config.yaml b/bridges/openclaw/example-config.yaml index 54e05c75..23d27677 100644 --- a/bridges/openclaw/example-config.yaml +++ b/bridges/openclaw/example-config.yaml @@ -2,3 +2,10 @@ bridge: command_prefix: "!openclaw" openclaw: enabled: true + discovery: + enabled: true + timeout_ms: 2000 + # Optional. When set, clients can request wide-area discovery in addition to local mDNS. + wide_area_domain: "" + # Ephemeral prefilled login flow lifetime returned by discovery responses. 
+ prefill_ttl_seconds: 300 diff --git a/bridges/openclaw/login.go b/bridges/openclaw/login.go index dcc06d57..88ebd3c6 100644 --- a/bridges/openclaw/login.go +++ b/bridges/openclaw/login.go @@ -20,20 +20,14 @@ var ( _ bridgev2.LoginProcessDisplayAndWait = (*OpenClawLogin)(nil) ) -const openClawLoginStepCredentials = "io.ai-bridge.openclaw.enter_credentials" - const ( - openClawLoginStepAuthMode = "io.ai-bridge.openclaw.choose_auth_mode" - openClawLoginStepCredentialsNoAuth = "io.ai-bridge.openclaw.enter_credentials.none" - openClawLoginStepCredentialsToken = "io.ai-bridge.openclaw.enter_credentials.token" - openClawLoginStepCredentialsPass = "io.ai-bridge.openclaw.enter_credentials.password" - openClawLoginStepPairingWait = "io.ai-bridge.openclaw.wait_for_pairing" + openClawLoginStepCredentials = "io.ai-bridge.openclaw.enter_credentials" + openClawLoginStepPairingWait = "io.ai-bridge.openclaw.wait_for_pairing" ) type openClawLoginState string const ( - openClawLoginStateAuthMode openClawLoginState = "auth_mode" openClawLoginStateCredentials openClawLoginState = "credentials" openClawLoginStatePairingWait openClawLoginState = "pairing_wait" ) @@ -49,7 +43,6 @@ const ( type openClawPendingLogin struct { gatewayURL string - authMode string token string password string label string @@ -61,14 +54,15 @@ type OpenClawLogin struct { User *bridgev2.User Connector *OpenClawConnector - step openClawLoginState - authMode string - pending *openClawPendingLogin - waitUntil time.Time - preflight func(context.Context, string, string, string) (string, error) - pollEvery time.Duration - returnWait time.Duration - waitFor time.Duration + step openClawLoginState + pending *openClawPendingLogin + waitUntil time.Time + prefillURL string + prefillLabel string + preflight func(context.Context, string, string, string) (string, error) + pollEvery time.Duration + returnWait time.Duration + waitFor time.Duration } func (ol *OpenClawLogin) validate() error { @@ -83,26 +77,10 @@ func (ol 
*OpenClawLogin) Start(_ context.Context) (*bridgev2.LoginStep, error) { if err := ol.validate(); err != nil { return nil, err } - ol.step = openClawLoginStateAuthMode - ol.authMode = "" + ol.step = openClawLoginStateCredentials ol.pending = nil ol.waitUntil = time.Time{} - return &bridgev2.LoginStep{ - Type: bridgev2.LoginStepTypeUserInput, - StepID: openClawLoginStepAuthMode, - Instructions: "Choose how the bridge should authenticate to your OpenClaw gateway.", - UserInputParams: &bridgev2.LoginUserInputParams{ - Fields: []bridgev2.LoginInputDataField{ - { - Type: bridgev2.LoginInputFieldTypeSelect, - ID: "auth_mode", - Name: "Authentication Mode", - Description: "Pick the gateway auth mode first so the next step only asks for the fields that matter.", - Options: []string{"No auth", "Token", "Password"}, - }, - }, - }, - }, nil + return openClawCredentialStep(ol.prefillURL, ol.prefillLabel), nil } func (ol *OpenClawLogin) SubmitUserInput(ctx context.Context, input map[string]string) (*bridgev2.LoginStep, error) { @@ -110,35 +88,22 @@ func (ol *OpenClawLogin) SubmitUserInput(ctx context.Context, input map[string]s return nil, err } switch ol.step { - case "", openClawLoginStateAuthMode: - authMode, err := normalizeOpenClawAuthMode(input["auth_mode"]) - if err != nil { - return nil, err - } - ol.step = openClawLoginStateCredentials - ol.authMode = authMode - return openClawCredentialStep(authMode), nil - case openClawLoginStateCredentials: + case "", openClawLoginStateCredentials: default: return nil, errors.New("login process is in an invalid state") } - authMode, err := normalizeOpenClawAuthMode(ol.authMode) - if err != nil { - return nil, err - } normalizedURL, err := normalizeOpenClawLoginURL(input["url"]) if err != nil { return nil, err } - token, password, err := normalizeOpenClawAuthCredentials(authMode, input) + token, password, err := normalizeOpenClawAuthCredentials(input) if err != nil { return nil, err } label := strings.TrimSpace(input["label"]) pending 
:= &openClawPendingLogin{ gatewayURL: normalizedURL, - authMode: authMode, token: token, password: password, label: label, @@ -210,6 +175,7 @@ func (ol *OpenClawLogin) Wait(ctx context.Context) (*bridgev2.LoginStep, error) func (ol *OpenClawLogin) Cancel() { ol.BaseLoginProcess.Cancel() + ol.step = "" ol.pending = nil ol.waitUntil = time.Time{} } @@ -274,7 +240,6 @@ func (ol *OpenClawLogin) completeLogin(pending *openClawPendingLogin, deviceToke &UserLoginMetadata{ Provider: ProviderOpenClaw, GatewayURL: pending.gatewayURL, - AuthMode: pending.authMode, GatewayToken: pending.token, GatewayPassword: pending.password, GatewayLabel: pending.label, @@ -294,89 +259,55 @@ func (ol *OpenClawLogin) completeLogin(pending *openClawPendingLogin, deviceToke return step, nil } -func openClawCredentialStep(authMode string) *bridgev2.LoginStep { - fields := []bridgev2.LoginInputDataField{ - { - Type: bridgev2.LoginInputFieldTypeURL, - ID: "url", - Name: "Gateway URL", - Description: "OpenClaw gateway URL, e.g. ws://localhost:18789 or https://gateway.example.com", - DefaultValue: "ws://127.0.0.1:18789", - }, +func openClawCredentialStep(defaultURL, defaultLabel string) *bridgev2.LoginStep { + defaultURL = strings.TrimSpace(defaultURL) + if defaultURL == "" { + defaultURL = "ws://127.0.0.1:18789" } - stepID := openClawLoginStepCredentials - instructions := "Enter your OpenClaw gateway details." - switch authMode { - case "token": - stepID = openClawLoginStepCredentialsToken - instructions = "Enter the OpenClaw gateway URL and shared token." - fields = append(fields, bridgev2.LoginInputDataField{ - Type: bridgev2.LoginInputFieldTypeToken, - ID: "token", - Name: "Gateway Token", - Description: "Shared gateway token or operator device token.", - }) - case "password": - stepID = openClawLoginStepCredentialsPass - instructions = "Enter the OpenClaw gateway URL and shared password." 
- fields = append(fields, bridgev2.LoginInputDataField{ - Type: bridgev2.LoginInputFieldTypePassword, - ID: "password", - Name: "Gateway Password", - Description: "Shared password for the gateway.", - }) - default: - stepID = openClawLoginStepCredentialsNoAuth - instructions = "Enter the OpenClaw gateway URL." - } - fields = append(fields, bridgev2.LoginInputDataField{ - Type: bridgev2.LoginInputFieldTypeUsername, - ID: "label", - Name: "Gateway Label", - Description: "Optional label to distinguish multiple gateways.", - }) return &bridgev2.LoginStep{ Type: bridgev2.LoginStepTypeUserInput, - StepID: stepID, - Instructions: instructions, + StepID: openClawLoginStepCredentials, + Instructions: "Enter your OpenClaw gateway details. Leave token and password empty for no auth, or provide exactly one of them.", UserInputParams: &bridgev2.LoginUserInputParams{ - Fields: fields, + Fields: []bridgev2.LoginInputDataField{ + { + Type: bridgev2.LoginInputFieldTypeURL, + ID: "url", + Name: "Gateway URL", + Description: "OpenClaw gateway URL, e.g. ws://localhost:18789 or https://gateway.example.com", + DefaultValue: defaultURL, + }, + { + Type: bridgev2.LoginInputFieldTypeToken, + ID: "token", + Name: "Gateway Token", + Description: "Optional shared gateway token or operator device token. Do not fill both token and password.", + }, + { + Type: bridgev2.LoginInputFieldTypePassword, + ID: "password", + Name: "Gateway Password", + Description: "Optional shared password for the gateway. 
Do not fill both token and password.", + }, + { + Type: bridgev2.LoginInputFieldTypeUsername, + ID: "label", + Name: "Gateway Label", + Description: "Optional label to distinguish multiple gateways.", + DefaultValue: strings.TrimSpace(defaultLabel), + }, + }, }, } } -func normalizeOpenClawAuthMode(raw string) (string, error) { - switch strings.ToLower(strings.TrimSpace(raw)) { - case "", "none", "no auth": - return "none", nil - case "token": - return "token", nil - case "password": - return "password", nil - default: - return "", fmt.Errorf("unsupported auth mode %q", raw) - } -} - -func normalizeOpenClawAuthCredentials(authMode string, input map[string]string) (string, string, error) { +func normalizeOpenClawAuthCredentials(input map[string]string) (string, string, error) { token := strings.TrimSpace(input["token"]) password := strings.TrimSpace(input["password"]) - switch authMode { - case "none": - return "", "", nil - case "token": - if token == "" { - return "", "", errors.New("gateway token is required") - } - return token, "", nil - case "password": - if password == "" { - return "", "", errors.New("gateway password is required") - } - return "", password, nil - default: - return "", "", fmt.Errorf("unsupported auth mode %q", authMode) + if token != "" && password != "" { + return "", "", errors.New("provide either a gateway token or a gateway password, not both") } + return token, password, nil } func (ol *OpenClawLogin) preflightGatewayLogin(ctx context.Context, gatewayURL, token, password string) (string, error) { diff --git a/bridges/openclaw/login_prefill.go b/bridges/openclaw/login_prefill.go new file mode 100644 index 00000000..ab585dbe --- /dev/null +++ b/bridges/openclaw/login_prefill.go @@ -0,0 +1,71 @@ +package openclaw + +import ( + "strings" + "time" + + "github.com/google/uuid" + "maunium.net/go/mautrix/bridgev2" +) + +const openClawPrefillFlowPrefix = "openclaw_prefill:" + +func (oc *OpenClawConnector) loginPrefillTTL() time.Duration { + if 
oc == nil { + return 5 * time.Minute + } + seconds := oc.Config.OpenClaw.Discovery.PrefillTTLSeconds + if seconds <= 0 { + seconds = 300 + } + return time.Duration(seconds) * time.Second +} + +func (oc *OpenClawConnector) registerLoginPrefill(user *bridgev2.User, url, label string) (string, time.Time) { + if oc == nil || user == nil { + return "", time.Time{} + } + now := time.Now() + expiresAt := now.Add(oc.loginPrefillTTL()) + entry := openClawLoginPrefill{ + UserMXID: user.MXID, + URL: strings.TrimSpace(url), + Label: strings.TrimSpace(label), + ExpiresAt: expiresAt, + } + id := openClawPrefillFlowPrefix + uuid.NewString() + oc.prefillsMu.Lock() + oc.pruneLoginPrefillsLocked(now) + if oc.prefills == nil { + oc.prefills = make(map[string]openClawLoginPrefill) + } + oc.prefills[id] = entry + oc.prefillsMu.Unlock() + return id, expiresAt +} + +func (oc *OpenClawConnector) loginPrefill(flowID string, user *bridgev2.User) (openClawLoginPrefill, bool) { + if oc == nil || user == nil || !strings.HasPrefix(flowID, openClawPrefillFlowPrefix) { + return openClawLoginPrefill{}, false + } + now := time.Now() + oc.prefillsMu.Lock() + defer oc.prefillsMu.Unlock() + oc.pruneLoginPrefillsLocked(now) + prefill, ok := oc.prefills[flowID] + if !ok || prefill.UserMXID != user.MXID { + return openClawLoginPrefill{}, false + } + return prefill, true +} + +func (oc *OpenClawConnector) pruneLoginPrefillsLocked(now time.Time) { + if oc == nil || len(oc.prefills) == 0 { + return + } + for id, prefill := range oc.prefills { + if !prefill.ExpiresAt.IsZero() && !prefill.ExpiresAt.After(now) { + delete(oc.prefills, id) + } + } +} diff --git a/bridges/openclaw/login_test.go b/bridges/openclaw/login_test.go index 3d9621f0..95bbd8b7 100644 --- a/bridges/openclaw/login_test.go +++ b/bridges/openclaw/login_test.go @@ -10,7 +10,7 @@ import ( "maunium.net/go/mautrix/bridgev2" ) -func TestOpenClawLoginStartUsesAuthModeSelect(t *testing.T) { +func TestOpenClawLoginStartUsesSingleCredentialsStep(t 
*testing.T) { login := &OpenClawLogin{ User: &bridgev2.User{}, Connector: &OpenClawConnector{br: &bridgev2.Bridge{}}, @@ -20,101 +20,88 @@ func TestOpenClawLoginStartUsesAuthModeSelect(t *testing.T) { if err != nil { t.Fatalf("Start returned error: %v", err) } - if step.StepID != openClawLoginStepAuthMode { + if step.StepID != openClawLoginStepCredentials { t.Fatalf("unexpected first step id: %q", step.StepID) } - if step.UserInputParams == nil || len(step.UserInputParams.Fields) != 1 { - t.Fatalf("expected a single select field, got %#v", step.UserInputParams) + if step.UserInputParams == nil || len(step.UserInputParams.Fields) != 4 { + t.Fatalf("expected four credential fields, got %#v", step.UserInputParams) } - field := step.UserInputParams.Fields[0] - if field.Type != bridgev2.LoginInputFieldTypeSelect { - t.Fatalf("expected select field, got %q", field.Type) + wantFieldIDs := []string{"url", "token", "password", "label"} + for i, field := range step.UserInputParams.Fields { + if field.ID != wantFieldIDs[i] { + t.Fatalf("unexpected field order: got %q want %q", field.ID, wantFieldIDs[i]) + } } - if len(field.Options) != 3 { - t.Fatalf("expected three auth mode options, got %#v", field.Options) +} + +func TestOpenClawLoginStartPrefillsDiscoveryValues(t *testing.T) { + login := &OpenClawLogin{ + User: &bridgev2.User{}, + Connector: &OpenClawConnector{br: &bridgev2.Bridge{}}, + prefillURL: "wss://gateway.local:443", + prefillLabel: "Studio", + } + + step, err := login.Start(context.Background()) + if err != nil { + t.Fatalf("Start returned error: %v", err) + } + fields := step.UserInputParams.Fields + if fields[0].DefaultValue != "wss://gateway.local:443" { + t.Fatalf("unexpected url default: %q", fields[0].DefaultValue) + } + if fields[3].DefaultValue != "Studio" { + t.Fatalf("unexpected label default: %q", fields[3].DefaultValue) } } -func TestOpenClawLoginSubmitUserInputReturnsModeSpecificFields(t *testing.T) { - cases := []struct { - name string - inputMode 
string - wantStepID string - wantFieldIDs []string - }{ - { - name: "no auth", - inputMode: "No auth", - wantStepID: openClawLoginStepCredentialsNoAuth, - wantFieldIDs: []string{"url", "label"}, - }, - { - name: "token", - inputMode: "Token", - wantStepID: openClawLoginStepCredentialsToken, - wantFieldIDs: []string{"url", "token", "label"}, - }, - { - name: "password", - inputMode: "Password", - wantStepID: openClawLoginStepCredentialsPass, - wantFieldIDs: []string{"url", "password", "label"}, - }, +func TestNormalizeOpenClawAuthCredentials(t *testing.T) { + token, password, err := normalizeOpenClawAuthCredentials(map[string]string{}) + if err != nil { + t.Fatalf("unexpected error for no-auth input: %v", err) + } + if token != "" || password != "" { + t.Fatalf("expected empty credentials, got token=%q password=%q", token, password) } - for _, tc := range cases { - t.Run(tc.name, func(t *testing.T) { - login := &OpenClawLogin{ - User: &bridgev2.User{}, - Connector: &OpenClawConnector{br: &bridgev2.Bridge{}}, - } - if _, err := login.Start(context.Background()); err != nil { - t.Fatalf("Start returned error: %v", err) - } + token, password, err = normalizeOpenClawAuthCredentials(map[string]string{"token": "abc"}) + if err != nil { + t.Fatalf("unexpected error for token input: %v", err) + } + if token != "abc" || password != "" { + t.Fatalf("unexpected token credentials: token=%q password=%q", token, password) + } - step, err := login.SubmitUserInput(context.Background(), map[string]string{"auth_mode": tc.inputMode}) - if err != nil { - t.Fatalf("SubmitUserInput returned error: %v", err) - } - if step.StepID != tc.wantStepID { - t.Fatalf("unexpected step id: got %q want %q", step.StepID, tc.wantStepID) - } - if step.UserInputParams == nil { - t.Fatalf("expected user input params for %s", tc.name) - } + token, password, err = normalizeOpenClawAuthCredentials(map[string]string{"password": "secret"}) + if err != nil { + t.Fatalf("unexpected error for password input: %v", 
err) + } + if token != "" || password != "secret" { + t.Fatalf("unexpected password credentials: token=%q password=%q", token, password) + } - gotFieldIDs := make([]string, 0, len(step.UserInputParams.Fields)) - for _, field := range step.UserInputParams.Fields { - gotFieldIDs = append(gotFieldIDs, field.ID) - } - if len(gotFieldIDs) != len(tc.wantFieldIDs) { - t.Fatalf("unexpected field count: got %#v want %#v", gotFieldIDs, tc.wantFieldIDs) - } - for i := range gotFieldIDs { - if gotFieldIDs[i] != tc.wantFieldIDs[i] { - t.Fatalf("unexpected field ids: got %#v want %#v", gotFieldIDs, tc.wantFieldIDs) - } - } - }) + _, _, err = normalizeOpenClawAuthCredentials(map[string]string{"token": "abc", "password": "secret"}) + if err == nil { + t.Fatal("expected token+password input to fail") } } -func TestNormalizeOpenClawAuthCredentials(t *testing.T) { - if _, _, err := normalizeOpenClawAuthCredentials("token", map[string]string{}); err == nil { - t.Fatal("expected token mode without token to fail") +func TestOpenClawLoginSubmitUserInputRejectsTokenAndPassword(t *testing.T) { + login := &OpenClawLogin{ + User: &bridgev2.User{}, + Connector: &OpenClawConnector{br: &bridgev2.Bridge{}}, } - if _, _, err := normalizeOpenClawAuthCredentials("password", map[string]string{}); err == nil { - t.Fatal("expected password mode without password to fail") + if _, err := login.Start(context.Background()); err != nil { + t.Fatalf("Start returned error: %v", err) } - token, password, err := normalizeOpenClawAuthCredentials("none", map[string]string{ - "token": "abc", - "password": "secret", + + _, err := login.SubmitUserInput(context.Background(), map[string]string{ + "url": "ws://127.0.0.1:18789", + "token": "shared-token", + "password": "shared-password", }) - if err != nil { - t.Fatalf("none auth mode returned error: %v", err) - } - if token != "" || password != "" { - t.Fatalf("expected none auth mode to clear credentials, got token=%q password=%q", token, password) + if err == nil { 
+ t.Fatal("expected SubmitUserInput to reject token+password") } } @@ -134,16 +121,13 @@ func TestOpenClawLoginSubmitUserInputPairingRequiredReturnsWaitStep(t *testing.T if _, err := login.Start(context.Background()); err != nil { t.Fatalf("Start returned error: %v", err) } - if _, err := login.SubmitUserInput(context.Background(), map[string]string{"auth_mode": "Token"}); err != nil { - t.Fatalf("auth mode SubmitUserInput returned error: %v", err) - } step, err := login.SubmitUserInput(context.Background(), map[string]string{ "url": "ws://127.0.0.1:18789", "token": "shared-token", }) if err != nil { - t.Fatalf("credentials SubmitUserInput returned error: %v", err) + t.Fatalf("SubmitUserInput returned error: %v", err) } if step.Type != bridgev2.LoginStepTypeDisplayAndWait { t.Fatalf("unexpected step type: %q", step.Type) @@ -172,7 +156,6 @@ func TestOpenClawLoginWaitReturnsStillWaitingStepOnContextDone(t *testing.T) { step: openClawLoginStatePairingWait, pending: &openClawPendingLogin{ gatewayURL: "ws://127.0.0.1:18789", - authMode: "token", token: "shared-token", requestID: "req-456", }, @@ -203,7 +186,6 @@ func TestOpenClawLoginWaitMapsNonPairingErrors(t *testing.T) { waitFor: time.Second, pending: &openClawPendingLogin{ gatewayURL: "ws://127.0.0.1:18789", - authMode: "token", token: "shared-token", requestID: "req-789", }, diff --git a/bridges/openclaw/metadata.go b/bridges/openclaw/metadata.go index 93cd7f51..938c7c42 100644 --- a/bridges/openclaw/metadata.go +++ b/bridges/openclaw/metadata.go @@ -14,7 +14,6 @@ import ( type UserLoginMetadata struct { Provider string `json:"provider,omitempty"` GatewayURL string `json:"gateway_url,omitempty"` - AuthMode string `json:"auth_mode,omitempty"` GatewayToken string `json:"gateway_token,omitempty"` GatewayPassword string `json:"gateway_password,omitempty"` GatewayLabel string `json:"gateway_label,omitempty"` From d4bd516db6ad958bd2bf63025d2372c2efd8a75a Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?batuhan=20i=C3=A7=C3=B6z?= Date: Sun, 22 Mar 2026 21:26:07 +0100 Subject: [PATCH 5/8] Add gateway client identity and update OpenClaw logic Introduce a gatewayClientIdentity and resolver helpers to derive client ID, version, platform, device family, instance ID and user-agent. Wire the identity into websocket dial headers, connect params, HTTP requests, and the signed device payload (so visible client metadata is covered by the device signature). Change default client constants (ID -> "beeper-bridge", mode -> "ui", display name -> "Beeper") and remove sessions.patch from preferred gateway methods. Remove legacy OpenClaw slash-command parsing and session-patch handling from manager; update tests to verify slash commands (/model, /stop) pass through, to check presence of instanceId/userAgent, and to assert the device signature covers visible client metadata. --- bridges/openclaw/commands_test.go | 107 ++++++++----------- bridges/openclaw/gateway_client.go | 100 ++++++++++++++---- bridges/openclaw/gateway_client_test.go | 68 +++++++++++- bridges/openclaw/manager.go | 133 ------------------------ 4 files changed, 190 insertions(+), 218 deletions(-) diff --git a/bridges/openclaw/commands_test.go b/bridges/openclaw/commands_test.go index 1aef4a73..c64a9023 100644 --- a/bridges/openclaw/commands_test.go +++ b/bridges/openclaw/commands_test.go @@ -1,77 +1,56 @@ package openclaw import ( + "context" "testing" + "maunium.net/go/mautrix/bridgev2" "maunium.net/go/mautrix/event" ) -func TestParseOpenClawControlCommand(t *testing.T) { - tests := []struct { - name string - body string - want *openClawControlCommand - wantOK bool - msgType event.MessageType - evtType event.Type - }{ - { - name: "reset", - body: "/reset", - want: &openClawControlCommand{Action: "reset"}, - wantOK: true, - }, - { - name: "rename", - body: "/rename Support Inbox", - want: &openClawControlCommand{Action: "label", Value: "Support Inbox"}, - wantOK: true, - }, - { - name: "clear label", - 
body: "/label clear", - want: &openClawControlCommand{Action: "label", Clear: true}, - wantOK: true, - }, - { - name: "thinking value", - body: "/thinking high", - want: &openClawControlCommand{Action: "thinking", Value: "high"}, - wantOK: true, - }, - { - name: "reasoning clear", - body: "/reasoning default", - want: &openClawControlCommand{Action: "reasoning", Clear: true}, - wantOK: true, - }, - { - name: "non command", - body: "hello", - wantOK: false, +func TestBuildOutboundPayloadPreservesSlashCommands(t *testing.T) { + mgr := newOpenClawManager(&OpenClawClient{}) + + msg := &bridgev2.MatrixMessage{ + MatrixEventBase: bridgev2.MatrixEventBase[*event.MessageEventContent]{ + Event: &event.Event{Type: event.EventMessage}, + Content: &event.MessageEventContent{MsgType: event.MsgText, Body: "/model openai/gpt-5"}, }, - { - name: "media ignored", - body: "/reset", - msgType: event.MsgImage, - wantOK: false, + } + attachments, text, err := mgr.buildOutboundPayload(context.Background(), msg) + if err != nil { + t.Fatalf("buildOutboundPayload returned error: %v", err) + } + if len(attachments) != 0 { + t.Fatalf("expected no attachments, got %#v", attachments) + } + if text != "/model openai/gpt-5" { + t.Fatalf("expected slash command to pass through unchanged, got %q", text) + } +} + +func TestBuildOutboundPayloadPreservesStopCommand(t *testing.T) { + mgr := newOpenClawManager(&OpenClawClient{}) + + msg := &bridgev2.MatrixMessage{ + MatrixEventBase: bridgev2.MatrixEventBase[*event.MessageEventContent]{ + Event: &event.Event{Type: event.EventMessage}, + Content: &event.MessageEventContent{MsgType: event.MsgText, Body: "/stop"}, }, } - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, ok := parseOpenClawControlCommand(tt.body, tt.msgType, tt.evtType) - if ok != tt.wantOK { - t.Fatalf("unexpected ok: got %v want %v", ok, tt.wantOK) - } - if !tt.wantOK { - return - } - if got == nil { - t.Fatal("expected command") - } - if *got != *tt.want { - 
t.Fatalf("unexpected command: got %+v want %+v", *got, *tt.want) - } - }) + _, text, err := mgr.buildOutboundPayload(context.Background(), msg) + if err != nil { + t.Fatalf("buildOutboundPayload returned error: %v", err) + } + if text != "/stop" { + t.Fatalf("expected stop command to pass through unchanged, got %q", text) + } +} + +func TestOpenClawPreferredGatewayMethodsDoNotRequireSessionPatch(t *testing.T) { + for _, method := range openClawPreferredGatewayMethods { + if method == "sessions.patch" { + t.Fatal("did not expect sessions.patch in preferred gateway methods") + } } } diff --git a/bridges/openclaw/gateway_client.go b/bridges/openclaw/gateway_client.go index 548286b3..ec4f4f29 100644 --- a/bridges/openclaw/gateway_client.go +++ b/bridges/openclaw/gateway_client.go @@ -15,6 +15,7 @@ import ( "os" "path/filepath" "runtime" + "runtime/debug" "strings" "sync" "sync/atomic" @@ -26,10 +27,9 @@ import ( const ( openClawProtocolVersion = 3 - openClawGatewayClientID = "gateway-client" - openClawGatewayClientMode = "backend" - openClawGatewayDisplayName = "ai-bridge openclaw" - openClawGatewayDeviceFamily = "bridge" + openClawGatewayClientID = "beeper-bridge" + openClawGatewayClientMode = "ui" + openClawGatewayDisplayName = "Beeper" openClawGatewayWSReadLimit = 32 * 1024 * 1024 openClawGatewayPingInterval = 30 * time.Second openClawGatewayPingTimeout = 10 * time.Second @@ -37,6 +37,65 @@ const ( openClawDefaultRequestTimout = 30 * time.Second ) +type gatewayClientIdentity struct { + ID string + DisplayName string + Version string + Platform string + Mode string + DeviceFamily string + InstanceID string + UserAgent string +} + +func resolveGatewayClientIdentity() gatewayClientIdentity { + version := resolveGatewayClientVersion() + return gatewayClientIdentity{ + ID: openClawGatewayClientID, + DisplayName: openClawGatewayDisplayName, + Version: version, + Platform: resolveGatewayClientPlatform(), + Mode: openClawGatewayClientMode, + DeviceFamily: 
resolveGatewayClientDeviceFamily(), + InstanceID: uuid.NewString(), + UserAgent: "Beeper bridge/" + version, + } +} + +func resolveGatewayClientVersion() string { + if info, ok := debug.ReadBuildInfo(); ok { + if version := strings.TrimSpace(info.Main.Version); version != "" && version != "(devel)" { + return version + } + } + return "dev" +} + +func resolveGatewayClientPlatform() string { + switch runtime.GOOS { + case "darwin": + return "macos" + default: + return runtime.GOOS + } +} + +func resolveGatewayClientDeviceFamily() string { + switch runtime.GOOS { + case "darwin": + return "Mac" + case "linux": + return "Linux" + case "windows": + return "Windows" + default: + if runtime.GOOS == "" { + return "Device" + } + return strings.ToUpper(runtime.GOOS[:1]) + runtime.GOOS[1:] + } +} + type gatewayConnectConfig struct { URL string Token string @@ -454,9 +513,10 @@ func (c *gatewayWSClient) Connect(ctx context.Context) (string, error) { if err != nil { return "", err } + clientIdentity := resolveGatewayClientIdentity() conn, _, err := websocket.Dial(ctx, wsURL, &websocket.DialOptions{ CompressionMode: websocket.CompressionDisabled, - HTTPHeader: http.Header{"User-Agent": []string{"ai-bridge/openclaw"}}, + HTTPHeader: http.Header{"User-Agent": []string{clientIdentity.UserAgent}}, }) if err != nil { return "", fmt.Errorf("dial gateway websocket: %w", err) @@ -734,7 +794,7 @@ func (c *gatewayWSClient) doSessionHistoryRequestWithStatus(req *http.Request) ( if authToken := c.httpBearerAuthToken(); authToken != "" { req.Header.Set("Authorization", "Bearer "+authToken) } - req.Header.Set("User-Agent", "ai-bridge/openclaw") + req.Header.Set("User-Agent", resolveGatewayClientIdentity().UserAgent) resp, err := (&http.Client{Timeout: openClawDefaultRequestTimout}).Do(req) if err != nil { @@ -1299,6 +1359,7 @@ func (c *gatewayWSClient) failPending(err error) { } func (c *gatewayWSClient) buildConnectParams(identity *gatewayDeviceIdentity, nonce string) (map[string]any, error) 
{ + clientIdentity := resolveGatewayClientIdentity() scopes := []string{"operator.read", "operator.write", "operator.approvals"} sharedToken := strings.TrimSpace(c.cfg.Token) deviceToken := strings.TrimSpace(c.cfg.DeviceToken) @@ -1310,12 +1371,13 @@ func (c *gatewayWSClient) buildConnectParams(identity *gatewayDeviceIdentity, no "minProtocol": openClawProtocolVersion, "maxProtocol": openClawProtocolVersion, "client": map[string]any{ - "id": openClawGatewayClientID, - "displayName": openClawGatewayDisplayName, - "version": "0.1.0", - "platform": runtime.GOOS, - "mode": openClawGatewayClientMode, - "deviceFamily": openClawGatewayDeviceFamily, + "id": clientIdentity.ID, + "displayName": clientIdentity.DisplayName, + "version": clientIdentity.Version, + "platform": clientIdentity.Platform, + "mode": clientIdentity.Mode, + "deviceFamily": clientIdentity.DeviceFamily, + "instanceId": clientIdentity.InstanceID, }, "role": "operator", "scopes": scopes, @@ -1323,7 +1385,7 @@ func (c *gatewayWSClient) buildConnectParams(identity *gatewayDeviceIdentity, no "commands": []string{}, "permissions": map[string]bool{}, "locale": "en-US", - "userAgent": "ai-bridge/openclaw", + "userAgent": clientIdentity.UserAgent, } if authToken != "" { auth := map[string]any{"token": authToken} @@ -1335,7 +1397,7 @@ func (c *gatewayWSClient) buildConnectParams(identity *gatewayDeviceIdentity, no params["auth"] = map[string]any{"password": strings.TrimSpace(c.cfg.Password)} } signedAtMs := time.Now().UnixMilli() - device, err := buildSignedGatewayDevice(identity, authToken, scopes, signedAtMs, nonce) + device, err := buildSignedGatewayDevice(identity, clientIdentity, authToken, scopes, signedAtMs, nonce) if err != nil { return nil, err } @@ -1496,7 +1558,7 @@ func gatewayDeviceIdentityPath() (string, error) { return filepath.Join(stateDir, "identity", "device.json"), nil } -func buildSignedGatewayDevice(identity *gatewayDeviceIdentity, authToken string, scopes []string, signedAtMs int64, nonce 
string) (map[string]any, error) { +func buildSignedGatewayDevice(identity *gatewayDeviceIdentity, clientIdentity gatewayClientIdentity, authToken string, scopes []string, signedAtMs int64, nonce string) (map[string]any, error) { pub, err := base64.StdEncoding.DecodeString(identity.PublicKey) if err != nil { return nil, err @@ -1508,15 +1570,15 @@ func buildSignedGatewayDevice(identity *gatewayDeviceIdentity, authToken string, payload := strings.Join([]string{ "v3", identity.DeviceID, - openClawGatewayClientID, - openClawGatewayClientMode, + clientIdentity.ID, + clientIdentity.Mode, "operator", strings.Join(scopes, ","), fmt.Sprintf("%d", signedAtMs), authToken, nonce, - strings.ToLower(runtime.GOOS), - openClawGatewayDeviceFamily, + strings.ToLower(clientIdentity.Platform), + strings.ToLower(clientIdentity.DeviceFamily), }, "|") signature := ed25519.Sign(ed25519.PrivateKey(priv), []byte(payload)) return map[string]any{ diff --git a/bridges/openclaw/gateway_client_test.go b/bridges/openclaw/gateway_client_test.go index ef3b6bb8..2188088b 100644 --- a/bridges/openclaw/gateway_client_test.go +++ b/bridges/openclaw/gateway_client_test.go @@ -7,9 +7,9 @@ import ( "encoding/base64" "encoding/json" "errors" + "fmt" "net/http" "net/http/httptest" - "runtime" "strings" "testing" ) @@ -45,9 +45,18 @@ func TestBuildConnectParamsUsesOperatorClientShape(t *testing.T) { if got := clientParams["mode"]; got != openClawGatewayClientMode { t.Fatalf("unexpected client mode: %v", got) } - if got := clientParams["platform"]; got != runtime.GOOS { + if got := clientParams["displayName"]; got != openClawGatewayDisplayName { + t.Fatalf("unexpected client display name: %v", got) + } + if got := clientParams["platform"]; got != resolveGatewayClientPlatform() { t.Fatalf("unexpected client platform: %v", got) } + if got := clientParams["deviceFamily"]; got != resolveGatewayClientDeviceFamily() { + t.Fatalf("unexpected client device family: %v", got) + } + if got, ok := 
clientParams["instanceId"].(string); !ok || strings.TrimSpace(got) == "" { + t.Fatalf("expected non-empty instance id, got %#v", clientParams["instanceId"]) + } if _, ok := clientParams["commands"]; ok { t.Fatalf("commands should not be nested in client params: %#v", clientParams) } @@ -68,6 +77,61 @@ func TestBuildConnectParamsUsesOperatorClientShape(t *testing.T) { if _, ok := params["permissions"].(map[string]bool); !ok { t.Fatalf("expected top-level permissions map, got %#v", params["permissions"]) } + if got, ok := params["scopes"].([]string); !ok || len(got) != 3 { + t.Fatalf("expected least-privilege scopes, got %#v", params["scopes"]) + } + if got := params["userAgent"]; got != "Beeper bridge/"+resolveGatewayClientVersion() { + t.Fatalf("unexpected user agent: %#v", got) + } +} + +func TestBuildConnectParamsSignsVisibleClientMetadata(t *testing.T) { + pub, priv, err := ed25519.GenerateKey(rand.Reader) + if err != nil { + t.Fatalf("GenerateKey returned error: %v", err) + } + + client := newGatewayWSClient(gatewayConnectConfig{ + URL: "ws://127.0.0.1:18789", + Token: "shared-token", + }) + params, err := client.buildConnectParams(&gatewayDeviceIdentity{ + Version: 1, + DeviceID: "device-id", + PublicKey: base64.StdEncoding.EncodeToString(pub), + PrivateKey: base64.StdEncoding.EncodeToString(priv), + }, "nonce") + if err != nil { + t.Fatalf("buildConnectParams returned error: %v", err) + } + + clientParams := params["client"].(map[string]any) + deviceParams, ok := params["device"].(map[string]any) + if !ok { + t.Fatalf("expected device params map, got %#v", params["device"]) + } + + sigEncoded, _ := deviceParams["signature"].(string) + sig, err := base64.RawURLEncoding.DecodeString(sigEncoded) + if err != nil { + t.Fatalf("decode signature: %v", err) + } + payload := strings.Join([]string{ + "v3", + "device-id", + clientParams["id"].(string), + clientParams["mode"].(string), + "operator", + strings.Join(params["scopes"].([]string), ","), + fmt.Sprintf("%d", 
deviceParams["signedAt"].(int64)), + "shared-token", + deviceParams["nonce"].(string), + strings.ToLower(clientParams["platform"].(string)), + strings.ToLower(clientParams["deviceFamily"].(string)), + }, "|") + if !ed25519.Verify(pub, []byte(payload), sig) { + t.Fatal("expected device signature to cover visible client metadata") + } } func TestGatewaySessionOriginStringParsesStructuredOrigin(t *testing.T) { diff --git a/bridges/openclaw/manager.go b/bridges/openclaw/manager.go index 55e2227f..6630def5 100644 --- a/bridges/openclaw/manager.go +++ b/bridges/openclaw/manager.go @@ -132,7 +132,6 @@ var ( } openClawPreferredGatewayMethods = []string{ "sessions.list", - "sessions.patch", "sessions.resolve", "chat.send", "chat.abort", @@ -566,17 +565,6 @@ func (m *openClawManager) HandleMatrixMessage(ctx context.Context, msg *bridgev2 return nil, err } meta := portalMeta(msg.Portal) - body := strings.TrimSpace(msg.Content.Body) - if isOpenClawAbortCommand(body, msg.Content.MsgType, msg.Event.Type) { - if err := gateway.AbortRun(ctx, meta.OpenClawSessionKey, ""); err != nil { - return nil, err - } - return &bridgev2.MatrixMessageResponse{Pending: false}, nil - } - if handled, err := m.handleControlCommand(ctx, msg, gateway, body); handled || err != nil { - return &bridgev2.MatrixMessageResponse{Pending: false}, err - } - attachments, text, err := m.buildOutboundPayload(ctx, msg) if err != nil { return nil, err @@ -661,127 +649,6 @@ func (m *openClawManager) buildOutboundPayload(ctx context.Context, msg *bridgev } } -func isOpenClawAbortCommand(body string, msgType event.MessageType, evtType event.Type) bool { - if evtType == event.EventSticker || msgType == event.MsgImage || msgType == event.MsgVideo || msgType == event.MsgAudio || msgType == event.MsgFile { - return false - } - body = strings.ToLower(strings.TrimSpace(body)) - switch body { - case "stop", "/stop", "stop run", "stop action", "please stop", "stop openclaw": - return true - default: - return false - } -} - 
-type openClawControlCommand struct { - Action string - Value string - Clear bool -} - -func parseOpenClawControlCommand(body string, msgType event.MessageType, evtType event.Type) (*openClawControlCommand, bool) { - if evtType == event.EventSticker || msgType == event.MsgImage || msgType == event.MsgVideo || msgType == event.MsgAudio || msgType == event.MsgFile { - return nil, false - } - body = strings.TrimSpace(body) - if !strings.HasPrefix(body, "/") { - return nil, false - } - fields := strings.Fields(body) - if len(fields) == 0 { - return nil, false - } - cmd := strings.ToLower(strings.TrimPrefix(fields[0], "/")) - rest := strings.TrimSpace(strings.TrimPrefix(body, fields[0])) - switch cmd { - case "reset": - if rest != "" { - return nil, false - } - return &openClawControlCommand{Action: "reset"}, true - case "rename", "label": - if rest == "" { - return nil, false - } - if strings.EqualFold(rest, "clear") || rest == "-" { - return &openClawControlCommand{Action: "label", Clear: true}, true - } - return &openClawControlCommand{Action: "label", Value: rest}, true - case "thinking", "verbose", "reasoning": - if rest == "" { - return nil, false - } - value := strings.ToLower(strings.TrimSpace(rest)) - if value == "inherit" || value == "default" || value == "-" { - return &openClawControlCommand{Action: cmd, Clear: true}, true - } - return &openClawControlCommand{Action: cmd, Value: value}, true - default: - return nil, false - } -} - -func (m *openClawManager) applySessionPatch(ctx context.Context, portal *bridgev2.Portal, gateway *gatewayWSClient, sessionKey, apiKey, displayName string, command *openClawControlCommand) error { - var patchValue any - notice := "OpenClaw " + displayName + " cleared." - if !command.Clear { - patchValue = command.Value - notice = "OpenClaw " + displayName + " set to " + command.Value + "." 
- } - if err := gateway.PatchSession(ctx, sessionKey, map[string]any{apiKey: patchValue}); err != nil { - return err - } - m.client.sendSystemNoticeViaPortal(ctx, portal, notice) - return nil -} - -func (m *openClawManager) handleControlCommand(ctx context.Context, msg *bridgev2.MatrixMessage, gateway *gatewayWSClient, body string) (bool, error) { - if msg == nil || msg.Portal == nil || gateway == nil { - return false, nil - } - command, ok := parseOpenClawControlCommand(body, msg.Content.MsgType, msg.Event.Type) - if !ok { - return false, nil - } - meta := portalMeta(msg.Portal) - sessionKey := strings.TrimSpace(meta.OpenClawSessionKey) - if sessionKey == "" { - m.client.sendSystemNoticeViaPortal(ctx, msg.Portal, "OpenClaw session key is unavailable for this room.") - return true, nil - } - switch command.Action { - case "reset": - if err := gateway.ResetSession(ctx, sessionKey); err != nil { - return true, err - } - m.invalidateHistoryCache(sessionKey) - m.client.sendSystemNoticeViaPortal(ctx, msg.Portal, "OpenClaw session reset.") - case "label": - if err := m.applySessionPatch(ctx, msg.Portal, gateway, sessionKey, "label", "label", command); err != nil { - return true, err - } - case "thinking": - if err := m.applySessionPatch(ctx, msg.Portal, gateway, sessionKey, "thinkingLevel", "thinking level", command); err != nil { - return true, err - } - case "verbose": - if err := m.applySessionPatch(ctx, msg.Portal, gateway, sessionKey, "verboseLevel", "verbose level", command); err != nil { - return true, err - } - case "reasoning": - if err := m.applySessionPatch(ctx, msg.Portal, gateway, sessionKey, "reasoningLevel", "reasoning level", command); err != nil { - return true, err - } - default: - return false, nil - } - if err := m.syncSessions(ctx); err != nil { - m.client.Log().Debug().Err(err).Str("session_key", sessionKey).Msg("Failed to refresh OpenClaw sessions after control command") - } - return true, nil -} - func (m *openClawManager) FetchMessages(ctx 
context.Context, params bridgev2.FetchMessagesParams) (*bridgev2.FetchMessagesResponse, error) { gateway, err := m.requireGateway() if err != nil { From fd3fb1626a17f9a71b1405c5d6d9d9487d16baa8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?batuhan=20i=C3=A7=C3=B6z?= Date: Mon, 23 Mar 2026 02:45:16 +0100 Subject: [PATCH 6/8] sync --- bridges/openclaw/client.go | 4 ---- bridges/openclaw/discovery_test.go | 7 ++++--- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/bridges/openclaw/client.go b/bridges/openclaw/client.go index 4dbf19fc..6f0c0517 100644 --- a/bridges/openclaw/client.go +++ b/bridges/openclaw/client.go @@ -757,10 +757,6 @@ func (oc *OpenClawClient) sendNoticeViaPortal(ctx context.Context, portal *bridg )) } -func (oc *OpenClawClient) sendSystemNoticeViaPortal(ctx context.Context, portal *bridgev2.Portal, msg string) { - oc.sendNoticeViaPortal(ctx, portal, msg, oc.senderForAgent("gateway", false)) -} - func (oc *OpenClawClient) DownloadAndEncodeMedia(ctx context.Context, mediaURL string, file *event.EncryptedFileInfo, maxMB int) (string, string, error) { return agentremote.DownloadAndEncodeMedia(ctx, oc.UserLogin, mediaURL, file, maxMB) } diff --git a/bridges/openclaw/discovery_test.go b/bridges/openclaw/discovery_test.go index 69212335..d1346cc2 100644 --- a/bridges/openclaw/discovery_test.go +++ b/bridges/openclaw/discovery_test.go @@ -1,6 +1,7 @@ package openclaw import ( + "net/http" "net/http/httptest" "testing" "time" @@ -93,7 +94,7 @@ func TestProvisioningDiscoveryOptions(t *testing.T) { }, } - req := httptest.NewRequest("GET", "/v1/discovery/gateways?timeout_ms=1500&wide_area=on", nil) + req := httptest.NewRequest(http.MethodGet, "/v1/discovery/gateways?timeout_ms=1500&wide_area=on", nil) opts, err := api.discoveryOptions(req) if err != nil { t.Fatalf("discoveryOptions returned error: %v", err) @@ -105,13 +106,13 @@ func TestProvisioningDiscoveryOptions(t *testing.T) { t.Fatalf("unexpected wide-area options: %#v", opts) } - req = 
httptest.NewRequest("GET", "/v1/discovery/gateways?timeout_ms=0", nil) + req = httptest.NewRequest(http.MethodGet, "/v1/discovery/gateways?timeout_ms=0", nil) if _, err := api.discoveryOptions(req); err == nil { t.Fatal("expected invalid timeout to fail") } api.connector.Config.OpenClaw.Discovery.WideAreaDomain = "" - req = httptest.NewRequest("GET", "/v1/discovery/gateways?wide_area=on", nil) + req = httptest.NewRequest(http.MethodGet, "/v1/discovery/gateways?wide_area=on", nil) if _, err := api.discoveryOptions(req); err == nil { t.Fatal("expected wide_area=on without configured domain to fail") } From 874f6f1c7a53c02574288f3698ef195ab4fa21be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?batuhan=20i=C3=A7=C3=B6z?= Date: Tue, 24 Mar 2026 15:07:07 +0100 Subject: [PATCH 7/8] Bump maunium.net/go/mautrix version Update maunium.net/go/mautrix to pseudo-version v0.26.5-0.20260323230949-0eafa0ba2e5d in go.mod and add the corresponding checksums to go.sum. This brings in the newer upstream commit for mautrix. 
--- go.mod | 2 +- go.sum | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/go.mod b/go.mod index 18dee920..d7dca619 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( go.mau.fi/util v0.9.7 golang.org/x/image v0.35.0 gopkg.in/yaml.v3 v3.0.1 - maunium.net/go/mautrix v0.26.5-0.20260322120516-fcf7539a7af5 + maunium.net/go/mautrix v0.26.5-0.20260323230949-0eafa0ba2e5d ) require ( diff --git a/go.sum b/go.sum index 0d7918b7..e2a51acd 100644 --- a/go.sum +++ b/go.sum @@ -179,3 +179,5 @@ maunium.net/go/mauflag v1.0.0 h1:YiaRc0tEI3toYtJMRIfjP+jklH45uDHtT80nUamyD4M= maunium.net/go/mauflag v1.0.0/go.mod h1:nLivPOpTpHnpzEh8jEdSL9UqO9+/KBJFmNRlwKfkPeA= maunium.net/go/mautrix v0.26.5-0.20260322120516-fcf7539a7af5 h1:5OubpPeU8vigaSELAquCizvb1fVhZHNV2xIoUaZnEV4= maunium.net/go/mautrix v0.26.5-0.20260322120516-fcf7539a7af5/go.mod h1:YWw8NWTszsbyFAznboicBObwHPgTSLcuTbVX2kY7U2M= +maunium.net/go/mautrix v0.26.5-0.20260323230949-0eafa0ba2e5d h1:QnInZkRrLravXFDouTIVSaNfCBptGse4cNMYIitobTQ= +maunium.net/go/mautrix v0.26.5-0.20260323230949-0eafa0ba2e5d/go.mod h1:YWw8NWTszsbyFAznboicBObwHPgTSLcuTbVX2kY7U2M= From 04cac269b5a5a050a2f8dfa76eec4b4e23d453e1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?batuhan=20i=C3=A7=C3=B6z?= Date: Tue, 24 Mar 2026 15:16:02 +0100 Subject: [PATCH 8/8] Update go.sum --- go.sum | 2 -- 1 file changed, 2 deletions(-) diff --git a/go.sum b/go.sum index e2a51acd..07b9b504 100644 --- a/go.sum +++ b/go.sum @@ -177,7 +177,5 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= maunium.net/go/mauflag v1.0.0 h1:YiaRc0tEI3toYtJMRIfjP+jklH45uDHtT80nUamyD4M= maunium.net/go/mauflag v1.0.0/go.mod h1:nLivPOpTpHnpzEh8jEdSL9UqO9+/KBJFmNRlwKfkPeA= -maunium.net/go/mautrix v0.26.5-0.20260322120516-fcf7539a7af5 h1:5OubpPeU8vigaSELAquCizvb1fVhZHNV2xIoUaZnEV4= -maunium.net/go/mautrix v0.26.5-0.20260322120516-fcf7539a7af5/go.mod 
h1:YWw8NWTszsbyFAznboicBObwHPgTSLcuTbVX2kY7U2M= maunium.net/go/mautrix v0.26.5-0.20260323230949-0eafa0ba2e5d h1:QnInZkRrLravXFDouTIVSaNfCBptGse4cNMYIitobTQ= maunium.net/go/mautrix v0.26.5-0.20260323230949-0eafa0ba2e5d/go.mod h1:YWw8NWTszsbyFAznboicBObwHPgTSLcuTbVX2kY7U2M=