From e219425197c25a018c5bda1356ffb4cf1197d380 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 11:19:47 -0500 Subject: [PATCH 01/25] py(deps[libtmux]) Pin to sibling chainable-commands worktree why: Build the experimental chain-command MCP tools against the in-progress libtmux._experimental.chain API on the sibling libtmux worktree. what: - Add [tool.uv.sources] libtmux = { path = "../libtmux", editable = true } - Relock against the local editable checkout --- pyproject.toml | 5 +++++ uv.lock | 55 +++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 55 insertions(+), 5 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 094f714..07420ee 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -111,6 +111,11 @@ lint = [ requires = ["hatchling"] build-backend = "hatchling.build" +[tool.uv.sources] +# Experiment: pin libtmux to the sibling chainable-commands worktree so the +# experimental libtmux._experimental.chain API is importable. Do not merge. +libtmux = { path = "../libtmux", editable = true } + [tool.uv.exclude-newer-package] # git-pull packages release in lockstep with their workspaces, so a # fresh release blocking on the 3-day cooldown blocks every diff --git a/uv.lock b/uv.lock index a3eb04f..126e4b1 100644 --- a/uv.lock +++ b/uv.lock @@ -1179,10 +1179,55 @@ wheels = [ [[package]] name = "libtmux" version = "0.58.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/c7/58/346776e0491ede33e1554a4bff9b545dbe9f3164e45abac483195938a1cf/libtmux-0.58.1.tar.gz", hash = "sha256:a294dd585aa419d4ecce36f3e55df656693743c97a0b5b5bb1e5fea31ada2482", size = 519541, upload-time = "2026-06-17T00:03:31.81Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/22/4d/e44ada32edfe947c40d4dfc596a6f5355400a16d08be06016bd754375e41/libtmux-0.58.1-py3-none-any.whl", hash = "sha256:ab0f47d03a59d674962bc23e36e188fcfa4a82b0f270d474afab519e3076839b", size = 113653, upload-time 
= "2026-06-17T00:03:30.48Z" }, +source = { editable = "../libtmux" } + +[package.metadata] + +[package.metadata.requires-dev] +coverage = [ + { name = "codecov" }, + { name = "coverage" }, + { name = "pytest-cov" }, +] +dev = [ + { name = "codecov" }, + { name = "coverage" }, + { name = "gp-libs" }, + { name = "gp-sphinx", specifier = "==0.0.1a31" }, + { name = "mypy" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-cov" }, + { name = "pytest-mock" }, + { name = "pytest-rerunfailures" }, + { name = "pytest-watcher" }, + { name = "pytest-xdist" }, + { name = "ruff" }, + { name = "sphinx-autobuild" }, + { name = "sphinx-autodoc-api-style", specifier = "==0.0.1a31" }, + { name = "sphinx-autodoc-pytest-fixtures", specifier = "==0.0.1a31" }, + { name = "types-docutils" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +docs = [ + { name = "gp-sphinx", specifier = "==0.0.1a31" }, + { name = "sphinx-autobuild" }, + { name = "sphinx-autodoc-api-style", specifier = "==0.0.1a31" }, + { name = "sphinx-autodoc-pytest-fixtures", specifier = "==0.0.1a31" }, +] +lint = [ + { name = "mypy" }, + { name = "ruff" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +testing = [ + { name = "gp-libs" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-mock" }, + { name = "pytest-rerunfailures" }, + { name = "pytest-watcher" }, + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] [[package]] @@ -1246,7 +1291,7 @@ testing = [ [package.metadata] requires-dist = [ { name = "fastmcp", specifier = ">=3.4.2,<4.0.0" }, - { name = "libtmux", specifier = ">=0.58.0,<1.0" }, + { name = "libtmux", editable = "../libtmux" }, ] [package.metadata.requires-dev] From 28fce41d316b05344ae63063c18938c243744eb5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 11:28:09 -0500 Subject: [PATCH 02/25] mcp(feat[chain]): one-dispatch tmux command chains why: Agents 
needed to run several tmux commands as one native invocation instead of one tool call per command. what: - Add run_command_chain: a list of {command, args, target} folded into one `tmux a ; b` dispatch via libtmux._experimental.chain (CommandChain.run, run off the event loop with asyncio.to_thread) - Destructive tier; refuse kill-server; fail closed on an empty list/target - Add ChainCommand / RunCommandChainResult models; register the tool - Tests: one-dispatch effect, atomic abort, validation, kill-server denial --- src/libtmux_mcp/models.py | 39 +++++++++++ src/libtmux_mcp/tools/__init__.py | 2 + src/libtmux_mcp/tools/chain_tools.py | 99 ++++++++++++++++++++++++++++ tests/test_chain_tools.py | 81 +++++++++++++++++++++++ 4 files changed, 221 insertions(+) create mode 100644 src/libtmux_mcp/tools/chain_tools.py create mode 100644 tests/test_chain_tools.py diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index fc97f71..2dbba30 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -655,3 +655,42 @@ class ContentChangeResult(BaseModel): changed: bool = Field(description="Whether the content changed before timeout") pane_id: str = Field(description="Pane ID that was polled") elapsed_seconds: float = Field(description="Time spent waiting in seconds") + + +class ChainCommand(BaseModel): + """One tmux command in a one-dispatch chain for :func:`run_command_chain`.""" + + model_config = ConfigDict(extra="forbid") + + command: str = Field(description="tmux command name, e.g. 'split-window'.") + args: list[str] = Field( + default_factory=list, + description="Positional argument tokens, rendered in order after the target.", + ) + target: str | None = Field( + default=None, + description=( + "Optional ``-t`` target (pane/window/session id). An empty string is " + "rejected; None means the command carries no target." 
+ ), + ) + + +class RunCommandChainResult(BaseModel): + """Result of one native tmux command-sequence dispatch.""" + + argv: list[str] = Field( + description="Rendered argv, with a standalone ';' token between commands.", + ) + command_count: int = Field( + description="Number of commands folded into the single dispatch.", + ) + returncode: int = Field(description="Exit code of the single tmux invocation.") + stdout: list[str] = Field( + default_factory=list, + description="Merged stdout lines from the sequence.", + ) + stderr: list[str] = Field( + default_factory=list, + description="Merged stderr lines from the sequence.", + ) diff --git a/src/libtmux_mcp/tools/__init__.py b/src/libtmux_mcp/tools/__init__.py index 7a72f9a..62b6d48 100644 --- a/src/libtmux_mcp/tools/__init__.py +++ b/src/libtmux_mcp/tools/__init__.py @@ -13,6 +13,7 @@ def register_tools(mcp: FastMCP) -> None: from libtmux_mcp.tools import ( batch_tools, buffer_tools, + chain_tools, env_tools, hook_tools, option_tools, @@ -24,6 +25,7 @@ def register_tools(mcp: FastMCP) -> None: ) batch_tools.register(mcp) + chain_tools.register(mcp) server_tools.register(mcp) session_tools.register(mcp) window_tools.register(mcp) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py new file mode 100644 index 0000000..8e4cfdc --- /dev/null +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -0,0 +1,99 @@ +"""MCP tools that run a chain of tmux commands as one native invocation. + +These leverage libtmux's experimental ``libtmux._experimental.chain`` API to +fold an ordered set of tmux commands into a single ``tmux a ; b ; c`` dispatch +(one subprocess), instead of issuing one tmux call per command. The chain API is +experimental and pinned to a sibling worktree; do not ship to a release. 
+""" + +from __future__ import annotations + +import asyncio +import typing as t + +from libtmux._experimental.chain import CommandCall, CommandChain + +from libtmux_mcp._utils import ( + ANNOTATIONS_DESTRUCTIVE, + TAG_DESTRUCTIVE, + ExpectedToolError, + _get_server, + handle_tool_errors_async, +) +from libtmux_mcp.models import ChainCommand, RunCommandChainResult + +if t.TYPE_CHECKING: + from fastmcp import FastMCP + +#: tmux commands refused outright: unrecoverable, no ``-t`` makes them safe, and +#: they would take down the server hosting this MCP. +_BLOCKED_COMMANDS = frozenset({"kill-server"}) + + +@handle_tool_errors_async +async def run_command_chain( + commands: list[ChainCommand], + socket_name: str | None = None, +) -> RunCommandChainResult: + """Run an ordered list of tmux commands as ONE native tmux invocation. + + The commands are folded into a single ``tmux a ; b ; c`` sequence and + dispatched once, instead of one tmux subprocess per command. tmux applies + its native sequence semantics: a command that errors aborts the rest. Each + command's ``-t`` target is passed through verbatim, so a chain may span + heterogeneous scopes (panes, windows, sessions). + + Parameters + ---------- + commands : list[ChainCommand] + Ordered tmux commands, each ``{command, args, target}``. Must be + non-empty; ``kill-server`` is refused. + socket_name : str, optional + tmux socket name (falls back to ``LIBTMUX_SOCKET``). + + Returns + ------- + RunCommandChainResult + The rendered ``argv`` (with ``;`` separators), the command count, and + the single invocation's merged exit code, stdout, and stderr. 
+ """ + if not commands: + msg = "commands must not be empty" + raise ExpectedToolError(msg) + + blocked = sorted({cmd.command for cmd in commands} & _BLOCKED_COMMANDS) + if blocked: + msg = f"refusing to run unrecoverable command(s): {', '.join(blocked)}" + raise ExpectedToolError(msg) + + server = _get_server(socket_name=socket_name) + try: + chain = CommandChain( + tuple( + CommandCall(cmd.command, tuple(cmd.args), target=cmd.target) + for cmd in commands + ), + ) + except ValueError as exc: # empty-string target / empty chain (fail closed) + raise ExpectedToolError(str(exc)) from exc + + argv = list(chain.argv()) + # A live Server satisfies the CommandRunner protocol; dispatch ONCE, off the + # event loop (libtmux dispatch is blocking). + result = await asyncio.to_thread(chain.run, server) + return RunCommandChainResult( + argv=argv, + command_count=len(commands), + returncode=result.returncode, + stdout=list(result.stdout), + stderr=list(result.stderr), + ) + + +def register(mcp: FastMCP) -> None: + """Register chain tools with the MCP instance.""" + mcp.tool( + title="Run Command Chain", + annotations=ANNOTATIONS_DESTRUCTIVE, + tags={TAG_DESTRUCTIVE}, + )(run_command_chain) diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py new file mode 100644 index 0000000..ef65084 --- /dev/null +++ b/tests/test_chain_tools.py @@ -0,0 +1,81 @@ +"""Tests for the chain command tools (one-dispatch tmux command sequences).""" + +from __future__ import annotations + +import asyncio +import typing as t + +import pytest + +from libtmux_mcp._utils import ExpectedToolError +from libtmux_mcp.models import ChainCommand +from libtmux_mcp.tools.chain_tools import run_command_chain + +if t.TYPE_CHECKING: + from libtmux.session import Session + + +def test_run_command_chain_one_dispatch(mcp_session: Session) -> None: + """Two set-option commands take effect from a single tmux invocation.""" + server = mcp_session.server + result = asyncio.run( + run_command_chain( + 
commands=[ + ChainCommand(command="set-option", args=["-g", "@cc_a", "1"]), + ChainCommand(command="set-option", args=["-g", "@cc_b", "2"]), + ], + socket_name=server.socket_name, + ), + ) + + assert result.returncode == 0 + assert result.command_count == 2 + assert ";" in result.argv # the standalone separator proves one sequence + assert server.cmd("show-option", "-gv", "@cc_a").stdout == ["1"] + assert server.cmd("show-option", "-gv", "@cc_b").stdout == ["2"] + + +def test_run_command_chain_aborts_on_error(mcp_session: Session) -> None: + """A failing command aborts the rest of the sequence (tmux ; semantics).""" + server = mcp_session.server + result = asyncio.run( + run_command_chain( + commands=[ + ChainCommand(command="rename-window", args=["x"], target="@999999"), + ChainCommand(command="set-option", args=["-g", "@cc_sentinel", "set"]), + ], + socket_name=server.socket_name, + ), + ) + + assert result.returncode != 0 + # the sequence aborted at the failing command, so the sentinel never ran: + assert "set" not in server.cmd("show-option", "-gv", "@cc_sentinel").stdout + + +def test_run_command_chain_validation(mcp_session: Session) -> None: + """An empty list and an empty-string target both fail closed.""" + socket = mcp_session.server.socket_name + with pytest.raises(ExpectedToolError): + asyncio.run(run_command_chain(commands=[], socket_name=socket)) + + with pytest.raises(ExpectedToolError): + asyncio.run( + run_command_chain( + commands=[ChainCommand(command="kill-window", target="")], + socket_name=socket, + ), + ) + + +def test_run_command_chain_blocks_kill_server(mcp_session: Session) -> None: + """kill-server is refused outright and the server survives.""" + server = mcp_session.server + with pytest.raises(ExpectedToolError): + asyncio.run( + run_command_chain( + commands=[ChainCommand(command="kill-server")], + socket_name=server.socket_name, + ), + ) + assert server.is_alive() From 054411ea35653b9946b7bd7334cf913bc8f47a15 Mon Sep 17 00:00:00 2001 
From: Tony Narlock Date: Sat, 20 Jun 2026 11:32:41 -0500 Subject: [PATCH 03/25] mcp(feat[chain]): forward-layout pane id capture why: A single tmux `;` chain can't hand back the ids it creates (a fresh id can't be substituted into the same invocation), so callers had no way to split a pane and learn the new pane ids. what: - Add build_forward_layout: split a seed pane N ways and return each new pane id, resolved over the minimum dispatches via ForwardPlan and AsyncServerPlanRunner (off the event loop) - Optional per-split shell / send_keys; mutating tier (reaches a shell) - Add ForwardSplit / ForwardLayoutResult models; register the tool - Tests: two splits capture distinct ids, single-split fold + send_keys lands, empty-list validation --- src/libtmux_mcp/models.py | 30 ++++++++++ src/libtmux_mcp/tools/chain_tools.py | 88 +++++++++++++++++++++++++++- tests/test_chain_tools.py | 60 ++++++++++++++++++- 3 files changed, 174 insertions(+), 4 deletions(-) diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 2dbba30..86afefc 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -694,3 +694,33 @@ class RunCommandChainResult(BaseModel): default_factory=list, description="Merged stderr lines from the sequence.", ) + + +class ForwardSplit(BaseModel): + """One split for :func:`build_forward_layout`.""" + + model_config = ConfigDict(extra="forbid") + + horizontal: bool = Field( + default=False, + description="Split left/right (-h) instead of top/bottom (-v).", + ) + shell: str | None = Field( + default=None, + description="Command to run in the new pane instead of the default shell.", + ) + send_keys: str | None = Field( + default=None, + description="Keys to send into the new pane after it is created.", + ) + + +class ForwardLayoutResult(BaseModel): + """Result of :func:`build_forward_layout`.""" + + pane_ids: list[str] = Field( + description="Captured ids of the created panes, in split order.", + ) + dispatch_count: int = Field( + 
description="Number of native tmux invocations the resolution used.", + ) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index 8e4cfdc..917eefd 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -11,16 +11,29 @@ import asyncio import typing as t -from libtmux._experimental.chain import CommandCall, CommandChain +from libtmux._experimental.chain import ( + AsyncServerPlanRunner, + CommandCall, + CommandChain, + ForwardPlan, +) from libtmux_mcp._utils import ( ANNOTATIONS_DESTRUCTIVE, + ANNOTATIONS_SHELL, TAG_DESTRUCTIVE, + TAG_MUTATING, ExpectedToolError, _get_server, + _resolve_pane, handle_tool_errors_async, ) -from libtmux_mcp.models import ChainCommand, RunCommandChainResult +from libtmux_mcp.models import ( + ChainCommand, + ForwardLayoutResult, + ForwardSplit, + RunCommandChainResult, +) if t.TYPE_CHECKING: from fastmcp import FastMCP @@ -90,6 +103,72 @@ async def run_command_chain( ) +def _send_keys_decorate(keys: str) -> t.Callable[..., t.Any]: + """Build a send_keys decorate bound to a captured string (per-iteration binding).""" + + def build(handle: t.Any) -> t.Any: + return handle.cmd.send_keys(keys, enter=True) + + return build + + +@handle_tool_errors_async +async def build_forward_layout( + splits: list[ForwardSplit], + pane_id: str | None = None, + session_name: str | None = None, + session_id: str | None = None, + socket_name: str | None = None, +) -> ForwardLayoutResult: + r"""Split a seed pane into several panes, returning their new ids. + + Unlike a single ``\;`` chain, this captures the id tmux assigns each new + pane (a fresh id can't be substituted back into the same invocation), so it + resolves over the minimum number of dispatches: a single split folds into + one, several independent splits take one per creation plus one trailing + chain for the decorations. 
+ + Parameters + ---------- + splits : list[ForwardSplit] + Splits off the seed pane, each ``{horizontal, shell, send_keys}``. + pane_id : str, optional + Seed pane id; defaults to the resolved/active pane. + session_name, session_id : str, optional + Used to resolve the seed pane when ``pane_id`` is omitted. + socket_name : str, optional + tmux socket name. + + Returns + ------- + ForwardLayoutResult + The created pane ids (in split order) and the dispatch count. + """ + if not splits: + msg = "splits must not be empty" + raise ExpectedToolError(msg) + + server = _get_server(socket_name=socket_name) + seed = _resolve_pane( + server, + pane_id=pane_id, + session_name=session_name, + session_id=session_id, + ) + plan = ForwardPlan.from_pane(seed) + for split in splits: + handle = plan.split(horizontal=split.horizontal, shell=split.shell) + if split.send_keys is not None: + handle.do(_send_keys_decorate(split.send_keys)) + + resolved = await plan.run_resolving_async(AsyncServerPlanRunner(server)) + pane_ids = [resolved.bindings[index] for index in range(len(splits))] + return ForwardLayoutResult( + pane_ids=pane_ids, + dispatch_count=len(resolved.results), + ) + + def register(mcp: FastMCP) -> None: """Register chain tools with the MCP instance.""" mcp.tool( @@ -97,3 +176,8 @@ def register(mcp: FastMCP) -> None: annotations=ANNOTATIONS_DESTRUCTIVE, tags={TAG_DESTRUCTIVE}, )(run_command_chain) + mcp.tool( + title="Build Forward Layout", + annotations=ANNOTATIONS_SHELL, + tags={TAG_MUTATING}, + )(build_forward_layout) diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index ef65084..a2b6275 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -8,10 +8,12 @@ import pytest from libtmux_mcp._utils import ExpectedToolError -from libtmux_mcp.models import ChainCommand -from libtmux_mcp.tools.chain_tools import run_command_chain +from libtmux_mcp.models import ChainCommand, ForwardSplit +from libtmux_mcp.tools.chain_tools import 
build_forward_layout, run_command_chain if t.TYPE_CHECKING: + from libtmux.pane import Pane + from libtmux.server import Server from libtmux.session import Session @@ -79,3 +81,57 @@ def test_run_command_chain_blocks_kill_server(mcp_session: Session) -> None: ), ) assert server.is_alive() + + +def test_build_forward_layout_captures_ids(mcp_server: Server, mcp_pane: Pane) -> None: + """Two splits off a seed pane return two distinct, real pane ids.""" + result = asyncio.run( + build_forward_layout( + splits=[ForwardSplit(horizontal=True), ForwardSplit()], + pane_id=mcp_pane.pane_id, + socket_name=mcp_server.socket_name, + ), + ) + + assert len(result.pane_ids) == 2 + assert result.pane_ids[0] != result.pane_ids[1] + assert all(pid.startswith("%") for pid in result.pane_ids) + assert result.dispatch_count >= 2 # independent splits need a dispatch each + + mcp_pane.window.refresh() + existing = {p.pane_id for p in mcp_pane.window.panes} + assert set(result.pane_ids) <= existing + + +def test_build_forward_layout_single_split_send_keys( + mcp_server: Server, mcp_pane: Pane +) -> None: + """A lone split folds to one dispatch and its send_keys reaches the new pane.""" + from libtmux_mcp.tools.wait_for_tools import wait_for_channel + + channel = "cc_fwd_layout" + keys = f"printf 'CC_FWD\\n'; tmux wait-for -S {channel}" + result = asyncio.run( + build_forward_layout( + splits=[ForwardSplit(send_keys=keys)], + pane_id=mcp_pane.pane_id, + socket_name=mcp_server.socket_name, + ), + ) + + assert len(result.pane_ids) == 1 + assert result.dispatch_count == 1 # single split -> one {marked} invocation + + asyncio.run( + wait_for_channel(channel, timeout=5.0, socket_name=mcp_server.socket_name), + ) + mcp_pane.window.refresh() + new_pane = mcp_pane.window.panes.get(pane_id=result.pane_ids[0]) + assert new_pane is not None + assert "CC_FWD" in "\n".join(new_pane.capture_pane()) + + +def test_build_forward_layout_validation(mcp_pane: Pane) -> None: + """An empty split list fails 
closed.""" + with pytest.raises(ExpectedToolError): + asyncio.run(build_forward_layout(splits=[], pane_id=mcp_pane.pane_id)) From bd4329cdcf2d8349129e81a8fcd7006575b1d7fb Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 14:53:14 -0500 Subject: [PATCH 04/25] Middleware(test[logs]): Tolerate propagation why: FastMCP log capture can surface the same child logger event through both direct and parent propagation paths, which made the level check brittle. what: - Assert the set of matching fastmcp.errors levels - Keep warning/error demotion coverage for tool errors --- tests/test_middleware.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_middleware.py b/tests/test_middleware.py index 698db81..2011d48 100644 --- a/tests/test_middleware.py +++ b/tests/test_middleware.py @@ -1140,14 +1140,14 @@ async def _call() -> None: with caplog.at_level(logging.DEBUG, logger="fastmcp.errors"): asyncio.run(_call()) - levels = [ + levels = { r.levelno for r in caplog.records if r.name == "fastmcp.errors" and "Error in tools/call" in r.getMessage() and message_fragment in r.getMessage() - ] - assert levels == [expected_level] + } + assert levels == {expected_level} def test_schema_validation_failure_marked_expected_in_meta() -> None: From 005fd47ebcb350c1e34a91f3fb839be821b188b4 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 14:54:35 -0500 Subject: [PATCH 05/25] py(deps[libtmux]): Pin public chain commit why: CI cannot install a sibling worktree path, so the MCP branch needs a public immutable libtmux source for the experimental chain API. 
what: - Replace the editable sibling path with the published libtmux commit - Regenerate uv.lock with the Git source --- pyproject.toml | 4 +--- uv.lock | 53 ++------------------------------------------------ 2 files changed, 3 insertions(+), 54 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 07420ee..62813b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,9 +112,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.uv.sources] -# Experiment: pin libtmux to the sibling chainable-commands worktree so the -# experimental libtmux._experimental.chain API is importable. Do not merge. -libtmux = { path = "../libtmux", editable = true } +libtmux = { git = "https://github.com/tmux-python/libtmux.git", rev = "591a312f78d165816bb95a035a46219657c4b53d" } [tool.uv.exclude-newer-package] # git-pull packages release in lockstep with their workspaces, so a diff --git a/uv.lock b/uv.lock index 126e4b1..e1e5ac4 100644 --- a/uv.lock +++ b/uv.lock @@ -1179,56 +1179,7 @@ wheels = [ [[package]] name = "libtmux" version = "0.58.1" -source = { editable = "../libtmux" } - -[package.metadata] - -[package.metadata.requires-dev] -coverage = [ - { name = "codecov" }, - { name = "coverage" }, - { name = "pytest-cov" }, -] -dev = [ - { name = "codecov" }, - { name = "coverage" }, - { name = "gp-libs" }, - { name = "gp-sphinx", specifier = "==0.0.1a31" }, - { name = "mypy" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "pytest-cov" }, - { name = "pytest-mock" }, - { name = "pytest-rerunfailures" }, - { name = "pytest-watcher" }, - { name = "pytest-xdist" }, - { name = "ruff" }, - { name = "sphinx-autobuild" }, - { name = "sphinx-autodoc-api-style", specifier = "==0.0.1a31" }, - { name = "sphinx-autodoc-pytest-fixtures", specifier = "==0.0.1a31" }, - { name = "types-docutils" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -docs = [ - { name = "gp-sphinx", specifier = "==0.0.1a31" }, - { name = 
"sphinx-autobuild" }, - { name = "sphinx-autodoc-api-style", specifier = "==0.0.1a31" }, - { name = "sphinx-autodoc-pytest-fixtures", specifier = "==0.0.1a31" }, -] -lint = [ - { name = "mypy" }, - { name = "ruff" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -testing = [ - { name = "gp-libs" }, - { name = "pytest" }, - { name = "pytest-asyncio" }, - { name = "pytest-mock" }, - { name = "pytest-rerunfailures" }, - { name = "pytest-watcher" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] +source = { git = "https://github.com/tmux-python/libtmux.git?rev=591a312f78d165816bb95a035a46219657c4b53d#591a312f78d165816bb95a035a46219657c4b53d" } [[package]] name = "libtmux-mcp" @@ -1291,7 +1242,7 @@ testing = [ [package.metadata] requires-dist = [ { name = "fastmcp", specifier = ">=3.4.2,<4.0.0" }, - { name = "libtmux", editable = "../libtmux" }, + { name = "libtmux", git = "https://github.com/tmux-python/libtmux.git?rev=591a312f78d165816bb95a035a46219657c4b53d" }, ] [package.metadata.requires-dev] From 55d85e9dec5db3fb7730b4d3dd5e3f872ae78fe4 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:05:00 -0500 Subject: [PATCH 06/25] Chain(feat[tools]): Add typed op compiler why: Raw tmux command chains forced callers to choose between one native dispatch and typed, per-operation results. A typed compiler fills that gap while keeping output and continue-on-error semantics honest. 
what: - Replace raw chain/layout tools with run_tmux_operations - Add discriminated operation models and structured step results - Fold chainable runs and split standalone output/id captures - Document the new chain tool surface --- docs/conf.py | 11 + docs/reference/api/tools.md | 9 + docs/tools/chain/index.md | 23 ++ docs/tools/chain/run-tmux-operations.md | 36 ++ docs/tools/index.md | 8 + src/libtmux_mcp/models.py | 241 +++++++++-- src/libtmux_mcp/tools/chain_tools.py | 519 ++++++++++++++++++------ tests/test_chain_tools.py | 256 ++++++++---- 8 files changed, 841 insertions(+), 262 deletions(-) create mode 100644 docs/tools/chain/index.md create mode 100644 docs/tools/chain/run-tmux-operations.md diff --git a/docs/conf.py b/docs/conf.py index af8bbd1..100e3b7 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -119,6 +119,7 @@ def _patched_tool_collector_tool(self: ToolCollector, **kwargs: t.Any) -> t.Any: conf["fastmcp_tool_modules"] = [ "libtmux_mcp.tools.batch_tools", + "libtmux_mcp.tools.chain_tools", "libtmux_mcp.tools.server_tools", "libtmux_mcp.tools.session_tools", "libtmux_mcp.tools.window_tools", @@ -131,6 +132,7 @@ def _patched_tool_collector_tool(self: ToolCollector, **kwargs: t.Any) -> t.Any: ] conf["fastmcp_area_map"] = { "batch_tools": "batch/index", + "chain_tools": "chain/index", "server_tools": "server/index", "session_tools": "session/index", "window_tools": "window/index", @@ -161,6 +163,15 @@ def _patched_tool_collector_tool(self: ToolCollector, **kwargs: t.Any) -> t.Any: "SendKeysOperation", "SendKeysOperationResult", "SendKeysBatchResult", + "SplitPaneOperation", + "TmuxSendKeysOperation", + "ResizePaneOperation", + "SelectLayoutOperation", + "SetOptionOperation", + "CapturePaneOperation", + "TmuxOperationStepResult", + "TmuxOperationDispatchResult", + "RunTmuxOperationsResult", "ToolCallOperation", "ToolCallOperationResult", "ToolCallBatchResult", diff --git a/docs/reference/api/tools.md b/docs/reference/api/tools.md index fddc84e..751f038 
100644 --- a/docs/reference/api/tools.md +++ b/docs/reference/api/tools.md @@ -9,6 +9,15 @@ :show-inheritance: ``` +## Chain tools + +```{eval-rst} +.. automodule:: libtmux_mcp.tools.chain_tools + :members: + :undoc-members: + :show-inheritance: +``` + ## Server tools ```{eval-rst} diff --git a/docs/tools/chain/index.md b/docs/tools/chain/index.md new file mode 100644 index 0000000..72b8619 --- /dev/null +++ b/docs/tools/chain/index.md @@ -0,0 +1,23 @@ +# Chain tools + +Chain tools compile typed tmux operations into the fewest safe native +tmux dispatches. They are different from batch tools: batch tools call +existing MCP tools one by one, while chain tools lower a typed operation +list directly to tmux command sequences when tmux can preserve the same +semantics. + +::::{grid} 1 1 2 3 +:gutter: 2 2 3 3 + +:::{grid-item-card} {tooliconl}`run-tmux-operations` +Run typed tmux operations with automatic native chaining. +::: + +:::: + +```{toctree} +:hidden: +:maxdepth: 1 + +run-tmux-operations +``` diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md new file mode 100644 index 0000000..0eb46ec --- /dev/null +++ b/docs/tools/chain/run-tmux-operations.md @@ -0,0 +1,36 @@ +# Run tmux operations + +```{fastmcp-tool} chain_tools.run_tmux_operations +``` + +**Use when** you need several typed tmux operations to run in order and +want libtmux-mcp to fold safe no-output steps into one native tmux +sequence. + +**Avoid when** you need to call arbitrary MCP tools; use +{tooliconl}`call-mutating-tools-batch` for that. Use individual tools +when a workflow has only one step. + +**Side effects:** Mutates tmux state according to the submitted +operation list. With `on_error="stop"`, chainable operations may share +one tmux sequence and native tmux failure semantics stop later steps. +With `on_error="continue"`, operations run as standalone dispatches so +later steps can still run after an earlier failure. 
+ +**Example:** + +```json +{ + "tool": "run_tmux_operations", + "arguments": { + "operations": [ + {"kind": "split_pane", "pane_id": "%1", "ref": "work"}, + {"kind": "send_keys", "pane_ref": "work", "keys": "uv run pytest"} + ], + "on_error": "stop" + } +} +``` + +```{fastmcp-tool-input} chain_tools.run_tmux_operations +``` diff --git a/docs/tools/index.md b/docs/tools/index.md index 9c37b2f..53c1140 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -55,6 +55,7 @@ leave socket selection inside each nested tool's arguments. See - Signal a waiter → {tool}`signal-channel` **Batching typed tool calls?** +- Native tmux operation chains → {tool}`run-tmux-operations` - Read-only observations → {tool}`call-readonly-tools-batch` - Ordered readonly + mutating workflows → {tool}`call-mutating-tools-batch` - Reviewed workflows that include destructive steps → {tool}`call-destructive-tools-batch` @@ -270,6 +271,12 @@ Run a shell command and report exit status. Call typed readonly or mutating tools in order. ::: +:::{grid-item-card} run_tmux_operations +:link: run-tmux-operations +:link-type: ref +Compile typed tmux operations into native chains. +::: + :::{grid-item-card} rename_session :link: rename-session :link-type: ref @@ -455,6 +462,7 @@ Delete an MCP-staged tmux paste buffer. 
server/index batch/index +chain/index session/index window/index pane/index diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 86afefc..d0b6f66 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -2,9 +2,10 @@ from __future__ import annotations +import enum import typing as t -from pydantic import BaseModel, ConfigDict, Field +from pydantic import BaseModel, ConfigDict, Field, model_validator class SessionInfo(BaseModel): @@ -657,70 +658,230 @@ class ContentChangeResult(BaseModel): elapsed_seconds: float = Field(description="Time spent waiting in seconds") -class ChainCommand(BaseModel): - """One tmux command in a one-dispatch chain for :func:`run_command_chain`.""" +def _require_single_pane_target(pane_id: str | None, pane_ref: str | None) -> None: + """Validate exactly one concrete pane target or prior split reference.""" + if (pane_id is None) == (pane_ref is None): + msg = "Provide exactly one of pane_id or pane_ref." + raise ValueError(msg) + + +class TmuxOperationStatus(str, enum.Enum): + """Execution status for one typed tmux operation.""" + + SUCCEEDED = "succeeded" + FAILED = "failed" + SKIPPED = "skipped" + + +class _PaneTargetOperation(BaseModel): + """Shared target fields for operations that act on one pane.""" model_config = ConfigDict(extra="forbid") - command: str = Field(description="tmux command name, e.g. 'split-window'.") - args: list[str] = Field( - default_factory=list, - description="Positional argument tokens, rendered in order after the target.", + pane_id: str | None = Field( + default=None, + description="Concrete tmux pane ID, e.g. '%1'.", ) - target: str | None = Field( + pane_ref: str | None = Field( default=None, - description=( - "Optional ``-t`` target (pane/window/session id). An empty string is " - "rejected; None means the command carries no target." 
- ), + description="Reference name captured from an earlier split_pane operation.", ) -class RunCommandChainResult(BaseModel): - """Result of one native tmux command-sequence dispatch.""" +class SplitPaneOperation(_PaneTargetOperation): + """Split a pane and optionally expose the new pane under ``ref``.""" - argv: list[str] = Field( - description="Rendered argv, with a standalone ';' token between commands.", + kind: t.Literal["split_pane"] = Field( + default="split_pane", + description="Operation discriminator.", ) - command_count: int = Field( - description="Number of commands folded into the single dispatch.", + ref: str | None = Field( + default=None, + description="Reference name for the created pane ID.", ) - returncode: int = Field(description="Exit code of the single tmux invocation.") - stdout: list[str] = Field( - default_factory=list, - description="Merged stdout lines from the sequence.", + horizontal: bool = Field( + default=False, + description="Split left/right (-h) instead of top/bottom.", ) - stderr: list[str] = Field( - default_factory=list, - description="Merged stderr lines from the sequence.", + shell: str | None = Field( + default=None, + description="Command to run in the new pane instead of the default shell.", ) + @model_validator(mode="after") + def _validate_target(self) -> SplitPaneOperation: + _require_single_pane_target(self.pane_id, self.pane_ref) + return self -class ForwardSplit(BaseModel): - """One split for :func:`build_forward_layout`.""" - model_config = ConfigDict(extra="forbid") +class TmuxSendKeysOperation(_PaneTargetOperation): + """Send keys to a concrete pane or prior split reference.""" - horizontal: bool = Field( + kind: t.Literal["send_keys"] = Field( + default="send_keys", + description="Operation discriminator.", + ) + keys: str = Field(description="Keys or text to send.") + enter: bool = Field(default=True, description="Press Enter after sending keys.") + literal: bool = Field( default=False, - description="Split 
left/right (-h) instead of top/bottom (-v).", + description="Pass -l so tmux sends keys literally.", ) - shell: str | None = Field( + + @model_validator(mode="after") + def _validate_target(self) -> TmuxSendKeysOperation: + _require_single_pane_target(self.pane_id, self.pane_ref) + return self + + +class ResizePaneOperation(_PaneTargetOperation): + """Resize a pane by dimensions or zoom toggle.""" + + kind: t.Literal["resize_pane"] = Field( + default="resize_pane", + description="Operation discriminator.", + ) + height: int | None = Field(default=None, description="New height in lines.") + width: int | None = Field(default=None, description="New width in columns.") + zoom: bool | None = Field(default=None, description="Toggle pane zoom.") + + @model_validator(mode="after") + def _validate_resize(self) -> ResizePaneOperation: + _require_single_pane_target(self.pane_id, self.pane_ref) + if self.zoom is not None and ( + self.height is not None or self.width is not None + ): + msg = "Cannot combine zoom with height/width." + raise ValueError(msg) + if self.zoom is None and self.height is None and self.width is None: + msg = "Provide height, width, or zoom." + raise ValueError(msg) + return self + + +class SelectLayoutOperation(BaseModel): + """Select a layout for a tmux window.""" + + model_config = ConfigDict(extra="forbid") + + kind: t.Literal["select_layout"] = Field( + default="select_layout", + description="Operation discriminator.", + ) + window_id: str = Field(description="Concrete tmux window ID, e.g. 
'@1'.") + layout: str = Field(description="Layout name or custom layout string.") + + +class SetOptionOperation(BaseModel): + """Set a tmux option at server, session, window, or pane scope.""" + + model_config = ConfigDict(extra="forbid") + + kind: t.Literal["set_option"] = Field( + default="set_option", + description="Operation discriminator.", + ) + option: str = Field(description="Option name to set.") + value: str = Field(description="Option value.") + scope: t.Literal["server", "session", "window", "pane"] | None = Field( default=None, - description="Command to run in the new pane instead of the default shell.", + description="Option scope; omitted means server option.", ) - send_keys: str | None = Field( + target: str | None = Field( default=None, - description="Keys to send into the new pane after it is created.", + description="Target identifier for session, window, or pane scoped options.", ) + global_: bool = Field(default=False, description="Set the global option table.") + @model_validator(mode="after") + def _validate_target(self) -> SetOptionOperation: + if self.target is not None and self.scope is None: + msg = "scope is required when target is specified." + raise ValueError(msg) + if self.scope in {"session", "window", "pane"} and self.target is None: + msg = "target is required when scope is 'session', 'window', or 'pane'." 
+ raise ValueError(msg) + return self -class ForwardLayoutResult(BaseModel): - """Result of :func:`build_forward_layout`.""" - pane_ids: list[str] = Field( - description="Captured ids of the created panes, in split order.", +class CapturePaneOperation(_PaneTargetOperation): + """Capture pane output as a standalone read operation.""" + + kind: t.Literal["capture_pane"] = Field( + default="capture_pane", + description="Operation discriminator.", + ) + start: int | None = Field(default=None, description="Start capture line.") + end: int | None = Field(default=None, description="End capture line.") + + @model_validator(mode="after") + def _validate_target(self) -> CapturePaneOperation: + _require_single_pane_target(self.pane_id, self.pane_ref) + return self + + +TmuxOperation: t.TypeAlias = t.Annotated[ + SplitPaneOperation + | TmuxSendKeysOperation + | ResizePaneOperation + | SelectLayoutOperation + | SetOptionOperation + | CapturePaneOperation, + Field(discriminator="kind"), +] + + +class TmuxOperationStepResult(BaseModel): + """Result for one typed operation.""" + + index: int = Field(description="Zero-based operation index.") + kind: str = Field(description="Operation kind.") + status: TmuxOperationStatus = Field(description="Execution status.") + returncode: int | None = Field( + default=None, + description="tmux return code when the operation was dispatched.", ) - dispatch_count: int = Field( - description="Number of native tmux invocations the resolution used.", + stdout: list[str] | None = Field( + default=None, + description="stdout lines for standalone/output operations.", + ) + stderr: list[str] | None = Field( + default=None, + description="stderr lines for failed or standalone operations.", + ) + created_pane_id: str | None = Field( + default=None, + description="Pane ID captured from a split_pane operation with ref.", + ) + + +class TmuxOperationDispatchResult(BaseModel): + """Result for one native tmux dispatch.""" + + mode: t.Literal["chain", 
"standalone"] = Field( + description="Whether the dispatch used a tmux sequence or one command.", + ) + operation_indexes: list[int] = Field( + description="Operation indexes included in this dispatch.", + ) + argv: list[str] = Field(description="Rendered tmux argv.") + returncode: int = Field(description="tmux process exit code.") + stdout: list[str] = Field(default_factory=list, description="stdout lines.") + stderr: list[str] = Field(default_factory=list, description="stderr lines.") + + +class RunTmuxOperationsResult(BaseModel): + """Result of compiling and running typed tmux operations.""" + + succeeded: bool = Field(description="False when any operation failed or skipped.") + dispatch_count: int = Field(description="Number of native tmux dispatches.") + dispatches: list[TmuxOperationDispatchResult] = Field( + description="Native tmux dispatches used by the compiler.", + ) + steps: list[TmuxOperationStepResult] = Field( + description="Per-operation results in input order.", + ) + created_panes: dict[str, str] = Field( + default_factory=dict, + description="Mapping of split_pane ref names to concrete pane IDs.", ) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index 917eefd..ff43e0f 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -1,10 +1,4 @@ -"""MCP tools that run a chain of tmux commands as one native invocation. - -These leverage libtmux's experimental ``libtmux._experimental.chain`` API to -fold an ordered set of tmux commands into a single ``tmux a ; b ; c`` dispatch -(one subprocess), instead of issuing one tmux call per command. The chain API is -experimental and pinned to a sibling worktree; do not ship to a release. 
-""" +"""Typed MCP tool for compiling tmux operations into native dispatches.""" from __future__ import annotations @@ -12,172 +6,429 @@ import typing as t from libtmux._experimental.chain import ( - AsyncServerPlanRunner, CommandCall, CommandChain, - ForwardPlan, + CommandResultLike, + CommandRunner, ) +from pydantic import TypeAdapter from libtmux_mcp._utils import ( - ANNOTATIONS_DESTRUCTIVE, ANNOTATIONS_SHELL, - TAG_DESTRUCTIVE, TAG_MUTATING, ExpectedToolError, _get_server, - _resolve_pane, handle_tool_errors_async, ) from libtmux_mcp.models import ( - ChainCommand, - ForwardLayoutResult, - ForwardSplit, - RunCommandChainResult, + CapturePaneOperation, + ResizePaneOperation, + RunTmuxOperationsResult, + SelectLayoutOperation, + SetOptionOperation, + SplitPaneOperation, + TmuxOperation, + TmuxOperationDispatchResult, + TmuxOperationStatus, + TmuxOperationStepResult, + TmuxSendKeysOperation, ) if t.TYPE_CHECKING: from fastmcp import FastMCP -#: tmux commands refused outright: unrecoverable, no ``-t`` makes them safe, and -#: they would take down the server hosting this MCP. -_BLOCKED_COMMANDS = frozenset({"kill-server"}) +TMUX_OPERATIONS_ADAPTER: TypeAdapter[list[TmuxOperation]] = TypeAdapter( + list[TmuxOperation], +) +_PendingCalls: t.TypeAlias = tuple[int, str, tuple[CommandCall, ...]] -@handle_tool_errors_async -async def run_command_chain( - commands: list[ChainCommand], - socket_name: str | None = None, -) -> RunCommandChainResult: - """Run an ordered list of tmux commands as ONE native tmux invocation. - - The commands are folded into a single ``tmux a ; b ; c`` sequence and - dispatched once, instead of one tmux subprocess per command. tmux applies - its native sequence semantics: a command that errors aborts the rest. Each - command's ``-t`` target is passed through verbatim, so a chain may span - heterogeneous scopes (panes, windows, sessions). 
- - Parameters - ---------- - commands : list[ChainCommand] - Ordered tmux commands, each ``{command, args, target}``. Must be - non-empty; ``kill-server`` is refused. - socket_name : str, optional - tmux socket name (falls back to ``LIBTMUX_SOCKET``). - - Returns - ------- - RunCommandChainResult - The rendered ``argv`` (with ``;`` separators), the command count, and - the single invocation's merged exit code, stdout, and stderr. - """ - if not commands: - msg = "commands must not be empty" - raise ExpectedToolError(msg) - blocked = sorted({cmd.command for cmd in commands} & _BLOCKED_COMMANDS) - if blocked: - msg = f"refusing to run unrecoverable command(s): {', '.join(blocked)}" - raise ExpectedToolError(msg) +class _CompileError(Exception): + """Operation-level compile failure that should become a step result.""" + - server = _get_server(socket_name=socket_name) +def _target_pane( + pane_id: str | None, + pane_ref: str | None, + created_panes: dict[str, str], +) -> str: + """Return the concrete pane target for an operation.""" + if pane_id is not None: + return pane_id + if pane_ref is None: + msg = "operation is missing pane_id or pane_ref" + raise _CompileError(msg) try: - chain = CommandChain( - tuple( - CommandCall(cmd.command, tuple(cmd.args), target=cmd.target) - for cmd in commands - ), - ) - except ValueError as exc: # empty-string target / empty chain (fail closed) - raise ExpectedToolError(str(exc)) from exc - - argv = list(chain.argv()) - # A live Server satisfies the CommandRunner protocol; dispatch ONCE, off the - # event loop (libtmux dispatch is blocking). 
- result = await asyncio.to_thread(chain.run, server) - return RunCommandChainResult( - argv=argv, - command_count=len(commands), + return created_panes[pane_ref] + except KeyError as exc: + msg = f"unknown pane_ref: {pane_ref}" + raise _CompileError(msg) from exc + + +def _split_calls( + operation: SplitPaneOperation, + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Build ``split-window`` calls for a typed split operation.""" + args: list[str] = [] + if operation.horizontal: + args.append("-h") + if operation.ref is not None: + args.extend(("-P", "-F", "#{pane_id}")) + if operation.shell is not None: + args.append(operation.shell) + return ( + CommandCall( + "split-window", + tuple(args), + target=_target_pane(operation.pane_id, operation.pane_ref, created_panes), + ), + ) + + +def _send_keys_calls( + operation: TmuxSendKeysOperation, + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Build one operation's ``send-keys`` calls.""" + target = _target_pane(operation.pane_id, operation.pane_ref, created_panes) + if operation.literal: + calls = [ + CommandCall("send-keys", ("-l", operation.keys), target=target), + ] + if operation.enter: + calls.append(CommandCall("send-keys", ("Enter",), target=target)) + return tuple(calls) + + args: list[str] = [operation.keys] + if operation.enter: + args.append("Enter") + return (CommandCall("send-keys", tuple(args), target=target),) + + +def _resize_pane_calls( + operation: ResizePaneOperation, + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Build ``resize-pane`` calls for a typed resize operation.""" + args: list[str | int] = [] + if operation.zoom: + args.append("-Z") + if operation.height is not None: + args.extend(("-y", operation.height)) + if operation.width is not None: + args.extend(("-x", operation.width)) + return ( + CommandCall( + "resize-pane", + tuple(args), + target=_target_pane(operation.pane_id, operation.pane_ref, created_panes), + ), + ) + + +def 
_select_layout_calls(operation: SelectLayoutOperation) -> tuple[CommandCall, ...]: + """Build ``select-layout`` calls for a typed layout operation.""" + return ( + CommandCall("select-layout", (operation.layout,), target=operation.window_id), + ) + + +def _set_option_calls(operation: SetOptionOperation) -> tuple[CommandCall, ...]: + """Build ``set-option`` calls for a typed option operation.""" + args: list[str] = [] + if operation.global_: + args.append("-g") + if operation.scope == "server": + args.append("-s") + elif operation.scope == "window": + args.append("-w") + elif operation.scope == "pane": + args.append("-p") + args.extend((operation.option, operation.value)) + return (CommandCall("set-option", tuple(args), target=operation.target),) + + +def _capture_pane_calls( + operation: CapturePaneOperation, + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Build ``capture-pane`` calls for a typed capture operation.""" + args: list[str | int] = ["-p"] + if operation.start is not None: + args.extend(("-S", operation.start)) + if operation.end is not None: + args.extend(("-E", operation.end)) + return ( + CommandCall( + "capture-pane", + tuple(args), + target=_target_pane(operation.pane_id, operation.pane_ref, created_panes), + ), + ) + + +def _operation_calls( + operation: TmuxOperation, + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Lower one typed operation to tmux command calls.""" + if isinstance(operation, SplitPaneOperation): + return _split_calls(operation, created_panes) + if isinstance(operation, TmuxSendKeysOperation): + return _send_keys_calls(operation, created_panes) + if isinstance(operation, ResizePaneOperation): + return _resize_pane_calls(operation, created_panes) + if isinstance(operation, SelectLayoutOperation): + return _select_layout_calls(operation) + if isinstance(operation, SetOptionOperation): + return _set_option_calls(operation) + if isinstance(operation, CapturePaneOperation): + return 
_capture_pane_calls(operation, created_panes) + msg = f"unsupported operation type: {type(operation).__name__}" + raise TypeError(msg) + + +def _is_output_operation(operation: TmuxOperation) -> bool: + """Return whether an operation must run outside a pending chain.""" + return isinstance(operation, CapturePaneOperation) or ( + isinstance(operation, SplitPaneOperation) and operation.ref is not None + ) + + +def _run_calls( + runner: CommandRunner, + calls: tuple[CommandCall, ...], +) -> tuple[list[str], CommandResultLike]: + """Run one operation's calls as a single native dispatch.""" + if len(calls) == 1: + argv = list(calls[0].argv()) + result = runner.cmd(argv[0], *argv[1:]) + return argv, result + + chain = CommandChain(calls) + result = chain.run(runner) + return list(chain.argv()), result + + +def _dispatch_standalone( + runner: CommandRunner, + index: int, + kind: str, + calls: tuple[CommandCall, ...], + *, + capture_created_pane: bool, +) -> tuple[TmuxOperationDispatchResult, TmuxOperationStepResult, str | None]: + """Run one operation and return dispatch, step, and captured pane id.""" + argv, result = _run_calls(runner, calls) + stdout = list(result.stdout) + stderr = list(result.stderr) + created_pane_id: str | None = None + status = TmuxOperationStatus.SUCCEEDED + if result.returncode != 0: + status = TmuxOperationStatus.FAILED + elif capture_created_pane: + if stdout: + created_pane_id = stdout[0] + else: + status = TmuxOperationStatus.FAILED + stderr = [*stderr, "split-pane did not return a pane id"] + + return ( + TmuxOperationDispatchResult( + mode="standalone", + operation_indexes=[index], + argv=argv, + returncode=result.returncode, + stdout=stdout, + stderr=stderr, + ), + TmuxOperationStepResult( + index=index, + kind=kind, + status=status, + returncode=result.returncode, + stdout=stdout, + stderr=stderr, + created_pane_id=created_pane_id, + ), + created_pane_id, + ) + + +def _dispatch_chain( + runner: CommandRunner, + pending: 
list[_PendingCalls], +) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: + """Run pending operations as one tmux command sequence.""" + calls = tuple(call for _, _, op_calls in pending for call in op_calls) + chain = CommandChain(calls) + result = chain.run(runner) + stdout = list(result.stdout) + stderr = list(result.stderr) + status = ( + TmuxOperationStatus.SUCCEEDED + if result.returncode == 0 + else TmuxOperationStatus.FAILED + ) + dispatch = TmuxOperationDispatchResult( + mode="chain", + operation_indexes=[index for index, _, _ in pending], + argv=list(chain.argv()), returncode=result.returncode, - stdout=list(result.stdout), - stderr=list(result.stderr), + stdout=stdout, + stderr=stderr, ) + steps = [ + TmuxOperationStepResult( + index=index, + kind=kind, + status=status, + returncode=result.returncode, + stdout=stdout if status == TmuxOperationStatus.FAILED else None, + stderr=stderr if status == TmuxOperationStatus.FAILED else None, + ) + for index, kind, _ in pending + ] + return dispatch, steps -def _send_keys_decorate(keys: str) -> t.Callable[..., t.Any]: - """Build a send_keys decorate bound to a captured string (per-iteration binding).""" +def _compile_failure_step( + index: int, + operation: TmuxOperation, + error: Exception, +) -> TmuxOperationStepResult: + """Convert a compile failure into a step result.""" + return TmuxOperationStepResult( + index=index, + kind=operation.kind, + status=TmuxOperationStatus.FAILED, + stderr=[str(error)], + ) - def build(handle: t.Any) -> t.Any: - return handle.cmd.send_keys(keys, enter=True) - return build +def _skipped_step(index: int, operation: TmuxOperation) -> TmuxOperationStepResult: + """Return a skipped result for an operation after stop-on-error.""" + return TmuxOperationStepResult( + index=index, + kind=operation.kind, + status=TmuxOperationStatus.SKIPPED, + ) @handle_tool_errors_async -async def build_forward_layout( - splits: list[ForwardSplit], - pane_id: str | None = None, - 
session_name: str | None = None, - session_id: str | None = None, +async def run_tmux_operations( + operations: list[TmuxOperation], + on_error: t.Literal["stop", "continue"] = "stop", socket_name: str | None = None, -) -> ForwardLayoutResult: - r"""Split a seed pane into several panes, returning their new ids. - - Unlike a single ``\;`` chain, this captures the id tmux assigns each new - pane (a fresh id can't be substituted back into the same invocation), so it - resolves over the minimum number of dispatches: a single split folds into - one, several independent splits take one per creation plus one trailing - chain for the decorations. - - Parameters - ---------- - splits : list[ForwardSplit] - Splits off the seed pane, each ``{horizontal, shell, send_keys}``. - pane_id : str, optional - Seed pane id; defaults to the resolved/active pane. - session_name, session_id : str, optional - Used to resolve the seed pane when ``pane_id`` is omitted. - socket_name : str, optional - tmux socket name. - - Returns - ------- - ForwardLayoutResult - The created pane ids (in split order) and the dispatch count. +) -> RunTmuxOperationsResult: + """Run typed tmux operations with minimum safe native dispatches. + + Consecutive chainable, no-output operations fold into one tmux + ``a ; b ; c`` sequence. Operations that need per-step output, such as + ``capture_pane`` and id-producing ``split_pane`` refs, run as standalone + dispatches so their stdout can be attributed to the correct operation. + ``on_error="continue"`` disables folding because tmux sequences abort the + rest of the sequence on first failure. 
""" - if not splits: - msg = "splits must not be empty" + validated = TMUX_OPERATIONS_ADAPTER.validate_python(operations) + if not validated: + msg = "operations must not be empty" + raise ExpectedToolError(msg) + if on_error not in {"stop", "continue"}: + msg = "on_error must be 'stop' or 'continue'" raise ExpectedToolError(msg) - server = _get_server(socket_name=socket_name) - seed = _resolve_pane( - server, - pane_id=pane_id, - session_name=session_name, - session_id=session_id, - ) - plan = ForwardPlan.from_pane(seed) - for split in splits: - handle = plan.split(horizontal=split.horizontal, shell=split.shell) - if split.send_keys is not None: - handle.do(_send_keys_decorate(split.send_keys)) - - resolved = await plan.run_resolving_async(AsyncServerPlanRunner(server)) - pane_ids = [resolved.bindings[index] for index in range(len(splits))] - return ForwardLayoutResult( - pane_ids=pane_ids, - dispatch_count=len(resolved.results), + runner = _get_server(socket_name=socket_name) + pending: list[_PendingCalls] = [] + dispatches: list[TmuxOperationDispatchResult] = [] + steps_by_index: dict[int, TmuxOperationStepResult] = {} + created_panes: dict[str, str] = {} + + async def flush_pending() -> bool: + if not pending: + return True + dispatch, steps = await asyncio.to_thread(_dispatch_chain, runner, pending) + dispatches.append(dispatch) + pending.clear() + for step in steps: + steps_by_index[step.index] = step + return all(step.status == TmuxOperationStatus.SUCCEEDED for step in steps) + + for index, operation in enumerate(validated): + try: + calls = _operation_calls(operation, created_panes) + except _CompileError as exc: + if not await flush_pending(): + for skip_index, skipped in enumerate(validated[index:], start=index): + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + break + steps_by_index[index] = _compile_failure_step(index, operation, exc) + if on_error == "stop": + for skip_index, skipped in enumerate( + validated[index + 1 :], + 
start=index + 1, + ): + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + break + continue + + force_standalone = on_error == "continue" or _is_output_operation(operation) + if not force_standalone: + pending.append((index, operation.kind, calls)) + continue + + if not await flush_pending() and on_error == "stop": + for skip_index, skipped in enumerate(validated[index:], start=index): + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + break + + capture_created_pane = ( + isinstance(operation, SplitPaneOperation) and operation.ref is not None + ) + dispatch, step, created_pane_id = await asyncio.to_thread( + _dispatch_standalone, + runner, + index, + operation.kind, + calls, + capture_created_pane=capture_created_pane, + ) + dispatches.append(dispatch) + steps_by_index[index] = step + if ( + isinstance(operation, SplitPaneOperation) + and operation.ref is not None + and created_pane_id is not None + ): + created_panes[operation.ref] = created_pane_id + if step.status != TmuxOperationStatus.SUCCEEDED and on_error == "stop": + for skip_index, skipped in enumerate( + validated[index + 1 :], + start=index + 1, + ): + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + break + + if pending: + await flush_pending() + + steps = [steps_by_index[index] for index in range(len(validated))] + succeeded = all(step.status == TmuxOperationStatus.SUCCEEDED for step in steps) + return RunTmuxOperationsResult( + succeeded=succeeded, + dispatch_count=len(dispatches), + dispatches=dispatches, + steps=steps, + created_panes=created_panes, ) def register(mcp: FastMCP) -> None: - """Register chain tools with the MCP instance.""" - mcp.tool( - title="Run Command Chain", - annotations=ANNOTATIONS_DESTRUCTIVE, - tags={TAG_DESTRUCTIVE}, - )(run_command_chain) + """Register typed chain tools with the MCP instance.""" mcp.tool( - title="Build Forward Layout", + title="Run tmux Operations", annotations=ANNOTATIONS_SHELL, tags={TAG_MUTATING}, - 
)(build_forward_layout) + )(run_tmux_operations) diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index a2b6275..8a0ddb6 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -1,4 +1,4 @@ -"""Tests for the chain command tools (one-dispatch tmux command sequences).""" +"""Tests for typed tmux operation chains.""" from __future__ import annotations @@ -6,10 +6,21 @@ import typing as t import pytest +from pydantic import ValidationError from libtmux_mcp._utils import ExpectedToolError -from libtmux_mcp.models import ChainCommand, ForwardSplit -from libtmux_mcp.tools.chain_tools import build_forward_layout, run_command_chain +from libtmux_mcp.models import ( + CapturePaneOperation, + SetOptionOperation, + SplitPaneOperation, + TmuxOperation, + TmuxOperationStatus, + TmuxSendKeysOperation, +) +from libtmux_mcp.tools.chain_tools import ( + TMUX_OPERATIONS_ADAPTER, + run_tmux_operations, +) if t.TYPE_CHECKING: from libtmux.pane import Pane @@ -17,121 +28,190 @@ from libtmux.session import Session -def test_run_command_chain_one_dispatch(mcp_session: Session) -> None: - """Two set-option commands take effect from a single tmux invocation.""" - server = mcp_session.server - result = asyncio.run( - run_command_chain( - commands=[ - ChainCommand(command="set-option", args=["-g", "@cc_a", "1"]), - ChainCommand(command="set-option", args=["-g", "@cc_b", "2"]), - ], - socket_name=server.socket_name, - ), - ) +class SetOptionChainCase(t.NamedTuple): + """Case for option operations that can fold into one dispatch.""" - assert result.returncode == 0 - assert result.command_count == 2 - assert ";" in result.argv # the standalone separator proves one sequence - assert server.cmd("show-option", "-gv", "@cc_a").stdout == ["1"] - assert server.cmd("show-option", "-gv", "@cc_b").stdout == ["2"] + test_id: str + operations: list[TmuxOperation] + expected_values: dict[str, str] -def test_run_command_chain_aborts_on_error(mcp_session: Session) -> None: 
- """A failing command aborts the rest of the sequence (tmux ; semantics).""" +@pytest.mark.parametrize( + "case", + [ + SetOptionChainCase( + test_id="two_global_options", + operations=[ + SetOptionOperation(option="@cc_ops_a", value="1", global_=True), + SetOptionOperation(option="@cc_ops_b", value="2", global_=True), + ], + expected_values={"@cc_ops_a": "1", "@cc_ops_b": "2"}, + ), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_folds_chainable_ops( + case: SetOptionChainCase, + mcp_session: Session, +) -> None: + """Consecutive no-output mutating operations use one native chain.""" server = mcp_session.server result = asyncio.run( - run_command_chain( - commands=[ - ChainCommand(command="rename-window", args=["x"], target="@999999"), - ChainCommand(command="set-option", args=["-g", "@cc_sentinel", "set"]), - ], + run_tmux_operations( + operations=case.operations, socket_name=server.socket_name, ), ) - assert result.returncode != 0 - # the sequence aborted at the failing command, so the sentinel never ran: - assert "set" not in server.cmd("show-option", "-gv", "@cc_sentinel").stdout - - -def test_run_command_chain_validation(mcp_session: Session) -> None: - """An empty list and an empty-string target both fail closed.""" - socket = mcp_session.server.socket_name - with pytest.raises(ExpectedToolError): - asyncio.run(run_command_chain(commands=[], socket_name=socket)) - - with pytest.raises(ExpectedToolError): - asyncio.run( - run_command_chain( - commands=[ChainCommand(command="kill-window", target="")], - socket_name=socket, - ), - ) - - -def test_run_command_chain_blocks_kill_server(mcp_session: Session) -> None: - """kill-server is refused outright and the server survives.""" - server = mcp_session.server - with pytest.raises(ExpectedToolError): - asyncio.run( - run_command_chain( - commands=[ChainCommand(command="kill-server")], - socket_name=server.socket_name, - ), - ) - assert server.is_alive() + assert result.succeeded + assert 
result.dispatch_count == 1 + assert result.dispatches[0].mode == "chain" + assert ";" in result.dispatches[0].argv + assert [step.status for step in result.steps] == [ + TmuxOperationStatus.SUCCEEDED, + TmuxOperationStatus.SUCCEEDED, + ] + for option, value in case.expected_values.items(): + assert server.cmd("show-option", "-gv", option).stdout == [value] + + +def test_run_tmux_operations_breaks_before_output_op( + mcp_server: Server, + mcp_pane: Pane, +) -> None: + """Read operations force a standalone dispatch with per-step stdout.""" + from libtmux_mcp.tools.wait_for_tools import wait_for_channel + channel = "cc_ops_capture" + mcp_pane.send_keys(f"printf 'CC_OPS_CAPTURE\\n'; tmux wait-for -S {channel}") + asyncio.run( + wait_for_channel(channel, timeout=5.0, socket_name=mcp_server.socket_name) + ) -def test_build_forward_layout_captures_ids(mcp_server: Server, mcp_pane: Pane) -> None: - """Two splits off a seed pane return two distinct, real pane ids.""" result = asyncio.run( - build_forward_layout( - splits=[ForwardSplit(horizontal=True), ForwardSplit()], - pane_id=mcp_pane.pane_id, + run_tmux_operations( + operations=[ + SetOptionOperation( + option="@cc_ops_before_capture", + value="1", + global_=True, + ), + CapturePaneOperation(pane_id=mcp_pane.pane_id), + ], socket_name=mcp_server.socket_name, ), ) - assert len(result.pane_ids) == 2 - assert result.pane_ids[0] != result.pane_ids[1] - assert all(pid.startswith("%") for pid in result.pane_ids) - assert result.dispatch_count >= 2 # independent splits need a dispatch each - - mcp_pane.window.refresh() - existing = {p.pane_id for p in mcp_pane.window.panes} - assert set(result.pane_ids) <= existing + assert result.succeeded + assert result.dispatch_count == 2 + assert [dispatch.mode for dispatch in result.dispatches] == [ + "chain", + "standalone", + ] + assert result.steps[1].stdout is not None + assert "CC_OPS_CAPTURE" in "\n".join(result.steps[1].stdout) -def 
test_build_forward_layout_single_split_send_keys( - mcp_server: Server, mcp_pane: Pane +def test_run_tmux_operations_captures_split_refs( + mcp_server: Server, + mcp_pane: Pane, ) -> None: - """A lone split folds to one dispatch and its send_keys reaches the new pane.""" + """A typed split ref can target later operations without raw commands.""" from libtmux_mcp.tools.wait_for_tools import wait_for_channel - channel = "cc_fwd_layout" - keys = f"printf 'CC_FWD\\n'; tmux wait-for -S {channel}" + channel = "cc_ops_split_ref" + keys = f"printf 'CC_OPS_REF\\n'; tmux wait-for -S {channel}" result = asyncio.run( - build_forward_layout( - splits=[ForwardSplit(send_keys=keys)], - pane_id=mcp_pane.pane_id, + run_tmux_operations( + operations=[ + SplitPaneOperation(ref="child", pane_id=mcp_pane.pane_id), + TmuxSendKeysOperation(pane_ref="child", keys=keys), + ], socket_name=mcp_server.socket_name, ), ) - assert len(result.pane_ids) == 1 - assert result.dispatch_count == 1 # single split -> one {marked} invocation + assert result.succeeded + assert result.dispatch_count == 2 + new_pane_id = result.created_panes["child"] + assert new_pane_id.startswith("%") asyncio.run( - wait_for_channel(channel, timeout=5.0, socket_name=mcp_server.socket_name), + wait_for_channel(channel, timeout=5.0, socket_name=mcp_server.socket_name) ) mcp_pane.window.refresh() - new_pane = mcp_pane.window.panes.get(pane_id=result.pane_ids[0]) + new_pane = mcp_pane.window.panes.get(pane_id=new_pane_id) assert new_pane is not None - assert "CC_FWD" in "\n".join(new_pane.capture_pane()) + assert "CC_OPS_REF" in "\n".join(new_pane.capture_pane()) + +def test_run_tmux_operations_continue_uses_standalone_dispatches( + mcp_session: Session, +) -> None: + """Continue mode preserves later operations instead of native chain abort.""" + server = mcp_session.server + result = asyncio.run( + run_tmux_operations( + operations=[ + TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), + SetOptionOperation( + 
option="@cc_ops_after_error", + value="set", + global_=True, + ), + ], + on_error="continue", + socket_name=server.socket_name, + ), + ) -def test_build_forward_layout_validation(mcp_pane: Pane) -> None: - """An empty split list fails closed.""" - with pytest.raises(ExpectedToolError): - asyncio.run(build_forward_layout(splits=[], pane_id=mcp_pane.pane_id)) + assert not result.succeeded + assert result.dispatch_count == 2 + assert [step.status for step in result.steps] == [ + TmuxOperationStatus.FAILED, + TmuxOperationStatus.SUCCEEDED, + ] + assert server.cmd("show-option", "-gv", "@cc_ops_after_error").stdout == ["set"] + + +class ValidationCase(t.NamedTuple): + """Case for typed operation validation failures.""" + + test_id: str + operations: object + expected_error: type[Exception] + + +@pytest.mark.parametrize( + "case", + [ + ValidationCase( + test_id="empty_operations", + operations=[], + expected_error=ExpectedToolError, + ), + ValidationCase( + test_id="unknown_raw_kind", + operations=[{"kind": "kill_server"}], + expected_error=ValidationError, + ), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_validation( + case: ValidationCase, + mcp_session: Session, +) -> None: + """The tool accepts only non-empty typed operation variants.""" + if case.expected_error is ValidationError: + with pytest.raises(case.expected_error): + TMUX_OPERATIONS_ADAPTER.validate_python(case.operations) + return + + with pytest.raises(case.expected_error): + asyncio.run( + run_tmux_operations( + operations=t.cast("list[TmuxOperation]", case.operations), + socket_name=mcp_session.server.socket_name, + ), + ) From cffc57eb3b7ddad807e2f4737144087c894cacb1 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:10:31 -0500 Subject: [PATCH 07/25] Chain(feat[tools]): Fold marked split refs why: A split that immediately feeds typed decorations should keep the one-dispatch behavior that tmux supports through the marked-pane target while still returning the 
created pane id. what: - Detect immediate send_keys/resize operations targeting a fresh split ref - Compile them through tmux's {marked} target in one sequence - Assert the single-dispatch split-ref path in tests and docs --- docs/tools/chain/run-tmux-operations.md | 5 + src/libtmux_mcp/tools/chain_tools.py | 148 +++++++++++++++++++++++- tests/test_chain_tools.py | 4 +- 3 files changed, 152 insertions(+), 5 deletions(-) diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md index 0eb46ec..a609f73 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-operations.md @@ -17,6 +17,11 @@ one tmux sequence and native tmux failure semantics stop later steps. With `on_error="continue"`, operations run as standalone dispatches so later steps can still run after an earlier failure. +An id-producing `split_pane` can fold with immediate `send_keys` or +`resize_pane` operations that target its `pane_ref`; the tool uses +tmux's `{marked}` target internally and still returns the concrete pane +ID in `created_panes`. 
+ **Example:** ```json diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index ff43e0f..bc618d3 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -42,6 +42,7 @@ ) _PendingCalls: t.TypeAlias = tuple[int, str, tuple[CommandCall, ...]] +_MarkedDecorate: t.TypeAlias = tuple[int, TmuxOperation] class _CompileError(Exception): @@ -197,6 +198,28 @@ def _is_output_operation(operation: TmuxOperation) -> bool: ) +def _collect_marked_decorates( + operations: list[TmuxOperation], + start: int, + pane_ref: str, +) -> tuple[list[_MarkedDecorate], int]: + """Collect immediate operations that can target a fresh split via {marked}.""" + decorates: list[_MarkedDecorate] = [] + index = start + 1 + while index < len(operations): + operation = operations[index] + if ( + isinstance(operation, (TmuxSendKeysOperation, ResizePaneOperation)) + and operation.pane_id is None + and operation.pane_ref == pane_ref + ): + decorates.append((index, operation)) + index += 1 + continue + break + return decorates, index + + def _run_calls( runner: CommandRunner, calls: tuple[CommandCall, ...], @@ -257,6 +280,72 @@ def _dispatch_standalone( ) +def _dispatch_marked_split( + runner: CommandRunner, + index: int, + operation: SplitPaneOperation, + split_calls: tuple[CommandCall, ...], + decorates: list[_MarkedDecorate], + created_panes: dict[str, str], +) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult], str | None]: + """Run one id-producing split and its immediate decorates via {marked}.""" + if operation.ref is None: + msg = "marked split dispatch requires a split ref" + raise _CompileError(msg) + + marked_created = {**created_panes, operation.ref: "{marked}"} + calls = [*split_calls, CommandCall("select-pane", ("-m",))] + for _, decorate in decorates: + calls.extend(_operation_calls(decorate, marked_created)) + calls.append(CommandCall("select-pane", ("-M",))) + + chain = CommandChain(tuple(calls)) + 
result = chain.run(runner) + stdout = list(result.stdout) + stderr = list(result.stderr) + created_pane_id: str | None = None + status = TmuxOperationStatus.SUCCEEDED + if result.returncode != 0: + status = TmuxOperationStatus.FAILED + elif stdout: + created_pane_id = stdout[0] + else: + status = TmuxOperationStatus.FAILED + stderr = [*stderr, "split-pane did not return a pane id"] + + dispatch = TmuxOperationDispatchResult( + mode="chain", + operation_indexes=[index, *(decorate_index for decorate_index, _ in decorates)], + argv=list(chain.argv()), + returncode=result.returncode, + stdout=stdout, + stderr=stderr, + ) + steps = [ + TmuxOperationStepResult( + index=index, + kind=operation.kind, + status=status, + returncode=result.returncode, + stdout=stdout, + stderr=stderr, + created_pane_id=created_pane_id, + ), + *[ + TmuxOperationStepResult( + index=decorate_index, + kind=decorate.kind, + status=status, + returncode=result.returncode, + stdout=stdout if status == TmuxOperationStatus.FAILED else None, + stderr=stderr if status == TmuxOperationStatus.FAILED else None, + ) + for decorate_index, decorate in decorates + ], + ] + return dispatch, steps, created_pane_id + + def _dispatch_chain( runner: CommandRunner, pending: list[_PendingCalls], @@ -326,9 +415,11 @@ async def run_tmux_operations( """Run typed tmux operations with minimum safe native dispatches. Consecutive chainable, no-output operations fold into one tmux - ``a ; b ; c`` sequence. Operations that need per-step output, such as - ``capture_pane`` and id-producing ``split_pane`` refs, run as standalone - dispatches so their stdout can be attributed to the correct operation. + ``a ; b ; c`` sequence. Output operations such as ``capture_pane`` run as + standalone dispatches so their stdout can be attributed to the correct + operation. A single id-producing ``split_pane`` may still fold with + immediate decorations that target its ref through tmux's ``{marked}`` + register. 
``on_error="continue"`` disables folding because tmux sequences abort the rest of the sequence on first failure. """ @@ -356,7 +447,9 @@ async def flush_pending() -> bool: steps_by_index[step.index] = step return all(step.status == TmuxOperationStatus.SUCCEEDED for step in steps) - for index, operation in enumerate(validated): + index = 0 + while index < len(validated): + operation = validated[index] try: calls = _operation_calls(operation, created_panes) except _CompileError as exc: @@ -372,11 +465,57 @@ async def flush_pending() -> bool: ): steps_by_index[skip_index] = _skipped_step(skip_index, skipped) break + index += 1 continue + if ( + on_error == "stop" + and isinstance(operation, SplitPaneOperation) + and operation.ref is not None + ): + decorates, next_index = _collect_marked_decorates( + validated, + index, + operation.ref, + ) + if decorates: + if not await flush_pending(): + for skip_index, skipped in enumerate( + validated[index:], start=index + ): + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + break + dispatch, steps, created_pane_id = await asyncio.to_thread( + _dispatch_marked_split, + runner, + index, + operation, + calls, + decorates, + created_panes, + ) + dispatches.append(dispatch) + for step in steps: + steps_by_index[step.index] = step + if created_pane_id is not None: + created_panes[operation.ref] = created_pane_id + if any(step.status != TmuxOperationStatus.SUCCEEDED for step in steps): + for skip_index, skipped in enumerate( + validated[next_index:], + start=next_index, + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, + skipped, + ) + break + index = next_index + continue + force_standalone = on_error == "continue" or _is_output_operation(operation) if not force_standalone: pending.append((index, operation.kind, calls)) + index += 1 continue if not await flush_pending() and on_error == "stop": @@ -410,6 +549,7 @@ async def flush_pending() -> bool: ): steps_by_index[skip_index] = 
_skipped_step(skip_index, skipped) break + index += 1 if pending: await flush_pending() diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 8a0ddb6..438aec3 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -132,7 +132,9 @@ def test_run_tmux_operations_captures_split_refs( ) assert result.succeeded - assert result.dispatch_count == 2 + assert result.dispatch_count == 1 + assert result.dispatches[0].mode == "chain" + assert result.dispatches[0].operation_indexes == [0, 1] new_pane_id = result.created_panes["child"] assert new_pane_id.startswith("%") From 73a146272f9697b8326a3ea084963715eeb9c30b Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:26:58 -0500 Subject: [PATCH 08/25] Chain(docs): Document operation chains why: The typed operation compiler is now part of this branch's public surface, but the unreleased notes and tool page did not describe what callers can rely on. what: - Add an unreleased changelog entry for run_tmux_operations - Document dispatch boundaries and the generic batch-tool boundary --- CHANGES | 11 +++++++++++ docs/tools/chain/run-tmux-operations.md | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/CHANGES b/CHANGES index 4be978a..6966317 100644 --- a/CHANGES +++ b/CHANGES @@ -6,6 +6,17 @@ _Notes on upcoming releases will be added here_ +### What's new + +**Typed tmux operation chains with {tooliconl}`run-tmux-operations`** + +{tooliconl}`run-tmux-operations` accepts an ordered list of typed tmux +operations and compiles safe no-output steps into the fewest native tmux +dispatches it can. It preserves per-operation results for standalone read +steps, returns concrete pane IDs captured from referenced splits, and keeps +{tooliconl}`call-mutating-tools-batch` available for workflows that need to +call arbitrary MCP tools instead of this tool's typed operation set. + ## libtmux-mcp 0.1.0a14 (2026-06-14) libtmux-mcp 0.1.0a14 adds tier-aware tool batching. 
{tooliconl}`call-readonly-tools-batch`, {tooliconl}`call-mutating-tools-batch`, and {tooliconl}`call-destructive-tools-batch` run an ordered list of existing MCP tools in a single call and return a per-operation result for each, preserving every nested tool's own structured output. Each wrapper caps the safety tier of the calls it will make — regardless of the server's `LIBTMUX_SAFETY` tier — and `on_error` selects stop-at-first-failure or continue-and-report handling. Aggregate results stay within the server's response limit. diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md index a609f73..f4aedd2 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-operations.md @@ -11,6 +11,12 @@ sequence. {tooliconl}`call-mutating-tools-batch` for that. Use individual tools when a workflow has only one step. +**Dispatch boundaries:** Output operations such as `capture_pane` run as +standalone dispatches so their stdout belongs to one step. Referenced +`split_pane` operations also run at a boundary unless their immediate +`send_keys` or `resize_pane` followers target the new pane through the +same `pane_ref`. + **Side effects:** Mutates tmux state according to the submitted operation list. With `on_error="stop"`, chainable operations may share one tmux sequence and native tmux failure semantics stop later steps. From 61ca7e25c841b2c68a0c9498c2576ba23faa94c8 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:33:50 -0500 Subject: [PATCH 09/25] Chain(fix[tools]): Use chain metadata why: The MCP operation compiler was duplicating libtmux's chainability and scope contract, which let the two surfaces drift as new operations were added. 
what: - Validate lowered commands with libtmux chain metadata - Report contract failures as operation-level compile failures - Add an exhaustiveness assertion for typed operation lowering - Cover contract drift with focused tests --- src/libtmux_mcp/tools/chain_tools.py | 171 ++++++++++++++++++++++----- tests/test_chain_tools.py | 81 +++++++++++++ 2 files changed, 224 insertions(+), 28 deletions(-) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index bc618d3..ec815d3 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -6,10 +6,15 @@ import typing as t from libtmux._experimental.chain import ( + ChainabilityError, CommandCall, CommandChain, CommandResultLike, CommandRunner, + CommandScope, + CommandScopeError, + ensure_chainable, + validate_command_scope, ) from pydantic import TypeAdapter @@ -36,6 +41,14 @@ if t.TYPE_CHECKING: from fastmcp import FastMCP + from typing_extensions import assert_never +else: + + def assert_never(value: object) -> t.NoReturn: + """Runtime fallback for the type-checker-only exhaustiveness helper.""" + msg = f"unhandled operation: {value!r}" + raise AssertionError(msg) + TMUX_OPERATIONS_ADAPTER: TypeAdapter[list[TmuxOperation]] = TypeAdapter( list[TmuxOperation], @@ -49,6 +62,49 @@ class _CompileError(Exception): """Operation-level compile failure that should become a step result.""" +def _operation_scope(operation: TmuxOperation) -> CommandScope: + """Return the tmux target scope for one typed operation.""" + if isinstance( + operation, + ( + SplitPaneOperation, + TmuxSendKeysOperation, + ResizePaneOperation, + CapturePaneOperation, + ), + ): + return "pane" + if isinstance(operation, SelectLayoutOperation): + return "window" + if isinstance(operation, SetOptionOperation): + scope: CommandScope + scope = operation.scope if operation.scope is not None else "server" + return scope + assert_never(operation) + + +def _validate_operation_scope( + operation: 
TmuxOperation, + calls: tuple[CommandCall, ...], +) -> None: + """Validate typed operation targets against libtmux command metadata.""" + scope = _operation_scope(operation) + try: + for call in calls: + validate_command_scope(call.name, scope) + except CommandScopeError as exc: + raise _CompileError(str(exc)) from exc + + +def _ensure_chainable_calls(calls: tuple[CommandCall, ...]) -> None: + """Raise a compile error unless every call may fold into a tmux chain.""" + try: + for call in calls: + ensure_chainable(call.name) + except ChainabilityError as exc: + raise _CompileError(str(exc)) from exc + + def _target_pane( pane_id: str | None, pane_ref: str | None, @@ -176,19 +232,21 @@ def _operation_calls( ) -> tuple[CommandCall, ...]: """Lower one typed operation to tmux command calls.""" if isinstance(operation, SplitPaneOperation): - return _split_calls(operation, created_panes) - if isinstance(operation, TmuxSendKeysOperation): - return _send_keys_calls(operation, created_panes) - if isinstance(operation, ResizePaneOperation): - return _resize_pane_calls(operation, created_panes) - if isinstance(operation, SelectLayoutOperation): - return _select_layout_calls(operation) - if isinstance(operation, SetOptionOperation): - return _set_option_calls(operation) - if isinstance(operation, CapturePaneOperation): - return _capture_pane_calls(operation, created_panes) - msg = f"unsupported operation type: {type(operation).__name__}" - raise TypeError(msg) + calls = _split_calls(operation, created_panes) + elif isinstance(operation, TmuxSendKeysOperation): + calls = _send_keys_calls(operation, created_panes) + elif isinstance(operation, ResizePaneOperation): + calls = _resize_pane_calls(operation, created_panes) + elif isinstance(operation, SelectLayoutOperation): + calls = _select_layout_calls(operation) + elif isinstance(operation, SetOptionOperation): + calls = _set_option_calls(operation) + elif isinstance(operation, CapturePaneOperation): + calls = 
_capture_pane_calls(operation, created_panes) + else: + assert_never(operation) + _validate_operation_scope(operation, calls) + return calls def _is_output_operation(operation: TmuxOperation) -> bool: @@ -220,6 +278,27 @@ def _collect_marked_decorates( return decorates, index +def _marked_split_calls( + operation: SplitPaneOperation, + split_calls: tuple[CommandCall, ...], + decorates: list[_MarkedDecorate], + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Build the folded command calls for a ref-producing split.""" + if operation.ref is None: + msg = "marked split dispatch requires a split ref" + raise _CompileError(msg) + + marked_created = {**created_panes, operation.ref: "{marked}"} + calls = [*split_calls, CommandCall("select-pane", ("-m",))] + for _, decorate in decorates: + calls.extend(_operation_calls(decorate, marked_created)) + calls.append(CommandCall("select-pane", ("-M",))) + marked_calls = tuple(calls) + _ensure_chainable_calls(marked_calls) + return marked_calls + + def _run_calls( runner: CommandRunner, calls: tuple[CommandCall, ...], @@ -284,22 +363,11 @@ def _dispatch_marked_split( runner: CommandRunner, index: int, operation: SplitPaneOperation, - split_calls: tuple[CommandCall, ...], + calls: tuple[CommandCall, ...], decorates: list[_MarkedDecorate], - created_panes: dict[str, str], ) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult], str | None]: """Run one id-producing split and its immediate decorates via {marked}.""" - if operation.ref is None: - msg = "marked split dispatch requires a split ref" - raise _CompileError(msg) - - marked_created = {**created_panes, operation.ref: "{marked}"} - calls = [*split_calls, CommandCall("select-pane", ("-m",))] - for _, decorate in decorates: - calls.extend(_operation_calls(decorate, marked_created)) - calls.append(CommandCall("select-pane", ("-M",))) - - chain = CommandChain(tuple(calls)) + chain = CommandChain(calls) result = chain.run(runner) stdout = 
list(result.stdout) stderr = list(result.stderr) @@ -485,14 +553,35 @@ async def flush_pending() -> bool: ): steps_by_index[skip_index] = _skipped_step(skip_index, skipped) break + try: + marked_calls = _marked_split_calls( + operation, + calls, + decorates, + created_panes, + ) + except _CompileError as exc: + steps_by_index[index] = _compile_failure_step( + index, + operation, + exc, + ) + for skip_index, skipped in enumerate( + validated[index + 1 :], + start=index + 1, + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, + skipped, + ) + break dispatch, steps, created_pane_id = await asyncio.to_thread( _dispatch_marked_split, runner, index, operation, - calls, + marked_calls, decorates, - created_panes, ) dispatches.append(dispatch) for step in steps: @@ -514,6 +603,32 @@ async def flush_pending() -> bool: force_standalone = on_error == "continue" or _is_output_operation(operation) if not force_standalone: + try: + _ensure_chainable_calls(calls) + except _CompileError as exc: + if not await flush_pending(): + for skip_index, skipped in enumerate( + validated[index:], + start=index, + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, + skipped, + ) + break + steps_by_index[index] = _compile_failure_step(index, operation, exc) + if on_error == "stop": + for skip_index, skipped in enumerate( + validated[index + 1 :], + start=index + 1, + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, + skipped, + ) + break + index += 1 + continue pending.append((index, operation.kind, calls)) index += 1 continue diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 438aec3..6476c3e 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -6,6 +6,7 @@ import typing as t import pytest +from libtmux._experimental.chain import ChainabilityError, CommandScopeError from pydantic import ValidationError from libtmux_mcp._utils import ExpectedToolError @@ -17,6 +18,7 @@ TmuxOperationStatus, TmuxSendKeysOperation, 
) +from libtmux_mcp.tools import chain_tools from libtmux_mcp.tools.chain_tools import ( TMUX_OPERATIONS_ADAPTER, run_tmux_operations, @@ -176,6 +178,85 @@ def test_run_tmux_operations_continue_uses_standalone_dispatches( assert server.cmd("show-option", "-gv", "@cc_ops_after_error").stdout == ["set"] +class CompileContractCase(t.NamedTuple): + """Case for libtmux compiler contract failures.""" + + test_id: str + contract: t.Literal["chainable", "scope"] + expected_error: str + + +@pytest.mark.parametrize( + "case", + [ + CompileContractCase( + test_id="chainability_contract", + contract="chainable", + expected_error="not chainable from test", + ), + CompileContractCase( + test_id="scope_contract", + contract="scope", + expected_error="wrong scope from test", + ), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_surfaces_libtmux_contract_errors( + case: CompileContractCase, + mcp_session: Session, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """The compiler reports libtmux contract drift as an operation failure. + + The contract metadata is static, so this uses monkeypatch instead of a + tmux fixture to simulate libtmux rejecting a command. 
+ """ + if case.contract == "chainable": + + def fail_chainable(command_name: str) -> None: + msg = f"{command_name} {case.expected_error}" + raise ChainabilityError(msg) + + monkeypatch.setattr( + chain_tools, + "ensure_chainable", + fail_chainable, + raising=False, + ) + else: + + def fail_scope(command_name: str, target_scope: str) -> None: + msg = f"{command_name} {target_scope} {case.expected_error}" + raise CommandScopeError(msg) + + monkeypatch.setattr( + chain_tools, + "validate_command_scope", + fail_scope, + raising=False, + ) + + result = asyncio.run( + run_tmux_operations( + operations=[ + SetOptionOperation( + option="@cc_ops_contract_error", + value="set", + global_=True, + ), + ], + socket_name=mcp_session.server.socket_name, + ), + ) + + assert not result.succeeded + assert result.dispatch_count == 0 + assert result.steps[0].status == TmuxOperationStatus.FAILED + assert result.steps[0].stderr is not None + assert case.expected_error in result.steps[0].stderr[0] + + class ValidationCase(t.NamedTuple): """Case for typed operation validation failures.""" From 5545e3d557d12025a9d10a6e8b9b9cdea51ed430 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:39:59 -0500 Subject: [PATCH 10/25] Chain(feat[tools]): Add dry-run plans why: Agents need to inspect the native tmux dispatches a typed operation list would produce before mutating tmux. 
what: - Add dry_run to run_tmux_operations and result models - Return planned step and dispatch results with nullable exit codes - Use deterministic placeholders for dry-run split pane refs - Document dry-run behavior and add regression tests --- CHANGES | 7 +- docs/tools/chain/run-tmux-operations.md | 5 + src/libtmux_mcp/models.py | 7 +- src/libtmux_mcp/tools/chain_tools.py | 162 +++++++++++++++++++++--- tests/test_chain_tools.py | 106 ++++++++++++++++ 5 files changed, 263 insertions(+), 24 deletions(-) diff --git a/CHANGES b/CHANGES index 6966317..2de0239 100644 --- a/CHANGES +++ b/CHANGES @@ -13,9 +13,10 @@ _Notes on upcoming releases will be added here_ {tooliconl}`run-tmux-operations` accepts an ordered list of typed tmux operations and compiles safe no-output steps into the fewest native tmux dispatches it can. It preserves per-operation results for standalone read -steps, returns concrete pane IDs captured from referenced splits, and keeps -{tooliconl}`call-mutating-tools-batch` available for workflows that need to -call arbitrary MCP tools instead of this tool's typed operation set. +steps, returns concrete pane IDs captured from referenced splits, supports a +dry-run mode that returns the rendered dispatch plan without touching tmux, and +keeps {tooliconl}`call-mutating-tools-batch` available for workflows that need +to call arbitrary MCP tools instead of this tool's typed operation set. ## libtmux-mcp 0.1.0a14 (2026-06-14) diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md index f4aedd2..4231758 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-operations.md @@ -23,6 +23,11 @@ one tmux sequence and native tmux failure semantics stop later steps. With `on_error="continue"`, operations run as standalone dispatches so later steps can still run after an earlier failure. +Set `dry_run` to `true` to compile the operation list and return the +rendered dispatches without touching tmux. 
Referenced split panes use +deterministic placeholders in `created_panes` until the plan is run for +real. + An id-producing `split_pane` can fold with immediate `send_keys` or `resize_pane` operations that target its `pane_ref`; the tool uses tmux's `{marked}` target internally and still returns the concrete pane diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index d0b6f66..55a075e 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -671,6 +671,7 @@ class TmuxOperationStatus(str, enum.Enum): SUCCEEDED = "succeeded" FAILED = "failed" SKIPPED = "skipped" + PLANNED = "planned" class _PaneTargetOperation(BaseModel): @@ -865,7 +866,7 @@ class TmuxOperationDispatchResult(BaseModel): description="Operation indexes included in this dispatch.", ) argv: list[str] = Field(description="Rendered tmux argv.") - returncode: int = Field(description="tmux process exit code.") + returncode: int | None = Field(description="tmux process exit code, if run.") stdout: list[str] = Field(default_factory=list, description="stdout lines.") stderr: list[str] = Field(default_factory=list, description="stderr lines.") @@ -874,6 +875,10 @@ class RunTmuxOperationsResult(BaseModel): """Result of compiling and running typed tmux operations.""" succeeded: bool = Field(description="False when any operation failed or skipped.") + dry_run: bool = Field( + default=False, + description="True when dispatches were planned but not executed.", + ) dispatch_count: int = Field(description="Number of native tmux dispatches.") dispatches: list[TmuxOperationDispatchResult] = Field( description="Native tmux dispatches used by the compiler.", diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index ec815d3..6077f6a 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -305,7 +305,7 @@ def _run_calls( ) -> tuple[list[str], CommandResultLike]: """Run one operation's calls as a single native 
dispatch.""" if len(calls) == 1: - argv = list(calls[0].argv()) + argv = _calls_argv(calls) result = runner.cmd(argv[0], *argv[1:]) return argv, result @@ -314,6 +314,13 @@ def _run_calls( return list(chain.argv()), result +def _calls_argv(calls: tuple[CommandCall, ...]) -> list[str]: + """Render calls as one native tmux dispatch argv.""" + if len(calls) == 1: + return list(calls[0].argv()) + return list(CommandChain(calls).argv()) + + def _dispatch_standalone( runner: CommandRunner, index: int, @@ -451,6 +458,88 @@ def _dispatch_chain( return dispatch, steps +def _planned_pane_ref(ref: str) -> str: + """Return the deterministic placeholder for a dry-run pane ref.""" + return f"" + + +def _planned_step( + index: int, + kind: str, + created_pane_id: str | None = None, +) -> TmuxOperationStepResult: + """Return a planned step result for dry-run compilation.""" + return TmuxOperationStepResult( + index=index, + kind=kind, + status=TmuxOperationStatus.PLANNED, + created_pane_id=created_pane_id, + ) + + +def _plan_standalone( + index: int, + kind: str, + calls: tuple[CommandCall, ...], + *, + created_pane_id: str | None = None, +) -> tuple[TmuxOperationDispatchResult, TmuxOperationStepResult, str | None]: + """Return the dry-run shape for one standalone dispatch.""" + return ( + TmuxOperationDispatchResult( + mode="standalone", + operation_indexes=[index], + argv=_calls_argv(calls), + returncode=None, + ), + _planned_step(index, kind, created_pane_id), + created_pane_id, + ) + + +def _plan_marked_split( + index: int, + operation: SplitPaneOperation, + calls: tuple[CommandCall, ...], + decorates: list[_MarkedDecorate], +) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult], str | None]: + """Return the dry-run shape for one folded split-ref dispatch.""" + created_pane_id = _planned_pane_ref(operation.ref) if operation.ref else None + return ( + TmuxOperationDispatchResult( + mode="chain", + operation_indexes=[ + index, + *(decorate_index for 
decorate_index, _ in decorates), + ], + argv=list(CommandChain(calls).argv()), + returncode=None, + ), + [ + _planned_step(index, operation.kind, created_pane_id), + *[ + _planned_step(decorate_index, decorate.kind) + for decorate_index, decorate in decorates + ], + ], + created_pane_id, + ) + + +def _plan_chain( + pending: list[_PendingCalls], +) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: + """Return the dry-run shape for a pending folded chain.""" + calls = tuple(call for _, _, op_calls in pending for call in op_calls) + dispatch = TmuxOperationDispatchResult( + mode="chain", + operation_indexes=[index for index, _, _ in pending], + argv=list(CommandChain(calls).argv()), + returncode=None, + ) + return dispatch, [_planned_step(index, kind) for index, kind, _ in pending] + + def _compile_failure_step( index: int, operation: TmuxOperation, @@ -478,6 +567,7 @@ def _skipped_step(index: int, operation: TmuxOperation) -> TmuxOperationStepResu async def run_tmux_operations( operations: list[TmuxOperation], on_error: t.Literal["stop", "continue"] = "stop", + dry_run: bool = False, socket_name: str | None = None, ) -> RunTmuxOperationsResult: """Run typed tmux operations with minimum safe native dispatches. 
@@ -499,7 +589,7 @@ async def run_tmux_operations( msg = "on_error must be 'stop' or 'continue'" raise ExpectedToolError(msg) - runner = _get_server(socket_name=socket_name) + runner = None if dry_run else _get_server(socket_name=socket_name) pending: list[_PendingCalls] = [] dispatches: list[TmuxOperationDispatchResult] = [] steps_by_index: dict[int, TmuxOperationStepResult] = {} @@ -508,7 +598,11 @@ async def run_tmux_operations( async def flush_pending() -> bool: if not pending: return True - dispatch, steps = await asyncio.to_thread(_dispatch_chain, runner, pending) + if dry_run: + dispatch, steps = _plan_chain(pending) + else: + assert runner is not None + dispatch, steps = await asyncio.to_thread(_dispatch_chain, runner, pending) dispatches.append(dispatch) pending.clear() for step in steps: @@ -575,14 +669,23 @@ async def flush_pending() -> bool: skipped, ) break - dispatch, steps, created_pane_id = await asyncio.to_thread( - _dispatch_marked_split, - runner, - index, - operation, - marked_calls, - decorates, - ) + if dry_run: + dispatch, steps, created_pane_id = _plan_marked_split( + index, + operation, + marked_calls, + decorates, + ) + else: + assert runner is not None + dispatch, steps, created_pane_id = await asyncio.to_thread( + _dispatch_marked_split, + runner, + index, + operation, + marked_calls, + decorates, + ) dispatches.append(dispatch) for step in steps: steps_by_index[step.index] = step @@ -641,14 +744,29 @@ async def flush_pending() -> bool: capture_created_pane = ( isinstance(operation, SplitPaneOperation) and operation.ref is not None ) - dispatch, step, created_pane_id = await asyncio.to_thread( - _dispatch_standalone, - runner, - index, - operation.kind, - calls, - capture_created_pane=capture_created_pane, - ) + if dry_run: + planned_pane_id = ( + _planned_pane_ref(operation.ref) + if isinstance(operation, SplitPaneOperation) + and operation.ref is not None + else None + ) + dispatch, step, created_pane_id = _plan_standalone( + index, + 
operation.kind, + calls, + created_pane_id=planned_pane_id if capture_created_pane else None, + ) + else: + assert runner is not None + dispatch, step, created_pane_id = await asyncio.to_thread( + _dispatch_standalone, + runner, + index, + operation.kind, + calls, + capture_created_pane=capture_created_pane, + ) dispatches.append(dispatch) steps_by_index[index] = step if ( @@ -670,9 +788,13 @@ async def flush_pending() -> bool: await flush_pending() steps = [steps_by_index[index] for index in range(len(validated))] - succeeded = all(step.status == TmuxOperationStatus.SUCCEEDED for step in steps) + success_statuses = {TmuxOperationStatus.SUCCEEDED} + if dry_run: + success_statuses.add(TmuxOperationStatus.PLANNED) + succeeded = all(step.status in success_statuses for step in steps) return RunTmuxOperationsResult( succeeded=succeeded, + dry_run=dry_run, dispatch_count=len(dispatches), dispatches=dispatches, steps=steps, diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 6476c3e..9044c49 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -257,6 +257,112 @@ def fail_scope(command_name: str, target_scope: str) -> None: assert case.expected_error in result.steps[0].stderr[0] +class DryRunSetOptionCase(t.NamedTuple): + """Case for dry-run option chains.""" + + test_id: str + operations: list[TmuxOperation] + absent_options: list[str] + + +@pytest.mark.parametrize( + "case", + [ + DryRunSetOptionCase( + test_id="folded_global_options", + operations=[ + SetOptionOperation(option="@cc_ops_dry_a", value="1", global_=True), + SetOptionOperation(option="@cc_ops_dry_b", value="2", global_=True), + ], + absent_options=["@cc_ops_dry_a", "@cc_ops_dry_b"], + ), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_dry_run_plans_without_mutating( + case: DryRunSetOptionCase, + mcp_session: Session, +) -> None: + """Dry-run returns planned dispatches without changing tmux state.""" + server = mcp_session.server + result = 
asyncio.run( + run_tmux_operations( + operations=case.operations, + dry_run=True, + socket_name=server.socket_name, + ), + ) + + assert result.succeeded + assert result.dry_run + assert result.dispatch_count == 1 + assert result.dispatches[0].mode == "chain" + assert result.dispatches[0].returncode is None + assert ";" in result.dispatches[0].argv + assert [step.status for step in result.steps] == [ + TmuxOperationStatus.PLANNED, + TmuxOperationStatus.PLANNED, + ] + assert all(step.returncode is None for step in result.steps) + for option in case.absent_options: + assert server.cmd("show-option", "-gv", option).stdout == [] + + +class DryRunSplitRefCase(t.NamedTuple): + """Case for dry-run split refs.""" + + test_id: str + ref: str + keys: str + + +@pytest.mark.parametrize( + "case", + [ + DryRunSplitRefCase( + test_id="marked_split_ref", + ref="child", + keys="printf 'DRY_RUN_REF\\n'", + ), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_dry_run_plans_marked_split_ref( + case: DryRunSplitRefCase, + mcp_server: Server, + mcp_pane: Pane, +) -> None: + """Dry-run uses placeholders for pane refs without creating panes.""" + mcp_pane.window.refresh() + pane_count = len(mcp_pane.window.panes) + + result = asyncio.run( + run_tmux_operations( + operations=[ + SplitPaneOperation(ref=case.ref, pane_id=mcp_pane.pane_id), + TmuxSendKeysOperation(pane_ref=case.ref, keys=case.keys), + ], + dry_run=True, + socket_name=mcp_server.socket_name, + ), + ) + + placeholder = f"" + assert result.succeeded + assert result.dry_run + assert result.dispatch_count == 1 + assert result.dispatches[0].mode == "chain" + assert result.dispatches[0].returncode is None + assert result.dispatches[0].operation_indexes == [0, 1] + assert result.created_panes == {case.ref: placeholder} + assert result.steps[0].status == TmuxOperationStatus.PLANNED + assert result.steps[0].created_pane_id == placeholder + assert result.steps[1].status == TmuxOperationStatus.PLANNED + + 
mcp_pane.window.refresh() + assert len(mcp_pane.window.panes) == pane_count + + class ValidationCase(t.NamedTuple): """Case for typed operation validation failures.""" From 35910c675a047c101225db4d8c5a798ec0cbc2c0 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:43:54 -0500 Subject: [PATCH 11/25] Chain(fix[tools]): Continue dry-run plans why: Planned dry-run steps should not stop later operations when the compiler flushes a pending dispatch before an output step. what: - Treat planned dry-run steps as successful for control flow - Reuse the same success predicate for final results - Add a regression covering dry-run output-step continuation --- src/libtmux_mcp/tools/chain_tools.py | 27 +++++++++++++++++------ tests/test_chain_tools.py | 32 ++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index 6077f6a..be0e05e 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -563,6 +563,22 @@ def _skipped_step(index: int, operation: TmuxOperation) -> TmuxOperationStepResu ) +def _step_succeeded(step: TmuxOperationStepResult, *, dry_run: bool) -> bool: + """Return whether a step should allow later operations to continue.""" + return step.status == TmuxOperationStatus.SUCCEEDED or ( + dry_run and step.status == TmuxOperationStatus.PLANNED + ) + + +def _steps_succeeded( + steps: t.Iterable[TmuxOperationStepResult], + *, + dry_run: bool, +) -> bool: + """Return whether every step succeeded for control-flow purposes.""" + return all(_step_succeeded(step, dry_run=dry_run) for step in steps) + + @handle_tool_errors_async async def run_tmux_operations( operations: list[TmuxOperation], @@ -607,7 +623,7 @@ async def flush_pending() -> bool: pending.clear() for step in steps: steps_by_index[step.index] = step - return all(step.status == TmuxOperationStatus.SUCCEEDED for step in steps) + return 
_steps_succeeded(steps, dry_run=dry_run) index = 0 while index < len(validated): @@ -691,7 +707,7 @@ async def flush_pending() -> bool: steps_by_index[step.index] = step if created_pane_id is not None: created_panes[operation.ref] = created_pane_id - if any(step.status != TmuxOperationStatus.SUCCEEDED for step in steps): + if not _steps_succeeded(steps, dry_run=dry_run): for skip_index, skipped in enumerate( validated[next_index:], start=next_index, @@ -775,7 +791,7 @@ async def flush_pending() -> bool: and created_pane_id is not None ): created_panes[operation.ref] = created_pane_id - if step.status != TmuxOperationStatus.SUCCEEDED and on_error == "stop": + if not _step_succeeded(step, dry_run=dry_run) and on_error == "stop": for skip_index, skipped in enumerate( validated[index + 1 :], start=index + 1, @@ -788,10 +804,7 @@ async def flush_pending() -> bool: await flush_pending() steps = [steps_by_index[index] for index in range(len(validated))] - success_statuses = {TmuxOperationStatus.SUCCEEDED} - if dry_run: - success_statuses.add(TmuxOperationStatus.PLANNED) - succeeded = all(step.status in success_statuses for step in steps) + succeeded = _steps_succeeded(steps, dry_run=dry_run) return RunTmuxOperationsResult( succeeded=succeeded, dry_run=dry_run, diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 9044c49..f5a23f1 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -363,6 +363,38 @@ def test_run_tmux_operations_dry_run_plans_marked_split_ref( assert len(mcp_pane.window.panes) == pane_count +def test_run_tmux_operations_dry_run_continues_after_pending_plan( + mcp_server: Server, + mcp_pane: Pane, +) -> None: + """Dry-run treats planned pending dispatches as successful.""" + result = asyncio.run( + run_tmux_operations( + operations=[ + SetOptionOperation( + option="@cc_ops_dry_pending", + value="1", + global_=True, + ), + CapturePaneOperation(pane_id=mcp_pane.pane_id), + ], + dry_run=True, + 
socket_name=mcp_server.socket_name, + ), + ) + + assert result.succeeded + assert result.dispatch_count == 2 + assert [dispatch.mode for dispatch in result.dispatches] == [ + "chain", + "standalone", + ] + assert [step.status for step in result.steps] == [ + TmuxOperationStatus.PLANNED, + TmuxOperationStatus.PLANNED, + ] + + class ValidationCase(t.NamedTuple): """Case for typed operation validation failures.""" From 970c2e3e115e410e9fce09bc466e74af1bea07cb Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:49:03 -0500 Subject: [PATCH 12/25] Chain(feat[tools]): Bound dispatch waits why: Native tmux chains can block the MCP call when a dispatch stalls, so callers need a typed failure instead of an unbounded await. what: - Add dispatch_timeout validation to run_tmux_operations - Mark timed-out dispatches and included steps as failed - Cover chain, standalone, and marked split timeout paths - Document the timeout behavior and background worker caveat --- CHANGES | 5 +- docs/tools/chain/run-tmux-operations.md | 6 + src/libtmux_mcp/tools/chain_tools.py | 185 +++++++++++++++++++++--- tests/test_chain_tools.py | 138 ++++++++++++++++++ 4 files changed, 315 insertions(+), 19 deletions(-) diff --git a/CHANGES b/CHANGES index 2de0239..f20719a 100644 --- a/CHANGES +++ b/CHANGES @@ -15,8 +15,9 @@ operations and compiles safe no-output steps into the fewest native tmux dispatches it can. It preserves per-operation results for standalone read steps, returns concrete pane IDs captured from referenced splits, supports a dry-run mode that returns the rendered dispatch plan without touching tmux, and -keeps {tooliconl}`call-mutating-tools-batch` available for workflows that need -to call arbitrary MCP tools instead of this tool's typed operation set. +applies a per-dispatch timeout. It keeps +{tooliconl}`call-mutating-tools-batch` available for workflows that need to call +arbitrary MCP tools instead of this tool's typed operation set. 
## libtmux-mcp 0.1.0a14 (2026-06-14) diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md index 4231758..5bbb8ae 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-operations.md @@ -28,6 +28,12 @@ rendered dispatches without touching tmux. Referenced split panes use deterministic placeholders in `created_panes` until the plan is run for real. +`dispatch_timeout` defaults to 10 seconds and bounds how long the tool +waits for each native tmux dispatch. A timed-out dispatch marks the +included operations failed with `returncode: null`; because dispatches +run in a worker thread, the underlying tmux subprocess may still finish +after the tool returns. + An id-producing `split_pane` can fold with immediate `send_keys` or `resize_pane` operations that target its `pane_ref`; the tool uses tmux's `{marked}` target internally and still returns the concrete pane diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index be0e05e..fdb5051 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -540,6 +540,97 @@ def _plan_chain( return dispatch, [_planned_step(index, kind) for index, kind, _ in pending] +def _timeout_stderr(dispatch_timeout: float) -> list[str]: + """Return the stderr payload for a bounded dispatch timeout.""" + return [f"tmux dispatch timed out after {dispatch_timeout:g} seconds"] + + +def _timeout_step( + index: int, + kind: str, + stderr: list[str], +) -> TmuxOperationStepResult: + """Return a failed step for a dispatch timeout.""" + return TmuxOperationStepResult( + index=index, + kind=kind, + status=TmuxOperationStatus.FAILED, + stderr=stderr, + ) + + +def _timeout_standalone( + index: int, + kind: str, + calls: tuple[CommandCall, ...], + dispatch_timeout: float, +) -> tuple[TmuxOperationDispatchResult, TmuxOperationStepResult, str | None]: + """Return timeout results for one standalone dispatch.""" + 
stderr = _timeout_stderr(dispatch_timeout) + return ( + TmuxOperationDispatchResult( + mode="standalone", + operation_indexes=[index], + argv=_calls_argv(calls), + returncode=None, + stderr=stderr, + ), + _timeout_step(index, kind, stderr), + None, + ) + + +def _timeout_marked_split( + index: int, + operation: SplitPaneOperation, + calls: tuple[CommandCall, ...], + decorates: list[_MarkedDecorate], + dispatch_timeout: float, +) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult], str | None]: + """Return timeout results for one folded split-ref dispatch.""" + stderr = _timeout_stderr(dispatch_timeout) + return ( + TmuxOperationDispatchResult( + mode="chain", + operation_indexes=[ + index, + *(decorate_index for decorate_index, _ in decorates), + ], + argv=list(CommandChain(calls).argv()), + returncode=None, + stderr=stderr, + ), + [ + _timeout_step(index, operation.kind, stderr), + *[ + _timeout_step(decorate_index, decorate.kind, stderr) + for decorate_index, decorate in decorates + ], + ], + None, + ) + + +def _timeout_chain( + pending: list[_PendingCalls], + dispatch_timeout: float, +) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: + """Return timeout results for a pending folded chain.""" + stderr = _timeout_stderr(dispatch_timeout) + calls = tuple(call for _, _, op_calls in pending for call in op_calls) + dispatch = TmuxOperationDispatchResult( + mode="chain", + operation_indexes=[index for index, _, _ in pending], + argv=list(CommandChain(calls).argv()), + returncode=None, + stderr=stderr, + ) + return ( + dispatch, + [_timeout_step(index, kind, stderr) for index, kind, _ in pending], + ) + + def _compile_failure_step( index: int, operation: TmuxOperation, @@ -584,6 +675,7 @@ async def run_tmux_operations( operations: list[TmuxOperation], on_error: t.Literal["stop", "continue"] = "stop", dry_run: bool = False, + dispatch_timeout: float | None = 10.0, socket_name: str | None = None, ) -> RunTmuxOperationsResult: """Run 
typed tmux operations with minimum safe native dispatches. @@ -596,6 +688,8 @@ async def run_tmux_operations( register. ``on_error="continue"`` disables folding because tmux sequences abort the rest of the sequence on first failure. + ``dispatch_timeout`` bounds how long the tool waits for one native tmux + dispatch; timed-out subprocess work may still finish in the background. """ validated = TMUX_OPERATIONS_ADAPTER.validate_python(operations) if not validated: @@ -604,6 +698,9 @@ async def run_tmux_operations( if on_error not in {"stop", "continue"}: msg = "on_error must be 'stop' or 'continue'" raise ExpectedToolError(msg) + if dispatch_timeout is not None and dispatch_timeout <= 0: + msg = "dispatch_timeout must be greater than 0 or null" + raise ExpectedToolError(msg) runner = None if dry_run else _get_server(socket_name=socket_name) pending: list[_PendingCalls] = [] @@ -618,7 +715,23 @@ async def flush_pending() -> bool: dispatch, steps = _plan_chain(pending) else: assert runner is not None - dispatch, steps = await asyncio.to_thread(_dispatch_chain, runner, pending) + pending_snapshot = list(pending) + try: + chain_dispatch_coro = asyncio.to_thread( + _dispatch_chain, + runner, + pending_snapshot, + ) + if dispatch_timeout is None: + dispatch, steps = await chain_dispatch_coro + else: + dispatch, steps = await asyncio.wait_for( + chain_dispatch_coro, + timeout=dispatch_timeout, + ) + except TimeoutError: + assert dispatch_timeout is not None + dispatch, steps = _timeout_chain(pending_snapshot, dispatch_timeout) dispatches.append(dispatch) pending.clear() for step in steps: @@ -694,14 +807,36 @@ async def flush_pending() -> bool: ) else: assert runner is not None - dispatch, steps, created_pane_id = await asyncio.to_thread( - _dispatch_marked_split, - runner, - index, - operation, - marked_calls, - decorates, - ) + decorates_snapshot = list(decorates) + try: + marked_dispatch_coro = asyncio.to_thread( + _dispatch_marked_split, + runner, + index, + operation, 
+ marked_calls, + decorates_snapshot, + ) + if dispatch_timeout is None: + ( + dispatch, + steps, + created_pane_id, + ) = await marked_dispatch_coro + else: + dispatch, steps, created_pane_id = await asyncio.wait_for( + marked_dispatch_coro, + timeout=dispatch_timeout, + ) + except TimeoutError: + assert dispatch_timeout is not None + dispatch, steps, created_pane_id = _timeout_marked_split( + index, + operation, + marked_calls, + decorates_snapshot, + dispatch_timeout, + ) dispatches.append(dispatch) for step in steps: steps_by_index[step.index] = step @@ -775,14 +910,30 @@ async def flush_pending() -> bool: ) else: assert runner is not None - dispatch, step, created_pane_id = await asyncio.to_thread( - _dispatch_standalone, - runner, - index, - operation.kind, - calls, - capture_created_pane=capture_created_pane, - ) + try: + standalone_dispatch_coro = asyncio.to_thread( + _dispatch_standalone, + runner, + index, + operation.kind, + calls, + capture_created_pane=capture_created_pane, + ) + if dispatch_timeout is None: + dispatch, step, created_pane_id = await standalone_dispatch_coro + else: + dispatch, step, created_pane_id = await asyncio.wait_for( + standalone_dispatch_coro, + timeout=dispatch_timeout, + ) + except TimeoutError: + assert dispatch_timeout is not None + dispatch, step, created_pane_id = _timeout_standalone( + index, + operation.kind, + calls, + dispatch_timeout, + ) dispatches.append(dispatch) steps_by_index[index] = step if ( diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index f5a23f1..edb671d 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio +import time import typing as t import pytest @@ -395,6 +396,143 @@ def test_run_tmux_operations_dry_run_continues_after_pending_plan( ] +class TimeoutDispatchCase(t.NamedTuple): + """Case for bounded native dispatch execution.""" + + test_id: str + helper_name: str + path: t.Literal["chain", 
"standalone", "marked"] + expected_mode: t.Literal["chain", "standalone"] + expected_indexes: list[int] + + +def _timeout_operations( + case: TimeoutDispatchCase, + pane_id: str, +) -> list[TmuxOperation]: + """Return operations that route through the case's dispatch helper.""" + if case.path == "chain": + return [ + SetOptionOperation(option="@cc_ops_timeout_a", value="1", global_=True), + SetOptionOperation(option="@cc_ops_timeout_b", value="2", global_=True), + ] + if case.path == "standalone": + return [CapturePaneOperation(pane_id=pane_id)] + if case.path == "marked": + return [ + SplitPaneOperation(ref="child", pane_id=pane_id), + TmuxSendKeysOperation(pane_ref="child", keys="echo timeout"), + ] + raise AssertionError(case.path) + + +@pytest.mark.parametrize( + "case", + [ + TimeoutDispatchCase( + test_id="pending_chain", + helper_name="_dispatch_chain", + path="chain", + expected_mode="chain", + expected_indexes=[0, 1], + ), + TimeoutDispatchCase( + test_id="standalone_output", + helper_name="_dispatch_standalone", + path="standalone", + expected_mode="standalone", + expected_indexes=[0], + ), + TimeoutDispatchCase( + test_id="marked_split", + helper_name="_dispatch_marked_split", + path="marked", + expected_mode="chain", + expected_indexes=[0, 1], + ), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_dispatch_timeout( + case: TimeoutDispatchCase, + mcp_server: Server, + mcp_pane: Pane, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A dispatch timeout returns failed per-operation results. + + The dispatch helpers are synchronous wrappers around tmux subprocesses, so + this uses monkeypatch rather than a blocking tmux command. 
+ """ + + def sleep_dispatch(*args: object, **kwargs: object) -> t.NoReturn: + time.sleep(0.05) + msg = "dispatch should have timed out" + raise AssertionError(msg) + + monkeypatch.setattr(chain_tools, case.helper_name, sleep_dispatch) + assert mcp_pane.pane_id is not None + + result = asyncio.run( + run_tmux_operations( + operations=_timeout_operations(case, mcp_pane.pane_id), + dispatch_timeout=0.001, + socket_name=mcp_server.socket_name, + ), + ) + + assert not result.succeeded + assert result.dispatch_count == 1 + assert result.dispatches[0].mode == case.expected_mode + assert result.dispatches[0].operation_indexes == case.expected_indexes + assert result.dispatches[0].returncode is None + assert result.dispatches[0].stderr == [ + "tmux dispatch timed out after 0.001 seconds", + ] + assert [step.status for step in result.steps] == [ + TmuxOperationStatus.FAILED, + *[TmuxOperationStatus.FAILED for _ in case.expected_indexes[1:]], + ] + assert all(step.returncode is None for step in result.steps) + assert all(step.stderr == result.dispatches[0].stderr for step in result.steps) + + +class TimeoutValidationCase(t.NamedTuple): + """Case for timeout input validation.""" + + test_id: str + dispatch_timeout: float + + +@pytest.mark.parametrize( + "case", + [ + TimeoutValidationCase(test_id="zero", dispatch_timeout=0.0), + TimeoutValidationCase(test_id="negative", dispatch_timeout=-1.0), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_dispatch_timeout_validation( + case: TimeoutValidationCase, + mcp_session: Session, +) -> None: + """Dispatch timeout must be positive when set.""" + with pytest.raises(ExpectedToolError, match="dispatch_timeout"): + asyncio.run( + run_tmux_operations( + operations=[ + SetOptionOperation( + option="@cc_ops_timeout_validation", + value="1", + global_=True, + ), + ], + dispatch_timeout=case.dispatch_timeout, + socket_name=mcp_session.server.socket_name, + ), + ) + + class ValidationCase(t.NamedTuple): """Case for typed 
operation validation failures.""" From c9bbf5db11e2b8a93e32cddd0aea2d60a1daa825 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:51:24 -0500 Subject: [PATCH 13/25] Chain(test[tools]): Cover error paths why: The typed chain compiler has branch-local failure paths for refs, pending flushes, and marked split chains that need explicit coverage. what: - Cover unknown pane_ref compile failures - Cover compile errors after a failed pending dispatch - Cover marked split failure skipping later operations --- tests/test_chain_tools.py | 89 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index edb671d..42aa89c 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -533,6 +533,95 @@ def test_run_tmux_operations_dispatch_timeout_validation( ) +class CompileErrorPathCase(t.NamedTuple): + """Case for branch-local compile error paths.""" + + test_id: str + operations: list[TmuxOperation] + expected_dispatch_count: int + expected_statuses: list[TmuxOperationStatus] + expected_error: str | None + + +@pytest.mark.parametrize( + "case", + [ + CompileErrorPathCase( + test_id="unknown_pane_ref", + operations=[ + TmuxSendKeysOperation(pane_ref="missing", keys="bad", enter=False), + ], + expected_dispatch_count=0, + expected_statuses=[TmuxOperationStatus.FAILED], + expected_error="unknown pane_ref: missing", + ), + CompileErrorPathCase( + test_id="pending_failure_before_compile_error", + operations=[ + TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), + TmuxSendKeysOperation(pane_ref="missing", keys="bad", enter=False), + ], + expected_dispatch_count=1, + expected_statuses=[ + TmuxOperationStatus.FAILED, + TmuxOperationStatus.SKIPPED, + ], + expected_error=None, + ), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_compile_error_paths( + case: CompileErrorPathCase, + mcp_session: Session, +) -> None: + """Compile errors 
report directly unless pending work fails first.""" + result = asyncio.run( + run_tmux_operations( + operations=case.operations, + socket_name=mcp_session.server.socket_name, + ), + ) + + assert not result.succeeded + assert result.dispatch_count == case.expected_dispatch_count + assert [step.status for step in result.steps] == case.expected_statuses + if case.expected_error is not None: + assert result.steps[0].stderr is not None + assert result.steps[0].stderr == [case.expected_error] + + +def test_run_tmux_operations_marked_split_failure_skips_later_ops( + mcp_session: Session, +) -> None: + """A failed marked split skips operations after its folded decorations.""" + server = mcp_session.server + result = asyncio.run( + run_tmux_operations( + operations=[ + SplitPaneOperation(ref="child", pane_id="%999999"), + TmuxSendKeysOperation(pane_ref="child", keys="bad", enter=False), + SetOptionOperation( + option="@cc_ops_after_marked_failure", + value="set", + global_=True, + ), + ], + socket_name=server.socket_name, + ), + ) + + assert not result.succeeded + assert result.dispatch_count == 1 + assert result.dispatches[0].operation_indexes == [0, 1] + assert [step.status for step in result.steps] == [ + TmuxOperationStatus.FAILED, + TmuxOperationStatus.FAILED, + TmuxOperationStatus.SKIPPED, + ] + assert server.cmd("show-option", "-gv", "@cc_ops_after_marked_failure").stdout == [] + + class ValidationCase(t.NamedTuple): """Case for typed operation validation failures.""" From ed772a528a0599890fb528a16a538f95900e74d5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 15:56:23 -0500 Subject: [PATCH 14/25] Chain(feat[tools]): Roll back panes why: A typed operation list can create panes before a later step fails, leaving partial layout state behind for callers that need all-or-nothing behavior. 
what: - Add rollback_on_error to run_tmux_operations - Kill created split-ref panes in reverse order on failure - Report rolled_back_panes and rollback_errors in results - Document rollback behavior and cover enabled and disabled cases --- CHANGES | 3 +- docs/tools/chain/run-tmux-operations.md | 5 ++ src/libtmux_mcp/models.py | 8 ++++ src/libtmux_mcp/tools/chain_tools.py | 41 +++++++++++++++- tests/test_chain_tools.py | 64 +++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 3 deletions(-) diff --git a/CHANGES b/CHANGES index f20719a..7480c34 100644 --- a/CHANGES +++ b/CHANGES @@ -15,7 +15,8 @@ operations and compiles safe no-output steps into the fewest native tmux dispatches it can. It preserves per-operation results for standalone read steps, returns concrete pane IDs captured from referenced splits, supports a dry-run mode that returns the rendered dispatch plan without touching tmux, and -applies a per-dispatch timeout. It keeps +applies a per-dispatch timeout. It can also roll back panes created by typed +split refs when a later operation fails. It keeps {tooliconl}`call-mutating-tools-batch` available for workflows that need to call arbitrary MCP tools instead of this tool's typed operation set. diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md index 5bbb8ae..25d9d66 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-operations.md @@ -34,6 +34,11 @@ included operations failed with `returncode: null`; because dispatches run in a worker thread, the underlying tmux subprocess may still finish after the tool returns. +Set `rollback_on_error` to `true` to kill panes created by +ref-producing `split_pane` operations when the overall operation list +fails. The result still reports `created_panes`, and adds +`rolled_back_panes` plus `rollback_errors` for cleanup visibility. 
+ An id-producing `split_pane` can fold with immediate `send_keys` or `resize_pane` operations that target its `pane_ref`; the tool uses tmux's `{marked}` target internally and still returns the concrete pane diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 55a075e..82b47ac 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -890,3 +890,11 @@ class RunTmuxOperationsResult(BaseModel): default_factory=dict, description="Mapping of split_pane ref names to concrete pane IDs.", ) + rolled_back_panes: list[str] = Field( + default_factory=list, + description="Pane IDs killed by rollback_on_error.", + ) + rollback_errors: list[str] = Field( + default_factory=list, + description="Errors raised while rolling back created panes.", + ) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index fdb5051..9cae052 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -631,6 +631,23 @@ def _timeout_chain( ) +def _rollback_created_panes( + runner: CommandRunner, + pane_ids: list[str], +) -> tuple[list[str], list[str]]: + """Kill created panes in reverse order and report cleanup failures.""" + rolled_back_panes: list[str] = [] + rollback_errors: list[str] = [] + for pane_id in reversed(pane_ids): + result = runner.cmd("kill-pane", "-t", pane_id) + if result.returncode == 0: + rolled_back_panes.append(pane_id) + continue + stderr = list(result.stderr) or [f"kill-pane exited {result.returncode}"] + rollback_errors.extend(f"{pane_id}: {line}" for line in stderr) + return rolled_back_panes, rollback_errors + + def _compile_failure_step( index: int, operation: TmuxOperation, @@ -676,6 +693,7 @@ async def run_tmux_operations( on_error: t.Literal["stop", "continue"] = "stop", dry_run: bool = False, dispatch_timeout: float | None = 10.0, + rollback_on_error: bool = False, socket_name: str | None = None, ) -> RunTmuxOperationsResult: """Run typed tmux operations with 
minimum safe native dispatches. @@ -690,6 +708,8 @@ async def run_tmux_operations( rest of the sequence on first failure. ``dispatch_timeout`` bounds how long the tool waits for one native tmux dispatch; timed-out subprocess work may still finish in the background. + ``rollback_on_error`` kills panes created by ref-producing ``split_pane`` + operations when the overall operation list fails. """ validated = TMUX_OPERATIONS_ADAPTER.validate_python(operations) if not validated: @@ -707,6 +727,12 @@ async def run_tmux_operations( dispatches: list[TmuxOperationDispatchResult] = [] steps_by_index: dict[int, TmuxOperationStepResult] = {} created_panes: dict[str, str] = {} + created_pane_order: list[str] = [] + + def record_created_pane(ref: str, pane_id: str) -> None: + created_panes[ref] = pane_id + if pane_id not in created_pane_order: + created_pane_order.append(pane_id) async def flush_pending() -> bool: if not pending: @@ -841,7 +867,7 @@ async def flush_pending() -> bool: for step in steps: steps_by_index[step.index] = step if created_pane_id is not None: - created_panes[operation.ref] = created_pane_id + record_created_pane(operation.ref, created_pane_id) if not _steps_succeeded(steps, dry_run=dry_run): for skip_index, skipped in enumerate( validated[next_index:], @@ -941,7 +967,7 @@ async def flush_pending() -> bool: and operation.ref is not None and created_pane_id is not None ): - created_panes[operation.ref] = created_pane_id + record_created_pane(operation.ref, created_pane_id) if not _step_succeeded(step, dry_run=dry_run) and on_error == "stop": for skip_index, skipped in enumerate( validated[index + 1 :], @@ -956,6 +982,15 @@ async def flush_pending() -> bool: steps = [steps_by_index[index] for index in range(len(validated))] succeeded = _steps_succeeded(steps, dry_run=dry_run) + rolled_back_panes: list[str] = [] + rollback_errors: list[str] = [] + if rollback_on_error and not dry_run and not succeeded and created_pane_order: + assert runner is not None + 
rolled_back_panes, rollback_errors = await asyncio.to_thread( + _rollback_created_panes, + runner, + created_pane_order, + ) return RunTmuxOperationsResult( succeeded=succeeded, dry_run=dry_run, @@ -963,6 +998,8 @@ async def flush_pending() -> bool: dispatches=dispatches, steps=steps, created_panes=created_panes, + rolled_back_panes=rolled_back_panes, + rollback_errors=rollback_errors, ) diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 42aa89c..1215e84 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -13,6 +13,7 @@ from libtmux_mcp._utils import ExpectedToolError from libtmux_mcp.models import ( CapturePaneOperation, + RunTmuxOperationsResult, SetOptionOperation, SplitPaneOperation, TmuxOperation, @@ -622,6 +623,69 @@ def test_run_tmux_operations_marked_split_failure_skips_later_ops( assert server.cmd("show-option", "-gv", "@cc_ops_after_marked_failure").stdout == [] +class RollbackCase(t.NamedTuple): + """Case for rollback of created panes.""" + + test_id: str + rollback_on_error: bool + expect_rollback: bool + + +@pytest.mark.parametrize( + "case", + [ + RollbackCase( + test_id="enabled", + rollback_on_error=True, + expect_rollback=True, + ), + RollbackCase( + test_id="disabled", + rollback_on_error=False, + expect_rollback=False, + ), + ], + ids=lambda case: case.test_id, +) +def test_run_tmux_operations_rolls_back_created_panes( + case: RollbackCase, + mcp_server: Server, + mcp_pane: Pane, +) -> None: + """Rollback kills panes created before a later operation fails.""" + result: RunTmuxOperationsResult | None = None + try: + result = asyncio.run( + run_tmux_operations( + operations=[ + SplitPaneOperation(ref="child", pane_id=mcp_pane.pane_id), + TmuxSendKeysOperation( + pane_id="%999999", + keys="bad", + enter=False, + ), + ], + rollback_on_error=case.rollback_on_error, + socket_name=mcp_server.socket_name, + ), + ) + + assert not result.succeeded + new_pane_id = result.created_panes["child"] + assert 
result.rollback_errors == [] + assert result.rolled_back_panes == ( + [new_pane_id] if case.expect_rollback else [] + ) + mcp_pane.window.refresh() + pane_ids = [pane.pane_id for pane in mcp_pane.window.panes] + assert (new_pane_id not in pane_ids) is case.expect_rollback + finally: + if result is not None and not case.expect_rollback: + pane_id = result.created_panes.get("child") + if pane_id is not None: + mcp_server.cmd("kill-pane", "-t", pane_id) + + class ValidationCase(t.NamedTuple): """Case for typed operation validation failures.""" From 8ab51dafa9b63933371f790683cc9d50cbf5b079 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sat, 20 Jun 2026 19:19:15 -0500 Subject: [PATCH 15/25] py(deps): Pin libtmux control runner why: The MCP chain tools need the pushed libtmux chain control-mode surface that preserves per-command results. what: - Update the libtmux git pin to 6fc3db63 - Refresh uv.lock for the new pinned revision --- pyproject.toml | 2 +- uv.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 62813b1..e9f7dac 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.uv.sources] -libtmux = { git = "https://github.com/tmux-python/libtmux.git", rev = "591a312f78d165816bb95a035a46219657c4b53d" } +libtmux = { git = "https://github.com/tmux-python/libtmux.git", rev = "6fc3db63e3d4dde6f1bcb0116c3d6468ca5adb61" } [tool.uv.exclude-newer-package] # git-pull packages release in lockstep with their workspaces, so a diff --git a/uv.lock b/uv.lock index e1e5ac4..7954959 100644 --- a/uv.lock +++ b/uv.lock @@ -1179,7 +1179,7 @@ wheels = [ [[package]] name = "libtmux" version = "0.58.1" -source = { git = "https://github.com/tmux-python/libtmux.git?rev=591a312f78d165816bb95a035a46219657c4b53d#591a312f78d165816bb95a035a46219657c4b53d" } +source = { git = 
"https://github.com/tmux-python/libtmux.git?rev=6fc3db63e3d4dde6f1bcb0116c3d6468ca5adb61#6fc3db63e3d4dde6f1bcb0116c3d6468ca5adb61" } [[package]] name = "libtmux-mcp" @@ -1242,7 +1242,7 @@ testing = [ [package.metadata] requires-dist = [ { name = "fastmcp", specifier = ">=3.4.2,<4.0.0" }, - { name = "libtmux", git = "https://github.com/tmux-python/libtmux.git?rev=591a312f78d165816bb95a035a46219657c4b53d" }, + { name = "libtmux", git = "https://github.com/tmux-python/libtmux.git?rev=6fc3db63e3d4dde6f1bcb0116c3d6468ca5adb61" }, ] [package.metadata.requires-dev] From 05365ed669698d1f23a238eeb0b91b3cf4bfbe9d Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 03:39:10 -0500 Subject: [PATCH 16/25] Chain(feat[tools]): Default control transport why: A folded ";" dispatch returns one merged result, so chained operations lost their own stdout and a single failure aborted the rest. libtmux's ControlModeRunner returns one %begin/%end/%error block per command over a persistent "tmux -C" connection. what: - Add transport="subprocess"|"control" to run_tmux_operations, default control - Route dispatch through ControlModeRunner.run_calls for per-operation results - Skip the {marked} split fold under control (splits self-capture their id) - Close the control connection after each call - Pin the {marked}-specific tests to subprocess; add per-operation attribution - Note control transport in the unreleased changelog --- CHANGES | 9 + src/libtmux_mcp/tools/chain_tools.py | 589 ++++++++++++++++----------- tests/test_chain_tools.py | 48 +++ 3 files changed, 403 insertions(+), 243 deletions(-) diff --git a/CHANGES b/CHANGES index 7480c34..a634722 100644 --- a/CHANGES +++ b/CHANGES @@ -20,6 +20,15 @@ split refs when a later operation fails. It keeps {tooliconl}`call-mutating-tools-batch` available for workflows that need to call arbitrary MCP tools instead of this tool's typed operation set. 
+**Per-operation results over tmux control mode** + +{tooliconl}`run-tmux-operations` dispatches over a persistent `tmux -C` +control connection by default (`transport="control"`), so each operation in a +folded chain keeps its own stdout and return code and one operation's failure +does not abort the rest. Pass `transport="subprocess"` to fold into a single +native `tmux a ; b ; c` sequence that returns one merged result and aborts on +the first error. + ## libtmux-mcp 0.1.0a14 (2026-06-14) libtmux-mcp 0.1.0a14 adds tier-aware tool batching. {tooliconl}`call-readonly-tools-batch`, {tooliconl}`call-mutating-tools-batch`, and {tooliconl}`call-destructive-tools-batch` run an ordered list of existing MCP tools in a single call and return a per-operation result for each, preserving every nested tool's own structured output. Each wrapper caps the safety tier of the calls it will make — regardless of the server's `LIBTMUX_SAFETY` tier — and `on_error` selects stop-at-first-failure or continue-and-report handling. Aggregate results stay within the server's response limit. 
diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index 9cae052..ea10838 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -3,6 +3,7 @@ from __future__ import annotations import asyncio +import dataclasses import typing as t from libtmux._experimental.chain import ( @@ -13,6 +14,7 @@ CommandRunner, CommandScope, CommandScopeError, + ControlModeRunner, ensure_chainable, validate_command_scope, ) @@ -62,6 +64,28 @@ class _CompileError(Exception): """Operation-level compile failure that should become a step result.""" +@dataclasses.dataclass +class _CombinedResult: + """A ``CommandResultLike`` merging several control-mode command results.""" + + stdout: list[str] + stderr: list[str] + returncode: int + + +def _combine_results( + results: t.Sequence[CommandResultLike], +) -> _CombinedResult: + """Merge per-command results; the first non-zero return code wins.""" + stdout = [line for result in results for line in result.stdout] + stderr = [line for result in results for line in result.stderr] + returncode = next( + (result.returncode for result in results if result.returncode != 0), + 0, + ) + return _CombinedResult(stdout=stdout, stderr=stderr, returncode=returncode) + + def _operation_scope(operation: TmuxOperation) -> CommandScope: """Return the tmux target scope for one typed operation.""" if isinstance( @@ -304,6 +328,9 @@ def _run_calls( calls: tuple[CommandCall, ...], ) -> tuple[list[str], CommandResultLike]: """Run one operation's calls as a single native dispatch.""" + if isinstance(runner, ControlModeRunner): + results = runner.run_calls(calls) + return _calls_argv(calls), _combine_results(results) if len(calls) == 1: argv = _calls_argv(calls) result = runner.cmd(argv[0], *argv[1:]) @@ -421,11 +448,61 @@ def _dispatch_marked_split( return dispatch, steps, created_pane_id +def _dispatch_chain_control( + runner: ControlModeRunner, + pending: list[_PendingCalls], +) -> 
tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: + """Run pending operations over control mode, one result per operation.""" + flat_calls: list[CommandCall] = [] + owners: list[int] = [] + for index, _kind, op_calls in pending: + for call in op_calls: + flat_calls.append(call) + owners.append(index) + results = runner.run_calls(tuple(flat_calls)) + by_index: dict[int, list[CommandResultLike]] = {} + for owner_index, result in zip(owners, results, strict=True): + by_index.setdefault(owner_index, []).append(result) + + steps: list[TmuxOperationStepResult] = [] + overall_returncode = 0 + for index, kind, _op_calls in pending: + combined = _combine_results(by_index.get(index, [])) + if combined.returncode != 0: + overall_returncode = combined.returncode + status = ( + TmuxOperationStatus.SUCCEEDED + if combined.returncode == 0 + else TmuxOperationStatus.FAILED + ) + steps.append( + TmuxOperationStepResult( + index=index, + kind=kind, + status=status, + returncode=combined.returncode, + stdout=combined.stdout or None, + stderr=combined.stderr or None, + ) + ) + dispatch = TmuxOperationDispatchResult( + mode="chain", + operation_indexes=[index for index, _, _ in pending], + argv=_calls_argv(tuple(flat_calls)), + returncode=overall_returncode, + stdout=[line for result in results for line in result.stdout], + stderr=[line for result in results for line in result.stderr], + ) + return dispatch, steps + + def _dispatch_chain( runner: CommandRunner, pending: list[_PendingCalls], ) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: """Run pending operations as one tmux command sequence.""" + if isinstance(runner, ControlModeRunner): + return _dispatch_chain_control(runner, pending) calls = tuple(call for _, _, op_calls in pending for call in op_calls) chain = CommandChain(calls) result = chain.run(runner) @@ -694,6 +771,7 @@ async def run_tmux_operations( dry_run: bool = False, dispatch_timeout: float | None = 10.0, rollback_on_error: bool 
= False, + transport: t.Literal["subprocess", "control"] = "control", socket_name: str | None = None, ) -> RunTmuxOperationsResult: """Run typed tmux operations with minimum safe native dispatches. @@ -710,6 +788,11 @@ async def run_tmux_operations( dispatch; timed-out subprocess work may still finish in the background. ``rollback_on_error`` kills panes created by ref-producing ``split_pane`` operations when the overall operation list fails. + ``transport`` selects the dispatch backend. The default ``"control"`` uses + a persistent ``tmux -C`` connection so each operation keeps its own stdout + and return code, and a failing operation no longer aborts the rest; + ``"subprocess"`` folds into one native ``tmux a ; b ; c`` sequence that + returns a single merged result and aborts on first error. """ validated = TMUX_OPERATIONS_ADAPTER.validate_python(operations) if not validated: @@ -722,285 +805,305 @@ async def run_tmux_operations( msg = "dispatch_timeout must be greater than 0 or null" raise ExpectedToolError(msg) - runner = None if dry_run else _get_server(socket_name=socket_name) - pending: list[_PendingCalls] = [] - dispatches: list[TmuxOperationDispatchResult] = [] - steps_by_index: dict[int, TmuxOperationStepResult] = {} - created_panes: dict[str, str] = {} - created_pane_order: list[str] = [] - - def record_created_pane(ref: str, pane_id: str) -> None: - created_panes[ref] = pane_id - if pane_id not in created_pane_order: - created_pane_order.append(pane_id) - - async def flush_pending() -> bool: - if not pending: - return True - if dry_run: - dispatch, steps = _plan_chain(pending) - else: - assert runner is not None - pending_snapshot = list(pending) - try: - chain_dispatch_coro = asyncio.to_thread( - _dispatch_chain, - runner, - pending_snapshot, - ) - if dispatch_timeout is None: - dispatch, steps = await chain_dispatch_coro - else: - dispatch, steps = await asyncio.wait_for( - chain_dispatch_coro, - timeout=dispatch_timeout, + use_control = transport == 
"control" and not dry_run + runner: CommandRunner | None + if dry_run: + runner = None + elif use_control: + runner = ControlModeRunner(_get_server(socket_name=socket_name)) + else: + runner = _get_server(socket_name=socket_name) + try: + pending: list[_PendingCalls] = [] + dispatches: list[TmuxOperationDispatchResult] = [] + steps_by_index: dict[int, TmuxOperationStepResult] = {} + created_panes: dict[str, str] = {} + created_pane_order: list[str] = [] + + def record_created_pane(ref: str, pane_id: str) -> None: + created_panes[ref] = pane_id + if pane_id not in created_pane_order: + created_pane_order.append(pane_id) + + async def flush_pending() -> bool: + if not pending: + return True + if dry_run: + dispatch, steps = _plan_chain(pending) + else: + assert runner is not None + pending_snapshot = list(pending) + try: + chain_dispatch_coro = asyncio.to_thread( + _dispatch_chain, + runner, + pending_snapshot, ) - except TimeoutError: - assert dispatch_timeout is not None - dispatch, steps = _timeout_chain(pending_snapshot, dispatch_timeout) - dispatches.append(dispatch) - pending.clear() - for step in steps: - steps_by_index[step.index] = step - return _steps_succeeded(steps, dry_run=dry_run) - - index = 0 - while index < len(validated): - operation = validated[index] - try: - calls = _operation_calls(operation, created_panes) - except _CompileError as exc: - if not await flush_pending(): - for skip_index, skipped in enumerate(validated[index:], start=index): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) - break - steps_by_index[index] = _compile_failure_step(index, operation, exc) - if on_error == "stop": - for skip_index, skipped in enumerate( - validated[index + 1 :], - start=index + 1, - ): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) - break - index += 1 - continue - - if ( - on_error == "stop" - and isinstance(operation, SplitPaneOperation) - and operation.ref is not None - ): - decorates, next_index = 
_collect_marked_decorates( - validated, - index, - operation.ref, - ) - if decorates: + if dispatch_timeout is None: + dispatch, steps = await chain_dispatch_coro + else: + dispatch, steps = await asyncio.wait_for( + chain_dispatch_coro, + timeout=dispatch_timeout, + ) + except TimeoutError: + assert dispatch_timeout is not None + dispatch, steps = _timeout_chain(pending_snapshot, dispatch_timeout) + dispatches.append(dispatch) + pending.clear() + for step in steps: + steps_by_index[step.index] = step + return _steps_succeeded(steps, dry_run=dry_run) + + index = 0 + while index < len(validated): + operation = validated[index] + try: + calls = _operation_calls(operation, created_panes) + except _CompileError as exc: if not await flush_pending(): for skip_index, skipped in enumerate( validated[index:], start=index ): steps_by_index[skip_index] = _skipped_step(skip_index, skipped) break - try: - marked_calls = _marked_split_calls( - operation, - calls, - decorates, - created_panes, - ) - except _CompileError as exc: - steps_by_index[index] = _compile_failure_step( - index, - operation, - exc, - ) + steps_by_index[index] = _compile_failure_step(index, operation, exc) + if on_error == "stop": for skip_index, skipped in enumerate( validated[index + 1 :], start=index + 1, ): - steps_by_index[skip_index] = _skipped_step( - skip_index, - skipped, - ) + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) break - if dry_run: - dispatch, steps, created_pane_id = _plan_marked_split( - index, - operation, - marked_calls, - decorates, - ) - else: - assert runner is not None - decorates_snapshot = list(decorates) + index += 1 + continue + + if ( + on_error == "stop" + and not use_control + and isinstance(operation, SplitPaneOperation) + and operation.ref is not None + ): + decorates, next_index = _collect_marked_decorates( + validated, + index, + operation.ref, + ) + if decorates: + if not await flush_pending(): + for skip_index, skipped in enumerate( + 
validated[index:], start=index + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, skipped + ) + break try: - marked_dispatch_coro = asyncio.to_thread( - _dispatch_marked_split, - runner, + marked_calls = _marked_split_calls( + operation, + calls, + decorates, + created_panes, + ) + except _CompileError as exc: + steps_by_index[index] = _compile_failure_step( index, operation, - marked_calls, - decorates_snapshot, + exc, ) - if dispatch_timeout is None: - ( - dispatch, - steps, - created_pane_id, - ) = await marked_dispatch_coro - else: - dispatch, steps, created_pane_id = await asyncio.wait_for( - marked_dispatch_coro, - timeout=dispatch_timeout, + for skip_index, skipped in enumerate( + validated[index + 1 :], + start=index + 1, + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, + skipped, ) - except TimeoutError: - assert dispatch_timeout is not None - dispatch, steps, created_pane_id = _timeout_marked_split( + break + if dry_run: + dispatch, steps, created_pane_id = _plan_marked_split( index, operation, marked_calls, - decorates_snapshot, - dispatch_timeout, + decorates, ) - dispatches.append(dispatch) - for step in steps: - steps_by_index[step.index] = step - if created_pane_id is not None: - record_created_pane(operation.ref, created_pane_id) - if not _steps_succeeded(steps, dry_run=dry_run): - for skip_index, skipped in enumerate( - validated[next_index:], - start=next_index, - ): - steps_by_index[skip_index] = _skipped_step( - skip_index, - skipped, - ) - break - index = next_index - continue + else: + assert runner is not None + decorates_snapshot = list(decorates) + try: + marked_dispatch_coro = asyncio.to_thread( + _dispatch_marked_split, + runner, + index, + operation, + marked_calls, + decorates_snapshot, + ) + if dispatch_timeout is None: + ( + dispatch, + steps, + created_pane_id, + ) = await marked_dispatch_coro + else: + ( + dispatch, + steps, + created_pane_id, + ) = await asyncio.wait_for( + marked_dispatch_coro, + 
timeout=dispatch_timeout, + ) + except TimeoutError: + assert dispatch_timeout is not None + dispatch, steps, created_pane_id = _timeout_marked_split( + index, + operation, + marked_calls, + decorates_snapshot, + dispatch_timeout, + ) + dispatches.append(dispatch) + for step in steps: + steps_by_index[step.index] = step + if created_pane_id is not None: + record_created_pane(operation.ref, created_pane_id) + if not _steps_succeeded(steps, dry_run=dry_run): + for skip_index, skipped in enumerate( + validated[next_index:], + start=next_index, + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, + skipped, + ) + break + index = next_index + continue - force_standalone = on_error == "continue" or _is_output_operation(operation) - if not force_standalone: - try: - _ensure_chainable_calls(calls) - except _CompileError as exc: - if not await flush_pending(): - for skip_index, skipped in enumerate( - validated[index:], - start=index, - ): - steps_by_index[skip_index] = _skipped_step( - skip_index, - skipped, - ) - break - steps_by_index[index] = _compile_failure_step(index, operation, exc) - if on_error == "stop": - for skip_index, skipped in enumerate( - validated[index + 1 :], - start=index + 1, - ): - steps_by_index[skip_index] = _skipped_step( - skip_index, - skipped, - ) - break + force_standalone = on_error == "continue" or _is_output_operation(operation) + if not force_standalone: + try: + _ensure_chainable_calls(calls) + except _CompileError as exc: + if not await flush_pending(): + for skip_index, skipped in enumerate( + validated[index:], + start=index, + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, + skipped, + ) + break + steps_by_index[index] = _compile_failure_step(index, operation, exc) + if on_error == "stop": + for skip_index, skipped in enumerate( + validated[index + 1 :], + start=index + 1, + ): + steps_by_index[skip_index] = _skipped_step( + skip_index, + skipped, + ) + break + index += 1 + continue + pending.append((index, 
operation.kind, calls)) index += 1 continue - pending.append((index, operation.kind, calls)) - index += 1 - continue - if not await flush_pending() and on_error == "stop": - for skip_index, skipped in enumerate(validated[index:], start=index): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) - break + if not await flush_pending() and on_error == "stop": + for skip_index, skipped in enumerate(validated[index:], start=index): + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + break - capture_created_pane = ( - isinstance(operation, SplitPaneOperation) and operation.ref is not None - ) - if dry_run: - planned_pane_id = ( - _planned_pane_ref(operation.ref) - if isinstance(operation, SplitPaneOperation) - and operation.ref is not None - else None - ) - dispatch, step, created_pane_id = _plan_standalone( - index, - operation.kind, - calls, - created_pane_id=planned_pane_id if capture_created_pane else None, + capture_created_pane = ( + isinstance(operation, SplitPaneOperation) and operation.ref is not None ) - else: - assert runner is not None - try: - standalone_dispatch_coro = asyncio.to_thread( - _dispatch_standalone, - runner, - index, - operation.kind, - calls, - capture_created_pane=capture_created_pane, + if dry_run: + planned_pane_id = ( + _planned_pane_ref(operation.ref) + if isinstance(operation, SplitPaneOperation) + and operation.ref is not None + else None ) - if dispatch_timeout is None: - dispatch, step, created_pane_id = await standalone_dispatch_coro - else: - dispatch, step, created_pane_id = await asyncio.wait_for( - standalone_dispatch_coro, - timeout=dispatch_timeout, - ) - except TimeoutError: - assert dispatch_timeout is not None - dispatch, step, created_pane_id = _timeout_standalone( + dispatch, step, created_pane_id = _plan_standalone( index, operation.kind, calls, - dispatch_timeout, + created_pane_id=planned_pane_id if capture_created_pane else None, ) - dispatches.append(dispatch) - steps_by_index[index] = 
step - if ( - isinstance(operation, SplitPaneOperation) - and operation.ref is not None - and created_pane_id is not None - ): - record_created_pane(operation.ref, created_pane_id) - if not _step_succeeded(step, dry_run=dry_run) and on_error == "stop": - for skip_index, skipped in enumerate( - validated[index + 1 :], - start=index + 1, + else: + assert runner is not None + try: + standalone_dispatch_coro = asyncio.to_thread( + _dispatch_standalone, + runner, + index, + operation.kind, + calls, + capture_created_pane=capture_created_pane, + ) + if dispatch_timeout is None: + dispatch, step, created_pane_id = await standalone_dispatch_coro + else: + dispatch, step, created_pane_id = await asyncio.wait_for( + standalone_dispatch_coro, + timeout=dispatch_timeout, + ) + except TimeoutError: + assert dispatch_timeout is not None + dispatch, step, created_pane_id = _timeout_standalone( + index, + operation.kind, + calls, + dispatch_timeout, + ) + dispatches.append(dispatch) + steps_by_index[index] = step + if ( + isinstance(operation, SplitPaneOperation) + and operation.ref is not None + and created_pane_id is not None ): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) - break - index += 1 + record_created_pane(operation.ref, created_pane_id) + if not _step_succeeded(step, dry_run=dry_run) and on_error == "stop": + for skip_index, skipped in enumerate( + validated[index + 1 :], + start=index + 1, + ): + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + break + index += 1 - if pending: - await flush_pending() + if pending: + await flush_pending() - steps = [steps_by_index[index] for index in range(len(validated))] - succeeded = _steps_succeeded(steps, dry_run=dry_run) - rolled_back_panes: list[str] = [] - rollback_errors: list[str] = [] - if rollback_on_error and not dry_run and not succeeded and created_pane_order: - assert runner is not None - rolled_back_panes, rollback_errors = await asyncio.to_thread( - _rollback_created_panes, - 
runner, - created_pane_order, + steps = [steps_by_index[index] for index in range(len(validated))] + succeeded = _steps_succeeded(steps, dry_run=dry_run) + rolled_back_panes: list[str] = [] + rollback_errors: list[str] = [] + if rollback_on_error and not dry_run and not succeeded and created_pane_order: + assert runner is not None + rolled_back_panes, rollback_errors = await asyncio.to_thread( + _rollback_created_panes, + runner, + created_pane_order, + ) + return RunTmuxOperationsResult( + succeeded=succeeded, + dry_run=dry_run, + dispatch_count=len(dispatches), + dispatches=dispatches, + steps=steps, + created_panes=created_panes, + rolled_back_panes=rolled_back_panes, + rollback_errors=rollback_errors, ) - return RunTmuxOperationsResult( - succeeded=succeeded, - dry_run=dry_run, - dispatch_count=len(dispatches), - dispatches=dispatches, - steps=steps, - created_panes=created_panes, - rolled_back_panes=rolled_back_panes, - rollback_errors=rollback_errors, - ) + finally: + if isinstance(runner, ControlModeRunner): + await asyncio.to_thread(runner.close) def register(mcp: FastMCP) -> None: diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 1215e84..399e5b2 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -131,6 +131,7 @@ def test_run_tmux_operations_captures_split_refs( SplitPaneOperation(ref="child", pane_id=mcp_pane.pane_id), TmuxSendKeysOperation(pane_ref="child", keys=keys), ], + transport="subprocess", socket_name=mcp_server.socket_name, ), ) @@ -180,6 +181,51 @@ def test_run_tmux_operations_continue_uses_standalone_dispatches( assert server.cmd("show-option", "-gv", "@cc_ops_after_error").stdout == ["set"] +def test_run_tmux_operations_control_attributes_per_operation( + mcp_session: Session, +) -> None: + """Control transport (the default) gives each folded op its own verdict.""" + server = mcp_session.server + operations: list[TmuxOperation] = [ + SetOptionOperation(option="@cc_ops_cm_a", value="1", 
global_=True), + TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), + SetOptionOperation(option="@cc_ops_cm_b", value="2", global_=True), + ] + + # Default transport is control: the middle op fails but the last op still + # runs, and each op carries its own status. + control = asyncio.run( + run_tmux_operations( + operations=operations, + socket_name=server.socket_name, + ), + ) + assert not control.succeeded + assert [step.status for step in control.steps] == [ + TmuxOperationStatus.SUCCEEDED, + TmuxOperationStatus.FAILED, + TmuxOperationStatus.SUCCEEDED, + ] + assert control.steps[1].stderr is not None + assert "%999999" in "\n".join(control.steps[1].stderr) + assert server.cmd("show-option", "-gv", "@cc_ops_cm_b").stdout == ["2"] + + # The subprocess transport folds into one ';' group, which aborts on first + # error, so every step blurs into the merged failure. + native = asyncio.run( + run_tmux_operations( + operations=operations, + transport="subprocess", + socket_name=server.socket_name, + ), + ) + assert [step.status for step in native.steps] == [ + TmuxOperationStatus.FAILED, + TmuxOperationStatus.FAILED, + TmuxOperationStatus.FAILED, + ] + + class CompileContractCase(t.NamedTuple): """Case for libtmux compiler contract failures.""" @@ -478,6 +524,7 @@ def sleep_dispatch(*args: object, **kwargs: object) -> t.NoReturn: run_tmux_operations( operations=_timeout_operations(case, mcp_pane.pane_id), dispatch_timeout=0.001, + transport="subprocess", socket_name=mcp_server.socket_name, ), ) @@ -608,6 +655,7 @@ def test_run_tmux_operations_marked_split_failure_skips_later_ops( global_=True, ), ], + transport="subprocess", socket_name=server.socket_name, ), ) From 240a26681e6a86ff070a902a5070a6685baba8be Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 05:05:36 -0500 Subject: [PATCH 17/25] Chain(fix[tools]): Pin split pane directory why: A typed split's new pane took its cwd from the issuing client's context, which differs by 
transport (the subprocess client's cwd vs. the control client's), so the same split could land in different directories. what: - Pass -c "#{pane_current_path}" on split-window so the new pane inherits the target pane's directory deterministically under both transports - Cover the inherited directory for the subprocess and control transports --- src/libtmux_mcp/tools/chain_tools.py | 5 +++ tests/test_chain_tools.py | 50 ++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index ea10838..b3e7341 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -157,6 +157,11 @@ def _split_calls( args.append("-h") if operation.ref is not None: args.extend(("-P", "-F", "#{pane_id}")) + # Pin the new pane to the target pane's directory. Without ``-c`` tmux + # resolves the cwd from the issuing client's context, which differs by + # transport (the subprocess client's cwd vs. the control client's), so an + # explicit format keeps splits deterministic across both. 
+ args.extend(("-c", "#{pane_current_path}")) if operation.shell is not None: args.append(operation.shell) return ( diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 399e5b2..360b209 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -27,6 +27,8 @@ ) if t.TYPE_CHECKING: + import pathlib + from libtmux.pane import Pane from libtmux.server import Server from libtmux.session import Session @@ -226,6 +228,54 @@ def test_run_tmux_operations_control_attributes_per_operation( ] +@pytest.mark.parametrize("transport", ["subprocess", "control"]) +def test_run_tmux_operations_split_inherits_target_directory( + transport: t.Literal["subprocess", "control"], + mcp_session: Session, + tmp_path: pathlib.Path, +) -> None: + """A split's new pane inherits the split target's working directory.""" + server = mcp_session.server + target_dir = str(tmp_path) + created = server.cmd( + "new-window", + "-t", + mcp_session.session_id, + "-P", + "-F", + "#{pane_id}", + "-c", + target_dir, + ) + target_pane_id = created.stdout[0] + target_cwd = server.cmd( + "display-message", + "-t", + target_pane_id, + "-p", + "#{pane_current_path}", + ).stdout + + result = asyncio.run( + run_tmux_operations( + operations=[SplitPaneOperation(ref="child", pane_id=target_pane_id)], + transport=transport, + socket_name=server.socket_name, + ), + ) + + assert result.succeeded + new_pane_id = result.created_panes["child"] + new_cwd = server.cmd( + "display-message", + "-t", + new_pane_id, + "-p", + "#{pane_current_path}", + ).stdout + assert new_cwd == target_cwd + + class CompileContractCase(t.NamedTuple): """Case for libtmux compiler contract failures.""" From ab44cef2c1f48ca50bc3c189be296393b2d47c07 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 07:18:20 -0500 Subject: [PATCH 18/25] py(deps): Pin libtmux review fixes why: libtmux-mcp should consume the reviewed control/forward-plan fixes from the chainable-commands experiment branch. 
what: - Update the libtmux git source to 05f55e2a - Regenerate uv.lock for the same libtmux revision --- pyproject.toml | 2 +- uv.lock | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e9f7dac..7d44b27 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -112,7 +112,7 @@ requires = ["hatchling"] build-backend = "hatchling.build" [tool.uv.sources] -libtmux = { git = "https://github.com/tmux-python/libtmux.git", rev = "6fc3db63e3d4dde6f1bcb0116c3d6468ca5adb61" } +libtmux = { git = "https://github.com/tmux-python/libtmux.git", rev = "05f55e2a05fbc746924d9fbacbaf82766f9c0315" } [tool.uv.exclude-newer-package] # git-pull packages release in lockstep with their workspaces, so a diff --git a/uv.lock b/uv.lock index 7954959..40a7f82 100644 --- a/uv.lock +++ b/uv.lock @@ -1179,7 +1179,7 @@ wheels = [ [[package]] name = "libtmux" version = "0.58.1" -source = { git = "https://github.com/tmux-python/libtmux.git?rev=6fc3db63e3d4dde6f1bcb0116c3d6468ca5adb61#6fc3db63e3d4dde6f1bcb0116c3d6468ca5adb61" } +source = { git = "https://github.com/tmux-python/libtmux.git?rev=05f55e2a05fbc746924d9fbacbaf82766f9c0315#05f55e2a05fbc746924d9fbacbaf82766f9c0315" } [[package]] name = "libtmux-mcp" @@ -1242,7 +1242,7 @@ testing = [ [package.metadata] requires-dist = [ { name = "fastmcp", specifier = ">=3.4.2,<4.0.0" }, - { name = "libtmux", git = "https://github.com/tmux-python/libtmux.git?rev=6fc3db63e3d4dde6f1bcb0116c3d6468ca5adb61" }, + { name = "libtmux", git = "https://github.com/tmux-python/libtmux.git?rev=05f55e2a05fbc746924d9fbacbaf82766f9c0315" }, ] [package.metadata.requires-dev] From 82f634a634f4714a5f500a10b32cd93b60dfaf31 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 08:16:38 -0500 Subject: [PATCH 19/25] Chain(refactor[tools]): Run each operation over control why: on_error="stop" did not actually stop. 
Consecutive no-output operations folded into one control-mode batch that ran every call before any per-step status existed, so a failing middle operation still let later operations run and mutate state. Folding only ever applied under stop, and over the persistent tmux -C connection it saved nothing the connection did not already amortize, while a separate subprocess transport and a {marked} register fold added a parallel execution model with weaker failure attribution. what: - Dispatch each operation on its own over the persistent tmux -C connection so every operation keeps its own stdout and return code - Make on_error="stop" skip every operation after the first failure or unresolved target - Remove the transport parameter; control mode is the only engine - Delete the {marked} split fold, pending-chain dispatch, and the chainability gate that only mattered for folding - Update the tool docs and CHANGES to describe per-operation control dispatch --- CHANGES | 22 +- docs/tools/chain/run-tmux-operations.md | 33 +- src/libtmux_mcp/tools/chain_tools.py | 601 ++---------------------- tests/test_chain_tools.py | 362 ++++---------- 4 files changed, 154 insertions(+), 864 deletions(-) diff --git a/CHANGES b/CHANGES index a634722..6ce3e54 100644 --- a/CHANGES +++ b/CHANGES @@ -11,24 +11,18 @@ _Notes on upcoming releases will be added here_ **Typed tmux operation chains with {tooliconl}`run-tmux-operations`** {tooliconl}`run-tmux-operations` accepts an ordered list of typed tmux -operations and compiles safe no-output steps into the fewest native tmux -dispatches it can. It preserves per-operation results for standalone read -steps, returns concrete pane IDs captured from referenced splits, supports a -dry-run mode that returns the rendered dispatch plan without touching tmux, and -applies a per-dispatch timeout. 
It can also roll back panes created by typed +operations and runs each one over a persistent `tmux -C` control connection, so +every operation keeps its own stdout and return code. With `on_error="stop"` +(the default) it stops before the next operation once one fails or its target +cannot be resolved, marking the rest skipped; with `on_error="continue"` it +records each failure and runs the rest. It returns concrete pane IDs captured +from referenced splits so later operations can target them through `pane_ref`, +supports a dry-run mode that returns the rendered dispatch plan without touching +tmux, applies a per-dispatch timeout, and can roll back panes created by typed split refs when a later operation fails. It keeps {tooliconl}`call-mutating-tools-batch` available for workflows that need to call arbitrary MCP tools instead of this tool's typed operation set. -**Per-operation results over tmux control mode** - -{tooliconl}`run-tmux-operations` dispatches over a persistent `tmux -C` -control connection by default (`transport="control"`), so each operation in a -folded chain keeps its own stdout and return code and one operation's failure -does not abort the rest. Pass `transport="subprocess"` to fold into a single -native `tmux a ; b ; c` sequence that returns one merged result and aborts on -the first error. - ## libtmux-mcp 0.1.0a14 (2026-06-14) libtmux-mcp 0.1.0a14 adds tier-aware tool batching. {tooliconl}`call-readonly-tools-batch`, {tooliconl}`call-mutating-tools-batch`, and {tooliconl}`call-destructive-tools-batch` run an ordered list of existing MCP tools in a single call and return a per-operation result for each, preserving every nested tool's own structured output. Each wrapper caps the safety tier of the calls it will make — regardless of the server's `LIBTMUX_SAFETY` tier — and `on_error` selects stop-at-first-failure or continue-and-report handling. Aggregate results stay within the server's response limit. 
diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md index 25d9d66..8a70a12 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-operations.md @@ -3,25 +3,23 @@ ```{fastmcp-tool} chain_tools.run_tmux_operations ``` -**Use when** you need several typed tmux operations to run in order and -want libtmux-mcp to fold safe no-output steps into one native tmux -sequence. +**Use when** you need several typed tmux operations to run in order over +one persistent tmux control connection, with a typed result per step. **Avoid when** you need to call arbitrary MCP tools; use {tooliconl}`call-mutating-tools-batch` for that. Use individual tools when a workflow has only one step. -**Dispatch boundaries:** Output operations such as `capture_pane` run as -standalone dispatches so their stdout belongs to one step. Referenced -`split_pane` operations also run at a boundary unless their immediate -`send_keys` or `resize_pane` followers target the new pane through the -same `pane_ref`. +**Execution:** Each operation is dispatched on its own over a persistent +`tmux -C` control connection, so every operation keeps its own stdout +and return code. A `split_pane` with a `ref` returns the new pane ID in +`created_panes`, and later operations can target it through `pane_ref`. **Side effects:** Mutates tmux state according to the submitted -operation list. With `on_error="stop"`, chainable operations may share -one tmux sequence and native tmux failure semantics stop later steps. -With `on_error="continue"`, operations run as standalone dispatches so -later steps can still run after an earlier failure. +operation list. With `on_error="stop"` (the default), the tool stops +before the next operation once one fails or its target cannot be +resolved, and marks the rest `skipped`. With `on_error="continue"`, +every failure is recorded and the rest still run. 
Set `dry_run` to `true` to compile the operation list and return the rendered dispatches without touching tmux. Referenced split panes use @@ -30,20 +28,15 @@ real. `dispatch_timeout` defaults to 10 seconds and bounds how long the tool waits for each native tmux dispatch. A timed-out dispatch marks the -included operations failed with `returncode: null`; because dispatches -run in a worker thread, the underlying tmux subprocess may still finish -after the tool returns. +operation failed with `returncode: null`; because dispatches run in a +worker thread, the underlying tmux work may still finish after the tool +returns. Set `rollback_on_error` to `true` to kill panes created by ref-producing `split_pane` operations when the overall operation list fails. The result still reports `created_panes`, and adds `rolled_back_panes` plus `rollback_errors` for cleanup visibility. -An id-producing `split_pane` can fold with immediate `send_keys` or -`resize_pane` operations that target its `pane_ref`; the tool uses -tmux's `{marked}` target internally and still returns the concrete pane -ID in `created_panes`. 
- **Example:** ```json diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index b3e7341..1914c5d 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -1,4 +1,4 @@ -"""Typed MCP tool for compiling tmux operations into native dispatches.""" +"""Typed MCP tool for running tmux operations over a control connection.""" from __future__ import annotations @@ -7,15 +7,12 @@ import typing as t from libtmux._experimental.chain import ( - ChainabilityError, CommandCall, CommandChain, CommandResultLike, - CommandRunner, CommandScope, CommandScopeError, ControlModeRunner, - ensure_chainable, validate_command_scope, ) from pydantic import TypeAdapter @@ -56,9 +53,6 @@ def assert_never(value: object) -> t.NoReturn: list[TmuxOperation], ) -_PendingCalls: t.TypeAlias = tuple[int, str, tuple[CommandCall, ...]] -_MarkedDecorate: t.TypeAlias = tuple[int, TmuxOperation] - class _CompileError(Exception): """Operation-level compile failure that should become a step result.""" @@ -120,15 +114,6 @@ def _validate_operation_scope( raise _CompileError(str(exc)) from exc -def _ensure_chainable_calls(calls: tuple[CommandCall, ...]) -> None: - """Raise a compile error unless every call may fold into a tmux chain.""" - try: - for call in calls: - ensure_chainable(call.name) - except ChainabilityError as exc: - raise _CompileError(str(exc)) from exc - - def _target_pane( pane_id: str | None, pane_ref: str | None, @@ -158,9 +143,8 @@ def _split_calls( if operation.ref is not None: args.extend(("-P", "-F", "#{pane_id}")) # Pin the new pane to the target pane's directory. Without ``-c`` tmux - # resolves the cwd from the issuing client's context, which differs by - # transport (the subprocess client's cwd vs. the control client's), so an - # explicit format keeps splits deterministic across both. 
+ # resolves the cwd from the control client's context rather than the target + # pane, so an explicit format keeps splits deterministic. args.extend(("-c", "#{pane_current_path}")) if operation.shell is not None: args.append(operation.shell) @@ -278,83 +262,24 @@ def _operation_calls( return calls -def _is_output_operation(operation: TmuxOperation) -> bool: - """Return whether an operation must run outside a pending chain.""" - return isinstance(operation, CapturePaneOperation) or ( - isinstance(operation, SplitPaneOperation) and operation.ref is not None - ) - - -def _collect_marked_decorates( - operations: list[TmuxOperation], - start: int, - pane_ref: str, -) -> tuple[list[_MarkedDecorate], int]: - """Collect immediate operations that can target a fresh split via {marked}.""" - decorates: list[_MarkedDecorate] = [] - index = start + 1 - while index < len(operations): - operation = operations[index] - if ( - isinstance(operation, (TmuxSendKeysOperation, ResizePaneOperation)) - and operation.pane_id is None - and operation.pane_ref == pane_ref - ): - decorates.append((index, operation)) - index += 1 - continue - break - return decorates, index - - -def _marked_split_calls( - operation: SplitPaneOperation, - split_calls: tuple[CommandCall, ...], - decorates: list[_MarkedDecorate], - created_panes: dict[str, str], -) -> tuple[CommandCall, ...]: - """Build the folded command calls for a ref-producing split.""" - if operation.ref is None: - msg = "marked split dispatch requires a split ref" - raise _CompileError(msg) - - marked_created = {**created_panes, operation.ref: "{marked}"} - calls = [*split_calls, CommandCall("select-pane", ("-m",))] - for _, decorate in decorates: - calls.extend(_operation_calls(decorate, marked_created)) - calls.append(CommandCall("select-pane", ("-M",))) - marked_calls = tuple(calls) - _ensure_chainable_calls(marked_calls) - return marked_calls +def _calls_argv(calls: tuple[CommandCall, ...]) -> list[str]: + """Render an operation's calls 
for the dispatch record.""" + if len(calls) == 1: + return list(calls[0].argv()) + return list(CommandChain(calls).argv()) def _run_calls( - runner: CommandRunner, + runner: ControlModeRunner, calls: tuple[CommandCall, ...], ) -> tuple[list[str], CommandResultLike]: - """Run one operation's calls as a single native dispatch.""" - if isinstance(runner, ControlModeRunner): - results = runner.run_calls(calls) - return _calls_argv(calls), _combine_results(results) - if len(calls) == 1: - argv = _calls_argv(calls) - result = runner.cmd(argv[0], *argv[1:]) - return argv, result - - chain = CommandChain(calls) - result = chain.run(runner) - return list(chain.argv()), result - - -def _calls_argv(calls: tuple[CommandCall, ...]) -> list[str]: - """Render calls as one native tmux dispatch argv.""" - if len(calls) == 1: - return list(calls[0].argv()) - return list(CommandChain(calls).argv()) + """Run one operation's calls over the control connection.""" + results = runner.run_calls(calls) + return _calls_argv(calls), _combine_results(results) def _dispatch_standalone( - runner: CommandRunner, + runner: ControlModeRunner, index: int, kind: str, calls: tuple[CommandCall, ...], @@ -398,148 +323,6 @@ def _dispatch_standalone( ) -def _dispatch_marked_split( - runner: CommandRunner, - index: int, - operation: SplitPaneOperation, - calls: tuple[CommandCall, ...], - decorates: list[_MarkedDecorate], -) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult], str | None]: - """Run one id-producing split and its immediate decorates via {marked}.""" - chain = CommandChain(calls) - result = chain.run(runner) - stdout = list(result.stdout) - stderr = list(result.stderr) - created_pane_id: str | None = None - status = TmuxOperationStatus.SUCCEEDED - if result.returncode != 0: - status = TmuxOperationStatus.FAILED - elif stdout: - created_pane_id = stdout[0] - else: - status = TmuxOperationStatus.FAILED - stderr = [*stderr, "split-pane did not return a pane id"] - - dispatch = 
TmuxOperationDispatchResult( - mode="chain", - operation_indexes=[index, *(decorate_index for decorate_index, _ in decorates)], - argv=list(chain.argv()), - returncode=result.returncode, - stdout=stdout, - stderr=stderr, - ) - steps = [ - TmuxOperationStepResult( - index=index, - kind=operation.kind, - status=status, - returncode=result.returncode, - stdout=stdout, - stderr=stderr, - created_pane_id=created_pane_id, - ), - *[ - TmuxOperationStepResult( - index=decorate_index, - kind=decorate.kind, - status=status, - returncode=result.returncode, - stdout=stdout if status == TmuxOperationStatus.FAILED else None, - stderr=stderr if status == TmuxOperationStatus.FAILED else None, - ) - for decorate_index, decorate in decorates - ], - ] - return dispatch, steps, created_pane_id - - -def _dispatch_chain_control( - runner: ControlModeRunner, - pending: list[_PendingCalls], -) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: - """Run pending operations over control mode, one result per operation.""" - flat_calls: list[CommandCall] = [] - owners: list[int] = [] - for index, _kind, op_calls in pending: - for call in op_calls: - flat_calls.append(call) - owners.append(index) - results = runner.run_calls(tuple(flat_calls)) - by_index: dict[int, list[CommandResultLike]] = {} - for owner_index, result in zip(owners, results, strict=True): - by_index.setdefault(owner_index, []).append(result) - - steps: list[TmuxOperationStepResult] = [] - overall_returncode = 0 - for index, kind, _op_calls in pending: - combined = _combine_results(by_index.get(index, [])) - if combined.returncode != 0: - overall_returncode = combined.returncode - status = ( - TmuxOperationStatus.SUCCEEDED - if combined.returncode == 0 - else TmuxOperationStatus.FAILED - ) - steps.append( - TmuxOperationStepResult( - index=index, - kind=kind, - status=status, - returncode=combined.returncode, - stdout=combined.stdout or None, - stderr=combined.stderr or None, - ) - ) - dispatch = 
TmuxOperationDispatchResult( - mode="chain", - operation_indexes=[index for index, _, _ in pending], - argv=_calls_argv(tuple(flat_calls)), - returncode=overall_returncode, - stdout=[line for result in results for line in result.stdout], - stderr=[line for result in results for line in result.stderr], - ) - return dispatch, steps - - -def _dispatch_chain( - runner: CommandRunner, - pending: list[_PendingCalls], -) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: - """Run pending operations as one tmux command sequence.""" - if isinstance(runner, ControlModeRunner): - return _dispatch_chain_control(runner, pending) - calls = tuple(call for _, _, op_calls in pending for call in op_calls) - chain = CommandChain(calls) - result = chain.run(runner) - stdout = list(result.stdout) - stderr = list(result.stderr) - status = ( - TmuxOperationStatus.SUCCEEDED - if result.returncode == 0 - else TmuxOperationStatus.FAILED - ) - dispatch = TmuxOperationDispatchResult( - mode="chain", - operation_indexes=[index for index, _, _ in pending], - argv=list(chain.argv()), - returncode=result.returncode, - stdout=stdout, - stderr=stderr, - ) - steps = [ - TmuxOperationStepResult( - index=index, - kind=kind, - status=status, - returncode=result.returncode, - stdout=stdout if status == TmuxOperationStatus.FAILED else None, - stderr=stderr if status == TmuxOperationStatus.FAILED else None, - ) - for index, kind, _ in pending - ] - return dispatch, steps - - def _planned_pane_ref(ref: str) -> str: """Return the deterministic placeholder for a dry-run pane ref.""" return f"" @@ -566,7 +349,7 @@ def _plan_standalone( *, created_pane_id: str | None = None, ) -> tuple[TmuxOperationDispatchResult, TmuxOperationStepResult, str | None]: - """Return the dry-run shape for one standalone dispatch.""" + """Return the dry-run shape for one operation dispatch.""" return ( TmuxOperationDispatchResult( mode="standalone", @@ -579,49 +362,6 @@ def _plan_standalone( ) -def 
_plan_marked_split( - index: int, - operation: SplitPaneOperation, - calls: tuple[CommandCall, ...], - decorates: list[_MarkedDecorate], -) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult], str | None]: - """Return the dry-run shape for one folded split-ref dispatch.""" - created_pane_id = _planned_pane_ref(operation.ref) if operation.ref else None - return ( - TmuxOperationDispatchResult( - mode="chain", - operation_indexes=[ - index, - *(decorate_index for decorate_index, _ in decorates), - ], - argv=list(CommandChain(calls).argv()), - returncode=None, - ), - [ - _planned_step(index, operation.kind, created_pane_id), - *[ - _planned_step(decorate_index, decorate.kind) - for decorate_index, decorate in decorates - ], - ], - created_pane_id, - ) - - -def _plan_chain( - pending: list[_PendingCalls], -) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: - """Return the dry-run shape for a pending folded chain.""" - calls = tuple(call for _, _, op_calls in pending for call in op_calls) - dispatch = TmuxOperationDispatchResult( - mode="chain", - operation_indexes=[index for index, _, _ in pending], - argv=list(CommandChain(calls).argv()), - returncode=None, - ) - return dispatch, [_planned_step(index, kind) for index, kind, _ in pending] - - def _timeout_stderr(dispatch_timeout: float) -> list[str]: """Return the stderr payload for a bounded dispatch timeout.""" return [f"tmux dispatch timed out after {dispatch_timeout:g} seconds"] @@ -647,7 +387,7 @@ def _timeout_standalone( calls: tuple[CommandCall, ...], dispatch_timeout: float, ) -> tuple[TmuxOperationDispatchResult, TmuxOperationStepResult, str | None]: - """Return timeout results for one standalone dispatch.""" + """Return timeout results for one operation dispatch.""" stderr = _timeout_stderr(dispatch_timeout) return ( TmuxOperationDispatchResult( @@ -662,59 +402,8 @@ def _timeout_standalone( ) -def _timeout_marked_split( - index: int, - operation: SplitPaneOperation, - 
calls: tuple[CommandCall, ...], - decorates: list[_MarkedDecorate], - dispatch_timeout: float, -) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult], str | None]: - """Return timeout results for one folded split-ref dispatch.""" - stderr = _timeout_stderr(dispatch_timeout) - return ( - TmuxOperationDispatchResult( - mode="chain", - operation_indexes=[ - index, - *(decorate_index for decorate_index, _ in decorates), - ], - argv=list(CommandChain(calls).argv()), - returncode=None, - stderr=stderr, - ), - [ - _timeout_step(index, operation.kind, stderr), - *[ - _timeout_step(decorate_index, decorate.kind, stderr) - for decorate_index, decorate in decorates - ], - ], - None, - ) - - -def _timeout_chain( - pending: list[_PendingCalls], - dispatch_timeout: float, -) -> tuple[TmuxOperationDispatchResult, list[TmuxOperationStepResult]]: - """Return timeout results for a pending folded chain.""" - stderr = _timeout_stderr(dispatch_timeout) - calls = tuple(call for _, _, op_calls in pending for call in op_calls) - dispatch = TmuxOperationDispatchResult( - mode="chain", - operation_indexes=[index for index, _, _ in pending], - argv=list(CommandChain(calls).argv()), - returncode=None, - stderr=stderr, - ) - return ( - dispatch, - [_timeout_step(index, kind, stderr) for index, kind, _ in pending], - ) - - def _rollback_created_panes( - runner: CommandRunner, + runner: ControlModeRunner, pane_ids: list[str], ) -> tuple[list[str], list[str]]: """Kill created panes in reverse order and report cleanup failures.""" @@ -760,15 +449,6 @@ def _step_succeeded(step: TmuxOperationStepResult, *, dry_run: bool) -> bool: ) -def _steps_succeeded( - steps: t.Iterable[TmuxOperationStepResult], - *, - dry_run: bool, -) -> bool: - """Return whether every step succeeded for control-flow purposes.""" - return all(_step_succeeded(step, dry_run=dry_run) for step in steps) - - @handle_tool_errors_async async def run_tmux_operations( operations: list[TmuxOperation], @@ -776,28 +456,20 
@@ async def run_tmux_operations( dry_run: bool = False, dispatch_timeout: float | None = 10.0, rollback_on_error: bool = False, - transport: t.Literal["subprocess", "control"] = "control", socket_name: str | None = None, ) -> RunTmuxOperationsResult: - """Run typed tmux operations with minimum safe native dispatches. - - Consecutive chainable, no-output operations fold into one tmux - ``a ; b ; c`` sequence. Output operations such as ``capture_pane`` run as - standalone dispatches so their stdout can be attributed to the correct - operation. A single id-producing ``split_pane`` may still fold with - immediate decorations that target its ref through tmux's ``{marked}`` - register. - ``on_error="continue"`` disables folding because tmux sequences abort the - rest of the sequence on first failure. + """Run typed tmux operations, one dispatch per operation. + + Each operation is dispatched on its own over a persistent ``tmux -C`` + control connection, so every operation keeps its own stdout and return + code. ``on_error="stop"`` (the default) stops before the next operation + once one fails or its target cannot be resolved, marking the rest as + skipped; ``on_error="continue"`` records each failure and runs the rest. + ``dry_run`` returns the rendered dispatch plan without touching tmux. ``dispatch_timeout`` bounds how long the tool waits for one native tmux - dispatch; timed-out subprocess work may still finish in the background. + dispatch; timed-out work may still finish in the background. ``rollback_on_error`` kills panes created by ref-producing ``split_pane`` operations when the overall operation list fails. - ``transport`` selects the dispatch backend. The default ``"control"`` uses - a persistent ``tmux -C`` connection so each operation keeps its own stdout - and return code, and a failing operation no longer aborts the rest; - ``"subprocess"`` folds into one native ``tmux a ; b ; c`` sequence that - returns a single merged result and aborts on first error. 
""" validated = TMUX_OPERATIONS_ADAPTER.validate_python(operations) if not validated: @@ -810,16 +482,10 @@ async def run_tmux_operations( msg = "dispatch_timeout must be greater than 0 or null" raise ExpectedToolError(msg) - use_control = transport == "control" and not dry_run - runner: CommandRunner | None - if dry_run: - runner = None - elif use_control: + runner: ControlModeRunner | None = None + if not dry_run: runner = ControlModeRunner(_get_server(socket_name=socket_name)) - else: - runner = _get_server(socket_name=socket_name) try: - pending: list[_PendingCalls] = [] dispatches: list[TmuxOperationDispatchResult] = [] steps_by_index: dict[int, TmuxOperationStepResult] = {} created_panes: dict[str, str] = {} @@ -830,35 +496,9 @@ def record_created_pane(ref: str, pane_id: str) -> None: if pane_id not in created_pane_order: created_pane_order.append(pane_id) - async def flush_pending() -> bool: - if not pending: - return True - if dry_run: - dispatch, steps = _plan_chain(pending) - else: - assert runner is not None - pending_snapshot = list(pending) - try: - chain_dispatch_coro = asyncio.to_thread( - _dispatch_chain, - runner, - pending_snapshot, - ) - if dispatch_timeout is None: - dispatch, steps = await chain_dispatch_coro - else: - dispatch, steps = await asyncio.wait_for( - chain_dispatch_coro, - timeout=dispatch_timeout, - ) - except TimeoutError: - assert dispatch_timeout is not None - dispatch, steps = _timeout_chain(pending_snapshot, dispatch_timeout) - dispatches.append(dispatch) - pending.clear() - for step in steps: - steps_by_index[step.index] = step - return _steps_succeeded(steps, dry_run=dry_run) + def skip_rest(start: int) -> None: + for skip_index, skipped in enumerate(validated[start:], start=start): + steps_by_index[skip_index] = _skipped_step(skip_index, skipped) index = 0 while index < len(validated): @@ -866,163 +506,13 @@ async def flush_pending() -> bool: try: calls = _operation_calls(operation, created_panes) except _CompileError as 
exc: - if not await flush_pending(): - for skip_index, skipped in enumerate( - validated[index:], start=index - ): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) - break steps_by_index[index] = _compile_failure_step(index, operation, exc) if on_error == "stop": - for skip_index, skipped in enumerate( - validated[index + 1 :], - start=index + 1, - ): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + skip_rest(index + 1) break index += 1 continue - if ( - on_error == "stop" - and not use_control - and isinstance(operation, SplitPaneOperation) - and operation.ref is not None - ): - decorates, next_index = _collect_marked_decorates( - validated, - index, - operation.ref, - ) - if decorates: - if not await flush_pending(): - for skip_index, skipped in enumerate( - validated[index:], start=index - ): - steps_by_index[skip_index] = _skipped_step( - skip_index, skipped - ) - break - try: - marked_calls = _marked_split_calls( - operation, - calls, - decorates, - created_panes, - ) - except _CompileError as exc: - steps_by_index[index] = _compile_failure_step( - index, - operation, - exc, - ) - for skip_index, skipped in enumerate( - validated[index + 1 :], - start=index + 1, - ): - steps_by_index[skip_index] = _skipped_step( - skip_index, - skipped, - ) - break - if dry_run: - dispatch, steps, created_pane_id = _plan_marked_split( - index, - operation, - marked_calls, - decorates, - ) - else: - assert runner is not None - decorates_snapshot = list(decorates) - try: - marked_dispatch_coro = asyncio.to_thread( - _dispatch_marked_split, - runner, - index, - operation, - marked_calls, - decorates_snapshot, - ) - if dispatch_timeout is None: - ( - dispatch, - steps, - created_pane_id, - ) = await marked_dispatch_coro - else: - ( - dispatch, - steps, - created_pane_id, - ) = await asyncio.wait_for( - marked_dispatch_coro, - timeout=dispatch_timeout, - ) - except TimeoutError: - assert dispatch_timeout is not None - dispatch, steps, 
created_pane_id = _timeout_marked_split( - index, - operation, - marked_calls, - decorates_snapshot, - dispatch_timeout, - ) - dispatches.append(dispatch) - for step in steps: - steps_by_index[step.index] = step - if created_pane_id is not None: - record_created_pane(operation.ref, created_pane_id) - if not _steps_succeeded(steps, dry_run=dry_run): - for skip_index, skipped in enumerate( - validated[next_index:], - start=next_index, - ): - steps_by_index[skip_index] = _skipped_step( - skip_index, - skipped, - ) - break - index = next_index - continue - - force_standalone = on_error == "continue" or _is_output_operation(operation) - if not force_standalone: - try: - _ensure_chainable_calls(calls) - except _CompileError as exc: - if not await flush_pending(): - for skip_index, skipped in enumerate( - validated[index:], - start=index, - ): - steps_by_index[skip_index] = _skipped_step( - skip_index, - skipped, - ) - break - steps_by_index[index] = _compile_failure_step(index, operation, exc) - if on_error == "stop": - for skip_index, skipped in enumerate( - validated[index + 1 :], - start=index + 1, - ): - steps_by_index[skip_index] = _skipped_step( - skip_index, - skipped, - ) - break - index += 1 - continue - pending.append((index, operation.kind, calls)) - index += 1 - continue - - if not await flush_pending() and on_error == "stop": - for skip_index, skipped in enumerate(validated[index:], start=index): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) - break - capture_created_pane = ( isinstance(operation, SplitPaneOperation) and operation.ref is not None ) @@ -1037,12 +527,12 @@ async def flush_pending() -> bool: index, operation.kind, calls, - created_pane_id=planned_pane_id if capture_created_pane else None, + created_pane_id=planned_pane_id, ) else: assert runner is not None try: - standalone_dispatch_coro = asyncio.to_thread( + dispatch_coro = asyncio.to_thread( _dispatch_standalone, runner, index, @@ -1051,10 +541,10 @@ async def 
flush_pending() -> bool: capture_created_pane=capture_created_pane, ) if dispatch_timeout is None: - dispatch, step, created_pane_id = await standalone_dispatch_coro + dispatch, step, created_pane_id = await dispatch_coro else: dispatch, step, created_pane_id = await asyncio.wait_for( - standalone_dispatch_coro, + dispatch_coro, timeout=dispatch_timeout, ) except TimeoutError: @@ -1067,26 +557,17 @@ async def flush_pending() -> bool: ) dispatches.append(dispatch) steps_by_index[index] = step - if ( - isinstance(operation, SplitPaneOperation) - and operation.ref is not None - and created_pane_id is not None - ): + if capture_created_pane and created_pane_id is not None: + assert isinstance(operation, SplitPaneOperation) + assert operation.ref is not None record_created_pane(operation.ref, created_pane_id) if not _step_succeeded(step, dry_run=dry_run) and on_error == "stop": - for skip_index, skipped in enumerate( - validated[index + 1 :], - start=index + 1, - ): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + skip_rest(index + 1) break index += 1 - if pending: - await flush_pending() - steps = [steps_by_index[index] for index in range(len(validated))] - succeeded = _steps_succeeded(steps, dry_run=dry_run) + succeeded = all(_step_succeeded(step, dry_run=dry_run) for step in steps) rolled_back_panes: list[str] = [] rollback_errors: list[str] = [] if rollback_on_error and not dry_run and not succeeded and created_pane_order: @@ -1107,7 +588,7 @@ async def flush_pending() -> bool: rollback_errors=rollback_errors, ) finally: - if isinstance(runner, ControlModeRunner): + if runner is not None: await asyncio.to_thread(runner.close) diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 360b209..0f902a1 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -7,7 +7,7 @@ import typing as t import pytest -from libtmux._experimental.chain import ChainabilityError, CommandScopeError +from libtmux._experimental.chain 
import CommandScopeError from pydantic import ValidationError from libtmux_mcp._utils import ExpectedToolError @@ -34,58 +34,40 @@ from libtmux.session import Session -class SetOptionChainCase(t.NamedTuple): - """Case for option operations that can fold into one dispatch.""" - - test_id: str - operations: list[TmuxOperation] - expected_values: dict[str, str] - - -@pytest.mark.parametrize( - "case", - [ - SetOptionChainCase( - test_id="two_global_options", - operations=[ - SetOptionOperation(option="@cc_ops_a", value="1", global_=True), - SetOptionOperation(option="@cc_ops_b", value="2", global_=True), - ], - expected_values={"@cc_ops_a": "1", "@cc_ops_b": "2"}, - ), - ], - ids=lambda case: case.test_id, -) -def test_run_tmux_operations_folds_chainable_ops( - case: SetOptionChainCase, +def test_run_tmux_operations_dispatches_each_op_standalone( mcp_session: Session, ) -> None: - """Consecutive no-output mutating operations use one native chain.""" + """Each operation runs as its own control-mode dispatch.""" server = mcp_session.server result = asyncio.run( run_tmux_operations( - operations=case.operations, + operations=[ + SetOptionOperation(option="@cc_ops_a", value="1", global_=True), + SetOptionOperation(option="@cc_ops_b", value="2", global_=True), + ], socket_name=server.socket_name, ), ) assert result.succeeded - assert result.dispatch_count == 1 - assert result.dispatches[0].mode == "chain" - assert ";" in result.dispatches[0].argv + assert result.dispatch_count == 2 + assert [dispatch.mode for dispatch in result.dispatches] == [ + "standalone", + "standalone", + ] assert [step.status for step in result.steps] == [ TmuxOperationStatus.SUCCEEDED, TmuxOperationStatus.SUCCEEDED, ] - for option, value in case.expected_values.items(): - assert server.cmd("show-option", "-gv", option).stdout == [value] + assert server.cmd("show-option", "-gv", "@cc_ops_a").stdout == ["1"] + assert server.cmd("show-option", "-gv", "@cc_ops_b").stdout == ["2"] -def 
test_run_tmux_operations_breaks_before_output_op( +def test_run_tmux_operations_capture_returns_stdout( mcp_server: Server, mcp_pane: Pane, ) -> None: - """Read operations force a standalone dispatch with per-step stdout.""" + """A read operation returns its own stdout on its own step.""" from libtmux_mcp.tools.wait_for_tools import wait_for_channel channel = "cc_ops_capture" @@ -111,7 +93,7 @@ def test_run_tmux_operations_breaks_before_output_op( assert result.succeeded assert result.dispatch_count == 2 assert [dispatch.mode for dispatch in result.dispatches] == [ - "chain", + "standalone", "standalone", ] assert result.steps[1].stdout is not None @@ -133,15 +115,14 @@ def test_run_tmux_operations_captures_split_refs( SplitPaneOperation(ref="child", pane_id=mcp_pane.pane_id), TmuxSendKeysOperation(pane_ref="child", keys=keys), ], - transport="subprocess", socket_name=mcp_server.socket_name, ), ) assert result.succeeded - assert result.dispatch_count == 1 - assert result.dispatches[0].mode == "chain" - assert result.dispatches[0].operation_indexes == [0, 1] + assert result.dispatch_count == 2 + assert result.dispatches[0].operation_indexes == [0] + assert result.dispatches[1].operation_indexes == [1] new_pane_id = result.created_panes["child"] assert new_pane_id.startswith("%") @@ -154,10 +135,10 @@ def test_run_tmux_operations_captures_split_refs( assert "CC_OPS_REF" in "\n".join(new_pane.capture_pane()) -def test_run_tmux_operations_continue_uses_standalone_dispatches( +def test_run_tmux_operations_continue_runs_later_ops( mcp_session: Session, ) -> None: - """Continue mode preserves later operations instead of native chain abort.""" + """Continue mode records each failure and runs the rest.""" server = mcp_session.server result = asyncio.run( run_tmux_operations( @@ -183,54 +164,37 @@ def test_run_tmux_operations_continue_uses_standalone_dispatches( assert server.cmd("show-option", "-gv", "@cc_ops_after_error").stdout == ["set"] -def 
test_run_tmux_operations_control_attributes_per_operation( +def test_run_tmux_operations_stop_halts_after_failure( mcp_session: Session, ) -> None: - """Control transport (the default) gives each folded op its own verdict.""" + """Stop mode (the default) skips every operation after the first failure.""" server = mcp_session.server - operations: list[TmuxOperation] = [ - SetOptionOperation(option="@cc_ops_cm_a", value="1", global_=True), - TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), - SetOptionOperation(option="@cc_ops_cm_b", value="2", global_=True), - ] - - # Default transport is control: the middle op fails but the last op still - # runs, and each op carries its own status. - control = asyncio.run( + result = asyncio.run( run_tmux_operations( - operations=operations, + operations=[ + SetOptionOperation(option="@cc_ops_cm_a", value="1", global_=True), + TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), + SetOptionOperation(option="@cc_ops_cm_b", value="2", global_=True), + ], socket_name=server.socket_name, ), ) - assert not control.succeeded - assert [step.status for step in control.steps] == [ - TmuxOperationStatus.SUCCEEDED, - TmuxOperationStatus.FAILED, - TmuxOperationStatus.SUCCEEDED, - ] - assert control.steps[1].stderr is not None - assert "%999999" in "\n".join(control.steps[1].stderr) - assert server.cmd("show-option", "-gv", "@cc_ops_cm_b").stdout == ["2"] - # The subprocess transport folds into one ';' group, which aborts on first - # error, so every step blurs into the merged failure. 
- native = asyncio.run( - run_tmux_operations( - operations=operations, - transport="subprocess", - socket_name=server.socket_name, - ), - ) - assert [step.status for step in native.steps] == [ - TmuxOperationStatus.FAILED, - TmuxOperationStatus.FAILED, + assert not result.succeeded + assert result.dispatch_count == 2 + assert [step.status for step in result.steps] == [ + TmuxOperationStatus.SUCCEEDED, TmuxOperationStatus.FAILED, + TmuxOperationStatus.SKIPPED, ] + assert result.steps[1].stderr is not None + assert "%999999" in "\n".join(result.steps[1].stderr) + # The first op ran; the op after the failure never dispatched. + assert server.cmd("show-option", "-gv", "@cc_ops_cm_a").stdout == ["1"] + assert server.cmd("show-option", "-gv", "@cc_ops_cm_b").stdout == [] -@pytest.mark.parametrize("transport", ["subprocess", "control"]) def test_run_tmux_operations_split_inherits_target_directory( - transport: t.Literal["subprocess", "control"], mcp_session: Session, tmp_path: pathlib.Path, ) -> None: @@ -259,7 +223,6 @@ def test_run_tmux_operations_split_inherits_target_directory( result = asyncio.run( run_tmux_operations( operations=[SplitPaneOperation(ref="child", pane_id=target_pane_id)], - transport=transport, socket_name=server.socket_name, ), ) @@ -276,64 +239,26 @@ def test_run_tmux_operations_split_inherits_target_directory( assert new_cwd == target_cwd -class CompileContractCase(t.NamedTuple): - """Case for libtmux compiler contract failures.""" - - test_id: str - contract: t.Literal["chainable", "scope"] - expected_error: str - - -@pytest.mark.parametrize( - "case", - [ - CompileContractCase( - test_id="chainability_contract", - contract="chainable", - expected_error="not chainable from test", - ), - CompileContractCase( - test_id="scope_contract", - contract="scope", - expected_error="wrong scope from test", - ), - ], - ids=lambda case: case.test_id, -) -def test_run_tmux_operations_surfaces_libtmux_contract_errors( - case: CompileContractCase, +def 
test_run_tmux_operations_surfaces_libtmux_scope_error( mcp_session: Session, monkeypatch: pytest.MonkeyPatch, ) -> None: - """The compiler reports libtmux contract drift as an operation failure. + """The compiler reports a libtmux scope-contract failure as a step failure. The contract metadata is static, so this uses monkeypatch instead of a - tmux fixture to simulate libtmux rejecting a command. + tmux fixture to simulate libtmux rejecting a command's target scope. """ - if case.contract == "chainable": - - def fail_chainable(command_name: str) -> None: - msg = f"{command_name} {case.expected_error}" - raise ChainabilityError(msg) - - monkeypatch.setattr( - chain_tools, - "ensure_chainable", - fail_chainable, - raising=False, - ) - else: - def fail_scope(command_name: str, target_scope: str) -> None: - msg = f"{command_name} {target_scope} {case.expected_error}" - raise CommandScopeError(msg) + def fail_scope(command_name: str, target_scope: str) -> None: + msg = f"{command_name} {target_scope} wrong scope from test" + raise CommandScopeError(msg) - monkeypatch.setattr( - chain_tools, - "validate_command_scope", - fail_scope, - raising=False, - ) + monkeypatch.setattr( + chain_tools, + "validate_command_scope", + fail_scope, + raising=False, + ) result = asyncio.run( run_tmux_operations( @@ -352,40 +277,20 @@ def fail_scope(command_name: str, target_scope: str) -> None: assert result.dispatch_count == 0 assert result.steps[0].status == TmuxOperationStatus.FAILED assert result.steps[0].stderr is not None - assert case.expected_error in result.steps[0].stderr[0] - + assert "wrong scope from test" in result.steps[0].stderr[0] -class DryRunSetOptionCase(t.NamedTuple): - """Case for dry-run option chains.""" - test_id: str - operations: list[TmuxOperation] - absent_options: list[str] - - -@pytest.mark.parametrize( - "case", - [ - DryRunSetOptionCase( - test_id="folded_global_options", - operations=[ - SetOptionOperation(option="@cc_ops_dry_a", value="1", global_=True), 
- SetOptionOperation(option="@cc_ops_dry_b", value="2", global_=True), - ], - absent_options=["@cc_ops_dry_a", "@cc_ops_dry_b"], - ), - ], - ids=lambda case: case.test_id, -) def test_run_tmux_operations_dry_run_plans_without_mutating( - case: DryRunSetOptionCase, mcp_session: Session, ) -> None: """Dry-run returns planned dispatches without changing tmux state.""" server = mcp_session.server result = asyncio.run( run_tmux_operations( - operations=case.operations, + operations=[ + SetOptionOperation(option="@cc_ops_dry_a", value="1", global_=True), + SetOptionOperation(option="@cc_ops_dry_b", value="2", global_=True), + ], dry_run=True, socket_name=server.socket_name, ), @@ -393,40 +298,22 @@ def test_run_tmux_operations_dry_run_plans_without_mutating( assert result.succeeded assert result.dry_run - assert result.dispatch_count == 1 - assert result.dispatches[0].mode == "chain" - assert result.dispatches[0].returncode is None - assert ";" in result.dispatches[0].argv + assert result.dispatch_count == 2 + assert [dispatch.mode for dispatch in result.dispatches] == [ + "standalone", + "standalone", + ] + assert all(dispatch.returncode is None for dispatch in result.dispatches) assert [step.status for step in result.steps] == [ TmuxOperationStatus.PLANNED, TmuxOperationStatus.PLANNED, ] assert all(step.returncode is None for step in result.steps) - for option in case.absent_options: + for option in ("@cc_ops_dry_a", "@cc_ops_dry_b"): assert server.cmd("show-option", "-gv", option).stdout == [] -class DryRunSplitRefCase(t.NamedTuple): - """Case for dry-run split refs.""" - - test_id: str - ref: str - keys: str - - -@pytest.mark.parametrize( - "case", - [ - DryRunSplitRefCase( - test_id="marked_split_ref", - ref="child", - keys="printf 'DRY_RUN_REF\\n'", - ), - ], - ids=lambda case: case.test_id, -) -def test_run_tmux_operations_dry_run_plans_marked_split_ref( - case: DryRunSplitRefCase, +def test_run_tmux_operations_dry_run_plans_split_ref( mcp_server: Server, 
mcp_pane: Pane, ) -> None: @@ -437,22 +324,22 @@ def test_run_tmux_operations_dry_run_plans_marked_split_ref( result = asyncio.run( run_tmux_operations( operations=[ - SplitPaneOperation(ref=case.ref, pane_id=mcp_pane.pane_id), - TmuxSendKeysOperation(pane_ref=case.ref, keys=case.keys), + SplitPaneOperation(ref="child", pane_id=mcp_pane.pane_id), + TmuxSendKeysOperation(pane_ref="child", keys="printf 'DRY_RUN_REF\\n'"), ], dry_run=True, socket_name=mcp_server.socket_name, ), ) - placeholder = f"" + placeholder = "" assert result.succeeded assert result.dry_run - assert result.dispatch_count == 1 - assert result.dispatches[0].mode == "chain" - assert result.dispatches[0].returncode is None - assert result.dispatches[0].operation_indexes == [0, 1] - assert result.created_panes == {case.ref: placeholder} + assert result.dispatch_count == 2 + assert result.dispatches[0].operation_indexes == [0] + assert result.dispatches[1].operation_indexes == [1] + assert all(dispatch.returncode is None for dispatch in result.dispatches) + assert result.created_panes == {"child": placeholder} assert result.steps[0].status == TmuxOperationStatus.PLANNED assert result.steps[0].created_pane_id == placeholder assert result.steps[1].status == TmuxOperationStatus.PLANNED @@ -461,11 +348,11 @@ def test_run_tmux_operations_dry_run_plans_marked_split_ref( assert len(mcp_pane.window.panes) == pane_count -def test_run_tmux_operations_dry_run_continues_after_pending_plan( +def test_run_tmux_operations_dry_run_plans_output_ops( mcp_server: Server, mcp_pane: Pane, ) -> None: - """Dry-run treats planned pending dispatches as successful.""" + """Dry-run plans read operations as planned standalone dispatches.""" result = asyncio.run( run_tmux_operations( operations=[ @@ -484,7 +371,7 @@ def test_run_tmux_operations_dry_run_continues_after_pending_plan( assert result.succeeded assert result.dispatch_count == 2 assert [dispatch.mode for dispatch in result.dispatches] == [ - "chain", + "standalone", 
"standalone", ] assert [step.status for step in result.steps] == [ @@ -493,73 +380,15 @@ def test_run_tmux_operations_dry_run_continues_after_pending_plan( ] -class TimeoutDispatchCase(t.NamedTuple): - """Case for bounded native dispatch execution.""" - - test_id: str - helper_name: str - path: t.Literal["chain", "standalone", "marked"] - expected_mode: t.Literal["chain", "standalone"] - expected_indexes: list[int] - - -def _timeout_operations( - case: TimeoutDispatchCase, - pane_id: str, -) -> list[TmuxOperation]: - """Return operations that route through the case's dispatch helper.""" - if case.path == "chain": - return [ - SetOptionOperation(option="@cc_ops_timeout_a", value="1", global_=True), - SetOptionOperation(option="@cc_ops_timeout_b", value="2", global_=True), - ] - if case.path == "standalone": - return [CapturePaneOperation(pane_id=pane_id)] - if case.path == "marked": - return [ - SplitPaneOperation(ref="child", pane_id=pane_id), - TmuxSendKeysOperation(pane_ref="child", keys="echo timeout"), - ] - raise AssertionError(case.path) - - -@pytest.mark.parametrize( - "case", - [ - TimeoutDispatchCase( - test_id="pending_chain", - helper_name="_dispatch_chain", - path="chain", - expected_mode="chain", - expected_indexes=[0, 1], - ), - TimeoutDispatchCase( - test_id="standalone_output", - helper_name="_dispatch_standalone", - path="standalone", - expected_mode="standalone", - expected_indexes=[0], - ), - TimeoutDispatchCase( - test_id="marked_split", - helper_name="_dispatch_marked_split", - path="marked", - expected_mode="chain", - expected_indexes=[0, 1], - ), - ], - ids=lambda case: case.test_id, -) def test_run_tmux_operations_dispatch_timeout( - case: TimeoutDispatchCase, mcp_server: Server, mcp_pane: Pane, monkeypatch: pytest.MonkeyPatch, ) -> None: - """A dispatch timeout returns failed per-operation results. + """A dispatch timeout returns a failed per-operation result. 
- The dispatch helpers are synchronous wrappers around tmux subprocesses, so - this uses monkeypatch rather than a blocking tmux command. + The dispatch helper is a synchronous wrapper around tmux, so this uses + monkeypatch rather than a blocking tmux command. """ def sleep_dispatch(*args: object, **kwargs: object) -> t.NoReturn: @@ -567,32 +396,27 @@ def sleep_dispatch(*args: object, **kwargs: object) -> t.NoReturn: msg = "dispatch should have timed out" raise AssertionError(msg) - monkeypatch.setattr(chain_tools, case.helper_name, sleep_dispatch) + monkeypatch.setattr(chain_tools, "_dispatch_standalone", sleep_dispatch) assert mcp_pane.pane_id is not None result = asyncio.run( run_tmux_operations( - operations=_timeout_operations(case, mcp_pane.pane_id), + operations=[CapturePaneOperation(pane_id=mcp_pane.pane_id)], dispatch_timeout=0.001, - transport="subprocess", socket_name=mcp_server.socket_name, ), ) assert not result.succeeded assert result.dispatch_count == 1 - assert result.dispatches[0].mode == case.expected_mode - assert result.dispatches[0].operation_indexes == case.expected_indexes + assert result.dispatches[0].mode == "standalone" + assert result.dispatches[0].operation_indexes == [0] assert result.dispatches[0].returncode is None assert result.dispatches[0].stderr == [ "tmux dispatch timed out after 0.001 seconds", ] - assert [step.status for step in result.steps] == [ - TmuxOperationStatus.FAILED, - *[TmuxOperationStatus.FAILED for _ in case.expected_indexes[1:]], - ] - assert all(step.returncode is None for step in result.steps) - assert all(step.stderr == result.dispatches[0].stderr for step in result.steps) + assert result.steps[0].status == TmuxOperationStatus.FAILED + assert result.steps[0].stderr == result.dispatches[0].stderr class TimeoutValidationCase(t.NamedTuple): @@ -654,7 +478,7 @@ class CompileErrorPathCase(t.NamedTuple): expected_error="unknown pane_ref: missing", ), CompileErrorPathCase( - 
test_id="pending_failure_before_compile_error", + test_id="failure_before_compile_error", operations=[ TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), TmuxSendKeysOperation(pane_ref="missing", keys="bad", enter=False), @@ -673,7 +497,7 @@ def test_run_tmux_operations_compile_error_paths( case: CompileErrorPathCase, mcp_session: Session, ) -> None: - """Compile errors report directly unless pending work fails first.""" + """Compile errors report directly; stop mode skips operations after them.""" result = asyncio.run( run_tmux_operations( operations=case.operations, @@ -689,10 +513,10 @@ def test_run_tmux_operations_compile_error_paths( assert result.steps[0].stderr == [case.expected_error] -def test_run_tmux_operations_marked_split_failure_skips_later_ops( +def test_run_tmux_operations_split_failure_skips_later_ops( mcp_session: Session, ) -> None: - """A failed marked split skips operations after its folded decorations.""" + """A failed split skips every later operation under stop mode.""" server = mcp_session.server result = asyncio.run( run_tmux_operations( @@ -700,25 +524,23 @@ def test_run_tmux_operations_marked_split_failure_skips_later_ops( SplitPaneOperation(ref="child", pane_id="%999999"), TmuxSendKeysOperation(pane_ref="child", keys="bad", enter=False), SetOptionOperation( - option="@cc_ops_after_marked_failure", + option="@cc_ops_after_split_failure", value="set", global_=True, ), ], - transport="subprocess", socket_name=server.socket_name, ), ) assert not result.succeeded assert result.dispatch_count == 1 - assert result.dispatches[0].operation_indexes == [0, 1] assert [step.status for step in result.steps] == [ TmuxOperationStatus.FAILED, - TmuxOperationStatus.FAILED, + TmuxOperationStatus.SKIPPED, TmuxOperationStatus.SKIPPED, ] - assert server.cmd("show-option", "-gv", "@cc_ops_after_marked_failure").stdout == [] + assert server.cmd("show-option", "-gv", "@cc_ops_after_split_failure").stdout == [] class RollbackCase(t.NamedTuple): 
From f97abcaf47b7ce64b2d8ef44637efbb540c56601 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 08:25:34 -0500 Subject: [PATCH 20/25] Chain(feat[tools]): Type each operation result by kind why: the operation input was a discriminated union but the result was a flat model whose stdout, stderr, returncode, and created_pane_id were all optional, so a caller had to know out of band which fields each kind populated. The dispatch records (rendered argv, counts, mode) sat on the primary result and leaked the compiler's mechanism onto every response. what: - Return one typed result per operation, discriminated by kind: capture_pane carries lines, split_pane carries pane_id, and the rest carry status only, with an error message on failure - Move the per-dispatch records behind an explain flag, returned under diagnostics, and shrink each record to one operation's argv and output - Update the tool docs, CHANGES, and the autodoc model list for the new result models, and rework the tests around the typed steps --- CHANGES | 20 +-- docs/conf.py | 5 +- docs/tools/chain/run-tmux-operations.md | 12 +- src/libtmux_mcp/models.py | 84 ++++++++--- src/libtmux_mcp/tools/chain_tools.py | 185 +++++++++++++++--------- tests/test_chain_tools.py | 125 ++++++++-------- 6 files changed, 264 insertions(+), 167 deletions(-) diff --git a/CHANGES b/CHANGES index 6ce3e54..ec3746d 100644 --- a/CHANGES +++ b/CHANGES @@ -11,15 +11,17 @@ _Notes on upcoming releases will be added here_ **Typed tmux operation chains with {tooliconl}`run-tmux-operations`** {tooliconl}`run-tmux-operations` accepts an ordered list of typed tmux -operations and runs each one over a persistent `tmux -C` control connection, so -every operation keeps its own stdout and return code. With `on_error="stop"` -(the default) it stops before the next operation once one fails or its target -cannot be resolved, marking the rest skipped; with `on_error="continue"` it -records each failure and runs the rest. 
It returns concrete pane IDs captured -from referenced splits so later operations can target them through `pane_ref`, -supports a dry-run mode that returns the rendered dispatch plan without touching -tmux, applies a per-dispatch timeout, and can roll back panes created by typed -split refs when a later operation fails. It keeps +operations and runs each one over a persistent `tmux -C` control connection. It +returns one typed result per operation, discriminated by `kind`: `capture_pane` +returns its `lines`, `split_pane` returns the new `pane_id`, and the rest return +status only. With `on_error="stop"` (the default) it stops before the next +operation once one fails or its target cannot be resolved, marking the rest +skipped; with `on_error="continue"` it records each failure and runs the rest. +It returns concrete pane IDs captured from referenced splits so later operations +can target them through `pane_ref`, supports a dry-run mode that returns the +planned steps without touching tmux, applies a per-dispatch timeout, and can +roll back panes created by typed split refs when a later operation fails. Pass +`explain=true` to attach per-dispatch diagnostics under `diagnostics`. It keeps {tooliconl}`call-mutating-tools-batch` available for workflows that need to call arbitrary MCP tools instead of this tool's typed operation set. 
diff --git a/docs/conf.py b/docs/conf.py index 100e3b7..f550a9b 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -169,8 +169,11 @@ def _patched_tool_collector_tool(self: ToolCollector, **kwargs: t.Any) -> t.Any: "SelectLayoutOperation", "SetOptionOperation", "CapturePaneOperation", - "TmuxOperationStepResult", + "SplitPaneStepResult", + "CapturePaneStepResult", + "OperationStepResult", "TmuxOperationDispatchResult", + "RunTmuxDiagnostics", "RunTmuxOperationsResult", "ToolCallOperation", "ToolCallOperationResult", diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md index 8a70a12..3f2ff91 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-operations.md @@ -11,9 +11,15 @@ one persistent tmux control connection, with a typed result per step. when a workflow has only one step. **Execution:** Each operation is dispatched on its own over a persistent -`tmux -C` control connection, so every operation keeps its own stdout -and return code. A `split_pane` with a `ref` returns the new pane ID in -`created_panes`, and later operations can target it through `pane_ref`. +`tmux -C` control connection, so every operation keeps its own result. A +`split_pane` with a `ref` returns the new pane ID in `created_panes`, and +later operations can target it through `pane_ref`. + +**Results:** `steps` carries one typed result per operation, discriminated +by `kind`: `capture_pane` returns its `lines`, `split_pane` returns the +new `pane_id`, and the rest return status only. Each step also carries an +`error` message when it fails. Pass `explain` to attach per-dispatch +diagnostics (rendered argv and raw stdout/stderr) under `diagnostics`. **Side effects:** Mutates tmux state according to the submitted operation list. 
With `on_error="stop"` (the default), the tool stops diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 82b47ac..5f657e4 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -832,58 +832,92 @@ def _validate_target(self) -> CapturePaneOperation: ] -class TmuxOperationStepResult(BaseModel): - """Result for one typed operation.""" +class SplitPaneStepResult(BaseModel): + """Result for one ``split_pane`` operation.""" + kind: t.Literal["split_pane"] = Field( + default="split_pane", + description="Operation kind discriminator.", + ) index: int = Field(description="Zero-based operation index.") - kind: str = Field(description="Operation kind.") status: TmuxOperationStatus = Field(description="Execution status.") - returncode: int | None = Field( + pane_id: str | None = Field( default=None, - description="tmux return code when the operation was dispatched.", + description="Concrete pane ID created by a ref-producing split, if any.", ) - stdout: list[str] | None = Field( + error: str | None = Field( default=None, - description="stdout lines for standalone/output operations.", + description="Failure message when the operation failed.", ) - stderr: list[str] | None = Field( + + +class CapturePaneStepResult(BaseModel): + """Result for one ``capture_pane`` operation.""" + + kind: t.Literal["capture_pane"] = Field( + default="capture_pane", + description="Operation kind discriminator.", + ) + index: int = Field(description="Zero-based operation index.") + status: TmuxOperationStatus = Field(description="Execution status.") + lines: list[str] | None = Field( default=None, - description="stderr lines for failed or standalone operations.", + description="Captured pane lines on success.", ) - created_pane_id: str | None = Field( + error: str | None = Field( default=None, - description="Pane ID captured from a split_pane operation with ref.", + description="Failure message when the operation failed.", ) -class 
TmuxOperationDispatchResult(BaseModel): - """Result for one native tmux dispatch.""" +class OperationStepResult(BaseModel): + """Result for an operation that returns status only.""" - mode: t.Literal["chain", "standalone"] = Field( - description="Whether the dispatch used a tmux sequence or one command.", + kind: t.Literal["send_keys", "resize_pane", "select_layout", "set_option"] = Field( + description="Operation kind discriminator.", ) - operation_indexes: list[int] = Field( - description="Operation indexes included in this dispatch.", + index: int = Field(description="Zero-based operation index.") + status: TmuxOperationStatus = Field(description="Execution status.") + error: str | None = Field( + default=None, + description="Failure message when the operation failed.", ) + + +TmuxStepResult: t.TypeAlias = t.Annotated[ + SplitPaneStepResult | CapturePaneStepResult | OperationStepResult, + Field(discriminator="kind"), +] + + +class TmuxOperationDispatchResult(BaseModel): + """Diagnostics for one native tmux dispatch.""" + + index: int = Field(description="Operation index this dispatch ran.") argv: list[str] = Field(description="Rendered tmux argv.") returncode: int | None = Field(description="tmux process exit code, if run.") stdout: list[str] = Field(default_factory=list, description="stdout lines.") stderr: list[str] = Field(default_factory=list, description="stderr lines.") +class RunTmuxDiagnostics(BaseModel): + """Dispatch diagnostics returned only when ``explain`` is set.""" + + dispatch_count: int = Field(description="Number of native tmux dispatches.") + dispatches: list[TmuxOperationDispatchResult] = Field( + description="Native tmux dispatches used to run the operations.", + ) + + class RunTmuxOperationsResult(BaseModel): - """Result of compiling and running typed tmux operations.""" + """Result of running typed tmux operations.""" succeeded: bool = Field(description="False when any operation failed or skipped.") dry_run: bool = Field( default=False, 
description="True when dispatches were planned but not executed.", ) - dispatch_count: int = Field(description="Number of native tmux dispatches.") - dispatches: list[TmuxOperationDispatchResult] = Field( - description="Native tmux dispatches used by the compiler.", - ) - steps: list[TmuxOperationStepResult] = Field( + steps: list[TmuxStepResult] = Field( description="Per-operation results in input order.", ) created_panes: dict[str, str] = Field( @@ -898,3 +932,7 @@ class RunTmuxOperationsResult(BaseModel): default_factory=list, description="Errors raised while rolling back created panes.", ) + diagnostics: RunTmuxDiagnostics | None = Field( + default=None, + description="Dispatch diagnostics, present only when explain is set.", + ) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index 1914c5d..db4e8cc 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -26,16 +26,20 @@ ) from libtmux_mcp.models import ( CapturePaneOperation, + CapturePaneStepResult, + OperationStepResult, ResizePaneOperation, + RunTmuxDiagnostics, RunTmuxOperationsResult, SelectLayoutOperation, SetOptionOperation, SplitPaneOperation, + SplitPaneStepResult, TmuxOperation, TmuxOperationDispatchResult, TmuxOperationStatus, - TmuxOperationStepResult, TmuxSendKeysOperation, + TmuxStepResult, ) if t.TYPE_CHECKING: @@ -58,6 +62,18 @@ class _CompileError(Exception): """Operation-level compile failure that should become a step result.""" +@dataclasses.dataclass +class _Outcome: + """Internal per-operation outcome before shaping into a typed result.""" + + index: int + kind: str + status: TmuxOperationStatus + stdout: list[str] = dataclasses.field(default_factory=list) + stderr: list[str] = dataclasses.field(default_factory=list) + created_pane_id: str | None = None + + @dataclasses.dataclass class _CombinedResult: """A ``CommandResultLike`` merging several control-mode command results.""" @@ -285,8 +301,8 @@ def 
_dispatch_standalone( calls: tuple[CommandCall, ...], *, capture_created_pane: bool, -) -> tuple[TmuxOperationDispatchResult, TmuxOperationStepResult, str | None]: - """Run one operation and return dispatch, step, and captured pane id.""" +) -> tuple[TmuxOperationDispatchResult, _Outcome, str | None]: + """Run one operation and return dispatch, outcome, and captured pane id.""" argv, result = _run_calls(runner, calls) stdout = list(result.stdout) stderr = list(result.stderr) @@ -303,18 +319,16 @@ def _dispatch_standalone( return ( TmuxOperationDispatchResult( - mode="standalone", - operation_indexes=[index], + index=index, argv=argv, returncode=result.returncode, stdout=stdout, stderr=stderr, ), - TmuxOperationStepResult( + _Outcome( index=index, kind=kind, status=status, - returncode=result.returncode, stdout=stdout, stderr=stderr, created_pane_id=created_pane_id, @@ -328,36 +342,26 @@ def _planned_pane_ref(ref: str) -> str: return f"" -def _planned_step( - index: int, - kind: str, - created_pane_id: str | None = None, -) -> TmuxOperationStepResult: - """Return a planned step result for dry-run compilation.""" - return TmuxOperationStepResult( - index=index, - kind=kind, - status=TmuxOperationStatus.PLANNED, - created_pane_id=created_pane_id, - ) - - def _plan_standalone( index: int, kind: str, calls: tuple[CommandCall, ...], *, created_pane_id: str | None = None, -) -> tuple[TmuxOperationDispatchResult, TmuxOperationStepResult, str | None]: +) -> tuple[TmuxOperationDispatchResult, _Outcome, str | None]: """Return the dry-run shape for one operation dispatch.""" return ( TmuxOperationDispatchResult( - mode="standalone", - operation_indexes=[index], + index=index, argv=_calls_argv(calls), returncode=None, ), - _planned_step(index, kind, created_pane_id), + _Outcome( + index=index, + kind=kind, + status=TmuxOperationStatus.PLANNED, + created_pane_id=created_pane_id, + ), created_pane_id, ) @@ -367,37 +371,27 @@ def _timeout_stderr(dispatch_timeout: float) -> 
list[str]: return [f"tmux dispatch timed out after {dispatch_timeout:g} seconds"] -def _timeout_step( - index: int, - kind: str, - stderr: list[str], -) -> TmuxOperationStepResult: - """Return a failed step for a dispatch timeout.""" - return TmuxOperationStepResult( - index=index, - kind=kind, - status=TmuxOperationStatus.FAILED, - stderr=stderr, - ) - - def _timeout_standalone( index: int, kind: str, calls: tuple[CommandCall, ...], dispatch_timeout: float, -) -> tuple[TmuxOperationDispatchResult, TmuxOperationStepResult, str | None]: +) -> tuple[TmuxOperationDispatchResult, _Outcome, str | None]: """Return timeout results for one operation dispatch.""" stderr = _timeout_stderr(dispatch_timeout) return ( TmuxOperationDispatchResult( - mode="standalone", - operation_indexes=[index], + index=index, argv=_calls_argv(calls), returncode=None, stderr=stderr, ), - _timeout_step(index, kind, stderr), + _Outcome( + index=index, + kind=kind, + status=TmuxOperationStatus.FAILED, + stderr=stderr, + ), None, ) @@ -419,13 +413,13 @@ def _rollback_created_panes( return rolled_back_panes, rollback_errors -def _compile_failure_step( +def _compile_failure_outcome( index: int, operation: TmuxOperation, error: Exception, -) -> TmuxOperationStepResult: - """Convert a compile failure into a step result.""" - return TmuxOperationStepResult( +) -> _Outcome: + """Convert a compile failure into an outcome.""" + return _Outcome( index=index, kind=operation.kind, status=TmuxOperationStatus.FAILED, @@ -433,19 +427,51 @@ def _compile_failure_step( ) -def _skipped_step(index: int, operation: TmuxOperation) -> TmuxOperationStepResult: - """Return a skipped result for an operation after stop-on-error.""" - return TmuxOperationStepResult( +def _skipped_outcome(index: int, operation: TmuxOperation) -> _Outcome: + """Return a skipped outcome for an operation after stop-on-error.""" + return _Outcome( index=index, kind=operation.kind, status=TmuxOperationStatus.SKIPPED, ) -def _step_succeeded(step: 
TmuxOperationStepResult, *, dry_run: bool) -> bool: - """Return whether a step should allow later operations to continue.""" - return step.status == TmuxOperationStatus.SUCCEEDED or ( - dry_run and step.status == TmuxOperationStatus.PLANNED +def _outcome_succeeded(outcome: _Outcome, *, dry_run: bool) -> bool: + """Return whether an outcome should allow later operations to continue.""" + return outcome.status == TmuxOperationStatus.SUCCEEDED or ( + dry_run and outcome.status == TmuxOperationStatus.PLANNED + ) + + +def _to_step_result(outcome: _Outcome) -> TmuxStepResult: + """Shape an internal outcome into the typed, per-kind step result.""" + error = "\n".join(outcome.stderr) if outcome.stderr else None + if outcome.kind == "split_pane": + return SplitPaneStepResult( + index=outcome.index, + status=outcome.status, + pane_id=outcome.created_pane_id, + error=error, + ) + if outcome.kind == "capture_pane": + lines = ( + outcome.stdout if outcome.status == TmuxOperationStatus.SUCCEEDED else None + ) + return CapturePaneStepResult( + index=outcome.index, + status=outcome.status, + lines=lines, + error=error, + ) + status_kind = t.cast( + "t.Literal['send_keys', 'resize_pane', 'select_layout', 'set_option']", + outcome.kind, + ) + return OperationStepResult( + kind=status_kind, + index=outcome.index, + status=outcome.status, + error=error, ) @@ -456,20 +482,27 @@ async def run_tmux_operations( dry_run: bool = False, dispatch_timeout: float | None = 10.0, rollback_on_error: bool = False, + explain: bool = False, socket_name: str | None = None, ) -> RunTmuxOperationsResult: """Run typed tmux operations, one dispatch per operation. Each operation is dispatched on its own over a persistent ``tmux -C`` control connection, so every operation keeps its own stdout and return - code. 
``on_error="stop"`` (the default) stops before the next operation - once one fails or its target cannot be resolved, marking the rest as - skipped; ``on_error="continue"`` records each failure and runs the rest. - ``dry_run`` returns the rendered dispatch plan without touching tmux. + code. The result carries one typed, per-kind ``steps`` entry per + operation: ``capture_pane`` returns ``lines``, ``split_pane`` returns + ``pane_id``, and the rest return status only. + + ``on_error="stop"`` (the default) stops before the next operation once one + fails or its target cannot be resolved, marking the rest as skipped; + ``on_error="continue"`` records each failure and runs the rest. + ``dry_run`` returns the planned steps without touching tmux. ``dispatch_timeout`` bounds how long the tool waits for one native tmux dispatch; timed-out work may still finish in the background. ``rollback_on_error`` kills panes created by ref-producing ``split_pane`` operations when the overall operation list fails. + ``explain`` attaches per-dispatch diagnostics (rendered argv and raw + stdout/stderr) under ``diagnostics``. 
""" validated = TMUX_OPERATIONS_ADAPTER.validate_python(operations) if not validated: @@ -487,7 +520,7 @@ async def run_tmux_operations( runner = ControlModeRunner(_get_server(socket_name=socket_name)) try: dispatches: list[TmuxOperationDispatchResult] = [] - steps_by_index: dict[int, TmuxOperationStepResult] = {} + outcomes_by_index: dict[int, _Outcome] = {} created_panes: dict[str, str] = {} created_pane_order: list[str] = [] @@ -498,7 +531,7 @@ def record_created_pane(ref: str, pane_id: str) -> None: def skip_rest(start: int) -> None: for skip_index, skipped in enumerate(validated[start:], start=start): - steps_by_index[skip_index] = _skipped_step(skip_index, skipped) + outcomes_by_index[skip_index] = _skipped_outcome(skip_index, skipped) index = 0 while index < len(validated): @@ -506,7 +539,9 @@ def skip_rest(start: int) -> None: try: calls = _operation_calls(operation, created_panes) except _CompileError as exc: - steps_by_index[index] = _compile_failure_step(index, operation, exc) + outcomes_by_index[index] = _compile_failure_outcome( + index, operation, exc + ) if on_error == "stop": skip_rest(index + 1) break @@ -523,7 +558,7 @@ def skip_rest(start: int) -> None: and operation.ref is not None else None ) - dispatch, step, created_pane_id = _plan_standalone( + dispatch, outcome, created_pane_id = _plan_standalone( index, operation.kind, calls, @@ -541,33 +576,35 @@ def skip_rest(start: int) -> None: capture_created_pane=capture_created_pane, ) if dispatch_timeout is None: - dispatch, step, created_pane_id = await dispatch_coro + dispatch, outcome, created_pane_id = await dispatch_coro else: - dispatch, step, created_pane_id = await asyncio.wait_for( + dispatch, outcome, created_pane_id = await asyncio.wait_for( dispatch_coro, timeout=dispatch_timeout, ) except TimeoutError: assert dispatch_timeout is not None - dispatch, step, created_pane_id = _timeout_standalone( + dispatch, outcome, created_pane_id = _timeout_standalone( index, operation.kind, calls, 
dispatch_timeout, ) dispatches.append(dispatch) - steps_by_index[index] = step + outcomes_by_index[index] = outcome if capture_created_pane and created_pane_id is not None: assert isinstance(operation, SplitPaneOperation) assert operation.ref is not None record_created_pane(operation.ref, created_pane_id) - if not _step_succeeded(step, dry_run=dry_run) and on_error == "stop": + if not _outcome_succeeded(outcome, dry_run=dry_run) and on_error == "stop": skip_rest(index + 1) break index += 1 - steps = [steps_by_index[index] for index in range(len(validated))] - succeeded = all(_step_succeeded(step, dry_run=dry_run) for step in steps) + outcomes = [outcomes_by_index[index] for index in range(len(validated))] + succeeded = all( + _outcome_succeeded(outcome, dry_run=dry_run) for outcome in outcomes + ) rolled_back_panes: list[str] = [] rollback_errors: list[str] = [] if rollback_on_error and not dry_run and not succeeded and created_pane_order: @@ -577,15 +614,19 @@ def skip_rest(start: int) -> None: runner, created_pane_order, ) + diagnostics = ( + RunTmuxDiagnostics(dispatch_count=len(dispatches), dispatches=dispatches) + if explain + else None + ) return RunTmuxOperationsResult( succeeded=succeeded, dry_run=dry_run, - dispatch_count=len(dispatches), - dispatches=dispatches, - steps=steps, + steps=[_to_step_result(outcome) for outcome in outcomes], created_panes=created_panes, rolled_back_panes=rolled_back_panes, rollback_errors=rollback_errors, + diagnostics=diagnostics, ) finally: if runner is not None: diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 0f902a1..13281a3 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -13,9 +13,11 @@ from libtmux_mcp._utils import ExpectedToolError from libtmux_mcp.models import ( CapturePaneOperation, + CapturePaneStepResult, RunTmuxOperationsResult, SetOptionOperation, SplitPaneOperation, + SplitPaneStepResult, TmuxOperation, TmuxOperationStatus, TmuxSendKeysOperation, @@ -34,10 
+36,10 @@ from libtmux.session import Session -def test_run_tmux_operations_dispatches_each_op_standalone( +def test_run_tmux_operations_runs_each_operation( mcp_session: Session, ) -> None: - """Each operation runs as its own control-mode dispatch.""" + """Each operation runs and reports its own typed status.""" server = mcp_session.server result = asyncio.run( run_tmux_operations( @@ -50,24 +52,45 @@ def test_run_tmux_operations_dispatches_each_op_standalone( ) assert result.succeeded - assert result.dispatch_count == 2 - assert [dispatch.mode for dispatch in result.dispatches] == [ - "standalone", - "standalone", - ] assert [step.status for step in result.steps] == [ TmuxOperationStatus.SUCCEEDED, TmuxOperationStatus.SUCCEEDED, ] + assert result.diagnostics is None assert server.cmd("show-option", "-gv", "@cc_ops_a").stdout == ["1"] assert server.cmd("show-option", "-gv", "@cc_ops_b").stdout == ["2"] -def test_run_tmux_operations_capture_returns_stdout( +def test_run_tmux_operations_explain_attaches_diagnostics( + mcp_session: Session, +) -> None: + """``explain`` attaches one per-operation dispatch record.""" + server = mcp_session.server + result = asyncio.run( + run_tmux_operations( + operations=[ + SetOptionOperation(option="@cc_ops_x", value="1", global_=True), + SetOptionOperation(option="@cc_ops_y", value="2", global_=True), + ], + explain=True, + socket_name=server.socket_name, + ), + ) + + assert result.succeeded + assert result.diagnostics is not None + assert result.diagnostics.dispatch_count == 2 + assert [dispatch.index for dispatch in result.diagnostics.dispatches] == [0, 1] + assert all( + dispatch.argv[0] == "set-option" for dispatch in result.diagnostics.dispatches + ) + + +def test_run_tmux_operations_capture_returns_lines( mcp_server: Server, mcp_pane: Pane, ) -> None: - """A read operation returns its own stdout on its own step.""" + """A read operation returns its own captured lines on its own step.""" from libtmux_mcp.tools.wait_for_tools 
import wait_for_channel channel = "cc_ops_capture" @@ -91,13 +114,10 @@ def test_run_tmux_operations_capture_returns_stdout( ) assert result.succeeded - assert result.dispatch_count == 2 - assert [dispatch.mode for dispatch in result.dispatches] == [ - "standalone", - "standalone", - ] - assert result.steps[1].stdout is not None - assert "CC_OPS_CAPTURE" in "\n".join(result.steps[1].stdout) + capture = result.steps[1] + assert isinstance(capture, CapturePaneStepResult) + assert capture.lines is not None + assert "CC_OPS_CAPTURE" in "\n".join(capture.lines) def test_run_tmux_operations_captures_split_refs( @@ -120,11 +140,11 @@ def test_run_tmux_operations_captures_split_refs( ) assert result.succeeded - assert result.dispatch_count == 2 - assert result.dispatches[0].operation_indexes == [0] - assert result.dispatches[1].operation_indexes == [1] + split = result.steps[0] + assert isinstance(split, SplitPaneStepResult) new_pane_id = result.created_panes["child"] assert new_pane_id.startswith("%") + assert split.pane_id == new_pane_id asyncio.run( wait_for_channel(channel, timeout=5.0, socket_name=mcp_server.socket_name) @@ -156,7 +176,6 @@ def test_run_tmux_operations_continue_runs_later_ops( ) assert not result.succeeded - assert result.dispatch_count == 2 assert [step.status for step in result.steps] == [ TmuxOperationStatus.FAILED, TmuxOperationStatus.SUCCEEDED, @@ -181,14 +200,13 @@ def test_run_tmux_operations_stop_halts_after_failure( ) assert not result.succeeded - assert result.dispatch_count == 2 assert [step.status for step in result.steps] == [ TmuxOperationStatus.SUCCEEDED, TmuxOperationStatus.FAILED, TmuxOperationStatus.SKIPPED, ] - assert result.steps[1].stderr is not None - assert "%999999" in "\n".join(result.steps[1].stderr) + assert result.steps[1].error is not None + assert "%999999" in result.steps[1].error # The first op ran; the op after the failure never dispatched. 
assert server.cmd("show-option", "-gv", "@cc_ops_cm_a").stdout == ["1"] assert server.cmd("show-option", "-gv", "@cc_ops_cm_b").stdout == [] @@ -269,21 +287,23 @@ def fail_scope(command_name: str, target_scope: str) -> None: global_=True, ), ], + explain=True, socket_name=mcp_session.server.socket_name, ), ) assert not result.succeeded - assert result.dispatch_count == 0 + assert result.diagnostics is not None + assert result.diagnostics.dispatch_count == 0 assert result.steps[0].status == TmuxOperationStatus.FAILED - assert result.steps[0].stderr is not None - assert "wrong scope from test" in result.steps[0].stderr[0] + assert result.steps[0].error is not None + assert "wrong scope from test" in result.steps[0].error def test_run_tmux_operations_dry_run_plans_without_mutating( mcp_session: Session, ) -> None: - """Dry-run returns planned dispatches without changing tmux state.""" + """Dry-run returns planned steps without changing tmux state.""" server = mcp_session.server result = asyncio.run( run_tmux_operations( @@ -292,23 +312,22 @@ def test_run_tmux_operations_dry_run_plans_without_mutating( SetOptionOperation(option="@cc_ops_dry_b", value="2", global_=True), ], dry_run=True, + explain=True, socket_name=server.socket_name, ), ) assert result.succeeded assert result.dry_run - assert result.dispatch_count == 2 - assert [dispatch.mode for dispatch in result.dispatches] == [ - "standalone", - "standalone", - ] - assert all(dispatch.returncode is None for dispatch in result.dispatches) + assert result.diagnostics is not None + assert result.diagnostics.dispatch_count == 2 + assert all( + dispatch.returncode is None for dispatch in result.diagnostics.dispatches + ) assert [step.status for step in result.steps] == [ TmuxOperationStatus.PLANNED, TmuxOperationStatus.PLANNED, ] - assert all(step.returncode is None for step in result.steps) for option in ("@cc_ops_dry_a", "@cc_ops_dry_b"): assert server.cmd("show-option", "-gv", option).stdout == [] @@ -335,13 +354,11 
@@ def test_run_tmux_operations_dry_run_plans_split_ref( placeholder = "" assert result.succeeded assert result.dry_run - assert result.dispatch_count == 2 - assert result.dispatches[0].operation_indexes == [0] - assert result.dispatches[1].operation_indexes == [1] - assert all(dispatch.returncode is None for dispatch in result.dispatches) assert result.created_panes == {"child": placeholder} - assert result.steps[0].status == TmuxOperationStatus.PLANNED - assert result.steps[0].created_pane_id == placeholder + split = result.steps[0] + assert isinstance(split, SplitPaneStepResult) + assert split.status == TmuxOperationStatus.PLANNED + assert split.pane_id == placeholder assert result.steps[1].status == TmuxOperationStatus.PLANNED mcp_pane.window.refresh() @@ -352,7 +369,7 @@ def test_run_tmux_operations_dry_run_plans_output_ops( mcp_server: Server, mcp_pane: Pane, ) -> None: - """Dry-run plans read operations as planned standalone dispatches.""" + """Dry-run plans read operations as planned steps.""" result = asyncio.run( run_tmux_operations( operations=[ @@ -369,11 +386,6 @@ def test_run_tmux_operations_dry_run_plans_output_ops( ) assert result.succeeded - assert result.dispatch_count == 2 - assert [dispatch.mode for dispatch in result.dispatches] == [ - "standalone", - "standalone", - ] assert [step.status for step in result.steps] == [ TmuxOperationStatus.PLANNED, TmuxOperationStatus.PLANNED, @@ -403,20 +415,21 @@ def sleep_dispatch(*args: object, **kwargs: object) -> t.NoReturn: run_tmux_operations( operations=[CapturePaneOperation(pane_id=mcp_pane.pane_id)], dispatch_timeout=0.001, + explain=True, socket_name=mcp_server.socket_name, ), ) assert not result.succeeded - assert result.dispatch_count == 1 - assert result.dispatches[0].mode == "standalone" - assert result.dispatches[0].operation_indexes == [0] - assert result.dispatches[0].returncode is None - assert result.dispatches[0].stderr == [ + assert result.diagnostics is not None + assert 
result.diagnostics.dispatch_count == 1 + assert result.diagnostics.dispatches[0].index == 0 + assert result.diagnostics.dispatches[0].returncode is None + assert result.diagnostics.dispatches[0].stderr == [ "tmux dispatch timed out after 0.001 seconds", ] assert result.steps[0].status == TmuxOperationStatus.FAILED - assert result.steps[0].stderr == result.dispatches[0].stderr + assert result.steps[0].error == "tmux dispatch timed out after 0.001 seconds" class TimeoutValidationCase(t.NamedTuple): @@ -460,7 +473,6 @@ class CompileErrorPathCase(t.NamedTuple): test_id: str operations: list[TmuxOperation] - expected_dispatch_count: int expected_statuses: list[TmuxOperationStatus] expected_error: str | None @@ -473,7 +485,6 @@ class CompileErrorPathCase(t.NamedTuple): operations=[ TmuxSendKeysOperation(pane_ref="missing", keys="bad", enter=False), ], - expected_dispatch_count=0, expected_statuses=[TmuxOperationStatus.FAILED], expected_error="unknown pane_ref: missing", ), @@ -483,7 +494,6 @@ class CompileErrorPathCase(t.NamedTuple): TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), TmuxSendKeysOperation(pane_ref="missing", keys="bad", enter=False), ], - expected_dispatch_count=1, expected_statuses=[ TmuxOperationStatus.FAILED, TmuxOperationStatus.SKIPPED, @@ -506,11 +516,9 @@ def test_run_tmux_operations_compile_error_paths( ) assert not result.succeeded - assert result.dispatch_count == case.expected_dispatch_count assert [step.status for step in result.steps] == case.expected_statuses if case.expected_error is not None: - assert result.steps[0].stderr is not None - assert result.steps[0].stderr == [case.expected_error] + assert result.steps[0].error == case.expected_error def test_run_tmux_operations_split_failure_skips_later_ops( @@ -534,7 +542,6 @@ def test_run_tmux_operations_split_failure_skips_later_ops( ) assert not result.succeeded - assert result.dispatch_count == 1 assert [step.status for step in result.steps] == [ TmuxOperationStatus.FAILED, 
TmuxOperationStatus.SKIPPED, From 5ad5e56095764cb24f12b3d85e5d5c3d051b3e93 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 08:35:00 -0500 Subject: [PATCH 21/25] Chain(feat[tools]): Take a typed pane target why: every pane operation carried pane_id and pane_ref as two nullable fields guarded by a model validator that enforced exactly one of them, repeated across four operations. Two nullable fields plus a validator is easy for a caller to get wrong and gives the schema no single, self-describing place for the target. what: - Replace the pane_id/pane_ref pair with one discriminated target union (PaneIdTarget for a concrete pane, RefTarget for a name minted by an earlier split), removing the four repeated validators - Resolve a ref target against the panes created earlier in the same list - Keep targets to the two that resolve unambiguously over a detached control connection; relative and active targets depend on a client's current pane, which a control connection does not track reliably - Update the tool docs, CHANGES, autodoc model list, and tests --- CHANGES | 12 +-- docs/conf.py | 2 + docs/tools/chain/run-tmux-operations.md | 12 ++- src/libtmux_mcp/models.py | 74 ++++++++++--------- src/libtmux_mcp/tools/chain_tools.py | 37 +++++----- tests/test_chain_tools.py | 97 ++++++++++++++++++++----- 6 files changed, 155 insertions(+), 79 deletions(-) diff --git a/CHANGES b/CHANGES index ec3746d..b038b57 100644 --- a/CHANGES +++ b/CHANGES @@ -17,11 +17,13 @@ returns its `lines`, `split_pane` returns the new `pane_id`, and the rest return status only. With `on_error="stop"` (the default) it stops before the next operation once one fails or its target cannot be resolved, marking the rest skipped; with `on_error="continue"` it records each failure and runs the rest. 
-It returns concrete pane IDs captured from referenced splits so later operations -can target them through `pane_ref`, supports a dry-run mode that returns the -planned steps without touching tmux, applies a per-dispatch timeout, and can -roll back panes created by typed split refs when a later operation fails. Pass -`explain=true` to attach per-dispatch diagnostics under `diagnostics`. It keeps +Each pane operation takes one typed `target`, discriminated by `kind`: a +concrete `pane_id` or a `ref` minted by an earlier split. It +returns concrete pane IDs captured from referenced splits so later operations +can target them, supports a dry-run mode that returns the planned steps without +touching tmux, applies a per-dispatch timeout, and can roll back panes created +by typed split refs when a later operation fails. Pass `explain=true` to attach +per-dispatch diagnostics under `diagnostics`. It keeps {tooliconl}`call-mutating-tools-batch` available for workflows that need to call arbitrary MCP tools instead of this tool's typed operation set. diff --git a/docs/conf.py b/docs/conf.py index f550a9b..34affd2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -163,6 +163,8 @@ def _patched_tool_collector_tool(self: ToolCollector, **kwargs: t.Any) -> t.Any: "SendKeysOperation", "SendKeysOperationResult", "SendKeysBatchResult", + "PaneIdTarget", + "RefTarget", "SplitPaneOperation", "TmuxSendKeysOperation", "ResizePaneOperation", diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-operations.md index 3f2ff91..bfb84a9 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-operations.md @@ -13,7 +13,11 @@ when a workflow has only one step. **Execution:** Each operation is dispatched on its own over a persistent `tmux -C` control connection, so every operation keeps its own result. A `split_pane` with a `ref` returns the new pane ID in `created_panes`, and -later operations can target it through `pane_ref`. 
+later operations can target it with a `ref` target. + +**Targets:** Each pane operation takes one typed `target`, discriminated by +`kind`: `pane_id` (a concrete `%id`) or `ref` (a name minted by an earlier +`split_pane`). **Results:** `steps` carries one typed result per operation, discriminated by `kind`: `capture_pane` returns its `lines`, `split_pane` returns the @@ -50,8 +54,10 @@ fails. The result still reports `created_panes`, and adds "tool": "run_tmux_operations", "arguments": { "operations": [ - {"kind": "split_pane", "pane_id": "%1", "ref": "work"}, - {"kind": "send_keys", "pane_ref": "work", "keys": "uv run pytest"} + {"kind": "split_pane", "target": {"kind": "pane_id", "pane_id": "%1"}, + "ref": "work"}, + {"kind": "send_keys", "target": {"kind": "ref", "ref": "work"}, + "keys": "uv run pytest"} ], "on_error": "stop" } diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 5f657e4..73016b2 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -658,13 +658,6 @@ class ContentChangeResult(BaseModel): elapsed_seconds: float = Field(description="Time spent waiting in seconds") -def _require_single_pane_target(pane_id: str | None, pane_ref: str | None) -> None: - """Validate exactly one concrete pane target or prior split reference.""" - if (pane_id is None) == (pane_ref is None): - msg = "Provide exactly one of pane_id or pane_ref." - raise ValueError(msg) - - class TmuxOperationStatus(str, enum.Enum): """Execution status for one typed tmux operation.""" @@ -674,28 +667,48 @@ class TmuxOperationStatus(str, enum.Enum): PLANNED = "planned" -class _PaneTargetOperation(BaseModel): - """Shared target fields for operations that act on one pane.""" +class PaneIdTarget(BaseModel): + """Target a concrete pane by its tmux ID.""" model_config = ConfigDict(extra="forbid") - pane_id: str | None = Field( - default=None, - description="Concrete tmux pane ID, e.g. 
'%1'.", + kind: t.Literal["pane_id"] = Field( + default="pane_id", + description="Target discriminator.", ) - pane_ref: str | None = Field( - default=None, + pane_id: str = Field(description="Concrete tmux pane ID, e.g. '%1'.") + + +class RefTarget(BaseModel): + """Target a pane created earlier in the same operation list.""" + + model_config = ConfigDict(extra="forbid") + + kind: t.Literal["ref"] = Field( + default="ref", + description="Target discriminator.", + ) + ref: str = Field( description="Reference name captured from an earlier split_pane operation.", ) -class SplitPaneOperation(_PaneTargetOperation): +PaneTarget: t.TypeAlias = t.Annotated[ + PaneIdTarget | RefTarget, + Field(discriminator="kind"), +] + + +class SplitPaneOperation(BaseModel): """Split a pane and optionally expose the new pane under ``ref``.""" + model_config = ConfigDict(extra="forbid") + kind: t.Literal["split_pane"] = Field( default="split_pane", description="Operation discriminator.", ) + target: PaneTarget = Field(description="Pane to split.") ref: str | None = Field( default=None, description="Reference name for the created pane ID.", @@ -709,19 +722,17 @@ class SplitPaneOperation(_PaneTargetOperation): description="Command to run in the new pane instead of the default shell.", ) - @model_validator(mode="after") - def _validate_target(self) -> SplitPaneOperation: - _require_single_pane_target(self.pane_id, self.pane_ref) - return self +class TmuxSendKeysOperation(BaseModel): + """Send keys to a pane target.""" -class TmuxSendKeysOperation(_PaneTargetOperation): - """Send keys to a concrete pane or prior split reference.""" + model_config = ConfigDict(extra="forbid") kind: t.Literal["send_keys"] = Field( default="send_keys", description="Operation discriminator.", ) + target: PaneTarget = Field(description="Pane to send keys to.") keys: str = Field(description="Keys or text to send.") enter: bool = Field(default=True, description="Press Enter after sending keys.") literal: bool = Field( 
@@ -729,26 +740,23 @@ class TmuxSendKeysOperation(_PaneTargetOperation): description="Pass -l so tmux sends keys literally.", ) - @model_validator(mode="after") - def _validate_target(self) -> TmuxSendKeysOperation: - _require_single_pane_target(self.pane_id, self.pane_ref) - return self - -class ResizePaneOperation(_PaneTargetOperation): +class ResizePaneOperation(BaseModel): """Resize a pane by dimensions or zoom toggle.""" + model_config = ConfigDict(extra="forbid") + kind: t.Literal["resize_pane"] = Field( default="resize_pane", description="Operation discriminator.", ) + target: PaneTarget = Field(description="Pane to resize.") height: int | None = Field(default=None, description="New height in lines.") width: int | None = Field(default=None, description="New width in columns.") zoom: bool | None = Field(default=None, description="Toggle pane zoom.") @model_validator(mode="after") def _validate_resize(self) -> ResizePaneOperation: - _require_single_pane_target(self.pane_id, self.pane_ref) if self.zoom is not None and ( self.height is not None or self.width is not None ): @@ -805,21 +813,19 @@ def _validate_target(self) -> SetOptionOperation: return self -class CapturePaneOperation(_PaneTargetOperation): +class CapturePaneOperation(BaseModel): """Capture pane output as a standalone read operation.""" + model_config = ConfigDict(extra="forbid") + kind: t.Literal["capture_pane"] = Field( default="capture_pane", description="Operation discriminator.", ) + target: PaneTarget = Field(description="Pane to capture.") start: int | None = Field(default=None, description="Start capture line.") end: int | None = Field(default=None, description="End capture line.") - @model_validator(mode="after") - def _validate_target(self) -> CapturePaneOperation: - _require_single_pane_target(self.pane_id, self.pane_ref) - return self - TmuxOperation: t.TypeAlias = t.Annotated[ SplitPaneOperation diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py 
index db4e8cc..e20a764 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -28,6 +28,9 @@ CapturePaneOperation, CapturePaneStepResult, OperationStepResult, + PaneIdTarget, + PaneTarget, + RefTarget, ResizePaneOperation, RunTmuxDiagnostics, RunTmuxOperationsResult, @@ -130,22 +133,20 @@ def _validate_operation_scope( raise _CompileError(str(exc)) from exc -def _target_pane( - pane_id: str | None, - pane_ref: str | None, +def _resolve_target( + target: PaneTarget, created_panes: dict[str, str], ) -> str: - """Return the concrete pane target for an operation.""" - if pane_id is not None: - return pane_id - if pane_ref is None: - msg = "operation is missing pane_id or pane_ref" - raise _CompileError(msg) - try: - return created_panes[pane_ref] - except KeyError as exc: - msg = f"unknown pane_ref: {pane_ref}" - raise _CompileError(msg) from exc + """Resolve a typed pane target to a concrete tmux target token.""" + if isinstance(target, PaneIdTarget): + return target.pane_id + if isinstance(target, RefTarget): + try: + return created_panes[target.ref] + except KeyError as exc: + msg = f"unknown ref: {target.ref}" + raise _CompileError(msg) from exc + assert_never(target) def _split_calls( @@ -168,7 +169,7 @@ def _split_calls( CommandCall( "split-window", tuple(args), - target=_target_pane(operation.pane_id, operation.pane_ref, created_panes), + target=_resolve_target(operation.target, created_panes), ), ) @@ -178,7 +179,7 @@ def _send_keys_calls( created_panes: dict[str, str], ) -> tuple[CommandCall, ...]: """Build one operation's ``send-keys`` calls.""" - target = _target_pane(operation.pane_id, operation.pane_ref, created_panes) + target = _resolve_target(operation.target, created_panes) if operation.literal: calls = [ CommandCall("send-keys", ("-l", operation.keys), target=target), @@ -209,7 +210,7 @@ def _resize_pane_calls( CommandCall( "resize-pane", tuple(args), - target=_target_pane(operation.pane_id, operation.pane_ref, 
created_panes), + target=_resolve_target(operation.target, created_panes), ), ) @@ -250,7 +251,7 @@ def _capture_pane_calls( CommandCall( "capture-pane", tuple(args), - target=_target_pane(operation.pane_id, operation.pane_ref, created_panes), + target=_resolve_target(operation.target, created_panes), ), ) diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 13281a3..c2897e1 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -14,6 +14,8 @@ from libtmux_mcp.models import ( CapturePaneOperation, CapturePaneStepResult, + PaneIdTarget, + RefTarget, RunTmuxOperationsResult, SetOptionOperation, SplitPaneOperation, @@ -36,6 +38,12 @@ from libtmux.session import Session +def _pane_target(pane: Pane) -> PaneIdTarget: + """Return a typed pane-id target for a fixture pane.""" + assert pane.pane_id is not None + return PaneIdTarget(pane_id=pane.pane_id) + + def test_run_tmux_operations_runs_each_operation( mcp_session: Session, ) -> None: @@ -107,7 +115,7 @@ def test_run_tmux_operations_capture_returns_lines( value="1", global_=True, ), - CapturePaneOperation(pane_id=mcp_pane.pane_id), + CapturePaneOperation(target=_pane_target(mcp_pane)), ], socket_name=mcp_server.socket_name, ), @@ -132,8 +140,11 @@ def test_run_tmux_operations_captures_split_refs( result = asyncio.run( run_tmux_operations( operations=[ - SplitPaneOperation(ref="child", pane_id=mcp_pane.pane_id), - TmuxSendKeysOperation(pane_ref="child", keys=keys), + SplitPaneOperation( + ref="child", + target=_pane_target(mcp_pane), + ), + TmuxSendKeysOperation(target=RefTarget(ref="child"), keys=keys), ], socket_name=mcp_server.socket_name, ), @@ -163,7 +174,11 @@ def test_run_tmux_operations_continue_runs_later_ops( result = asyncio.run( run_tmux_operations( operations=[ - TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), + TmuxSendKeysOperation( + target=PaneIdTarget(pane_id="%999999"), + keys="bad", + enter=False, + ), SetOptionOperation( 
option="@cc_ops_after_error", value="set", @@ -192,7 +207,11 @@ def test_run_tmux_operations_stop_halts_after_failure( run_tmux_operations( operations=[ SetOptionOperation(option="@cc_ops_cm_a", value="1", global_=True), - TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), + TmuxSendKeysOperation( + target=PaneIdTarget(pane_id="%999999"), + keys="bad", + enter=False, + ), SetOptionOperation(option="@cc_ops_cm_b", value="2", global_=True), ], socket_name=server.socket_name, @@ -240,7 +259,12 @@ def test_run_tmux_operations_split_inherits_target_directory( result = asyncio.run( run_tmux_operations( - operations=[SplitPaneOperation(ref="child", pane_id=target_pane_id)], + operations=[ + SplitPaneOperation( + ref="child", + target=PaneIdTarget(pane_id=target_pane_id), + ), + ], socket_name=server.socket_name, ), ) @@ -343,8 +367,14 @@ def test_run_tmux_operations_dry_run_plans_split_ref( result = asyncio.run( run_tmux_operations( operations=[ - SplitPaneOperation(ref="child", pane_id=mcp_pane.pane_id), - TmuxSendKeysOperation(pane_ref="child", keys="printf 'DRY_RUN_REF\\n'"), + SplitPaneOperation( + ref="child", + target=_pane_target(mcp_pane), + ), + TmuxSendKeysOperation( + target=RefTarget(ref="child"), + keys="printf 'DRY_RUN_REF\\n'", + ), ], dry_run=True, socket_name=mcp_server.socket_name, @@ -378,7 +408,7 @@ def test_run_tmux_operations_dry_run_plans_output_ops( value="1", global_=True, ), - CapturePaneOperation(pane_id=mcp_pane.pane_id), + CapturePaneOperation(target=_pane_target(mcp_pane)), ], dry_run=True, socket_name=mcp_server.socket_name, @@ -413,7 +443,7 @@ def sleep_dispatch(*args: object, **kwargs: object) -> t.NoReturn: result = asyncio.run( run_tmux_operations( - operations=[CapturePaneOperation(pane_id=mcp_pane.pane_id)], + operations=[CapturePaneOperation(target=_pane_target(mcp_pane))], dispatch_timeout=0.001, explain=True, socket_name=mcp_server.socket_name, @@ -481,18 +511,30 @@ class CompileErrorPathCase(t.NamedTuple): "case", [ 
CompileErrorPathCase( - test_id="unknown_pane_ref", + test_id="unknown_ref", operations=[ - TmuxSendKeysOperation(pane_ref="missing", keys="bad", enter=False), + TmuxSendKeysOperation( + target=RefTarget(ref="missing"), + keys="bad", + enter=False, + ), ], expected_statuses=[TmuxOperationStatus.FAILED], - expected_error="unknown pane_ref: missing", + expected_error="unknown ref: missing", ), CompileErrorPathCase( test_id="failure_before_compile_error", operations=[ - TmuxSendKeysOperation(pane_id="%999999", keys="bad", enter=False), - TmuxSendKeysOperation(pane_ref="missing", keys="bad", enter=False), + TmuxSendKeysOperation( + target=PaneIdTarget(pane_id="%999999"), + keys="bad", + enter=False, + ), + TmuxSendKeysOperation( + target=RefTarget(ref="missing"), + keys="bad", + enter=False, + ), ], expected_statuses=[ TmuxOperationStatus.FAILED, @@ -529,8 +571,15 @@ def test_run_tmux_operations_split_failure_skips_later_ops( result = asyncio.run( run_tmux_operations( operations=[ - SplitPaneOperation(ref="child", pane_id="%999999"), - TmuxSendKeysOperation(pane_ref="child", keys="bad", enter=False), + SplitPaneOperation( + ref="child", + target=PaneIdTarget(pane_id="%999999"), + ), + TmuxSendKeysOperation( + target=RefTarget(ref="child"), + keys="bad", + enter=False, + ), SetOptionOperation( option="@cc_ops_after_split_failure", value="set", @@ -585,9 +634,12 @@ def test_run_tmux_operations_rolls_back_created_panes( result = asyncio.run( run_tmux_operations( operations=[ - SplitPaneOperation(ref="child", pane_id=mcp_pane.pane_id), + SplitPaneOperation( + ref="child", + target=_pane_target(mcp_pane), + ), TmuxSendKeysOperation( - pane_id="%999999", + target=PaneIdTarget(pane_id="%999999"), keys="bad", enter=False, ), @@ -634,6 +686,13 @@ class ValidationCase(t.NamedTuple): operations=[{"kind": "kill_server"}], expected_error=ValidationError, ), + ValidationCase( + test_id="unknown_target_kind", + operations=[ + {"kind": "send_keys", "keys": "x", "target": {"kind": 
"bogus"}} + ], + expected_error=ValidationError, + ), ], ids=lambda case: case.test_id, ) From 32505d791823dd19cfa94e38301bc8824330572a Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 08:40:40 -0500 Subject: [PATCH 22/25] Chain(refactor[tools]): Rename run_tmux_operations to run_tmux_plan why: the tool reads best as a plan an agent applies, not a list of implementation operations to run, so naming it around the intent (a tmux plan) fits the call better than naming it around the mechanism. The symbol has not shipped, so it can be renamed in place with no alias. what: - Rename run_tmux_operations to run_tmux_plan and RunTmuxOperationsResult to RunTmuxPlanResult, keeping dispatch_timeout and the operations argument - Retitle the tool and rename its docs page to run-tmux-plan - Refresh the chain index and card text for per-operation control execution - Update CHANGES, the autodoc model list, and the tests --- CHANGES | 4 +- docs/conf.py | 2 +- docs/tools/chain/index.md | 16 ++++---- ...un-tmux-operations.md => run-tmux-plan.md} | 8 ++-- docs/tools/index.md | 8 ++-- src/libtmux_mcp/models.py | 2 +- src/libtmux_mcp/tools/chain_tools.py | 12 +++--- tests/test_chain_tools.py | 40 +++++++++---------- 8 files changed, 46 insertions(+), 46 deletions(-) rename docs/tools/chain/{run-tmux-operations.md => run-tmux-plan.md} (93%) diff --git a/CHANGES b/CHANGES index b038b57..eb351ae 100644 --- a/CHANGES +++ b/CHANGES @@ -8,9 +8,9 @@ _Notes on upcoming releases will be added here_ ### What's new -**Typed tmux operation chains with {tooliconl}`run-tmux-operations`** +**Typed tmux operation chains with {tooliconl}`run-tmux-plan`** -{tooliconl}`run-tmux-operations` accepts an ordered list of typed tmux +{tooliconl}`run-tmux-plan` accepts an ordered list of typed tmux operations and runs each one over a persistent `tmux -C` control connection. 
It returns one typed result per operation, discriminated by `kind`: `capture_pane` returns its `lines`, `split_pane` returns the new `pane_id`, and the rest return diff --git a/docs/conf.py b/docs/conf.py index 34affd2..7c0df58 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -176,7 +176,7 @@ def _patched_tool_collector_tool(self: ToolCollector, **kwargs: t.Any) -> t.Any: "OperationStepResult", "TmuxOperationDispatchResult", "RunTmuxDiagnostics", - "RunTmuxOperationsResult", + "RunTmuxPlanResult", "ToolCallOperation", "ToolCallOperationResult", "ToolCallBatchResult", diff --git a/docs/tools/chain/index.md b/docs/tools/chain/index.md index 72b8619..372ef9d 100644 --- a/docs/tools/chain/index.md +++ b/docs/tools/chain/index.md @@ -1,16 +1,16 @@ # Chain tools -Chain tools compile typed tmux operations into the fewest safe native -tmux dispatches. They are different from batch tools: batch tools call -existing MCP tools one by one, while chain tools lower a typed operation -list directly to tmux command sequences when tmux can preserve the same -semantics. +Chain tools run a typed list of tmux operations over a persistent tmux +control connection, one dispatch per operation, and return one typed +result per step. They are different from batch tools: batch tools call +existing MCP tools one by one, while chain tools take a typed tmux +operation list directly. ::::{grid} 1 1 2 3 :gutter: 2 2 3 3 -:::{grid-item-card} {tooliconl}`run-tmux-operations` -Run typed tmux operations with automatic native chaining. +:::{grid-item-card} {tooliconl}`run-tmux-plan` +Run a typed plan of tmux operations, one result per step. ::: :::: @@ -19,5 +19,5 @@ Run typed tmux operations with automatic native chaining. 
:hidden: :maxdepth: 1 -run-tmux-operations +run-tmux-plan ``` diff --git a/docs/tools/chain/run-tmux-operations.md b/docs/tools/chain/run-tmux-plan.md similarity index 93% rename from docs/tools/chain/run-tmux-operations.md rename to docs/tools/chain/run-tmux-plan.md index bfb84a9..83c8a97 100644 --- a/docs/tools/chain/run-tmux-operations.md +++ b/docs/tools/chain/run-tmux-plan.md @@ -1,6 +1,6 @@ -# Run tmux operations +# Run tmux plan -```{fastmcp-tool} chain_tools.run_tmux_operations +```{fastmcp-tool} chain_tools.run_tmux_plan ``` **Use when** you need several typed tmux operations to run in order over @@ -51,7 +51,7 @@ fails. The result still reports `created_panes`, and adds ```json { - "tool": "run_tmux_operations", + "tool": "run_tmux_plan", "arguments": { "operations": [ {"kind": "split_pane", "target": {"kind": "pane_id", "pane_id": "%1"}, @@ -64,5 +64,5 @@ fails. The result still reports `created_panes`, and adds } ``` -```{fastmcp-tool-input} chain_tools.run_tmux_operations +```{fastmcp-tool-input} chain_tools.run_tmux_plan ``` diff --git a/docs/tools/index.md b/docs/tools/index.md index 53c1140..9bc33b1 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -55,7 +55,7 @@ leave socket selection inside each nested tool's arguments. See - Signal a waiter → {tool}`signal-channel` **Batching typed tool calls?** -- Native tmux operation chains → {tool}`run-tmux-operations` +- Native tmux operation chains → {tool}`run-tmux-plan` - Read-only observations → {tool}`call-readonly-tools-batch` - Ordered readonly + mutating workflows → {tool}`call-mutating-tools-batch` - Reviewed workflows that include destructive steps → {tool}`call-destructive-tools-batch` @@ -271,10 +271,10 @@ Run a shell command and report exit status. Call typed readonly or mutating tools in order. 
::: -:::{grid-item-card} run_tmux_operations -:link: run-tmux-operations +:::{grid-item-card} run_tmux_plan +:link: run-tmux-plan :link-type: ref -Compile typed tmux operations into native chains. +Run a typed plan of tmux operations, one result per step. ::: :::{grid-item-card} rename_session diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 73016b2..d971924 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -915,7 +915,7 @@ class RunTmuxDiagnostics(BaseModel): ) -class RunTmuxOperationsResult(BaseModel): +class RunTmuxPlanResult(BaseModel): """Result of running typed tmux operations.""" succeeded: bool = Field(description="False when any operation failed or skipped.") diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index e20a764..8ebe9d7 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -33,7 +33,7 @@ RefTarget, ResizePaneOperation, RunTmuxDiagnostics, - RunTmuxOperationsResult, + RunTmuxPlanResult, SelectLayoutOperation, SetOptionOperation, SplitPaneOperation, @@ -477,7 +477,7 @@ def _to_step_result(outcome: _Outcome) -> TmuxStepResult: @handle_tool_errors_async -async def run_tmux_operations( +async def run_tmux_plan( operations: list[TmuxOperation], on_error: t.Literal["stop", "continue"] = "stop", dry_run: bool = False, @@ -485,7 +485,7 @@ async def run_tmux_operations( rollback_on_error: bool = False, explain: bool = False, socket_name: str | None = None, -) -> RunTmuxOperationsResult: +) -> RunTmuxPlanResult: """Run typed tmux operations, one dispatch per operation. 
Each operation is dispatched on its own over a persistent ``tmux -C`` @@ -620,7 +620,7 @@ def skip_rest(start: int) -> None: if explain else None ) - return RunTmuxOperationsResult( + return RunTmuxPlanResult( succeeded=succeeded, dry_run=dry_run, steps=[_to_step_result(outcome) for outcome in outcomes], @@ -637,7 +637,7 @@ def skip_rest(start: int) -> None: def register(mcp: FastMCP) -> None: """Register typed chain tools with the MCP instance.""" mcp.tool( - title="Run tmux Operations", + title="Run tmux Plan", annotations=ANNOTATIONS_SHELL, tags={TAG_MUTATING}, - )(run_tmux_operations) + )(run_tmux_plan) diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index c2897e1..fc48746 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -16,7 +16,7 @@ CapturePaneStepResult, PaneIdTarget, RefTarget, - RunTmuxOperationsResult, + RunTmuxPlanResult, SetOptionOperation, SplitPaneOperation, SplitPaneStepResult, @@ -27,7 +27,7 @@ from libtmux_mcp.tools import chain_tools from libtmux_mcp.tools.chain_tools import ( TMUX_OPERATIONS_ADAPTER, - run_tmux_operations, + run_tmux_plan, ) if t.TYPE_CHECKING: @@ -50,7 +50,7 @@ def test_run_tmux_operations_runs_each_operation( """Each operation runs and reports its own typed status.""" server = mcp_session.server result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SetOptionOperation(option="@cc_ops_a", value="1", global_=True), SetOptionOperation(option="@cc_ops_b", value="2", global_=True), @@ -75,7 +75,7 @@ def test_run_tmux_operations_explain_attaches_diagnostics( """``explain`` attaches one per-operation dispatch record.""" server = mcp_session.server result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SetOptionOperation(option="@cc_ops_x", value="1", global_=True), SetOptionOperation(option="@cc_ops_y", value="2", global_=True), @@ -108,7 +108,7 @@ def test_run_tmux_operations_capture_returns_lines( ) result = asyncio.run( - run_tmux_operations( + 
run_tmux_plan( operations=[ SetOptionOperation( option="@cc_ops_before_capture", @@ -138,7 +138,7 @@ def test_run_tmux_operations_captures_split_refs( channel = "cc_ops_split_ref" keys = f"printf 'CC_OPS_REF\\n'; tmux wait-for -S {channel}" result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SplitPaneOperation( ref="child", @@ -172,7 +172,7 @@ def test_run_tmux_operations_continue_runs_later_ops( """Continue mode records each failure and runs the rest.""" server = mcp_session.server result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ TmuxSendKeysOperation( target=PaneIdTarget(pane_id="%999999"), @@ -204,7 +204,7 @@ def test_run_tmux_operations_stop_halts_after_failure( """Stop mode (the default) skips every operation after the first failure.""" server = mcp_session.server result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SetOptionOperation(option="@cc_ops_cm_a", value="1", global_=True), TmuxSendKeysOperation( @@ -258,7 +258,7 @@ def test_run_tmux_operations_split_inherits_target_directory( ).stdout result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SplitPaneOperation( ref="child", @@ -303,7 +303,7 @@ def fail_scope(command_name: str, target_scope: str) -> None: ) result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SetOptionOperation( option="@cc_ops_contract_error", @@ -330,7 +330,7 @@ def test_run_tmux_operations_dry_run_plans_without_mutating( """Dry-run returns planned steps without changing tmux state.""" server = mcp_session.server result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SetOptionOperation(option="@cc_ops_dry_a", value="1", global_=True), SetOptionOperation(option="@cc_ops_dry_b", value="2", global_=True), @@ -365,7 +365,7 @@ def test_run_tmux_operations_dry_run_plans_split_ref( pane_count = len(mcp_pane.window.panes) result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ 
SplitPaneOperation( ref="child", @@ -401,7 +401,7 @@ def test_run_tmux_operations_dry_run_plans_output_ops( ) -> None: """Dry-run plans read operations as planned steps.""" result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SetOptionOperation( option="@cc_ops_dry_pending", @@ -442,7 +442,7 @@ def sleep_dispatch(*args: object, **kwargs: object) -> t.NoReturn: assert mcp_pane.pane_id is not None result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[CapturePaneOperation(target=_pane_target(mcp_pane))], dispatch_timeout=0.001, explain=True, @@ -484,7 +484,7 @@ def test_run_tmux_operations_dispatch_timeout_validation( """Dispatch timeout must be positive when set.""" with pytest.raises(ExpectedToolError, match="dispatch_timeout"): asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SetOptionOperation( option="@cc_ops_timeout_validation", @@ -551,7 +551,7 @@ def test_run_tmux_operations_compile_error_paths( ) -> None: """Compile errors report directly; stop mode skips operations after them.""" result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=case.operations, socket_name=mcp_session.server.socket_name, ), @@ -569,7 +569,7 @@ def test_run_tmux_operations_split_failure_skips_later_ops( """A failed split skips every later operation under stop mode.""" server = mcp_session.server result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SplitPaneOperation( ref="child", @@ -629,10 +629,10 @@ def test_run_tmux_operations_rolls_back_created_panes( mcp_pane: Pane, ) -> None: """Rollback kills panes created before a later operation fails.""" - result: RunTmuxOperationsResult | None = None + result: RunTmuxPlanResult | None = None try: result = asyncio.run( - run_tmux_operations( + run_tmux_plan( operations=[ SplitPaneOperation( ref="child", @@ -708,7 +708,7 @@ def test_run_tmux_operations_validation( with pytest.raises(case.expected_error): asyncio.run( - run_tmux_operations( + 
run_tmux_plan( operations=t.cast("list[TmuxOperation]", case.operations), socket_name=mcp_session.server.socket_name, ), From 4375c6375fd6adadd9b33ed48ad86785d2ec54b5 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 08:47:24 -0500 Subject: [PATCH 23/25] Chain(feat[tools]): Add typed layout operations why: building an even split or a grid meant emitting one split_pane per pane and then a raw select_layout string, which is verbose and error-prone for a caller to assemble. A declarative layout operation states the intent directly. what: - Add split_evenly (an even row or column of count panes) and make_grid (a rows by cols tiled grid), each lowering to native splits plus a select-layout - Validate each command's target scope per call so a single operation can mix pane-scoped splits with a window-scoped layout - Keep the raw select_layout operation for any other tmux layout - Document the layout operations and cover them with tests --- CHANGES | 4 +- docs/conf.py | 2 + docs/tools/chain/run-tmux-plan.md | 5 ++ src/libtmux_mcp/models.py | 51 ++++++++++++++++- src/libtmux_mcp/tools/chain_tools.py | 84 ++++++++++++++++++++-------- tests/test_chain_tools.py | 45 +++++++++++++++ 6 files changed, 166 insertions(+), 25 deletions(-) diff --git a/CHANGES b/CHANGES index eb351ae..6c6d489 100644 --- a/CHANGES +++ b/CHANGES @@ -18,7 +18,9 @@ status only. With `on_error="stop"` (the default) it stops before the next operation once one fails or its target cannot be resolved, marking the rest skipped; with `on_error="continue"` it records each failure and runs the rest. Each pane operation takes one typed `target`, discriminated by `kind`: a -concrete `pane_id` or a `ref` minted by an earlier split. It +concrete `pane_id` or a `ref` minted by an earlier split. Typed `split_evenly` +and `make_grid` operations build an even row or column or a tiled grid of panes +without hand-written layout strings. 
It returns concrete pane IDs captured from referenced splits so later operations can target them, supports a dry-run mode that returns the planned steps without touching tmux, applies a per-dispatch timeout, and can roll back panes created diff --git a/docs/conf.py b/docs/conf.py index 7c0df58..666afd3 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -171,6 +171,8 @@ def _patched_tool_collector_tool(self: ToolCollector, **kwargs: t.Any) -> t.Any: "SelectLayoutOperation", "SetOptionOperation", "CapturePaneOperation", + "SplitEvenlyOperation", + "MakeGridOperation", "SplitPaneStepResult", "CapturePaneStepResult", "OperationStepResult", diff --git a/docs/tools/chain/run-tmux-plan.md b/docs/tools/chain/run-tmux-plan.md index 83c8a97..0fdbcde 100644 --- a/docs/tools/chain/run-tmux-plan.md +++ b/docs/tools/chain/run-tmux-plan.md @@ -19,6 +19,11 @@ later operations can target it with a `ref` target. `kind`: `pane_id` (a concrete `%id`) or `ref` (a name minted by an earlier `split_pane`). +**Layouts:** `split_evenly` splits a pane into an even row or column of +`count` panes, and `make_grid` tiles a pane's window into a `rows` by `cols` +grid. Both compile to native splits plus a `select-layout`; use the raw +`select_layout` operation for any other tmux layout. + **Results:** `steps` carries one typed result per operation, discriminated by `kind`: `capture_pane` returns its `lines`, `split_pane` returns the new `pane_id`, and the rest return status only. 
Each step also carries an diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index d971924..0ba93a4 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -827,13 +827,53 @@ class CapturePaneOperation(BaseModel): end: int | None = Field(default=None, description="End capture line.") +class SplitEvenlyOperation(BaseModel): + """Split a pane into an evenly sized row or column of panes.""" + + model_config = ConfigDict(extra="forbid") + + kind: t.Literal["split_evenly"] = Field( + default="split_evenly", + description="Operation discriminator.", + ) + target: PaneTarget = Field(description="Pane to split into even panes.") + count: int = Field(description="Total number of resulting panes.", ge=2) + axis: t.Literal["horizontal", "vertical"] = Field( + default="vertical", + description="Lay the panes out side by side (horizontal) or stacked.", + ) + + +class MakeGridOperation(BaseModel): + """Arrange a pane's window into an evenly tiled grid of panes.""" + + model_config = ConfigDict(extra="forbid") + + kind: t.Literal["make_grid"] = Field( + default="make_grid", + description="Operation discriminator.", + ) + target: PaneTarget = Field(description="Pane whose window becomes a grid.") + rows: int = Field(description="Grid rows.", ge=1) + cols: int = Field(description="Grid columns.", ge=1) + + @model_validator(mode="after") + def _validate_grid(self) -> MakeGridOperation: + if self.rows * self.cols < 2: + msg = "make_grid must produce at least 2 panes (rows * cols >= 2)." 
+ raise ValueError(msg) + return self + + TmuxOperation: t.TypeAlias = t.Annotated[ SplitPaneOperation | TmuxSendKeysOperation | ResizePaneOperation | SelectLayoutOperation | SetOptionOperation - | CapturePaneOperation, + | CapturePaneOperation + | SplitEvenlyOperation + | MakeGridOperation, Field(discriminator="kind"), ] @@ -879,7 +919,14 @@ class CapturePaneStepResult(BaseModel): class OperationStepResult(BaseModel): """Result for an operation that returns status only.""" - kind: t.Literal["send_keys", "resize_pane", "select_layout", "set_option"] = Field( + kind: t.Literal[ + "send_keys", + "resize_pane", + "select_layout", + "set_option", + "split_evenly", + "make_grid", + ] = Field( description="Operation kind discriminator.", ) index: int = Field(description="Zero-based operation index.") diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index 8ebe9d7..bd98011 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -27,6 +27,7 @@ from libtmux_mcp.models import ( CapturePaneOperation, CapturePaneStepResult, + MakeGridOperation, OperationStepResult, PaneIdTarget, PaneTarget, @@ -36,6 +37,7 @@ RunTmuxPlanResult, SelectLayoutOperation, SetOptionOperation, + SplitEvenlyOperation, SplitPaneOperation, SplitPaneStepResult, TmuxOperation, @@ -99,36 +101,30 @@ def _combine_results( return _CombinedResult(stdout=stdout, stderr=stderr, returncode=returncode) -def _operation_scope(operation: TmuxOperation) -> CommandScope: - """Return the tmux target scope for one typed operation.""" - if isinstance( - operation, - ( - SplitPaneOperation, - TmuxSendKeysOperation, - ResizePaneOperation, - CapturePaneOperation, - ), - ): - return "pane" - if isinstance(operation, SelectLayoutOperation): - return "window" +_FIXED_COMMAND_SCOPE: dict[str, CommandScope] = { + "split-window": "pane", + "send-keys": "pane", + "resize-pane": "pane", + "capture-pane": "pane", + "select-layout": "window", +} + + +def 
_call_scope(operation: TmuxOperation, call: CommandCall) -> CommandScope: + """Return the tmux target scope for one command of an operation.""" if isinstance(operation, SetOptionOperation): - scope: CommandScope - scope = operation.scope if operation.scope is not None else "server" - return scope - assert_never(operation) + return operation.scope if operation.scope is not None else "server" + return _FIXED_COMMAND_SCOPE[call.name] def _validate_operation_scope( operation: TmuxOperation, calls: tuple[CommandCall, ...], ) -> None: - """Validate typed operation targets against libtmux command metadata.""" - scope = _operation_scope(operation) + """Validate each command's target scope against libtmux command metadata.""" try: for call in calls: - validate_command_scope(call.name, scope) + validate_command_scope(call.name, _call_scope(operation, call)) except CommandScopeError as exc: raise _CompileError(str(exc)) from exc @@ -256,6 +252,45 @@ def _capture_pane_calls( ) +def _split_evenly_calls( + operation: SplitEvenlyOperation, + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Build splits plus an even layout for a typed split-evenly operation.""" + target = _resolve_target(operation.target, created_panes) + flag = "-h" if operation.axis == "horizontal" else "-v" + layout = "even-horizontal" if operation.axis == "horizontal" else "even-vertical" + calls = [ + CommandCall( + "split-window", + (flag, "-c", "#{pane_current_path}"), + target=target, + ) + for _ in range(operation.count - 1) + ] + calls.append(CommandCall("select-layout", (layout,), target=target)) + return tuple(calls) + + +def _make_grid_calls( + operation: MakeGridOperation, + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Build splits plus a tiled layout for a typed make-grid operation.""" + target = _resolve_target(operation.target, created_panes) + panes = operation.rows * operation.cols + calls = [ + CommandCall( + "split-window", + ("-c", 
"#{pane_current_path}"), + target=target, + ) + for _ in range(panes - 1) + ] + calls.append(CommandCall("select-layout", ("tiled",), target=target)) + return tuple(calls) + + def _operation_calls( operation: TmuxOperation, created_panes: dict[str, str], @@ -273,6 +308,10 @@ def _operation_calls( calls = _set_option_calls(operation) elif isinstance(operation, CapturePaneOperation): calls = _capture_pane_calls(operation, created_panes) + elif isinstance(operation, SplitEvenlyOperation): + calls = _split_evenly_calls(operation, created_panes) + elif isinstance(operation, MakeGridOperation): + calls = _make_grid_calls(operation, created_panes) else: assert_never(operation) _validate_operation_scope(operation, calls) @@ -465,7 +504,8 @@ def _to_step_result(outcome: _Outcome) -> TmuxStepResult: error=error, ) status_kind = t.cast( - "t.Literal['send_keys', 'resize_pane', 'select_layout', 'set_option']", + "t.Literal['send_keys', 'resize_pane', 'select_layout', 'set_option', " + "'split_evenly', 'make_grid']", outcome.kind, ) return OperationStepResult( diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index fc48746..9376ca8 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -14,10 +14,12 @@ from libtmux_mcp.models import ( CapturePaneOperation, CapturePaneStepResult, + MakeGridOperation, PaneIdTarget, RefTarget, RunTmuxPlanResult, SetOptionOperation, + SplitEvenlyOperation, SplitPaneOperation, SplitPaneStepResult, TmuxOperation, @@ -166,6 +168,49 @@ def test_run_tmux_operations_captures_split_refs( assert "CC_OPS_REF" in "\n".join(new_pane.capture_pane()) +def test_run_tmux_plan_split_evenly( + mcp_server: Server, + mcp_pane: Pane, +) -> None: + """split_evenly creates an even row/column of the requested pane count.""" + result = asyncio.run( + run_tmux_plan( + operations=[ + SplitEvenlyOperation( + target=_pane_target(mcp_pane), + count=3, + axis="horizontal", + ), + ], + socket_name=mcp_server.socket_name, + ), + ) + + assert 
result.succeeded + assert result.steps[0].status == TmuxOperationStatus.SUCCEEDED + mcp_pane.window.refresh() + assert len(mcp_pane.window.panes) == 3 + + +def test_run_tmux_plan_make_grid( + mcp_server: Server, + mcp_pane: Pane, +) -> None: + """make_grid tiles a pane's window into rows * cols panes.""" + result = asyncio.run( + run_tmux_plan( + operations=[ + MakeGridOperation(target=_pane_target(mcp_pane), rows=2, cols=2), + ], + socket_name=mcp_server.socket_name, + ), + ) + + assert result.succeeded + mcp_pane.window.refresh() + assert len(mcp_pane.window.panes) == 4 + + def test_run_tmux_operations_continue_runs_later_ops( mcp_session: Session, ) -> None: From ba93302c80152a071fb9d28fe326512998fd9f75 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 08:57:00 -0500 Subject: [PATCH 24/25] Chain(feat[tools]): Gate a typed kill_pane on the destructive tier why: the plan tool is registered mutating, so the server's safety middleware admits it at the mutating tier. A destructive operation placed inside the plan would then run without the destructive tier the standalone kill tools require, bypassing the safety gate. A typed close operation needs its own per-operation tier check so the plan cannot smuggle destruction past it. 
what: - Add a typed kill_pane operation that targets a pane by id or ref - Reject kill_pane unless the effective safety tier is destructive, failing the operation closed (and skipping the rest under stop) before any dispatch - Share the safety-level resolution from _utils so the tool and the server read the same effective tier - Validate each command's scope per call, add kill-pane to the scope table, and cover the gate at both the mutating and destructive tiers --- CHANGES | 4 ++- docs/conf.py | 1 + docs/tools/chain/run-tmux-plan.md | 5 +++ src/libtmux_mcp/_utils.py | 20 ++++++++++++ src/libtmux_mcp/models.py | 20 +++++++++++- src/libtmux_mcp/server.py | 16 +--------- src/libtmux_mcp/tools/chain_tools.py | 28 ++++++++++++++++- tests/test_chain_tools.py | 46 ++++++++++++++++++++++++++++ tests/test_server.py | 2 +- 9 files changed, 123 insertions(+), 19 deletions(-) diff --git a/CHANGES b/CHANGES index 6c6d489..cf6443e 100644 --- a/CHANGES +++ b/CHANGES @@ -20,7 +20,9 @@ skipped; with `on_error="continue"` it records each failure and runs the rest. Each pane operation takes one typed `target`, discriminated by `kind`: a concrete `pane_id` or a `ref` minted by an earlier split. Typed `split_evenly` and `make_grid` operations build an even row or column or a tiled grid of panes -without hand-written layout strings. It +without hand-written layout strings. A typed `kill_pane` operation closes a +pane, and runs only when the server's safety tier is `destructive` so a +mutating-tier plan cannot smuggle a destructive command past the safety gate. 
It returns concrete pane IDs captured from referenced splits so later operations can target them, supports a dry-run mode that returns the planned steps without touching tmux, applies a per-dispatch timeout, and can roll back panes created diff --git a/docs/conf.py b/docs/conf.py index 666afd3..e19fa76 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -173,6 +173,7 @@ def _patched_tool_collector_tool(self: ToolCollector, **kwargs: t.Any) -> t.Any: "CapturePaneOperation", "SplitEvenlyOperation", "MakeGridOperation", + "KillPaneOperation", "SplitPaneStepResult", "CapturePaneStepResult", "OperationStepResult", diff --git a/docs/tools/chain/run-tmux-plan.md b/docs/tools/chain/run-tmux-plan.md index 0fdbcde..502c2de 100644 --- a/docs/tools/chain/run-tmux-plan.md +++ b/docs/tools/chain/run-tmux-plan.md @@ -36,6 +36,11 @@ before the next operation once one fails or its target cannot be resolved, and marks the rest `skipped`. With `on_error="continue"`, every failure is recorded and the rest still run. +**Destructive operations:** `kill_pane` is destructive. A plan that +contains it runs that operation only when the server's safety tier is +`destructive`; otherwise it fails closed with an error and the rest of the +plan is skipped (under `on_error="continue"`, the rest still run). + Set `dry_run` to `true` to compile the operation list and return the rendered dispatches without touching tmux.
Referenced split panes use deterministic placeholders in `created_panes` until the plan is run for diff --git a/src/libtmux_mcp/_utils.py b/src/libtmux_mcp/_utils.py index f6a239b..0cf035f 100644 --- a/src/libtmux_mcp/_utils.py +++ b/src/libtmux_mcp/_utils.py @@ -341,6 +341,26 @@ def _caller_is_strictly_on_server( VALID_SAFETY_LEVELS = frozenset({TAG_READONLY, TAG_MUTATING, TAG_DESTRUCTIVE}) + +def _resolve_safety_level(value: str | None) -> str: + """Return the effective safety level for a ``LIBTMUX_SAFETY`` value.""" + if value is None: + return TAG_MUTATING + if value in VALID_SAFETY_LEVELS: + return value + logger.warning( + "invalid LIBTMUX_SAFETY=%r, falling back to %s", + value, + TAG_READONLY, + ) + return TAG_READONLY + + +def effective_safety_level() -> str: + """Resolve the current process safety level from the environment.""" + return _resolve_safety_level(os.environ.get("LIBTMUX_SAFETY")) + + # --------------------------------------------------------------------------- # Reusable annotation presets for tool registration # --------------------------------------------------------------------------- diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 0ba93a4..7f96fa9 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -865,6 +865,22 @@ def _validate_grid(self) -> MakeGridOperation: return self +class KillPaneOperation(BaseModel): + """Kill a pane. + + Destructive: a plan that contains this operation runs it only when the + server's safety tier is ``destructive``. 
+ """ + + model_config = ConfigDict(extra="forbid") + + kind: t.Literal["kill_pane"] = Field( + default="kill_pane", + description="Operation discriminator.", + ) + target: PaneTarget = Field(description="Pane to kill.") + + TmuxOperation: t.TypeAlias = t.Annotated[ SplitPaneOperation | TmuxSendKeysOperation @@ -873,7 +889,8 @@ def _validate_grid(self) -> MakeGridOperation: | SetOptionOperation | CapturePaneOperation | SplitEvenlyOperation - | MakeGridOperation, + | MakeGridOperation + | KillPaneOperation, Field(discriminator="kind"), ] @@ -926,6 +943,7 @@ class OperationStepResult(BaseModel): "set_option", "split_evenly", "make_grid", + "kill_pane", ] = Field( description="Operation kind discriminator.", ) diff --git a/src/libtmux_mcp/server.py b/src/libtmux_mcp/server.py index 849bb26..2c0895a 100644 --- a/src/libtmux_mcp/server.py +++ b/src/libtmux_mcp/server.py @@ -22,7 +22,7 @@ TAG_DESTRUCTIVE, TAG_MUTATING, TAG_READONLY, - VALID_SAFETY_LEVELS, + _resolve_safety_level, _server_cache, ) from libtmux_mcp.middleware import ( @@ -190,20 +190,6 @@ def _build_instructions(safety_level: str = TAG_MUTATING) -> str: return "".join(parts) -def _resolve_safety_level(value: str | None) -> str: - """Return the effective safety level for a ``LIBTMUX_SAFETY`` value.""" - if value is None: - return TAG_MUTATING - if value in VALID_SAFETY_LEVELS: - return value - logger.warning( - "invalid LIBTMUX_SAFETY=%r, falling back to %s", - value, - TAG_READONLY, - ) - return TAG_READONLY - - _safety_level = _resolve_safety_level(os.environ.get("LIBTMUX_SAFETY")) #: Tools covered by the tail-preserving response limiter. 
Only tools diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index bd98011..98f7e1f 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -19,14 +19,17 @@ from libtmux_mcp._utils import ( ANNOTATIONS_SHELL, + TAG_DESTRUCTIVE, TAG_MUTATING, ExpectedToolError, _get_server, + effective_safety_level, handle_tool_errors_async, ) from libtmux_mcp.models import ( CapturePaneOperation, CapturePaneStepResult, + KillPaneOperation, MakeGridOperation, OperationStepResult, PaneIdTarget, @@ -107,6 +110,7 @@ def _combine_results( "resize-pane": "pane", "capture-pane": "pane", "select-layout": "window", + "kill-pane": "pane", } @@ -291,11 +295,31 @@ def _make_grid_calls( return tuple(calls) +def _kill_pane_calls( + operation: KillPaneOperation, + created_panes: dict[str, str], +) -> tuple[CommandCall, ...]: + """Build ``kill-pane`` calls for a typed kill operation.""" + return ( + CommandCall( + "kill-pane", + (), + target=_resolve_target(operation.target, created_panes), + ), + ) + + def _operation_calls( operation: TmuxOperation, created_panes: dict[str, str], ) -> tuple[CommandCall, ...]: """Lower one typed operation to tmux command calls.""" + if ( + isinstance(operation, KillPaneOperation) + and effective_safety_level() != TAG_DESTRUCTIVE + ): + msg = "kill_pane requires the destructive safety tier" + raise _CompileError(msg) if isinstance(operation, SplitPaneOperation): calls = _split_calls(operation, created_panes) elif isinstance(operation, TmuxSendKeysOperation): @@ -312,6 +336,8 @@ def _operation_calls( calls = _split_evenly_calls(operation, created_panes) elif isinstance(operation, MakeGridOperation): calls = _make_grid_calls(operation, created_panes) + elif isinstance(operation, KillPaneOperation): + calls = _kill_pane_calls(operation, created_panes) else: assert_never(operation) _validate_operation_scope(operation, calls) @@ -505,7 +531,7 @@ def _to_step_result(outcome: _Outcome) -> 
TmuxStepResult: ) status_kind = t.cast( "t.Literal['send_keys', 'resize_pane', 'select_layout', 'set_option', " - "'split_evenly', 'make_grid']", + "'split_evenly', 'make_grid', 'kill_pane']", outcome.kind, ) return OperationStepResult( diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index 9376ca8..a0f4122 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -14,6 +14,7 @@ from libtmux_mcp.models import ( CapturePaneOperation, CapturePaneStepResult, + KillPaneOperation, MakeGridOperation, PaneIdTarget, RefTarget, @@ -211,6 +212,51 @@ def test_run_tmux_plan_make_grid( assert len(mcp_pane.window.panes) == 4 +def test_run_tmux_plan_kill_pane_requires_destructive_tier( + mcp_session: Session, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """kill_pane fails closed unless the server runs at the destructive tier.""" + monkeypatch.setenv("LIBTMUX_SAFETY", "mutating") + result = asyncio.run( + run_tmux_plan( + operations=[KillPaneOperation(target=PaneIdTarget(pane_id="%999999"))], + explain=True, + socket_name=mcp_session.server.socket_name, + ), + ) + + assert not result.succeeded + assert result.diagnostics is not None + assert result.diagnostics.dispatch_count == 0 + assert result.steps[0].status == TmuxOperationStatus.FAILED + assert result.steps[0].error == "kill_pane requires the destructive safety tier" + + +def test_run_tmux_plan_kill_pane_at_destructive_tier( + mcp_server: Server, + mcp_pane: Pane, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """kill_pane removes a pane when the server runs at the destructive tier.""" + monkeypatch.setenv("LIBTMUX_SAFETY", "destructive") + result = asyncio.run( + run_tmux_plan( + operations=[ + SplitPaneOperation(ref="child", target=_pane_target(mcp_pane)), + KillPaneOperation(target=RefTarget(ref="child")), + ], + socket_name=mcp_server.socket_name, + ), + ) + + assert result.succeeded + new_pane_id = result.created_panes["child"] + mcp_pane.window.refresh() + pane_ids = [pane.pane_id 
for pane in mcp_pane.window.panes] + assert new_pane_id not in pane_ids + + def test_run_tmux_operations_continue_runs_later_ops( mcp_session: Session, ) -> None: diff --git a/tests/test_server.py b/tests/test_server.py index 023e406..ecfcdf9 100644 --- a/tests/test_server.py +++ b/tests/test_server.py @@ -162,7 +162,7 @@ def test_resolve_safety_level( expected_level: str, ) -> None: """Safety env values resolve to the server's effective tier.""" - from libtmux_mcp.server import _resolve_safety_level + from libtmux_mcp._utils import _resolve_safety_level assert test_id assert _resolve_safety_level(env_value) == expected_level From d8f12f87c81d24a6170dde688da601158c5ad2f7 Mon Sep 17 00:00:00 2001 From: Tony Narlock Date: Sun, 21 Jun 2026 09:09:59 -0500 Subject: [PATCH 25/25] Chain(feat[tools]): One generic batch tool and a plan history flag why: three call_*_tools_batch tools differed only by a frozen safety-tier ceiling, so an agent had to pick among three near-identical entries for one capability. The chain's send_keys also could not suppress shell history the way the standalone send-keys tool can, so a plan could not stage a secret-bearing command out of history. 
what: - Replace the three batch wrappers with one call_tools_batch that takes an optional max_tier ceiling; tag it readonly so it stays callable at every tier, while each nested call still re-runs the safety middleware so the visibility never widens what the batch can actually run - Add suppress_history to the plan's send_keys operation, space-prefixing the keys like the standalone tool - Update SOCKET_NAME_EXEMPT, the batch docs (three pages to one), the tool index, CHANGES, and the tests, including a server-tier bound test --- CHANGES | 16 +- docs/index.md | 6 +- .../batch/call-destructive-tools-batch.md | 32 ---- docs/tools/batch/call-mutating-tools-batch.md | 41 ------ docs/tools/batch/call-readonly-tools-batch.md | 34 ----- docs/tools/batch/call-tools-batch.md | 39 +++++ docs/tools/batch/index.md | 16 +- docs/tools/chain/run-tmux-plan.md | 2 +- docs/tools/index.md | 22 +-- docs/topics/architecture.md | 2 +- src/libtmux_mcp/models.py | 7 + src/libtmux_mcp/tools/batch_tools.py | 92 +++--------- src/libtmux_mcp/tools/chain_tools.py | 5 +- src/libtmux_mcp/tools/server_tools.py | 4 +- tests/test_batch_tools.py | 138 ++++++++---------- tests/test_chain_tools.py | 24 +++ 16 files changed, 185 insertions(+), 295 deletions(-) delete mode 100644 docs/tools/batch/call-destructive-tools-batch.md delete mode 100644 docs/tools/batch/call-mutating-tools-batch.md delete mode 100644 docs/tools/batch/call-readonly-tools-batch.md create mode 100644 docs/tools/batch/call-tools-batch.md diff --git a/CHANGES b/CHANGES index cf6443e..64f6b36 100644 --- a/CHANGES +++ b/CHANGES @@ -28,18 +28,28 @@ can target them, supports a dry-run mode that returns the planned steps without touching tmux, applies a per-dispatch timeout, and can roll back panes created by typed split refs when a later operation fails. Pass `explain=true` to attach per-dispatch diagnostics under `diagnostics`. 
It keeps -{tooliconl}`call-mutating-tools-batch` available for workflows that need to call +{tooliconl}`call-tools-batch` available for workflows that need to call arbitrary MCP tools instead of this tool's typed operation set. +**One generic batch tool with {tooliconl}`call-tools-batch`** + +{tooliconl}`call-tools-batch` runs an ordered list of existing MCP tools in a +single call and returns a per-operation result for each, preserving every +nested tool's structured output. Each nested call still runs through the +server's safety tier, and an optional `max_tier` caps the batch below that tier +(`readonly` refuses mutating or destructive nested calls, `mutating` refuses +destructive ones). It replaces the separate readonly, mutating, and destructive +batch tools with one tool plus the `max_tier` argument. + ## libtmux-mcp 0.1.0a14 (2026-06-14) -libtmux-mcp 0.1.0a14 adds tier-aware tool batching. {tooliconl}`call-readonly-tools-batch`, {tooliconl}`call-mutating-tools-batch`, and {tooliconl}`call-destructive-tools-batch` run an ordered list of existing MCP tools in a single call and return a per-operation result for each, preserving every nested tool's own structured output. Each wrapper caps the safety tier of the calls it will make — regardless of the server's `LIBTMUX_SAFETY` tier — and `on_error` selects stop-at-first-failure or continue-and-report handling. Aggregate results stay within the server's response limit. +libtmux-mcp 0.1.0a14 adds tier-aware tool batching. `call_readonly_tools_batch`, `call_mutating_tools_batch`, and `call_destructive_tools_batch` run an ordered list of existing MCP tools in a single call and return a per-operation result for each, preserving every nested tool's own structured output. Each wrapper caps the safety tier of the calls it will make — regardless of the server's `LIBTMUX_SAFETY` tier — and `on_error` selects stop-at-first-failure or continue-and-report handling. Aggregate results stay within the server's response limit. 
### What's new **Tier-aware tool batching** -{tooliconl}`call-readonly-tools-batch`, {tooliconl}`call-mutating-tools-batch`, and {tooliconl}`call-destructive-tools-batch` run an ordered list of existing MCP tools in a single call and return a per-operation result for each, preserving every nested tool's own structured output. Each wrapper caps the safety tier of the calls it will make — the readonly wrapper refuses mutating or destructive operations, and the mutating wrapper refuses destructive ones — regardless of the server's `LIBTMUX_SAFETY` tier. Nested calls keep their normal schema validation, middleware, and safety checks, and `on_error` selects stop-at-first-failure or continue-and-report handling. Large aggregate results stay within the server's response limit — oversized nested payloads are dropped (with the truncation flagged in the result), and very large operation lists are rejected rather than allowed to overflow it. (#79) +`call_readonly_tools_batch`, `call_mutating_tools_batch`, and `call_destructive_tools_batch` run an ordered list of existing MCP tools in a single call and return a per-operation result for each, preserving every nested tool's own structured output. Each wrapper caps the safety tier of the calls it will make — the readonly wrapper refuses mutating or destructive operations, and the mutating wrapper refuses destructive ones — regardless of the server's `LIBTMUX_SAFETY` tier. Nested calls keep their normal schema validation, middleware, and safety checks, and `on_error` selects stop-at-first-failure or continue-and-report handling. Large aggregate results stay within the server's response limit — oversized nested payloads are dropped (with the truncation flagged in the result), and very large operation lists are rejected rather than allowed to overflow it. 
(#79) ## libtmux-mcp 0.1.0a13 (2026-06-13) diff --git a/docs/index.md b/docs/index.md index e735b4f..85845ab 100644 --- a/docs/index.md +++ b/docs/index.md @@ -71,19 +71,19 @@ Config blocks for Claude Desktop, Claude Code, Cursor, and others. Read tmux state without changing anything. -{toolref}`list-sessions` · {toolref}`capture-pane` · {toolref}`capture-since` · {toolref}`snapshot-pane` · {toolref}`get-pane-info` · {toolref}`find-pane-by-position` · {toolref}`search-panes` · {toolref}`wait-for-text` · {toolref}`wait-for-content-change` · {toolref}`display-message` · {toolref}`call-readonly-tools-batch` +{toolref}`list-sessions` · {toolref}`capture-pane` · {toolref}`capture-since` · {toolref}`snapshot-pane` · {toolref}`get-pane-info` · {toolref}`find-pane-by-position` · {toolref}`search-panes` · {toolref}`wait-for-text` · {toolref}`wait-for-content-change` · {toolref}`display-message` · {toolref}`call-tools-batch` ### Act (mutating) Create or modify tmux objects. -{toolref}`create-session` · {toolref}`send-keys` · {toolref}`send-keys-batch` · {toolref}`run-command` · {toolref}`paste-text` · {toolref}`create-window` · {toolref}`split-window` · {toolref}`select-pane` · {toolref}`select-window` · {toolref}`move-window` · {toolref}`resize-pane` · {toolref}`pipe-pane` · {toolref}`set-option` · {toolref}`call-mutating-tools-batch` +{toolref}`create-session` · {toolref}`send-keys` · {toolref}`send-keys-batch` · {toolref}`run-command` · {toolref}`paste-text` · {toolref}`create-window` · {toolref}`split-window` · {toolref}`select-pane` · {toolref}`select-window` · {toolref}`move-window` · {toolref}`resize-pane` · {toolref}`pipe-pane` · {toolref}`set-option` ### Destroy (destructive) Tear down tmux objects. Not reversible. 
-{toolref}`kill-session` · {toolref}`kill-window` · {toolref}`kill-pane` · {toolref}`kill-server` · {toolref}`call-destructive-tools-batch` +{toolref}`kill-session` · {toolref}`kill-window` · {toolref}`kill-pane` · {toolref}`kill-server` [Browse all tools →](tools/index) diff --git a/docs/tools/batch/call-destructive-tools-batch.md b/docs/tools/batch/call-destructive-tools-batch.md deleted file mode 100644 index aad382c..0000000 --- a/docs/tools/batch/call-destructive-tools-batch.md +++ /dev/null @@ -1,32 +0,0 @@ -# Call destructive tools batch - -```{fastmcp-tool} batch_tools.call_destructive_tools_batch -``` - -**Use when** a reviewed workflow intentionally includes destructive -tools and should still return one per-operation result envelope. - -**Avoid when** the workflow can fit inside -{tooliconl}`call-mutating-tools-batch`. This wrapper can invoke -destructive nested tools when the server safety tier permits them. - -**Side effects:** Runs readonly, mutating, and destructive nested tools -in order. Recursive batch calls are rejected. - -**Example:** - -```json -{ - "tool": "call_destructive_tools_batch", - "arguments": { - "operations": [ - {"tool": "kill_pane", "arguments": {"pane_id": "%7"}}, - {"tool": "list_panes", "arguments": {"window_id": "@3"}} - ], - "on_error": "stop" - } -} -``` - -```{fastmcp-tool-input} batch_tools.call_destructive_tools_batch -``` diff --git a/docs/tools/batch/call-mutating-tools-batch.md b/docs/tools/batch/call-mutating-tools-batch.md deleted file mode 100644 index ceb34bc..0000000 --- a/docs/tools/batch/call-mutating-tools-batch.md +++ /dev/null @@ -1,41 +0,0 @@ -# Call mutating tools batch - -```{fastmcp-tool} batch_tools.call_mutating_tools_batch -``` - -**Use when** you need an ordered workflow made from existing typed MCP -tools, such as renaming and splitting a known window, while preserving -each tool's own schema and safety checks. - -**Avoid when** you need tmux's native semicolon command parsing. 
This -tool batches MCP tools; it does not create one tmux command sequence. -For shell commands with completion and output, prefer -{tooliconl}`run-command`. - -**Side effects:** Runs readonly and mutating nested tools in order. -Destructive nested tools are rejected even when the server process is -running with `LIBTMUX_SAFETY=destructive`. - -**Example:** - -```json -{ - "tool": "call_mutating_tools_batch", - "arguments": { - "operations": [ - { - "tool": "rename_window", - "arguments": {"window_id": "@2", "new_name": "logs"} - }, - { - "tool": "split_window", - "arguments": {"window_id": "@2", "direction": "right"} - } - ], - "on_error": "stop" - } -} -``` - -```{fastmcp-tool-input} batch_tools.call_mutating_tools_batch -``` diff --git a/docs/tools/batch/call-readonly-tools-batch.md b/docs/tools/batch/call-readonly-tools-batch.md deleted file mode 100644 index 8d32190..0000000 --- a/docs/tools/batch/call-readonly-tools-batch.md +++ /dev/null @@ -1,34 +0,0 @@ -# Call readonly tools batch - -```{fastmcp-tool} batch_tools.call_readonly_tools_batch -``` - -**Use when** you need several read-only observations in one ordered -MCP turn, such as listing sessions and then reading server metadata. - -**Avoid when** any nested operation changes tmux state — use -{tooliconl}`call-mutating-tools-batch` for readonly + mutating -workflows, or call the individual tools when each result should be -reviewed before choosing the next action. - -**Side effects:** None beyond the nested readonly tools. Mutating and -destructive nested tools are rejected even when the server process is -running with a higher safety tier. 
- -**Example:** - -```json -{ - "tool": "call_readonly_tools_batch", - "arguments": { - "operations": [ - {"tool": "list_sessions", "arguments": {}}, - {"tool": "get_server_info", "arguments": {}} - ], - "on_error": "stop" - } -} -``` - -```{fastmcp-tool-input} batch_tools.call_readonly_tools_batch -``` diff --git a/docs/tools/batch/call-tools-batch.md b/docs/tools/batch/call-tools-batch.md new file mode 100644 index 0000000..fc33c4f --- /dev/null +++ b/docs/tools/batch/call-tools-batch.md @@ -0,0 +1,39 @@ +# Call tools batch + +```{fastmcp-tool} batch_tools.call_tools_batch +``` + +**Use when** you need an ordered workflow made from existing typed MCP +tools, such as renaming and splitting a known window, while preserving +each tool's own schema and safety checks. + +**Avoid when** the steps are tmux pane or window operations; prefer the +typed {tooliconl}`run-tmux-plan` tool. For shell commands with completion +and output, prefer {tooliconl}`run-command`. + +**Safety:** Each nested call still runs through the server's safety tier, +so the batch can never run a nested tool the tier hides. Set `max_tier` to +cap the batch below the server tier: `readonly` refuses any mutating or +destructive nested call, and `mutating` refuses destructive ones. The +default permits every tier the server already allows. + +**Example:** + +```json +{ + "tool": "call_tools_batch", + "arguments": { + "operations": [ + {"tool": "rename_window", + "arguments": {"window_id": "@2", "new_name": "logs"}}, + {"tool": "split_window", + "arguments": {"window_id": "@2", "direction": "right"}} + ], + "max_tier": "mutating", + "on_error": "stop" + } +} +``` + +```{fastmcp-tool-input} batch_tools.call_tools_batch +``` diff --git a/docs/tools/batch/index.md b/docs/tools/batch/index.md index be5684c..36e33bb 100644 --- a/docs/tools/batch/index.md +++ b/docs/tools/batch/index.md @@ -7,16 +7,8 @@ including `socket_name` when needed. 
::::{grid} 1 1 2 3 :gutter: 2 2 3 3 -:::{grid-item-card} {tooliconl}`call-readonly-tools-batch` -Call readonly tools in order. -::: - -:::{grid-item-card} {tooliconl}`call-mutating-tools-batch` -Call readonly or mutating tools in order. -::: - -:::{grid-item-card} {tooliconl}`call-destructive-tools-batch` -Call readonly, mutating, or destructive tools in order. +:::{grid-item-card} {tooliconl}`call-tools-batch` +Call existing MCP tools in order, with an optional safety-tier cap. ::: :::: @@ -25,7 +17,5 @@ Call readonly, mutating, or destructive tools in order. :hidden: :maxdepth: 1 -call-readonly-tools-batch -call-mutating-tools-batch -call-destructive-tools-batch +call-tools-batch ``` diff --git a/docs/tools/chain/run-tmux-plan.md b/docs/tools/chain/run-tmux-plan.md index 502c2de..89e4ed6 100644 --- a/docs/tools/chain/run-tmux-plan.md +++ b/docs/tools/chain/run-tmux-plan.md @@ -7,7 +7,7 @@ one persistent tmux control connection, with a typed result per step. **Avoid when** you need to call arbitrary MCP tools; use -{tooliconl}`call-mutating-tools-batch` for that. Use individual tools +{tooliconl}`call-tools-batch` for that. Use individual tools when a workflow has only one step. **Execution:** Each operation is dispatched on its own over a persistent diff --git a/docs/tools/index.md b/docs/tools/index.md index 9bc33b1..828619f 100644 --- a/docs/tools/index.md +++ b/docs/tools/index.md @@ -56,9 +56,7 @@ leave socket selection inside each nested tool's arguments. See **Batching typed tool calls?** - Native tmux operation chains → {tool}`run-tmux-plan` -- Read-only observations → {tool}`call-readonly-tools-batch` -- Ordered readonly + mutating workflows → {tool}`call-mutating-tools-batch` -- Reviewed workflows that include destructive steps → {tool}`call-destructive-tools-batch` +- Ordered calls to existing MCP tools → {tool}`call-tools-batch` **Staging multi-line input?** - Stage content → {tool}`load-buffer` @@ -160,10 +158,10 @@ Wait for text to appear in a pane. 
Get tmux server info. ::: -:::{grid-item-card} call_readonly_tools_batch -:link: call-readonly-tools-batch +:::{grid-item-card} call_tools_batch +:link: call-tools-batch :link-type: ref -Call typed readonly tools in order. +Call existing MCP tools in order, with an optional safety-tier cap. ::: :::{grid-item-card} list_servers @@ -265,12 +263,6 @@ Send several ordered raw-input operations. Run a shell command and report exit status. ::: -:::{grid-item-card} call_mutating_tools_batch -:link: call-mutating-tools-batch -:link-type: ref -Call typed readonly or mutating tools in order. -::: - :::{grid-item-card} run_tmux_plan :link: run-tmux-plan :link-type: ref @@ -442,12 +434,6 @@ Destroy a pane. Kill the entire tmux server. ::: -:::{grid-item-card} call_destructive_tools_batch -:link: call-destructive-tools-batch -:link-type: ref -Call typed tools including destructive steps. -::: - :::{grid-item-card} delete_buffer :link: delete-buffer :link-type: ref diff --git a/docs/topics/architecture.md b/docs/topics/architecture.md index a630b00..f371ed5 100644 --- a/docs/topics/architecture.md +++ b/docs/topics/architecture.md @@ -15,7 +15,7 @@ src/libtmux_mcp/ models.py # Pydantic output models middleware.py # Safety, audit, retry, and error-result middleware tools/ - batch_tools.py # call_readonly_tools_batch, call_mutating_tools_batch, call_destructive_tools_batch + batch_tools.py # call_tools_batch server_tools.py # list_servers, list_sessions, create_session, kill_server, get_server_info session_tools.py # list_windows, create_window, rename_session, kill_session window_tools.py # list_panes, split_window, rename_window, kill_window, select_layout, resize_window diff --git a/src/libtmux_mcp/models.py b/src/libtmux_mcp/models.py index 7f96fa9..31bd64c 100644 --- a/src/libtmux_mcp/models.py +++ b/src/libtmux_mcp/models.py @@ -739,6 +739,13 @@ class TmuxSendKeysOperation(BaseModel): default=False, description="Pass -l so tmux sends keys literally.", ) + suppress_history: 
bool = Field( + default=False, + description=( + "Prepend a space so the shell ignores the command in history, " + "where the shell honors space-prefixed commands." + ), + ) class ResizePaneOperation(BaseModel): diff --git a/src/libtmux_mcp/tools/batch_tools.py b/src/libtmux_mcp/tools/batch_tools.py index 65d760b..e299897 100644 --- a/src/libtmux_mcp/tools/batch_tools.py +++ b/src/libtmux_mcp/tools/batch_tools.py @@ -11,7 +11,6 @@ from pydantic import BaseModel from libtmux_mcp._utils import ( - ANNOTATIONS_RO, TAG_DESTRUCTIVE, TAG_MUTATING, TAG_READONLY, @@ -36,13 +35,7 @@ TAG_DESTRUCTIVE: 2, } -_BATCH_TOOL_NAMES: frozenset[str] = frozenset( - { - "call_readonly_tools_batch", - "call_mutating_tools_batch", - "call_destructive_tools_batch", - } -) +_BATCH_TOOL_NAMES: frozenset[str] = frozenset({"call_tools_batch"}) MAX_BATCH_OPERATIONS = 1_000 @@ -285,81 +278,40 @@ async def _call_tools_batch( @handle_tool_errors_async -async def call_readonly_tools_batch( +async def call_tools_batch( operations: list[ToolCallOperation], on_error: _OnError = "stop", + max_tier: t.Literal["readonly", "mutating", "destructive"] | None = None, ctx: Context | None = None, ) -> ToolCallBatchResult: - """Call readonly MCP tools serially and return per-tool results. - - Use when several read-only observations should be made in one agent - turn. Each nested call still goes through FastMCP validation, - middleware, and safety checks. Mutating and destructive tools are - rejected even if the server process itself is running at a higher - safety tier. + """Call existing MCP tools serially and return per-tool results. + + Use for ordered tmux workflows where every step is an existing typed MCP + tool. Each nested call still goes through FastMCP validation, middleware, + and the server's safety tier, so the batch can never run a nested tool the + server tier hides. 
``max_tier`` optionally caps the batch below the server + tier: ``"readonly"`` refuses any mutating or destructive nested call and + ``"mutating"`` refuses destructive ones. The default permits every tier the + server already allows. Prefer the typed run_tmux_plan tool for tmux + operations; reach for this batch only to drive arbitrary registered tools. """ return await _call_tools_batch( operations=operations, on_error=on_error, - max_tier=TAG_READONLY, + max_tier=max_tier if max_tier is not None else TAG_DESTRUCTIVE, ctx=ctx, ) -@handle_tool_errors_async -async def call_mutating_tools_batch( - operations: list[ToolCallOperation], - on_error: _OnError = "stop", - ctx: Context | None = None, -) -> ToolCallBatchResult: - """Call readonly or mutating MCP tools serially and return per-tool results. - - Use for ordered tmux workflows where every step is still an existing - typed MCP tool. Destructive tools are rejected regardless of the - process-wide safety tier. - """ - return await _call_tools_batch( - operations=operations, - on_error=on_error, - max_tier=TAG_MUTATING, - ctx=ctx, - ) - - -@handle_tool_errors_async -async def call_destructive_tools_batch( - operations: list[ToolCallOperation], - on_error: _OnError = "stop", - ctx: Context | None = None, -) -> ToolCallBatchResult: - """Call readonly, mutating, or destructive MCP tools serially. +def register(mcp: FastMCP) -> None: + """Register the generic MCP batch tool. - This wrapper preserves the normal per-tool schemas and middleware - but its tier permits destructive nested operations. Prefer the - narrower readonly or mutating wrappers whenever possible. + Tagged ``readonly`` so it stays callable at every safety tier; each nested + call is still re-checked against the server tier by the safety middleware, + so visibility never widens what the batch can actually run. 
""" - return await _call_tools_batch( - operations=operations, - on_error=on_error, - max_tier=TAG_DESTRUCTIVE, - ctx=ctx, - ) - - -def register(mcp: FastMCP) -> None: - """Register generic MCP batch tools.""" mcp.tool( - title="Call Readonly Tools Batch", - annotations=ANNOTATIONS_RO, - tags={TAG_READONLY}, - )(call_readonly_tools_batch) - mcp.tool( - title="Call Mutating Tools Batch", - annotations=_ANNOTATIONS_BATCH_SIDE_EFFECTS, - tags={TAG_MUTATING}, - )(call_mutating_tools_batch) - mcp.tool( - title="Call Destructive Tools Batch", + title="Call Tools Batch", annotations=_ANNOTATIONS_BATCH_SIDE_EFFECTS, - tags={TAG_DESTRUCTIVE}, - )(call_destructive_tools_batch) + tags={TAG_READONLY}, + )(call_tools_batch) diff --git a/src/libtmux_mcp/tools/chain_tools.py b/src/libtmux_mcp/tools/chain_tools.py index 98f7e1f..c043911 100644 --- a/src/libtmux_mcp/tools/chain_tools.py +++ b/src/libtmux_mcp/tools/chain_tools.py @@ -180,15 +180,16 @@ def _send_keys_calls( ) -> tuple[CommandCall, ...]: """Build one operation's ``send-keys`` calls.""" target = _resolve_target(operation.target, created_panes) + keys = (" " if operation.suppress_history else "") + operation.keys if operation.literal: calls = [ - CommandCall("send-keys", ("-l", operation.keys), target=target), + CommandCall("send-keys", ("-l", keys), target=target), ] if operation.enter: calls.append(CommandCall("send-keys", ("Enter",), target=target)) return tuple(calls) - args: list[str] = [operation.keys] + args: list[str] = [keys] if operation.enter: args.append("Enter") return (CommandCall("send-keys", tuple(args), target=target),) diff --git a/src/libtmux_mcp/tools/server_tools.py b/src/libtmux_mcp/tools/server_tools.py index 9b0b9a2..ed1f1f9 100644 --- a/src/libtmux_mcp/tools/server_tools.py +++ b/src/libtmux_mcp/tools/server_tools.py @@ -272,9 +272,7 @@ def _probe_server_by_path(socket_path: pathlib.Path) -> ServerInfo | None: #: ``_BASE_INSTRUCTIONS`` so the two stay in lockstep. 
SOCKET_NAME_EXEMPT: frozenset[str] = frozenset( { - "call_destructive_tools_batch", - "call_mutating_tools_batch", - "call_readonly_tools_batch", + "call_tools_batch", "list_servers", } ) diff --git a/tests/test_batch_tools.py b/tests/test_batch_tools.py index 96d3115..94b4d2a 100644 --- a/tests/test_batch_tools.py +++ b/tests/test_batch_tools.py @@ -51,37 +51,6 @@ class BatchOperationLimitFixture(t.NamedTuple): ] -class BatchAnnotationFixture(t.NamedTuple): - """Test fixture for generic batch wrapper annotations.""" - - test_id: str - tool_name: str - read_only_hint: bool - destructive_hint: bool - idempotent_hint: bool - open_world_hint: bool - - -BATCH_ANNOTATION_FIXTURES: list[BatchAnnotationFixture] = [ - BatchAnnotationFixture( - test_id="mutating_batch_warns_destructive_open_world", - tool_name="call_mutating_tools_batch", - read_only_hint=False, - destructive_hint=True, - idempotent_hint=False, - open_world_hint=True, - ), - BatchAnnotationFixture( - test_id="destructive_batch_warns_destructive_open_world", - tool_name="call_destructive_tools_batch", - read_only_hint=False, - destructive_hint=True, - idempotent_hint=False, - open_world_hint=True, - ), -] - - def _content_block_to_wire(block: t.Any) -> dict[str, t.Any]: if hasattr(block, "model_dump"): dumped = block.model_dump(mode="json", by_alias=True, exclude_none=True) @@ -98,8 +67,8 @@ def _call_tool_result_wire(result: t.Any) -> dict[str, t.Any]: } -def _batch_probe_server() -> FastMCP: - """Build a small FastMCP server with batch tools and tiered probes.""" +def _batch_probe_server(server_tier: str = TAG_DESTRUCTIVE) -> FastMCP: + """Build a small FastMCP server with the batch tool and tiered probes.""" from fastmcp import FastMCP from libtmux_mcp.middleware import SafetyMiddleware, ToolErrorResultMiddleware @@ -109,7 +78,7 @@ def _batch_probe_server() -> FastMCP: name="batch-probe", middleware=[ ToolErrorResultMiddleware(transform_errors=True), - SafetyMiddleware(max_tier=TAG_DESTRUCTIVE), + 
SafetyMiddleware(max_tier=server_tier), ], ) register_batch_tools(mcp) @@ -137,14 +106,14 @@ def destructive_probe(value: str) -> dict[str, str]: return mcp -def test_call_readonly_tools_batch_preserves_structured_results() -> None: - """The readonly batch wrapper returns per-tool structured content.""" +def test_call_tools_batch_preserves_structured_results() -> None: + """The batch tool returns per-tool structured content.""" from fastmcp import Client async def _call() -> t.Any: async with Client(_batch_probe_server()) as client: return await client.call_tool( - "call_readonly_tools_batch", + "call_tools_batch", { "operations": [ { @@ -196,7 +165,7 @@ async def _call() -> t.Any: BATCH_RESPONSE_LIMIT_FIXTURES, ids=[fixture.test_id for fixture in BATCH_RESPONSE_LIMIT_FIXTURES], ) -def test_call_readonly_tools_batch_caps_aggregate_response( +def test_call_tools_batch_caps_aggregate_response( test_id: str, payload_size: int, ) -> None: @@ -211,7 +180,7 @@ def test_call_readonly_tools_batch_caps_aggregate_response( async def _call() -> t.Any: async with Client(_batch_probe_server()) as client: return await client.call_tool( - "call_readonly_tools_batch", + "call_tools_batch", { "operations": [ { @@ -271,11 +240,11 @@ async def _call() -> t.Any: BATCH_OPERATION_LIMIT_FIXTURES, ids=[fixture.test_id for fixture in BATCH_OPERATION_LIMIT_FIXTURES], ) -def test_call_readonly_tools_batch_rejects_oversized_operation_count( +def test_call_tools_batch_rejects_oversized_operation_count( test_id: str, operation_count: int, ) -> None: - """The batch wrapper rejects requests whose rows alone can exceed the cap.""" + """The batch tool rejects requests whose rows alone can exceed the cap.""" from fastmcp import Client from libtmux_mcp.middleware import DEFAULT_RESPONSE_LIMIT_BYTES @@ -285,7 +254,7 @@ def test_call_readonly_tools_batch_rejects_oversized_operation_count( async def _call() -> t.Any: async with Client(_batch_probe_server()) as client: return await client.call_tool( - 
"call_readonly_tools_batch", + "call_tools_batch", { "operations": [ { @@ -312,15 +281,16 @@ async def _call() -> t.Any: assert "operations must contain at most" in serialized -def test_call_readonly_tools_batch_rejects_mutating_inner_tool() -> None: - """Readonly batching does not tunnel a mutating tool call.""" +def test_call_tools_batch_max_tier_readonly_rejects_mutating_inner_tool() -> None: + """max_tier="readonly" refuses a mutating nested tool below the server tier.""" from fastmcp import Client async def _call() -> t.Any: async with Client(_batch_probe_server()) as client: return await client.call_tool( - "call_readonly_tools_batch", + "call_tools_batch", { + "max_tier": "readonly", "operations": [ { "tool": "mutating_probe", @@ -342,15 +312,16 @@ async def _call() -> t.Any: assert "exceeds batch tier readonly" in operation["error"] -def test_call_mutating_tools_batch_rejects_destructive_inner_tool() -> None: - """Mutating batching does not tunnel a destructive tool call.""" +def test_call_tools_batch_max_tier_mutating_rejects_destructive_inner_tool() -> None: + """max_tier="mutating" refuses a destructive nested tool below the server tier.""" from fastmcp import Client async def _call() -> t.Any: async with Client(_batch_probe_server()) as client: return await client.call_tool( - "call_mutating_tools_batch", + "call_tools_batch", { + "max_tier": "mutating", "operations": [ { "tool": "destructive_probe", @@ -369,14 +340,45 @@ async def _call() -> t.Any: assert "exceeds batch tier mutating" in operation["error"] -def test_call_mutating_tools_batch_continues_after_error() -> None: +def test_call_tools_batch_bounded_by_server_tier() -> None: + """A readonly-tier server blocks a mutating nested tool even with no max_tier. + + The batch tool is registered readonly so it stays callable at every tier, + but each nested call re-runs the safety middleware, so a readonly server + still refuses a mutating nested tool the batch did not cap itself. 
+ """ + from fastmcp import Client + + async def _call() -> t.Any: + async with Client(_batch_probe_server(server_tier=TAG_READONLY)) as client: + return await client.call_tool( + "call_tools_batch", + { + "operations": [ + { + "tool": "mutating_probe", + "arguments": {"value": "changed"}, + } + ], + }, + raise_on_error=False, + ) + + result = asyncio.run(_call()) + + assert result.is_error is False + [operation] = result.structured_content["results"] + assert operation["success"] is False + + +def test_call_tools_batch_continues_after_error() -> None: """Continue mode attempts later operations after a failed tool call.""" from fastmcp import Client async def _call() -> t.Any: async with Client(_batch_probe_server()) as client: return await client.call_tool( - "call_mutating_tools_batch", + "call_tools_batch", { "on_error": "continue", "operations": [ @@ -406,17 +408,17 @@ async def _call() -> t.Any: def test_call_tools_batch_rejects_self_invocation() -> None: - """Batch wrappers cannot recursively call batch wrappers.""" + """The batch tool cannot recursively call the batch tool.""" from fastmcp import Client async def _call() -> t.Any: async with Client(_batch_probe_server()) as client: return await client.call_tool( - "call_destructive_tools_batch", + "call_tools_batch", { "operations": [ { - "tool": "call_destructive_tools_batch", + "tool": "call_tools_batch", "arguments": {"operations": []}, } ], @@ -432,26 +434,14 @@ async def _call() -> t.Any: assert "cannot call batch tools recursively" in operation["error"] -@pytest.mark.parametrize( - BatchAnnotationFixture._fields, - BATCH_ANNOTATION_FIXTURES, - ids=[fixture.test_id for fixture in BATCH_ANNOTATION_FIXTURES], -) -def test_batch_wrappers_advertise_worst_case_annotations( - test_id: str, - tool_name: str, - read_only_hint: bool, - destructive_hint: bool, - idempotent_hint: bool, - open_world_hint: bool, -) -> None: - """Batch wrappers advertise the strongest hint from their allowed tools.""" +def 
test_call_tools_batch_advertises_worst_case_annotations() -> None: + """The batch tool advertises possible side effects.""" mcp = _batch_probe_server() - tool = asyncio.run(mcp.get_tool(tool_name)) - assert tool is not None, f"{tool_name} should be registered" - assert tool.annotations is not None, f"{tool_name} should carry annotations" - assert tool.annotations.readOnlyHint is read_only_hint - assert tool.annotations.destructiveHint is destructive_hint - assert tool.annotations.idempotentHint is idempotent_hint - assert tool.annotations.openWorldHint is open_world_hint + tool = asyncio.run(mcp.get_tool("call_tools_batch")) + assert tool is not None + assert tool.annotations is not None + assert tool.annotations.readOnlyHint is False + assert tool.annotations.destructiveHint is True + assert tool.annotations.idempotentHint is False + assert tool.annotations.openWorldHint is True diff --git a/tests/test_chain_tools.py b/tests/test_chain_tools.py index a0f4122..00c1db0 100644 --- a/tests/test_chain_tools.py +++ b/tests/test_chain_tools.py @@ -212,6 +212,30 @@ def test_run_tmux_plan_make_grid( assert len(mcp_pane.window.panes) == 4 +def test_run_tmux_plan_send_keys_suppress_history( + mcp_session: Session, +) -> None: + """suppress_history space-prefixes the sent keys.""" + result = asyncio.run( + run_tmux_plan( + operations=[ + TmuxSendKeysOperation( + target=PaneIdTarget(pane_id="%999999"), + keys="secret", + enter=False, + suppress_history=True, + ), + ], + on_error="continue", + explain=True, + socket_name=mcp_session.server.socket_name, + ), + ) + + assert result.diagnostics is not None + assert " secret" in result.diagnostics.dispatches[0].argv + + def test_run_tmux_plan_kill_pane_requires_destructive_tier( mcp_session: Session, monkeypatch: pytest.MonkeyPatch,