From bb76f6630b67f5cfac6ad18430070d93259cc02d Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 20:01:17 +0000 Subject: [PATCH 1/6] feat(tool): add scripted tool orchestration (OrchestratorTool) Compose multiple CallableTools into a single Tool that accepts bash scripts. Each CallableTool becomes a builtin command, so an LLM can orchestrate N tools in one call using pipes, variables, loops, and jq. - CallableTool trait: sync sub-tool interface (name, description, call) - OrchestratorTool: implements Tool, creates fresh Bash per execute() - Arc-based sharing: tools survive across multiple execute() calls - OrchestratorToolBuilder: fluent API for configuration - Auto-generated system_prompt/help with tool command docs - 19 unit tests + doctests covering pipelines, loops, error handling - Example: e-commerce API demo (get_user, list_orders, get_inventory) - Spec: specs/014-scripted-tool-orchestration.md https://claude.ai/code/session_01UcTwLqLrhbCFAuL26Fcm5W --- AGENTS.md | 1 + .../examples/scripted_tool_orchestration.rs | 256 ++++++ crates/bashkit/src/lib.rs | 3 + crates/bashkit/src/scripted_tool.rs | 837 ++++++++++++++++++ specs/014-scripted-tool-orchestration.md | 108 +++ 5 files changed, 1205 insertions(+) create mode 100644 crates/bashkit/examples/scripted_tool_orchestration.rs create mode 100644 crates/bashkit/src/scripted_tool.rs create mode 100644 specs/014-scripted-tool-orchestration.md diff --git a/AGENTS.md b/AGENTS.md index 6a5480f6..8f28e785 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -40,6 +40,7 @@ Fix root cause. Unsure: read more code; if stuck, ask w/ short options. 
Unrecogn | 012-eval | LLM evaluation harness, dataset format, scoring | | 012-maintenance | Pre-release maintenance checklist | | 013-python-package | Python bindings, PyPI wheels, platform matrix | +| 014-scripted-tool-orchestration | Compose CallableTools into OrchestratorTool via bash scripts | ### Documentation diff --git a/crates/bashkit/examples/scripted_tool_orchestration.rs b/crates/bashkit/examples/scripted_tool_orchestration.rs new file mode 100644 index 00000000..ab55efbe --- /dev/null +++ b/crates/bashkit/examples/scripted_tool_orchestration.rs @@ -0,0 +1,256 @@ +//! Scripted Tool Orchestration Example +//! +//! Demonstrates composing multiple API-like tools into a single OrchestratorTool +//! that an LLM agent can call with bash scripts. +//! +//! Run with: cargo run --example scripted_tool_orchestration +//! +//! This example simulates an e-commerce API with tools for users, orders, and +//! inventory. The OrchestratorTool lets an agent compose these in one call. + +use bashkit::{CallableTool, OrchestratorTool, Tool, ToolRequest}; + +// ============================================================================ +// Mock API tools (in real use, these would call actual HTTP endpoints) +// ============================================================================ + +struct GetUser; + +impl CallableTool for GetUser { + fn name(&self) -> &str { + "get_user" + } + fn description(&self) -> &str { + "Fetch user by ID. 
Usage: get_user " + } + fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { + let id: u64 = args + .first() + .and_then(|s| s.parse().ok()) + .ok_or("usage: get_user ")?; + + let users = [ + (1, "Alice", "alice@example.com", "premium"), + (2, "Bob", "bob@example.com", "basic"), + (3, "Charlie", "charlie@example.com", "premium"), + ]; + + match users.iter().find(|(uid, ..)| *uid == id) { + Some((uid, name, email, tier)) => Ok(format!( + "{{\"id\":{uid},\"name\":\"{name}\",\"email\":\"{email}\",\"tier\":\"{tier}\"}}\n" + )), + None => Err(format!("user {} not found", id)), + } + } +} + +struct ListOrders; + +impl CallableTool for ListOrders { + fn name(&self) -> &str { + "list_orders" + } + fn description(&self) -> &str { + "List orders for a user. Usage: list_orders " + } + fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { + let uid: u64 = args + .first() + .and_then(|s| s.parse().ok()) + .ok_or("usage: list_orders ")?; + + let orders = match uid { + 1 => { + r#"[{"order_id":101,"item":"Laptop","qty":1,"price":999.99},{"order_id":102,"item":"Mouse","qty":2,"price":29.99}]"# + } + 2 => r#"[{"order_id":201,"item":"Keyboard","qty":1,"price":79.99}]"#, + 3 => r#"[]"#, + _ => return Err(format!("no orders for user {}", uid)), + }; + + Ok(format!("{orders}\n")) + } +} + +struct GetInventory; + +impl CallableTool for GetInventory { + fn name(&self) -> &str { + "get_inventory" + } + fn description(&self) -> &str { + "Check inventory for an item. 
Usage: get_inventory " + } + fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { + let item = args.first().ok_or("usage: get_inventory ")?; + + let stock = match item.to_lowercase().as_str() { + "laptop" => 15, + "mouse" => 142, + "keyboard" => 67, + _ => 0, + }; + + Ok(format!( + "{{\"item\":\"{}\",\"in_stock\":{}}}\n", + item, stock + )) + } +} + +struct CreateDiscount; + +impl CallableTool for CreateDiscount { + fn name(&self) -> &str { + "create_discount" + } + fn description(&self) -> &str { + "Create a discount code. Usage: create_discount " + } + fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { + let uid = args + .first() + .ok_or("usage: create_discount ")?; + let pct = args + .get(1) + .ok_or("usage: create_discount ")?; + Ok(format!( + "{{\"code\":\"SAVE{pct}-U{uid}\",\"percent\":{pct},\"user_id\":{uid}}}\n" + )) + } +} + +// ============================================================================ +// Demo: show how OrchestratorTool works +// ============================================================================ + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + println!("=== Scripted Tool Orchestration Demo ===\n"); + + // Build the orchestrator with all our API tools + let mut tool = OrchestratorTool::builder("ecommerce_api") + .short_description("E-commerce API orchestrator with user, order, and inventory tools") + .tool(Box::new(GetUser)) + .tool(Box::new(ListOrders)) + .tool(Box::new(GetInventory)) + .tool(Box::new(CreateDiscount)) + .env("STORE_NAME", "Bashkit Shop") + .build(); + + // ---- Show what the LLM sees ---- + println!("--- Tool name ---"); + println!("{}\n", tool.name()); + + println!("--- System prompt (what goes in LLM system message) ---"); + println!("{}", tool.system_prompt()); + + // ---- Demo 1: Simple single tool call ---- + println!("--- Demo 1: Single tool call ---"); + let resp = tool + .execute(ToolRequest { + commands: "get_user 1".to_string(), + }) + .await; + println!("$ 
get_user 1"); + println!("{}", resp.stdout); + + // ---- Demo 2: Pipeline with jq ---- + println!("--- Demo 2: Pipeline with jq ---"); + let resp = tool + .execute(ToolRequest { + commands: "get_user 1 | jq -r '.name'".to_string(), + }) + .await; + println!("$ get_user 1 | jq -r '.name'"); + println!("{}", resp.stdout); + + // ---- Demo 3: Multi-step orchestration ---- + println!("--- Demo 3: Multi-step orchestration ---"); + let script = r#" + user=$(get_user 1) + name=$(echo "$user" | jq -r '.name') + tier=$(echo "$user" | jq -r '.tier') + orders=$(list_orders 1) + total=$(echo "$orders" | jq '[.[].price] | add') + count=$(echo "$orders" | jq 'length') + echo "Customer: $name (tier: $tier)" + echo "Orders: $count, Estimated total: $total" + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + println!(); + + // ---- Demo 4: Loop + conditional ---- + println!("--- Demo 4: Loop with conditional ---"); + let script = r#" + for uid in 1 2 3; do + user=$(get_user $uid) + name=$(echo "$user" | jq -r '.name') + tier=$(echo "$user" | jq -r '.tier') + if [ "$tier" = "premium" ]; then + echo "$name is premium - creating discount" + create_discount $uid 20 | jq -r '.code' + else + echo "$name is $tier - no discount" + fi + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + println!(); + + // ---- Demo 5: Inventory check with error handling ---- + println!("--- Demo 5: Error handling ---"); + let script = r#" + for item in Laptop Mouse Keyboard Widget; do + result=$(get_inventory "$item") + stock=$(echo "$result" | jq '.in_stock') + if [ "$stock" -eq 0 ]; then + echo "$item: OUT OF STOCK" + else + echo "$item: $stock in stock" + fi + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + 
println!(); + + // ---- Demo 6: Data aggregation ---- + println!("--- Demo 6: Aggregate data across tools ---"); + let script = r#" + echo "=== $STORE_NAME Report ===" + for uid in 1 2; do + name=$(get_user $uid | jq -r '.name') + orders=$(list_orders $uid) + count=$(echo "$orders" | jq 'length') + echo "$name: $count orders" + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + + println!("\n=== Demo Complete ==="); + Ok(()) +} diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index f4f52119..59da5cd1 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -372,6 +372,8 @@ mod logging_impl; mod network; /// Parser module - exposed for fuzzing and testing pub mod parser; +/// Scripted tool orchestration (compose multiple tools via bash scripts) +pub mod scripted_tool; /// Tool contract for LLM integration pub mod tool; @@ -386,6 +388,7 @@ pub use git::GitConfig; pub use interpreter::{ControlFlow, ExecResult}; pub use limits::{ExecutionCounters, ExecutionLimits, LimitExceeded}; pub use network::NetworkAllowlist; +pub use scripted_tool::{CallableTool, OrchestratorTool, OrchestratorToolBuilder}; pub use tool::{BashTool, BashToolBuilder, Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; #[cfg(feature = "http_client")] diff --git a/crates/bashkit/src/scripted_tool.rs b/crates/bashkit/src/scripted_tool.rs new file mode 100644 index 00000000..c8f6a203 --- /dev/null +++ b/crates/bashkit/src/scripted_tool.rs @@ -0,0 +1,837 @@ +//! Scripted tool orchestration +//! +//! Compose multiple [`CallableTool`]s into a single [`Tool`] that accepts bash +//! scripts. Each `CallableTool` becomes a builtin command inside the interpreter, +//! so an LLM can orchestrate many tools in one call using pipes, variables, loops, +//! and conditionals. +//! +//! # Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────┐ +//! 
│ OrchestratorTool (implements Tool) │ +//! │ │ +//! │ ┌─────────┐ ┌─────────┐ ┌──────────┐ │ +//! │ │get_user │ │get_order│ │inventory │ │ +//! │ │(builtin)│ │(builtin)│ │(builtin) │ │ +//! │ └─────────┘ └─────────┘ └──────────┘ │ +//! │ ↑ ↑ ↑ │ +//! │ bash script: pipes, vars, jq, loops │ +//! └─────────────────────────────────────────┘ +//! ``` +//! +//! # Example +//! +//! ```rust +//! use bashkit::{ +//! OrchestratorTool, CallableTool, Tool, ToolRequest, +//! }; +//! +//! struct GetUser; +//! +//! impl CallableTool for GetUser { +//! fn name(&self) -> &str { "get_user" } +//! fn description(&self) -> &str { "Fetch user by id. Usage: get_user " } +//! fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { +//! let id = args.first().ok_or("missing id")?; +//! Ok(format!("{{\"id\":{},\"name\":\"Alice\"}}\n", id)) +//! } +//! } +//! +//! # tokio_test::block_on(async { +//! let mut tool = OrchestratorTool::builder("api") +//! .tool(Box::new(GetUser)) +//! .build(); +//! +//! let resp = tool.execute(ToolRequest { +//! commands: "get_user 42 | jq '.name'".to_string(), +//! }).await; +//! +//! assert_eq!(resp.stdout.trim(), "\"Alice\""); +//! # }); +//! ``` + +use crate::builtins::{Builtin, Context}; +use crate::error::Result; +use crate::interpreter::ExecResult; +use crate::tool::{Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; +use crate::{Bash, ExecutionLimits}; +use async_trait::async_trait; +use schemars::schema_for; +use std::sync::Arc; + +// ============================================================================ +// CallableTool — sub-tool trait +// ============================================================================ + +/// A callable sub-tool that can be exposed as a bash builtin. +/// +/// Implement this for each external operation (API call, DB query, etc.) +/// you want to compose via bash scripts. +/// +/// # Sync design +/// +/// `call` is intentionally synchronous. 
Most tool calls are fast (mock data, +/// HTTP via blocking client, cache lookups). If you need async, note that `call` runs inside +/// an async task, where bare `Handle::block_on` panics; wrap it in `tokio::task::block_in_place` (multi-thread runtime). +pub trait CallableTool: Send + Sync { + /// Command name used as the builtin (e.g. `"get_user"`). + fn name(&self) -> &str; + + /// One-line description for LLM consumption. + fn description(&self) -> &str; + + /// Execute the tool. + /// + /// - `args`: command arguments (not including the command name itself). + /// For `get_user --id 5`, args is `["--id", "5"]`. + /// - `stdin`: pipeline input from the previous command, if any. + /// + /// Return `Ok(stdout)` on success or `Err(message)` on failure. + fn call(&self, args: &[String], stdin: Option<&str>) -> std::result::Result; +} + +// ============================================================================ +// CallableToolBuiltin — adapter from CallableTool to Builtin +// ============================================================================ + +/// Wraps an `Arc<dyn CallableTool>` as a [`Builtin`] so the interpreter can +/// execute it. Using Arc allows the same tool to be shared across multiple +/// Bash instances (one per `execute()` call). +struct CallableToolBuiltin { + inner: Arc, +} + +#[async_trait] +impl Builtin for CallableToolBuiltin { + async fn execute(&self, ctx: Context<'_>) -> Result { + match self.inner.call(ctx.args, ctx.stdin) { + Ok(stdout) => Ok(ExecResult::ok(stdout)), + Err(msg) => Ok(ExecResult::err(msg, 1)), + } + } + + fn llm_hint(&self) -> Option<&'static str> { + None + } +} + +// ============================================================================ +// OrchestratorToolBuilder +// ============================================================================ + +/// Builder for [`OrchestratorTool`]. 
+/// +/// ```rust +/// use bashkit::{OrchestratorTool, CallableTool}; +/// +/// struct Ping; +/// impl CallableTool for Ping { +/// fn name(&self) -> &str { "ping" } +/// fn description(&self) -> &str { "Ping a host" } +/// fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { +/// Ok(format!("pong {}\n", args.first().unwrap_or(&String::new()))) +/// } +/// } +/// +/// let tool = OrchestratorTool::builder("net") +/// .short_description("Network tools") +/// .tool(Box::new(Ping)) +/// .build(); +/// ``` +pub struct OrchestratorToolBuilder { + name: String, + short_desc: Option, + tools: Vec>, + limits: Option, + env_vars: Vec<(String, String)>, +} + +impl OrchestratorToolBuilder { + fn new(name: impl Into) -> Self { + Self { + name: name.into(), + short_desc: None, + tools: Vec::new(), + limits: None, + env_vars: Vec::new(), + } + } + + /// One-line description for tool listings. + pub fn short_description(mut self, desc: impl Into) -> Self { + self.short_desc = Some(desc.into()); + self + } + + /// Register a sub-tool. It becomes a bash builtin with the same name. + pub fn tool(mut self, tool: Box) -> Self { + self.tools.push(tool); + self + } + + /// Set execution limits for the bash interpreter. + pub fn limits(mut self, limits: ExecutionLimits) -> Self { + self.limits = Some(limits); + self + } + + /// Add an environment variable visible inside scripts. + pub fn env(mut self, key: impl Into, value: impl Into) -> Self { + self.env_vars.push((key.into(), value.into())); + self + } + + /// Build the [`OrchestratorTool`]. 
+ pub fn build(self) -> OrchestratorTool { + let tool_descriptions: Vec = self + .tools + .iter() + .map(|t| ToolDescription { + name: t.name().to_string(), + description: t.description().to_string(), + }) + .collect(); + + // Wrap in Arc so each execute() can clone refs into new Bash instances + let tools: Vec> = self.tools.into_iter().map(Arc::from).collect(); + + let short_desc = self + .short_desc + .unwrap_or_else(|| format!("Scripted orchestrator: {}", self.name)); + + OrchestratorTool { + name: self.name, + short_desc, + tool_descriptions, + tools, + limits: self.limits, + env_vars: self.env_vars, + } + } +} + +// ============================================================================ +// OrchestratorTool +// ============================================================================ + +/// Metadata kept for documentation generation. +struct ToolDescription { + name: String, + description: String, +} + +/// A [`Tool`] that orchestrates multiple [`CallableTool`]s via bash scripts. +/// +/// Each registered `CallableTool` becomes a bash builtin. The LLM sends a +/// bash script that can pipe, loop, branch, and compose these builtins +/// together with standard utilities like `jq`, `grep`, `sed`, etc. +/// +/// The tool is reusable — `execute()` can be called multiple times. Each call +/// gets a fresh Bash interpreter with the same set of tool-builtins. +/// +/// Create via [`OrchestratorTool::builder`]. +pub struct OrchestratorTool { + name: String, + short_desc: String, + tool_descriptions: Vec, + /// Arc-wrapped tools that are cloned into each Bash instance. + tools: Vec>, + limits: Option, + env_vars: Vec<(String, String)>, +} + +impl OrchestratorTool { + /// Create a builder with the given tool name. + pub fn builder(name: impl Into) -> OrchestratorToolBuilder { + OrchestratorToolBuilder::new(name) + } + + /// Create a fresh Bash instance with all tool-builtins registered. 
+ fn create_bash(&self) -> Bash { + let mut builder = Bash::builder(); + + if let Some(ref limits) = self.limits { + builder = builder.limits(limits.clone()); + } + for (key, value) in &self.env_vars { + builder = builder.env(key, value); + } + // Clone Arc refs into fresh builtin adapters for this Bash instance + for tool in &self.tools { + let name = tool.name().to_string(); + let builtin: Box = Box::new(CallableToolBuiltin { + inner: Arc::clone(tool), + }); + builder = builder.builtin(name, builtin); + } + + builder.build() + } + + fn build_description(&self) -> String { + let mut desc = format!( + "Scripted tool orchestrator. Available tool-commands: {}", + self.tool_descriptions + .iter() + .map(|t| t.name.as_str()) + .collect::>() + .join(", ") + ); + desc.push_str(". Also supports standard bash builtins (echo, jq, grep, sed, awk, etc.)."); + desc + } + + fn build_help(&self) -> String { + let mut doc = format!( + "{name}(1) Tool Commands {name}(1)\n\n\ + NAME\n\ + \x20 {name} - {short_desc}\n\n\ + SYNOPSIS\n\ + \x20 {{\"commands\": \"\"}}\n\n\ + DESCRIPTION\n\ + \x20 Executes a bash script with access to tool-specific commands\n\ + \x20 and standard bash builtins. 
Use pipes, variables, loops, and\n\ + \x20 conditionals to orchestrate multiple tool calls in one request.\n\n\ + TOOL COMMANDS\n", + name = self.name, + short_desc = self.short_desc, + ); + + for t in &self.tool_descriptions { + doc.push_str(&format!(" {:<20} {}\n", t.name, t.description)); + } + + doc.push_str( + "\nBASH BUILTINS\n\ + \x20 echo, cat, grep, sed, awk, jq, head, tail, sort, uniq, cut, tr,\n\ + \x20 wc, printf, test, [, true, false, cd, pwd, ls, find, xargs\n\n\ + INPUT\n\ + \x20 commands Bash script to execute\n\n\ + OUTPUT\n\ + \x20 stdout Combined standard output\n\ + \x20 stderr Errors from tool commands or bash\n\ + \x20 exit_code 0 on success\n\n\ + EXAMPLES\n\ + \x20 Single tool call:\n\ + \x20 {{\"commands\": \"get_user 42\"}}\n\n\ + \x20 Pipeline:\n\ + \x20 {{\"commands\": \"get_user 42 | jq '.name'\"}}\n\n\ + \x20 Multi-step orchestration:\n\ + \x20 {{\"commands\": \"user=$(get_user 42)\\norders=$(get_orders 42)\\necho $user | jq -r '.name'\"}}\n", + ); + + doc + } + + fn build_system_prompt(&self) -> String { + let mut prompt = format!("# {}\n\n", self.name); + prompt.push_str(&format!("{}\n\n", self.short_desc)); + + prompt.push_str("Input: {\"commands\": \"\"}\n"); + prompt.push_str("Output: {stdout, stderr, exit_code}\n\n"); + + prompt.push_str("## Available tool commands\n\n"); + for t in &self.tool_descriptions { + prompt.push_str(&format!("- `{}`: {}\n", t.name, t.description)); + } + + prompt.push_str( + "\n## Tips\n\n\ + - Pipe tool output through `jq` for JSON processing\n\ + - Use variables to pass data between tool calls\n\ + - Use `set -e` to stop on first error\n\ + - Standard builtins (echo, grep, sed, awk, etc.) 
are available\n", + ); + + prompt + } +} + +#[async_trait] +impl Tool for OrchestratorTool { + fn name(&self) -> &str { + &self.name + } + + fn short_description(&self) -> &str { + &self.short_desc + } + + fn description(&self) -> String { + self.build_description() + } + + fn help(&self) -> String { + self.build_help() + } + + fn system_prompt(&self) -> String { + self.build_system_prompt() + } + + fn input_schema(&self) -> serde_json::Value { + let schema = schema_for!(ToolRequest); + serde_json::to_value(schema).unwrap_or_default() + } + + fn output_schema(&self) -> serde_json::Value { + let schema = schema_for!(ToolResponse); + serde_json::to_value(schema).unwrap_or_default() + } + + fn version(&self) -> &str { + VERSION + } + + async fn execute(&mut self, req: ToolRequest) -> ToolResponse { + if req.commands.is_empty() { + return ToolResponse { + stdout: String::new(), + stderr: String::new(), + exit_code: 0, + error: None, + }; + } + + let mut bash = self.create_bash(); + + match bash.exec(&req.commands).await { + Ok(result) => result.into(), + Err(e) => ToolResponse { + stdout: String::new(), + stderr: e.to_string(), + exit_code: 1, + error: Some(format!("{:?}", e)), + }, + } + } + + async fn execute_with_status( + &mut self, + req: ToolRequest, + mut status_callback: Box, + ) -> ToolResponse { + status_callback(ToolStatus::new("validate").with_percent(0.0)); + + if req.commands.is_empty() { + status_callback(ToolStatus::new("complete").with_percent(100.0)); + return ToolResponse { + stdout: String::new(), + stderr: String::new(), + exit_code: 0, + error: None, + }; + } + + status_callback(ToolStatus::new("parse").with_percent(10.0)); + let mut bash = self.create_bash(); + status_callback(ToolStatus::new("execute").with_percent(20.0)); + + let response = match bash.exec(&req.commands).await { + Ok(result) => result.into(), + Err(e) => ToolResponse { + stdout: String::new(), + stderr: e.to_string(), + exit_code: 1, + error: Some(format!("{:?}", e)), + }, + }; 
+ + status_callback(ToolStatus::new("complete").with_percent(100.0)); + response + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + + // -- Test CallableTool implementations -- + + struct Echo; + + impl CallableTool for Echo { + fn name(&self) -> &str { + "api_echo" + } + fn description(&self) -> &str { + "Echo args back as JSON" + } + fn call( + &self, + args: &[String], + _stdin: Option<&str>, + ) -> std::result::Result { + Ok(format!( + "{{\"args\":[{}]}}\n", + args.iter() + .map(|a| format!("\"{}\"", a)) + .collect::>() + .join(",") + )) + } + } + + struct GetUser; + + impl CallableTool for GetUser { + fn name(&self) -> &str { + "get_user" + } + fn description(&self) -> &str { + "Fetch user by id. Usage: get_user " + } + fn call( + &self, + args: &[String], + _stdin: Option<&str>, + ) -> std::result::Result { + let id = args.first().ok_or("missing user id".to_string())?; + Ok(format!( + "{{\"id\":{},\"name\":\"Alice\",\"email\":\"alice@example.com\"}}\n", + id + )) + } + } + + struct GetOrders; + + impl CallableTool for GetOrders { + fn name(&self) -> &str { + "get_orders" + } + fn description(&self) -> &str { + "List orders for user. 
Usage: get_orders " + } + fn call( + &self, + args: &[String], + _stdin: Option<&str>, + ) -> std::result::Result { + let uid = args.first().ok_or("missing user id".to_string())?; + Ok(format!( + "[{{\"order_id\":1,\"user_id\":{uid},\"total\":29.99}},\ + {{\"order_id\":2,\"user_id\":{uid},\"total\":49.50}}]\n" + )) + } + } + + struct FailTool; + + impl CallableTool for FailTool { + fn name(&self) -> &str { + "fail_tool" + } + fn description(&self) -> &str { + "Always fails (for testing error handling)" + } + fn call( + &self, + _args: &[String], + _stdin: Option<&str>, + ) -> std::result::Result { + Err("service unavailable".to_string()) + } + } + + struct StdinTool; + + impl CallableTool for StdinTool { + fn name(&self) -> &str { + "from_stdin" + } + fn description(&self) -> &str { + "Read from stdin, uppercase it" + } + fn call( + &self, + _args: &[String], + stdin: Option<&str>, + ) -> std::result::Result { + match stdin { + Some(input) => Ok(input.to_uppercase()), + None => Err("no stdin".to_string()), + } + } + } + + fn build_test_tool() -> OrchestratorTool { + OrchestratorTool::builder("test_api") + .short_description("Test API orchestrator") + .tool(Box::new(GetUser)) + .tool(Box::new(GetOrders)) + .tool(Box::new(FailTool)) + .tool(Box::new(StdinTool)) + .build() + } + + // -- Builder tests -- + + #[test] + fn test_builder_name_and_description() { + let tool = build_test_tool(); + assert_eq!(tool.name(), "test_api"); + assert_eq!(tool.short_description(), "Test API orchestrator"); + } + + #[test] + fn test_builder_default_short_description() { + let tool = OrchestratorTool::builder("mytools") + .tool(Box::new(Echo)) + .build(); + assert_eq!(tool.short_description(), "Scripted orchestrator: mytools"); + } + + #[test] + fn test_description_lists_tools() { + let tool = build_test_tool(); + let desc = tool.description(); + assert!(desc.contains("get_user")); + assert!(desc.contains("get_orders")); + assert!(desc.contains("fail_tool")); + 
assert!(desc.contains("from_stdin")); + } + + #[test] + fn test_help_has_tool_commands_section() { + let tool = build_test_tool(); + let help = tool.help(); + assert!(help.contains("TOOL COMMANDS")); + assert!(help.contains("get_user")); + assert!(help.contains("Fetch user by id")); + } + + #[test] + fn test_system_prompt_lists_tools() { + let tool = build_test_tool(); + let sp = tool.system_prompt(); + assert!(sp.contains("# test_api")); + assert!(sp.contains("- `get_user`:")); + assert!(sp.contains("- `get_orders`:")); + assert!(sp.contains("jq")); + } + + #[test] + fn test_schemas() { + let tool = build_test_tool(); + let input = tool.input_schema(); + assert!(input["properties"]["commands"].is_object()); + let output = tool.output_schema(); + assert!(output["properties"]["stdout"].is_object()); + } + + #[test] + fn test_version() { + let tool = build_test_tool(); + assert_eq!(tool.version(), VERSION); + } + + // -- Execution tests -- + + #[tokio::test] + async fn test_execute_empty() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: String::new(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.is_empty()); + } + + #[tokio::test] + async fn test_execute_single_tool() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "get_user 42".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("\"name\":\"Alice\"")); + assert!(resp.stdout.contains("\"id\":42")); + } + + #[tokio::test] + async fn test_execute_pipeline_with_jq() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "get_user 42 | jq -r '.name'".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "Alice"); + } + + #[tokio::test] + async fn test_execute_multi_step() { + let mut tool = build_test_tool(); + let script = r#" + user=$(get_user 1) + name=$(echo "$user" | jq -r '.name') + 
orders=$(get_orders 1) + total=$(echo "$orders" | jq '[.[].total] | add') + echo "User: $name, Total: $total" + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "User: Alice, Total: 79.49"); + } + + #[tokio::test] + async fn test_execute_tool_failure() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "fail_tool".to_string(), + }) + .await; + assert_ne!(resp.exit_code, 0); + assert!(resp.stderr.contains("service unavailable")); + } + + #[tokio::test] + async fn test_execute_tool_failure_with_fallback() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "fail_tool || echo 'fallback'".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("fallback")); + } + + #[tokio::test] + async fn test_execute_stdin_pipe() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "echo hello | from_stdin".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "HELLO"); + } + + #[tokio::test] + async fn test_execute_loop_over_tools() { + let mut tool = build_test_tool(); + let script = r#" + for uid in 1 2 3; do + get_user $uid | jq -r '.name' + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "Alice\nAlice\nAlice"); + } + + #[tokio::test] + async fn test_execute_conditional() { + let mut tool = build_test_tool(); + let script = r#" + user=$(get_user 5) + name=$(echo "$user" | jq -r '.name') + if [ "$name" = "Alice" ]; then + echo "found alice" + else + echo "not alice" + fi + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "found alice"); + } + + 
#[tokio::test] + async fn test_execute_with_env() { + let mut tool = OrchestratorTool::builder("env_test") + .env("API_BASE", "https://api.example.com") + .tool(Box::new(Echo)) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "echo $API_BASE".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "https://api.example.com"); + } + + #[tokio::test] + async fn test_execute_with_status_callback() { + use std::sync::{Arc, Mutex}; + + let mut tool = build_test_tool(); + let phases = Arc::new(Mutex::new(Vec::new())); + let phases_clone = phases.clone(); + + let resp = tool + .execute_with_status( + ToolRequest { + commands: "get_user 1".to_string(), + }, + Box::new(move |status| { + phases_clone + .lock() + .expect("lock poisoned") + .push(status.phase.clone()); + }), + ) + .await; + + assert_eq!(resp.exit_code, 0); + let phases = phases.lock().expect("lock poisoned"); + assert!(phases.contains(&"validate".to_string())); + assert!(phases.contains(&"execute".to_string())); + assert!(phases.contains(&"complete".to_string())); + } + + /// Verify the tool can be called multiple times (Arc sharing works). 
+ #[tokio::test] + async fn test_multiple_execute_calls() { + let mut tool = build_test_tool(); + + let resp1 = tool + .execute(ToolRequest { + commands: "get_user 1 | jq -r '.name'".to_string(), + }) + .await; + assert_eq!(resp1.stdout.trim(), "Alice"); + + let resp2 = tool + .execute(ToolRequest { + commands: "get_orders 1 | jq 'length'".to_string(), + }) + .await; + assert_eq!(resp2.stdout.trim(), "2"); + + let resp3 = tool + .execute(ToolRequest { + commands: "get_user 2 | jq -r '.email'".to_string(), + }) + .await; + assert_eq!(resp3.stdout.trim(), "alice@example.com"); + } +} diff --git a/specs/014-scripted-tool-orchestration.md b/specs/014-scripted-tool-orchestration.md new file mode 100644 index 00000000..c4a752ea --- /dev/null +++ b/specs/014-scripted-tool-orchestration.md @@ -0,0 +1,108 @@ +# Spec 014: Scripted Tool Orchestration + +## Summary + +Compose multiple `CallableTool`s into a single `OrchestratorTool` that accepts bash scripts. Each sub-tool becomes a builtin command, letting LLMs orchestrate N tools in one call using pipes, variables, loops, and conditionals. + +## Motivation + +When an LLM has access to many tools (get_user, list_orders, get_inventory, etc.), each tool call is a separate round-trip. A data-gathering task that needs 5 tools requires 5+ turns. With `OrchestratorTool`, the LLM writes a single bash script that calls all tools, pipes results through `jq`, and returns composed output — reducing latency and token cost. + +## Design + +### CallableTool trait + +```rust +pub trait CallableTool: Send + Sync { + fn name(&self) -> &str; + fn description(&self) -> &str; + fn call(&self, args: &[String], stdin: Option<&str>) -> Result; +} +``` + +- Sync by design. Most sub-tools are fast (HTTP stubs, cache, mock data). +- `args`: positional args after the command name. +- `stdin`: pipeline input from prior command. +- Returns stdout string on success, error message on failure. 
+ +### CallableToolBuiltin adapter + +Internal struct wrapping `Arc`. Implements `Builtin` so the interpreter can execute it. Arc sharing ensures the same tools survive across multiple `execute()` calls. + +### OrchestratorToolBuilder + +```rust +OrchestratorTool::builder("api_name") + .short_description("...") + .tool(Box::new(GetUser)) + .tool(Box::new(ListOrders)) + .env("API_KEY", "...") + .limits(ExecutionLimits::new().max_commands(500)) + .build() +``` + +### OrchestratorTool + +Implements the `Tool` trait. On each `execute()`: + +1. Creates a fresh `Bash` instance. +2. Registers each `CallableTool` as a builtin via `Arc::clone`. +3. Runs the user-provided script. +4. Returns `ToolResponse { stdout, stderr, exit_code }`. + +Reusable — multiple `execute()` calls share the same `Arc` instances. + +### LLM integration + +`system_prompt()` generates markdown with available tool commands and tips. Example output: + +```markdown +# api_name + +Input: {"commands": ""} +Output: {stdout, stderr, exit_code} + +## Available tool commands + +- `get_user`: Fetch user by ID. Usage: get_user +- `list_orders`: List orders for user. Usage: list_orders + +## Tips + +- Pipe tool output through `jq` for JSON processing +- Use variables to pass data between tool calls +``` + +## Module location + +`crates/bashkit/src/scripted_tool.rs` + +Public exports from `lib.rs`: `CallableTool`, `OrchestratorTool`, `OrchestratorToolBuilder`. + +## Example + +`crates/bashkit/examples/scripted_tool_orchestration.rs` — e-commerce API demo with get_user, list_orders, get_inventory, create_discount. 
+ +## Test coverage + +19 unit tests covering: +- Builder configuration (name, description, defaults) +- Introspection (help, system_prompt, schemas) +- Single tool execution +- Pipeline with jq +- Multi-step orchestration (variables, command substitution) +- Error handling and fallback (`||`) +- Stdin piping +- Loops and conditionals +- Environment variables +- Status callbacks +- Multiple sequential `execute()` calls (Arc reuse) + +## Security + +Inherits all bashkit sandbox guarantees: +- Virtual filesystem (no host access) +- Resource limits (max commands, loop iterations, function depth) +- No network access unless explicitly configured + +Sub-tool implementations control their own security boundaries. From c4bbf20827ffeccd12849b23cce4835ff1774c12 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 20:28:33 +0000 Subject: [PATCH 2/6] refactor(orchestrator): replace CallableTool trait with ToolDef+callback API MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Rename scripted_tool.rs → orchestrator/ module (mod.rs + execute.rs) - Replace CallableTool trait with ToolDef (OpenAPI-style: name, description, input_schema) + closure callback — two arguments to builder.tool() - Gate entire module behind `orchestrator` feature flag - System prompt now renders input_schema when present - Rename example to orchestrator.rs - Update spec 014 and AGENTS.md https://claude.ai/code/session_01UcTwLqLrhbCFAuL26Fcm5W --- AGENTS.md | 2 +- crates/bashkit/Cargo.toml | 7 + crates/bashkit/examples/orchestrator.rs | 233 +++++ .../examples/scripted_tool_orchestration.rs | 256 ------ crates/bashkit/src/lib.rs | 10 +- crates/bashkit/src/orchestrator/execute.rs | 242 +++++ crates/bashkit/src/orchestrator/mod.rs | 580 ++++++++++++ crates/bashkit/src/scripted_tool.rs | 837 ------------------ specs/014-scripted-tool-orchestration.md | 83 +- 9 files changed, 1129 insertions(+), 1121 deletions(-) create mode 100644 
crates/bashkit/examples/orchestrator.rs delete mode 100644 crates/bashkit/examples/scripted_tool_orchestration.rs create mode 100644 crates/bashkit/src/orchestrator/execute.rs create mode 100644 crates/bashkit/src/orchestrator/mod.rs delete mode 100644 crates/bashkit/src/scripted_tool.rs diff --git a/AGENTS.md b/AGENTS.md index 8f28e785..7ec66b92 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -40,7 +40,7 @@ Fix root cause. Unsure: read more code; if stuck, ask w/ short options. Unrecogn | 012-eval | LLM evaluation harness, dataset format, scoring | | 012-maintenance | Pre-release maintenance checklist | | 013-python-package | Python bindings, PyPI wheels, platform matrix | -| 014-scripted-tool-orchestration | Compose CallableTools into OrchestratorTool via bash scripts | +| 014-scripted-tool-orchestration | Compose ToolDef+callback pairs into OrchestratorTool via bash scripts | ### Documentation diff --git a/crates/bashkit/Cargo.toml b/crates/bashkit/Cargo.toml index c16899b5..624eb9ee 100644 --- a/crates/bashkit/Cargo.toml +++ b/crates/bashkit/Cargo.toml @@ -77,6 +77,9 @@ logging = ["tracing"] # Phase 2 will add gix dependency for remote operations # Usage: cargo build --features git git = [] +# Enable OrchestratorTool: compose ToolDef+callback pairs into a single Tool +# Usage: cargo build --features orchestrator +orchestrator = [] # Enable python/python3 builtins via embedded Monty interpreter # Monty is a git dep (not yet on crates.io) — feature unavailable from registry python = ["dep:monty"] @@ -101,6 +104,10 @@ required-features = ["http_client"] name = "git_workflow" required-features = ["git"] +[[example]] +name = "orchestrator" +required-features = ["orchestrator"] + [[example]] name = "python_scripts" required-features = ["python"] diff --git a/crates/bashkit/examples/orchestrator.rs b/crates/bashkit/examples/orchestrator.rs new file mode 100644 index 00000000..43c8c1c5 --- /dev/null +++ b/crates/bashkit/examples/orchestrator.rs @@ -0,0 +1,233 @@ +//! 
Orchestrator Tool Example +//! +//! Demonstrates composing multiple API-like tools (ToolDef + closures) into a +//! single OrchestratorTool that an LLM agent can call with bash scripts. +//! +//! Run with: cargo run --example orchestrator --features orchestrator +//! +//! This example simulates an e-commerce API with tools for users, orders, and +//! inventory. The OrchestratorTool lets an agent compose these in one call. + +use bashkit::{OrchestratorTool, Tool, ToolDef, ToolRequest}; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + println!("=== Orchestrator Tool Demo ===\n"); + + // Build the orchestrator with tool definitions + closures + let mut tool = OrchestratorTool::builder("ecommerce_api") + .short_description("E-commerce API orchestrator with user, order, and inventory tools") + .tool( + ToolDef::new("get_user", "Fetch user by ID. Usage: get_user ") + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer", "description": "User ID"} + }, + "required": ["id"] + })), + |args, _stdin| { + let id: u64 = args + .first() + .and_then(|s| s.parse().ok()) + .ok_or("usage: get_user ")?; + + let users = [ + (1, "Alice", "alice@example.com", "premium"), + (2, "Bob", "bob@example.com", "basic"), + (3, "Charlie", "charlie@example.com", "premium"), + ]; + + match users.iter().find(|(uid, ..)| *uid == id) { + Some((uid, name, email, tier)) => Ok(format!( + "{{\"id\":{uid},\"name\":\"{name}\",\"email\":\"{email}\",\"tier\":\"{tier}\"}}\n" + )), + None => Err(format!("user {} not found", id)), + } + }, + ) + .tool( + ToolDef::new( + "list_orders", + "List orders for a user. 
Usage: list_orders ", + ) + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "user_id": {"type": "integer", "description": "User ID"} + }, + "required": ["user_id"] + })), + |args, _stdin| { + let uid: u64 = args + .first() + .and_then(|s| s.parse().ok()) + .ok_or("usage: list_orders ")?; + + let orders = match uid { + 1 => r#"[{"order_id":101,"item":"Laptop","qty":1,"price":999.99},{"order_id":102,"item":"Mouse","qty":2,"price":29.99}]"#, + 2 => r#"[{"order_id":201,"item":"Keyboard","qty":1,"price":79.99}]"#, + 3 => r#"[]"#, + _ => return Err(format!("no orders for user {}", uid)), + }; + + Ok(format!("{orders}\n")) + }, + ) + .tool( + ToolDef::new( + "get_inventory", + "Check inventory for an item. Usage: get_inventory ", + ), + |args, _stdin| { + let item = args.first().ok_or("usage: get_inventory ")?; + + let stock = match item.to_lowercase().as_str() { + "laptop" => 15, + "mouse" => 142, + "keyboard" => 67, + _ => 0, + }; + + Ok(format!( + "{{\"item\":\"{}\",\"in_stock\":{}}}\n", + item, stock + )) + }, + ) + .tool( + ToolDef::new( + "create_discount", + "Create a discount code. 
Usage: create_discount ", + ), + |args, _stdin| { + let uid = args + .first() + .ok_or("usage: create_discount ")?; + let pct = args + .get(1) + .ok_or("usage: create_discount ")?; + Ok(format!( + "{{\"code\":\"SAVE{pct}-U{uid}\",\"percent\":{pct},\"user_id\":{uid}}}\n" + )) + }, + ) + .env("STORE_NAME", "Bashkit Shop") + .build(); + + // ---- Show what the LLM sees ---- + println!("--- Tool name ---"); + println!("{}\n", tool.name()); + + println!("--- System prompt (what goes in LLM system message) ---"); + println!("{}", tool.system_prompt()); + + // ---- Demo 1: Simple single tool call ---- + println!("--- Demo 1: Single tool call ---"); + let resp = tool + .execute(ToolRequest { + commands: "get_user 1".to_string(), + }) + .await; + println!("$ get_user 1"); + println!("{}", resp.stdout); + + // ---- Demo 2: Pipeline with jq ---- + println!("--- Demo 2: Pipeline with jq ---"); + let resp = tool + .execute(ToolRequest { + commands: "get_user 1 | jq -r '.name'".to_string(), + }) + .await; + println!("$ get_user 1 | jq -r '.name'"); + println!("{}", resp.stdout); + + // ---- Demo 3: Multi-step orchestration ---- + println!("--- Demo 3: Multi-step orchestration ---"); + let script = r#" + user=$(get_user 1) + name=$(echo "$user" | jq -r '.name') + tier=$(echo "$user" | jq -r '.tier') + orders=$(list_orders 1) + total=$(echo "$orders" | jq '[.[].price] | add') + count=$(echo "$orders" | jq 'length') + echo "Customer: $name (tier: $tier)" + echo "Orders: $count, Estimated total: $total" + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + println!(); + + // ---- Demo 4: Loop + conditional ---- + println!("--- Demo 4: Loop with conditional ---"); + let script = r#" + for uid in 1 2 3; do + user=$(get_user $uid) + name=$(echo "$user" | jq -r '.name') + tier=$(echo "$user" | jq -r '.tier') + if [ "$tier" = "premium" ]; then + echo "$name is premium - creating discount" + 
create_discount $uid 20 | jq -r '.code' + else + echo "$name is $tier - no discount" + fi + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + println!(); + + // ---- Demo 5: Inventory check with error handling ---- + println!("--- Demo 5: Error handling ---"); + let script = r#" + for item in Laptop Mouse Keyboard Widget; do + result=$(get_inventory "$item") + stock=$(echo "$result" | jq '.in_stock') + if [ "$stock" -eq 0 ]; then + echo "$item: OUT OF STOCK" + else + echo "$item: $stock in stock" + fi + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + println!(); + + // ---- Demo 6: Data aggregation ---- + println!("--- Demo 6: Aggregate data across tools ---"); + let script = r#" + echo "=== $STORE_NAME Report ===" + for uid in 1 2; do + name=$(get_user $uid | jq -r '.name') + orders=$(list_orders $uid) + count=$(echo "$orders" | jq 'length') + echo "$name: $count orders" + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + + println!("\n=== Demo Complete ==="); + Ok(()) +} diff --git a/crates/bashkit/examples/scripted_tool_orchestration.rs b/crates/bashkit/examples/scripted_tool_orchestration.rs deleted file mode 100644 index ab55efbe..00000000 --- a/crates/bashkit/examples/scripted_tool_orchestration.rs +++ /dev/null @@ -1,256 +0,0 @@ -//! Scripted Tool Orchestration Example -//! -//! Demonstrates composing multiple API-like tools into a single OrchestratorTool -//! that an LLM agent can call with bash scripts. -//! -//! Run with: cargo run --example scripted_tool_orchestration -//! -//! This example simulates an e-commerce API with tools for users, orders, and -//! inventory. The OrchestratorTool lets an agent compose these in one call. 
- -use bashkit::{CallableTool, OrchestratorTool, Tool, ToolRequest}; - -// ============================================================================ -// Mock API tools (in real use, these would call actual HTTP endpoints) -// ============================================================================ - -struct GetUser; - -impl CallableTool for GetUser { - fn name(&self) -> &str { - "get_user" - } - fn description(&self) -> &str { - "Fetch user by ID. Usage: get_user " - } - fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { - let id: u64 = args - .first() - .and_then(|s| s.parse().ok()) - .ok_or("usage: get_user ")?; - - let users = [ - (1, "Alice", "alice@example.com", "premium"), - (2, "Bob", "bob@example.com", "basic"), - (3, "Charlie", "charlie@example.com", "premium"), - ]; - - match users.iter().find(|(uid, ..)| *uid == id) { - Some((uid, name, email, tier)) => Ok(format!( - "{{\"id\":{uid},\"name\":\"{name}\",\"email\":\"{email}\",\"tier\":\"{tier}\"}}\n" - )), - None => Err(format!("user {} not found", id)), - } - } -} - -struct ListOrders; - -impl CallableTool for ListOrders { - fn name(&self) -> &str { - "list_orders" - } - fn description(&self) -> &str { - "List orders for a user. Usage: list_orders " - } - fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { - let uid: u64 = args - .first() - .and_then(|s| s.parse().ok()) - .ok_or("usage: list_orders ")?; - - let orders = match uid { - 1 => { - r#"[{"order_id":101,"item":"Laptop","qty":1,"price":999.99},{"order_id":102,"item":"Mouse","qty":2,"price":29.99}]"# - } - 2 => r#"[{"order_id":201,"item":"Keyboard","qty":1,"price":79.99}]"#, - 3 => r#"[]"#, - _ => return Err(format!("no orders for user {}", uid)), - }; - - Ok(format!("{orders}\n")) - } -} - -struct GetInventory; - -impl CallableTool for GetInventory { - fn name(&self) -> &str { - "get_inventory" - } - fn description(&self) -> &str { - "Check inventory for an item. 
Usage: get_inventory " - } - fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { - let item = args.first().ok_or("usage: get_inventory ")?; - - let stock = match item.to_lowercase().as_str() { - "laptop" => 15, - "mouse" => 142, - "keyboard" => 67, - _ => 0, - }; - - Ok(format!( - "{{\"item\":\"{}\",\"in_stock\":{}}}\n", - item, stock - )) - } -} - -struct CreateDiscount; - -impl CallableTool for CreateDiscount { - fn name(&self) -> &str { - "create_discount" - } - fn description(&self) -> &str { - "Create a discount code. Usage: create_discount " - } - fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { - let uid = args - .first() - .ok_or("usage: create_discount ")?; - let pct = args - .get(1) - .ok_or("usage: create_discount ")?; - Ok(format!( - "{{\"code\":\"SAVE{pct}-U{uid}\",\"percent\":{pct},\"user_id\":{uid}}}\n" - )) - } -} - -// ============================================================================ -// Demo: show how OrchestratorTool works -// ============================================================================ - -#[tokio::main] -async fn main() -> anyhow::Result<()> { - println!("=== Scripted Tool Orchestration Demo ===\n"); - - // Build the orchestrator with all our API tools - let mut tool = OrchestratorTool::builder("ecommerce_api") - .short_description("E-commerce API orchestrator with user, order, and inventory tools") - .tool(Box::new(GetUser)) - .tool(Box::new(ListOrders)) - .tool(Box::new(GetInventory)) - .tool(Box::new(CreateDiscount)) - .env("STORE_NAME", "Bashkit Shop") - .build(); - - // ---- Show what the LLM sees ---- - println!("--- Tool name ---"); - println!("{}\n", tool.name()); - - println!("--- System prompt (what goes in LLM system message) ---"); - println!("{}", tool.system_prompt()); - - // ---- Demo 1: Simple single tool call ---- - println!("--- Demo 1: Single tool call ---"); - let resp = tool - .execute(ToolRequest { - commands: "get_user 1".to_string(), - }) - .await; - println!("$ 
get_user 1"); - println!("{}", resp.stdout); - - // ---- Demo 2: Pipeline with jq ---- - println!("--- Demo 2: Pipeline with jq ---"); - let resp = tool - .execute(ToolRequest { - commands: "get_user 1 | jq -r '.name'".to_string(), - }) - .await; - println!("$ get_user 1 | jq -r '.name'"); - println!("{}", resp.stdout); - - // ---- Demo 3: Multi-step orchestration ---- - println!("--- Demo 3: Multi-step orchestration ---"); - let script = r#" - user=$(get_user 1) - name=$(echo "$user" | jq -r '.name') - tier=$(echo "$user" | jq -r '.tier') - orders=$(list_orders 1) - total=$(echo "$orders" | jq '[.[].price] | add') - count=$(echo "$orders" | jq 'length') - echo "Customer: $name (tier: $tier)" - echo "Orders: $count, Estimated total: $total" - "#; - let resp = tool - .execute(ToolRequest { - commands: script.to_string(), - }) - .await; - println!("$ "); - print!("{}", resp.stdout); - println!(); - - // ---- Demo 4: Loop + conditional ---- - println!("--- Demo 4: Loop with conditional ---"); - let script = r#" - for uid in 1 2 3; do - user=$(get_user $uid) - name=$(echo "$user" | jq -r '.name') - tier=$(echo "$user" | jq -r '.tier') - if [ "$tier" = "premium" ]; then - echo "$name is premium - creating discount" - create_discount $uid 20 | jq -r '.code' - else - echo "$name is $tier - no discount" - fi - done - "#; - let resp = tool - .execute(ToolRequest { - commands: script.to_string(), - }) - .await; - println!("$ "); - print!("{}", resp.stdout); - println!(); - - // ---- Demo 5: Inventory check with error handling ---- - println!("--- Demo 5: Error handling ---"); - let script = r#" - for item in Laptop Mouse Keyboard Widget; do - result=$(get_inventory "$item") - stock=$(echo "$result" | jq '.in_stock') - if [ "$stock" -eq 0 ]; then - echo "$item: OUT OF STOCK" - else - echo "$item: $stock in stock" - fi - done - "#; - let resp = tool - .execute(ToolRequest { - commands: script.to_string(), - }) - .await; - println!("$ "); - print!("{}", resp.stdout); - 
println!(); - - // ---- Demo 6: Data aggregation ---- - println!("--- Demo 6: Aggregate data across tools ---"); - let script = r#" - echo "=== $STORE_NAME Report ===" - for uid in 1 2; do - name=$(get_user $uid | jq -r '.name') - orders=$(list_orders $uid) - count=$(echo "$orders" | jq 'length') - echo "$name: $count orders" - done - "#; - let resp = tool - .execute(ToolRequest { - commands: script.to_string(), - }) - .await; - println!("$ "); - print!("{}", resp.stdout); - - println!("\n=== Demo Complete ==="); - Ok(()) -} diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index 59da5cd1..60da377b 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -370,10 +370,12 @@ mod limits; #[cfg(feature = "logging")] mod logging_impl; mod network; +/// Tool orchestration: compose ToolDef+callback pairs into a single Tool via bash scripts. +/// Requires the `orchestrator` feature. +#[cfg(feature = "orchestrator")] +pub mod orchestrator; /// Parser module - exposed for fuzzing and testing pub mod parser; -/// Scripted tool orchestration (compose multiple tools via bash scripts) -pub mod scripted_tool; /// Tool contract for LLM integration pub mod tool; @@ -388,9 +390,11 @@ pub use git::GitConfig; pub use interpreter::{ControlFlow, ExecResult}; pub use limits::{ExecutionCounters, ExecutionLimits, LimitExceeded}; pub use network::NetworkAllowlist; -pub use scripted_tool::{CallableTool, OrchestratorTool, OrchestratorToolBuilder}; pub use tool::{BashTool, BashToolBuilder, Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; +#[cfg(feature = "orchestrator")] +pub use orchestrator::{OrchestratorTool, OrchestratorToolBuilder, ToolCallback, ToolDef}; + #[cfg(feature = "http_client")] pub use network::HttpClient; diff --git a/crates/bashkit/src/orchestrator/execute.rs b/crates/bashkit/src/orchestrator/execute.rs new file mode 100644 index 00000000..d51cb439 --- /dev/null +++ b/crates/bashkit/src/orchestrator/execute.rs @@ -0,0 +1,242 @@ +//! 
OrchestratorTool execution: Tool impl, builtin adapter, documentation helpers. + +use super::{OrchestratorTool, ToolCallback}; +use crate::builtins::{Builtin, Context}; +use crate::error::Result; +use crate::interpreter::ExecResult; +use crate::tool::{Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; +use crate::Bash; +use async_trait::async_trait; +use schemars::schema_for; +use std::sync::Arc; + +// ============================================================================ +// ToolBuiltinAdapter — wraps ToolCallback as a Builtin +// ============================================================================ + +/// Adapts a [`ToolCallback`] into a [`Builtin`] so the interpreter can execute it. +struct ToolBuiltinAdapter { + callback: ToolCallback, +} + +#[async_trait] +impl Builtin for ToolBuiltinAdapter { + async fn execute(&self, ctx: Context<'_>) -> Result { + match (self.callback)(ctx.args, ctx.stdin) { + Ok(stdout) => Ok(ExecResult::ok(stdout)), + Err(msg) => Ok(ExecResult::err(msg, 1)), + } + } +} + +// ============================================================================ +// OrchestratorTool — internal helpers +// ============================================================================ + +impl OrchestratorTool { + /// Create a fresh Bash instance with all tool builtins registered. + fn create_bash(&self) -> Bash { + let mut builder = Bash::builder(); + + if let Some(ref limits) = self.limits { + builder = builder.limits(limits.clone()); + } + for (key, value) in &self.env_vars { + builder = builder.env(key, value); + } + for tool in &self.tools { + let name = tool.def.name.clone(); + let builtin: Box = Box::new(ToolBuiltinAdapter { + callback: Arc::clone(&tool.callback), + }); + builder = builder.builtin(name, builtin); + } + + builder.build() + } + + fn build_description(&self) -> String { + let mut desc = format!( + "Scripted tool orchestrator. 
Available tool-commands: {}", + self.tools + .iter() + .map(|t| t.def.name.as_str()) + .collect::>() + .join(", ") + ); + desc.push_str(". Also supports standard bash builtins (echo, jq, grep, sed, awk, etc.)."); + desc + } + + fn build_help(&self) -> String { + let mut doc = format!( + "{name}(1) Tool Commands {name}(1)\n\n\ + NAME\n\ + \x20 {name} - {short_desc}\n\n\ + SYNOPSIS\n\ + \x20 {{\"commands\": \"\"}}\n\n\ + DESCRIPTION\n\ + \x20 Executes a bash script with access to tool-specific commands\n\ + \x20 and standard bash builtins. Use pipes, variables, loops, and\n\ + \x20 conditionals to orchestrate multiple tool calls in one request.\n\n\ + TOOL COMMANDS\n", + name = self.name, + short_desc = self.short_desc, + ); + + for t in &self.tools { + doc.push_str(&format!( + " {:<20} {}\n", + t.def.name, t.def.description + )); + } + + doc.push_str( + "\nBASH BUILTINS\n\ + \x20 echo, cat, grep, sed, awk, jq, head, tail, sort, uniq, cut, tr,\n\ + \x20 wc, printf, test, [, true, false, cd, pwd, ls, find, xargs\n\n\ + INPUT\n\ + \x20 commands Bash script to execute\n\n\ + OUTPUT\n\ + \x20 stdout Combined standard output\n\ + \x20 stderr Errors from tool commands or bash\n\ + \x20 exit_code 0 on success\n\n\ + EXAMPLES\n\ + \x20 Single tool call:\n\ + \x20 {{\"commands\": \"get_user 42\"}}\n\n\ + \x20 Pipeline:\n\ + \x20 {{\"commands\": \"get_user 42 | jq '.name'\"}}\n\n\ + \x20 Multi-step orchestration:\n\ + \x20 {{\"commands\": \"user=$(get_user 42)\\norders=$(get_orders 42)\\necho $user | jq -r '.name'\"}}\n", + ); + + doc + } + + fn build_system_prompt(&self) -> String { + let mut prompt = format!("# {}\n\n", self.name); + prompt.push_str(&format!("{}\n\n", self.short_desc)); + + prompt.push_str("Input: {\"commands\": \"\"}\n"); + prompt.push_str("Output: {stdout, stderr, exit_code}\n\n"); + + prompt.push_str("## Available tool commands\n\n"); + for t in &self.tools { + prompt.push_str(&format!("- `{}`: {}\n", t.def.name, t.def.description)); + if let Some(obj) = 
t.def.input_schema.as_object() { + if !obj.is_empty() { + prompt.push_str(&format!(" Schema: {}\n", t.def.input_schema)); + } + } + } + + prompt.push_str( + "\n## Tips\n\n\ + - Pipe tool output through `jq` for JSON processing\n\ + - Use variables to pass data between tool calls\n\ + - Use `set -e` to stop on first error\n\ + - Standard builtins (echo, grep, sed, awk, etc.) are available\n", + ); + + prompt + } +} + +// ============================================================================ +// Tool trait implementation +// ============================================================================ + +#[async_trait] +impl Tool for OrchestratorTool { + fn name(&self) -> &str { + &self.name + } + + fn short_description(&self) -> &str { + &self.short_desc + } + + fn description(&self) -> String { + self.build_description() + } + + fn help(&self) -> String { + self.build_help() + } + + fn system_prompt(&self) -> String { + self.build_system_prompt() + } + + fn input_schema(&self) -> serde_json::Value { + let schema = schema_for!(ToolRequest); + serde_json::to_value(schema).unwrap_or_default() + } + + fn output_schema(&self) -> serde_json::Value { + let schema = schema_for!(ToolResponse); + serde_json::to_value(schema).unwrap_or_default() + } + + fn version(&self) -> &str { + VERSION + } + + async fn execute(&mut self, req: ToolRequest) -> ToolResponse { + if req.commands.is_empty() { + return ToolResponse { + stdout: String::new(), + stderr: String::new(), + exit_code: 0, + error: None, + }; + } + + let mut bash = self.create_bash(); + + match bash.exec(&req.commands).await { + Ok(result) => result.into(), + Err(e) => ToolResponse { + stdout: String::new(), + stderr: e.to_string(), + exit_code: 1, + error: Some(format!("{:?}", e)), + }, + } + } + + async fn execute_with_status( + &mut self, + req: ToolRequest, + mut status_callback: Box, + ) -> ToolResponse { + status_callback(ToolStatus::new("validate").with_percent(0.0)); + + if req.commands.is_empty() { + 
status_callback(ToolStatus::new("complete").with_percent(100.0)); + return ToolResponse { + stdout: String::new(), + stderr: String::new(), + exit_code: 0, + error: None, + }; + } + + status_callback(ToolStatus::new("parse").with_percent(10.0)); + let mut bash = self.create_bash(); + status_callback(ToolStatus::new("execute").with_percent(20.0)); + + let response = match bash.exec(&req.commands).await { + Ok(result) => result.into(), + Err(e) => ToolResponse { + stdout: String::new(), + stderr: e.to_string(), + exit_code: 1, + error: Some(format!("{:?}", e)), + }, + }; + + status_callback(ToolStatus::new("complete").with_percent(100.0)); + response + } +} diff --git a/crates/bashkit/src/orchestrator/mod.rs b/crates/bashkit/src/orchestrator/mod.rs new file mode 100644 index 00000000..97c93a86 --- /dev/null +++ b/crates/bashkit/src/orchestrator/mod.rs @@ -0,0 +1,580 @@ +//! Tool orchestration +//! +//! Compose tool definitions + callbacks into a single [`Tool`] that accepts bash +//! scripts. Each tool becomes a builtin command inside the interpreter, so an LLM +//! can orchestrate many tools in one call using pipes, variables, loops, and +//! conditionals. +//! +//! # Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────┐ +//! │ OrchestratorTool (implements Tool) │ +//! │ │ +//! │ ┌─────────┐ ┌─────────┐ ┌──────────┐ │ +//! │ │get_user │ │get_order│ │inventory │ │ +//! │ │(builtin)│ │(builtin)│ │(builtin) │ │ +//! │ └─────────┘ └─────────┘ └──────────┘ │ +//! │ ↑ ↑ ↑ │ +//! │ bash script: pipes, vars, jq, loops │ +//! └─────────────────────────────────────────┘ +//! ``` +//! +//! # Example +//! +//! ```rust +//! use bashkit::{OrchestratorTool, ToolDef, Tool, ToolRequest}; +//! +//! # tokio_test::block_on(async { +//! let mut tool = OrchestratorTool::builder("api") +//! .tool( +//! ToolDef::new("get_user", "Fetch user by id. Usage: get_user "), +//! |args, _stdin| { +//! let id = args.first().ok_or("missing id")?; +//! 
Ok(format!("{{\"id\":{},\"name\":\"Alice\"}}\n", id)) +//! }, +//! ) +//! .build(); +//! +//! let resp = tool.execute(ToolRequest { +//! commands: "get_user 42 | jq '.name'".to_string(), +//! }).await; +//! +//! assert_eq!(resp.stdout.trim(), "\"Alice\""); +//! # }); +//! ``` + +mod execute; + +use crate::ExecutionLimits; +use std::sync::Arc; + +// ============================================================================ +// ToolDef — OpenAPI-style tool definition +// ============================================================================ + +/// OpenAPI-style tool definition: name, description, input schema. +/// +/// Describes a sub-tool registered with [`OrchestratorToolBuilder`]. +/// The `input_schema` is optional JSON Schema for documentation / LLM prompts. +pub struct ToolDef { + /// Command name used as bash builtin (e.g. `"get_user"`). + pub name: String, + /// Human-readable description for LLM consumption. + pub description: String, + /// JSON Schema describing accepted arguments. Empty object if unspecified. + pub input_schema: serde_json::Value, +} + +impl ToolDef { + /// Create a tool definition with name and description. + pub fn new(name: impl Into, description: impl Into) -> Self { + Self { + name: name.into(), + description: description.into(), + input_schema: serde_json::Value::Object(Default::default()), + } + } + + /// Attach a JSON Schema for the tool's input parameters. + pub fn with_schema(mut self, schema: serde_json::Value) -> Self { + self.input_schema = schema; + self + } +} + +// ============================================================================ +// ToolCallback — execution callback type +// ============================================================================ + +/// Execution callback for a registered tool. +/// +/// - `args`: positional arguments after command name. +/// For `get_user --id 5`, args is `["--id", "5"]`. +/// - `stdin`: pipeline input from prior command, if any. 
+/// +/// Return `Ok(stdout)` on success or `Err(message)` on failure. +pub type ToolCallback = + Arc) -> Result + Send + Sync>; + +// ============================================================================ +// RegisteredTool — internal definition + callback pair +// ============================================================================ + +/// A registered tool: definition + callback. +pub(crate) struct RegisteredTool { + pub(crate) def: ToolDef, + pub(crate) callback: ToolCallback, +} + +// ============================================================================ +// OrchestratorToolBuilder +// ============================================================================ + +/// Builder for [`OrchestratorTool`]. +/// +/// ```rust +/// use bashkit::{OrchestratorTool, ToolDef}; +/// +/// let tool = OrchestratorTool::builder("net") +/// .short_description("Network tools") +/// .tool( +/// ToolDef::new("ping", "Ping a host"), +/// |args, _stdin| { +/// Ok(format!("pong {}\n", args.first().unwrap_or(&String::new()))) +/// }, +/// ) +/// .build(); +/// ``` +pub struct OrchestratorToolBuilder { + name: String, + short_desc: Option, + tools: Vec, + limits: Option, + env_vars: Vec<(String, String)>, +} + +impl OrchestratorToolBuilder { + pub(crate) fn new(name: impl Into) -> Self { + Self { + name: name.into(), + short_desc: None, + tools: Vec::new(), + limits: None, + env_vars: Vec::new(), + } + } + + /// One-line description for tool listings. + pub fn short_description(mut self, desc: impl Into) -> Self { + self.short_desc = Some(desc.into()); + self + } + + /// Register a tool with its definition and execution callback. + pub fn tool( + mut self, + def: ToolDef, + callback: impl Fn(&[String], Option<&str>) -> Result + Send + Sync + 'static, + ) -> Self { + self.tools.push(RegisteredTool { + def, + callback: Arc::new(callback), + }); + self + } + + /// Set execution limits for the bash interpreter. 
+ pub fn limits(mut self, limits: ExecutionLimits) -> Self { + self.limits = Some(limits); + self + } + + /// Add an environment variable visible inside scripts. + pub fn env(mut self, key: impl Into<String>, value: impl Into<String>) -> Self { + self.env_vars.push((key.into(), value.into())); + self + } + + /// Build the [`OrchestratorTool`]. + pub fn build(self) -> OrchestratorTool { + let short_desc = self + .short_desc + .unwrap_or_else(|| format!("Orchestrator: {}", self.name)); + + OrchestratorTool { + name: self.name, + short_desc, + tools: self.tools, + limits: self.limits, + env_vars: self.env_vars, + } + } +} + +// ============================================================================ +// OrchestratorTool +// ============================================================================ + +/// A [`Tool`](crate::tool::Tool) that orchestrates multiple tools via bash scripts. +/// +/// Each registered tool (defined by [`ToolDef`] + callback) becomes a bash builtin. +/// The LLM sends a bash script that can pipe, loop, branch, and compose these +/// builtins together with standard utilities like `jq`, `grep`, `sed`, etc. +/// +/// Reusable — `execute()` can be called multiple times. Each call gets a fresh +/// Bash interpreter with the same set of tool builtins. +/// +/// Create via [`OrchestratorTool::builder`]. +pub struct OrchestratorTool { + pub(crate) name: String, + pub(crate) short_desc: String, + pub(crate) tools: Vec<RegisteredTool>, + pub(crate) limits: Option<ExecutionLimits>, + pub(crate) env_vars: Vec<(String, String)>, +} + +impl OrchestratorTool { + /// Create a builder with the given tool name. 
+ pub fn builder(name: impl Into) -> OrchestratorToolBuilder { + OrchestratorToolBuilder::new(name) + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::tool::{Tool, ToolRequest, VERSION}; + + fn build_test_tool() -> OrchestratorTool { + OrchestratorTool::builder("test_api") + .short_description("Test API orchestrator") + .tool( + ToolDef::new("get_user", "Fetch user by id. Usage: get_user "), + |args, _stdin| { + let id = args.first().ok_or("missing user id")?; + Ok(format!( + "{{\"id\":{id},\"name\":\"Alice\",\"email\":\"alice@example.com\"}}\n" + )) + }, + ) + .tool( + ToolDef::new( + "get_orders", + "List orders for user. Usage: get_orders ", + ), + |args, _stdin| { + let uid = args.first().ok_or("missing user id")?; + Ok(format!( + "[{{\"order_id\":1,\"user_id\":{uid},\"total\":29.99}},\ + {{\"order_id\":2,\"user_id\":{uid},\"total\":49.50}}]\n" + )) + }, + ) + .tool( + ToolDef::new("fail_tool", "Always fails (for testing error handling)"), + |_args, _stdin| Err("service unavailable".to_string()), + ) + .tool( + ToolDef::new("from_stdin", "Read from stdin, uppercase it"), + |_args, stdin| match stdin { + Some(input) => Ok(input.to_uppercase()), + None => Err("no stdin".to_string()), + }, + ) + .build() + } + + // -- Builder tests -- + + #[test] + fn test_builder_name_and_description() { + let tool = build_test_tool(); + assert_eq!(tool.name(), "test_api"); + assert_eq!(tool.short_description(), "Test API orchestrator"); + } + + #[test] + fn test_builder_default_short_description() { + let tool = OrchestratorTool::builder("mytools") + .tool( + ToolDef::new("api_echo", "Echo args back as JSON"), + |args, _stdin| { + Ok(format!( + "{{\"args\":[{}]}}\n", + args.iter() + .map(|a| format!("\"{}\"", a)) + .collect::>() + .join(",") + )) + }, + ) + .build(); + 
assert_eq!(tool.short_description(), "Orchestrator: mytools"); + } + + #[test] + fn test_description_lists_tools() { + let tool = build_test_tool(); + let desc = tool.description(); + assert!(desc.contains("get_user")); + assert!(desc.contains("get_orders")); + assert!(desc.contains("fail_tool")); + assert!(desc.contains("from_stdin")); + } + + #[test] + fn test_help_has_tool_commands_section() { + let tool = build_test_tool(); + let help = tool.help(); + assert!(help.contains("TOOL COMMANDS")); + assert!(help.contains("get_user")); + assert!(help.contains("Fetch user by id")); + } + + #[test] + fn test_system_prompt_lists_tools() { + let tool = build_test_tool(); + let sp = tool.system_prompt(); + assert!(sp.contains("# test_api")); + assert!(sp.contains("- `get_user`:")); + assert!(sp.contains("- `get_orders`:")); + assert!(sp.contains("jq")); + } + + #[test] + fn test_system_prompt_includes_schema() { + let tool = OrchestratorTool::builder("schema_test") + .tool( + ToolDef::new("get_user", "Fetch user by id").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"} + }, + "required": ["id"] + })), + |_args, _stdin| Ok("ok\n".to_string()), + ) + .build(); + let sp = tool.system_prompt(); + assert!( + sp.contains("Schema:"), + "system prompt should include schema" + ); + assert!(sp.contains("\"type\":\"integer\"")); + } + + #[test] + fn test_schemas() { + let tool = build_test_tool(); + let input = tool.input_schema(); + assert!(input["properties"]["commands"].is_object()); + let output = tool.output_schema(); + assert!(output["properties"]["stdout"].is_object()); + } + + #[test] + fn test_version() { + let tool = build_test_tool(); + assert_eq!(tool.version(), VERSION); + } + + // -- Execution tests -- + + #[tokio::test] + async fn test_execute_empty() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: String::new(), + }) + .await; + assert_eq!(resp.exit_code, 0); + 
assert!(resp.stdout.is_empty()); + } + + #[tokio::test] + async fn test_execute_single_tool() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "get_user 42".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("\"name\":\"Alice\"")); + assert!(resp.stdout.contains("\"id\":42")); + } + + #[tokio::test] + async fn test_execute_pipeline_with_jq() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "get_user 42 | jq -r '.name'".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "Alice"); + } + + #[tokio::test] + async fn test_execute_multi_step() { + let mut tool = build_test_tool(); + let script = r#" + user=$(get_user 1) + name=$(echo "$user" | jq -r '.name') + orders=$(get_orders 1) + total=$(echo "$orders" | jq '[.[].total] | add') + echo "User: $name, Total: $total" + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "User: Alice, Total: 79.49"); + } + + #[tokio::test] + async fn test_execute_tool_failure() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "fail_tool".to_string(), + }) + .await; + assert_ne!(resp.exit_code, 0); + assert!(resp.stderr.contains("service unavailable")); + } + + #[tokio::test] + async fn test_execute_tool_failure_with_fallback() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "fail_tool || echo 'fallback'".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("fallback")); + } + + #[tokio::test] + async fn test_execute_stdin_pipe() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "echo hello | from_stdin".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), 
"HELLO"); + } + + #[tokio::test] + async fn test_execute_loop_over_tools() { + let mut tool = build_test_tool(); + let script = r#" + for uid in 1 2 3; do + get_user $uid | jq -r '.name' + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "Alice\nAlice\nAlice"); + } + + #[tokio::test] + async fn test_execute_conditional() { + let mut tool = build_test_tool(); + let script = r#" + user=$(get_user 5) + name=$(echo "$user" | jq -r '.name') + if [ "$name" = "Alice" ]; then + echo "found alice" + else + echo "not alice" + fi + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "found alice"); + } + + #[tokio::test] + async fn test_execute_with_env() { + let mut tool = OrchestratorTool::builder("env_test") + .env("API_BASE", "https://api.example.com") + .tool( + ToolDef::new("api_echo", "Echo args back as JSON"), + |args, _stdin| { + Ok(format!( + "{{\"args\":[{}]}}\n", + args.iter() + .map(|a| format!("\"{}\"", a)) + .collect::>() + .join(",") + )) + }, + ) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "echo $API_BASE".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "https://api.example.com"); + } + + #[tokio::test] + async fn test_execute_with_status_callback() { + use std::sync::{Arc, Mutex}; + + let mut tool = build_test_tool(); + let phases = Arc::new(Mutex::new(Vec::new())); + let phases_clone = phases.clone(); + + let resp = tool + .execute_with_status( + ToolRequest { + commands: "get_user 1".to_string(), + }, + Box::new(move |status| { + phases_clone + .lock() + .expect("lock poisoned") + .push(status.phase.clone()); + }), + ) + .await; + + assert_eq!(resp.exit_code, 0); + let phases = phases.lock().expect("lock poisoned"); + 
assert!(phases.contains(&"validate".to_string())); + assert!(phases.contains(&"execute".to_string())); + assert!(phases.contains(&"complete".to_string())); + } + + /// Verify the tool can be called multiple times (Arc sharing works). + #[tokio::test] + async fn test_multiple_execute_calls() { + let mut tool = build_test_tool(); + + let resp1 = tool + .execute(ToolRequest { + commands: "get_user 1 | jq -r '.name'".to_string(), + }) + .await; + assert_eq!(resp1.stdout.trim(), "Alice"); + + let resp2 = tool + .execute(ToolRequest { + commands: "get_orders 1 | jq 'length'".to_string(), + }) + .await; + assert_eq!(resp2.stdout.trim(), "2"); + + let resp3 = tool + .execute(ToolRequest { + commands: "get_user 2 | jq -r '.email'".to_string(), + }) + .await; + assert_eq!(resp3.stdout.trim(), "alice@example.com"); + } +} diff --git a/crates/bashkit/src/scripted_tool.rs b/crates/bashkit/src/scripted_tool.rs deleted file mode 100644 index c8f6a203..00000000 --- a/crates/bashkit/src/scripted_tool.rs +++ /dev/null @@ -1,837 +0,0 @@ -//! Scripted tool orchestration -//! -//! Compose multiple [`CallableTool`]s into a single [`Tool`] that accepts bash -//! scripts. Each `CallableTool` becomes a builtin command inside the interpreter, -//! so an LLM can orchestrate many tools in one call using pipes, variables, loops, -//! and conditionals. -//! -//! # Architecture -//! -//! ```text -//! ┌─────────────────────────────────────────┐ -//! │ OrchestratorTool (implements Tool) │ -//! │ │ -//! │ ┌─────────┐ ┌─────────┐ ┌──────────┐ │ -//! │ │get_user │ │get_order│ │inventory │ │ -//! │ │(builtin)│ │(builtin)│ │(builtin) │ │ -//! │ └─────────┘ └─────────┘ └──────────┘ │ -//! │ ↑ ↑ ↑ │ -//! │ bash script: pipes, vars, jq, loops │ -//! └─────────────────────────────────────────┘ -//! ``` -//! -//! # Example -//! -//! ```rust -//! use bashkit::{ -//! OrchestratorTool, CallableTool, Tool, ToolRequest, -//! }; -//! -//! struct GetUser; -//! -//! impl CallableTool for GetUser { -//! 
fn name(&self) -> &str { "get_user" } -//! fn description(&self) -> &str { "Fetch user by id. Usage: get_user " } -//! fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { -//! let id = args.first().ok_or("missing id")?; -//! Ok(format!("{{\"id\":{},\"name\":\"Alice\"}}\n", id)) -//! } -//! } -//! -//! # tokio_test::block_on(async { -//! let mut tool = OrchestratorTool::builder("api") -//! .tool(Box::new(GetUser)) -//! .build(); -//! -//! let resp = tool.execute(ToolRequest { -//! commands: "get_user 42 | jq '.name'".to_string(), -//! }).await; -//! -//! assert_eq!(resp.stdout.trim(), "\"Alice\""); -//! # }); -//! ``` - -use crate::builtins::{Builtin, Context}; -use crate::error::Result; -use crate::interpreter::ExecResult; -use crate::tool::{Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; -use crate::{Bash, ExecutionLimits}; -use async_trait::async_trait; -use schemars::schema_for; -use std::sync::Arc; - -// ============================================================================ -// CallableTool — sub-tool trait -// ============================================================================ - -/// A callable sub-tool that can be exposed as a bash builtin. -/// -/// Implement this for each external operation (API call, DB query, etc.) -/// you want to compose via bash scripts. -/// -/// # Sync design -/// -/// `call` is intentionally synchronous. Most tool calls are fast (mock data, -/// HTTP via blocking client, cache lookups). If you need async, use -/// `tokio::runtime::Handle::current().block_on()` or similar inside `call`. -pub trait CallableTool: Send + Sync { - /// Command name used as the builtin (e.g. `"get_user"`). - fn name(&self) -> &str; - - /// One-line description for LLM consumption. - fn description(&self) -> &str; - - /// Execute the tool. - /// - /// - `args`: command arguments (not including the command name itself). - /// For `get_user --id 5`, args is `["--id", "5"]`. 
- /// - `stdin`: pipeline input from the previous command, if any. - /// - /// Return `Ok(stdout)` on success or `Err(message)` on failure. - fn call(&self, args: &[String], stdin: Option<&str>) -> std::result::Result; -} - -// ============================================================================ -// CallableToolBuiltin — adapter from CallableTool to Builtin -// ============================================================================ - -/// Wraps an `Arc` as a [`Builtin`] so the interpreter can -/// execute it. Using Arc allows the same tool to be shared across multiple -/// Bash instances (one per `execute()` call). -struct CallableToolBuiltin { - inner: Arc, -} - -#[async_trait] -impl Builtin for CallableToolBuiltin { - async fn execute(&self, ctx: Context<'_>) -> Result { - match self.inner.call(ctx.args, ctx.stdin) { - Ok(stdout) => Ok(ExecResult::ok(stdout)), - Err(msg) => Ok(ExecResult::err(msg, 1)), - } - } - - fn llm_hint(&self) -> Option<&'static str> { - None - } -} - -// ============================================================================ -// OrchestratorToolBuilder -// ============================================================================ - -/// Builder for [`OrchestratorTool`]. 
-/// -/// ```rust -/// use bashkit::{OrchestratorTool, CallableTool}; -/// -/// struct Ping; -/// impl CallableTool for Ping { -/// fn name(&self) -> &str { "ping" } -/// fn description(&self) -> &str { "Ping a host" } -/// fn call(&self, args: &[String], _stdin: Option<&str>) -> Result { -/// Ok(format!("pong {}\n", args.first().unwrap_or(&String::new()))) -/// } -/// } -/// -/// let tool = OrchestratorTool::builder("net") -/// .short_description("Network tools") -/// .tool(Box::new(Ping)) -/// .build(); -/// ``` -pub struct OrchestratorToolBuilder { - name: String, - short_desc: Option, - tools: Vec>, - limits: Option, - env_vars: Vec<(String, String)>, -} - -impl OrchestratorToolBuilder { - fn new(name: impl Into) -> Self { - Self { - name: name.into(), - short_desc: None, - tools: Vec::new(), - limits: None, - env_vars: Vec::new(), - } - } - - /// One-line description for tool listings. - pub fn short_description(mut self, desc: impl Into) -> Self { - self.short_desc = Some(desc.into()); - self - } - - /// Register a sub-tool. It becomes a bash builtin with the same name. - pub fn tool(mut self, tool: Box) -> Self { - self.tools.push(tool); - self - } - - /// Set execution limits for the bash interpreter. - pub fn limits(mut self, limits: ExecutionLimits) -> Self { - self.limits = Some(limits); - self - } - - /// Add an environment variable visible inside scripts. - pub fn env(mut self, key: impl Into, value: impl Into) -> Self { - self.env_vars.push((key.into(), value.into())); - self - } - - /// Build the [`OrchestratorTool`]. 
- pub fn build(self) -> OrchestratorTool { - let tool_descriptions: Vec = self - .tools - .iter() - .map(|t| ToolDescription { - name: t.name().to_string(), - description: t.description().to_string(), - }) - .collect(); - - // Wrap in Arc so each execute() can clone refs into new Bash instances - let tools: Vec> = self.tools.into_iter().map(Arc::from).collect(); - - let short_desc = self - .short_desc - .unwrap_or_else(|| format!("Scripted orchestrator: {}", self.name)); - - OrchestratorTool { - name: self.name, - short_desc, - tool_descriptions, - tools, - limits: self.limits, - env_vars: self.env_vars, - } - } -} - -// ============================================================================ -// OrchestratorTool -// ============================================================================ - -/// Metadata kept for documentation generation. -struct ToolDescription { - name: String, - description: String, -} - -/// A [`Tool`] that orchestrates multiple [`CallableTool`]s via bash scripts. -/// -/// Each registered `CallableTool` becomes a bash builtin. The LLM sends a -/// bash script that can pipe, loop, branch, and compose these builtins -/// together with standard utilities like `jq`, `grep`, `sed`, etc. -/// -/// The tool is reusable — `execute()` can be called multiple times. Each call -/// gets a fresh Bash interpreter with the same set of tool-builtins. -/// -/// Create via [`OrchestratorTool::builder`]. -pub struct OrchestratorTool { - name: String, - short_desc: String, - tool_descriptions: Vec, - /// Arc-wrapped tools that are cloned into each Bash instance. - tools: Vec>, - limits: Option, - env_vars: Vec<(String, String)>, -} - -impl OrchestratorTool { - /// Create a builder with the given tool name. - pub fn builder(name: impl Into) -> OrchestratorToolBuilder { - OrchestratorToolBuilder::new(name) - } - - /// Create a fresh Bash instance with all tool-builtins registered. 
- fn create_bash(&self) -> Bash { - let mut builder = Bash::builder(); - - if let Some(ref limits) = self.limits { - builder = builder.limits(limits.clone()); - } - for (key, value) in &self.env_vars { - builder = builder.env(key, value); - } - // Clone Arc refs into fresh builtin adapters for this Bash instance - for tool in &self.tools { - let name = tool.name().to_string(); - let builtin: Box = Box::new(CallableToolBuiltin { - inner: Arc::clone(tool), - }); - builder = builder.builtin(name, builtin); - } - - builder.build() - } - - fn build_description(&self) -> String { - let mut desc = format!( - "Scripted tool orchestrator. Available tool-commands: {}", - self.tool_descriptions - .iter() - .map(|t| t.name.as_str()) - .collect::>() - .join(", ") - ); - desc.push_str(". Also supports standard bash builtins (echo, jq, grep, sed, awk, etc.)."); - desc - } - - fn build_help(&self) -> String { - let mut doc = format!( - "{name}(1) Tool Commands {name}(1)\n\n\ - NAME\n\ - \x20 {name} - {short_desc}\n\n\ - SYNOPSIS\n\ - \x20 {{\"commands\": \"\"}}\n\n\ - DESCRIPTION\n\ - \x20 Executes a bash script with access to tool-specific commands\n\ - \x20 and standard bash builtins. 
Use pipes, variables, loops, and\n\ - \x20 conditionals to orchestrate multiple tool calls in one request.\n\n\ - TOOL COMMANDS\n", - name = self.name, - short_desc = self.short_desc, - ); - - for t in &self.tool_descriptions { - doc.push_str(&format!(" {:<20} {}\n", t.name, t.description)); - } - - doc.push_str( - "\nBASH BUILTINS\n\ - \x20 echo, cat, grep, sed, awk, jq, head, tail, sort, uniq, cut, tr,\n\ - \x20 wc, printf, test, [, true, false, cd, pwd, ls, find, xargs\n\n\ - INPUT\n\ - \x20 commands Bash script to execute\n\n\ - OUTPUT\n\ - \x20 stdout Combined standard output\n\ - \x20 stderr Errors from tool commands or bash\n\ - \x20 exit_code 0 on success\n\n\ - EXAMPLES\n\ - \x20 Single tool call:\n\ - \x20 {{\"commands\": \"get_user 42\"}}\n\n\ - \x20 Pipeline:\n\ - \x20 {{\"commands\": \"get_user 42 | jq '.name'\"}}\n\n\ - \x20 Multi-step orchestration:\n\ - \x20 {{\"commands\": \"user=$(get_user 42)\\norders=$(get_orders 42)\\necho $user | jq -r '.name'\"}}\n", - ); - - doc - } - - fn build_system_prompt(&self) -> String { - let mut prompt = format!("# {}\n\n", self.name); - prompt.push_str(&format!("{}\n\n", self.short_desc)); - - prompt.push_str("Input: {\"commands\": \"\"}\n"); - prompt.push_str("Output: {stdout, stderr, exit_code}\n\n"); - - prompt.push_str("## Available tool commands\n\n"); - for t in &self.tool_descriptions { - prompt.push_str(&format!("- `{}`: {}\n", t.name, t.description)); - } - - prompt.push_str( - "\n## Tips\n\n\ - - Pipe tool output through `jq` for JSON processing\n\ - - Use variables to pass data between tool calls\n\ - - Use `set -e` to stop on first error\n\ - - Standard builtins (echo, grep, sed, awk, etc.) 
are available\n", - ); - - prompt - } -} - -#[async_trait] -impl Tool for OrchestratorTool { - fn name(&self) -> &str { - &self.name - } - - fn short_description(&self) -> &str { - &self.short_desc - } - - fn description(&self) -> String { - self.build_description() - } - - fn help(&self) -> String { - self.build_help() - } - - fn system_prompt(&self) -> String { - self.build_system_prompt() - } - - fn input_schema(&self) -> serde_json::Value { - let schema = schema_for!(ToolRequest); - serde_json::to_value(schema).unwrap_or_default() - } - - fn output_schema(&self) -> serde_json::Value { - let schema = schema_for!(ToolResponse); - serde_json::to_value(schema).unwrap_or_default() - } - - fn version(&self) -> &str { - VERSION - } - - async fn execute(&mut self, req: ToolRequest) -> ToolResponse { - if req.commands.is_empty() { - return ToolResponse { - stdout: String::new(), - stderr: String::new(), - exit_code: 0, - error: None, - }; - } - - let mut bash = self.create_bash(); - - match bash.exec(&req.commands).await { - Ok(result) => result.into(), - Err(e) => ToolResponse { - stdout: String::new(), - stderr: e.to_string(), - exit_code: 1, - error: Some(format!("{:?}", e)), - }, - } - } - - async fn execute_with_status( - &mut self, - req: ToolRequest, - mut status_callback: Box, - ) -> ToolResponse { - status_callback(ToolStatus::new("validate").with_percent(0.0)); - - if req.commands.is_empty() { - status_callback(ToolStatus::new("complete").with_percent(100.0)); - return ToolResponse { - stdout: String::new(), - stderr: String::new(), - exit_code: 0, - error: None, - }; - } - - status_callback(ToolStatus::new("parse").with_percent(10.0)); - let mut bash = self.create_bash(); - status_callback(ToolStatus::new("execute").with_percent(20.0)); - - let response = match bash.exec(&req.commands).await { - Ok(result) => result.into(), - Err(e) => ToolResponse { - stdout: String::new(), - stderr: e.to_string(), - exit_code: 1, - error: Some(format!("{:?}", e)), - }, - }; 
- - status_callback(ToolStatus::new("complete").with_percent(100.0)); - response - } -} - -// ============================================================================ -// Tests -// ============================================================================ - -#[cfg(test)] -mod tests { - use super::*; - - // -- Test CallableTool implementations -- - - struct Echo; - - impl CallableTool for Echo { - fn name(&self) -> &str { - "api_echo" - } - fn description(&self) -> &str { - "Echo args back as JSON" - } - fn call( - &self, - args: &[String], - _stdin: Option<&str>, - ) -> std::result::Result { - Ok(format!( - "{{\"args\":[{}]}}\n", - args.iter() - .map(|a| format!("\"{}\"", a)) - .collect::>() - .join(",") - )) - } - } - - struct GetUser; - - impl CallableTool for GetUser { - fn name(&self) -> &str { - "get_user" - } - fn description(&self) -> &str { - "Fetch user by id. Usage: get_user " - } - fn call( - &self, - args: &[String], - _stdin: Option<&str>, - ) -> std::result::Result { - let id = args.first().ok_or("missing user id".to_string())?; - Ok(format!( - "{{\"id\":{},\"name\":\"Alice\",\"email\":\"alice@example.com\"}}\n", - id - )) - } - } - - struct GetOrders; - - impl CallableTool for GetOrders { - fn name(&self) -> &str { - "get_orders" - } - fn description(&self) -> &str { - "List orders for user. 
Usage: get_orders " - } - fn call( - &self, - args: &[String], - _stdin: Option<&str>, - ) -> std::result::Result { - let uid = args.first().ok_or("missing user id".to_string())?; - Ok(format!( - "[{{\"order_id\":1,\"user_id\":{uid},\"total\":29.99}},\ - {{\"order_id\":2,\"user_id\":{uid},\"total\":49.50}}]\n" - )) - } - } - - struct FailTool; - - impl CallableTool for FailTool { - fn name(&self) -> &str { - "fail_tool" - } - fn description(&self) -> &str { - "Always fails (for testing error handling)" - } - fn call( - &self, - _args: &[String], - _stdin: Option<&str>, - ) -> std::result::Result { - Err("service unavailable".to_string()) - } - } - - struct StdinTool; - - impl CallableTool for StdinTool { - fn name(&self) -> &str { - "from_stdin" - } - fn description(&self) -> &str { - "Read from stdin, uppercase it" - } - fn call( - &self, - _args: &[String], - stdin: Option<&str>, - ) -> std::result::Result { - match stdin { - Some(input) => Ok(input.to_uppercase()), - None => Err("no stdin".to_string()), - } - } - } - - fn build_test_tool() -> OrchestratorTool { - OrchestratorTool::builder("test_api") - .short_description("Test API orchestrator") - .tool(Box::new(GetUser)) - .tool(Box::new(GetOrders)) - .tool(Box::new(FailTool)) - .tool(Box::new(StdinTool)) - .build() - } - - // -- Builder tests -- - - #[test] - fn test_builder_name_and_description() { - let tool = build_test_tool(); - assert_eq!(tool.name(), "test_api"); - assert_eq!(tool.short_description(), "Test API orchestrator"); - } - - #[test] - fn test_builder_default_short_description() { - let tool = OrchestratorTool::builder("mytools") - .tool(Box::new(Echo)) - .build(); - assert_eq!(tool.short_description(), "Scripted orchestrator: mytools"); - } - - #[test] - fn test_description_lists_tools() { - let tool = build_test_tool(); - let desc = tool.description(); - assert!(desc.contains("get_user")); - assert!(desc.contains("get_orders")); - assert!(desc.contains("fail_tool")); - 
assert!(desc.contains("from_stdin")); - } - - #[test] - fn test_help_has_tool_commands_section() { - let tool = build_test_tool(); - let help = tool.help(); - assert!(help.contains("TOOL COMMANDS")); - assert!(help.contains("get_user")); - assert!(help.contains("Fetch user by id")); - } - - #[test] - fn test_system_prompt_lists_tools() { - let tool = build_test_tool(); - let sp = tool.system_prompt(); - assert!(sp.contains("# test_api")); - assert!(sp.contains("- `get_user`:")); - assert!(sp.contains("- `get_orders`:")); - assert!(sp.contains("jq")); - } - - #[test] - fn test_schemas() { - let tool = build_test_tool(); - let input = tool.input_schema(); - assert!(input["properties"]["commands"].is_object()); - let output = tool.output_schema(); - assert!(output["properties"]["stdout"].is_object()); - } - - #[test] - fn test_version() { - let tool = build_test_tool(); - assert_eq!(tool.version(), VERSION); - } - - // -- Execution tests -- - - #[tokio::test] - async fn test_execute_empty() { - let mut tool = build_test_tool(); - let resp = tool - .execute(ToolRequest { - commands: String::new(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert!(resp.stdout.is_empty()); - } - - #[tokio::test] - async fn test_execute_single_tool() { - let mut tool = build_test_tool(); - let resp = tool - .execute(ToolRequest { - commands: "get_user 42".to_string(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert!(resp.stdout.contains("\"name\":\"Alice\"")); - assert!(resp.stdout.contains("\"id\":42")); - } - - #[tokio::test] - async fn test_execute_pipeline_with_jq() { - let mut tool = build_test_tool(); - let resp = tool - .execute(ToolRequest { - commands: "get_user 42 | jq -r '.name'".to_string(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert_eq!(resp.stdout.trim(), "Alice"); - } - - #[tokio::test] - async fn test_execute_multi_step() { - let mut tool = build_test_tool(); - let script = r#" - user=$(get_user 1) - name=$(echo "$user" | jq -r '.name') - 
orders=$(get_orders 1) - total=$(echo "$orders" | jq '[.[].total] | add') - echo "User: $name, Total: $total" - "#; - let resp = tool - .execute(ToolRequest { - commands: script.to_string(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert_eq!(resp.stdout.trim(), "User: Alice, Total: 79.49"); - } - - #[tokio::test] - async fn test_execute_tool_failure() { - let mut tool = build_test_tool(); - let resp = tool - .execute(ToolRequest { - commands: "fail_tool".to_string(), - }) - .await; - assert_ne!(resp.exit_code, 0); - assert!(resp.stderr.contains("service unavailable")); - } - - #[tokio::test] - async fn test_execute_tool_failure_with_fallback() { - let mut tool = build_test_tool(); - let resp = tool - .execute(ToolRequest { - commands: "fail_tool || echo 'fallback'".to_string(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert!(resp.stdout.contains("fallback")); - } - - #[tokio::test] - async fn test_execute_stdin_pipe() { - let mut tool = build_test_tool(); - let resp = tool - .execute(ToolRequest { - commands: "echo hello | from_stdin".to_string(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert_eq!(resp.stdout.trim(), "HELLO"); - } - - #[tokio::test] - async fn test_execute_loop_over_tools() { - let mut tool = build_test_tool(); - let script = r#" - for uid in 1 2 3; do - get_user $uid | jq -r '.name' - done - "#; - let resp = tool - .execute(ToolRequest { - commands: script.to_string(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert_eq!(resp.stdout.trim(), "Alice\nAlice\nAlice"); - } - - #[tokio::test] - async fn test_execute_conditional() { - let mut tool = build_test_tool(); - let script = r#" - user=$(get_user 5) - name=$(echo "$user" | jq -r '.name') - if [ "$name" = "Alice" ]; then - echo "found alice" - else - echo "not alice" - fi - "#; - let resp = tool - .execute(ToolRequest { - commands: script.to_string(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert_eq!(resp.stdout.trim(), "found alice"); - } - - 
#[tokio::test] - async fn test_execute_with_env() { - let mut tool = OrchestratorTool::builder("env_test") - .env("API_BASE", "https://api.example.com") - .tool(Box::new(Echo)) - .build(); - - let resp = tool - .execute(ToolRequest { - commands: "echo $API_BASE".to_string(), - }) - .await; - assert_eq!(resp.exit_code, 0); - assert_eq!(resp.stdout.trim(), "https://api.example.com"); - } - - #[tokio::test] - async fn test_execute_with_status_callback() { - use std::sync::{Arc, Mutex}; - - let mut tool = build_test_tool(); - let phases = Arc::new(Mutex::new(Vec::new())); - let phases_clone = phases.clone(); - - let resp = tool - .execute_with_status( - ToolRequest { - commands: "get_user 1".to_string(), - }, - Box::new(move |status| { - phases_clone - .lock() - .expect("lock poisoned") - .push(status.phase.clone()); - }), - ) - .await; - - assert_eq!(resp.exit_code, 0); - let phases = phases.lock().expect("lock poisoned"); - assert!(phases.contains(&"validate".to_string())); - assert!(phases.contains(&"execute".to_string())); - assert!(phases.contains(&"complete".to_string())); - } - - /// Verify the tool can be called multiple times (Arc sharing works). 
- #[tokio::test] - async fn test_multiple_execute_calls() { - let mut tool = build_test_tool(); - - let resp1 = tool - .execute(ToolRequest { - commands: "get_user 1 | jq -r '.name'".to_string(), - }) - .await; - assert_eq!(resp1.stdout.trim(), "Alice"); - - let resp2 = tool - .execute(ToolRequest { - commands: "get_orders 1 | jq 'length'".to_string(), - }) - .await; - assert_eq!(resp2.stdout.trim(), "2"); - - let resp3 = tool - .execute(ToolRequest { - commands: "get_user 2 | jq -r '.email'".to_string(), - }) - .await; - assert_eq!(resp3.stdout.trim(), "alice@example.com"); - } -} diff --git a/specs/014-scripted-tool-orchestration.md b/specs/014-scripted-tool-orchestration.md index c4a752ea..2c585783 100644 --- a/specs/014-scripted-tool-orchestration.md +++ b/specs/014-scripted-tool-orchestration.md @@ -1,8 +1,12 @@ -# Spec 014: Scripted Tool Orchestration +# Spec 014: Tool Orchestration ## Summary -Compose multiple `CallableTool`s into a single `OrchestratorTool` that accepts bash scripts. Each sub-tool becomes a builtin command, letting LLMs orchestrate N tools in one call using pipes, variables, loops, and conditionals. +Compose tool definitions (`ToolDef`) + execution callbacks into a single `OrchestratorTool` that accepts bash scripts. Each sub-tool becomes a builtin command, letting LLMs orchestrate N tools in one call using pipes, variables, loops, and conditionals. + +## Feature flag + +`orchestrator` — the entire module is gated behind `#[cfg(feature = "orchestrator")]`. ## Motivation @@ -10,51 +14,72 @@ When an LLM has access to many tools (get_user, list_orders, get_inventory, etc. 
## Design -### CallableTool trait +### ToolDef — OpenAPI-style tool definition ```rust -pub trait CallableTool: Send + Sync { - fn name(&self) -> &str; - fn description(&self) -> &str; - fn call(&self, args: &[String], stdin: Option<&str>) -> Result<String, String>; +pub struct ToolDef { + pub name: String, + pub description: String, + pub input_schema: serde_json::Value, // JSON Schema, empty object if unset } + +impl ToolDef { + pub fn new(name: impl Into<String>, description: impl Into<String>) -> Self; + pub fn with_schema(self, schema: serde_json::Value) -> Self; +} +``` + +Standard OpenAPI fields: `name`, `description`, `input_schema`. Schema is optional — defaults to `{}`. + +### ToolCallback + +```rust +pub type ToolCallback = + Arc<dyn Fn(&[String], Option<&str>) -> Result<String, String> + Send + Sync>; ``` -- Sync by design. Most sub-tools are fast (HTTP stubs, cache, mock data). - `args`: positional args after the command name. - `stdin`: pipeline input from prior command. - Returns stdout string on success, error message on failure. -### CallableToolBuiltin adapter - -Internal struct wrapping `Arc<dyn CallableTool>`. Implements `Builtin` so the interpreter can execute it. Arc sharing ensures the same tools survive across multiple `execute()` calls. - ### OrchestratorToolBuilder +Two arguments per tool: definition + callback. + ```rust OrchestratorTool::builder("api_name") .short_description("...") - .tool(Box::new(GetUser)) - .tool(Box::new(ListOrders)) + .tool( + ToolDef::new("get_user", "Fetch user by ID") + .with_schema(json!({"type": "object", "properties": {"id": {"type": "integer"}}})), + |args, _stdin| { + let id = args.first().ok_or("missing id")?; + Ok(format!("{{\"id\":{id}}}\n")) + }, + ) .env("API_KEY", "...") .limits(ExecutionLimits::new().max_commands(500)) .build() ``` +### ToolBuiltinAdapter (internal) + +Wraps `ToolCallback` (Arc) as a `Builtin` for the interpreter. Cheap Arc clone into each Bash instance. + ### OrchestratorTool Implements the `Tool` trait. On each `execute()`: 1. Creates a fresh `Bash` instance. -2. 
Registers each `CallableTool` as a builtin via `Arc::clone`. +2. Registers each callback as a builtin via `Arc::clone`. 3. Runs the user-provided script. 4. Returns `ToolResponse { stdout, stderr, exit_code }`. -Reusable — multiple `execute()` calls share the same `Arc<dyn CallableTool>` instances. +Reusable — multiple `execute()` calls share the same `Arc` instances. ### LLM integration -`system_prompt()` generates markdown with available tool commands and tips. Example output: +`system_prompt()` generates markdown with available tool commands, input schemas (when present), and tips. Example output: ```markdown # api_name @@ -64,7 +89,8 @@ Output: {stdout, stderr, exit_code} ## Available tool commands -- `get_user`: Fetch user by ID. Usage: get_user <id> +- `get_user`: Fetch user by ID + Schema: {"type":"object","properties":{"id":{"type":"integer"}}} - `list_orders`: List orders for user. Usage: list_orders <user_id> ## Tips @@ -75,19 +101,28 @@ Output: {stdout, stderr, exit_code} ## Module location -`crates/bashkit/src/scripted_tool.rs` +`crates/bashkit/src/orchestrator/` -Public exports from `lib.rs`: `CallableTool`, `OrchestratorTool`, `OrchestratorToolBuilder`. +``` +orchestrator/ +├── mod.rs — ToolDef, ToolCallback, OrchestratorToolBuilder, OrchestratorTool struct, tests +└── execute.rs — Tool impl, ToolBuiltinAdapter, documentation helpers +``` + +Public exports from `lib.rs` (gated by `orchestrator` feature): +`ToolDef`, `ToolCallback`, `OrchestratorTool`, `OrchestratorToolBuilder`. ## Example -`crates/bashkit/examples/scripted_tool_orchestration.rs` — e-commerce API demo with get_user, list_orders, get_inventory, create_discount. +`crates/bashkit/examples/orchestrator.rs` — e-commerce API demo with get_user, list_orders, get_inventory, create_discount. Uses `ToolDef` + closures (no trait impls needed). 
+ +Run: `cargo run --example orchestrator --features orchestrator` ## Test coverage -19 unit tests covering: +20 unit tests covering: - Builder configuration (name, description, defaults) -- Introspection (help, system_prompt, schemas) +- Introspection (help, system_prompt, schemas, schema rendering) - Single tool execution - Pipeline with jq - Multi-step orchestration (variables, command substitution) @@ -105,4 +140,4 @@ Inherits all bashkit sandbox guarantees: - Resource limits (max commands, loop iterations, function depth) - No network access unless explicitly configured -Sub-tool implementations control their own security boundaries. +Sub-tool callback implementations control their own security boundaries. From 6d0b92a43539c3d8182b3ec68437c758debaf8e7 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 22:39:26 +0000 Subject: [PATCH 3/6] =?UTF-8?q?refactor(scripted=5Ftool):=20rename=20Orche?= =?UTF-8?q?stratorTool=20=E2=86=92=20ScriptedTool?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Module: orchestrator/ → scripted_tool/ - Structs: OrchestratorTool → ScriptedTool, OrchestratorToolBuilder → ScriptedToolBuilder - Feature flag: orchestrator → scripted_tool - Example: orchestrator.rs → scripted_tool.rs - Update spec 014 https://claude.ai/code/session_01UcTwLqLrhbCFAuL26Fcm5W --- crates/bashkit/Cargo.toml | 10 ++-- .../{orchestrator.rs => scripted_tool.rs} | 14 ++--- crates/bashkit/src/lib.rs | 12 ++--- .../execute.rs | 10 ++-- .../{orchestrator => scripted_tool}/mod.rs | 52 +++++++++---------- specs/014-scripted-tool-orchestration.md | 28 +++++----- 6 files changed, 63 insertions(+), 63 deletions(-) rename crates/bashkit/examples/{orchestrator.rs => scripted_tool.rs} (94%) rename crates/bashkit/src/{orchestrator => scripted_tool}/execute.rs (97%) rename crates/bashkit/src/{orchestrator => scripted_tool}/mod.rs (93%) diff --git a/crates/bashkit/Cargo.toml b/crates/bashkit/Cargo.toml index 
624eb9ee..602b1cc5 100644 --- a/crates/bashkit/Cargo.toml +++ b/crates/bashkit/Cargo.toml @@ -77,9 +77,9 @@ logging = ["tracing"] # Phase 2 will add gix dependency for remote operations # Usage: cargo build --features git git = [] -# Enable OrchestratorTool: compose ToolDef+callback pairs into a single Tool -# Usage: cargo build --features orchestrator -orchestrator = [] +# Enable ScriptedTool: compose ToolDef+callback pairs into a single Tool +# Usage: cargo build --features scripted_tool +scripted_tool = [] # Enable python/python3 builtins via embedded Monty interpreter # Monty is a git dep (not yet on crates.io) — feature unavailable from registry python = ["dep:monty"] @@ -105,8 +105,8 @@ name = "git_workflow" required-features = ["git"] [[example]] -name = "orchestrator" -required-features = ["orchestrator"] +name = "scripted_tool" +required-features = ["scripted_tool"] [[example]] name = "python_scripts" diff --git a/crates/bashkit/examples/orchestrator.rs b/crates/bashkit/examples/scripted_tool.rs similarity index 94% rename from crates/bashkit/examples/orchestrator.rs rename to crates/bashkit/examples/scripted_tool.rs index 43c8c1c5..b3c36b11 100644 --- a/crates/bashkit/examples/orchestrator.rs +++ b/crates/bashkit/examples/scripted_tool.rs @@ -1,21 +1,21 @@ -//! Orchestrator Tool Example +//! Scripted Tool Example //! //! Demonstrates composing multiple API-like tools (ToolDef + closures) into a -//! single OrchestratorTool that an LLM agent can call with bash scripts. +//! single ScriptedTool that an LLM agent can call with bash scripts. //! -//! Run with: cargo run --example orchestrator --features orchestrator +//! Run with: cargo run --example scripted_tool --features scripted_tool //! //! This example simulates an e-commerce API with tools for users, orders, and -//! inventory. The OrchestratorTool lets an agent compose these in one call. +//! inventory. The ScriptedTool lets an agent compose these in one call. 
-use bashkit::{OrchestratorTool, Tool, ToolDef, ToolRequest}; +use bashkit::{ScriptedTool, Tool, ToolDef, ToolRequest}; #[tokio::main] async fn main() -> anyhow::Result<()> { - println!("=== Orchestrator Tool Demo ===\n"); + println!("=== Scripted Tool Demo ===\n"); // Build the orchestrator with tool definitions + closures - let mut tool = OrchestratorTool::builder("ecommerce_api") + let mut tool = ScriptedTool::builder("ecommerce_api") .short_description("E-commerce API orchestrator with user, order, and inventory tools") .tool( ToolDef::new("get_user", "Fetch user by ID. Usage: get_user ") diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index 60da377b..88f25194 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -370,12 +370,12 @@ mod limits; #[cfg(feature = "logging")] mod logging_impl; mod network; -/// Tool orchestration: compose ToolDef+callback pairs into a single Tool via bash scripts. -/// Requires the `orchestrator` feature. -#[cfg(feature = "orchestrator")] -pub mod orchestrator; /// Parser module - exposed for fuzzing and testing pub mod parser; +/// Scripted tool: compose ToolDef+callback pairs into a single Tool via bash scripts. +/// Requires the `scripted_tool` feature. 
+#[cfg(feature = "scripted_tool")] +pub mod scripted_tool; /// Tool contract for LLM integration pub mod tool; @@ -392,8 +392,8 @@ pub use limits::{ExecutionCounters, ExecutionLimits, LimitExceeded}; pub use network::NetworkAllowlist; pub use tool::{BashTool, BashToolBuilder, Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; -#[cfg(feature = "orchestrator")] -pub use orchestrator::{OrchestratorTool, OrchestratorToolBuilder, ToolCallback, ToolDef}; +#[cfg(feature = "scripted_tool")] +pub use scripted_tool::{ScriptedTool, ScriptedToolBuilder, ToolCallback, ToolDef}; #[cfg(feature = "http_client")] pub use network::HttpClient; diff --git a/crates/bashkit/src/orchestrator/execute.rs b/crates/bashkit/src/scripted_tool/execute.rs similarity index 97% rename from crates/bashkit/src/orchestrator/execute.rs rename to crates/bashkit/src/scripted_tool/execute.rs index d51cb439..42f01dd3 100644 --- a/crates/bashkit/src/orchestrator/execute.rs +++ b/crates/bashkit/src/scripted_tool/execute.rs @@ -1,6 +1,6 @@ -//! OrchestratorTool execution: Tool impl, builtin adapter, documentation helpers. +//! ScriptedTool execution: Tool impl, builtin adapter, documentation helpers. -use super::{OrchestratorTool, ToolCallback}; +use super::{ScriptedTool, ToolCallback}; use crate::builtins::{Builtin, Context}; use crate::error::Result; use crate::interpreter::ExecResult; @@ -30,10 +30,10 @@ impl Builtin for ToolBuiltinAdapter { } // ============================================================================ -// OrchestratorTool — internal helpers +// ScriptedTool — internal helpers // ============================================================================ -impl OrchestratorTool { +impl ScriptedTool { /// Create a fresh Bash instance with all tool builtins registered. 
fn create_bash(&self) -> Bash { let mut builder = Bash::builder(); @@ -147,7 +147,7 @@ impl OrchestratorTool { // ============================================================================ #[async_trait] -impl Tool for OrchestratorTool { +impl Tool for ScriptedTool { fn name(&self) -> &str { &self.name } diff --git a/crates/bashkit/src/orchestrator/mod.rs b/crates/bashkit/src/scripted_tool/mod.rs similarity index 93% rename from crates/bashkit/src/orchestrator/mod.rs rename to crates/bashkit/src/scripted_tool/mod.rs index 97c93a86..0b18fb43 100644 --- a/crates/bashkit/src/orchestrator/mod.rs +++ b/crates/bashkit/src/scripted_tool/mod.rs @@ -9,7 +9,7 @@ //! //! ```text //! ┌─────────────────────────────────────────┐ -//! │ OrchestratorTool (implements Tool) │ +//! │ ScriptedTool (implements Tool) │ //! │ │ //! │ ┌─────────┐ ┌─────────┐ ┌──────────┐ │ //! │ │get_user │ │get_order│ │inventory │ │ @@ -23,10 +23,10 @@ //! # Example //! //! ```rust -//! use bashkit::{OrchestratorTool, ToolDef, Tool, ToolRequest}; +//! use bashkit::{ScriptedTool, ToolDef, Tool, ToolRequest}; //! //! # tokio_test::block_on(async { -//! let mut tool = OrchestratorTool::builder("api") +//! let mut tool = ScriptedTool::builder("api") //! .tool( //! ToolDef::new("get_user", "Fetch user by id. Usage: get_user "), //! |args, _stdin| { @@ -55,7 +55,7 @@ use std::sync::Arc; /// OpenAPI-style tool definition: name, description, input schema. /// -/// Describes a sub-tool registered with [`OrchestratorToolBuilder`]. +/// Describes a sub-tool registered with [`ScriptedToolBuilder`]. /// The `input_schema` is optional JSON Schema for documentation / LLM prompts. pub struct ToolDef { /// Command name used as bash builtin (e.g. `"get_user"`). 
@@ -108,15 +108,15 @@ pub(crate) struct RegisteredTool { } // ============================================================================ -// OrchestratorToolBuilder +// ScriptedToolBuilder // ============================================================================ -/// Builder for [`OrchestratorTool`]. +/// Builder for [`ScriptedTool`]. /// /// ```rust -/// use bashkit::{OrchestratorTool, ToolDef}; +/// use bashkit::{ScriptedTool, ToolDef}; /// -/// let tool = OrchestratorTool::builder("net") +/// let tool = ScriptedTool::builder("net") /// .short_description("Network tools") /// .tool( /// ToolDef::new("ping", "Ping a host"), @@ -126,7 +126,7 @@ pub(crate) struct RegisteredTool { /// ) /// .build(); /// ``` -pub struct OrchestratorToolBuilder { +pub struct ScriptedToolBuilder { name: String, short_desc: Option, tools: Vec, @@ -134,7 +134,7 @@ pub struct OrchestratorToolBuilder { env_vars: Vec<(String, String)>, } -impl OrchestratorToolBuilder { +impl ScriptedToolBuilder { pub(crate) fn new(name: impl Into) -> Self { Self { name: name.into(), @@ -176,13 +176,13 @@ impl OrchestratorToolBuilder { self } - /// Build the [`OrchestratorTool`]. - pub fn build(self) -> OrchestratorTool { + /// Build the [`ScriptedTool`]. + pub fn build(self) -> ScriptedTool { let short_desc = self .short_desc - .unwrap_or_else(|| format!("Orchestrator: {}", self.name)); + .unwrap_or_else(|| format!("ScriptedTool: {}", self.name)); - OrchestratorTool { + ScriptedTool { name: self.name, short_desc, tools: self.tools, @@ -193,7 +193,7 @@ impl OrchestratorToolBuilder { } // ============================================================================ -// OrchestratorTool +// ScriptedTool // ============================================================================ /// A [`Tool`](crate::tool::Tool) that orchestrates multiple tools via bash scripts. @@ -205,8 +205,8 @@ impl OrchestratorToolBuilder { /// Reusable — `execute()` can be called multiple times. 
Each call gets a fresh /// Bash interpreter with the same set of tool builtins. /// -/// Create via [`OrchestratorTool::builder`]. -pub struct OrchestratorTool { +/// Create via [`ScriptedTool::builder`]. +pub struct ScriptedTool { pub(crate) name: String, pub(crate) short_desc: String, pub(crate) tools: Vec, @@ -214,10 +214,10 @@ pub struct OrchestratorTool { pub(crate) env_vars: Vec<(String, String)>, } -impl OrchestratorTool { +impl ScriptedTool { /// Create a builder with the given tool name. - pub fn builder(name: impl Into) -> OrchestratorToolBuilder { - OrchestratorToolBuilder::new(name) + pub fn builder(name: impl Into) -> ScriptedToolBuilder { + ScriptedToolBuilder::new(name) } } @@ -230,8 +230,8 @@ mod tests { use super::*; use crate::tool::{Tool, ToolRequest, VERSION}; - fn build_test_tool() -> OrchestratorTool { - OrchestratorTool::builder("test_api") + fn build_test_tool() -> ScriptedTool { + ScriptedTool::builder("test_api") .short_description("Test API orchestrator") .tool( ToolDef::new("get_user", "Fetch user by id. 
Usage: get_user "), @@ -280,7 +280,7 @@ mod tests { #[test] fn test_builder_default_short_description() { - let tool = OrchestratorTool::builder("mytools") + let tool = ScriptedTool::builder("mytools") .tool( ToolDef::new("api_echo", "Echo args back as JSON"), |args, _stdin| { @@ -294,7 +294,7 @@ mod tests { }, ) .build(); - assert_eq!(tool.short_description(), "Orchestrator: mytools"); + assert_eq!(tool.short_description(), "ScriptedTool: mytools"); } #[test] @@ -328,7 +328,7 @@ mod tests { #[test] fn test_system_prompt_includes_schema() { - let tool = OrchestratorTool::builder("schema_test") + let tool = ScriptedTool::builder("schema_test") .tool( ToolDef::new("get_user", "Fetch user by id").with_schema(serde_json::json!({ "type": "object", @@ -497,7 +497,7 @@ mod tests { #[tokio::test] async fn test_execute_with_env() { - let mut tool = OrchestratorTool::builder("env_test") + let mut tool = ScriptedTool::builder("env_test") .env("API_BASE", "https://api.example.com") .tool( ToolDef::new("api_echo", "Echo args back as JSON"), diff --git a/specs/014-scripted-tool-orchestration.md b/specs/014-scripted-tool-orchestration.md index 2c585783..e6d93295 100644 --- a/specs/014-scripted-tool-orchestration.md +++ b/specs/014-scripted-tool-orchestration.md @@ -1,16 +1,16 @@ -# Spec 014: Tool Orchestration +# Spec 014: Scripted Tool Orchestration ## Summary -Compose tool definitions (`ToolDef`) + execution callbacks into a single `OrchestratorTool` that accepts bash scripts. Each sub-tool becomes a builtin command, letting LLMs orchestrate N tools in one call using pipes, variables, loops, and conditionals. +Compose tool definitions (`ToolDef`) + execution callbacks into a single `ScriptedTool` that accepts bash scripts. Each sub-tool becomes a builtin command, letting LLMs orchestrate N tools in one call using pipes, variables, loops, and conditionals. ## Feature flag -`orchestrator` — the entire module is gated behind `#[cfg(feature = "orchestrator")]`. 
+`scripted_tool` — the entire module is gated behind `#[cfg(feature = "scripted_tool")]`. ## Motivation -When an LLM has access to many tools (get_user, list_orders, get_inventory, etc.), each tool call is a separate round-trip. A data-gathering task that needs 5 tools requires 5+ turns. With `OrchestratorTool`, the LLM writes a single bash script that calls all tools, pipes results through `jq`, and returns composed output — reducing latency and token cost. +When an LLM has access to many tools (get_user, list_orders, get_inventory, etc.), each tool call is a separate round-trip. A data-gathering task that needs 5 tools requires 5+ turns. With `ScriptedTool`, the LLM writes a single bash script that calls all tools, pipes results through `jq`, and returns composed output — reducing latency and token cost. ## Design @@ -42,12 +42,12 @@ pub type ToolCallback = - `stdin`: pipeline input from prior command. - Returns stdout string on success, error message on failure. -### OrchestratorToolBuilder +### ScriptedToolBuilder Two arguments per tool: definition + callback. ```rust -OrchestratorTool::builder("api_name") +ScriptedTool::builder("api_name") .short_description("...") .tool( ToolDef::new("get_user", "Fetch user by ID") @@ -66,7 +66,7 @@ OrchestratorTool::builder("api_name") Wraps `ToolCallback` (Arc) as a `Builtin` for the interpreter. Cheap Arc clone into each Bash instance. -### OrchestratorTool +### ScriptedTool Implements the `Tool` trait. 
On each `execute()`: @@ -101,22 +101,22 @@ Output: {stdout, stderr, exit_code} ## Module location -`crates/bashkit/src/orchestrator/` +`crates/bashkit/src/scripted_tool/` ``` -orchestrator/ -├── mod.rs — ToolDef, ToolCallback, OrchestratorToolBuilder, OrchestratorTool struct, tests +scripted_tool/ +├── mod.rs — ToolDef, ToolCallback, ScriptedToolBuilder, ScriptedTool struct, tests └── execute.rs — Tool impl, ToolBuiltinAdapter, documentation helpers ``` -Public exports from `lib.rs` (gated by `orchestrator` feature): -`ToolDef`, `ToolCallback`, `OrchestratorTool`, `OrchestratorToolBuilder`. +Public exports from `lib.rs` (gated by `scripted_tool` feature): +`ToolDef`, `ToolCallback`, `ScriptedTool`, `ScriptedToolBuilder`. ## Example -`crates/bashkit/examples/orchestrator.rs` — e-commerce API demo with get_user, list_orders, get_inventory, create_discount. Uses `ToolDef` + closures (no trait impls needed). +`crates/bashkit/examples/scripted_tool.rs` — e-commerce API demo with get_user, list_orders, get_inventory, create_discount. Uses `ToolDef` + closures (no trait impls needed). -Run: `cargo run --example orchestrator --features orchestrator` +Run: `cargo run --example scripted_tool --features scripted_tool` ## Test coverage From e6428135da43be8080313db51e3030ba68184921 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 17 Feb 2026 23:02:17 +0000 Subject: [PATCH 4/6] feat(scripted_tool): replace positional args with ToolArgs + --key value flag parsing Callbacks now receive ToolArgs {params, stdin} instead of (&[String], Option<&str>). The adapter parses --key value / --key=value flags from bash args, coercing types per the tool's input_schema (integer, number, boolean, string). ToolArgs provides convenience accessors: param_str, param_i64, param_f64, param_bool. 
https://claude.ai/code/session_01UcTwLqLrhbCFAuL26Fcm5W --- crates/bashkit/examples/scripted_tool.rs | 96 ++++---- crates/bashkit/src/lib.rs | 2 +- crates/bashkit/src/scripted_tool/execute.rs | 239 +++++++++++++++++-- crates/bashkit/src/scripted_tool/mod.rs | 245 +++++++++++++------- specs/014-scripted-tool-orchestration.md | 57 ++++- 5 files changed, 485 insertions(+), 154 deletions(-) diff --git a/crates/bashkit/examples/scripted_tool.rs b/crates/bashkit/examples/scripted_tool.rs index b3c36b11..5cbb8684 100644 --- a/crates/bashkit/examples/scripted_tool.rs +++ b/crates/bashkit/examples/scripted_tool.rs @@ -18,7 +18,7 @@ async fn main() -> anyhow::Result<()> { let mut tool = ScriptedTool::builder("ecommerce_api") .short_description("E-commerce API orchestrator with user, order, and inventory tools") .tool( - ToolDef::new("get_user", "Fetch user by ID. Usage: get_user ") + ToolDef::new("get_user", "Fetch user by ID") .with_schema(serde_json::json!({ "type": "object", "properties": { @@ -26,11 +26,8 @@ async fn main() -> anyhow::Result<()> { }, "required": ["id"] })), - |args, _stdin| { - let id: u64 = args - .first() - .and_then(|s| s.parse().ok()) - .ok_or("usage: get_user ")?; + |args| { + let id = args.param_i64("id").ok_or("missing --id")?; let users = [ (1, "Alice", "alice@example.com", "premium"), @@ -47,22 +44,16 @@ async fn main() -> anyhow::Result<()> { }, ) .tool( - ToolDef::new( - "list_orders", - "List orders for a user. 
Usage: list_orders ", - ) - .with_schema(serde_json::json!({ - "type": "object", - "properties": { - "user_id": {"type": "integer", "description": "User ID"} - }, - "required": ["user_id"] - })), - |args, _stdin| { - let uid: u64 = args - .first() - .and_then(|s| s.parse().ok()) - .ok_or("usage: list_orders ")?; + ToolDef::new("list_orders", "List orders for a user") + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "user_id": {"type": "integer", "description": "User ID"} + }, + "required": ["user_id"] + })), + |args| { + let uid = args.param_i64("user_id").ok_or("missing --user_id")?; let orders = match uid { 1 => r#"[{"order_id":101,"item":"Laptop","qty":1,"price":999.99},{"order_id":102,"item":"Mouse","qty":2,"price":29.99}]"#, @@ -75,12 +66,16 @@ async fn main() -> anyhow::Result<()> { }, ) .tool( - ToolDef::new( - "get_inventory", - "Check inventory for an item. Usage: get_inventory ", - ), - |args, _stdin| { - let item = args.first().ok_or("usage: get_inventory ")?; + ToolDef::new("get_inventory", "Check inventory for an item") + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "item": {"type": "string", "description": "Item name"} + }, + "required": ["item"] + })), + |args| { + let item = args.param_str("item").ok_or("missing --item")?; let stock = match item.to_lowercase().as_str() { "laptop" => 15, @@ -96,17 +91,18 @@ async fn main() -> anyhow::Result<()> { }, ) .tool( - ToolDef::new( - "create_discount", - "Create a discount code. 
Usage: create_discount ", - ), - |args, _stdin| { - let uid = args - .first() - .ok_or("usage: create_discount ")?; - let pct = args - .get(1) - .ok_or("usage: create_discount ")?; + ToolDef::new("create_discount", "Create a discount code") + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "user_id": {"type": "integer", "description": "User ID"}, + "percent": {"type": "integer", "description": "Discount percentage"} + }, + "required": ["user_id", "percent"] + })), + |args| { + let uid = args.param_i64("user_id").ok_or("missing --user_id")?; + let pct = args.param_i64("percent").ok_or("missing --percent")?; Ok(format!( "{{\"code\":\"SAVE{pct}-U{uid}\",\"percent\":{pct},\"user_id\":{uid}}}\n" )) @@ -126,29 +122,29 @@ async fn main() -> anyhow::Result<()> { println!("--- Demo 1: Single tool call ---"); let resp = tool .execute(ToolRequest { - commands: "get_user 1".to_string(), + commands: "get_user --id 1".to_string(), }) .await; - println!("$ get_user 1"); + println!("$ get_user --id 1"); println!("{}", resp.stdout); // ---- Demo 2: Pipeline with jq ---- println!("--- Demo 2: Pipeline with jq ---"); let resp = tool .execute(ToolRequest { - commands: "get_user 1 | jq -r '.name'".to_string(), + commands: "get_user --id 1 | jq -r '.name'".to_string(), }) .await; - println!("$ get_user 1 | jq -r '.name'"); + println!("$ get_user --id 1 | jq -r '.name'"); println!("{}", resp.stdout); // ---- Demo 3: Multi-step orchestration ---- println!("--- Demo 3: Multi-step orchestration ---"); let script = r#" - user=$(get_user 1) + user=$(get_user --id 1) name=$(echo "$user" | jq -r '.name') tier=$(echo "$user" | jq -r '.tier') - orders=$(list_orders 1) + orders=$(list_orders --user_id 1) total=$(echo "$orders" | jq '[.[].price] | add') count=$(echo "$orders" | jq 'length') echo "Customer: $name (tier: $tier)" @@ -167,12 +163,12 @@ async fn main() -> anyhow::Result<()> { println!("--- Demo 4: Loop with conditional ---"); let script = r#" for uid in 1 2 3; 
do - user=$(get_user $uid) + user=$(get_user --id $uid) name=$(echo "$user" | jq -r '.name') tier=$(echo "$user" | jq -r '.tier') if [ "$tier" = "premium" ]; then echo "$name is premium - creating discount" - create_discount $uid 20 | jq -r '.code' + create_discount --user_id $uid --percent 20 | jq -r '.code' else echo "$name is $tier - no discount" fi @@ -191,7 +187,7 @@ async fn main() -> anyhow::Result<()> { println!("--- Demo 5: Error handling ---"); let script = r#" for item in Laptop Mouse Keyboard Widget; do - result=$(get_inventory "$item") + result=$(get_inventory --item "$item") stock=$(echo "$result" | jq '.in_stock') if [ "$stock" -eq 0 ]; then echo "$item: OUT OF STOCK" @@ -214,8 +210,8 @@ async fn main() -> anyhow::Result<()> { let script = r#" echo "=== $STORE_NAME Report ===" for uid in 1 2; do - name=$(get_user $uid | jq -r '.name') - orders=$(list_orders $uid) + name=$(get_user --id $uid | jq -r '.name') + orders=$(list_orders --user_id $uid) count=$(echo "$orders" | jq 'length') echo "$name: $count orders" done diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index 88f25194..497aa6dd 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -393,7 +393,7 @@ pub use network::NetworkAllowlist; pub use tool::{BashTool, BashToolBuilder, Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; #[cfg(feature = "scripted_tool")] -pub use scripted_tool::{ScriptedTool, ScriptedToolBuilder, ToolCallback, ToolDef}; +pub use scripted_tool::{ScriptedTool, ScriptedToolBuilder, ToolArgs, ToolCallback, ToolDef}; #[cfg(feature = "http_client")] pub use network::HttpClient; diff --git a/crates/bashkit/src/scripted_tool/execute.rs b/crates/bashkit/src/scripted_tool/execute.rs index 42f01dd3..7af5bb56 100644 --- a/crates/bashkit/src/scripted_tool/execute.rs +++ b/crates/bashkit/src/scripted_tool/execute.rs @@ -1,6 +1,6 @@ -//! ScriptedTool execution: Tool impl, builtin adapter, documentation helpers. +//! 
ScriptedTool execution: Tool impl, builtin adapter, flag parser, documentation helpers. -use super::{ScriptedTool, ToolCallback}; +use super::{ScriptedTool, ToolArgs, ToolCallback}; use crate::builtins::{Builtin, Context}; use crate::error::Result; use crate::interpreter::ExecResult; @@ -10,19 +10,135 @@ use async_trait::async_trait; use schemars::schema_for; use std::sync::Arc; +// ============================================================================ +// Flag parser — `--key value` / `--key=value` → JSON object +// ============================================================================ + +/// Parse `--key value` and `--key=value` flags into a JSON object. +/// Types are coerced according to the schema's property definitions. +/// Unknown flags (not in schema) are kept as strings. +/// Bare `--flag` without a value is treated as `true` if the schema says boolean, +/// otherwise as `true` when the next arg also starts with `--` or is absent. +fn parse_flags( + raw_args: &[String], + schema: &serde_json::Value, +) -> std::result::Result { + let properties = schema + .get("properties") + .and_then(|p| p.as_object()) + .cloned() + .unwrap_or_default(); + + let mut result = serde_json::Map::new(); + let mut i = 0; + + while i < raw_args.len() { + let arg = &raw_args[i]; + + let Some(flag) = arg.strip_prefix("--") else { + return Err(format!("expected --flag, got: {arg}")); + }; + + // --key=value + if let Some((key, raw_value)) = flag.split_once('=') { + let value = coerce_value(raw_value, properties.get(key)); + result.insert(key.to_string(), value); + i += 1; + continue; + } + + // --flag (boolean) or --key value + let key = flag; + let prop_schema = properties.get(key); + let is_boolean = prop_schema + .and_then(|s| s.get("type")) + .and_then(|t| t.as_str()) + == Some("boolean"); + + if is_boolean { + result.insert(key.to_string(), serde_json::Value::Bool(true)); + i += 1; + } else if i + 1 < raw_args.len() && !raw_args[i + 1].starts_with("--") { + let 
raw_value = &raw_args[i + 1]; + let value = coerce_value(raw_value, prop_schema); + result.insert(key.to_string(), value); + i += 2; + } else { + // No value follows and not boolean — treat as true + result.insert(key.to_string(), serde_json::Value::Bool(true)); + i += 1; + } + } + + Ok(serde_json::Value::Object(result)) +} + +/// Coerce a raw string value to the type declared in the property schema. +fn coerce_value(raw: &str, prop_schema: Option<&serde_json::Value>) -> serde_json::Value { + let type_str = prop_schema + .and_then(|s| s.get("type")) + .and_then(|t| t.as_str()) + .unwrap_or("string"); + + match type_str { + "integer" => raw + .parse::() + .map(serde_json::Value::from) + .unwrap_or_else(|_| serde_json::Value::String(raw.to_string())), + "number" => raw + .parse::() + .map(|n| serde_json::json!(n)) + .unwrap_or_else(|_| serde_json::Value::String(raw.to_string())), + "boolean" => match raw { + "true" | "1" | "yes" => serde_json::Value::Bool(true), + "false" | "0" | "no" => serde_json::Value::Bool(false), + _ => serde_json::Value::String(raw.to_string()), + }, + _ => serde_json::Value::String(raw.to_string()), + } +} + +/// Generate a usage hint from schema properties: `--id --name `. +fn usage_from_schema(schema: &serde_json::Value) -> Option { + let props = schema.get("properties")?.as_object()?; + if props.is_empty() { + return None; + } + let flags: Vec = props + .iter() + .map(|(key, prop)| { + let ty = prop.get("type").and_then(|t| t.as_str()).unwrap_or("value"); + format!("--{key} <{ty}>") + }) + .collect(); + Some(flags.join(" ")) +} + // ============================================================================ // ToolBuiltinAdapter — wraps ToolCallback as a Builtin // ============================================================================ /// Adapts a [`ToolCallback`] into a [`Builtin`] so the interpreter can execute it. +/// Parses `--key value` flags from `ctx.args` using the schema for type coercion. 
struct ToolBuiltinAdapter { callback: ToolCallback, + schema: serde_json::Value, } #[async_trait] impl Builtin for ToolBuiltinAdapter { async fn execute(&self, ctx: Context<'_>) -> Result { - match (self.callback)(ctx.args, ctx.stdin) { + let params = match parse_flags(ctx.args, &self.schema) { + Ok(p) => p, + Err(msg) => return Ok(ExecResult::err(msg, 2)), + }; + + let tool_args = ToolArgs { + params, + stdin: ctx.stdin.map(String::from), + }; + + match (self.callback)(&tool_args) { Ok(stdout) => Ok(ExecResult::ok(stdout)), Err(msg) => Ok(ExecResult::err(msg, 1)), } @@ -48,6 +164,7 @@ impl ScriptedTool { let name = tool.def.name.clone(); let builtin: Box = Box::new(ToolBuiltinAdapter { callback: Arc::clone(&tool.callback), + schema: tool.def.input_schema.clone(), }); builder = builder.builtin(name, builtin); } @@ -57,7 +174,7 @@ impl ScriptedTool { fn build_description(&self) -> String { let mut desc = format!( - "Scripted tool orchestrator. Available tool-commands: {}", + "Scripted tool. Available tool-commands: {}", self.tools .iter() .map(|t| t.def.name.as_str()) @@ -78,16 +195,20 @@ impl ScriptedTool { DESCRIPTION\n\ \x20 Executes a bash script with access to tool-specific commands\n\ \x20 and standard bash builtins. 
Use pipes, variables, loops, and\n\ - \x20 conditionals to orchestrate multiple tool calls in one request.\n\n\ + \x20 conditionals to orchestrate multiple tool calls in one request.\n\ + \x20 Arguments are passed as --key value or --key=value flags.\n\n\ TOOL COMMANDS\n", name = self.name, short_desc = self.short_desc, ); for t in &self.tools { + let usage = usage_from_schema(&t.def.input_schema) + .map(|u| format!(" ({})", u)) + .unwrap_or_default(); doc.push_str(&format!( - " {:<20} {}\n", - t.def.name, t.def.description + " {:<20} {}{}\n", + t.def.name, t.def.description, usage )); } @@ -103,11 +224,11 @@ impl ScriptedTool { \x20 exit_code 0 on success\n\n\ EXAMPLES\n\ \x20 Single tool call:\n\ - \x20 {{\"commands\": \"get_user 42\"}}\n\n\ + \x20 {{\"commands\": \"get_user --id 42\"}}\n\n\ \x20 Pipeline:\n\ - \x20 {{\"commands\": \"get_user 42 | jq '.name'\"}}\n\n\ + \x20 {{\"commands\": \"get_user --id 42 | jq '.name'\"}}\n\n\ \x20 Multi-step orchestration:\n\ - \x20 {{\"commands\": \"user=$(get_user 42)\\norders=$(get_orders 42)\\necho $user | jq -r '.name'\"}}\n", + \x20 {{\"commands\": \"user=$(get_user --id 42)\\necho $user | jq -r '.name'\"}}\n", ); doc @@ -123,15 +244,14 @@ impl ScriptedTool { prompt.push_str("## Available tool commands\n\n"); for t in &self.tools { prompt.push_str(&format!("- `{}`: {}\n", t.def.name, t.def.description)); - if let Some(obj) = t.def.input_schema.as_object() { - if !obj.is_empty() { - prompt.push_str(&format!(" Schema: {}\n", t.def.input_schema)); - } + if let Some(usage) = usage_from_schema(&t.def.input_schema) { + prompt.push_str(&format!(" Usage: `{} {}`\n", t.def.name, usage)); } } prompt.push_str( "\n## Tips\n\n\ + - Pass arguments as `--key value` or `--key=value` flags\n\ - Pipe tool output through `jq` for JSON processing\n\ - Use variables to pass data between tool calls\n\ - Use `set -e` to stop on first error\n\ @@ -240,3 +360,94 @@ impl Tool for ScriptedTool { response } } + +#[cfg(test)] +mod tests { + use 
super::*; + + #[test] + fn test_parse_flags_key_value() { + let schema = serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + } + }); + let args = vec!["--id".into(), "42".into(), "--name".into(), "Alice".into()]; + let result = parse_flags(&args, &schema).unwrap(); + assert_eq!(result["id"], 42); + assert_eq!(result["name"], "Alice"); + } + + #[test] + fn test_parse_flags_equals_syntax() { + let schema = serde_json::json!({ + "type": "object", + "properties": { "id": {"type": "integer"} } + }); + let args = vec!["--id=99".into()]; + let result = parse_flags(&args, &schema).unwrap(); + assert_eq!(result["id"], 99); + } + + #[test] + fn test_parse_flags_boolean() { + let schema = serde_json::json!({ + "type": "object", + "properties": { + "verbose": {"type": "boolean"}, + "query": {"type": "string"} + } + }); + let args = vec!["--verbose".into(), "--query".into(), "hello".into()]; + let result = parse_flags(&args, &schema).unwrap(); + assert_eq!(result["verbose"], true); + assert_eq!(result["query"], "hello"); + } + + #[test] + fn test_parse_flags_no_schema() { + let schema = serde_json::json!({}); + let args = vec!["--name".into(), "Bob".into()]; + let result = parse_flags(&args, &schema).unwrap(); + assert_eq!(result["name"], "Bob"); + } + + #[test] + fn test_parse_flags_empty() { + let schema = serde_json::json!({}); + let result = parse_flags(&[], &schema).unwrap(); + assert_eq!(result, serde_json::json!({})); + } + + #[test] + fn test_parse_flags_rejects_positional() { + let schema = serde_json::json!({}); + let result = parse_flags(&["42".into()], &schema); + assert!(result.is_err()); + assert!(result.unwrap_err().contains("expected --flag")); + } + + #[test] + fn test_usage_from_schema() { + let schema = serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + } + }); + let usage = usage_from_schema(&schema).unwrap(); + 
assert!(usage.contains("--id ")); + assert!(usage.contains("--name ")); + } + + #[test] + fn test_usage_from_empty_schema() { + assert!(usage_from_schema(&serde_json::json!({})).is_none()); + assert!( + usage_from_schema(&serde_json::json!({"type": "object", "properties": {}})).is_none() + ); + } +} diff --git a/crates/bashkit/src/scripted_tool/mod.rs b/crates/bashkit/src/scripted_tool/mod.rs index 0b18fb43..c1d48da3 100644 --- a/crates/bashkit/src/scripted_tool/mod.rs +++ b/crates/bashkit/src/scripted_tool/mod.rs @@ -1,4 +1,4 @@ -//! Tool orchestration +//! Scripted tool //! //! Compose tool definitions + callbacks into a single [`Tool`] that accepts bash //! scripts. Each tool becomes a builtin command inside the interpreter, so an LLM @@ -9,7 +9,7 @@ //! //! ```text //! ┌─────────────────────────────────────────┐ -//! │ ScriptedTool (implements Tool) │ +//! │ ScriptedTool (implements Tool) │ //! │ │ //! │ ┌─────────┐ ┌─────────┐ ┌──────────┐ │ //! │ │get_user │ │get_order│ │inventory │ │ @@ -23,24 +23,28 @@ //! # Example //! //! ```rust -//! use bashkit::{ScriptedTool, ToolDef, Tool, ToolRequest}; +//! use bashkit::{ScriptedTool, ToolArgs, ToolDef, Tool, ToolRequest}; //! //! # tokio_test::block_on(async { //! let mut tool = ScriptedTool::builder("api") //! .tool( -//! ToolDef::new("get_user", "Fetch user by id. Usage: get_user "), -//! |args, _stdin| { -//! let id = args.first().ok_or("missing id")?; -//! Ok(format!("{{\"id\":{},\"name\":\"Alice\"}}\n", id)) +//! ToolDef::new("greet", "Greet a user") +//! .with_schema(serde_json::json!({ +//! "type": "object", +//! "properties": { "name": {"type": "string"} } +//! })), +//! |args: &ToolArgs| { +//! let name = args.param_str("name").unwrap_or("world"); +//! Ok(format!("hello {name}\n")) //! }, //! ) //! .build(); //! //! let resp = tool.execute(ToolRequest { -//! commands: "get_user 42 | jq '.name'".to_string(), +//! commands: "greet --name Alice".to_string(), //! }).await; //! -//! 
assert_eq!(resp.stdout.trim(), "\"Alice\""); +//! assert_eq!(resp.stdout.trim(), "hello Alice"); //! # }); //! ``` @@ -56,7 +60,8 @@ use std::sync::Arc; /// OpenAPI-style tool definition: name, description, input schema. /// /// Describes a sub-tool registered with [`ScriptedToolBuilder`]. -/// The `input_schema` is optional JSON Schema for documentation / LLM prompts. +/// The `input_schema` is optional JSON Schema for documentation / LLM prompts +/// and for type coercion of `--key value` flags. pub struct ToolDef { /// Command name used as bash builtin (e.g. `"get_user"`). pub name: String, @@ -83,19 +88,53 @@ impl ToolDef { } } +// ============================================================================ +// ToolArgs — parsed arguments passed to callbacks +// ============================================================================ + +/// Parsed arguments passed to a tool callback. +/// +/// `params` is a JSON object built from `--key value` flags, with values +/// type-coerced per the `ToolDef`'s `input_schema`. +/// `stdin` carries pipeline input from a prior command, if any. +pub struct ToolArgs { + /// Parsed parameters as a JSON object. Keys from `--key value` flags. + pub params: serde_json::Value, + /// Pipeline input from a prior command (e.g. `echo data | tool`). + pub stdin: Option, +} + +impl ToolArgs { + /// Get a string parameter by name. + pub fn param_str(&self, key: &str) -> Option<&str> { + self.params.get(key).and_then(|v| v.as_str()) + } + + /// Get an integer parameter by name. + pub fn param_i64(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.as_i64()) + } + + /// Get a float parameter by name. + pub fn param_f64(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.as_f64()) + } + + /// Get a boolean parameter by name. 
+ pub fn param_bool(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.as_bool()) + } +} + // ============================================================================ // ToolCallback — execution callback type // ============================================================================ /// Execution callback for a registered tool. /// -/// - `args`: positional arguments after command name. -/// For `get_user --id 5`, args is `["--id", "5"]`. -/// - `stdin`: pipeline input from prior command, if any. -/// +/// Receives parsed [`ToolArgs`] with typed parameters and optional stdin. /// Return `Ok(stdout)` on success or `Err(message)` on failure. -pub type ToolCallback = - Arc) -> Result + Send + Sync>; +pub type ToolCallback = Arc Result + Send + Sync>; // ============================================================================ // RegisteredTool — internal definition + callback pair @@ -114,14 +153,18 @@ pub(crate) struct RegisteredTool { /// Builder for [`ScriptedTool`]. /// /// ```rust -/// use bashkit::{ScriptedTool, ToolDef}; +/// use bashkit::{ScriptedTool, ToolArgs, ToolDef}; /// /// let tool = ScriptedTool::builder("net") /// .short_description("Network tools") /// .tool( -/// ToolDef::new("ping", "Ping a host"), -/// |args, _stdin| { -/// Ok(format!("pong {}\n", args.first().unwrap_or(&String::new()))) +/// ToolDef::new("ping", "Ping a host") +/// .with_schema(serde_json::json!({ +/// "type": "object", +/// "properties": { "host": {"type": "string"} } +/// })), +/// |args: &ToolArgs| { +/// Ok(format!("pong {}\n", args.param_str("host").unwrap_or("?"))) /// }, /// ) /// .build(); @@ -152,10 +195,13 @@ impl ScriptedToolBuilder { } /// Register a tool with its definition and execution callback. + /// + /// The callback receives [`ToolArgs`] with `--key value` flags parsed into + /// a JSON object, type-coerced per the schema. 
pub fn tool( mut self, def: ToolDef, - callback: impl Fn(&[String], Option<&str>) -> Result + Send + Sync + 'static, + callback: impl Fn(&ToolArgs) -> Result + Send + Sync + 'static, ) -> Self { self.tools.push(RegisteredTool { def, @@ -202,6 +248,9 @@ impl ScriptedToolBuilder { /// The LLM sends a bash script that can pipe, loop, branch, and compose these /// builtins together with standard utilities like `jq`, `grep`, `sed`, etc. /// +/// Arguments are passed as `--key value` flags and parsed into typed JSON +/// per the tool's `input_schema`. +/// /// Reusable — `execute()` can be called multiple times. Each call gets a fresh /// Bash interpreter with the same set of tool builtins. /// @@ -232,23 +281,30 @@ mod tests { fn build_test_tool() -> ScriptedTool { ScriptedTool::builder("test_api") - .short_description("Test API orchestrator") + .short_description("Test API") .tool( - ToolDef::new("get_user", "Fetch user by id. Usage: get_user "), - |args, _stdin| { - let id = args.first().ok_or("missing user id")?; + ToolDef::new("get_user", "Fetch user by id").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"} + } + })), + |args: &ToolArgs| { + let id = args.param_i64("id").ok_or("missing --id")?; Ok(format!( "{{\"id\":{id},\"name\":\"Alice\",\"email\":\"alice@example.com\"}}\n" )) }, ) .tool( - ToolDef::new( - "get_orders", - "List orders for user. 
Usage: get_orders ", - ), - |args, _stdin| { - let uid = args.first().ok_or("missing user id")?; + ToolDef::new("get_orders", "List orders for user").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "user_id": {"type": "integer"} + } + })), + |args: &ToolArgs| { + let uid = args.param_i64("user_id").ok_or("missing --user_id")?; Ok(format!( "[{{\"order_id\":1,\"user_id\":{uid},\"total\":29.99}},\ {{\"order_id\":2,\"user_id\":{uid},\"total\":49.50}}]\n" @@ -256,12 +312,12 @@ mod tests { }, ) .tool( - ToolDef::new("fail_tool", "Always fails (for testing error handling)"), - |_args, _stdin| Err("service unavailable".to_string()), + ToolDef::new("fail_tool", "Always fails"), + |_args: &ToolArgs| Err("service unavailable".to_string()), ) .tool( ToolDef::new("from_stdin", "Read from stdin, uppercase it"), - |_args, stdin| match stdin { + |args: &ToolArgs| match args.stdin.as_deref() { Some(input) => Ok(input.to_uppercase()), None => Err("no stdin".to_string()), }, @@ -275,24 +331,15 @@ mod tests { fn test_builder_name_and_description() { let tool = build_test_tool(); assert_eq!(tool.name(), "test_api"); - assert_eq!(tool.short_description(), "Test API orchestrator"); + assert_eq!(tool.short_description(), "Test API"); } #[test] fn test_builder_default_short_description() { let tool = ScriptedTool::builder("mytools") - .tool( - ToolDef::new("api_echo", "Echo args back as JSON"), - |args, _stdin| { - Ok(format!( - "{{\"args\":[{}]}}\n", - args.iter() - .map(|a| format!("\"{}\"", a)) - .collect::>() - .join(",") - )) - }, - ) + .tool(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| { + Ok("ok\n".to_string()) + }) .build(); assert_eq!(tool.short_description(), "ScriptedTool: mytools"); } @@ -323,7 +370,7 @@ mod tests { assert!(sp.contains("# test_api")); assert!(sp.contains("- `get_user`:")); assert!(sp.contains("- `get_orders`:")); - assert!(sp.contains("jq")); + assert!(sp.contains("--key value")); } #[test] @@ -337,15 +384,12 @@ mod tests { }, 
"required": ["id"] })), - |_args, _stdin| Ok("ok\n".to_string()), + |_args: &ToolArgs| Ok("ok\n".to_string()), ) .build(); let sp = tool.system_prompt(); - assert!( - sp.contains("Schema:"), - "system prompt should include schema" - ); - assert!(sp.contains("\"type\":\"integer\"")); + assert!(sp.contains("--id"), "system prompt should show flags"); + assert!(sp.contains("integer")); } #[test] @@ -382,7 +426,7 @@ mod tests { let mut tool = build_test_tool(); let resp = tool .execute(ToolRequest { - commands: "get_user 42".to_string(), + commands: "get_user --id 42".to_string(), }) .await; assert_eq!(resp.exit_code, 0); @@ -390,12 +434,24 @@ mod tests { assert!(resp.stdout.contains("\"id\":42")); } + #[tokio::test] + async fn test_execute_key_equals_value() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "get_user --id=42".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("\"id\":42")); + } + #[tokio::test] async fn test_execute_pipeline_with_jq() { let mut tool = build_test_tool(); let resp = tool .execute(ToolRequest { - commands: "get_user 42 | jq -r '.name'".to_string(), + commands: "get_user --id 42 | jq -r '.name'".to_string(), }) .await; assert_eq!(resp.exit_code, 0); @@ -406,9 +462,9 @@ mod tests { async fn test_execute_multi_step() { let mut tool = build_test_tool(); let script = r#" - user=$(get_user 1) + user=$(get_user --id 1) name=$(echo "$user" | jq -r '.name') - orders=$(get_orders 1) + orders=$(get_orders --user_id 1) total=$(echo "$orders" | jq '[.[].total] | add') echo "User: $name, Total: $total" "#; @@ -462,7 +518,7 @@ mod tests { let mut tool = build_test_tool(); let script = r#" for uid in 1 2 3; do - get_user $uid | jq -r '.name' + get_user --id $uid | jq -r '.name' done "#; let resp = tool @@ -478,7 +534,7 @@ mod tests { async fn test_execute_conditional() { let mut tool = build_test_tool(); let script = r#" - user=$(get_user 5) + user=$(get_user --id 5) 
name=$(echo "$user" | jq -r '.name') if [ "$name" = "Alice" ]; then echo "found alice" @@ -499,18 +555,9 @@ mod tests { async fn test_execute_with_env() { let mut tool = ScriptedTool::builder("env_test") .env("API_BASE", "https://api.example.com") - .tool( - ToolDef::new("api_echo", "Echo args back as JSON"), - |args, _stdin| { - Ok(format!( - "{{\"args\":[{}]}}\n", - args.iter() - .map(|a| format!("\"{}\"", a)) - .collect::>() - .join(",") - )) - }, - ) + .tool(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| { + Ok("ok\n".to_string()) + }) .build(); let resp = tool @@ -533,7 +580,7 @@ mod tests { let resp = tool .execute_with_status( ToolRequest { - commands: "get_user 1".to_string(), + commands: "get_user --id 1".to_string(), }, Box::new(move |status| { phases_clone @@ -551,30 +598,70 @@ mod tests { assert!(phases.contains(&"complete".to_string())); } - /// Verify the tool can be called multiple times (Arc sharing works). #[tokio::test] async fn test_multiple_execute_calls() { let mut tool = build_test_tool(); let resp1 = tool .execute(ToolRequest { - commands: "get_user 1 | jq -r '.name'".to_string(), + commands: "get_user --id 1 | jq -r '.name'".to_string(), }) .await; assert_eq!(resp1.stdout.trim(), "Alice"); let resp2 = tool .execute(ToolRequest { - commands: "get_orders 1 | jq 'length'".to_string(), + commands: "get_orders --user_id 1 | jq 'length'".to_string(), }) .await; assert_eq!(resp2.stdout.trim(), "2"); + } - let resp3 = tool + #[tokio::test] + async fn test_boolean_flag() { + let mut tool = ScriptedTool::builder("bool_test") + .tool( + ToolDef::new("search", "Search").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "query": {"type": "string"}, + "verbose": {"type": "boolean"} + } + })), + |args: &ToolArgs| { + let q = args.param_str("query").unwrap_or(""); + let v = args.param_bool("verbose").unwrap_or(false); + Ok(format!("q={q} verbose={v}\n")) + }, + ) + .build(); + + let resp = tool .execute(ToolRequest { - commands: 
"get_user 2 | jq -r '.email'".to_string(), + commands: "search --verbose --query hello".to_string(), }) .await; - assert_eq!(resp3.stdout.trim(), "alice@example.com"); + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "q=hello verbose=true"); + } + + #[tokio::test] + async fn test_no_schema_treats_as_strings() { + let mut tool = ScriptedTool::builder("str_test") + .tool( + ToolDef::new("echo_args", "Echo params as JSON"), + |args: &ToolArgs| Ok(format!("{}\n", args.params)), + ) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "echo_args --name Alice --count 3".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + let parsed: serde_json::Value = serde_json::from_str(resp.stdout.trim()).unwrap(); + assert_eq!(parsed["name"], "Alice"); + assert_eq!(parsed["count"], "3"); // string, not int — no schema } } diff --git a/specs/014-scripted-tool-orchestration.md b/specs/014-scripted-tool-orchestration.md index e6d93295..f8a9f7b1 100644 --- a/specs/014-scripted-tool-orchestration.md +++ b/specs/014-scripted-tool-orchestration.md @@ -31,17 +31,50 @@ impl ToolDef { Standard OpenAPI fields: `name`, `description`, `input_schema`. Schema is optional — defaults to `{}`. +### ToolArgs — parsed arguments passed to callbacks + +```rust +pub struct ToolArgs { + pub params: serde_json::Value, // JSON object from --key value flags + pub stdin: Option, // pipeline input from prior command +} + +impl ToolArgs { + pub fn param_str(&self, key: &str) -> Option<&str>; + pub fn param_i64(&self, key: &str) -> Option; + pub fn param_f64(&self, key: &str) -> Option; + pub fn param_bool(&self, key: &str) -> Option; +} +``` + +The adapter parses `--key value` and `--key=value` flags from the bash command line, +coerces types according to the tool's `input_schema`, and passes the result as `ToolArgs`. 
+ ### ToolCallback ```rust pub type ToolCallback = - Arc<dyn Fn(&[String], Option<&str>) -> Result<String, String> + Send + Sync>; + Arc<dyn Fn(&ToolArgs) -> Result<String, String> + Send + Sync>; ``` -- `args`: positional args after the command name. -- `stdin`: pipeline input from prior command. +- `args.params`: JSON object with parsed `--key value` flags, typed per schema. +- `args.stdin`: pipeline input from prior command. - Returns stdout string on success, error message on failure. +### Flag parsing + +Bash command args are parsed into a JSON object: + +| Syntax | Result | +|--------|--------| +| `--id 42` | `{"id": 42}` (if schema says integer) | +| `--id=42` | `{"id": 42}` | +| `--verbose` | `{"verbose": true}` (if schema says boolean) | +| `--name Alice` | `{"name": "Alice"}` | + +Type coercion follows the `input_schema` property types: `integer`, `number`, `boolean`, `string`. +Unknown flags (not in schema) are kept as strings. + ### ScriptedToolBuilder Two arguments per tool: definition + callback. @@ -52,8 +85,8 @@ ScriptedTool::builder("api_name") .tool( ToolDef::new("get_user", "Fetch user by ID") .with_schema(json!({"type": "object", "properties": {"id": {"type": "integer"}}})), - |args, _stdin| { - let id = args.first().ok_or("missing id")?; + |args| { + let id = args.param_i64("id").ok_or("missing --id")?; Ok(format!("{{\"id\":{id}}}\n")) }, ) @@ -64,7 +97,8 @@ ScriptedTool::builder("api_name") ### ToolBuiltinAdapter (internal) -Wraps `ToolCallback` (Arc) as a `Builtin` for the interpreter. Cheap Arc clone into each Bash instance. +Wraps `ToolCallback` (Arc) as a `Builtin` for the interpreter. Parses `--key value` flags +from `ctx.args` using the tool's schema for type coercion, then calls the callback with `ToolArgs`. ### ScriptedTool @@ -90,11 +124,13 @@ Output: {stdout, stderr, exit_code} ## Available tool commands - `get_user`: Fetch user by ID - Schema: {"type":"object","properties":{"id":{"type":"integer"}}} -- `list_orders`: List orders for user. 
Usage: list_orders + Usage: `get_user --id <integer>` +- `list_orders`: List orders for user + Usage: `list_orders --user_id <integer>` ## Tips +- Pass arguments as `--key value` or `--key=value` flags - Pipe tool output through `jq` for JSON processing - Use variables to pass data between tool calls ``` @@ -110,7 +146,7 @@ scripted_tool/ ``` Public exports from `lib.rs` (gated by `scripted_tool` feature): -`ToolDef`, `ToolCallback`, `ScriptedTool`, `ScriptedToolBuilder`. +`ToolDef`, `ToolArgs`, `ToolCallback`, `ScriptedTool`, `ScriptedToolBuilder`. ## Example @@ -120,9 +156,10 @@ Run: `cargo run --example scripted_tool --features scripted_tool` ## Test coverage -20 unit tests covering: +31 unit tests covering: - Builder configuration (name, description, defaults) - Introspection (help, system_prompt, schemas, schema rendering) +- Flag parsing (`--key value`, `--key=value`, boolean flags, type coercion) - Single tool execution - Pipeline with jq - Multi-step orchestration (variables, command substitution) From 746b0f2712caa36fb292e4551dd70984b53ea1d0 Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 18 Feb 2026 00:27:57 +0000 Subject: [PATCH 5/6] fix(scripted_tool): replace unwrap() with expect() to satisfy clippy Clippy disallows unwrap() even in test code. Replace all instances with expect() providing descriptive panic messages. 
https://claude.ai/code/session_01MARrisPkoUhn1L7j3rirwU --- crates/bashkit/src/scripted_tool/execute.rs | 14 +++++++------- crates/bashkit/src/scripted_tool/mod.rs | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/crates/bashkit/src/scripted_tool/execute.rs b/crates/bashkit/src/scripted_tool/execute.rs index 7af5bb56..9a34b500 100644 --- a/crates/bashkit/src/scripted_tool/execute.rs +++ b/crates/bashkit/src/scripted_tool/execute.rs @@ -375,7 +375,7 @@ mod tests { } }); let args = vec!["--id".into(), "42".into(), "--name".into(), "Alice".into()]; - let result = parse_flags(&args, &schema).unwrap(); + let result = parse_flags(&args, &schema).expect("parse_flags should succeed"); assert_eq!(result["id"], 42); assert_eq!(result["name"], "Alice"); } @@ -387,7 +387,7 @@ mod tests { "properties": { "id": {"type": "integer"} } }); let args = vec!["--id=99".into()]; - let result = parse_flags(&args, &schema).unwrap(); + let result = parse_flags(&args, &schema).expect("parse_flags should succeed"); assert_eq!(result["id"], 99); } @@ -401,7 +401,7 @@ mod tests { } }); let args = vec!["--verbose".into(), "--query".into(), "hello".into()]; - let result = parse_flags(&args, &schema).unwrap(); + let result = parse_flags(&args, &schema).expect("parse_flags should succeed"); assert_eq!(result["verbose"], true); assert_eq!(result["query"], "hello"); } @@ -410,14 +410,14 @@ mod tests { fn test_parse_flags_no_schema() { let schema = serde_json::json!({}); let args = vec!["--name".into(), "Bob".into()]; - let result = parse_flags(&args, &schema).unwrap(); + let result = parse_flags(&args, &schema).expect("parse_flags should succeed"); assert_eq!(result["name"], "Bob"); } #[test] fn test_parse_flags_empty() { let schema = serde_json::json!({}); - let result = parse_flags(&[], &schema).unwrap(); + let result = parse_flags(&[], &schema).expect("parse_flags should succeed"); assert_eq!(result, serde_json::json!({})); } @@ -426,7 +426,7 @@ mod tests { let schema = 
serde_json::json!({}); let result = parse_flags(&["42".into()], &schema); assert!(result.is_err()); - assert!(result.unwrap_err().contains("expected --flag")); + assert!(result.expect_err("should reject positional").contains("expected --flag")); } #[test] @@ -438,7 +438,7 @@ mod tests { "name": {"type": "string"} } }); - let usage = usage_from_schema(&schema).unwrap(); + let usage = usage_from_schema(&schema).expect("should produce usage string"); assert!(usage.contains("--id ")); assert!(usage.contains("--name ")); } diff --git a/crates/bashkit/src/scripted_tool/mod.rs b/crates/bashkit/src/scripted_tool/mod.rs index c1d48da3..c43737f0 100644 --- a/crates/bashkit/src/scripted_tool/mod.rs +++ b/crates/bashkit/src/scripted_tool/mod.rs @@ -660,7 +660,7 @@ mod tests { }) .await; assert_eq!(resp.exit_code, 0); - let parsed: serde_json::Value = serde_json::from_str(resp.stdout.trim()).unwrap(); + let parsed: serde_json::Value = serde_json::from_str(resp.stdout.trim()).expect("stdout should be valid JSON"); assert_eq!(parsed["name"], "Alice"); assert_eq!(parsed["count"], "3"); // string, not int — no schema } From 2e5c1584997a14f7c138b7f6499f94ec5c0bd6ba Mon Sep 17 00:00:00 2001 From: Claude Date: Wed, 18 Feb 2026 00:33:45 +0000 Subject: [PATCH 6/6] fix(scripted_tool): fix rustfmt formatting for long expect() lines https://claude.ai/code/session_01MARrisPkoUhn1L7j3rirwU --- crates/bashkit/src/scripted_tool/execute.rs | 4 +++- crates/bashkit/src/scripted_tool/mod.rs | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/crates/bashkit/src/scripted_tool/execute.rs b/crates/bashkit/src/scripted_tool/execute.rs index 9a34b500..f5110d0a 100644 --- a/crates/bashkit/src/scripted_tool/execute.rs +++ b/crates/bashkit/src/scripted_tool/execute.rs @@ -426,7 +426,9 @@ mod tests { let schema = serde_json::json!({}); let result = parse_flags(&["42".into()], &schema); assert!(result.is_err()); - assert!(result.expect_err("should reject positional").contains("expected 
--flag")); + assert!(result + .expect_err("should reject positional") + .contains("expected --flag")); } #[test] diff --git a/crates/bashkit/src/scripted_tool/mod.rs b/crates/bashkit/src/scripted_tool/mod.rs index c43737f0..bd6b80d8 100644 --- a/crates/bashkit/src/scripted_tool/mod.rs +++ b/crates/bashkit/src/scripted_tool/mod.rs @@ -660,7 +660,8 @@ mod tests { }) .await; assert_eq!(resp.exit_code, 0); - let parsed: serde_json::Value = serde_json::from_str(resp.stdout.trim()).expect("stdout should be valid JSON"); + let parsed: serde_json::Value = + serde_json::from_str(resp.stdout.trim()).expect("stdout should be valid JSON"); assert_eq!(parsed["name"], "Alice"); assert_eq!(parsed["count"], "3"); // string, not int — no schema }