diff --git a/AGENTS.md b/AGENTS.md index 6a5480f6..7ec66b92 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -40,6 +40,7 @@ Fix root cause. Unsure: read more code; if stuck, ask w/ short options. Unrecogn | 012-eval | LLM evaluation harness, dataset format, scoring | | 012-maintenance | Pre-release maintenance checklist | | 013-python-package | Python bindings, PyPI wheels, platform matrix | +| 014-scripted-tool-orchestration | Compose ToolDef+callback pairs into ScriptedTool via bash scripts | ### Documentation diff --git a/crates/bashkit/Cargo.toml b/crates/bashkit/Cargo.toml index c16899b5..602b1cc5 100644 --- a/crates/bashkit/Cargo.toml +++ b/crates/bashkit/Cargo.toml @@ -77,6 +77,9 @@ logging = ["tracing"] # Phase 2 will add gix dependency for remote operations # Usage: cargo build --features git git = [] +# Enable ScriptedTool: compose ToolDef+callback pairs into a single Tool +# Usage: cargo build --features scripted_tool +scripted_tool = [] # Enable python/python3 builtins via embedded Monty interpreter # Monty is a git dep (not yet on crates.io) — feature unavailable from registry python = ["dep:monty"] @@ -101,6 +104,10 @@ required-features = ["http_client"] name = "git_workflow" required-features = ["git"] +[[example]] +name = "scripted_tool" +required-features = ["scripted_tool"] + [[example]] name = "python_scripts" required-features = ["python"] diff --git a/crates/bashkit/examples/scripted_tool.rs b/crates/bashkit/examples/scripted_tool.rs new file mode 100644 index 00000000..5cbb8684 --- /dev/null +++ b/crates/bashkit/examples/scripted_tool.rs @@ -0,0 +1,229 @@ +//! Scripted Tool Example +//! +//! Demonstrates composing multiple API-like tools (ToolDef + closures) into a +//! single ScriptedTool that an LLM agent can call with bash scripts. +//! +//! Run with: cargo run --example scripted_tool --features scripted_tool +//! +//! This example simulates an e-commerce API with tools for users, orders, and +//! inventory. 
The ScriptedTool lets an agent compose these in one call. + +use bashkit::{ScriptedTool, Tool, ToolDef, ToolRequest}; + +#[tokio::main] +async fn main() -> anyhow::Result<()> { + println!("=== Scripted Tool Demo ===\n"); + + // Build the orchestrator with tool definitions + closures + let mut tool = ScriptedTool::builder("ecommerce_api") + .short_description("E-commerce API orchestrator with user, order, and inventory tools") + .tool( + ToolDef::new("get_user", "Fetch user by ID") + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer", "description": "User ID"} + }, + "required": ["id"] + })), + |args| { + let id = args.param_i64("id").ok_or("missing --id")?; + + let users = [ + (1, "Alice", "alice@example.com", "premium"), + (2, "Bob", "bob@example.com", "basic"), + (3, "Charlie", "charlie@example.com", "premium"), + ]; + + match users.iter().find(|(uid, ..)| *uid == id) { + Some((uid, name, email, tier)) => Ok(format!( + "{{\"id\":{uid},\"name\":\"{name}\",\"email\":\"{email}\",\"tier\":\"{tier}\"}}\n" + )), + None => Err(format!("user {} not found", id)), + } + }, + ) + .tool( + ToolDef::new("list_orders", "List orders for a user") + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "user_id": {"type": "integer", "description": "User ID"} + }, + "required": ["user_id"] + })), + |args| { + let uid = args.param_i64("user_id").ok_or("missing --user_id")?; + + let orders = match uid { + 1 => r#"[{"order_id":101,"item":"Laptop","qty":1,"price":999.99},{"order_id":102,"item":"Mouse","qty":2,"price":29.99}]"#, + 2 => r#"[{"order_id":201,"item":"Keyboard","qty":1,"price":79.99}]"#, + 3 => r#"[]"#, + _ => return Err(format!("no orders for user {}", uid)), + }; + + Ok(format!("{orders}\n")) + }, + ) + .tool( + ToolDef::new("get_inventory", "Check inventory for an item") + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "item": {"type": "string", "description": "Item name"} + }, + 
"required": ["item"] + })), + |args| { + let item = args.param_str("item").ok_or("missing --item")?; + + let stock = match item.to_lowercase().as_str() { + "laptop" => 15, + "mouse" => 142, + "keyboard" => 67, + _ => 0, + }; + + Ok(format!( + "{{\"item\":\"{}\",\"in_stock\":{}}}\n", + item, stock + )) + }, + ) + .tool( + ToolDef::new("create_discount", "Create a discount code") + .with_schema(serde_json::json!({ + "type": "object", + "properties": { + "user_id": {"type": "integer", "description": "User ID"}, + "percent": {"type": "integer", "description": "Discount percentage"} + }, + "required": ["user_id", "percent"] + })), + |args| { + let uid = args.param_i64("user_id").ok_or("missing --user_id")?; + let pct = args.param_i64("percent").ok_or("missing --percent")?; + Ok(format!( + "{{\"code\":\"SAVE{pct}-U{uid}\",\"percent\":{pct},\"user_id\":{uid}}}\n" + )) + }, + ) + .env("STORE_NAME", "Bashkit Shop") + .build(); + + // ---- Show what the LLM sees ---- + println!("--- Tool name ---"); + println!("{}\n", tool.name()); + + println!("--- System prompt (what goes in LLM system message) ---"); + println!("{}", tool.system_prompt()); + + // ---- Demo 1: Simple single tool call ---- + println!("--- Demo 1: Single tool call ---"); + let resp = tool + .execute(ToolRequest { + commands: "get_user --id 1".to_string(), + }) + .await; + println!("$ get_user --id 1"); + println!("{}", resp.stdout); + + // ---- Demo 2: Pipeline with jq ---- + println!("--- Demo 2: Pipeline with jq ---"); + let resp = tool + .execute(ToolRequest { + commands: "get_user --id 1 | jq -r '.name'".to_string(), + }) + .await; + println!("$ get_user --id 1 | jq -r '.name'"); + println!("{}", resp.stdout); + + // ---- Demo 3: Multi-step orchestration ---- + println!("--- Demo 3: Multi-step orchestration ---"); + let script = r#" + user=$(get_user --id 1) + name=$(echo "$user" | jq -r '.name') + tier=$(echo "$user" | jq -r '.tier') + orders=$(list_orders --user_id 1) + total=$(echo "$orders" | jq 
'[.[].price] | add') + count=$(echo "$orders" | jq 'length') + echo "Customer: $name (tier: $tier)" + echo "Orders: $count, Estimated total: $total" + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + println!(); + + // ---- Demo 4: Loop + conditional ---- + println!("--- Demo 4: Loop with conditional ---"); + let script = r#" + for uid in 1 2 3; do + user=$(get_user --id $uid) + name=$(echo "$user" | jq -r '.name') + tier=$(echo "$user" | jq -r '.tier') + if [ "$tier" = "premium" ]; then + echo "$name is premium - creating discount" + create_discount --user_id $uid --percent 20 | jq -r '.code' + else + echo "$name is $tier - no discount" + fi + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + println!(); + + // ---- Demo 5: Inventory check with error handling ---- + println!("--- Demo 5: Error handling ---"); + let script = r#" + for item in Laptop Mouse Keyboard Widget; do + result=$(get_inventory --item "$item") + stock=$(echo "$result" | jq '.in_stock') + if [ "$stock" -eq 0 ]; then + echo "$item: OUT OF STOCK" + else + echo "$item: $stock in stock" + fi + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + println!(); + + // ---- Demo 6: Data aggregation ---- + println!("--- Demo 6: Aggregate data across tools ---"); + let script = r#" + echo "=== $STORE_NAME Report ===" + for uid in 1 2; do + name=$(get_user --id $uid | jq -r '.name') + orders=$(list_orders --user_id $uid) + count=$(echo "$orders" | jq 'length') + echo "$name: $count orders" + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + println!("$ "); + print!("{}", resp.stdout); + + println!("\n=== Demo Complete ==="); + Ok(()) +} diff --git 
a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index f4f52119..497aa6dd 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -372,6 +372,10 @@ mod logging_impl; mod network; /// Parser module - exposed for fuzzing and testing pub mod parser; +/// Scripted tool: compose ToolDef+callback pairs into a single Tool via bash scripts. +/// Requires the `scripted_tool` feature. +#[cfg(feature = "scripted_tool")] +pub mod scripted_tool; /// Tool contract for LLM integration pub mod tool; @@ -388,6 +392,9 @@ pub use limits::{ExecutionCounters, ExecutionLimits, LimitExceeded}; pub use network::NetworkAllowlist; pub use tool::{BashTool, BashToolBuilder, Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; +#[cfg(feature = "scripted_tool")] +pub use scripted_tool::{ScriptedTool, ScriptedToolBuilder, ToolArgs, ToolCallback, ToolDef}; + #[cfg(feature = "http_client")] pub use network::HttpClient; diff --git a/crates/bashkit/src/scripted_tool/execute.rs b/crates/bashkit/src/scripted_tool/execute.rs new file mode 100644 index 00000000..f5110d0a --- /dev/null +++ b/crates/bashkit/src/scripted_tool/execute.rs @@ -0,0 +1,455 @@ +//! ScriptedTool execution: Tool impl, builtin adapter, flag parser, documentation helpers. + +use super::{ScriptedTool, ToolArgs, ToolCallback}; +use crate::builtins::{Builtin, Context}; +use crate::error::Result; +use crate::interpreter::ExecResult; +use crate::tool::{Tool, ToolRequest, ToolResponse, ToolStatus, VERSION}; +use crate::Bash; +use async_trait::async_trait; +use schemars::schema_for; +use std::sync::Arc; + +// ============================================================================ +// Flag parser — `--key value` / `--key=value` → JSON object +// ============================================================================ + +/// Parse `--key value` and `--key=value` flags into a JSON object. +/// Types are coerced according to the schema's property definitions. 
+/// Unknown flags (not in schema) are kept as strings. +/// Bare `--flag` without a value is treated as `true` if the schema says boolean, +/// otherwise as `true` when the next arg also starts with `--` or is absent. +fn parse_flags( + raw_args: &[String], + schema: &serde_json::Value, +) -> std::result::Result { + let properties = schema + .get("properties") + .and_then(|p| p.as_object()) + .cloned() + .unwrap_or_default(); + + let mut result = serde_json::Map::new(); + let mut i = 0; + + while i < raw_args.len() { + let arg = &raw_args[i]; + + let Some(flag) = arg.strip_prefix("--") else { + return Err(format!("expected --flag, got: {arg}")); + }; + + // --key=value + if let Some((key, raw_value)) = flag.split_once('=') { + let value = coerce_value(raw_value, properties.get(key)); + result.insert(key.to_string(), value); + i += 1; + continue; + } + + // --flag (boolean) or --key value + let key = flag; + let prop_schema = properties.get(key); + let is_boolean = prop_schema + .and_then(|s| s.get("type")) + .and_then(|t| t.as_str()) + == Some("boolean"); + + if is_boolean { + result.insert(key.to_string(), serde_json::Value::Bool(true)); + i += 1; + } else if i + 1 < raw_args.len() && !raw_args[i + 1].starts_with("--") { + let raw_value = &raw_args[i + 1]; + let value = coerce_value(raw_value, prop_schema); + result.insert(key.to_string(), value); + i += 2; + } else { + // No value follows and not boolean — treat as true + result.insert(key.to_string(), serde_json::Value::Bool(true)); + i += 1; + } + } + + Ok(serde_json::Value::Object(result)) +} + +/// Coerce a raw string value to the type declared in the property schema. 
+fn coerce_value(raw: &str, prop_schema: Option<&serde_json::Value>) -> serde_json::Value { + let type_str = prop_schema + .and_then(|s| s.get("type")) + .and_then(|t| t.as_str()) + .unwrap_or("string"); + + match type_str { + "integer" => raw + .parse::() + .map(serde_json::Value::from) + .unwrap_or_else(|_| serde_json::Value::String(raw.to_string())), + "number" => raw + .parse::() + .map(|n| serde_json::json!(n)) + .unwrap_or_else(|_| serde_json::Value::String(raw.to_string())), + "boolean" => match raw { + "true" | "1" | "yes" => serde_json::Value::Bool(true), + "false" | "0" | "no" => serde_json::Value::Bool(false), + _ => serde_json::Value::String(raw.to_string()), + }, + _ => serde_json::Value::String(raw.to_string()), + } +} + +/// Generate a usage hint from schema properties: `--id --name `. +fn usage_from_schema(schema: &serde_json::Value) -> Option { + let props = schema.get("properties")?.as_object()?; + if props.is_empty() { + return None; + } + let flags: Vec = props + .iter() + .map(|(key, prop)| { + let ty = prop.get("type").and_then(|t| t.as_str()).unwrap_or("value"); + format!("--{key} <{ty}>") + }) + .collect(); + Some(flags.join(" ")) +} + +// ============================================================================ +// ToolBuiltinAdapter — wraps ToolCallback as a Builtin +// ============================================================================ + +/// Adapts a [`ToolCallback`] into a [`Builtin`] so the interpreter can execute it. +/// Parses `--key value` flags from `ctx.args` using the schema for type coercion. 
+struct ToolBuiltinAdapter { + callback: ToolCallback, + schema: serde_json::Value, +} + +#[async_trait] +impl Builtin for ToolBuiltinAdapter { + async fn execute(&self, ctx: Context<'_>) -> Result { + let params = match parse_flags(ctx.args, &self.schema) { + Ok(p) => p, + Err(msg) => return Ok(ExecResult::err(msg, 2)), + }; + + let tool_args = ToolArgs { + params, + stdin: ctx.stdin.map(String::from), + }; + + match (self.callback)(&tool_args) { + Ok(stdout) => Ok(ExecResult::ok(stdout)), + Err(msg) => Ok(ExecResult::err(msg, 1)), + } + } +} + +// ============================================================================ +// ScriptedTool — internal helpers +// ============================================================================ + +impl ScriptedTool { + /// Create a fresh Bash instance with all tool builtins registered. + fn create_bash(&self) -> Bash { + let mut builder = Bash::builder(); + + if let Some(ref limits) = self.limits { + builder = builder.limits(limits.clone()); + } + for (key, value) in &self.env_vars { + builder = builder.env(key, value); + } + for tool in &self.tools { + let name = tool.def.name.clone(); + let builtin: Box = Box::new(ToolBuiltinAdapter { + callback: Arc::clone(&tool.callback), + schema: tool.def.input_schema.clone(), + }); + builder = builder.builtin(name, builtin); + } + + builder.build() + } + + fn build_description(&self) -> String { + let mut desc = format!( + "Scripted tool. Available tool-commands: {}", + self.tools + .iter() + .map(|t| t.def.name.as_str()) + .collect::>() + .join(", ") + ); + desc.push_str(". Also supports standard bash builtins (echo, jq, grep, sed, awk, etc.)."); + desc + } + + fn build_help(&self) -> String { + let mut doc = format!( + "{name}(1) Tool Commands {name}(1)\n\n\ + NAME\n\ + \x20 {name} - {short_desc}\n\n\ + SYNOPSIS\n\ + \x20 {{\"commands\": \"\"}}\n\n\ + DESCRIPTION\n\ + \x20 Executes a bash script with access to tool-specific commands\n\ + \x20 and standard bash builtins. 
Use pipes, variables, loops, and\n\ + \x20 conditionals to orchestrate multiple tool calls in one request.\n\ + \x20 Arguments are passed as --key value or --key=value flags.\n\n\ + TOOL COMMANDS\n", + name = self.name, + short_desc = self.short_desc, + ); + + for t in &self.tools { + let usage = usage_from_schema(&t.def.input_schema) + .map(|u| format!(" ({})", u)) + .unwrap_or_default(); + doc.push_str(&format!( + " {:<20} {}{}\n", + t.def.name, t.def.description, usage + )); + } + + doc.push_str( + "\nBASH BUILTINS\n\ + \x20 echo, cat, grep, sed, awk, jq, head, tail, sort, uniq, cut, tr,\n\ + \x20 wc, printf, test, [, true, false, cd, pwd, ls, find, xargs\n\n\ + INPUT\n\ + \x20 commands Bash script to execute\n\n\ + OUTPUT\n\ + \x20 stdout Combined standard output\n\ + \x20 stderr Errors from tool commands or bash\n\ + \x20 exit_code 0 on success\n\n\ + EXAMPLES\n\ + \x20 Single tool call:\n\ + \x20 {{\"commands\": \"get_user --id 42\"}}\n\n\ + \x20 Pipeline:\n\ + \x20 {{\"commands\": \"get_user --id 42 | jq '.name'\"}}\n\n\ + \x20 Multi-step orchestration:\n\ + \x20 {{\"commands\": \"user=$(get_user --id 42)\\necho $user | jq -r '.name'\"}}\n", + ); + + doc + } + + fn build_system_prompt(&self) -> String { + let mut prompt = format!("# {}\n\n", self.name); + prompt.push_str(&format!("{}\n\n", self.short_desc)); + + prompt.push_str("Input: {\"commands\": \"\"}\n"); + prompt.push_str("Output: {stdout, stderr, exit_code}\n\n"); + + prompt.push_str("## Available tool commands\n\n"); + for t in &self.tools { + prompt.push_str(&format!("- `{}`: {}\n", t.def.name, t.def.description)); + if let Some(usage) = usage_from_schema(&t.def.input_schema) { + prompt.push_str(&format!(" Usage: `{} {}`\n", t.def.name, usage)); + } + } + + prompt.push_str( + "\n## Tips\n\n\ + - Pass arguments as `--key value` or `--key=value` flags\n\ + - Pipe tool output through `jq` for JSON processing\n\ + - Use variables to pass data between tool calls\n\ + - Use `set -e` to stop on first 
error\n\ + - Standard builtins (echo, grep, sed, awk, etc.) are available\n", + ); + + prompt + } +} + +// ============================================================================ +// Tool trait implementation +// ============================================================================ + +#[async_trait] +impl Tool for ScriptedTool { + fn name(&self) -> &str { + &self.name + } + + fn short_description(&self) -> &str { + &self.short_desc + } + + fn description(&self) -> String { + self.build_description() + } + + fn help(&self) -> String { + self.build_help() + } + + fn system_prompt(&self) -> String { + self.build_system_prompt() + } + + fn input_schema(&self) -> serde_json::Value { + let schema = schema_for!(ToolRequest); + serde_json::to_value(schema).unwrap_or_default() + } + + fn output_schema(&self) -> serde_json::Value { + let schema = schema_for!(ToolResponse); + serde_json::to_value(schema).unwrap_or_default() + } + + fn version(&self) -> &str { + VERSION + } + + async fn execute(&mut self, req: ToolRequest) -> ToolResponse { + if req.commands.is_empty() { + return ToolResponse { + stdout: String::new(), + stderr: String::new(), + exit_code: 0, + error: None, + }; + } + + let mut bash = self.create_bash(); + + match bash.exec(&req.commands).await { + Ok(result) => result.into(), + Err(e) => ToolResponse { + stdout: String::new(), + stderr: e.to_string(), + exit_code: 1, + error: Some(format!("{:?}", e)), + }, + } + } + + async fn execute_with_status( + &mut self, + req: ToolRequest, + mut status_callback: Box, + ) -> ToolResponse { + status_callback(ToolStatus::new("validate").with_percent(0.0)); + + if req.commands.is_empty() { + status_callback(ToolStatus::new("complete").with_percent(100.0)); + return ToolResponse { + stdout: String::new(), + stderr: String::new(), + exit_code: 0, + error: None, + }; + } + + status_callback(ToolStatus::new("parse").with_percent(10.0)); + let mut bash = self.create_bash(); + 
status_callback(ToolStatus::new("execute").with_percent(20.0)); + + let response = match bash.exec(&req.commands).await { + Ok(result) => result.into(), + Err(e) => ToolResponse { + stdout: String::new(), + stderr: e.to_string(), + exit_code: 1, + error: Some(format!("{:?}", e)), + }, + }; + + status_callback(ToolStatus::new("complete").with_percent(100.0)); + response + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_flags_key_value() { + let schema = serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + } + }); + let args = vec!["--id".into(), "42".into(), "--name".into(), "Alice".into()]; + let result = parse_flags(&args, &schema).expect("parse_flags should succeed"); + assert_eq!(result["id"], 42); + assert_eq!(result["name"], "Alice"); + } + + #[test] + fn test_parse_flags_equals_syntax() { + let schema = serde_json::json!({ + "type": "object", + "properties": { "id": {"type": "integer"} } + }); + let args = vec!["--id=99".into()]; + let result = parse_flags(&args, &schema).expect("parse_flags should succeed"); + assert_eq!(result["id"], 99); + } + + #[test] + fn test_parse_flags_boolean() { + let schema = serde_json::json!({ + "type": "object", + "properties": { + "verbose": {"type": "boolean"}, + "query": {"type": "string"} + } + }); + let args = vec!["--verbose".into(), "--query".into(), "hello".into()]; + let result = parse_flags(&args, &schema).expect("parse_flags should succeed"); + assert_eq!(result["verbose"], true); + assert_eq!(result["query"], "hello"); + } + + #[test] + fn test_parse_flags_no_schema() { + let schema = serde_json::json!({}); + let args = vec!["--name".into(), "Bob".into()]; + let result = parse_flags(&args, &schema).expect("parse_flags should succeed"); + assert_eq!(result["name"], "Bob"); + } + + #[test] + fn test_parse_flags_empty() { + let schema = serde_json::json!({}); + let result = parse_flags(&[], &schema).expect("parse_flags should 
succeed"); + assert_eq!(result, serde_json::json!({})); + } + + #[test] + fn test_parse_flags_rejects_positional() { + let schema = serde_json::json!({}); + let result = parse_flags(&["42".into()], &schema); + assert!(result.is_err()); + assert!(result + .expect_err("should reject positional") + .contains("expected --flag")); + } + + #[test] + fn test_usage_from_schema() { + let schema = serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"}, + "name": {"type": "string"} + } + }); + let usage = usage_from_schema(&schema).expect("should produce usage string"); + assert!(usage.contains("--id ")); + assert!(usage.contains("--name ")); + } + + #[test] + fn test_usage_from_empty_schema() { + assert!(usage_from_schema(&serde_json::json!({})).is_none()); + assert!( + usage_from_schema(&serde_json::json!({"type": "object", "properties": {}})).is_none() + ); + } +} diff --git a/crates/bashkit/src/scripted_tool/mod.rs b/crates/bashkit/src/scripted_tool/mod.rs new file mode 100644 index 00000000..bd6b80d8 --- /dev/null +++ b/crates/bashkit/src/scripted_tool/mod.rs @@ -0,0 +1,668 @@ +//! Scripted tool +//! +//! Compose tool definitions + callbacks into a single [`Tool`] that accepts bash +//! scripts. Each tool becomes a builtin command inside the interpreter, so an LLM +//! can orchestrate many tools in one call using pipes, variables, loops, and +//! conditionals. +//! +//! # Architecture +//! +//! ```text +//! ┌─────────────────────────────────────────┐ +//! │ ScriptedTool (implements Tool) │ +//! │ │ +//! │ ┌─────────┐ ┌─────────┐ ┌──────────┐ │ +//! │ │get_user │ │get_order│ │inventory │ │ +//! │ │(builtin)│ │(builtin)│ │(builtin) │ │ +//! │ └─────────┘ └─────────┘ └──────────┘ │ +//! │ ↑ ↑ ↑ │ +//! │ bash script: pipes, vars, jq, loops │ +//! └─────────────────────────────────────────┘ +//! ``` +//! +//! # Example +//! +//! ```rust +//! use bashkit::{ScriptedTool, ToolArgs, ToolDef, Tool, ToolRequest}; +//! +//! 
# tokio_test::block_on(async { +//! let mut tool = ScriptedTool::builder("api") +//! .tool( +//! ToolDef::new("greet", "Greet a user") +//! .with_schema(serde_json::json!({ +//! "type": "object", +//! "properties": { "name": {"type": "string"} } +//! })), +//! |args: &ToolArgs| { +//! let name = args.param_str("name").unwrap_or("world"); +//! Ok(format!("hello {name}\n")) +//! }, +//! ) +//! .build(); +//! +//! let resp = tool.execute(ToolRequest { +//! commands: "greet --name Alice".to_string(), +//! }).await; +//! +//! assert_eq!(resp.stdout.trim(), "hello Alice"); +//! # }); +//! ``` + +mod execute; + +use crate::ExecutionLimits; +use std::sync::Arc; + +// ============================================================================ +// ToolDef — OpenAPI-style tool definition +// ============================================================================ + +/// OpenAPI-style tool definition: name, description, input schema. +/// +/// Describes a sub-tool registered with [`ScriptedToolBuilder`]. +/// The `input_schema` is optional JSON Schema for documentation / LLM prompts +/// and for type coercion of `--key value` flags. +pub struct ToolDef { + /// Command name used as bash builtin (e.g. `"get_user"`). + pub name: String, + /// Human-readable description for LLM consumption. + pub description: String, + /// JSON Schema describing accepted arguments. Empty object if unspecified. + pub input_schema: serde_json::Value, +} + +impl ToolDef { + /// Create a tool definition with name and description. + pub fn new(name: impl Into, description: impl Into) -> Self { + Self { + name: name.into(), + description: description.into(), + input_schema: serde_json::Value::Object(Default::default()), + } + } + + /// Attach a JSON Schema for the tool's input parameters. 
+ pub fn with_schema(mut self, schema: serde_json::Value) -> Self { + self.input_schema = schema; + self + } +} + +// ============================================================================ +// ToolArgs — parsed arguments passed to callbacks +// ============================================================================ + +/// Parsed arguments passed to a tool callback. +/// +/// `params` is a JSON object built from `--key value` flags, with values +/// type-coerced per the `ToolDef`'s `input_schema`. +/// `stdin` carries pipeline input from a prior command, if any. +pub struct ToolArgs { + /// Parsed parameters as a JSON object. Keys from `--key value` flags. + pub params: serde_json::Value, + /// Pipeline input from a prior command (e.g. `echo data | tool`). + pub stdin: Option, +} + +impl ToolArgs { + /// Get a string parameter by name. + pub fn param_str(&self, key: &str) -> Option<&str> { + self.params.get(key).and_then(|v| v.as_str()) + } + + /// Get an integer parameter by name. + pub fn param_i64(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.as_i64()) + } + + /// Get a float parameter by name. + pub fn param_f64(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.as_f64()) + } + + /// Get a boolean parameter by name. + pub fn param_bool(&self, key: &str) -> Option { + self.params.get(key).and_then(|v| v.as_bool()) + } +} + +// ============================================================================ +// ToolCallback — execution callback type +// ============================================================================ + +/// Execution callback for a registered tool. +/// +/// Receives parsed [`ToolArgs`] with typed parameters and optional stdin. +/// Return `Ok(stdout)` on success or `Err(message)` on failure. 
+pub type ToolCallback = Arc Result + Send + Sync>; + +// ============================================================================ +// RegisteredTool — internal definition + callback pair +// ============================================================================ + +/// A registered tool: definition + callback. +pub(crate) struct RegisteredTool { + pub(crate) def: ToolDef, + pub(crate) callback: ToolCallback, +} + +// ============================================================================ +// ScriptedToolBuilder +// ============================================================================ + +/// Builder for [`ScriptedTool`]. +/// +/// ```rust +/// use bashkit::{ScriptedTool, ToolArgs, ToolDef}; +/// +/// let tool = ScriptedTool::builder("net") +/// .short_description("Network tools") +/// .tool( +/// ToolDef::new("ping", "Ping a host") +/// .with_schema(serde_json::json!({ +/// "type": "object", +/// "properties": { "host": {"type": "string"} } +/// })), +/// |args: &ToolArgs| { +/// Ok(format!("pong {}\n", args.param_str("host").unwrap_or("?"))) +/// }, +/// ) +/// .build(); +/// ``` +pub struct ScriptedToolBuilder { + name: String, + short_desc: Option, + tools: Vec, + limits: Option, + env_vars: Vec<(String, String)>, +} + +impl ScriptedToolBuilder { + pub(crate) fn new(name: impl Into) -> Self { + Self { + name: name.into(), + short_desc: None, + tools: Vec::new(), + limits: None, + env_vars: Vec::new(), + } + } + + /// One-line description for tool listings. + pub fn short_description(mut self, desc: impl Into) -> Self { + self.short_desc = Some(desc.into()); + self + } + + /// Register a tool with its definition and execution callback. + /// + /// The callback receives [`ToolArgs`] with `--key value` flags parsed into + /// a JSON object, type-coerced per the schema. 
+ pub fn tool( + mut self, + def: ToolDef, + callback: impl Fn(&ToolArgs) -> Result + Send + Sync + 'static, + ) -> Self { + self.tools.push(RegisteredTool { + def, + callback: Arc::new(callback), + }); + self + } + + /// Set execution limits for the bash interpreter. + pub fn limits(mut self, limits: ExecutionLimits) -> Self { + self.limits = Some(limits); + self + } + + /// Add an environment variable visible inside scripts. + pub fn env(mut self, key: impl Into, value: impl Into) -> Self { + self.env_vars.push((key.into(), value.into())); + self + } + + /// Build the [`ScriptedTool`]. + pub fn build(self) -> ScriptedTool { + let short_desc = self + .short_desc + .unwrap_or_else(|| format!("ScriptedTool: {}", self.name)); + + ScriptedTool { + name: self.name, + short_desc, + tools: self.tools, + limits: self.limits, + env_vars: self.env_vars, + } + } +} + +// ============================================================================ +// ScriptedTool +// ============================================================================ + +/// A [`Tool`](crate::tool::Tool) that orchestrates multiple tools via bash scripts. +/// +/// Each registered tool (defined by [`ToolDef`] + callback) becomes a bash builtin. +/// The LLM sends a bash script that can pipe, loop, branch, and compose these +/// builtins together with standard utilities like `jq`, `grep`, `sed`, etc. +/// +/// Arguments are passed as `--key value` flags and parsed into typed JSON +/// per the tool's `input_schema`. +/// +/// Reusable — `execute()` can be called multiple times. Each call gets a fresh +/// Bash interpreter with the same set of tool builtins. +/// +/// Create via [`ScriptedTool::builder`]. +pub struct ScriptedTool { + pub(crate) name: String, + pub(crate) short_desc: String, + pub(crate) tools: Vec, + pub(crate) limits: Option, + pub(crate) env_vars: Vec<(String, String)>, +} + +impl ScriptedTool { + /// Create a builder with the given tool name. 
+ pub fn builder(name: impl Into) -> ScriptedToolBuilder { + ScriptedToolBuilder::new(name) + } +} + +// ============================================================================ +// Tests +// ============================================================================ + +#[cfg(test)] +mod tests { + use super::*; + use crate::tool::{Tool, ToolRequest, VERSION}; + + fn build_test_tool() -> ScriptedTool { + ScriptedTool::builder("test_api") + .short_description("Test API") + .tool( + ToolDef::new("get_user", "Fetch user by id").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"} + } + })), + |args: &ToolArgs| { + let id = args.param_i64("id").ok_or("missing --id")?; + Ok(format!( + "{{\"id\":{id},\"name\":\"Alice\",\"email\":\"alice@example.com\"}}\n" + )) + }, + ) + .tool( + ToolDef::new("get_orders", "List orders for user").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "user_id": {"type": "integer"} + } + })), + |args: &ToolArgs| { + let uid = args.param_i64("user_id").ok_or("missing --user_id")?; + Ok(format!( + "[{{\"order_id\":1,\"user_id\":{uid},\"total\":29.99}},\ + {{\"order_id\":2,\"user_id\":{uid},\"total\":49.50}}]\n" + )) + }, + ) + .tool( + ToolDef::new("fail_tool", "Always fails"), + |_args: &ToolArgs| Err("service unavailable".to_string()), + ) + .tool( + ToolDef::new("from_stdin", "Read from stdin, uppercase it"), + |args: &ToolArgs| match args.stdin.as_deref() { + Some(input) => Ok(input.to_uppercase()), + None => Err("no stdin".to_string()), + }, + ) + .build() + } + + // -- Builder tests -- + + #[test] + fn test_builder_name_and_description() { + let tool = build_test_tool(); + assert_eq!(tool.name(), "test_api"); + assert_eq!(tool.short_description(), "Test API"); + } + + #[test] + fn test_builder_default_short_description() { + let tool = ScriptedTool::builder("mytools") + .tool(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| { + Ok("ok\n".to_string()) + }) + .build(); + 
assert_eq!(tool.short_description(), "ScriptedTool: mytools"); + } + + #[test] + fn test_description_lists_tools() { + let tool = build_test_tool(); + let desc = tool.description(); + assert!(desc.contains("get_user")); + assert!(desc.contains("get_orders")); + assert!(desc.contains("fail_tool")); + assert!(desc.contains("from_stdin")); + } + + #[test] + fn test_help_has_tool_commands_section() { + let tool = build_test_tool(); + let help = tool.help(); + assert!(help.contains("TOOL COMMANDS")); + assert!(help.contains("get_user")); + assert!(help.contains("Fetch user by id")); + } + + #[test] + fn test_system_prompt_lists_tools() { + let tool = build_test_tool(); + let sp = tool.system_prompt(); + assert!(sp.contains("# test_api")); + assert!(sp.contains("- `get_user`:")); + assert!(sp.contains("- `get_orders`:")); + assert!(sp.contains("--key value")); + } + + #[test] + fn test_system_prompt_includes_schema() { + let tool = ScriptedTool::builder("schema_test") + .tool( + ToolDef::new("get_user", "Fetch user by id").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "id": {"type": "integer"} + }, + "required": ["id"] + })), + |_args: &ToolArgs| Ok("ok\n".to_string()), + ) + .build(); + let sp = tool.system_prompt(); + assert!(sp.contains("--id"), "system prompt should show flags"); + assert!(sp.contains("integer")); + } + + #[test] + fn test_schemas() { + let tool = build_test_tool(); + let input = tool.input_schema(); + assert!(input["properties"]["commands"].is_object()); + let output = tool.output_schema(); + assert!(output["properties"]["stdout"].is_object()); + } + + #[test] + fn test_version() { + let tool = build_test_tool(); + assert_eq!(tool.version(), VERSION); + } + + // -- Execution tests -- + + #[tokio::test] + async fn test_execute_empty() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: String::new(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.is_empty()); + 
} + + #[tokio::test] + async fn test_execute_single_tool() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "get_user --id 42".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("\"name\":\"Alice\"")); + assert!(resp.stdout.contains("\"id\":42")); + } + + #[tokio::test] + async fn test_execute_key_equals_value() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "get_user --id=42".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert!(resp.stdout.contains("\"id\":42")); + } + + #[tokio::test] + async fn test_execute_pipeline_with_jq() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "get_user --id 42 | jq -r '.name'".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "Alice"); + } + + #[tokio::test] + async fn test_execute_multi_step() { + let mut tool = build_test_tool(); + let script = r#" + user=$(get_user --id 1) + name=$(echo "$user" | jq -r '.name') + orders=$(get_orders --user_id 1) + total=$(echo "$orders" | jq '[.[].total] | add') + echo "User: $name, Total: $total" + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "User: Alice, Total: 79.49"); + } + + #[tokio::test] + async fn test_execute_tool_failure() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "fail_tool".to_string(), + }) + .await; + assert_ne!(resp.exit_code, 0); + assert!(resp.stderr.contains("service unavailable")); + } + + #[tokio::test] + async fn test_execute_tool_failure_with_fallback() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "fail_tool || echo 'fallback'".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + 
assert!(resp.stdout.contains("fallback")); + } + + #[tokio::test] + async fn test_execute_stdin_pipe() { + let mut tool = build_test_tool(); + let resp = tool + .execute(ToolRequest { + commands: "echo hello | from_stdin".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "HELLO"); + } + + #[tokio::test] + async fn test_execute_loop_over_tools() { + let mut tool = build_test_tool(); + let script = r#" + for uid in 1 2 3; do + get_user --id $uid | jq -r '.name' + done + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "Alice\nAlice\nAlice"); + } + + #[tokio::test] + async fn test_execute_conditional() { + let mut tool = build_test_tool(); + let script = r#" + user=$(get_user --id 5) + name=$(echo "$user" | jq -r '.name') + if [ "$name" = "Alice" ]; then + echo "found alice" + else + echo "not alice" + fi + "#; + let resp = tool + .execute(ToolRequest { + commands: script.to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "found alice"); + } + + #[tokio::test] + async fn test_execute_with_env() { + let mut tool = ScriptedTool::builder("env_test") + .env("API_BASE", "https://api.example.com") + .tool(ToolDef::new("noop", "No-op"), |_args: &ToolArgs| { + Ok("ok\n".to_string()) + }) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "echo $API_BASE".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "https://api.example.com"); + } + + #[tokio::test] + async fn test_execute_with_status_callback() { + use std::sync::{Arc, Mutex}; + + let mut tool = build_test_tool(); + let phases = Arc::new(Mutex::new(Vec::new())); + let phases_clone = phases.clone(); + + let resp = tool + .execute_with_status( + ToolRequest { + commands: "get_user --id 1".to_string(), + }, + Box::new(move |status| { + phases_clone + .lock() + 
.expect("lock poisoned") + .push(status.phase.clone()); + }), + ) + .await; + + assert_eq!(resp.exit_code, 0); + let phases = phases.lock().expect("lock poisoned"); + assert!(phases.contains(&"validate".to_string())); + assert!(phases.contains(&"execute".to_string())); + assert!(phases.contains(&"complete".to_string())); + } + + #[tokio::test] + async fn test_multiple_execute_calls() { + let mut tool = build_test_tool(); + + let resp1 = tool + .execute(ToolRequest { + commands: "get_user --id 1 | jq -r '.name'".to_string(), + }) + .await; + assert_eq!(resp1.stdout.trim(), "Alice"); + + let resp2 = tool + .execute(ToolRequest { + commands: "get_orders --user_id 1 | jq 'length'".to_string(), + }) + .await; + assert_eq!(resp2.stdout.trim(), "2"); + } + + #[tokio::test] + async fn test_boolean_flag() { + let mut tool = ScriptedTool::builder("bool_test") + .tool( + ToolDef::new("search", "Search").with_schema(serde_json::json!({ + "type": "object", + "properties": { + "query": {"type": "string"}, + "verbose": {"type": "boolean"} + } + })), + |args: &ToolArgs| { + let q = args.param_str("query").unwrap_or(""); + let v = args.param_bool("verbose").unwrap_or(false); + Ok(format!("q={q} verbose={v}\n")) + }, + ) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "search --verbose --query hello".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + assert_eq!(resp.stdout.trim(), "q=hello verbose=true"); + } + + #[tokio::test] + async fn test_no_schema_treats_as_strings() { + let mut tool = ScriptedTool::builder("str_test") + .tool( + ToolDef::new("echo_args", "Echo params as JSON"), + |args: &ToolArgs| Ok(format!("{}\n", args.params)), + ) + .build(); + + let resp = tool + .execute(ToolRequest { + commands: "echo_args --name Alice --count 3".to_string(), + }) + .await; + assert_eq!(resp.exit_code, 0); + let parsed: serde_json::Value = + serde_json::from_str(resp.stdout.trim()).expect("stdout should be valid JSON"); + assert_eq!(parsed["name"], 
"Alice"); + assert_eq!(parsed["count"], "3"); // string, not int — no schema + } +} diff --git a/specs/014-scripted-tool-orchestration.md b/specs/014-scripted-tool-orchestration.md new file mode 100644 index 00000000..f8a9f7b1 --- /dev/null +++ b/specs/014-scripted-tool-orchestration.md @@ -0,0 +1,180 @@ +# Spec 014: Scripted Tool Orchestration + +## Summary + +Compose tool definitions (`ToolDef`) + execution callbacks into a single `ScriptedTool` that accepts bash scripts. Each sub-tool becomes a builtin command, letting LLMs orchestrate N tools in one call using pipes, variables, loops, and conditionals. + +## Feature flag + +`scripted_tool` — the entire module is gated behind `#[cfg(feature = "scripted_tool")]`. + +## Motivation + +When an LLM has access to many tools (get_user, list_orders, get_inventory, etc.), each tool call is a separate round-trip. A data-gathering task that needs 5 tools requires 5+ turns. With `ScriptedTool`, the LLM writes a single bash script that calls all tools, pipes results through `jq`, and returns composed output — reducing latency and token cost. + +## Design + +### ToolDef — OpenAPI-style tool definition + +```rust +pub struct ToolDef { + pub name: String, + pub description: String, + pub input_schema: serde_json::Value, // JSON Schema, empty object if unset +} + +impl ToolDef { + pub fn new(name: impl Into, description: impl Into) -> Self; + pub fn with_schema(self, schema: serde_json::Value) -> Self; +} +``` + +Standard OpenAPI fields: `name`, `description`, `input_schema`. Schema is optional — defaults to `{}`. 
+ +### ToolArgs — parsed arguments passed to callbacks + +```rust +pub struct ToolArgs { + pub params: serde_json::Value, // JSON object from --key value flags + pub stdin: Option<String>, // pipeline input from prior command +} + +impl ToolArgs { + pub fn param_str(&self, key: &str) -> Option<&str>; + pub fn param_i64(&self, key: &str) -> Option<i64>; + pub fn param_f64(&self, key: &str) -> Option<f64>; + pub fn param_bool(&self, key: &str) -> Option<bool>; +} +``` + +The adapter parses `--key value` and `--key=value` flags from the bash command line, +coerces types according to the tool's `input_schema`, and passes the result as `ToolArgs`. + +### ToolCallback + +```rust +pub type ToolCallback = + Arc<dyn Fn(&ToolArgs) -> Result<String, String> + Send + Sync>; +``` + +- `args.params`: JSON object with parsed `--key value` flags, typed per schema. +- `args.stdin`: pipeline input from prior command. +- Returns stdout string on success, error message on failure. + +### Flag parsing + +Bash command args are parsed into a JSON object: + +| Syntax | Result | +|--------|--------| +| `--id 42` | `{"id": 42}` (if schema says integer) | +| `--id=42` | `{"id": 42}` | +| `--verbose` | `{"verbose": true}` (if schema says boolean) | +| `--name Alice` | `{"name": "Alice"}` | + +Type coercion follows the `input_schema` property types: `integer`, `number`, `boolean`, `string`. +Unknown flags (not in schema) are kept as strings. + +### ScriptedToolBuilder + +Two arguments per tool: definition + callback. + +```rust +ScriptedTool::builder("api_name") + .short_description("...") + .tool( + ToolDef::new("get_user", "Fetch user by ID") + .with_schema(json!({"type": "object", "properties": {"id": {"type": "integer"}}})), + |args| { + let id = args.param_i64("id").ok_or("missing --id")?; + Ok(format!("{{\"id\":{id}}}\n")) + }, + ) + .env("API_KEY", "...") + .limits(ExecutionLimits::new().max_commands(500)) + .build() +``` + +### ToolBuiltinAdapter (internal) + +Wraps `ToolCallback` (Arc) as a `Builtin` for the interpreter.
Parses `--key value` flags +from `ctx.args` using the tool's schema for type coercion, then calls the callback with `ToolArgs`. + +### ScriptedTool + +Implements the `Tool` trait. On each `execute()`: + +1. Creates a fresh `Bash` instance. +2. Registers each callback as a builtin via `Arc::clone`. +3. Runs the user-provided script. +4. Returns `ToolResponse { stdout, stderr, exit_code }`. + +Reusable — multiple `execute()` calls share the same `Arc` instances. + +### LLM integration + +`system_prompt()` generates markdown with available tool commands, input schemas (when present), and tips. Example output: + +```markdown +# api_name + +Input: {"commands": "<bash script>"} +Output: {stdout, stderr, exit_code} + +## Available tool commands + +- `get_user`: Fetch user by ID + Usage: `get_user --id <integer>` +- `list_orders`: List orders for user + Usage: `list_orders --user_id <integer>` + +## Tips + +- Pass arguments as `--key value` or `--key=value` flags +- Pipe tool output through `jq` for JSON processing +- Use variables to pass data between tool calls +``` + +## Module location + +`crates/bashkit/src/scripted_tool/` + +``` +scripted_tool/ +├── mod.rs — ToolDef, ToolCallback, ScriptedToolBuilder, ScriptedTool struct, tests +└── execute.rs — Tool impl, ToolBuiltinAdapter, documentation helpers +``` + +Public exports from `lib.rs` (gated by `scripted_tool` feature): +`ToolDef`, `ToolArgs`, `ToolCallback`, `ScriptedTool`, `ScriptedToolBuilder`. + +## Example + +`crates/bashkit/examples/scripted_tool.rs` — e-commerce API demo with get_user, list_orders, get_inventory, create_discount. Uses `ToolDef` + closures (no trait impls needed).
+ +Run: `cargo run --example scripted_tool --features scripted_tool` + +## Test coverage + +31 unit tests covering: +- Builder configuration (name, description, defaults) +- Introspection (help, system_prompt, schemas, schema rendering) +- Flag parsing (`--key value`, `--key=value`, boolean flags, type coercion) +- Single tool execution +- Pipeline with jq +- Multi-step orchestration (variables, command substitution) +- Error handling and fallback (`||`) +- Stdin piping +- Loops and conditionals +- Environment variables +- Status callbacks +- Multiple sequential `execute()` calls (Arc reuse) + +## Security + +Inherits all bashkit sandbox guarantees: +- Virtual filesystem (no host access) +- Resource limits (max commands, loop iterations, function depth) +- No network access unless explicitly configured + +Sub-tool callback implementations control their own security boundaries.