Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ local*
coverage/
.venv/

# Claude Code worktrees
.claude/worktrees/

# Python build artifacts
__pycache__/
*.pyc
Expand Down
4 changes: 2 additions & 2 deletions crates/bashkit-eval/data/eval-tasks.jsonl
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
{"id":"data_json_query","category":"data_transformation","description":"Query JSON inventory for low-stock items","prompt":"Read /data/inventory.json and find all items where the quantity field is less than 10. Print only their names, one per line.","files":{"/data/inventory.json":"[{\"name\":\"screws\",\"quantity\":5},{\"name\":\"bolts\",\"quantity\":50},{\"name\":\"washers\",\"quantity\":3},{\"name\":\"nuts\",\"quantity\":100},{\"name\":\"nails\",\"quantity\":8}]"},"expectations":[{"check":"stdout_contains:screws"},{"check":"stdout_contains:washers"},{"check":"stdout_contains:nails"},{"check":"exit_code:0"}]}
{"id":"data_log_summarize","category":"data_transformation","description":"Summarize log entries by level","prompt":"Analyze /var/log/app.log and produce a count per log level (INFO, WARN, ERROR). Print each as 'LEVEL: N' on its own line.","files":{"/var/log/app.log":"INFO: app started\nINFO: connected to db\nWARN: slow query detected\nERROR: connection timeout\nINFO: retry succeeded\nWARN: high memory usage\nERROR: write failed\nINFO: request completed\nERROR: connection timeout\nINFO: shutting down\n"},"expectations":[{"check":"stdout_contains:INFO"},{"check":"stdout_contains:5"},{"check":"stdout_contains:ERROR"},{"check":"stdout_contains:3"},{"check":"stdout_contains:WARN"},{"check":"stdout_contains:2"},{"check":"exit_code:0"}]}
{"id":"error_missing_file","category":"error_recovery","description":"Handle missing file gracefully","prompt":"Read the file /data/input.txt. If it does not exist, create the directory /data if needed, then create the file with the content 'default data', and finally read and print the file contents.","files":{},"expectations":[{"check":"stdout_contains:default data"},{"check":"file_exists:/data/input.txt"},{"check":"file_contains:/data/input.txt:default data"},{"check":"exit_code:0"}]}
{"id":"error_graceful_parse","category":"error_recovery","description":"Detect and fix broken JSON","prompt":"Read /data/broken.json. It contains invalid JSON (a trailing comma before the closing brace). Fix the JSON by removing the trailing comma, save the fixed version back to the same file, and then parse it to print the 'name' field.","files":{"/data/broken.json":"{\"name\": \"test-app\", \"version\": \"1.0\", \"debug\": true, }"},"expectations":[{"check":"stdout_contains:test-app"},{"check":"exit_code:0"},{"check":"tool_calls_min:2"}]}
{"id":"error_graceful_parse","category":"error_recovery","description":"Detect and fix broken JSON","prompt":"Read /data/broken.json. It contains invalid JSON (a trailing comma before the closing brace). Fix the JSON by removing the trailing comma, save the fixed version back to the same file, and then parse it to print the 'name' field.","files":{"/data/broken.json":"{\"name\": \"test-app\", \"version\": \"1.0\", \"debug\": true, }"},"expectations":[{"check":"stdout_contains:test-app"},{"check":"exit_code:0"}]}
{"id":"sysinfo_env_report","category":"system_info","description":"Print system environment report","prompt":"Print a system report with four lines: 'user: <whoami output>', 'host: <hostname output>', 'cwd: <pwd output>', 'shell: bash'. Use the actual commands to get the values.","files":{},"expectations":[{"check":"stdout_contains:user: eval"},{"check":"stdout_contains:host: bashkit-eval"},{"check":"stdout_contains:cwd:"},{"check":"exit_code:0"}]}
{"id":"sysinfo_date_calc","category":"system_info","description":"Print current date and compute a past date","prompt":"Print today's date in YYYY-MM-DD format. Then compute and print what the date was 30 days ago, also in YYYY-MM-DD format.","files":{},"expectations":[{"check":"exit_code:0"},{"check":"tool_calls_min:1"},{"check":"stdout_regex:\\d{4}-\\d{2}-\\d{2}"}]}
{"id":"archive_create_extract","category":"archive_operations","description":"Create tar.gz archive and extract to new location","prompt":"Create a tar.gz archive of the /project directory and save it as /tmp/project.tar.gz. Then create /backup directory and extract the archive there. Verify the files exist in /backup.","files":{"/project/README.md":"# My Project\nThis is a test project.\n","/project/src/main.sh":"#!/bin/bash\necho 'Hello from project'\n"},"expectations":[{"check":"file_exists:/tmp/project.tar.gz"},{"check":"exit_code:0"}]}
{"id":"archive_selective","category":"archive_operations","description":"Create archive then list and selectively extract","prompt":"Create files: /tmp/notes.txt with 'remember this', /tmp/data.csv with 'a,b,c', /tmp/script.sh with '#!/bin/bash'. Create a tar.gz archive /tmp/bundle.tar.gz containing all three files. Then list the archive contents. Finally extract only notes.txt to /output/ directory.","files":{},"expectations":[{"check":"file_exists:/output/notes.txt","weight":2.0},{"check":"file_contains:/output/notes.txt:remember this"},{"check":"exit_code:0"}]}
{"id":"json_nested_names","category":"json_processing","description":"Extract and deduplicate names from nested JSON","prompt":"Read /data/org.json which has a nested structure of teams with members. Extract all unique member names across all teams, sort them alphabetically, and print one per line.","files":{"/data/org.json":"{\"teams\":[{\"name\":\"backend\",\"members\":[{\"name\":\"alice\"},{\"name\":\"bob\"}]},{\"name\":\"frontend\",\"members\":[{\"name\":\"charlie\"},{\"name\":\"alice\"}]},{\"name\":\"devops\",\"members\":[{\"name\":\"dave\"},{\"name\":\"bob\"}]}]}"},"expectations":[{"check":"stdout_contains:alice"},{"check":"stdout_contains:bob"},{"check":"stdout_contains:charlie"},{"check":"stdout_contains:dave"},{"check":"exit_code:0"}]}
{"id":"json_api_pagination","category":"json_processing","description":"Parse paginated API response and extract IDs","prompt":"Read /data/response.json which contains a paginated API response. Print the current page number, print the total count, and print all item IDs one per line.","files":{"/data/response.json":"{\"page\":2,\"per_page\":3,\"total\":15,\"items\":[{\"id\":201,\"name\":\"alpha\",\"status\":\"active\"},{\"id\":202,\"name\":\"beta\",\"status\":\"inactive\"},{\"id\":203,\"name\":\"gamma\",\"status\":\"active\"}]}"},"expectations":[{"check":"stdout_contains:201"},{"check":"stdout_contains:202"},{"check":"stdout_contains:203"},{"check":"stdout_contains:15"},{"check":"exit_code:0"}]}
{"id":"complex_todo_app","category":"complex_tasks","description":"Build and demonstrate a CLI TODO app","prompt":"Build a command-line TODO app. Create /app/todo.sh that supports these subcommands: 'add <task>' appends a task to /app/tasks.txt, 'list' prints all tasks with line numbers, 'done <number>' removes that line from the file. Then demonstrate: add 'Buy groceries', add 'Write tests', add 'Deploy app', list all tasks, mark task 1 as done, list again to show remaining tasks.","files":{},"expectations":[{"check":"file_exists:/app/todo.sh"},{"check":"file_exists:/app/tasks.txt"},{"check":"stdout_contains:Write tests"},{"check":"stdout_contains:Deploy app"},{"check":"tool_calls_min:3"},{"check":"exit_code:0"}]}
{"id":"complex_todo_app","category":"complex_tasks","description":"Build and demonstrate a CLI TODO app","prompt":"Build a command-line TODO app. Create /app/todo.sh that supports these subcommands: 'add <task>' appends a task to /app/tasks.txt, 'list' prints all tasks with line numbers, 'done <number>' removes that line from the file. Then demonstrate: add 'Buy groceries', add 'Write tests', add 'Deploy app', list all tasks, mark task 1 as done, list again to show remaining tasks.","files":{},"expectations":[{"check":"file_exists:/app/todo.sh"},{"check":"file_exists:/app/tasks.txt"},{"check":"stdout_contains:Write tests"},{"check":"stdout_contains:Deploy app"},{"check":"exit_code:0"}]}
{"id":"complex_markdown_toc","category":"complex_tasks","description":"Generate table of contents from markdown headings","prompt":"Read /doc/README.md and generate a table of contents from its headings (lines starting with # or ##). Insert the TOC after the first heading, formatted as a markdown list with '- [Heading Text](#anchor)' where anchor is the heading text lowercased with spaces replaced by hyphens. Write the result back to the file.","files":{"/doc/README.md":"# Project Alpha\n\nIntroduction to the project.\n\n## Installation\n\nRun the installer.\n\n## Usage\n\nHow to use it.\n\n## API Reference\n\nEndpoint documentation.\n\n## Contributing\n\nPR guidelines.\n"},"expectations":[{"check":"file_contains:/doc/README.md:Installation","weight":0.5},{"check":"file_contains:/doc/README.md:Contributing","weight":0.5},{"check":"file_contains:/doc/README.md:installation"},{"check":"file_contains:/doc/README.md:contributing"},{"check":"exit_code:0"}]}
{"id":"complex_diff_report","category":"complex_tasks","description":"Compare two config versions and summarize changes","prompt":"Compare /data/v1.conf and /data/v2.conf. Produce a human-readable summary showing: which keys were added, which were removed, and which had their values changed. Print the summary to stdout.","files":{"/data/v1.conf":"port=8080\nhost=localhost\nlog_level=info\nworkers=4\nmax_connections=100\n","/data/v2.conf":"port=9090\nhost=0.0.0.0\nlog_level=debug\nworkers=4\ntimeout=30\n"},"expectations":[{"check":"stdout_contains:port"},{"check":"stdout_contains:host"},{"check":"stdout_contains:log_level"},{"check":"stdout_contains:timeout"},{"check":"stdout_contains:max_connections"},{"check":"exit_code:0"}]}
{"id":"json_config_merge","category":"json_processing","description":"Deep-merge two JSON config files with overrides","prompt":"Merge /config/defaults.json with /config/production.json where production values override defaults. Perform a deep merge so that keys present only in defaults are preserved while keys in production take precedence. Save the merged result to /config/merged.json and print it.","files":{"/config/defaults.json":"{\"app\":{\"name\":\"myservice\",\"port\":3000,\"debug\":true},\"db\":{\"host\":\"localhost\",\"port\":5432,\"pool_size\":5},\"log\":{\"level\":\"debug\",\"format\":\"text\"}}","/config/production.json":"{\"app\":{\"port\":8080,\"debug\":false},\"db\":{\"host\":\"db.prod.internal\",\"pool_size\":20},\"log\":{\"level\":\"warn\",\"format\":\"json\"}}"},"expectations":[{"check":"file_exists:/config/merged.json"},{"check":"file_contains:/config/merged.json:myservice"},{"check":"file_contains:/config/merged.json:8080"},{"check":"file_contains:/config/merged.json:db.prod.internal"},{"check":"file_contains:/config/merged.json:20"},{"check":"file_contains:/config/merged.json:warn"},{"check":"stdout_contains:myservice"},{"check":"exit_code:0"}]}
Expand Down
24 changes: 20 additions & 4 deletions crates/bashkit/src/builtins/vars.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,15 +95,19 @@ impl Builtin for Set {
}

let mut i = 0;
let saw_dashdash = false;
while i < ctx.args.len() {
let arg = &ctx.args[i];
if arg == "--" {
// Everything after `--` becomes positional parameters.
// Encode as unit-separator-delimited string for the interpreter
// to pick up (same pattern as _SHIFT_COUNT).
// Encode as count\x1Farg1\x1Farg2... so empty args are preserved.
let positional: Vec<&str> = ctx.args[i + 1..].iter().map(|s| s.as_str()).collect();
ctx.variables
.insert("_SET_POSITIONAL".to_string(), positional.join("\x1F"));
let mut encoded = positional.len().to_string();
for p in &positional {
encoded.push('\x1F');
encoded.push_str(p);
}
ctx.variables.insert("_SET_POSITIONAL".to_string(), encoded);
break;
} else if (arg.starts_with('-') || arg.starts_with('+'))
&& arg.len() > 1
Expand Down Expand Up @@ -138,6 +142,18 @@ impl Builtin for Set {
i += 1;
}

// After --, remaining args become positional parameters.
// Encode with unit separator for the interpreter to decode.
if saw_dashdash {
let positional: Vec<&str> = ctx.args[i..].iter().map(|s| s.as_str()).collect();
let count = positional.len();
let encoded = positional.join("\x1f");
ctx.variables.insert(
"_SET_POSITIONAL".to_string(),
format!("{}\x1f{}", count, encoded),
);
}

Ok(ExecResult::ok(String::new()))
}
}
Expand Down
Loading
Loading