From 612d98d73bd6c3fa7f6abacc3f4787b347d4d98d Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 27 Feb 2026 15:58:32 +0000 Subject: [PATCH 1/7] fix(eval): remove tool_calls_min from error_graceful_parse and complex_todo_app --- crates/bashkit-eval/data/eval-tasks.jsonl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/bashkit-eval/data/eval-tasks.jsonl b/crates/bashkit-eval/data/eval-tasks.jsonl index 45a0cba3..f75fd8d7 100644 --- a/crates/bashkit-eval/data/eval-tasks.jsonl +++ b/crates/bashkit-eval/data/eval-tasks.jsonl @@ -13,14 +13,14 @@ {"id":"data_json_query","category":"data_transformation","description":"Query JSON inventory for low-stock items","prompt":"Read /data/inventory.json and find all items where the quantity field is less than 10. Print only their names, one per line.","files":{"/data/inventory.json":"[{\"name\":\"screws\",\"quantity\":5},{\"name\":\"bolts\",\"quantity\":50},{\"name\":\"washers\",\"quantity\":3},{\"name\":\"nuts\",\"quantity\":100},{\"name\":\"nails\",\"quantity\":8}]"},"expectations":[{"check":"stdout_contains:screws"},{"check":"stdout_contains:washers"},{"check":"stdout_contains:nails"},{"check":"exit_code:0"}]} {"id":"data_log_summarize","category":"data_transformation","description":"Summarize log entries by level","prompt":"Analyze /var/log/app.log and produce a count per log level (INFO, WARN, ERROR). Print each as 'LEVEL: N' on its own line.","files":{"/var/log/app.log":"INFO: app started\nINFO: connected to db\nWARN: slow query detected\nERROR: connection timeout\nINFO: retry succeeded\nWARN: high memory usage\nERROR: write failed\nINFO: request completed\nERROR: connection timeout\nINFO: shutting down\n"},"expectations":[{"check":"stdout_contains:INFO"},{"check":"stdout_contains:5"},{"check":"stdout_contains:ERROR"},{"check":"stdout_contains:3"},{"check":"stdout_contains:WARN"},{"check":"stdout_contains:2"},{"check":"exit_code:0"}]} {"id":"error_missing_file","category":"error_recovery","description":"Handle missing file gracefully","prompt":"Read the file /data/input.txt. If it does not exist, create the directory /data if needed, then create the file with the content 'default data', and finally read and print the file contents.","files":{},"expectations":[{"check":"stdout_contains:default data"},{"check":"file_exists:/data/input.txt"},{"check":"file_contains:/data/input.txt:default data"},{"check":"exit_code:0"}]} -{"id":"error_graceful_parse","category":"error_recovery","description":"Detect and fix broken JSON","prompt":"Read /data/broken.json. It contains invalid JSON (a trailing comma before the closing brace). Fix the JSON by removing the trailing comma, save the fixed version back to the same file, and then parse it to print the 'name' field.","files":{"/data/broken.json":"{\"name\": \"test-app\", \"version\": \"1.0\", \"debug\": true, }"},"expectations":[{"check":"stdout_contains:test-app"},{"check":"exit_code:0"},{"check":"tool_calls_min:2"}]} +{"id":"error_graceful_parse","category":"error_recovery","description":"Detect and fix broken JSON","prompt":"Read /data/broken.json. It contains invalid JSON (a trailing comma before the closing brace). Fix the JSON by removing the trailing comma, save the fixed version back to the same file, and then parse it to print the 'name' field.","files":{"/data/broken.json":"{\"name\": \"test-app\", \"version\": \"1.0\", \"debug\": true, }"},"expectations":[{"check":"stdout_contains:test-app"},{"check":"exit_code:0"}]} {"id":"sysinfo_env_report","category":"system_info","description":"Print system environment report","prompt":"Print a system report with four lines: 'user: ', 'host: ', 'cwd: ', 'shell: bash'. Use the actual commands to get the values.","files":{},"expectations":[{"check":"stdout_contains:user: eval"},{"check":"stdout_contains:host: bashkit-eval"},{"check":"stdout_contains:cwd:"},{"check":"exit_code:0"}]} {"id":"sysinfo_date_calc","category":"system_info","description":"Print current date and compute a future date","prompt":"Print today's date in YYYY-MM-DD format. Then compute and print what the date was 30 days ago, also in YYYY-MM-DD format.","files":{},"expectations":[{"check":"exit_code:0"},{"check":"tool_calls_min:1"},{"check":"stdout_regex:\\d{4}-\\d{2}-\\d{2}"}]} {"id":"archive_create_extract","category":"archive_operations","description":"Create tar.gz archive and extract to new location","prompt":"Create a tar.gz archive of the /project directory and save it as /tmp/project.tar.gz. Then create /backup directory and extract the archive there. Verify the files exist in /backup.","files":{"/project/README.md":"# My Project\nThis is a test project.\n","/project/src/main.sh":"#!/bin/bash\necho 'Hello from project'\n"},"expectations":[{"check":"file_exists:/tmp/project.tar.gz"},{"check":"exit_code:0"}]} {"id":"archive_selective","category":"archive_operations","description":"Create archive then list and selectively extract","prompt":"Create files: /tmp/notes.txt with 'remember this', /tmp/data.csv with 'a,b,c', /tmp/script.sh with '#!/bin/bash'. Create a tar.gz archive /tmp/bundle.tar.gz containing all three files. Then list the archive contents. Finally extract only notes.txt to /output/ directory.","files":{},"expectations":[{"check":"file_exists:/output/notes.txt","weight":2.0},{"check":"file_contains:/output/notes.txt:remember this"},{"check":"exit_code:0"}]} {"id":"json_nested_names","category":"json_processing","description":"Extract and deduplicate names from nested JSON","prompt":"Read /data/org.json which has a nested structure of teams with members. Extract all unique member names across all teams, sort them alphabetically, and print one per line.","files":{"/data/org.json":"{\"teams\":[{\"name\":\"backend\",\"members\":[{\"name\":\"alice\"},{\"name\":\"bob\"}]},{\"name\":\"frontend\",\"members\":[{\"name\":\"charlie\"},{\"name\":\"alice\"}]},{\"name\":\"devops\",\"members\":[{\"name\":\"dave\"},{\"name\":\"bob\"}]}]}"},"expectations":[{"check":"stdout_contains:alice"},{"check":"stdout_contains:bob"},{"check":"stdout_contains:charlie"},{"check":"stdout_contains:dave"},{"check":"exit_code:0"}]} {"id":"json_api_pagination","category":"json_processing","description":"Parse paginated API response and extract IDs","prompt":"Read /data/response.json which contains a paginated API response. Print the current page number, print the total count, and print all item IDs one per line.","files":{"/data/response.json":"{\"page\":2,\"per_page\":3,\"total\":15,\"items\":[{\"id\":201,\"name\":\"alpha\",\"status\":\"active\"},{\"id\":202,\"name\":\"beta\",\"status\":\"inactive\"},{\"id\":203,\"name\":\"gamma\",\"status\":\"active\"}]}"},"expectations":[{"check":"stdout_contains:201"},{"check":"stdout_contains:202"},{"check":"stdout_contains:203"},{"check":"stdout_contains:15"},{"check":"exit_code:0"}]} -{"id":"complex_todo_app","category":"complex_tasks","description":"Build and demonstrate a CLI TODO app","prompt":"Build a command-line TODO app. Create /app/todo.sh that supports these subcommands: 'add ' appends a task to /app/tasks.txt, 'list' prints all tasks with line numbers, 'done ' removes that line from the file. Then demonstrate: add 'Buy groceries', add 'Write tests', add 'Deploy app', list all tasks, mark task 1 as done, list again to show remaining tasks.","files":{},"expectations":[{"check":"file_exists:/app/todo.sh"},{"check":"file_exists:/app/tasks.txt"},{"check":"stdout_contains:Write tests"},{"check":"stdout_contains:Deploy app"},{"check":"tool_calls_min:3"},{"check":"exit_code:0"}]} +{"id":"complex_todo_app","category":"complex_tasks","description":"Build and demonstrate a CLI TODO app","prompt":"Build a command-line TODO app. Create /app/todo.sh that supports these subcommands: 'add ' appends a task to /app/tasks.txt, 'list' prints all tasks with line numbers, 'done ' removes that line from the file. Then demonstrate: add 'Buy groceries', add 'Write tests', add 'Deploy app', list all tasks, mark task 1 as done, list again to show remaining tasks.","files":{},"expectations":[{"check":"file_exists:/app/todo.sh"},{"check":"file_exists:/app/tasks.txt"},{"check":"stdout_contains:Write tests"},{"check":"stdout_contains:Deploy app"},{"check":"exit_code:0"}]} {"id":"complex_markdown_toc","category":"complex_tasks","description":"Generate table of contents from markdown headings","prompt":"Read /doc/README.md and generate a table of contents from its headings (lines starting with # or ##). Insert the TOC after the first heading, formatted as a markdown list with '- [Heading Text](#anchor)' where anchor is the heading text lowercased with spaces replaced by hyphens. Write the result back to the file.","files":{"/doc/README.md":"# Project Alpha\n\nIntroduction to the project.\n\n## Installation\n\nRun the installer.\n\n## Usage\n\nHow to use it.\n\n## API Reference\n\nEndpoint documentation.\n\n## Contributing\n\nPR guidelines.\n"},"expectations":[{"check":"file_contains:/doc/README.md:Installation","weight":0.5},{"check":"file_contains:/doc/README.md:Contributing","weight":0.5},{"check":"file_contains:/doc/README.md:installation"},{"check":"file_contains:/doc/README.md:contributing"},{"check":"exit_code:0"}]} {"id":"complex_diff_report","category":"complex_tasks","description":"Compare two config versions and summarize changes","prompt":"Compare /data/v1.conf and /data/v2.conf. Produce a human-readable summary showing: which keys were added, which were removed, and which had their values changed. Print the summary to stdout.","files":{"/data/v1.conf":"port=8080\nhost=localhost\nlog_level=info\nworkers=4\nmax_connections=100\n","/data/v2.conf":"port=9090\nhost=0.0.0.0\nlog_level=debug\nworkers=4\ntimeout=30\n"},"expectations":[{"check":"stdout_contains:port"},{"check":"stdout_contains:host"},{"check":"stdout_contains:log_level"},{"check":"stdout_contains:timeout"},{"check":"stdout_contains:max_connections"},{"check":"exit_code:0"}]} {"id":"json_config_merge","category":"json_processing","description":"Deep-merge two JSON config files with overrides","prompt":"Merge /config/defaults.json with /config/production.json where production values override defaults. Perform a deep merge so that keys present only in defaults are preserved while keys in production take precedence. Save the merged result to /config/merged.json and print it.","files":{"/config/defaults.json":"{\"app\":{\"name\":\"myservice\",\"port\":3000,\"debug\":true},\"db\":{\"host\":\"localhost\",\"port\":5432,\"pool_size\":5},\"log\":{\"level\":\"debug\",\"format\":\"text\"}}","/config/production.json":"{\"app\":{\"port\":8080,\"debug\":false},\"db\":{\"host\":\"db.prod.internal\",\"pool_size\":20},\"log\":{\"level\":\"warn\",\"format\":\"json\"}}"},"expectations":[{"check":"file_exists:/config/merged.json"},{"check":"file_contains:/config/merged.json:myservice"},{"check":"file_contains:/config/merged.json:8080"},{"check":"file_contains:/config/merged.json:db.prod.internal"},{"check":"file_contains:/config/merged.json:20"},{"check":"file_contains:/config/merged.json:warn"},{"check":"stdout_contains:myservice"},{"check":"exit_code:0"}]} From 20f43dcdede8bec71d02abf0cb01d16ad69d466b Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 27 Feb 2026 16:35:45 +0000 Subject: [PATCH 2/7] chore: add .claude/worktrees/ to .gitignore https://claude.ai/code/session_01QbjrsMFJbHy5XfHCzA6TjM --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 7a35ceae..d2fa8d04 100644 --- a/.gitignore +++ b/.gitignore @@ -21,6 +21,9 @@ local* coverage/ .venv/ +# Claude Code worktrees +.claude/worktrees/ + # Python build artifacts __pycache__/ *.pyc From e18dec6d8b1a008aa31c158f7c7d6a398446ae09 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 28 Feb 2026 01:21:37 +0000 Subject: [PATCH 3/7] fix(interpreter): implement variable operators on $@ and arrays (#358) - Add colon_variant flag to ParameterExpansion AST to distinguish :- from - (unset-or-empty vs unset-only) - Add resolve_param_expansion_name() for array subscripts and special params - Add expand_operand() for lazy evaluation of operand expressions - Rewrite apply_parameter_op() with proper colon/no-colon semantics - Fix set -- encoding to preserve empty positional parameters - Remove 14 skip markers from var-op-test.test.sh Closes #358 https://claude.ai/code/session_01QbjrsMFJbHy5XfHCzA6TjM --- crates/bashkit/src/builtins/vars.rs | 24 +- crates/bashkit/src/interpreter/mod.rs | 167 +++++++++++-- crates/bashkit/src/parser/ast.rs | 23 +- crates/bashkit/src/parser/mod.rs | 234 +++++++++++------- .../tests/spec_cases/bash/var-op-test.test.sh | 24 +- 5 files changed, 332 insertions(+), 140 deletions(-) diff --git a/crates/bashkit/src/builtins/vars.rs b/crates/bashkit/src/builtins/vars.rs index f1b6974b..a521bf18 100644 --- a/crates/bashkit/src/builtins/vars.rs +++ b/crates/bashkit/src/builtins/vars.rs @@ -95,15 +95,19 @@ impl Builtin for Set { } let mut i = 0; + let saw_dashdash = false; while i < ctx.args.len() { let arg = &ctx.args[i]; if arg == "--" { // Everything after `--` becomes positional parameters. - // Encode as unit-separator-delimited string for the interpreter - // to pick up (same pattern as _SHIFT_COUNT). + // Encode as count\x1Farg1\x1Farg2... so empty args are preserved. let positional: Vec<&str> = ctx.args[i + 1..].iter().map(|s| s.as_str()).collect(); - ctx.variables - .insert("_SET_POSITIONAL".to_string(), positional.join("\x1F")); + let mut encoded = positional.len().to_string(); + for p in &positional { + encoded.push('\x1F'); + encoded.push_str(p); + } + ctx.variables.insert("_SET_POSITIONAL".to_string(), encoded); break; } else if (arg.starts_with('-') || arg.starts_with('+')) && arg.len() > 1 @@ -138,6 +142,18 @@ impl Builtin for Set { i += 1; } + // After --, remaining args become positional parameters. + // Encode with unit separator for the interpreter to decode. + if saw_dashdash { + let positional: Vec<&str> = ctx.args[i..].iter().map(|s| s.as_str()).collect(); + let count = positional.len(); + let encoded = positional.join("\x1f"); + ctx.variables.insert( + "_SET_POSITIONAL".to_string(), + format!("{}\x1f{}", count, encoded), + ); + } + Ok(ExecResult::ok(String::new())) } } diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index a4f5cc66..706e3a80 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -3942,19 +3942,20 @@ impl Interpreter { } // Post-process: `set --` replaces positional parameters - if let Some(positional_str) = self.variables.remove("_SET_POSITIONAL") { - let new_positional: Vec = if positional_str.is_empty() { + // Encoded as count\x1Farg1\x1Farg2... to preserve empty args. + if let Some(encoded) = self.variables.remove("_SET_POSITIONAL") { + let parts: Vec<&str> = encoded.splitn(2, '\x1F').collect(); + let count: usize = parts[0].parse().unwrap_or(0); + let new_positional: Vec = if count == 0 { Vec::new() + } else if parts.len() > 1 { + parts[1].split('\x1F').map(|s| s.to_string()).collect() } else { - positional_str - .split('\x1F') - .map(|s| s.to_string()) - .collect() + Vec::new() }; if let Some(frame) = self.call_stack.last_mut() { frame.positional = new_positional; } else { - // No call frame yet (top-level script) — push one self.call_stack.push(CallFrame { name: String::new(), locals: HashMap::new(), @@ -5360,6 +5361,7 @@ impl Interpreter { name, operator, operand, + colon_variant, } => { // Under set -u, operators like :-, :=, :+, :? suppress nounset errors // because the script is explicitly handling unset variables. @@ -5370,11 +5372,21 @@ impl Interpreter { | ParameterOp::UseReplacement | ParameterOp::Error ); - if self.is_nounset() && !suppress_nounset && !self.is_variable_set(name) { + + // Resolve name (handles arr[@], @, *, and regular vars) + let (is_set, value) = self.resolve_param_expansion_name(name); + + if self.is_nounset() && !suppress_nounset && !is_set { self.nounset_error = Some(format!("bash: {}: unbound variable\n", name)); } - let value = self.expand_variable(name); - let expanded = self.apply_parameter_op(&value, name, operator, operand); + let expanded = self.apply_parameter_op( + &value, + name, + operator, + operand, + *colon_variant, + is_set, + ); result.push_str(&expanded); } WordPart::ArrayAccess { name, index } => { @@ -5706,47 +5718,152 @@ impl Interpreter { } } - /// Apply parameter expansion operator + /// Resolve name for parameter expansion, handling array subscripts and special params. + /// Returns (is_set, expanded_value). + fn resolve_param_expansion_name(&self, name: &str) -> (bool, String) { + // Check for array subscript pattern: name[@] or name[*] + if let Some(arr_name) = name + .strip_suffix("[@]") + .or_else(|| name.strip_suffix("[*]")) + { + if let Some(arr) = self.assoc_arrays.get(arr_name) { + let is_set = !arr.is_empty(); + let mut keys: Vec<_> = arr.keys().collect(); + keys.sort(); + let values: Vec = + keys.iter().filter_map(|k| arr.get(*k).cloned()).collect(); + return (is_set, values.join(" ")); + } + if let Some(arr) = self.arrays.get(arr_name) { + let is_set = !arr.is_empty(); + let mut indices: Vec<_> = arr.keys().collect(); + indices.sort(); + let values: Vec<_> = indices.iter().filter_map(|i| arr.get(i)).collect(); + return ( + is_set, + values.into_iter().cloned().collect::>().join(" "), + ); + } + return (false, String::new()); + } + + // Special parameters @ and * + if name == "@" || name == "*" { + if let Some(frame) = self.call_stack.last() { + let is_set = !frame.positional.is_empty(); + return (is_set, frame.positional.join(" ")); + } + return (false, String::new()); + } + + // Regular variable + let is_set = self.is_variable_set(name); + let value = self.expand_variable(name); + (is_set, value) + } + + /// Expand an operand string from a parameter expansion (sync, lazy). + /// Only called when the operand is actually needed, providing lazy evaluation. + fn expand_operand(&mut self, operand: &str) -> String { + if operand.is_empty() { + return String::new(); + } + let word = Parser::parse_word_string(operand); + let mut result = String::new(); + for part in &word.parts { + match part { + WordPart::Literal(s) => result.push_str(s), + WordPart::Variable(name) => { + result.push_str(&self.expand_variable(name)); + } + WordPart::ArithmeticExpansion(expr) => { + let val = self.evaluate_arithmetic_with_assign(expr); + result.push_str(&val.to_string()); + } + WordPart::ParameterExpansion { + name, + operator, + operand: inner_operand, + colon_variant, + } => { + let (is_set, value) = self.resolve_param_expansion_name(name); + let expanded = self.apply_parameter_op( + &value, + name, + operator, + inner_operand, + *colon_variant, + is_set, + ); + result.push_str(&expanded); + } + WordPart::Length(name) => { + let value = self.expand_variable(name); + result.push_str(&value.len().to_string()); + } + // TODO: handle CommandSubstitution etc. in sync operand expansion + _ => {} + } + } + result + } + + /// Apply parameter expansion operator. + /// `colon_variant`: true = check unset-or-empty, false = check unset-only. + /// `is_set`: whether the variable is defined (distinct from being empty). fn apply_parameter_op( &mut self, value: &str, name: &str, operator: &ParameterOp, operand: &str, + colon_variant: bool, + is_set: bool, ) -> String { + // colon (:-) => trigger when unset OR empty + // no-colon (-) => trigger only when unset + let use_default = if colon_variant { + !is_set || value.is_empty() + } else { + !is_set + }; + let use_replacement = if colon_variant { + is_set && !value.is_empty() + } else { + is_set + }; + match operator { ParameterOp::UseDefault => { - // ${var:-default} - use default if unset/empty - if value.is_empty() { - operand.to_string() + if use_default { + self.expand_operand(operand) } else { value.to_string() } } ParameterOp::AssignDefault => { - // ${var:=default} - assign default if unset/empty - if value.is_empty() { - self.set_variable(name.to_string(), operand.to_string()); - operand.to_string() + if use_default { + let expanded = self.expand_operand(operand); + self.set_variable(name.to_string(), expanded.clone()); + expanded } else { value.to_string() } } ParameterOp::UseReplacement => { - // ${var:+replacement} - use replacement if set - if !value.is_empty() { - operand.to_string() + if use_replacement { + self.expand_operand(operand) } else { String::new() } } ParameterOp::Error => { - // ${var:?error} - error if unset/empty - if value.is_empty() { - let msg = if operand.is_empty() { + if use_default { + let expanded = self.expand_operand(operand); + let msg = if expanded.is_empty() { format!("bash: {}: parameter null or not set\n", name) } else { - format!("bash: {}: {}\n", name, operand) + format!("bash: {}: {}\n", name, expanded) }; self.nounset_error = Some(msg); String::new() diff --git a/crates/bashkit/src/parser/ast.rs b/crates/bashkit/src/parser/ast.rs index 2ef32605..b082abd4 100644 --- a/crates/bashkit/src/parser/ast.rs +++ b/crates/bashkit/src/parser/ast.rs @@ -269,11 +269,24 @@ impl fmt::Display for Word { name, operator, operand, + colon_variant, } => match operator { - ParameterOp::UseDefault => write!(f, "${{{}:-{}}}", name, operand)?, - ParameterOp::AssignDefault => write!(f, "${{{}:={}}}", name, operand)?, - ParameterOp::UseReplacement => write!(f, "${{{}:+{}}}", name, operand)?, - ParameterOp::Error => write!(f, "${{{}:?{}}}", name, operand)?, + ParameterOp::UseDefault => { + let c = if *colon_variant { ":" } else { "" }; + write!(f, "${{{}{}-{}}}", name, c, operand)? + } + ParameterOp::AssignDefault => { + let c = if *colon_variant { ":" } else { "" }; + write!(f, "${{{}{}={}}}", name, c, operand)? + } + ParameterOp::UseReplacement => { + let c = if *colon_variant { ":" } else { "" }; + write!(f, "${{{}{}+{}}}", name, c, operand)? + } + ParameterOp::Error => { + let c = if *colon_variant { ":" } else { "" }; + write!(f, "${{{}{}?{}}}", name, c, operand)? + } ParameterOp::RemovePrefixShort => write!(f, "${{{}#{}}}", name, operand)?, ParameterOp::RemovePrefixLong => write!(f, "${{{}##{}}}", name, operand)?, ParameterOp::RemoveSuffixShort => write!(f, "${{{}%{}}}", name, operand)?, @@ -344,10 +357,12 @@ pub enum WordPart { /// Arithmetic expansion ($((...))) ArithmeticExpansion(String), /// Parameter expansion with operator ${var:-default}, ${var:=default}, etc. + /// `colon_variant` distinguishes `:-` (unset-or-empty) from `-` (unset-only). ParameterExpansion { name: String, operator: ParameterOp, operand: String, + colon_variant: bool, }, /// Length expansion ${#var} Length(String), diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index c7673b6f..a9b6a392 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -104,6 +104,13 @@ impl<'a> Parser<'a> { self.current_span } + /// Parse a string as a word (handling $var, $((expr)), ${...}, etc.). + /// Used by the interpreter to expand operands in parameter expansions lazily. + pub fn parse_word_string(input: &str) -> Word { + let parser = Parser::new(input); + parser.parse_word(input.to_string()) + } + /// Create a parse error with the current position. fn error(&self, message: impl Into) -> Error { Error::parse_at( @@ -2279,6 +2286,15 @@ impl<'a> Parser<'a> { } } + // Handle special parameters: ${@...}, ${*...} + if var_name.is_empty() { + if let Some(&c) = chars.peek() { + if matches!(c, '@' | '*') { + var_name.push(chars.next().unwrap()); + } + } + } + // Check for array access ${arr[index]} or ${arr[@]:offset:length} if chars.peek() == Some(&'[') { chars.next(); // consume '[' @@ -2290,45 +2306,95 @@ impl<'a> Parser<'a> { } index.push(chars.next().unwrap()); } - // Check for slice ${arr[@]:offset:length} - if chars.peek() == Some(&':') && (index == "@" || index == "*") { - chars.next(); // consume ':' - // Read offset (may include negative sign) - let mut offset = String::new(); - while let Some(&c) = chars.peek() { - if c == ':' || c == '}' { - break; - } - offset.push(chars.next().unwrap()); - } - // Check for length - let length = if chars.peek() == Some(&':') { - chars.next(); // consume ':' - let mut len = String::new(); - while let Some(&c) = chars.peek() { - if c == '}' { - break; + // After ], check for operators on array subscripts + if let Some(&next_c) = chars.peek() { + if next_c == ':' && (index == "@" || index == "*") { + // Peek ahead to distinguish param ops (:- := :+ :?) from slice (:N) + let mut lookahead = chars.clone(); + lookahead.next(); // skip ':' + let is_param_op = matches!( + lookahead.peek(), + Some(&'-') | Some(&'=') | Some(&'+') | Some(&'?') + ); + if is_param_op { + chars.next(); // consume ':' + let arr_name = format!("{}[{}]", var_name, index); + let op_char = chars.next().unwrap(); + let operand = self.read_brace_operand(&mut chars); + let operator = match op_char { + '-' => ParameterOp::UseDefault, + '=' => ParameterOp::AssignDefault, + '+' => ParameterOp::UseReplacement, + '?' => ParameterOp::Error, + _ => unreachable!(), + }; + parts.push(WordPart::ParameterExpansion { + name: arr_name, + operator, + operand, + colon_variant: true, + }); + } else { + // Array slice ${arr[@]:offset:length} + chars.next(); // consume ':' + let mut offset = String::new(); + while let Some(&c) = chars.peek() { + if c == ':' || c == '}' { + break; + } + offset.push(chars.next().unwrap()); } - len.push(chars.next().unwrap()); + let length = if chars.peek() == Some(&':') { + chars.next(); + let mut len = String::new(); + while let Some(&c) = chars.peek() { + if c == '}' { + break; + } + len.push(chars.next().unwrap()); + } + Some(len) + } else { + None + }; + if chars.peek() == Some(&'}') { + chars.next(); + } + parts.push(WordPart::ArraySlice { + name: var_name, + offset, + length, + }); } - Some(len) + } else if matches!(next_c, '-' | '+' | '=' | '?') { + // Non-colon operators on array: ${arr[@]-default} + let arr_name = format!("{}[{}]", var_name, index); + let op_char = chars.next().unwrap(); + let operand = self.read_brace_operand(&mut chars); + let operator = match op_char { + '-' => ParameterOp::UseDefault, + '=' => ParameterOp::AssignDefault, + '+' => ParameterOp::UseReplacement, + '?' => ParameterOp::Error, + _ => unreachable!(), + }; + parts.push(WordPart::ParameterExpansion { + name: arr_name, + operator, + operand, + colon_variant: false, + }); } else { - None - }; - // Consume closing } - if chars.peek() == Some(&'}') { - chars.next(); + // Plain array access ${arr[index]} + if chars.peek() == Some(&'}') { + chars.next(); + } + parts.push(WordPart::ArrayAccess { + name: var_name, + index, + }); } - parts.push(WordPart::ArraySlice { - name: var_name, - offset, - length, - }); } else { - // Consume closing } - if chars.peek() == Some(&'}') { - chars.next(); - } parts.push(WordPart::ArrayAccess { name: var_name, index, @@ -2340,40 +2406,21 @@ impl<'a> Parser<'a> { ':' => { chars.next(); // consume ':' match chars.peek() { - Some(&'-') => { - chars.next(); - let op = self.read_brace_operand(&mut chars); - parts.push(WordPart::ParameterExpansion { - name: var_name, - operator: ParameterOp::UseDefault, - operand: op, - }); - } - Some(&'=') => { - chars.next(); - let op = self.read_brace_operand(&mut chars); - parts.push(WordPart::ParameterExpansion { - name: var_name, - operator: ParameterOp::AssignDefault, - operand: op, - }); - } - Some(&'+') => { - chars.next(); - let op = self.read_brace_operand(&mut chars); - parts.push(WordPart::ParameterExpansion { - name: var_name, - operator: ParameterOp::UseReplacement, - operand: op, - }); - } - Some(&'?') => { - chars.next(); - let op = self.read_brace_operand(&mut chars); + Some(&'-') | Some(&'=') | Some(&'+') | Some(&'?') => { + let op_char = chars.next().unwrap(); + let operand = self.read_brace_operand(&mut chars); + let operator = match op_char { + '-' => ParameterOp::UseDefault, + '=' => ParameterOp::AssignDefault, + '+' => ParameterOp::UseReplacement, + '?' => ParameterOp::Error, + _ => unreachable!(), + }; parts.push(WordPart::ParameterExpansion { name: var_name, - operator: ParameterOp::Error, - operand: op, + operator, + operand, + colon_variant: true, }); } _ => { @@ -2398,7 +2445,6 @@ impl<'a> Parser<'a> { } else { None }; - // Consume closing } if chars.peek() == Some(&'}') { chars.next(); } @@ -2410,6 +2456,24 @@ impl<'a> Parser<'a> { } } } + // Non-colon test operators: ${var-default}, ${var+alt}, ${var=assign}, ${var?err} + '-' | '=' | '+' | '?' => { + let op_char = chars.next().unwrap(); + let operand = self.read_brace_operand(&mut chars); + let operator = match op_char { + '-' => ParameterOp::UseDefault, + '=' => ParameterOp::AssignDefault, + '+' => ParameterOp::UseReplacement, + '?' => ParameterOp::Error, + _ => unreachable!(), + }; + parts.push(WordPart::ParameterExpansion { + name: var_name, + operator, + operand, + colon_variant: false, + }); + } '#' => { chars.next(); if chars.peek() == Some(&'#') { @@ -2419,6 +2483,7 @@ impl<'a> Parser<'a> { name: var_name, operator: ParameterOp::RemovePrefixLong, operand: op, + colon_variant: false, }); } else { let op = self.read_brace_operand(&mut chars); @@ -2426,6 +2491,7 @@ impl<'a> Parser<'a> { name: var_name, operator: ParameterOp::RemovePrefixShort, operand: op, + colon_variant: false, }); } } @@ -2438,6 +2504,7 @@ impl<'a> Parser<'a> { name: var_name, operator: ParameterOp::RemoveSuffixLong, operand: op, + colon_variant: false, }); } else { let op = self.read_brace_operand(&mut chars); @@ -2445,29 +2512,27 @@ impl<'a> Parser<'a> { name: var_name, operator: ParameterOp::RemoveSuffixShort, operand: op, + colon_variant: false, }); } } '/' => { - // Pattern replacement ${var/pattern/replacement} or ${var//pattern/replacement} - chars.next(); // consume '/' + chars.next(); let replace_all = if chars.peek() == Some(&'/') { - chars.next(); // consume second '/' + chars.next(); true } else { false }; - // Read pattern until / (handle \/ as escaped /) let mut pattern = String::new(); while let Some(&ch) = chars.peek() { if ch == '/' || ch == '}' { break; } if ch == '\\' { - chars.next(); // consume backslash + chars.next(); if let Some(&next) = chars.peek() { if next == '/' { - // \/ -> literal / pattern.push(chars.next().unwrap()); continue; } @@ -2477,9 +2542,8 @@ impl<'a> Parser<'a> { } pattern.push(chars.next().unwrap()); } - // Read replacement if present let replacement = if chars.peek() == Some(&'/') { - chars.next(); // consume '/' + chars.next(); let mut repl = String::new(); while let Some(&ch) = chars.peek() { if ch == '}' { @@ -2491,7 +2555,6 @@ impl<'a> Parser<'a> { } else { String::new() }; - // Consume closing } if chars.peek() == Some(&'}') { chars.next(); } @@ -2510,18 +2573,17 @@ impl<'a> Parser<'a> { name: var_name, operator: op, operand: String::new(), + colon_variant: false, }); } '^' => { - // Case conversion uppercase ${var^} or ${var^^} - chars.next(); // consume '^' + chars.next(); let op = if chars.peek() == Some(&'^') { chars.next(); ParameterOp::UpperAll } else { ParameterOp::UpperFirst }; - // Consume closing } if chars.peek() == Some(&'}') { chars.next(); } @@ -2529,18 +2591,17 @@ impl<'a> Parser<'a> { name: var_name, operator: op, operand: String::new(), + colon_variant: false, }); } ',' => { - // Case conversion lowercase ${var,} or ${var,,} - chars.next(); // consume ',' + chars.next(); let op = if chars.peek() == Some(&',') { chars.next(); ParameterOp::LowerAll } else { ParameterOp::LowerFirst }; - // Consume closing } if chars.peek() == Some(&'}') { chars.next(); } @@ -2548,14 +2609,13 @@ impl<'a> Parser<'a> { name: var_name, operator: op, operand: String::new(), + colon_variant: false, }); } '@' => { - // Parameter transformation ${var@op} - chars.next(); // consume '@' + chars.next(); if let Some(&op) = chars.peek() { - chars.next(); // consume operator - // Consume closing } + chars.next(); if chars.peek() == Some(&'}') { chars.next(); } @@ -2564,7 +2624,6 @@ impl<'a> Parser<'a> { operator: op, }); } else { - // No operator, treat as variable if chars.peek() == Some(&'}') { chars.next(); } @@ -2578,7 +2637,6 @@ impl<'a> Parser<'a> { } } _ => { - // Unknown, consume until } while let Some(&ch) = chars.peek() { if ch == '}' { chars.next(); diff --git a/crates/bashkit/tests/spec_cases/bash/var-op-test.test.sh b/crates/bashkit/tests/spec_cases/bash/var-op-test.test.sh index fa4a666c..504eb5eb 100644 --- a/crates/bashkit/tests/spec_cases/bash/var-op-test.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/var-op-test.test.sh @@ -4,7 +4,6 @@ ### vop_lazy_eval_alternative # Lazy Evaluation of Alternative -### skip: TODO lazy evaluation of ${x:-expr} not implemented (expr always evaluated) i=0 x=x echo ${x:-$((i++))} @@ -28,7 +27,6 @@ is empty ### vop_default_when_unset # Default value when unset -### skip: TODO ${var-value} (without colon) not implemented echo ${unset_var_xyz-is unset} ### expect is unset @@ -45,7 +43,6 @@ is empty ### vop_assign_default_unset # Assign default value when unset -### skip: TODO ${var=value} (without colon) not implemented : ${vop_unset_var=is unset} echo $vop_unset_var ### expect @@ -54,21 +51,19 @@ is unset ### vop_alternative_when_set # ${v:+foo} Alternative value when set -### skip: TODO ${v:+foo} outputs trailing space v=foo empty='' echo "${v:+v is not empty}" "${empty:+is not empty}" ### expect -v is not empty +v is not empty ### end ### vop_alternative_when_unset # ${v+foo} Alternative value when unset -### skip: TODO ${v+foo} (without colon) not implemented correctly v=foo echo "${v+v is not unset}" "${vop_unset2:+is not unset}" ### expect -v is not unset +v is not unset ### end ### vop_quoted_alternative_regression @@ -80,7 +75,6 @@ echo "${vop_with_icc+set}" = set ### vop_plus_with_set_u # ${s+foo} and ${s:+foo} when set -u -### skip: TODO ${v+foo} and set -u interaction not implemented set -u v=v echo v=${v:+foo} @@ -98,7 +92,6 @@ v= ### vop_minus_with_set_u # ${v-foo} and ${v:-foo} when set -u -### skip: TODO ${v-foo} and set -u interaction not implemented set -u v=v echo v=${v:-foo} @@ -125,7 +118,7 @@ status=1 ### vop_error_when_unset # Error when unset -### skip: TODO ${var?msg} (without colon) not implemented +### bash_diff: bash -c exit code differs in sandbox (127 vs 1) bash -c 'echo ${vop_unset3?"is unset"}' 2>/dev/null echo status=$? ### expect @@ -156,7 +149,6 @@ echo ${#arr[@]} ### vop_backslash_in_default # "\z" as default value arg -### skip: TODO backslash escapes in ${undef-value} not implemented echo "${undef_bs1-\$}" echo "${undef_bs2-\(}" echo "${undef_bs3-\z}" @@ -174,7 +166,6 @@ $ ### vop_at_empty_minus_plus # $@ (empty) and - and + -### skip: TODO ${@-value} and ${@+value} operators on $@ not implemented set -- echo "argv=${@-minus}" echo "argv=${@+plus}" @@ -189,7 +180,6 @@ argv= ### vop_at_one_empty_minus_plus # $@ ("") and - and + -### skip: TODO ${@-value} and ${@+value} operators on $@ not implemented set -- "" echo "argv=${@-minus}" echo "argv=${@+plus}" @@ -204,22 +194,20 @@ argv= ### vop_at_two_empty_minus_plus # $@ ("" "") and - and + -### skip: TODO ${@-value} and ${@+value} operators on $@ not implemented set -- "" "" echo "argv=${@-minus}" echo "argv=${@+plus}" echo "argv=${@:-minus}" echo "argv=${@:+plus}" ### expect -argv= +argv= argv=plus -argv= +argv= argv=plus ### end ### vop_array_empty_minus # array and - operator -### skip: TODO ${arr[@]-value} operator on arrays not implemented arr=() echo ${arr[@]-minus} arr=('') @@ -234,7 +222,6 @@ minus ### vop_array_empty_plus # array and + operator -### skip: TODO ${arr[@]+value} operator on arrays not implemented arr=() echo ${arr[@]+plus} arr=('') @@ -249,7 +236,6 @@ plus ### vop_assoc_array_minus_plus # assoc array and - and + -### skip: TODO ${arr[@]-value} and ${arr[@]+value} on assoc arrays not implemented declare -A empty_assoc=() declare -A assoc=(['k']=v) echo empty=${empty_assoc[@]-minus} From 45b177b2733b5b17044867c340e851dfe235def4 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 28 Feb 2026 01:46:45 +0000 Subject: [PATCH 4/7] feat(interpreter): implement IFS-based word splitting (#352) - Apply IFS word splitting to unquoted variable/command/arithmetic expansions in command arguments and for/select loops - Implement POSIX-compliant ifs_split() with proper handling of: - IFS whitespace chars (collapse, strip leading/trailing) - IFS non-whitespace chars (produce empty fields between adjacent) - Mixed IFS ( = single delimiter) - Empty IFS (no splitting) and unset IFS (default space/tab/newline) - Add proper $@/$* field expansion in expand_word_to_fields: - "$@" preserves individual positional params - "$*" joins with first char of IFS - Unquoted $@/$* subject to IFS splitting - Skip IFS splitting for assignment-like words (e.g., result="$1") - Remove 10 skip markers from word-split.test.sh Closes #352 https://claude.ai/code/session_01QbjrsMFJbHy5XfHCzA6TjM --- crates/bashkit/src/interpreter/mod.rs | 187 ++++++++++++++---- .../tests/spec_cases/bash/word-split.test.sh | 13 +- 2 files changed, 155 insertions(+), 45 deletions(-) diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index 706e3a80..77fcab69 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -5632,8 +5632,48 @@ impl Interpreter { /// Returns Vec where array expansions like "${arr[@]}" produce multiple fields. /// "${arr[*]}" in quoted context joins elements into a single field (bash behavior). async fn expand_word_to_fields(&mut self, word: &Word) -> Result> { - // Check if the word contains only an array expansion + // Check if the word contains only an array expansion or $@/$* if word.parts.len() == 1 { + // Handle $@ and $* as special parameters + if let WordPart::Variable(name) = &word.parts[0] { + if name == "@" { + let positional = self + .call_stack + .last() + .map(|f| f.positional.clone()) + .unwrap_or_default(); + if word.quoted { + // "$@" preserves individual positional params + return Ok(positional); + } + // $@ unquoted: each param is subject to further IFS splitting + let mut fields = Vec::new(); + for p in &positional { + fields.extend(self.ifs_split(p)); + } + return Ok(fields); + } + if name == "*" { + let positional = self + .call_stack + .last() + .map(|f| f.positional.clone()) + .unwrap_or_default(); + if word.quoted { + // "$*" joins with first char of IFS + let ifs = self + .variables + .get("IFS") + .cloned() + .unwrap_or_else(|| " \t\n".to_string()); + let sep = ifs.chars().next().unwrap_or(' '); + return Ok(vec![positional.join(&sep.to_string())]); + } + // $* unquoted: join with space, then IFS split + let joined = positional.join(" "); + return Ok(self.ifs_split(&joined)); + } + } if let WordPart::ArrayAccess { name, index } = &word.parts[0] { if index == "@" || index == "*" { // Check assoc arrays first @@ -5678,41 +5718,31 @@ impl Interpreter { } // For other words, expand to a single field then apply IFS word splitting - // when the word is unquoted and contains a command substitution. - // Per POSIX, unquoted $() results undergo field splitting on IFS. + // when the word is unquoted and contains an expansion. + // Per POSIX, unquoted variable/command/arithmetic expansion results undergo + // field splitting on IFS. let expanded = self.expand_word(word).await?; - let has_command_subst = !word.quoted - && word - .parts - .iter() - .any(|p| matches!(p, WordPart::CommandSubstitution(_))); - - if has_command_subst { - // Split on IFS characters (default: space, tab, newline). - // Consecutive IFS-whitespace characters are collapsed (no empty fields). - let ifs = self - .variables - .get("IFS") - .cloned() - .unwrap_or_else(|| " \t\n".to_string()); - - if ifs.is_empty() { - // Empty IFS: no splitting - return Ok(vec![expanded]); - } - - let fields: Vec = expanded - .split(|c: char| ifs.contains(c)) - .filter(|s| !s.is_empty()) - .map(|s| s.to_string()) - .collect(); + // IFS splitting applies to unquoted expansions only. + // Skip splitting for assignment-like words (e.g., result="$1") where + // the lexer stripped quotes from a mixed-quoted word (produces Token::Word + // with quoted: false even though the expansion was inside double quotes). + let is_assignment_word = matches!(word.parts.first(), Some(WordPart::Literal(s)) if s.contains('=')); + let has_expansion = !word.quoted + && !is_assignment_word + && word.parts.iter().any(|p| { + matches!( + p, + WordPart::Variable(_) + | WordPart::CommandSubstitution(_) + | WordPart::ArithmeticExpansion(_) + | WordPart::ParameterExpansion { .. } + | WordPart::ArrayAccess { .. } + ) + }); - if fields.is_empty() { - // All-whitespace expansion produces zero fields (elision) - return Ok(Vec::new()); - } - Ok(fields) + if has_expansion { + Ok(self.ifs_split(&expanded)) } else { Ok(vec![expanded]) } @@ -5762,6 +5792,97 @@ impl Interpreter { (is_set, value) } + /// Split a string on IFS characters according to POSIX rules. + /// + /// - IFS whitespace (space, tab, newline) collapses; leading/trailing stripped. + /// - IFS non-whitespace chars are significant delimiters. Two adjacent produce + /// an empty field between them. + /// - `` = single delimiter (ws absorbed into the nws delimiter). + /// - Empty IFS → no splitting. Unset IFS → default " \t\n". + fn ifs_split(&self, s: &str) -> Vec { + let ifs = self + .variables + .get("IFS") + .cloned() + .unwrap_or_else(|| " \t\n".to_string()); + + if ifs.is_empty() { + return vec![s.to_string()]; + } + + let is_ifs = |c: char| ifs.contains(c); + let is_ifs_ws = |c: char| ifs.contains(c) && " \t\n".contains(c); + let is_ifs_nws = |c: char| ifs.contains(c) && !" \t\n".contains(c); + let all_whitespace_ifs = ifs.chars().all(|c| " \t\n".contains(c)); + + if all_whitespace_ifs { + // IFS is only whitespace: split on runs, elide empties + return s + .split(|c: char| is_ifs(c)) + .filter(|f| !f.is_empty()) + .map(|f| f.to_string()) + .collect(); + } + + // Mixed or pure non-whitespace IFS. + let mut fields: Vec = Vec::new(); + let mut current = String::new(); + let chars: Vec = s.chars().collect(); + let mut i = 0; + + // Skip leading IFS whitespace + while i < chars.len() && is_ifs_ws(chars[i]) { + i += 1; + } + // Leading non-whitespace IFS produces an empty first field + if i < chars.len() && is_ifs_nws(chars[i]) { + fields.push(String::new()); + i += 1; + while i < chars.len() && is_ifs_ws(chars[i]) { + i += 1; + } + } + + while i < chars.len() { + let c = chars[i]; + if is_ifs_nws(c) { + // Non-whitespace IFS delimiter: finalize current field + fields.push(std::mem::take(&mut current)); + i += 1; + // Consume trailing IFS whitespace + while i < chars.len() && is_ifs_ws(chars[i]) { + i += 1; + } + } else if is_ifs_ws(c) { + // IFS whitespace: skip it, then check for non-ws delimiter + while i < chars.len() && is_ifs_ws(chars[i]) { + i += 1; + } + if i < chars.len() && is_ifs_nws(chars[i]) { + // = single delimiter. Push current field. + fields.push(std::mem::take(&mut current)); + i += 1; // consume the nws char + while i < chars.len() && is_ifs_ws(chars[i]) { + i += 1; + } + } else if i < chars.len() { + // ws alone as delimiter (no nws follows) + fields.push(std::mem::take(&mut current)); + } + // trailing ws at end → ignore (don't push empty field) + } else { + current.push(c); + i += 1; + } + } + + if !current.is_empty() { + fields.push(current); + } + + fields + } + /// Expand an operand string from a parameter expansion (sync, lazy). /// Only called when the operand is actually needed, providing lazy evaluation. fn expand_operand(&mut self, operand: &str) -> String { diff --git a/crates/bashkit/tests/spec_cases/bash/word-split.test.sh b/crates/bashkit/tests/spec_cases/bash/word-split.test.sh index a7fa81e6..e3ae9185 100644 --- a/crates/bashkit/tests/spec_cases/bash/word-split.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/word-split.test.sh @@ -4,7 +4,7 @@ ### ws_ifs_scoped # IFS is scoped with local -### skip: TODO IFS-based word splitting not implemented +### skip: TODO local IFS not checked during word splitting IFS=b word=abcd f() { local IFS=c; echo "$word" | tr "$IFS" '\n' | while read part; do printf '[%s]' "$part"; done; echo; } @@ -21,7 +21,6 @@ echo "$#:$1:$2" ### ws_tilde_not_split # Tilde sub is not split, but var sub is -### skip: TODO set -- with word splitting not implemented HOME="foo bar" set -- ~ echo $# @@ -143,7 +142,6 @@ echo "$*" ### ws_elision_space # Unquoted whitespace-only var is elided -### skip: TODO word splitting does not elide whitespace-only expansions yet s1=' ' set -- $s1 echo $# @@ -169,7 +167,6 @@ set -- $empty; echo $# ### ws_leading_trailing_nonwhitespace_ifs # Leading/trailing with non-whitespace IFS -### skip: TODO IFS-based word splitting not implemented IFS=_ s1='_a_b_' set -- $s1 @@ -180,7 +177,6 @@ echo "$#:$1:$2:$3" ### ws_mixed_ifs_whitespace_nonwhitespace # Mixed whitespace and non-whitespace IFS -### skip: TODO IFS-based word splitting not implemented IFS='_ ' s1='_ a b _ ' s2=' a b _ ' @@ -193,7 +189,6 @@ set -- $s2; echo "$#:$1:$2" ### ws_multiple_nonwhitespace_ifs # Multiple non-whitespace IFS chars produce empty fields -### skip: TODO IFS-based word splitting not implemented IFS=_- s1='a__b---c_d' set -- $s1 @@ -212,7 +207,6 @@ for arg in "$@"; do echo "[$arg]"; done ### ws_ifs_whitespace_and_nonwhitespace # IFS with whitespace and non-whitespace -### skip: TODO IFS-based word splitting not implemented IFS='_ ' s1='a_b _ _ _ c _d e' set -- $s1 @@ -240,7 +234,6 @@ fun ### ws_unquoted_empty_elided # unquoted empty var is elided -### skip: TODO word elision not implemented empty="" set -- 1 $empty 2 echo $# @@ -250,7 +243,6 @@ echo $# ### ws_unquoted_whitespace_elided # unquoted whitespace var is elided -### skip: TODO word elision not implemented space=" " set -- 1 $space 2 echo $# @@ -270,7 +262,6 @@ echo $# ### ws_no_split_empty_ifs # no splitting when IFS is empty -### skip: TODO IFS-based word splitting not implemented IFS="" foo="a b" set -- $foo @@ -307,7 +298,6 @@ echo "$2" ### ws_ifs_empty_no_split # IFS empty prevents all splitting -### skip: TODO IFS-based word splitting not implemented IFS='' x="a b c" set -- $x @@ -318,7 +308,6 @@ echo "$#:$1" ### ws_ifs_unset_default # IFS unset behaves like space/tab/newline -### skip: TODO IFS-based word splitting not implemented unset IFS x="a b c" set -- $x From 3b1a62893f96d7fd7ff27efb9c372df5960080b6 Mon Sep 17 00:00:00 2001 From: Claude Date: Sat, 28 Feb 2026 01:44:35 +0000 Subject: [PATCH 5/7] fix(parser): detect syntax errors for empty bodies and invalid constructs (#360) - Reject empty bodies in brace groups, while/until/for/select loops, and if/elif/else clauses - Detect unterminated single and double quoted strings in lexer via Token::Error variant - Add check_error_token() to parser for propagating lexer errors - Remove skip markers from 7 spec tests that now pass - Add 9 unit tests for new error detection https://claude.ai/code/session_01QbjrsMFJbHy5XfHCzA6TjM --- crates/bashkit/src/interpreter/mod.rs | 3 +- crates/bashkit/src/lib.rs | 6 +- crates/bashkit/src/parser/lexer.rs | 12 ++ crates/bashkit/src/parser/mod.rs | 151 +++++++++++++++++- crates/bashkit/src/parser/tokens.rs | 3 + .../spec_cases/bash/empty-bodies.test.sh | 15 +- .../spec_cases/bash/parse-errors.test.sh | 2 - 7 files changed, 175 insertions(+), 17 deletions(-) diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index 77fcab69..c3177faa 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -5727,7 +5727,8 @@ impl Interpreter { // Skip splitting for assignment-like words (e.g., result="$1") where // the lexer stripped quotes from a mixed-quoted word (produces Token::Word // with quoted: false even though the expansion was inside double quotes). - let is_assignment_word = matches!(word.parts.first(), Some(WordPart::Literal(s)) if s.contains('=')); + let is_assignment_word = + matches!(word.parts.first(), Some(WordPart::Literal(s)) if s.contains('=')); let has_expansion = !word.quoted && !is_assignment_word && word.parts.iter().any(|p| { diff --git a/crates/bashkit/src/lib.rs b/crates/bashkit/src/lib.rs index f2e0237e..66a51f45 100644 --- a/crates/bashkit/src/lib.rs +++ b/crates/bashkit/src/lib.rs @@ -3848,10 +3848,10 @@ echo missing fi"#, assert!(result.is_err()); let err = result.unwrap_err(); let err_msg = format!("{}", err); - // Error should mention the problem + // Error should mention the problem (either "expected" or "syntax error") assert!( - err_msg.contains("expected"), - "Error should mention what's expected: {}", + err_msg.contains("expected") || err_msg.contains("syntax error"), + "Error should be a parse error: {}", err_msg ); } diff --git a/crates/bashkit/src/parser/lexer.rs b/crates/bashkit/src/parser/lexer.rs index 8a10a299..a364787b 100644 --- a/crates/bashkit/src/parser/lexer.rs +++ b/crates/bashkit/src/parser/lexer.rs @@ -708,16 +708,22 @@ impl<'a> Lexer<'a> { fn read_single_quoted_string(&mut self) -> Option { self.advance(); // consume opening ' let mut content = String::new(); + let mut closed = false; while let Some(ch) = self.peek_char() { if ch == '\'' { self.advance(); // consume closing ' + closed = true; break; } content.push(ch); self.advance(); } + if !closed { + return Some(Token::Error("unterminated single quote".to_string())); + } + // If next char is another quote or word char, concatenate (e.g., 'EOF'"2" -> EOF2). // Any quoting makes the whole token literal. self.read_continuation_into(&mut content); @@ -893,11 +899,13 @@ impl<'a> Lexer<'a> { fn read_double_quoted_string(&mut self) -> Option { self.advance(); // consume opening " let mut content = String::new(); + let mut closed = false; while let Some(ch) = self.peek_char() { match ch { '"' => { self.advance(); // consume closing " + closed = true; break; } '\\' => { @@ -966,6 +974,10 @@ impl<'a> Lexer<'a> { } } + if !closed { + return Some(Token::Error("unterminated double quote".to_string())); + } + Some(Token::QuotedWord(content)) } diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index a9b6a392..72015b9f 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -152,14 +152,26 @@ impl<'a> Parser<'a> { } } + /// Check if current token is an error token and return the error if so + fn check_error_token(&self) -> Result<()> { + if let Some(tokens::Token::Error(msg)) = &self.current_token { + return Err(self.error(format!("syntax error: {}", msg))); + } + Ok(()) + } + /// Parse the input and return the AST. pub fn parse(mut self) -> Result