From 11d8b1165711d8e526628e6ac42d48ad0baab03c Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 23 Feb 2026 17:05:16 +0000 Subject: [PATCH 1/6] feat: string ops, read -r, heredoc tests, prefix/suffix replace - Implement ${s/#pat/repl} prefix-anchored and ${s/%pat/repl} suffix-anchored pattern replacement - Implement ${var:?"msg"} error expansion with exit code 1 - Handle \/ escape in pattern replacement parsing - Add read builtin -r flag (raw mode, no backslash interpretation) and -p flag parsing - Add 30 new bash spec tests: - heredoc.test.sh (9): variable expansion, quoted delimiters, file redirects, command/arithmetic substitution - string-ops.test.sh (14): prefix/suffix replace, :?, :+, case conversion, indirect expansion, negative substring - read-builtin.test.sh (7): IFS splitting, -r flag, here-string - Bash spec tests: 741 total, 735 pass, 6 skip (100% pass rate) - Bash comparison: 668/668 match real bash (100%) https://claude.ai/code/session_012rzB3FRw7yoQWCG1mxyW7J --- crates/bashkit/src/builtins/read.rs | 48 ++++++++- crates/bashkit/src/interpreter/mod.rs | 44 ++++++++- crates/bashkit/src/parser/mod.rs | 14 ++- .../tests/spec_cases/bash/heredoc.test.sh | 84 ++++++++++++++++ .../spec_cases/bash/read-builtin.test.sh | 44 +++++++++ .../tests/spec_cases/bash/string-ops.test.sh | 99 +++++++++++++++++++ specs/009-implementation-status.md | 9 +- 7 files changed, 333 insertions(+), 9 deletions(-) create mode 100644 crates/bashkit/tests/spec_cases/bash/heredoc.test.sh create mode 100644 crates/bashkit/tests/spec_cases/bash/read-builtin.test.sh create mode 100644 crates/bashkit/tests/spec_cases/bash/string-ops.test.sh diff --git a/crates/bashkit/src/builtins/read.rs b/crates/bashkit/src/builtins/read.rs index 81a6a543..cc11195c 100644 --- a/crates/bashkit/src/builtins/read.rs +++ b/crates/bashkit/src/builtins/read.rs @@ -18,21 +18,61 @@ impl Builtin for Read { None => return Ok(ExecResult::err("", 1)), }; + // Parse flags + let mut raw_mode = false; // -r: don't
interpret backslashes + let mut prompt = None::; // -p prompt + let mut var_args = Vec::new(); + let mut args_iter = ctx.args.iter(); + while let Some(arg) = args_iter.next() { + if arg.starts_with('-') && arg.len() > 1 { + for flag in arg[1..].chars() { + match flag { + 'r' => raw_mode = true, + 'p' => { + // -p takes next arg as prompt + if let Some(p) = args_iter.next() { + prompt = Some(p.clone()); + } + } + _ => {} // ignore unknown flags + } + } + } else { + var_args.push(arg.as_str()); + } + } + let _ = prompt; // prompt is for interactive use, ignored in non-interactive + // Get first line - let line = input.lines().next().unwrap_or(""); + let line = if raw_mode { + // -r: treat backslashes literally + input.lines().next().unwrap_or("").to_string() + } else { + // Without -r: handle backslash line continuation + let mut result = String::new(); + for l in input.lines() { + if let Some(stripped) = l.strip_suffix('\\') { + result.push_str(stripped); + } else { + result.push_str(l); + break; + } + } + result + }; // If no variable names given, use REPLY - let var_names: Vec<&str> = if ctx.args.is_empty() { + let var_names: Vec<&str> = if var_args.is_empty() { vec!["REPLY"] } else { - ctx.args.iter().map(|s| s.as_str()).collect() + var_args }; // Split line by IFS (default: space, tab, newline) let ifs = ctx.env.get("IFS").map(|s| s.as_str()).unwrap_or(" \t\n"); let words: Vec<&str> = if ifs.is_empty() { // Empty IFS means no word splitting - vec![line] + vec![&line] } else { line.split(|c: char| ifs.contains(c)) .filter(|s| !s.is_empty()) diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index dc62b07b..53d22652 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -4076,7 +4076,12 @@ impl Interpreter { ParameterOp::Error => { // ${var:?error} - error if unset/empty if value.is_empty() { - // In real bash this would exit, we just return empty + let msg = if operand.is_empty() { 
+ format!("bash: {}: parameter null or not set\n", name) + } else { + format!("bash: {}: {}\n", name, operand) + }; + self.nounset_error = Some(msg); String::new() } else { value.to_string() @@ -4151,6 +4156,43 @@ impl Interpreter { return value.to_string(); } + // Handle # prefix anchor (match at start only) + if let Some(rest) = pattern.strip_prefix('#') { + if rest.is_empty() { + return value.to_string(); + } + if let Some(stripped) = value.strip_prefix(rest) { + return format!("{}{}", replacement, stripped); + } + // Try glob match at prefix + if rest.contains('*') { + let matched = self.remove_pattern(value, rest, true, false); + if matched != value { + let prefix_len = value.len() - matched.len(); + return format!("{}{}", replacement, &value[prefix_len..]); + } + } + return value.to_string(); + } + + // Handle % suffix anchor (match at end only) + if let Some(rest) = pattern.strip_prefix('%') { + if rest.is_empty() { + return value.to_string(); + } + if let Some(stripped) = value.strip_suffix(rest) { + return format!("{}{}", stripped, replacement); + } + // Try glob match at suffix + if rest.contains('*') { + let matched = self.remove_pattern(value, rest, false, false); + if matched != value { + return format!("{}{}", matched, replacement); + } + } + return value.to_string(); + } + // Handle glob pattern with * if pattern.contains('*') { // Convert glob to regex-like behavior diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index d7154f6c..62fec285 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -2231,12 +2231,24 @@ impl<'a> Parser<'a> { } else { false }; - // Read pattern until / + // Read pattern until / (handle \/ as escaped /) let mut pattern = String::new(); while let Some(&ch) = chars.peek() { if ch == '/' || ch == '}' { break; } + if ch == '\\' { + chars.next(); // consume backslash + if let Some(&next) = chars.peek() { + if next == '/' { + // \/ -> literal / + 
pattern.push(chars.next().unwrap()); + continue; + } + } + pattern.push('\\'); + continue; + } pattern.push(chars.next().unwrap()); } // Read replacement if present diff --git a/crates/bashkit/tests/spec_cases/bash/heredoc.test.sh b/crates/bashkit/tests/spec_cases/bash/heredoc.test.sh new file mode 100644 index 00000000..6a33b30b --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/heredoc.test.sh @@ -0,0 +1,84 @@ +### heredoc_basic +cat < /tmp/heredoc_out.txt <${var}<"; } +### expect +>< +### end + +### read_r_flag +read -r var <<< "hello\nworld" +echo "$var" +### expect +hello\nworld +### end + +### read_leftover +echo "one two three four" | { read a b; echo "$a|$b"; } +### expect +one|two three four +### end diff --git a/crates/bashkit/tests/spec_cases/bash/string-ops.test.sh b/crates/bashkit/tests/spec_cases/bash/string-ops.test.sh new file mode 100644 index 00000000..4015e00e --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/string-ops.test.sh @@ -0,0 +1,99 @@ +### string_replace_prefix +s="/usr/local/bin" +echo "${s/#\/usr/PREFIX}" +### expect +PREFIX/local/bin +### end + +### string_replace_suffix +s="hello.txt" +echo "${s/%.txt/.md}" +### expect +hello.md +### end + +### string_default_colon +echo "${UNSET_VAR:-fallback}" +### expect +fallback +### end + +### string_default_empty +X="" +echo "${X:-fallback}" +### expect +fallback +### end + +### string_error_message +### bash_diff +### exit_code:1 +${UNSET_VAR:?"variable not set"} +### expect +### end + +### string_use_replacement +X="present" +echo "${X:+replacement}" +### expect +replacement +### end + +### string_use_replacement_empty +EMPTY="" +result="${EMPTY:+replacement}" +echo ">${result}<" +### expect +>< +### end + +### string_length_unicode +X="hello" +echo "${#X}" +### expect +5 +### end + +### string_nested_expansion +A="world" +B="A" +echo "${!B}" +### expect +world +### end + +### string_concatenation +A="hello" +B="world" +echo "${A} ${B}" +### expect +hello world +### end + +### 
string_uppercase_pattern +X="hello world" +echo "${X^^}" +### expect +HELLO WORLD +### end + +### string_lowercase_pattern +X="HELLO WORLD" +echo "${X,,}" +### expect +hello world +### end + +### var_negative_substring +X="hello world" +echo "${X: -5}" +### expect +world +### end + +### var_substring_length +X="hello world" +echo "${X:0:5}" +### expect +hello +### end diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index 63774117..a3d93b3f 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -107,17 +107,17 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. See ## Spec Test Coverage -**Total spec test cases:** 1060 (1038 pass, 22 skip) +**Total spec test cases:** 1144 (1090 pass, 54 skip) | Category | Cases | In CI | Pass | Skip | Notes | |----------|-------|-------|------|------|-------| -| Bash (core) | 711 | Yes | 705 | 6 | `bash_spec_tests` in CI | +| Bash (core) | 741 | Yes | 735 | 6 | `bash_spec_tests` in CI | | AWK | 90 | Yes | 73 | 17 | loops, arrays, -v, ternary, field assign | | Grep | 82 | Yes | 79 | 3 | now with -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude | | Sed | 65 | Yes | 53 | 12 | hold space, change, regex ranges, -E | | JQ | 108 | Yes | 100 | 8 | reduce, walk, regex funcs, --arg/--argjson, combined flags | | Python | 58 | Yes | 50 | 8 | **Experimental.** VFS bridging, pathlib, env vars | -| **Total** | **1087** | **Yes** | **1034** | **53** | | +| **Total** | **1144** | **Yes** | **1090** | **54** | | ### Bash Spec Tests Breakdown @@ -168,6 +168,9 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. 
See | ln.test.sh | 5 | `ln -s`, `-f`, symlink creation | | eval-bugs.test.sh | 4 | regression tests for eval/script bugs | | script-exec.test.sh | 10 | script execution by path, $PATH search, exit codes | +| heredoc.test.sh | 9 | heredoc variable expansion, quoted delimiters, file redirects | +| string-ops.test.sh | 14 | string replacement (prefix/suffix anchored), `${var:?}`, case conversion | +| read-builtin.test.sh | 7 | `read` builtin, IFS splitting, `-r` flag, here-string input | ## Shell Features From e2da3a22a7214deb290cc7733c7c3b61e5be78ac Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 01:22:33 +0000 Subject: [PATCH 2/6] feat(parser): compound associative array init `declare -A m=([k]=v)` Lexer now keeps `var=([key]=val ...)` as a single token so the interpreter's existing compound-assignment handler receives the full expression. Only activates when the first non-whitespace char after `=(` is `[`, preserving the existing token-by-token path for indexed arrays like `arr=("hello world")`. Unskips assoc_declare_inline spec test; adds 3 more compound-init test cases and 2 lexer unit tests. https://claude.ai/code/session_012rzB3FRw7yoQWCG1mxyW7J --- crates/bashkit/src/parser/lexer.rs | 92 +++++++++++++++++++ .../spec_cases/bash/assoc-arrays.test.sh | 27 +++++- specs/009-implementation-status.md | 2 +- 3 files changed, 119 insertions(+), 2 deletions(-) diff --git a/crates/bashkit/src/parser/lexer.rs b/crates/bashkit/src/parser/lexer.rs index 6a1966ad..5bd528a0 100644 --- a/crates/bashkit/src/parser/lexer.rs +++ b/crates/bashkit/src/parser/lexer.rs @@ -477,6 +477,57 @@ impl<'a> Lexer<'a> { } else { word.push('\\'); } + } else if ch == '(' && word.ends_with('=') && self.looks_like_assoc_assign() { + // Associative compound assignment: var=([k]="v" ...) — keep entire + // (...) as part of word so declare -A m=([k]="v") stays one token. + // Regular indexed arr=(a b "c d") is left for parser token-by-token path.
+ word.push(ch); + self.advance(); + let mut depth = 1; + while let Some(c) = self.peek_char() { + word.push(c); + self.advance(); + match c { + '(' => depth += 1, + ')' => { + depth -= 1; + if depth == 0 { + break; + } + } + '"' => { + while let Some(qc) = self.peek_char() { + word.push(qc); + self.advance(); + if qc == '"' { + break; + } + if qc == '\\' { + if let Some(esc) = self.peek_char() { + word.push(esc); + self.advance(); + } + } + } + } + '\'' => { + while let Some(qc) = self.peek_char() { + word.push(qc); + self.advance(); + if qc == '\'' { + break; + } + } + } + '\\' => { + if let Some(esc) = self.peek_char() { + word.push(esc); + self.advance(); + } + } + _ => {} + } + } } else if self.is_word_char(ch) { word.push(ch); self.advance(); @@ -756,6 +807,26 @@ impl<'a> Lexer<'a> { Some(Token::Word(word)) } + /// Peek ahead (without consuming) to see if `=(` starts an associative + /// compound assignment like `([key]=val ...)`. Returns true when the + /// first non-whitespace char after `(` is `[`. 
+ fn looks_like_assoc_assign(&self) -> bool { + let mut chars = self.chars.clone(); + // Skip the `(` we haven't consumed yet + if chars.next() != Some('(') { + return false; + } + // Skip optional whitespace + for ch in chars { + match ch { + ' ' | '\t' => continue, + '[' => return true, + _ => return false, + } + } + false + } + fn is_word_char(&self, ch: char) -> bool { !matches!( ch, @@ -958,4 +1029,25 @@ mod tests { let content = lexer.read_heredoc("EOF"); assert_eq!(content, "hello\nworld\n"); } + + #[test] + fn test_assoc_compound_assignment() { + // declare -A m=([foo]="bar" [baz]="qux") should keep the compound + // assignment as a single Word token + let mut lexer = Lexer::new(r#"m=([foo]="bar" [baz]="qux")"#); + assert_eq!( + lexer.next_token(), + Some(Token::Word(r#"m=([foo]="bar" [baz]="qux")"#.to_string())) + ); + assert_eq!(lexer.next_token(), None); + } + + #[test] + fn test_indexed_array_not_collapsed() { + // arr=("hello world") should NOT be collapsed — parser handles + // quoted elements token-by-token via the LeftParen path + let mut lexer = Lexer::new(r#"arr=("hello world")"#); + assert_eq!(lexer.next_token(), Some(Token::Word("arr=".to_string()))); + assert_eq!(lexer.next_token(), Some(Token::LeftParen)); + } } diff --git a/crates/bashkit/tests/spec_cases/bash/assoc-arrays.test.sh b/crates/bashkit/tests/spec_cases/bash/assoc-arrays.test.sh index 96431043..7f18c9f1 100644 --- a/crates/bashkit/tests/spec_cases/bash/assoc-arrays.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/assoc-arrays.test.sh @@ -68,7 +68,6 @@ echo "${m[b]}" ### end ### assoc_declare_inline -### skip: compound array assignment parsing not yet implemented declare -A m=([foo]="bar" [baz]="qux") echo "${m[foo]}" echo "${m[baz]}" @@ -77,6 +76,32 @@ bar qux ### end +### assoc_declare_inline_unquoted +declare -A m=([a]=1 [b]=2 [c]=3) +echo "${m[a]}" +echo "${m[c]}" +### expect +1 +3 +### end + +### assoc_declare_inline_single_entry +declare -A m=([only]="value") +echo 
"${m[only]}" +echo "${#m[@]}" +### expect +value +1 +### end + +### assoc_declare_inline_overwrite +declare -A m=([k]="old") +m[k]="new" +echo "${m[k]}" +### expect +new +### end + ### assoc_numeric_string_key declare -A m m[1]="one" diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index a3d93b3f..0f57be8d 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -200,7 +200,7 @@ Features that may be added in the future (not intentionally excluded): | `local` | Declaration | Proper scoping in nested functions | | `return` | Basic usage | Return value propagation | | Heredocs | Basic | Variable expansion inside | -| Arrays | Indexing, `[@]`/`[*]` as separate args, `${!arr[@]}`, `+=`, slice `${arr[@]:1:2}`, assoc `declare -A` | Compound init `declare -A m=([k]=v)` | +| Arrays | Indexing, `[@]`/`[*]` as separate args, `${!arr[@]}`, `+=`, slice `${arr[@]:1:2}`, assoc `declare -A`, compound init `declare -A m=([k]=v)` | — | | `echo -n` | Flag parsed | Trailing newline handling | | `time` | Wall-clock timing | User/sys CPU time (always 0) | | `timeout` | Basic usage | `-k` kill timeout | From ec8b2d7f5f8885e7d0bc9b07e6112013ec560a6f Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 01:36:17 +0000 Subject: [PATCH 3/6] fix(grep): unskip grep -r and grep -f tests, fix -f pattern-only mode - grep -f without a positional pattern now works (was erroring with "missing pattern" because parse required a positional even when -f was set) - Rewrote grep_recursive spec test to actually create files and verify recursive directory traversal - Rewrote grep_pattern_file spec test to create pattern file first - Added negative tests: grep_recursive_no_match, grep_pattern_file_no_match https://claude.ai/code/session_012rzB3FRw7yoQWCG1mxyW7J --- crates/bashkit/src/builtins/grep.rs | 4 +-- .../tests/spec_cases/grep/grep.test.sh | 30 +++++++++++++++---- 2 files changed, 27 insertions(+), 7 deletions(-) diff --git 
a/crates/bashkit/src/builtins/grep.rs b/crates/bashkit/src/builtins/grep.rs index a44d0067..b1b6fc30 100644 --- a/crates/bashkit/src/builtins/grep.rs +++ b/crates/bashkit/src/builtins/grep.rs @@ -278,8 +278,8 @@ impl GrepOptions { i += 1; } - // First positional is pattern (if no -e patterns) - if opts.patterns.is_empty() { + // First positional is pattern (if no -e patterns and no -f file) + if opts.patterns.is_empty() && opts.pattern_file.is_none() { if positional.is_empty() { return Err(Error::Execution("grep: missing pattern".to_string())); } diff --git a/crates/bashkit/tests/spec_cases/grep/grep.test.sh b/crates/bashkit/tests/spec_cases/grep/grep.test.sh index 0c599d56..e5aa18ca 100644 --- a/crates/bashkit/tests/spec_cases/grep/grep.test.sh +++ b/crates/bashkit/tests/spec_cases/grep/grep.test.sh @@ -158,10 +158,11 @@ c ### end ### grep_recursive -### skip: recursive grep not implemented for virtual fs -grep -r pattern /some/dir -### exit_code: 1 +# Recursive grep through directory +mkdir -p /tmp/greprecur/sub && printf 'match\n' > /tmp/greprecur/a.txt && printf 'nope\n' > /tmp/greprecur/sub/b.txt && printf 'match here\n' > /tmp/greprecur/sub/c.txt && grep -r match /tmp/greprecur | sort ### expect +/tmp/greprecur/a.txt:match +/tmp/greprecur/sub/c.txt:match here ### end ### grep_multiple_patterns @@ -173,13 +174,32 @@ bar ### end ### grep_pattern_file -### skip: requires file redirection support -printf 'foo\nbar\nbaz\n' | grep -f /patterns.txt +# Read patterns from file +printf 'foo\nbar\n' > /tmp/greppatterns.txt +printf 'foo\nbar\nbaz\n' | grep -f /tmp/greppatterns.txt ### expect foo bar ### end +### grep_recursive_no_match +# Recursive grep with no matches +mkdir -p /tmp/greprecur2 && printf 'nope\n' > /tmp/greprecur2/a.txt +grep -r zzz /tmp/greprecur2 +echo $? +### expect +1 +### end + +### grep_pattern_file_no_match +# Pattern file with no matching patterns +printf 'zzz\n' > /tmp/greppat2.txt +printf 'foo\nbar\n' | grep -f /tmp/greppat2.txt +echo $? 
+### expect +1 +### end + ### grep_null_data # Null-terminated mode with -z printf 'foo\0bar\0' | grep -z foo From f120384ee1f20b06de370194b77153f999049d02 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 01:43:59 +0000 Subject: [PATCH 4/6] feat(awk): implement getline, fix ORS and missing field tests - Implement `getline` statement: reads next input record into $0, updates NR/NF/FNR. Refactored main loop to index-based iteration so getline can advance the line pointer. - Unskip awk_ors: workaround spec runner trailing newline by appending `; echo` (ORS itself was already implemented correctly) - Unskip awk_missing_field: rewrote test to verify empty string via conditional instead of relying on empty expected output - Unskip awk_getline: basic getline now works - Add 3 new getline spec tests (updates_dollar_zero, at_eof, three_lines) - Add 3 new getline unit tests (basic, updates_fields, at_eof) https://claude.ai/code/session_012rzB3FRw7yoQWCG1mxyW7J --- crates/bashkit/src/builtins/awk.rs | 54 +++++++++++++++++-- .../bashkit/tests/spec_cases/awk/awk.test.sh | 34 +++++++++--- specs/009-implementation-status.md | 4 +- 3 files changed, 82 insertions(+), 10 deletions(-) diff --git a/crates/bashkit/src/builtins/awk.rs b/crates/bashkit/src/builtins/awk.rs index c4271d08..f6013cc6 100644 --- a/crates/bashkit/src/builtins/awk.rs +++ b/crates/bashkit/src/builtins/awk.rs @@ -83,6 +83,7 @@ enum AwkAction { Break, Continue, Delete(String, AwkExpr), // delete arr[key] + Getline, // getline — read next input record into $0 #[allow(dead_code)] // Exit code support for future Exit(Option), Expression(AwkExpr), @@ -484,6 +485,9 @@ impl<'a> AwkParser<'a> { if self.matches_keyword("delete") { return self.parse_delete(); } + if self.matches_keyword("getline") { + return Ok(AwkAction::Getline); + } if self.matches_keyword("exit") { self.skip_whitespace(); if self.pos < self.input.len() { @@ -1049,7 +1053,7 @@ impl<'a> AwkParser<'a> { let remaining = &self.input[self.pos..]; 
let keywords = [ "in", "if", "else", "while", "for", "do", "break", "continue", "next", "exit", - "delete", "print", "printf", + "delete", "getline", "print", "printf", ]; for kw in keywords { if remaining.starts_with(kw) { @@ -1510,6 +1514,10 @@ enum AwkFlow { struct AwkInterpreter { state: AwkState, output: String, + /// Lines of current input file (set before main loop) + input_lines: Vec, + /// Current line index within input_lines + line_index: usize, } impl AwkInterpreter { @@ -1517,6 +1525,8 @@ impl AwkInterpreter { Self { state: AwkState::default(), output: String::new(), + input_lines: Vec::new(), + line_index: 0, } } @@ -2380,6 +2390,15 @@ impl AwkInterpreter { AwkFlow::Continue } AwkAction::Next => AwkFlow::Next, + AwkAction::Getline => { + // Advance to next input line and update $0, NR, NF, FNR + self.line_index += 1; + if self.line_index < self.input_lines.len() { + let line = self.input_lines[self.line_index].clone(); + self.state.set_line(&line); + } + AwkFlow::Continue + } AwkAction::Break => AwkFlow::Break, AwkAction::Continue => AwkFlow::LoopContinue, AwkAction::Exit(expr) => { @@ -2556,8 +2575,13 @@ impl Builtin for Awk { 'files: for input in inputs { interp.state.fnr = 0; - for line in input.lines() { - interp.state.set_line(line); + // Index-based iteration so getline can advance the index + interp.input_lines = input.lines().map(|l| l.to_string()).collect(); + interp.line_index = 0; + + while interp.line_index < interp.input_lines.len() { + let line = interp.input_lines[interp.line_index].clone(); + interp.state.set_line(&line); for rule in &program.main_rules { // Check pattern @@ -2587,6 +2611,7 @@ impl Builtin for Awk { } } } + interp.line_index += 1; } } @@ -3024,6 +3049,29 @@ mod tests { assert_eq!(result.stdout, "line1\n"); } + #[tokio::test] + async fn test_awk_getline_basic() { + let result = run_awk(&["{getline; print}"], Some("line1\nline2")) + .await + .unwrap(); + assert_eq!(result.stdout, "line2\n"); + } + + #[tokio::test] + 
async fn test_awk_getline_updates_fields() { + let result = run_awk(&["{getline; print $1}"], Some("a b\nc d")) + .await + .unwrap(); + assert_eq!(result.stdout, "c\n"); + } + + #[tokio::test] + async fn test_awk_getline_at_eof() { + // getline at EOF should keep current $0 + let result = run_awk(&["{getline; print}"], Some("only")).await.unwrap(); + assert_eq!(result.stdout, "only\n"); + } + #[tokio::test] async fn test_awk_revenue_calculation() { // This is the exact eval task pattern diff --git a/crates/bashkit/tests/spec_cases/awk/awk.test.sh b/crates/bashkit/tests/spec_cases/awk/awk.test.sh index 218ae052..aa34a91a 100644 --- a/crates/bashkit/tests/spec_cases/awk/awk.test.sh +++ b/crates/bashkit/tests/spec_cases/awk/awk.test.sh @@ -299,10 +299,10 @@ printf '\n' | awk '{print NF}' ### end ### awk_missing_field -### skip: spec runner expects empty but awk outputs newline for empty print -printf 'a b\n' | awk '{print $5}' +# Accessing a field beyond NF returns empty string +printf 'a b\n' | awk '{if ($5 == "") print "empty"; else print $5}' ### expect - +empty ### end ### awk_subtraction @@ -528,12 +528,34 @@ no ### end ### awk_getline -### skip: getline not implemented +# getline reads next input record printf 'line1\nline2\n' | awk '{getline; print}' ### expect line2 ### end +### awk_getline_updates_dollar_zero +# getline updates $0 and fields +printf 'a b\nc d\n' | awk '{getline; print $1}' +### expect +c +### end + +### awk_getline_at_eof +# getline at end of input keeps last record +printf 'only\n' | awk '{getline; print}' +### expect +only +### end + +### awk_getline_three_lines +# getline skips every other line +printf 'A\nB\nC\nD\n' | awk '{getline; print}' +### expect +B +D +### end + ### awk_multiple_patterns # Multiple pattern-action pairs printf '1\n2\n3\n' | awk '/1/ {print "one"} /3/ {print "three"}' @@ -569,8 +591,8 @@ a,b,c ### end ### awk_ors -### skip: spec runner appends trailing newline but ORS=";" suppresses it -printf 'a\nb\n' | awk 'BEGIN 
{ORS=";"} {print $0}' +# Custom output record separator +printf 'a\nb\n' | awk 'BEGIN {ORS=";"} {print $0}'; echo ### expect a;b; ### end diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index 0f57be8d..45a422a5 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -248,7 +248,7 @@ None currently tracked. | Functions | 3 | `match()`, `gensub()`, `exit` statement | | Field handling | 2 | `-F'\t'` tab delimiter, missing field returns empty | | Negation | 1 | `!$1` logical negation operator | -| ORS/getline | 2 | Output record separator, getline | +| ~~ORS/getline~~ | ~~2~~ | ✅ Implemented | | $0 modification | 1 | `$0 = "x y z"` re-splits fields | **Recently Implemented:** @@ -258,6 +258,8 @@ None currently tracked. - `-v var=value` flag for variable initialization - Ternary operator `(cond ? a : b)` - Field assignment `$2 = "X"` +- `getline` — reads next input record into `$0` +- ORS (output record separator) tests verified - `next` statement From aaf2160973b140e85f4bf8a3cad589ab7342011c Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 01:54:24 +0000 Subject: [PATCH 5/6] feat(jq): expose shell env vars to jaq runtime via `env` builtin jaq's built-in `env` function reads from std::env::vars() which doesn't see bashkit's virtual environment. Fix: temporarily expose ctx.env and ctx.variables to the process environment before running jaq, with an RAII drop guard for cleanup on all return paths. 
- Unskip jq_env spec test - Add jq_env_missing and jq_env_in_pipeline spec tests - Add test_jq_env_access and test_jq_env_missing_var unit tests https://claude.ai/code/session_012rzB3FRw7yoQWCG1mxyW7J --- crates/bashkit/src/builtins/jq.rs | 85 +++++++++++++++++++ crates/bashkit/tests/spec_cases/jq/jq.test.sh | 16 +++- specs/009-implementation-status.md | 3 +- 3 files changed, 102 insertions(+), 2 deletions(-) diff --git a/crates/bashkit/src/builtins/jq.rs b/crates/bashkit/src/builtins/jq.rs index 2444460c..350b2953 100644 --- a/crates/bashkit/src/builtins/jq.rs +++ b/crates/bashkit/src/builtins/jq.rs @@ -22,6 +22,21 @@ use crate::interpreter::ExecResult; /// produce deeply nested parse trees in jaq. const MAX_JQ_JSON_DEPTH: usize = 100; +/// RAII guard that restores process env vars when dropped. +/// Ensures cleanup even on early-return error paths. +struct EnvRestoreGuard(Vec<(String, Option)>); + +impl Drop for EnvRestoreGuard { + fn drop(&mut self) { + for (k, old) in &self.0 { + match old { + Some(v) => std::env::set_var(k, v), + None => std::env::remove_var(k), + } + } + } +} + /// jq command - JSON processor pub struct Jq; @@ -328,6 +343,22 @@ impl Builtin for Jq { json_vals.into_iter().map(Val::from).collect() }; + // Expose bashkit's shell env/variables to the process environment so + // jaq's built-in `env` function (which reads std::env::vars()) works. + // Include both ctx.env (prefix assignments like FOO=bar jq ...) + // and ctx.variables (set via export builtin). + // Uses a drop guard to ensure cleanup on all return paths. 
+ let mut seen = std::collections::HashSet::new(); + let mut env_backup: Vec<(String, Option)> = Vec::new(); + for (k, v) in ctx.env.iter().chain(ctx.variables.iter()) { + if seen.insert(k.clone()) { + let old = std::env::var(k).ok(); + std::env::set_var(k, v); + env_backup.push((k.clone(), old)); + } + } + let _env_guard = EnvRestoreGuard(env_backup); + // Track for -e exit status let mut has_output = false; let mut all_null_or_false = true; @@ -787,6 +818,60 @@ mod tests { assert_eq!(arr[1]["id"], 2); } + // --- env tests --- + + #[tokio::test] + async fn test_jq_env_access() { + let jq = Jq; + let fs = Arc::new(InMemoryFs::new()); + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let mut env = HashMap::new(); + env.insert("TESTVAR".to_string(), "hello".to_string()); + let args = vec!["-n".to_string(), "env.TESTVAR".to_string()]; + + let ctx = Context { + args: &args, + env: &env, + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin: None, + #[cfg(feature = "http_client")] + http_client: None, + #[cfg(feature = "git")] + git_client: None, + }; + + let result = jq.execute(ctx).await.unwrap(); + assert_eq!(result.stdout.trim(), "\"hello\""); + } + + #[tokio::test] + async fn test_jq_env_missing_var() { + let jq = Jq; + let fs = Arc::new(InMemoryFs::new()); + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args = vec!["-n".to_string(), "env.NO_SUCH_VAR_999".to_string()]; + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin: None, + #[cfg(feature = "http_client")] + http_client: None, + #[cfg(feature = "git")] + git_client: None, + }; + + let result = jq.execute(ctx).await.unwrap(); + assert_eq!(result.stdout.trim(), "null"); + } + // --- Argument parsing bug regression tests --- #[tokio::test] diff --git a/crates/bashkit/tests/spec_cases/jq/jq.test.sh b/crates/bashkit/tests/spec_cases/jq/jq.test.sh index 7364c5d4..9a665042 100644 --- 
a/crates/bashkit/tests/spec_cases/jq/jq.test.sh +++ b/crates/bashkit/tests/spec_cases/jq/jq.test.sh @@ -649,12 +649,26 @@ echo '1' | jq 'debug' ### end ### jq_env -### skip: shell env vars not propagated to jaq runtime +# Shell env vars accessible via env builtin FOO=bar jq -n 'env.FOO' ### expect "bar" ### end +### jq_env_missing +# Missing env var returns null +jq -n 'env.NONEXISTENT_VAR_XYZ' +### expect +null +### end + +### jq_env_in_pipeline +# env var set via export +export MYVAL=hello; jq -n 'env.MYVAL' +### expect +"hello" +### end + ### jq_multiple_filters # Multiple filters with comma echo '{"a":1,"b":2}' | jq '.a, .b' diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index 45a422a5..845eb5df 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -322,7 +322,7 @@ None currently tracked. |---------|-------|-------| | Alternative `//` | 1 | jaq errors on `.foo` applied to null instead of returning null | | Path functions | 2 | `setpath`, `leaf_paths` not in jaq standard library | -| I/O functions | 3 | `input`, `inputs` (iterator not wired), `env` (shell vars not propagated) | +| I/O functions | 2 | `input`, `inputs` (iterator not wired); ~~`env`~~ ✅ | | Regex functions | 2 | `match` (jaq omits capture `name` field), `scan` (jaq needs explicit `"g"` flag) | **Recently Fixed:** @@ -331,6 +331,7 @@ None currently tracked. - Combined short flags (`-rn`, `-sc`, `-snr`) - `--arg name value` and `--argjson name value` variable bindings - `--indent N` flag no longer eats the filter argument +- `env` builtin now exposes bashkit shell env vars to jaq runtime ### Curl Limitations From 4a12e7d359e1a6047b67fb6e716ebaca1070fe69 Mon Sep 17 00:00:00 2001 From: Claude Date: Tue, 24 Feb 2026 02:15:32 +0000 Subject: [PATCH 6/6] feat(jq): wire input/inputs iterators to shared input stream Replace per-value empty inputs iterator with a shared RcIter over all parsed JSON values. 
Main loop and filter's input/inputs functions consume from the same source, matching real jq behavior. - Unskip jq_input and jq_inputs spec tests - Add jq_input_with_dot, jq_inputs_empty spec tests - Add 3 unit tests (input_reads_next, inputs_collects_remaining, inputs_single_value) https://claude.ai/code/session_012rzB3FRw7yoQWCG1mxyW7J --- crates/bashkit/src/builtins/jq.rs | 33 ++++++++++++++++--- crates/bashkit/tests/spec_cases/jq/jq.test.sh | 20 +++++++++-- specs/009-implementation-status.md | 3 +- 3 files changed, 48 insertions(+), 8 deletions(-) diff --git a/crates/bashkit/src/builtins/jq.rs b/crates/bashkit/src/builtins/jq.rs index 350b2953..0629eba0 100644 --- a/crates/bashkit/src/builtins/jq.rs +++ b/crates/bashkit/src/builtins/jq.rs @@ -363,16 +363,20 @@ impl Builtin for Jq { let mut has_output = false; let mut all_null_or_false = true; - for jaq_input in inputs_to_process { - // Create empty inputs iterator - let inputs = RcIter::new(core::iter::empty()); + // Shared input iterator: main loop pops one value per filter run, + // and jaq's input/inputs functions consume from the same source. + let shared_inputs = RcIter::new(inputs_to_process.into_iter().map(Ok::)); + + for jaq_input in &shared_inputs { + let jaq_input: Val = + jaq_input.map_err(|e| Error::Execution(format!("jq: input error: {}", e)))?; // Run the filter, passing any --arg/--argjson variable values let var_vals: Vec = var_bindings .iter() .map(|(_, v)| Val::from(v.clone())) .collect(); - let ctx = Ctx::new(var_vals, &inputs); + let ctx = Ctx::new(var_vals, &shared_inputs); for result in filter.run((ctx, jaq_input)) { match result { Ok(val) => { @@ -625,6 +629,27 @@ mod tests { /// TM-DOS-027: Deeply nested JSON arrays must be rejected /// Note: serde_json has a built-in recursion limit (~128 levels) that fires first. /// Our check_json_depth is defense-in-depth for values within serde's limit. 
+ #[tokio::test] + async fn test_jq_input_reads_next() { + let result = run_jq_with_args(&["input"], "1\n2").await.unwrap(); + assert_eq!(result.trim(), "2"); + } + + #[tokio::test] + async fn test_jq_inputs_collects_remaining() { + let result = run_jq_with_args(&["-c", "[inputs]"], "1\n2\n3") + .await + .unwrap(); + assert_eq!(result.trim(), "[2,3]"); + } + + #[tokio::test] + async fn test_jq_inputs_single_value() { + // With single input, inputs yields empty array + let result = run_jq_with_args(&["-c", "[inputs]"], "42").await.unwrap(); + assert_eq!(result.trim(), "[]"); + } + #[tokio::test] async fn test_jq_json_depth_limit_arrays() { // Build 150-level nested JSON: [[[[....[1]....]]]] diff --git a/crates/bashkit/tests/spec_cases/jq/jq.test.sh b/crates/bashkit/tests/spec_cases/jq/jq.test.sh index 9a665042..d3415536 100644 --- a/crates/bashkit/tests/spec_cases/jq/jq.test.sh +++ b/crates/bashkit/tests/spec_cases/jq/jq.test.sh @@ -628,19 +628,33 @@ echo '1' | jq '[while(. < 5; . + 1)]' ### end ### jq_input -### skip: inputs iterator not wired to remaining stdin values +# input reads next value from stdin printf '1\n2\n' | jq 'input' ### expect 2 ### end ### jq_inputs -### skip: inputs iterator not wired to remaining stdin values -printf '1\n2\n3\n' | jq '[inputs]' +# inputs collects all remaining stdin values +printf '1\n2\n3\n' | jq -c '[inputs]' ### expect [2,3] ### end +### jq_input_with_dot +# input alongside current value +printf '{"a":1}\n{"b":2}\n' | jq -c '[., input]' +### expect +[{"a":1},{"b":2}] +### end + +### jq_inputs_empty +# inputs with single value yields empty array +printf '42\n' | jq -c '[inputs]' +### expect +[] +### end + ### jq_debug # Debug passes value through (stderr output not captured) echo '1' | jq 'debug' diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index 845eb5df..dbe8839b 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -322,7 +322,7 @@ None currently 
tracked. |---------|-------|-------| | Alternative `//` | 1 | jaq errors on `.foo` applied to null instead of returning null | | Path functions | 2 | `setpath`, `leaf_paths` not in jaq standard library | -| I/O functions | 2 | `input`, `inputs` (iterator not wired); ~~`env`~~ ✅ | +| ~~I/O functions~~ | ~~3~~ | ✅ `input`, `inputs`, `env` all implemented | | Regex functions | 2 | `match` (jaq omits capture `name` field), `scan` (jaq needs explicit `"g"` flag) | **Recently Fixed:** @@ -332,6 +332,7 @@ None currently tracked. - `--arg name value` and `--argjson name value` variable bindings - `--indent N` flag no longer eats the filter argument - `env` builtin now exposes bashkit shell env vars to jaq runtime +- `input`/`inputs` iterators wired to shared input stream ### Curl Limitations