/// Format a number the way AWK's default `OFMT` (`%.6g`) does.
///
/// Rules, in order:
/// * `NaN` is rendered as `nan`, the infinities as `inf` / `-inf`.
/// * Integral values with magnitude below `1e16` are printed as plain
///   integers, without a decimal point.
/// * Every other value is rounded to six significant digits. If the decimal
///   exponent *after rounding* lies in `-4..6` the value is printed in fixed
///   notation, otherwise in scientific notation with a signed exponent of at
///   least two digits (`1e+06`, `1.234e-05`). Trailing zeros of the fraction
///   (and a then-dangling `.`) are removed in both notations.
///
/// The notation is chosen from the exponent of the already rounded value,
/// as C's `%g` specifies. Deriving it from `log10` of the raw input picks the
/// wrong branch whenever rounding carries into the next power of ten
/// (`999999.7` must print as `1e+06`, not `1000000`).
fn format_awk_number(n: f64) -> String {
    /// Strip trailing fractional zeros and a dangling decimal point.
    fn trim_fraction(digits: &str) -> &str {
        if digits.contains('.') {
            digits.trim_end_matches('0').trim_end_matches('.')
        } else {
            digits
        }
    }

    if n.is_nan() {
        return "nan".to_string();
    }
    if n.is_infinite() {
        return if n > 0.0 { "inf" } else { "-inf" }.to_string();
    }
    // Integers: no decimal point. The magnitude bound keeps the cast to i64
    // well inside its range.
    if n.fract() == 0.0 && n.abs() < 1e16 {
        return format!("{}", n as i64);
    }

    // Round to 6 significant digits first (1 leading digit + 5 decimals);
    // Rust renders this as e.g. "9.99999e5" or "-1.23400e-5".
    let scientific = format!("{:.*e}", 5, n);
    let (mantissa, exponent) = match scientific.split_once('e') {
        Some((mantissa, exp_text)) => (mantissa, exp_text.parse::<i32>().unwrap_or(0)),
        None => (scientific.as_str(), 0),
    };

    if (-4..6).contains(&exponent) {
        // Fixed notation: keep 6 significant digits in total. `exponent` is
        // at most 5 here, so the subtraction cannot go negative.
        let decimal_places = (5 - exponent) as usize;
        let fixed = format!("{:.*}", decimal_places, n);
        trim_fraction(&fixed).to_string()
    } else {
        // Scientific notation: C printf writes the exponent with an explicit
        // sign and at least two digits (e+06), whereas Rust writes e6.
        format!("{}e{:+03}", trim_fraction(mantissa), exponent)
    }
}
if -z flag is set, otherwise split on newlines let lines: Vec<&str> = if opts.null_terminated { content.split('\0').collect() @@ -669,6 +672,16 @@ impl Builtin for Grep { } // Now generate output + // Binary file: just report "Binary file X matches" instead of lines + if is_binary && file_matched && !opts.count_only && !opts.files_with_matches { + let display_name = if filename.is_empty() { + "(standard input)" + } else { + filename.as_str() + }; + output.push_str(&format!("Binary file {} matches\n", display_name)); + continue 'file_loop; + } if opts.files_with_matches && file_matched { output.push_str(filename); output.push('\n'); diff --git a/crates/bashkit/tests/spec_cases/awk/awk.test.sh b/crates/bashkit/tests/spec_cases/awk/awk.test.sh index aa34a91a..5dce9cd2 100644 --- a/crates/bashkit/tests/spec_cases/awk/awk.test.sh +++ b/crates/bashkit/tests/spec_cases/awk/awk.test.sh @@ -418,12 +418,18 @@ printf '0\n' | awk '{print sin($1), cos($1)}' ### end ### awk_exp_log_func -### skip: exp/log function output precision differs +# exp/log use %.6g formatting (6 significant digits) printf '1\n' | awk '{print exp($1)}' ### expect 2.71828 ### end +### awk_log_func +printf '100\n' | awk '{print log($1)}' +### expect +4.60517 +### end + ### awk_match_func printf 'hello world\n' | awk '{if (match($0, /wor/)) print RSTART, RLENGTH}' ### expect @@ -514,13 +520,22 @@ found ### end ### awk_for_in_array -### skip: for-in array iteration order not deterministic +# for-in iterates keys in sorted order (numeric, then lexical) printf 'a\n' | awk 'BEGIN {a[1]="x"; a[2]="y"} {for (k in a) print k, a[k]}' ### expect 1 x 2 y ### end +### awk_for_in_string_keys +# for-in with string keys sorts lexically +printf 'a\n' | awk 'BEGIN {a["b"]="2"; a["a"]="1"; a["c"]="3"} {for (k in a) print k, a[k]}' +### expect +a 1 +b 2 +c 3 +### end + ### awk_delete_array printf 'a\n' | awk 'BEGIN {a[1]="x"; delete a[1]} {print (1 in a) ? 
"yes" : "no"}' ### expect diff --git a/crates/bashkit/tests/spec_cases/grep/grep.test.sh b/crates/bashkit/tests/spec_cases/grep/grep.test.sh index e5aa18ca..5e09d667 100644 --- a/crates/bashkit/tests/spec_cases/grep/grep.test.sh +++ b/crates/bashkit/tests/spec_cases/grep/grep.test.sh @@ -348,11 +348,18 @@ printf 'foo123\n' | grep -P 'foo\d+' foo123 ### end -### grep_ignore_binary -### skip: binary file detection not implemented +### grep_binary_detect +# Binary file detection: content with null bytes triggers binary message printf 'foo\0bar\n' | grep foo ### expect -foo +Binary file (standard input) matches +### end + +### grep_binary_with_a_flag +# -a flag treats binary as text, outputs match normally +printf 'foo\0bar\n' | grep -a foo +### expect +foobar ### end ### grep_include_pattern diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index dbe8839b..b7d1a9d5 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -107,17 +107,16 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. 
See ## Spec Test Coverage -**Total spec test cases:** 1144 (1090 pass, 54 skip) +**Total spec test cases:** 1105 (1095 pass, 10 skip) | Category | Cases | In CI | Pass | Skip | Notes | |----------|-------|-------|------|------|-------| -| Bash (core) | 741 | Yes | 735 | 6 | `bash_spec_tests` in CI | -| AWK | 90 | Yes | 73 | 17 | loops, arrays, -v, ternary, field assign | -| Grep | 82 | Yes | 79 | 3 | now with -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude | -| Sed | 65 | Yes | 53 | 12 | hold space, change, regex ranges, -E | -| JQ | 108 | Yes | 100 | 8 | reduce, walk, regex funcs, --arg/--argjson, combined flags | -| Python | 58 | Yes | 50 | 8 | **Experimental.** VFS bridging, pathlib, env vars | -| **Total** | **1144** | **Yes** | **1090** | **54** | | +| Bash (core) | 744 | Yes | 739 | 5 | `bash_spec_tests` in CI | +| AWK | 96 | Yes | 96 | 0 | loops, arrays, -v, ternary, field assign, getline, %.6g | +| Grep | 76 | Yes | 76 | 0 | -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude, binary detect | +| Sed | 75 | Yes | 75 | 0 | hold space, change, regex ranges, -E | +| JQ | 114 | Yes | 109 | 5 | reduce, walk, regex funcs, --arg/--argjson, combined flags, input/inputs, env | +| **Total** | **1105** | **Yes** | **1095** | **10** | | ### Bash Spec Tests Breakdown @@ -239,38 +238,24 @@ None currently tracked. 
- Array assignment in split: `split($0, arr, ":")` ✅ - Complex regex patterns -**Skipped Tests (15):** +**Skipped Tests: 0** (all AWK tests pass) -| Feature | Count | Notes | -|---------|-------|-------| -| Power operators | 2 | `^`, `**` | -| Printf formats | 4 | `%x`, `%o`, `%c`, width specifier | -| Functions | 3 | `match()`, `gensub()`, `exit` statement | -| Field handling | 2 | `-F'\t'` tab delimiter, missing field returns empty | -| Negation | 1 | `!$1` logical negation operator | -| ~~ORS/getline~~ | ~~2~~ | ✅ Implemented | -| $0 modification | 1 | `$0 = "x y z"` re-splits fields | - -**Recently Implemented:** +**Implemented Features:** - For/while/do-while loops with break/continue - Postfix/prefix increment/decrement (`i++`, `++i`, `i--`, `--i`) -- Arrays: `arr[key]=val`, `"key" in arr`, `for (k in arr)`, `delete arr[k]` +- Arrays: `arr[key]=val`, `"key" in arr`, `for (k in arr)` (sorted), `delete arr[k]` - `-v var=value` flag for variable initialization - Ternary operator `(cond ? a : b)` -- Field assignment `$2 = "X"` +- Field assignment `$2 = "X"`, `$0 = "x y z"` re-splits fields - `getline` — reads next input record into `$0` -- ORS (output record separator) tests verified -- `next` statement - - - - - - - - - - +- ORS (output record separator) +- `next`, `exit` with code +- Power operators `^`, `**` +- Printf formats: `%x`, `%o`, `%c`, width specifier +- `match()` (RSTART/RLENGTH), `gensub()`, `sub()`, `gsub()` +- `!$1` logical negation, `-F'\t'` tab delimiter +- `%.6g` number formatting (OFMT-compatible) +- Deterministic `for-in` iteration (sorted keys) ### Sed Limitations @@ -292,13 +277,7 @@ None currently tracked. 
### Grep Limitations -**Skipped Tests (3):** - -| Feature | Count | Notes | -|---------|-------|-------| -| Recursive test | 1 | Test needs VFS setup with files | -| Pattern file `-f` | 1 | Requires file redirection support | -| Binary detection | 1 | Auto-detect binary files | +**Skipped Tests: 0** (all grep tests pass) **Implemented Features:** - Basic flags: `-i`, `-v`, `-c`, `-n`, `-o`, `-l`, `-w`, `-E`, `-F`, `-q`, `-m`, `-x` @@ -310,19 +289,18 @@ None currently tracked. - Byte offset: `-b` - Null-terminated: `-z` (split on `\0` instead of `\n`) - Recursive: `-r`/`-R` (uses VFS read_dir) -- Binary handling: `-a` (filter null bytes) +- Binary handling: `-a` (filter null bytes), auto-detect binary (null byte → "Binary file ... matches") - Perl regex: `-P` (regex crate supports PCRE features) - No-op flags: `--color`, `--line-buffered` ### JQ Limitations -**Skipped Tests (8):** +**Skipped Tests (5):** | Feature | Count | Notes | |---------|-------|-------| | Alternative `//` | 1 | jaq errors on `.foo` applied to null instead of returning null | | Path functions | 2 | `setpath`, `leaf_paths` not in jaq standard library | -| ~~I/O functions~~ | ~~3~~ | ✅ `input`, `inputs`, `env` all implemented | | Regex functions | 2 | `match` (jaq omits capture `name` field), `scan` (jaq needs explicit `"g"` flag) | **Recently Fixed:**