Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 51 additions & 8 deletions crates/bashkit/src/builtins/awk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,50 @@ enum AwkValue {
Uninitialized,
}

/// Format a number the way AWK's default `OFMT` (`%.6g`) does.
///
/// The value is rounded to 6 significant digits, trailing zeros (and a
/// dangling decimal point) are removed, and scientific notation with a signed,
/// at-least-two-digit exponent (`1e+06`, `1.5e-05`) is used when the decimal
/// exponent of the *rounded* value is below -4 or at least 6.
///
/// Special cases:
/// - NaN renders as `nan`, infinities as `inf` / `-inf`.
/// - Integral values with magnitude below 1e16 render as plain integers.
fn format_awk_number(n: f64) -> String {
    if n.is_nan() {
        return "nan".to_string();
    }
    if n.is_infinite() {
        return if n > 0.0 { "inf" } else { "-inf" }.to_string();
    }
    // Integers: no decimal point. The magnitude guard keeps the i64 cast lossless.
    if n.fract() == 0.0 && n.abs() < 1e16 {
        return format!("{}", n as i64);
    }

    // Round to 6 significant digits first and read the exponent from the
    // rounded rendering. Deriving it from log10 of the unrounded value picks
    // the wrong notation when rounding carries into the next power of ten
    // (999999.7 must print as 1e+06, 0.00009999996 as 0.0001).
    let sci = format!("{:.5e}", n);
    let (mantissa, exp_part) = sci.split_once('e').unwrap_or((sci.as_str(), "0"));
    let exp: i32 = exp_part.parse().unwrap_or(0);

    if !(-4..6).contains(&exp) {
        // Scientific notation: trim trailing zeros in the mantissa and emit a
        // C-style exponent (Rust prints `e6`, printf prints `e+06`).
        let mantissa = mantissa.trim_end_matches('0').trim_end_matches('.');
        format!("{}e{:+03}", mantissa, exp)
    } else {
        // Fixed notation: 6 significant digits => (5 - exp) decimal places.
        let decimal_places = (5 - exp).max(0) as usize;
        let fixed = format!("{:.*}", decimal_places, n);
        if fixed.contains('.') {
            fixed
                .trim_end_matches('0')
                .trim_end_matches('.')
                .to_string()
        } else {
            fixed
        }
    }
}

impl AwkValue {
fn as_number(&self) -> f64 {
match self {
Expand All @@ -118,13 +162,7 @@ impl AwkValue {

fn as_string(&self) -> String {
match self {
AwkValue::Number(n) => {
if n.fract() == 0.0 {
format!("{}", *n as i64)
} else {
format!("{}", n)
}
}
AwkValue::Number(n) => format_awk_number(*n),
AwkValue::String(s) => s.clone(),
AwkValue::Uninitialized => String::new(),
}
Expand Down Expand Up @@ -2340,13 +2378,18 @@ impl AwkInterpreter {
AwkAction::ForIn(var, arr_name, actions) => {
// Collect array keys matching the pattern arr_name[*]
let prefix = format!("{}[", arr_name);
let keys: Vec<String> = self
let mut keys: Vec<String> = self
.state
.variables
.keys()
.filter(|k| k.starts_with(&prefix) && k.ends_with(']'))
.map(|k| k[prefix.len()..k.len() - 1].to_string())
.collect();
// Sort for deterministic iteration: compare numerically when both keys parse as numbers, lexically otherwise
keys.sort_by(|a, b| match (a.parse::<f64>(), b.parse::<f64>()) {
(Ok(na), Ok(nb)) => na.partial_cmp(&nb).unwrap_or(std::cmp::Ordering::Equal),
_ => a.cmp(b),
});

for key in keys {
self.state.set_variable(var, AwkValue::String(key));
Expand Down
13 changes: 13 additions & 0 deletions crates/bashkit/src/builtins/grep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,9 @@ impl Builtin for Grep {
let mut match_count = 0;
let mut file_matched = false;

// Binary detection: content with null bytes, -a and -z not set
let is_binary = !opts.binary_as_text && !opts.null_terminated && content.contains('\0');

// Split on null bytes if -z flag is set, otherwise split on newlines
let lines: Vec<&str> = if opts.null_terminated {
content.split('\0').collect()
Expand Down Expand Up @@ -669,6 +672,16 @@ impl Builtin for Grep {
}

// Now generate output
// Binary file: just report "Binary file X matches" instead of lines
if is_binary && file_matched && !opts.count_only && !opts.files_with_matches {
let display_name = if filename.is_empty() {
"(standard input)"
} else {
filename.as_str()
};
output.push_str(&format!("Binary file {} matches\n", display_name));
continue 'file_loop;
}
if opts.files_with_matches && file_matched {
output.push_str(filename);
output.push('\n');
Expand Down
19 changes: 17 additions & 2 deletions crates/bashkit/tests/spec_cases/awk/awk.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -418,12 +418,18 @@ printf '0\n' | awk '{print sin($1), cos($1)}'
### end

### awk_exp_log_func
### skip: exp/log function output precision differs
# exp/log use %.6g formatting (6 significant digits)
printf '1\n' | awk '{print exp($1)}'
### expect
2.71828
### end

### awk_log_func
printf '100\n' | awk '{print log($1)}'
### expect
4.60517
### end

### awk_match_func
printf 'hello world\n' | awk '{if (match($0, /wor/)) print RSTART, RLENGTH}'
### expect
Expand Down Expand Up @@ -514,13 +520,22 @@ found
### end

### awk_for_in_array
### skip: for-in array iteration order not deterministic
# for-in iterates keys in sorted order (numeric, then lexical)
printf 'a\n' | awk 'BEGIN {a[1]="x"; a[2]="y"} {for (k in a) print k, a[k]}'
### expect
1 x
2 y
### end

### awk_for_in_string_keys
# for-in with string keys sorts lexically
printf 'a\n' | awk 'BEGIN {a["b"]="2"; a["a"]="1"; a["c"]="3"} {for (k in a) print k, a[k]}'
### expect
a 1
b 2
c 3
### end

### awk_delete_array
printf 'a\n' | awk 'BEGIN {a[1]="x"; delete a[1]} {print (1 in a) ? "yes" : "no"}'
### expect
Expand Down
13 changes: 10 additions & 3 deletions crates/bashkit/tests/spec_cases/grep/grep.test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -348,11 +348,18 @@ printf 'foo123\n' | grep -P 'foo\d+'
foo123
### end

### grep_ignore_binary
### skip: binary file detection not implemented
### grep_binary_detect
# Binary file detection: content with null bytes triggers binary message
printf 'foo\0bar\n' | grep foo
### expect
foo
Binary file (standard input) matches
### end

### grep_binary_with_a_flag
# -a flag treats binary as text, outputs match normally
printf 'foo\0bar\n' | grep -a foo
### expect
foobar
### end

### grep_include_pattern
Expand Down
66 changes: 22 additions & 44 deletions specs/009-implementation-status.md
Original file line number Diff line number Diff line change
Expand Up @@ -107,17 +107,16 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. See

## Spec Test Coverage

**Total spec test cases:** 1144 (1090 pass, 54 skip)
**Total spec test cases:** 1105 (1095 pass, 10 skip)

| Category | Cases | In CI | Pass | Skip | Notes |
|----------|-------|-------|------|------|-------|
| Bash (core) | 741 | Yes | 735 | 6 | `bash_spec_tests` in CI |
| AWK | 90 | Yes | 73 | 17 | loops, arrays, -v, ternary, field assign |
| Grep | 82 | Yes | 79 | 3 | now with -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude |
| Sed | 65 | Yes | 53 | 12 | hold space, change, regex ranges, -E |
| JQ | 108 | Yes | 100 | 8 | reduce, walk, regex funcs, --arg/--argjson, combined flags |
| Python | 58 | Yes | 50 | 8 | **Experimental.** VFS bridging, pathlib, env vars |
| **Total** | **1144** | **Yes** | **1090** | **54** | |
| Bash (core) | 744 | Yes | 739 | 5 | `bash_spec_tests` in CI |
| AWK | 96 | Yes | 96 | 0 | loops, arrays, -v, ternary, field assign, getline, %.6g |
| Grep | 76 | Yes | 76 | 0 | -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude, binary detect |
| Sed | 75 | Yes | 75 | 0 | hold space, change, regex ranges, -E |
| JQ | 114 | Yes | 109 | 5 | reduce, walk, regex funcs, --arg/--argjson, combined flags, input/inputs, env |
| **Total** | **1105** | **Yes** | **1095** | **10** | |

### Bash Spec Tests Breakdown

Expand Down Expand Up @@ -239,38 +238,24 @@ None currently tracked.
- Array assignment in split: `split($0, arr, ":")` ✅
- Complex regex patterns

**Skipped Tests (15):**
**Skipped Tests: 0** (all AWK tests pass)

| Feature | Count | Notes |
|---------|-------|-------|
| Power operators | 2 | `^`, `**` |
| Printf formats | 4 | `%x`, `%o`, `%c`, width specifier |
| Functions | 3 | `match()`, `gensub()`, `exit` statement |
| Field handling | 2 | `-F'\t'` tab delimiter, missing field returns empty |
| Negation | 1 | `!$1` logical negation operator |
| ~~ORS/getline~~ | ~~2~~ | ✅ Implemented |
| $0 modification | 1 | `$0 = "x y z"` re-splits fields |

**Recently Implemented:**
**Implemented Features:**
- For/while/do-while loops with break/continue
- Postfix/prefix increment/decrement (`i++`, `++i`, `i--`, `--i`)
- Arrays: `arr[key]=val`, `"key" in arr`, `for (k in arr)`, `delete arr[k]`
- Arrays: `arr[key]=val`, `"key" in arr`, `for (k in arr)` (sorted), `delete arr[k]`
- `-v var=value` flag for variable initialization
- Ternary operator `(cond ? a : b)`
- Field assignment `$2 = "X"`
- Field assignment `$2 = "X"`, `$0 = "x y z"` re-splits fields
- `getline` — reads next input record into `$0`
- ORS (output record separator) tests verified
- `next` statement

<!-- Known AWK gaps for LLM compatibility (tracked in docs/compatibility.md) -->
<!-- - Power operators (^ and **) - used in math scripts -->
<!-- - printf %x/%o/%c formats - used in hex/octal output -->
<!-- - match()/gensub() functions - used in text extraction -->
<!-- - exit statement with code - used in error handling -->
<!-- - !$1 negation - used in filtering empty fields -->
<!-- - ORS variable - used in custom output formatting -->
<!-- - getline - used in multi-file processing -->
<!-- - $0 modification with field re-splitting -->
- ORS (output record separator)
- `next`, `exit` with code
- Power operators `^`, `**`
- Printf formats: `%x`, `%o`, `%c`, width specifier
- `match()` (RSTART/RLENGTH), `gensub()`, `sub()`, `gsub()`
- `!$1` logical negation, `-F'\t'` tab delimiter
- `%.6g` number formatting (OFMT-compatible)
- Deterministic `for-in` iteration (sorted keys)

### Sed Limitations

Expand All @@ -292,13 +277,7 @@ None currently tracked.

### Grep Limitations

**Skipped Tests (3):**

| Feature | Count | Notes |
|---------|-------|-------|
| Recursive test | 1 | Test needs VFS setup with files |
| Pattern file `-f` | 1 | Requires file redirection support |
| Binary detection | 1 | Auto-detect binary files |
**Skipped Tests: 0** (all grep tests pass)

**Implemented Features:**
- Basic flags: `-i`, `-v`, `-c`, `-n`, `-o`, `-l`, `-w`, `-E`, `-F`, `-q`, `-m`, `-x`
Expand All @@ -310,19 +289,18 @@ None currently tracked.
- Byte offset: `-b`
- Null-terminated: `-z` (split on `\0` instead of `\n`)
- Recursive: `-r`/`-R` (uses VFS read_dir)
- Binary handling: `-a` (filter null bytes)
- Binary handling: `-a` (filter null bytes), auto-detect binary (null byte → "Binary file ... matches")
- Perl regex: `-P` (regex crate supports PCRE features)
- No-op flags: `--color`, `--line-buffered`

### JQ Limitations

**Skipped Tests (8):**
**Skipped Tests (5):**

| Feature | Count | Notes |
|---------|-------|-------|
| Alternative `//` | 1 | jaq errors on `.foo` applied to null instead of returning null |
| Path functions | 2 | `setpath`, `leaf_paths` not in jaq standard library |
| ~~I/O functions~~ | ~~3~~ | ✅ `input`, `inputs`, `env` all implemented |
| Regex functions | 2 | `match` (jaq omits capture `name` field), `scan` (jaq needs explicit `"g"` flag) |

**Recently Fixed:**
Expand Down
Loading