diff --git a/crates/bashkit/src/builtins/cat.rs b/crates/bashkit/src/builtins/cat.rs
index a1076403..5fa789e4 100644
--- a/crates/bashkit/src/builtins/cat.rs
+++ b/crates/bashkit/src/builtins/cat.rs
@@ -14,40 +14,113 @@ pub struct Cat;
 impl Builtin for Cat {
     async fn execute(&self, ctx: Context<'_>) -> Result {
         let mut output = String::new();
+        let mut show_nonprinting = false;
+        let mut show_ends = false;
+        let mut show_tabs = false;
+        let mut number_lines = false;
+        let mut files: Vec<&str> = Vec::new();
 
-        // If no arguments and stdin is provided, output stdin
-        if ctx.args.is_empty() {
+        // Parse flags
+        for arg in ctx.args {
+            if arg.starts_with('-') && arg.len() > 1 && !arg.starts_with("--") {
+                for ch in arg[1..].chars() {
+                    match ch {
+                        'v' => show_nonprinting = true,
+                        'n' => number_lines = true,
+                        // -e: -v plus `$` before each newline (GNU `-vE`)
+                        'e' => {
+                            show_nonprinting = true;
+                            show_ends = true;
+                        }
+                        // -t: -v plus tabs shown as `^I` (GNU `-vT`)
+                        't' => {
+                            show_nonprinting = true;
+                            show_tabs = true;
+                        }
+                        _ => {}
+                    }
+                }
+            } else {
+                files.push(arg);
+            }
+        }
+
+        let mut raw = String::new();
+
+        if files.is_empty() {
             if let Some(stdin) = ctx.stdin {
-                output.push_str(stdin);
+                raw.push_str(stdin);
             }
         } else {
-            // Read files
-            for arg in ctx.args {
-                // Handle - as stdin
-                if arg == "-" {
+            for file in &files {
+                if *file == "-" {
                     if let Some(stdin) = ctx.stdin {
-                        output.push_str(stdin);
+                        raw.push_str(stdin);
                     }
                 } else {
-                    let path = if Path::new(arg).is_absolute() {
-                        arg.to_string()
+                    let path = if Path::new(file).is_absolute() {
+                        file.to_string()
                     } else {
-                        ctx.cwd.join(arg).to_string_lossy().to_string()
+                        ctx.cwd.join(file).to_string_lossy().to_string()
                     };
 
                     match ctx.fs.read_file(Path::new(&path)).await {
                         Ok(content) => {
                             let text = String::from_utf8_lossy(&content);
-                            output.push_str(&text);
+                            raw.push_str(&text);
                         }
                         Err(e) => {
-                            return Ok(ExecResult::err(format!("cat: {}: {}\n", arg, e), 1));
+                            return Ok(ExecResult::err(format!("cat: {}: {}\n", file, e), 1));
                         }
                     }
                 }
             }
         }
 
+        if show_nonprinting {
+            for ch in raw.chars() {
+                match ch {
+                    '\n' => {
+                        // -e marks the end of every line with `$`
+                        if show_ends {
+                            output.push('$');
+                        }
+                        output.push('\n');
+                    }
+                    // -t makes tabs visible; plain -v passes them through
+                    '\t' if show_tabs => output.push_str("^I"),
+                    '\t' => output.push('\t'),
+                    c if (c as u32) < 32 => {
+                        // Control characters: ^@, ^A, ..., ^Z, ^[, ^\, ^], ^^, ^_
+                        output.push('^');
+                        output.push((c as u8 + 64) as char);
+                    }
+                    '\x7f' => {
+                        output.push('^');
+                        output.push('?');
+                    }
+                    c => output.push(c),
+                }
+            }
+        } else {
+            output = raw;
+        }
+
+        if number_lines {
+            let lines: Vec<&str> = output.split('\n').collect();
+            let mut numbered = String::new();
+            for (i, line) in lines.iter().enumerate() {
+                if i < lines.len() - 1 || !line.is_empty() {
+                    // GNU cat right-aligns the number in a 6-column field, then a tab
+                    numbered.push_str(&format!("{:>6}\t{}", i + 1, line));
+                    if i < lines.len() - 1 {
+                        numbered.push('\n');
+                    }
+                }
+            }
+            output = numbered;
+        }
+
         Ok(ExecResult::ok(output))
     }
 }
diff --git a/crates/bashkit/src/builtins/date.rs b/crates/bashkit/src/builtins/date.rs
index 9719bfd7..ac73fcee 100644
--- a/crates/bashkit/src/builtins/date.rs
+++ b/crates/bashkit/src/builtins/date.rs
@@ -343,8 +343,13 @@ impl Builtin for Date {
         }
 
         let default_format = "%a %b %e %H:%M:%S %Z %Y".to_string();
+        let format_owned;
         let format = match &format_arg {
-            Some(fmt) => &fmt[1..], // Strip leading '+'
+            Some(fmt) => {
+                let without_plus = &fmt[1..]; // Strip leading '+'
+                format_owned = strip_surrounding_quotes(without_plus).to_string();
+                &format_owned
+            }
             None => &default_format,
         };
 
diff --git a/crates/bashkit/src/builtins/sortuniq.rs b/crates/bashkit/src/builtins/sortuniq.rs
index dfddd085..43398c3d 100644
--- a/crates/bashkit/src/builtins/sortuniq.rs
+++ b/crates/bashkit/src/builtins/sortuniq.rs
@@ -92,6 +92,7 @@ impl Builtin for Sort {
         let mut check_sorted = false;
         let mut human_numeric = false;
         let mut month_sort = false;
+        let mut merge = false;
         let mut delimiter: Option = None;
         let mut key_field: Option = None;
         let mut output_file: Option = None;
@@ -148,6 +149,7 @@ impl Builtin for Sort {
                         'c' | 'C' => check_sorted = true,
                         'h' => human_numeric = true,
                         'M' => month_sort = true,
+                        'm' => merge = true,
                         'z' => zero_terminated = true,
                         _ => {}
                     }
@@ -195,6 +197,65 @@ impl Builtin for Sort {
             }
         }
 
+        // Merge mode: k-way merge of pre-sorted inputs
+        // NOTE(review): lines are compared as plain strings, empty lines are dropped, and
+        // the result is returned directly without consulting the other ordering flags or
+        // `output_file` — confirm this matches the intended `sort -m` behaviour.
+        if merge && !files.is_empty() {
+            let mut streams: Vec<Vec<String>> = Vec::new();
+            for file in &files {
+                let path = if file.starts_with('/') {
+                    std::path::PathBuf::from(file)
+                } else {
+                    ctx.cwd.join(file)
+                };
+                match ctx.fs.read_file(&path).await {
+                    Ok(content) => {
+                        let text = String::from_utf8_lossy(&content);
+                        let lines: Vec<String> = text
+                            .split(line_sep)
+                            .filter(|l| !l.is_empty())
+                            .map(|l| l.to_string())
+                            .collect();
+                        streams.push(lines);
+                    }
+                    Err(e) => {
+                        return Ok(ExecResult::err(format!("sort: {}: {}\n", file, e), 1));
+                    }
+                }
+            }
+            // k-way merge using indices
+            let mut indices: Vec<usize> = vec![0; streams.len()];
+            let mut merged = Vec::new();
+            loop {
+                let mut best: Option<(usize, &str)> = None;
+                for (i, stream) in streams.iter().enumerate() {
+                    if indices[i] < stream.len() {
+                        let line = &stream[indices[i]];
+                        if let Some((_, best_line)) = best {
+                            if line.as_str() < best_line {
+                                best = Some((i, line));
+                            }
+                        } else {
+                            best = Some((i, line));
+                        }
+                    }
+                }
+                if let Some((i, line)) = best {
+                    merged.push(line.to_string());
+                    indices[i] += 1;
+                } else {
+                    break;
+                }
+            }
+            let sep = if zero_terminated { "\0" } else { "\n" };
+            let mut output = merged.join(sep);
+            if !output.is_empty() {
+                output.push_str(sep);
+            }
+            return Ok(ExecResult::ok(output));
+        }
+
         // Check sorted mode
         if check_sorted {
             for i in 1..all_lines.len() {
diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs
index 610cfa7c..7663ff07 100644
--- a/crates/bashkit/src/interpreter/mod.rs
+++ b/crates/bashkit/src/interpreter/mod.rs
@@ -4697,6 +4697,11 @@ impl Interpreter {
         let suffix: String = chars[end + 1..].iter().collect();
         let brace_content: String = chars[start + 1..end].iter().collect();
 
+        // Brace content with leading/trailing space is not expanded
+        if brace_content.starts_with(' ') || brace_content.ends_with(' ') {
+            return vec![s.to_string()];
+        }
+
         // Check for range expansion like {1..5} or {a..z}
         if let Some(range_result) = self.try_expand_range(&brace_content) {
             let mut results = Vec::new();
diff --git a/crates/bashkit/src/parser/lexer.rs b/crates/bashkit/src/parser/lexer.rs
index 9db64b07..6a1966ad 100644
--- a/crates/bashkit/src/parser/lexer.rs
+++ b/crates/bashkit/src/parser/lexer.rs
@@ -306,24 +306,43 @@ impl<'a> Lexer<'a> {
             // Handle quoted strings within words (e.g., a="Hello" or VAR="value")
             // This handles the case where a word like `a=` is followed by a quoted string
             if ch == '"' || ch == '\'' {
-                // Check if this is a quoted value in an assignment (word ends with = or +=)
-                if word.ends_with('=') || word.ends_with("+=") {
-                    // Include the quoted string as part of this word
-                    let quote_char = ch;
-                    word.push(ch);
-                    self.advance();
-                    while let Some(c) = self.peek_char() {
-                        word.push(c);
+                if word.is_empty() {
+                    // Start of a new token — let the main tokenizer handle quotes
+                    break;
+                }
+                // Word already has content — concatenate the quoted segment
+                // This handles: VAR="val", date +"%Y", echo foo"bar"
+                // NOTE(review): the quote characters are consumed and the segment is pushed
+                // into `word` unmarked, so a `$` taken from '...' or from `\$` is pushed the
+                // same way as a live `$` — confirm later expansion does not treat it as one.
+                let quote_char = ch;
+                self.advance(); // consume opening quote
+                while let Some(c) = self.peek_char() {
+                    if c == quote_char {
+                        self.advance(); // consume closing quote
+                        break;
+                    }
+                    if c == '\\' && quote_char == '"' {
                         self.advance();
-                        if c == quote_char && !word.ends_with(&format!("\\{}", quote_char)) {
-                            break;
+                        if let Some(next) = self.peek_char() {
+                            match next {
+                                '"' | '\\' | '$' | '`' => {
+                                    word.push(next);
+                                    self.advance();
+                                }
+                                _ => {
+                                    word.push('\\');
+                                    word.push(next);
+                                    self.advance();
+                                }
+                            }
+                            continue;
                         }
                     }
-                    continue;
-                } else {
-                    // Not after =, so this is a separate quoted token
-                    break;
+                    word.push(c);
+                    self.advance();
                 }
+                continue;
             } else if ch == '$' {
                 // Handle variable references and command substitution
                 word.push(ch);
diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs
index d2daa451..d7154f6c 100644
--- a/crates/bashkit/src/parser/mod.rs
+++ b/crates/bashkit/src/parser/mod.rs
@@ -1695,6 +1695,18 @@ impl<'a> Parser<'a> {
                         target: Word::literal(dst_fd.to_string()),
                     });
                 }
+                // { and } as arguments (not in command position) are literal words
+                Some(tokens::Token::LeftBrace) | Some(tokens::Token::RightBrace)
+                    if !words.is_empty() =>
+                {
+                    let sym = if matches!(self.current_token, Some(tokens::Token::LeftBrace)) {
+                        "{"
+                    } else {
+                        "}"
+                    };
+                    words.push(Word::literal(sym));
+                    self.advance();
+                }
                 Some(tokens::Token::Newline)
                 | Some(tokens::Token::Semicolon)
                 | Some(tokens::Token::Pipe)
diff --git a/crates/bashkit/tests/spec_cases/bash/date.test.sh b/crates/bashkit/tests/spec_cases/bash/date.test.sh
index 3a14d69a..9986b600 100644
--- a/crates/bashkit/tests/spec_cases/bash/date.test.sh
+++ b/crates/bashkit/tests/spec_cases/bash/date.test.sh
@@ -120,7 +120,6 @@ valid
 ### end
 
 ### date_combined_format
-### skip: quoted format string not handling space correctly
 # Multiple format specifiers
 date +"%Y-%m-%d %H:%M:%S" | grep -qE '^[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}$' && echo "valid"
 ### expect
diff --git a/crates/bashkit/tests/spec_cases/bash/echo.test.sh b/crates/bashkit/tests/spec_cases/bash/echo.test.sh
index fb6548bd..5f3f72f7 100644
--- a/crates/bashkit/tests/spec_cases/bash/echo.test.sh
+++ b/crates/bashkit/tests/spec_cases/bash/echo.test.sh
@@ -70,7 +70,6 @@ hello world
 ### end
 
 ### echo_escape_r
-### skip: carriage return display varies by terminal, test verifies CR is processed
 # Echo with carriage return - raw output contains CR character
 echo -e "hello\rworld" | cat -v | grep -q 'hello.*world' && echo "valid"
 ### expect
diff --git a/crates/bashkit/tests/spec_cases/bash/negative-tests.test.sh b/crates/bashkit/tests/spec_cases/bash/negative-tests.test.sh
index 85de2a87..e57bcb6f 100644
--- a/crates/bashkit/tests/spec_cases/bash/negative-tests.test.sh
+++ b/crates/bashkit/tests/spec_cases/bash/negative-tests.test.sh
@@ -1,9 +1,8 @@
 ### neg_array_indices_empty
-### skip: empty array indices expansion outputs extra newline
 # Empty array has no indices
-arr=(); echo "${!arr[@]}"
+arr=(); echo ">${!arr[@]}<"
 ### expect
-
+><
 ### end
 
 ### neg_test_nonexistent_file
@@ -29,7 +28,6 @@ echo {item}
 ### end
 
 ### neg_brace_no_expand_space
-### skip: brace with space parsing issue
 # Brace with space doesn't expand
 echo { a,b,c }
 ### expect
diff --git a/crates/bashkit/tests/spec_cases/bash/sortuniq.test.sh b/crates/bashkit/tests/spec_cases/bash/sortuniq.test.sh
index 34c3cd43..a85842b9 100644
--- a/crates/bashkit/tests/spec_cases/bash/sortuniq.test.sh
+++ b/crates/bashkit/tests/spec_cases/bash/sortuniq.test.sh
@@ -155,7 +155,6 @@ echo $?
 ### end
 
 ### sort_merge
-### skip: sort -m (merge) not implemented
 printf 'a\nc\n' > /tmp/f1 && printf 'b\nd\n' > /tmp/f2 && sort -m /tmp/f1 /tmp/f2
 ### expect
 a
diff --git a/specs/005-builtins.md b/specs/005-builtins.md
index fa63425c..0342a929 100644
--- a/specs/005-builtins.md
+++ b/specs/005-builtins.md
@@ -76,7 +76,7 @@ execution → $PATH search → "command not found".
 - `chmod` - Change permissions (octal mode)
 
 #### Text Processing
-- `cat` - Concatenate files
+- `cat` - Concatenate files (`-v`, `-n`, `-e`, `-t`)
 - `nl` - Number lines (`-b`, `-n`, `-s`, `-i`, `-v`, `-w`)
 - `head`, `tail` - First/last N lines
 - `grep` - Pattern matching (`-i`, `-v`, `-c`, `-n`, `-o`, `-l`, `-w`, `-E`, `-F`, `-P`, `-q`, `-m`, `-x`, `-A`, `-B`, `-C`, `-e`, `-f`, `-H`, `-h`, `-b`, `-a`, `-z`, `-r`)
diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md
index 75bf1f77..8f281fe9 100644
--- a/specs/009-implementation-status.md
+++ b/specs/009-implementation-status.md
@@ -107,17 +107,17 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. See
 
 ## Spec Test Coverage
 
-**Total spec test cases:** 1096 (1047 pass, 49 skip)
+**Total spec test cases:** 1087 (1034 pass, 53 skip)
 
 | Category | Cases | In CI | Pass | Skip | Notes |
 |----------|-------|-------|------|------|-------|
-| Bash (core) | 684 | Yes | 674 | 10 | `bash_spec_tests` in CI |
+| Bash (core) | 684 | Yes | 679 | 5 | `bash_spec_tests` in CI |
 | AWK | 90 | Yes | 73 | 17 | loops, arrays, -v, ternary, field assign |
 | Grep | 82 | Yes | 79 | 3 | now with -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude |
 | Sed | 65 | Yes | 53 | 12 | hold space, change, regex ranges, -E |
 | JQ | 108 | Yes | 100 | 8 | reduce, walk, regex funcs, --arg/--argjson, combined flags |
 | Python | 58 | Yes | 50 | 8 | **Experimental.** VFS bridging, pathlib, env vars |
-| **Total** | **1087** | **Yes** | **1029** | **58** | |
+| **Total** | **1087** | **Yes** | **1034** | **53** | |
 
 ### Bash Spec Tests Breakdown
 
@@ -135,9 +135,9 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. See
 | command-subst.test.sh | 14 | includes backtick substitution (1 skipped) |
 | control-flow.test.sh | 32 | if/elif/else, for, while, case |
 | cuttr.test.sh | 32 | cut and tr commands, `-z` zero-terminated |
-| date.test.sh | 38 | format specifiers, `-d` relative/compound/epoch, `-R`, `-I`, `%N` (3 skipped) |
+| date.test.sh | 38 | format specifiers, `-d` relative/compound/epoch, `-R`, `-I`, `%N` (2 skipped) |
 | diff.test.sh | 4 | line diffs |
-| echo.test.sh | 24 | escape sequences (1 skipped) |
+| echo.test.sh | 24 | escape sequences |
 | errexit.test.sh | 8 | set -e tests |
 | fileops.test.sh | 21 | |
 | find.test.sh | 10 | file search |
@@ -147,7 +147,7 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. See
 | headtail.test.sh | 14 | |
 | herestring.test.sh | 8 | 1 skipped |
 | hextools.test.sh | 5 | od/xxd/hexdump (3 skipped) |
-| negative-tests.test.sh | 13 | error conditions (2 skipped) |
+| negative-tests.test.sh | 13 | error conditions |
 | nl.test.sh | 14 | line numbering |
 | nounset.test.sh | 7 | `set -u` unbound variable checks, `${var:-default}` nounset-aware |
 | paste.test.sh | 4 | line merging with `-s` serial and `-d` delimiter |
@@ -156,7 +156,7 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. See
 | printf.test.sh | 24 | format specifiers, array expansion |
 | procsub.test.sh | 6 | |
 | sleep.test.sh | 6 | |
-| sortuniq.test.sh | 32 | sort and uniq, `-z` zero-terminated (1 skipped) |
+| sortuniq.test.sh | 32 | sort and uniq, `-z` zero-terminated, `-m` merge |
 | source.test.sh | 21 | source/., function loading, PATH search, positional params |
 | test-operators.test.sh | 17 | file/string tests |
 | time.test.sh | 11 | Wall-clock only (user/sys always 0) |