Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 28 additions & 1 deletion crates/bashkit/src/builtins/awk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1889,14 +1889,41 @@ impl AwkInterpreter {
}
let s = self.eval_expr(&args[0]).as_string();
let pattern = self.eval_expr(&args[1]).as_string();
// Extract capture array name from 3rd arg (gawk extension)
let arr_name = if args.len() >= 3 {
if let AwkExpr::Variable(name) = &args[2] {
Some(name.clone())
} else {
None
}
} else {
None
};
if let Ok(re) = Regex::new(&pattern) {
if let Some(m) = re.find(&s) {
if let Some(caps) = re.captures(&s) {
let m = caps.get(0).unwrap();
let rstart = m.start() + 1; // awk is 1-indexed
let rlength = m.end() - m.start();
self.state
.set_variable("RSTART", AwkValue::Number(rstart as f64));
self.state
.set_variable("RLENGTH", AwkValue::Number(rlength as f64));
// Populate capture array if 3rd arg provided
if let Some(ref arr) = arr_name {
// arr[0] = entire match
let full_key = format!("{}[0]", arr);
self.state
.set_variable(&full_key, AwkValue::String(m.as_str().to_string()));
// arr[1..N] = capture groups
for i in 1..caps.len() {
let key = format!("{}[{}]", arr, i);
let val = caps
.get(i)
.map(|c| c.as_str().to_string())
.unwrap_or_default();
self.state.set_variable(&key, AwkValue::String(val));
}
}
AwkValue::Number(rstart as f64)
} else {
self.state.set_variable("RSTART", AwkValue::Number(0.0));
Expand Down
119 changes: 101 additions & 18 deletions crates/bashkit/src/builtins/cuttr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,31 +202,91 @@ impl Builtin for Tr {
}
}

/// Expand a character set specification like "a-z" into a list of characters
/// Expand a character set specification like "a-z" into a list of characters.
/// Supports POSIX character classes: [:lower:], [:upper:], [:digit:], [:alpha:], [:alnum:], [:space:]
fn expand_char_set(spec: &str) -> Vec<char> {
let mut chars = Vec::new();
let mut iter = spec.chars().peekable();

while let Some(c) = iter.next() {
if iter.peek() == Some(&'-') {
iter.next(); // consume '-'
if let Some(&end) = iter.peek() {
iter.next(); // consume end char
// Expand range
let start = c as u32;
let end = end as u32;
for code in start..=end {
if let Some(ch) = char::from_u32(code) {
chars.push(ch);
let mut i = 0;
let bytes = spec.as_bytes();

while i < bytes.len() {
// Check for POSIX character class [:class:]
if bytes[i] == b'[' && i + 1 < bytes.len() && bytes[i + 1] == b':' {
if let Some(end) = spec[i + 2..].find(":]") {
let class_name = &spec[i + 2..i + 2 + end];
match class_name {
"lower" => chars.extend('a'..='z'),
"upper" => chars.extend('A'..='Z'),
"digit" => chars.extend('0'..='9'),
"alpha" => {
chars.extend('a'..='z');
chars.extend('A'..='Z');
}
"alnum" => {
chars.extend('a'..='z');
chars.extend('A'..='Z');
chars.extend('0'..='9');
}
"space" => chars.extend([' ', '\t', '\n', '\r', '\x0b', '\x0c']),
"blank" => chars.extend([' ', '\t']),
"print" | "graph" => {
for code in 0x20u8..=0x7e {
chars.push(code as char);
}
}
_ => {
// Unknown class, treat literally
chars.push('[');
i += 1;
continue;
}
}
} else {
// Trailing dash, treat literally
chars.push(c);
chars.push('-');
i += 2 + end + 2; // skip past [: + class + :]
continue;
}
}

let c = bytes[i] as char;
// Check for range like a-z
if i + 2 < bytes.len() && bytes[i + 1] == b'-' {
let end = bytes[i + 2] as char;
let start = c as u32;
let end = end as u32;
for code in start..=end {
if let Some(ch) = char::from_u32(code) {
chars.push(ch);
}
}
i += 3;
} else if i + 1 == bytes.len() - 1 && bytes[i + 1] == b'-' {
// Trailing dash
chars.push(c);
chars.push('-');
i += 2;
} else {
// Handle escape sequences
if c == '\\' && i + 1 < bytes.len() {
match bytes[i + 1] {
b'n' => {
chars.push('\n');
i += 2;
continue;
}
b't' => {
chars.push('\t');
i += 2;
continue;
}
b'\\' => {
chars.push('\\');
i += 2;
continue;
}
_ => {}
}
}
chars.push(c);
i += 1;
}
}

Expand Down Expand Up @@ -338,6 +398,29 @@ mod tests {
assert_eq!(expand_char_set("0-2"), vec!['0', '1', '2']);
}

#[test]
fn test_expand_char_class_lower() {
    // `[:lower:]` should expand to 26 characters, beginning with 'a' and ending with 'z'.
    let expanded = expand_char_set("[:lower:]");
    assert_eq!(expanded.len(), 26);
    assert_eq!(expanded.first(), Some(&'a'));
    assert_eq!(expanded.last(), Some(&'z'));
}

#[test]
fn test_expand_char_class_upper() {
    // `[:upper:]` should expand to 26 characters, beginning with 'A' and ending with 'Z'.
    let expanded = expand_char_set("[:upper:]");
    assert_eq!(expanded.len(), 26);
    assert_eq!(expanded.first(), Some(&'A'));
    assert_eq!(expanded.last(), Some(&'Z'));
}

#[tokio::test]
async fn test_tr_char_class_lower_to_upper() {
    // Translating `[:lower:]` to `[:upper:]` should upper-case the letters and
    // leave the space and trailing newline untouched.
    let args = ["[:lower:]", "[:upper:]"];
    let outcome = run_tr(&args, Some("hello world\n")).await;
    assert_eq!(outcome.exit_code, 0);
    assert_eq!(outcome.stdout, "HELLO WORLD\n");
}

#[test]
fn test_parse_field_spec() {
assert_eq!(parse_field_spec("1"), vec![1]);
Expand Down
48 changes: 46 additions & 2 deletions crates/bashkit/src/builtins/grep.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ struct GrepOptions {
count_only: bool,
files_with_matches: bool,
fixed_strings: bool,
extended_regex: bool,
only_matching: bool,
word_regex: bool,
quiet: bool,
Expand Down Expand Up @@ -78,6 +79,7 @@ impl GrepOptions {
count_only: false,
files_with_matches: false,
fixed_strings: false,
extended_regex: false,
only_matching: false,
word_regex: false,
quiet: false,
Expand Down Expand Up @@ -114,8 +116,8 @@ impl GrepOptions {
'o' => opts.only_matching = true,
'w' => opts.word_regex = true,
'F' => opts.fixed_strings = true,
'E' => {} // Extended regex is default
'P' => {} // Perl regex - regex crate supports most Perl features
'E' => opts.extended_regex = true,
'P' => opts.extended_regex = true, // Perl regex implies ERE
'q' => opts.quiet = true,
'x' => opts.whole_line = true,
'H' => opts.show_filename = true,
Expand Down Expand Up @@ -298,6 +300,11 @@ impl GrepOptions {
}
let pat = if self.fixed_strings {
regex::escape(p)
} else if !self.extended_regex {
// BRE mode: convert to ERE for the regex crate
// In BRE: ( ) are literal, \( \) are groups
// In ERE/regex crate: ( ) are groups, \( \) are literal
bre_to_ere(p)
} else {
p.clone()
};
Expand Down Expand Up @@ -335,6 +342,43 @@ impl GrepOptions {
}
}

/// Convert a BRE (Basic Regular Expression) pattern to ERE syntax for the regex crate.
///
/// BRE and ERE flip which spelling of `( ) { } + ? |` is special:
/// - In BRE the bare characters are literal, while the backslashed forms
///   `\(` `\)` `\{` `\}` (and the GNU extensions `\+` `\?` `\|`) are operators.
/// - In ERE / the regex crate the bare characters are operators and the
///   backslashed forms are literal.
///
/// Each of those seven characters therefore has its escaping inverted. Every
/// other character and escape sequence is copied through unchanged, and a
/// lone trailing backslash is kept as-is.
fn bre_to_ere(pattern: &str) -> String {
    // Characters whose escaped/unescaped meaning is swapped between BRE and ERE.
    const SWAPPED: [char; 7] = ['(', ')', '{', '}', '+', '?', '|'];

    let mut result = String::with_capacity(pattern.len());
    let mut chars = pattern.chars();

    while let Some(c) = chars.next() {
        if c == '\\' {
            match chars.next() {
                // BRE escaped operator -> ERE bare operator.
                Some(next) if SWAPPED.contains(&next) => result.push(next),
                // Any other escape means the same thing in both dialects.
                Some(next) => {
                    result.push('\\');
                    result.push(next);
                }
                // Trailing backslash: nothing follows it, so emit it alone.
                None => result.push('\\'),
            }
        } else if SWAPPED.contains(&c) {
            // BRE bare literal -> must be escaped so the regex crate does not
            // interpret it as grouping, repetition, or alternation.
            result.push('\\');
            result.push(c);
        } else {
            result.push(c);
        }
    }

    result
}

#[async_trait]
impl Builtin for Grep {
async fn execute(&self, ctx: Context<'_>) -> Result<ExecResult> {
Expand Down
Loading
Loading