diff --git a/crates/bashkit/src/interpreter/mod.rs b/crates/bashkit/src/interpreter/mod.rs index 1525654b..78eb2e01 100644 --- a/crates/bashkit/src/interpreter/mod.rs +++ b/crates/bashkit/src/interpreter/mod.rs @@ -53,6 +53,33 @@ use fail::fail_point; /// This is handled at the interpreter level to prevent custom filesystems from bypassing it. const DEV_NULL: &str = "/dev/null"; +/// Check if a name is a shell keyword (for `command -v`/`command -V`). +fn is_keyword(name: &str) -> bool { + matches!( + name, + "if" | "then" + | "else" + | "elif" + | "fi" + | "for" + | "while" + | "until" + | "do" + | "done" + | "case" + | "esac" + | "in" + | "function" + | "select" + | "time" + | "{" + | "}" + | "[[" + | "]]" + | "!" + ) +} + /// Check if a path refers to /dev/null after normalization. /// Handles attempts to bypass via paths like `/dev/../dev/null`. fn is_dev_null(path: &Path) -> bool { @@ -138,6 +165,8 @@ pub struct Interpreter { /// Monotonic counter incremented each time output is emitted via callback. /// Used to detect whether sub-calls already emitted output, preventing duplicates. output_emit_count: u64, + /// Pending nounset (set -u) error message, consumed by execute_command. + nounset_error: Option, } impl Interpreter { @@ -301,6 +330,7 @@ impl Interpreter { pipeline_stdin: None, output_callback: None, output_emit_count: 0, + nounset_error: None, } } @@ -418,6 +448,11 @@ impl Interpreter { exit_code = result.exit_code; self.last_exit_code = exit_code; + // Stop on control flow (e.g. nounset error uses Return to abort) + if result.control_flow != ControlFlow::None { + break; + } + // NOTE: errexit (set -e) is handled internally by execute_command, // execute_list, and execute_command_sequence. 
We don't check here // because those methods handle the nuances of && / || chains, @@ -447,7 +482,7 @@ impl Interpreter { CompoundCommand::Case(cmd) => cmd.span.line(), CompoundCommand::Time(cmd) => cmd.span.line(), CompoundCommand::Subshell(_) | CompoundCommand::BraceGroup(_) => 1, - CompoundCommand::Arithmetic(_) => 1, + CompoundCommand::Arithmetic(_) | CompoundCommand::Conditional(_) => 1, }, Command::Function(c) => c.span.line(), } @@ -526,6 +561,7 @@ impl Interpreter { CompoundCommand::Case(case_cmd) => self.execute_case(case_cmd).await, CompoundCommand::Arithmetic(expr) => self.execute_arithmetic_command(expr).await, CompoundCommand::Time(time_cmd) => self.execute_time(time_cmd).await, + CompoundCommand::Conditional(words) => self.execute_conditional(words).await, } } @@ -776,6 +812,164 @@ impl Interpreter { /// Execute an arithmetic command ((expression)) /// Returns exit code 0 if result is non-zero, 1 if result is zero + /// Execute a [[ conditional expression ]] + async fn execute_conditional(&mut self, words: &[Word]) -> Result { + // Expand all words + let mut expanded = Vec::new(); + for word in words { + expanded.push(self.expand_word(word).await?); + } + + let result = self.evaluate_conditional(&expanded).await; + let exit_code = if result { 0 } else { 1 }; + self.last_exit_code = exit_code; + + Ok(ExecResult { + stdout: String::new(), + stderr: String::new(), + exit_code, + control_flow: ControlFlow::None, + }) + } + + /// Evaluate a [[ ]] conditional expression from expanded words. + fn evaluate_conditional<'a>( + &'a mut self, + args: &'a [String], + ) -> std::pin::Pin + Send + 'a>> { + Box::pin(async move { + if args.is_empty() { + return false; + } + + // Handle negation + if args[0] == "!" 
{ + return !self.evaluate_conditional(&args[1..]).await; + } + + // Handle parentheses + if args.first().map(|s| s.as_str()) == Some("(") + && args.last().map(|s| s.as_str()) == Some(")") + { + return self.evaluate_conditional(&args[1..args.len() - 1]).await; + } + + // Look for logical operators (lowest precedence, right to left) + for i in (0..args.len()).rev() { + if args[i] == "&&" && i > 0 { + return self.evaluate_conditional(&args[..i]).await + && self.evaluate_conditional(&args[i + 1..]).await; + } + } + for i in (0..args.len()).rev() { + if args[i] == "||" && i > 0 { + return self.evaluate_conditional(&args[..i]).await + || self.evaluate_conditional(&args[i + 1..]).await; + } + } + + match args.len() { + 1 => !args[0].is_empty(), + 2 => { + // Unary operators + match args[0].as_str() { + "-z" => args[1].is_empty(), + "-n" => !args[1].is_empty(), + "-e" | "-a" => self + .fs + .exists(std::path::Path::new(&args[1])) + .await + .unwrap_or(false), + "-f" => self + .fs + .stat(std::path::Path::new(&args[1])) + .await + .map(|m| m.file_type.is_file()) + .unwrap_or(false), + "-d" => self + .fs + .stat(std::path::Path::new(&args[1])) + .await + .map(|m| m.file_type.is_dir()) + .unwrap_or(false), + "-r" | "-w" | "-x" => self + .fs + .exists(std::path::Path::new(&args[1])) + .await + .unwrap_or(false), + "-s" => self + .fs + .stat(std::path::Path::new(&args[1])) + .await + .map(|m| m.size > 0) + .unwrap_or(false), + _ => !args[0].is_empty(), + } + } + 3 => { + // Binary operators + match args[1].as_str() { + "=" | "==" => args[0] == args[2], + "!=" => args[0] != args[2], + "<" => args[0] < args[2], + ">" => args[0] > args[2], + "-eq" => { + args[0].parse::().unwrap_or(0) + == args[2].parse::().unwrap_or(0) + } + "-ne" => { + args[0].parse::().unwrap_or(0) + != args[2].parse::().unwrap_or(0) + } + "-lt" => { + args[0].parse::().unwrap_or(0) + < args[2].parse::().unwrap_or(0) + } + "-le" => { + args[0].parse::().unwrap_or(0) + <= args[2].parse::().unwrap_or(0) + } + 
"-gt" => { + args[0].parse::().unwrap_or(0) + > args[2].parse::().unwrap_or(0) + } + "-ge" => { + args[0].parse::().unwrap_or(0) + >= args[2].parse::().unwrap_or(0) + } + "=~" => self.regex_match(&args[0], &args[2]), + _ => false, + } + } + _ => false, + } + }) + } + + /// Perform regex match and set BASH_REMATCH array. + fn regex_match(&mut self, string: &str, pattern: &str) -> bool { + match regex::Regex::new(pattern) { + Ok(re) => { + if let Some(captures) = re.captures(string) { + // Set BASH_REMATCH array + let mut rematch = HashMap::new(); + for (i, m) in captures.iter().enumerate() { + rematch.insert(i, m.map(|m| m.as_str().to_string()).unwrap_or_default()); + } + self.arrays.insert("BASH_REMATCH".to_string(), rematch); + true + } else { + self.arrays.remove("BASH_REMATCH"); + false + } + } + Err(_) => { + self.arrays.remove("BASH_REMATCH"); + false + } + } + } + async fn execute_arithmetic_command(&mut self, expr: &str) -> Result { let result = self.execute_arithmetic_with_side_effects(expr); let exit_code = if result != 0 { 0 } else { 1 }; @@ -1948,6 +2142,28 @@ impl Interpreter { let name = self.expand_word(&command.name).await?; + // Check for nounset error from variable expansion + if let Some(err_msg) = self.nounset_error.take() { + // Restore variable saves since we're aborting + for (name, old) in var_saves.into_iter().rev() { + match old { + Some(v) => { + self.variables.insert(name, v); + } + None => { + self.variables.remove(&name); + } + } + } + self.last_exit_code = 1; + return Ok(ExecResult { + stdout: String::new(), + stderr: err_msg, + exit_code: 1, + control_flow: ControlFlow::Return(1), + }); + } + // If name is empty, this is an assignment-only command - keep permanently. // Preserve last_exit_code from any command substitution in the value // (bash behavior: `x=$(false)` sets $? to 1). 
@@ -2047,6 +2263,17 @@ impl Interpreter { } } + // Check for nounset error from argument expansion + if let Some(err_msg) = self.nounset_error.take() { + self.last_exit_code = 1; + return Ok(ExecResult { + stdout: String::new(), + stderr: err_msg, + exit_code: 1, + control_flow: ControlFlow::Return(1), + }); + } + // Handle input redirections first let stdin = self .process_input_redirections(stdin, &command.redirects) @@ -2161,6 +2388,18 @@ impl Interpreter { return self.execute_eval(&args, stdin, &command.redirects).await; } + // Handle `command` builtin - needs interpreter-level access to builtins/functions + if name == "command" { + return self + .execute_command_builtin(&args, stdin, &command.redirects) + .await; + } + + // Handle `getopts` builtin - needs to read/write shell variables (OPTIND, OPTARG) + if name == "getopts" { + return self.execute_getopts(&args, &command.redirects).await; + } + // Check for builtins if let Some(builtin) = self.builtins.get(name) { let ctx = builtins::Context { @@ -2538,6 +2777,300 @@ impl Interpreter { Ok(result) } + /// Execute the `getopts` builtin (POSIX option parsing). + /// + /// Usage: `getopts optstring name [args...]` + /// + /// Parses options from positional params (or `args`). + /// Uses/updates `OPTIND` variable for tracking position. + /// Sets `name` variable to the found option letter. + /// Sets `OPTARG` for options that take arguments (marked with `:` in optstring). + /// Returns 0 while options remain, 1 when done. + async fn execute_getopts( + &mut self, + args: &[String], + redirects: &[Redirect], + ) -> Result { + if args.len() < 2 { + let result = ExecResult::err("getopts: usage: getopts optstring name [arg ...]\n", 2); + return Ok(result); + } + + let optstring = &args[0]; + let varname = &args[1]; + + // Get the arguments to parse (remaining args, or positional params) + let parse_args: Vec = if args.len() > 2 { + args[2..].to_vec() + } else { + // Use positional parameters $1, $2, ... 
+ self.call_stack + .last() + .map(|frame| frame.positional.clone()) + .unwrap_or_default() + }; + + // Get current OPTIND (1-based index into args) + let optind: usize = self + .variables + .get("OPTIND") + .and_then(|v| v.parse().ok()) + .unwrap_or(1); + + // Check if we're past the end + if optind < 1 || optind > parse_args.len() { + self.variables.insert(varname.clone(), "?".to_string()); + return Ok(ExecResult { + stdout: String::new(), + stderr: String::new(), + exit_code: 1, + control_flow: crate::interpreter::ControlFlow::None, + }); + } + + let current_arg = &parse_args[optind - 1]; + + // Check if this is an option (starts with -) + if !current_arg.starts_with('-') || current_arg == "-" || current_arg == "--" { + self.variables.insert(varname.clone(), "?".to_string()); + if current_arg == "--" { + self.variables + .insert("OPTIND".to_string(), (optind + 1).to_string()); + } + return Ok(ExecResult { + stdout: String::new(), + stderr: String::new(), + exit_code: 1, + control_flow: crate::interpreter::ControlFlow::None, + }); + } + + // Parse the option character(s) from current arg + // Handle multi-char option groups like -abc + let opt_chars: Vec = current_arg[1..].chars().collect(); + + // Track position within the current argument for multi-char options + let char_idx: usize = self + .variables + .get("_OPTCHAR_IDX") + .and_then(|v| v.parse().ok()) + .unwrap_or(0); + + if char_idx >= opt_chars.len() { + // Should not happen, but advance + self.variables + .insert("OPTIND".to_string(), (optind + 1).to_string()); + self.variables.remove("_OPTCHAR_IDX"); + self.variables.insert(varname.clone(), "?".to_string()); + return Ok(ExecResult { + stdout: String::new(), + stderr: String::new(), + exit_code: 1, + control_flow: crate::interpreter::ControlFlow::None, + }); + } + + let opt_char = opt_chars[char_idx]; + let silent = optstring.starts_with(':'); + let spec = if silent { &optstring[1..] 
} else { optstring }; + + // Check if this option is in the optstring + if let Some(pos) = spec.find(opt_char) { + let needs_arg = spec.get(pos + 1..pos + 2) == Some(":"); + self.variables.insert(varname.clone(), opt_char.to_string()); + + if needs_arg { + // Option needs an argument + if char_idx + 1 < opt_chars.len() { + // Rest of current arg is the argument + let arg_val: String = opt_chars[char_idx + 1..].iter().collect(); + self.variables.insert("OPTARG".to_string(), arg_val); + self.variables + .insert("OPTIND".to_string(), (optind + 1).to_string()); + self.variables.remove("_OPTCHAR_IDX"); + } else if optind < parse_args.len() { + // Next arg is the argument + self.variables + .insert("OPTARG".to_string(), parse_args[optind].clone()); + self.variables + .insert("OPTIND".to_string(), (optind + 2).to_string()); + self.variables.remove("_OPTCHAR_IDX"); + } else { + // Missing argument + self.variables.remove("OPTARG"); + self.variables + .insert("OPTIND".to_string(), (optind + 1).to_string()); + self.variables.remove("_OPTCHAR_IDX"); + if silent { + self.variables.insert(varname.clone(), ":".to_string()); + self.variables + .insert("OPTARG".to_string(), opt_char.to_string()); + } else { + self.variables.insert(varname.clone(), "?".to_string()); + let mut result = ExecResult::ok(String::new()); + result.stderr = format!( + "bash: getopts: option requires an argument -- '{}'\n", + opt_char + ); + result = self.apply_redirections(result, redirects).await?; + return Ok(result); + } + } + } else { + // No argument needed + self.variables.remove("OPTARG"); + if char_idx + 1 < opt_chars.len() { + // More chars in this arg + self.variables + .insert("_OPTCHAR_IDX".to_string(), (char_idx + 1).to_string()); + } else { + // Move to next arg + self.variables + .insert("OPTIND".to_string(), (optind + 1).to_string()); + self.variables.remove("_OPTCHAR_IDX"); + } + } + } else { + // Unknown option + self.variables.remove("OPTARG"); + if char_idx + 1 < opt_chars.len() { + 
self.variables + .insert("_OPTCHAR_IDX".to_string(), (char_idx + 1).to_string()); + } else { + self.variables + .insert("OPTIND".to_string(), (optind + 1).to_string()); + self.variables.remove("_OPTCHAR_IDX"); + } + + if silent { + self.variables.insert(varname.clone(), "?".to_string()); + self.variables + .insert("OPTARG".to_string(), opt_char.to_string()); + } else { + self.variables.insert(varname.clone(), "?".to_string()); + let mut result = ExecResult::ok(String::new()); + result.stderr = format!("bash: getopts: illegal option -- '{}'\n", opt_char); + result = self.apply_redirections(result, redirects).await?; + return Ok(result); + } + } + + let mut result = ExecResult::ok(String::new()); + result = self.apply_redirections(result, redirects).await?; + Ok(result) + } + + /// Execute the `command` builtin. + /// + /// - `command -v name` — print command path/name if found (exit 0) or nothing (exit 1) + /// - `command -V name` — verbose: describe what `name` is + /// - `command name args...` — run `name` bypassing shell functions + async fn execute_command_builtin( + &mut self, + args: &[String], + _stdin: Option, + redirects: &[Redirect], + ) -> Result { + if args.is_empty() { + return Ok(ExecResult::ok(String::new())); + } + + let mut mode = ' '; // default: run the command + let mut cmd_args_start = 0; + + // Parse flags + let mut i = 0; + while i < args.len() { + let arg = &args[i]; + if arg == "-v" { + mode = 'v'; + i += 1; + } else if arg == "-V" { + mode = 'V'; + i += 1; + } else if arg == "-p" { + // -p: use default PATH (ignore in sandboxed env) + i += 1; + } else { + cmd_args_start = i; + break; + } + } + + if cmd_args_start >= args.len() { + return Ok(ExecResult::ok(String::new())); + } + + let cmd_name = &args[cmd_args_start]; + + match mode { + 'v' => { + // command -v: print name if it's a known command + let found = self.builtins.contains_key(cmd_name.as_str()) + || self.functions.contains_key(cmd_name.as_str()) + || is_keyword(cmd_name); + let 
mut result = if found { + ExecResult::ok(format!("{}\n", cmd_name)) + } else { + ExecResult { + stdout: String::new(), + stderr: String::new(), + exit_code: 1, + control_flow: crate::interpreter::ControlFlow::None, + } + }; + result = self.apply_redirections(result, redirects).await?; + Ok(result) + } + 'V' => { + // command -V: verbose description + let description = if self.functions.contains_key(cmd_name.as_str()) { + format!("{} is a function\n", cmd_name) + } else if self.builtins.contains_key(cmd_name.as_str()) { + format!("{} is a shell builtin\n", cmd_name) + } else if is_keyword(cmd_name) { + format!("{} is a shell keyword\n", cmd_name) + } else { + return Ok(ExecResult::err( + format!("bash: command: {}: not found\n", cmd_name), + 1, + )); + }; + let mut result = ExecResult::ok(description); + result = self.apply_redirections(result, redirects).await?; + Ok(result) + } + _ => { + // command name args...: run bypassing functions (use builtin only) + // Build a synthetic simple command and execute it, skipping function lookup + let remaining = &args[cmd_args_start..]; + if let Some(builtin) = self.builtins.get(remaining[0].as_str()) { + let builtin_args = &remaining[1..]; + let ctx = builtins::Context { + args: builtin_args, + env: &self.env, + variables: &mut self.variables, + cwd: &mut self.cwd, + fs: Arc::clone(&self.fs), + stdin: _stdin.as_deref(), + #[cfg(feature = "http_client")] + http_client: self.http_client.as_ref(), + #[cfg(feature = "git")] + git_client: self.git_client.as_ref(), + }; + let mut result = builtin.execute(ctx).await?; + result = self.apply_redirections(result, redirects).await?; + Ok(result) + } else { + Ok(ExecResult::err( + format!("bash: {}: command not found\n", remaining[0]), + 127, + )) + } + } + } + } + /// Process input redirections (< file, <<< string) async fn process_input_redirections( &mut self, @@ -2764,6 +3297,10 @@ impl Interpreter { } } WordPart::Variable(name) => { + // set -u (nounset): error on unset variables + 
if self.is_nounset() && !self.is_variable_set(name) { + self.nounset_error = Some(format!("bash: {}: unbound variable\n", name)); + } result.push_str(&self.expand_variable(name)); } WordPart::CommandSubstitution(commands) => { @@ -3725,6 +4262,48 @@ impl Interpreter { String::new() } + /// Check if a variable is set (for `set -u` / nounset). + fn is_variable_set(&self, name: &str) -> bool { + // Special variables are always "set" + if matches!( + name, + "?" | "#" | "@" | "*" | "$" | "!" | "-" | "RANDOM" | "LINENO" + ) { + return true; + } + // Positional params $0..$N + if let Ok(n) = name.parse::() { + if n == 0 { + return true; + } + return self + .call_stack + .last() + .map(|f| n <= f.positional.len()) + .unwrap_or(false); + } + // Local variables + for frame in self.call_stack.iter().rev() { + if frame.locals.contains_key(name) { + return true; + } + } + // Shell variables + if self.variables.contains_key(name) { + return true; + } + // Environment + self.env.contains_key(name) + } + + /// Check if nounset (`set -u`) is active. + fn is_nounset(&self) -> bool { + self.variables + .get("SHOPT_u") + .map(|v| v == "1") + .unwrap_or(false) + } + /// Set a local variable in the current call frame #[allow(dead_code)] fn set_local(&mut self, name: &str, value: &str) { @@ -3889,6 +4468,11 @@ impl Interpreter { /// Expand a glob pattern against the filesystem async fn expand_glob(&self, pattern: &str) -> Result> { + // Check for ** (recursive glob) + if pattern.contains("**") { + return self.expand_glob_recursive(pattern).await; + } + let mut matches = Vec::new(); // Split pattern into directory and filename parts @@ -3954,6 +4538,86 @@ impl Interpreter { matches.sort(); Ok(matches) } + + /// Expand a glob pattern containing ** (recursive directory matching). 
+ async fn expand_glob_recursive(&self, pattern: &str) -> Result> { + let is_absolute = pattern.starts_with('/'); + let components: Vec<&str> = pattern.split('/').filter(|s| !s.is_empty()).collect(); + + // Find the ** component + let star_star_idx = match components.iter().position(|&c| c == "**") { + Some(i) => i, + None => return Ok(Vec::new()), + }; + + // Build the base directory from components before ** + let base_dir = if is_absolute { + let mut p = PathBuf::from("/"); + for c in &components[..star_star_idx] { + p.push(c); + } + p + } else { + let mut p = self.cwd.clone(); + for c in &components[..star_star_idx] { + p.push(c); + } + p + }; + + // Pattern components after ** + let after_pattern: Vec<&str> = components[star_star_idx + 1..].to_vec(); + + // Collect all directories recursively (including the base) + let mut all_dirs = vec![base_dir.clone()]; + self.collect_dirs_recursive(&base_dir, &mut all_dirs).await; + + let mut matches = Vec::new(); + + for dir in &all_dirs { + if after_pattern.is_empty() { + // ** alone matches all files recursively + if let Ok(entries) = self.fs.read_dir(dir).await { + for entry in entries { + if !entry.metadata.file_type.is_dir() { + matches.push(dir.join(&entry.name).to_string_lossy().to_string()); + } + } + } + } else if after_pattern.len() == 1 { + // Single pattern after **: match files in this directory + if let Ok(entries) = self.fs.read_dir(dir).await { + for entry in entries { + if self.glob_match(&entry.name, after_pattern[0]) { + matches.push(dir.join(&entry.name).to_string_lossy().to_string()); + } + } + } + } + } + + matches.sort(); + Ok(matches) + } + + /// Recursively collect all subdirectories starting from dir. 
+ fn collect_dirs_recursive<'a>( + &'a self, + dir: &'a Path, + result: &'a mut Vec, + ) -> std::pin::Pin + Send + 'a>> { + Box::pin(async move { + if let Ok(entries) = self.fs.read_dir(dir).await { + for entry in entries { + if entry.metadata.file_type.is_dir() { + let subdir = dir.join(&entry.name); + result.push(subdir.clone()); + self.collect_dirs_recursive(&subdir, result).await; + } + } + } + }) + } } #[cfg(test)] diff --git a/crates/bashkit/src/parser/ast.rs b/crates/bashkit/src/parser/ast.rs index 4675aac7..421fd09f 100644 --- a/crates/bashkit/src/parser/ast.rs +++ b/crates/bashkit/src/parser/ast.rs @@ -108,6 +108,8 @@ pub enum CompoundCommand { Arithmetic(String), /// Time command - measure execution time Time(TimeCommand), + /// Conditional expression [[ ... ]] + Conditional(Vec), } /// Time command - wraps a command and measures its execution time. diff --git a/crates/bashkit/src/parser/lexer.rs b/crates/bashkit/src/parser/lexer.rs index 14f6e0e6..9db64b07 100644 --- a/crates/bashkit/src/parser/lexer.rs +++ b/crates/bashkit/src/parser/lexer.rs @@ -418,6 +418,34 @@ impl<'a> Lexer<'a> { } } } + } else if ch == '`' { + // Backtick command substitution: convert `cmd` to $(cmd) + self.advance(); // consume opening ` + word.push_str("$("); + while let Some(c) = self.peek_char() { + if c == '`' { + self.advance(); // consume closing ` + break; + } + if c == '\\' { + // In backticks, backslash only escapes $, `, \, newline + self.advance(); + if let Some(next) = self.peek_char() { + if matches!(next, '$' | '`' | '\\' | '\n') { + word.push(next); + self.advance(); + } else { + word.push('\\'); + word.push(next); + self.advance(); + } + } + } else { + word.push(c); + self.advance(); + } + } + word.push(')'); } else if ch == '\\' { self.advance(); if let Some(next) = self.peek_char() { @@ -492,6 +520,34 @@ impl<'a> Lexer<'a> { } } } + '`' => { + // Backtick command substitution inside double quotes + self.advance(); // consume opening ` + content.push_str("$("); + 
while let Some(c) = self.peek_char() { + if c == '`' { + self.advance(); + break; + } + if c == '\\' { + self.advance(); + if let Some(next) = self.peek_char() { + if matches!(next, '$' | '`' | '\\' | '"') { + content.push(next); + self.advance(); + } else { + content.push('\\'); + content.push(next); + self.advance(); + } + } + } else { + content.push(c); + self.advance(); + } + } + content.push(')'); + } _ => { content.push(ch); self.advance(); diff --git a/crates/bashkit/src/parser/mod.rs b/crates/bashkit/src/parser/mod.rs index eca1e0c7..9d8a5681 100644 --- a/crates/bashkit/src/parser/mod.rs +++ b/crates/bashkit/src/parser/mod.rs @@ -441,6 +441,11 @@ impl<'a> Parser<'a> { } } + // Check for conditional expression [[ ... ]] + if matches!(self.current_token, Some(tokens::Token::DoubleLeftBracket)) { + return self.parse_compound_with_redirects(|s| s.parse_conditional()); + } + // Check for arithmetic command ((expression)) if matches!(self.current_token, Some(tokens::Token::DoubleLeftParen)) { return self.parse_compound_with_redirects(|s| s.parse_arithmetic_command()); @@ -994,6 +999,118 @@ impl<'a> Parser<'a> { } /// Parse arithmetic command ((expression)) + /// Parse [[ conditional expression ]] + fn parse_conditional(&mut self) -> Result { + self.advance(); // consume '[[' + + let mut words = Vec::new(); + let mut saw_regex_op = false; + + loop { + match &self.current_token { + Some(tokens::Token::DoubleRightBracket) => { + self.advance(); // consume ']]' + break; + } + Some(tokens::Token::Word(w)) + | Some(tokens::Token::LiteralWord(w)) + | Some(tokens::Token::QuotedWord(w)) => { + let w_clone = w.clone(); + let is_quoted = + matches!(self.current_token, Some(tokens::Token::QuotedWord(_))); + let is_literal = + matches!(self.current_token, Some(tokens::Token::LiteralWord(_))); + + // After =~, collect the regex pattern (may contain parens) + if saw_regex_op { + let pattern = self.collect_conditional_regex_pattern(&w_clone); + 
words.push(Word::literal(&pattern)); + saw_regex_op = false; + continue; + } + + if w_clone == "=~" { + saw_regex_op = true; + } + + let word = if is_literal { + Word { + parts: vec![WordPart::Literal(w_clone)], + quoted: true, + } + } else { + let mut parsed = self.parse_word(w_clone); + if is_quoted { + parsed.quoted = true; + } + parsed + }; + words.push(word); + self.advance(); + } + // Operators that the lexer tokenizes separately + Some(tokens::Token::And) => { + words.push(Word::literal("&&")); + self.advance(); + } + Some(tokens::Token::Or) => { + words.push(Word::literal("||")); + self.advance(); + } + Some(tokens::Token::LeftParen) => { + words.push(Word::literal("(")); + self.advance(); + } + Some(tokens::Token::RightParen) => { + words.push(Word::literal(")")); + self.advance(); + } + None => { + return Err(crate::error::Error::Parse( + "unexpected end of input in [[ ]]".to_string(), + )); + } + _ => { + // Skip unknown tokens + self.advance(); + } + } + } + + Ok(CompoundCommand::Conditional(words)) + } + + /// Collect a regex pattern after =~ in [[ ]], handling parens and special chars. 
+ fn collect_conditional_regex_pattern(&mut self, first_word: &str) -> String { + let mut pattern = first_word.to_string(); + self.advance(); // consume the first word + + // Concatenate adjacent tokens that are part of the regex pattern + loop { + match &self.current_token { + Some(tokens::Token::DoubleRightBracket) => break, + Some(tokens::Token::And) | Some(tokens::Token::Or) => break, + Some(tokens::Token::LeftParen) => { + pattern.push('('); + self.advance(); + } + Some(tokens::Token::RightParen) => { + pattern.push(')'); + self.advance(); + } + Some(tokens::Token::Word(w)) + | Some(tokens::Token::LiteralWord(w)) + | Some(tokens::Token::QuotedWord(w)) => { + pattern.push_str(w); + self.advance(); + } + _ => break, + } + } + + pattern + } + fn parse_arithmetic_command(&mut self) -> Result { self.advance(); // consume '((' diff --git a/crates/bashkit/tests/security_failpoint_tests.rs b/crates/bashkit/tests/security_failpoint_tests.rs index 694cbf2f..238a609b 100644 --- a/crates/bashkit/tests/security_failpoint_tests.rs +++ b/crates/bashkit/tests/security_failpoint_tests.rs @@ -107,36 +107,49 @@ async fn security_loop_counter_reset() { } /// Test: Function depth bypass is detected -#[tokio::test] +/// +/// This test deliberately bypasses function depth limits, causing deep recursion. +/// Run on a thread with 8MB stack so the command limit (not the OS stack) halts it. 
+#[test] #[serial] -async fn security_function_depth_bypass() { - fail::cfg("limits::push_function", "return(skip_check)").unwrap(); - - // Try recursive function - without limit check, this would cause stack overflow - let result = run_script_with_limits( - r#" - recurse() { - echo "depth" - recurse - } - recurse - "#, - ExecutionLimits::new() - .max_function_depth(5) - .max_commands(100) - .timeout(Duration::from_secs(2)), - ) - .await; - - fail::cfg("limits::push_function", "off").unwrap(); - - // Should hit command limit even if function depth is bypassed - assert!( - result.stderr.contains("limit") - || result.stderr.contains("exceeded") - || result.exit_code != 0, - "Recursive function should be limited" - ); +fn security_function_depth_bypass() { + let handle = std::thread::Builder::new() + .stack_size(8 * 1024 * 1024) // 8 MB stack + .spawn(|| { + let rt = tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap(); + rt.block_on(async { + fail::cfg("limits::push_function", "return(skip_check)").unwrap(); + + let result = run_script_with_limits( + r#" + recurse() { + echo "depth" + recurse + } + recurse + "#, + ExecutionLimits::new() + .max_function_depth(5) + .max_commands(100) + .timeout(Duration::from_secs(2)), + ) + .await; + + fail::cfg("limits::push_function", "off").unwrap(); + + assert!( + result.stderr.contains("limit") + || result.stderr.contains("exceeded") + || result.exit_code != 0, + "Recursive function should be limited" + ); + }); + }) + .unwrap(); + handle.join().unwrap(); } // ============================================================================= diff --git a/crates/bashkit/tests/spec_cases/bash/command-subst.test.sh b/crates/bashkit/tests/spec_cases/bash/command-subst.test.sh index a115ed55..4c3417f8 100644 --- a/crates/bashkit/tests/spec_cases/bash/command-subst.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/command-subst.test.sh @@ -71,7 +71,6 @@ result=$(false); echo $? 
### end ### subst_backtick -### skip: backtick command substitution not implemented echo `echo hello` ### expect hello diff --git a/crates/bashkit/tests/spec_cases/bash/command.test.sh b/crates/bashkit/tests/spec_cases/bash/command.test.sh new file mode 100644 index 00000000..a6224d04 --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/command.test.sh @@ -0,0 +1,66 @@ +### command_v_builtin +# command -v finds builtins +command -v echo +### expect +echo +### end + +### command_v_not_found +# command -v returns 1 for unknown commands +### exit_code:1 +command -v nonexistent_cmd_xyz +### expect +### end + +### command_v_function +# command -v finds user functions +my_func() { echo hi; } +command -v my_func +### expect +my_func +### end + +### command_V_builtin +# command -V describes builtins +command -V echo +### expect +echo is a shell builtin +### end + +### command_V_function +### bash_diff: real bash also prints the function body +# command -V describes functions +my_func() { echo hi; } +command -V my_func +### expect +my_func is a function +### end + +### command_V_keyword +# command -V identifies keywords +command -V if +### expect +if is a shell keyword +### end + +### command_run_builtin +# command runs builtins directly +command echo hello +### expect +hello +### end + +### command_bypasses_function +# command bypasses function override +echo() { printf "OVERRIDE\n"; } +command echo real +### expect +real +### end + +### command_v_keyword +# command -v finds shell keywords +command -v for +### expect +for +### end diff --git a/crates/bashkit/tests/spec_cases/bash/conditional.test.sh b/crates/bashkit/tests/spec_cases/bash/conditional.test.sh new file mode 100644 index 00000000..006e3435 --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/conditional.test.sh @@ -0,0 +1,120 @@ +### cond_string_equal +# [[ string comparison == +[[ "hello" == "hello" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_string_not_equal +# [[ string != 
+[[ "hello" != "world" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_string_less +# [[ string < +[[ "abc" < "def" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_string_greater +# [[ string > +[[ "def" > "abc" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_negation +# [[ ! negation +[[ ! "hello" == "world" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_and +# [[ && logical and +[[ "a" == "a" && "b" == "b" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_or +# [[ || logical or +[[ "a" == "b" || "b" == "b" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_z_empty +# [[ -z empty string +[[ -z "" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_n_nonempty +# [[ -n non-empty string +[[ -n "hello" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_numeric_eq +# [[ numeric -eq +[[ 42 -eq 42 ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_numeric_lt +# [[ numeric -lt +[[ 5 -lt 10 ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_regex_match +# [[ =~ basic regex match +[[ "hello123" =~ ^hello[0-9]+$ ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_regex_no_match +# [[ =~ regex no match +[[ "hello" =~ ^[0-9]+$ ]] && echo "yes" || echo "no" +### expect +no +### end + +### cond_regex_capture +# [[ =~ with BASH_REMATCH capture groups +[[ "hello-world" =~ ^([a-z]+)-([a-z]+)$ ]] && echo "${BASH_REMATCH[1]} ${BASH_REMATCH[2]}" +### expect +hello world +### end + +### cond_regex_rematch_full +# [[ =~ BASH_REMATCH[0] is full match +[[ "abc123" =~ [0-9]+ ]] && echo "${BASH_REMATCH[0]}" +### expect +123 +### end + +### cond_variable_expansion +# [[ with variable expansion +VAR="hello" +[[ "$VAR" == "hello" ]] && echo "yes" || echo "no" +### expect +yes +### end + +### cond_exit_code_false +# [[ returns exit code 1 on false +[[ "a" == "b" ]] +echo $? 
+### expect +1 +### end diff --git a/crates/bashkit/tests/spec_cases/bash/getopts.test.sh b/crates/bashkit/tests/spec_cases/bash/getopts.test.sh new file mode 100644 index 00000000..995f0a41 --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/getopts.test.sh @@ -0,0 +1,98 @@ +### getopts_basic +# Parse simple options +OPTIND=1 +while getopts "ab" opt -a -b; do + echo "opt=$opt" +done +### expect +opt=a +opt=b +### end + +### getopts_with_arg +# Parse option with argument +OPTIND=1 +while getopts "f:" opt -f myfile; do + echo "opt=$opt OPTARG=$OPTARG" +done +### expect +opt=f OPTARG=myfile +### end + +### getopts_mixed +# Mix of options with and without args +OPTIND=1 +while getopts "vf:o:" opt -v -f input -o output; do + if [ -n "${OPTARG:-}" ]; then + echo "$opt $OPTARG" + else + echo "$opt" + fi +done +### expect +v +f input +o output +### end + +### getopts_unknown +# Unknown option produces ? +OPTIND=1 +getopts "ab" opt -x 2>/dev/null +echo "opt=$opt" +### expect +opt=? +### end + +### getopts_no_more_options +# Returns 1 when no more options +OPTIND=1 +### exit_code:1 +getopts "a" opt hello +### expect +### end + +### getopts_combined_flags +# Combined flags like -abc +OPTIND=1 +while getopts "abc" opt -abc; do + echo "$opt" +done +### expect +a +b +c +### end + +### getopts_combined_with_arg +# Combined flag with trailing argument +OPTIND=1 +while getopts "af:" opt -af myfile; do + if [ -n "${OPTARG:-}" ]; then + echo "$opt $OPTARG" + else + echo "$opt" + fi +done +### expect +a +f myfile +### end + +### getopts_silent_mode +# Silent mode (leading :) suppresses errors +OPTIND=1 +getopts ":ab" opt -x 2>/dev/null +echo "opt=$opt OPTARG=$OPTARG" +### expect +opt=? OPTARG=x +### end + +### getopts_double_dash +# -- stops option processing +OPTIND=1 +getopts "a" opt -- -a +echo "exit=$?" 
+### expect +exit=1 +### end diff --git a/crates/bashkit/tests/spec_cases/bash/globs.test.sh b/crates/bashkit/tests/spec_cases/bash/globs.test.sh index 30c69224..9f791b9d 100644 --- a/crates/bashkit/tests/spec_cases/bash/globs.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/globs.test.sh @@ -36,9 +36,17 @@ echo a > /x1.txt; echo b > /x2.txt; echo /x[12].txt ### end ### glob_recursive -### skip: recursive glob (**) not implemented -echo /**/*.txt +### bash_diff: Bashkit VFS has files, real bash CI filesystem does not +# Recursive glob with ** +mkdir -p /recur/sub1/deep +mkdir -p /recur/sub2 +echo a > /recur/f1.txt +echo b > /recur/sub1/f2.txt +echo c > /recur/sub1/deep/f3.txt +echo d > /recur/sub2/f4.txt +echo /recur/**/*.txt ### expect +/recur/f1.txt /recur/sub1/deep/f3.txt /recur/sub1/f2.txt /recur/sub2/f4.txt ### end ### glob_brace diff --git a/crates/bashkit/tests/spec_cases/bash/nounset.test.sh b/crates/bashkit/tests/spec_cases/bash/nounset.test.sh new file mode 100644 index 00000000..aa45afbd --- /dev/null +++ b/crates/bashkit/tests/spec_cases/bash/nounset.test.sh @@ -0,0 +1,64 @@ +### nounset_check_flag +# set -u sets SHOPT_u +### bash_diff: SHOPT_u is bashkit-internal variable +set -u +echo "SHOPT_u=$SHOPT_u" +### expect +SHOPT_u=1 +### end + +### nounset_unset_var_error +# set -u aborts on unset variables +### bash_diff: real bash prints to stderr and exits shell +### exit_code:1 +set -u +echo $UNDEFINED_VAR_XYZ +echo "should not reach" +### expect +### end + +### nounset_set_var_ok +# set -u allows set variables +set -u +MY_VAR=hello +echo "$MY_VAR" +### expect +hello +### end + +### nounset_special_vars +# set -u allows special variables +set -u +echo "$?" 
+### expect +0 +### end + +### nounset_empty_var_ok +# set -u allows empty but set variables +set -u +EMPTY="" +echo "value=$EMPTY" +### expect +value= +### end + +### nounset_default_value_ok +# ${var:-default} should not error under set -u +### skip: parameter expansion with :- needs nounset awareness +set -u +echo "${UNDEFINED_XYZ:-fallback}" +### expect +fallback +### end + +### nounset_disable +# set +u disables nounset +set -u +set +u +echo "$UNDEFINED_VAR_XYZ" +echo "ok" +### expect + +ok +### end diff --git a/crates/bashkit/tests/spec_cases/bash/paste.test.sh b/crates/bashkit/tests/spec_cases/bash/paste.test.sh index dc5ea20e..8c2c5e73 100644 --- a/crates/bashkit/tests/spec_cases/bash/paste.test.sh +++ b/crates/bashkit/tests/spec_cases/bash/paste.test.sh @@ -8,7 +8,6 @@ c ### end ### paste_serial -### skip: paste -s requires file arguments, not stdin-only in bashkit # Serial mode merges lines with tabs printf 'a\nb\nc\n' | paste -s ### expect @@ -16,8 +15,7 @@ a b c ### end ### paste_custom_delimiter -### skip: paste -d requires file arguments -# Custom delimiter +# Custom delimiter with serial mode printf '1\n2\n3\n' | paste -d, -s ### expect 1,2,3 diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index e1286967..26f08d75 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -107,17 +107,17 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. 
See ## Spec Test Coverage -**Total spec test cases:** 1043 (954 pass, 89 skip) +**Total spec test cases:** 1085 (997 pass, 88 skip) | Category | Cases | In CI | Pass | Skip | Notes | |----------|-------|-------|------|------|-------| -| Bash (core) | 640 | Yes | 592 | 48 | `bash_spec_tests` in CI | +| Bash (core) | 682 | Yes | 642 | 40 | `bash_spec_tests` in CI | | AWK | 90 | Yes | 73 | 17 | loops, arrays, -v, ternary, field assign | | Grep | 82 | Yes | 79 | 3 | now with -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude | | Sed | 65 | Yes | 53 | 12 | hold space, change, regex ranges, -E | | JQ | 108 | Yes | 100 | 8 | reduce, walk, regex funcs, --arg/--argjson, combined flags | | Python | 58 | Yes | 50 | 8 | **Experimental.** VFS bridging, pathlib, env vars | -| **Total** | **1043** | **Yes** | **948** | **95** | | +| **Total** | **1085** | **Yes** | **997** | **88** | | ### Bash Spec Tests Breakdown @@ -129,8 +129,10 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. See | bash-command.test.sh | 34 | bash/sh re-invocation | | brace-expansion.test.sh | 21 | {a,b,c}, {1..5}, for-loop brace expansion | | column.test.sh | 10 | column alignment | +| command.test.sh | 9 | `command -v`, `-V`, function bypass | | command-not-found.test.sh | 17 | unknown command handling | -| command-subst.test.sh | 14 | 2 skipped | +| conditional.test.sh | 17 | `[[ ]]` conditionals, `=~` regex, BASH_REMATCH | +| command-subst.test.sh | 14 | includes backtick substitution (1 skipped) | | control-flow.test.sh | 33 | if/elif/else, for, while, case | | cuttr.test.sh | 32 | cut and tr commands (25 skipped) | | date.test.sh | 38 | format specifiers, `-d` relative/compound/epoch, `-R`, `-I`, `%N` (3 skipped) | @@ -140,13 +142,15 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. 
See | fileops.test.sh | 21 | | | find.test.sh | 10 | file search | | functions.test.sh | 14 | | -| globs.test.sh | 12 | for-loop glob expansion, 1 skipped | +| getopts.test.sh | 9 | POSIX option parsing, combined flags, silent mode | +| globs.test.sh | 12 | for-loop glob expansion, recursive `**` | | headtail.test.sh | 14 | | | herestring.test.sh | 8 | 1 skipped | | hextools.test.sh | 5 | od/xxd/hexdump (3 skipped) | | negative-tests.test.sh | 16 | error conditions (3 skipped) | | nl.test.sh | 14 | line numbering | -| paste.test.sh | 4 | line merging (2 skipped) | +| nounset.test.sh | 7 | `set -u` unbound variable checks (1 skipped) | +| paste.test.sh | 4 | line merging with `-s` serial and `-d` delimiter | | path.test.sh | 14 | | | pipes-redirects.test.sh | 19 | includes stderr redirects | | printf.test.sh | 24 | format specifiers, array expansion | @@ -173,9 +177,9 @@ Features that may be added in the future (not intentionally excluded): | Coprocesses `coproc` | Low | Rarely used | | Extended globs `@()` `!()` | Medium | Requires `shopt -s extglob` | | Associative arrays `declare -A` | Medium | Bash 4+ feature | -| `[[ =~ ]]` regex matching | Medium | Bash extension | -| `getopts` | Medium | POSIX option parsing | -| `command` builtin | Medium | POSIX command lookup | +| ~~`[[ =~ ]]` regex matching~~ | ~~Medium~~ | Implemented: `[[ ]]` conditionals with `=~` and BASH_REMATCH | +| ~~`getopts`~~ | ~~Medium~~ | Implemented: POSIX option parsing | +| ~~`command` builtin~~ | ~~Medium~~ | Implemented: `-v`, `-V`, bypass functions | | `alias` | Low | Interactive feature | | History expansion | Out of scope | Interactive only | @@ -197,14 +201,14 @@ Features that may be added in the future (not intentionally excluded): ### Implemented -**82 core builtins + 3 feature-gated = 85 total** +**84 core builtins + 3 feature-gated = 87 total** `echo`, `printf`, `cat`, `nl`, `cd`, `pwd`, `true`, `false`, `exit`, `test`, `[`, `export`, `set`, `unset`, `local`, `source`, `.`, 
`read`, `shift`, `break`, `continue`, `return`, `grep`, `sed`, `awk`, `jq`, `sleep`, `head`, `tail`, `basename`, `dirname`, `mkdir`, `rm`, `cp`, `mv`, `touch`, `chmod`, `wc`, `sort`, `uniq`, `cut`, `tr`, `paste`, `column`, `diff`, `comm`, `date`, -`wait`, `curl`, `wget`, `timeout`, +`wait`, `curl`, `wget`, `timeout`, `command`, `getopts`, `time` (keyword), `whoami`, `hostname`, `uname`, `id`, `ls`, `rmdir`, `find`, `xargs`, `tee`, `:` (colon), `eval`, `readonly`, `times`, `bash`, `sh`, `od`, `xxd`, `hexdump`, `strings`, @@ -215,8 +219,8 @@ Features that may be added in the future (not intentionally excluded): ### Not Yet Implemented -`ln`, `chown`, `type`, `which`, `command`, `hash`, `declare`, -`typeset`, `getopts`, `kill` +`ln`, `chown`, `type`, `which`, `hash`, `declare`, +`typeset`, `kill` ## Text Processing