diff --git a/crates/bashkit/src/builtins/grep.rs b/crates/bashkit/src/builtins/grep.rs index f87fb856..a44d0067 100644 --- a/crates/bashkit/src/builtins/grep.rs +++ b/crates/bashkit/src/builtins/grep.rs @@ -59,13 +59,15 @@ struct GrepOptions { whole_line: bool, after_context: usize, before_context: usize, - show_filename: bool, // -H: always show filename - no_filename: bool, // -h: never show filename - byte_offset: bool, // -b: show byte offset - pattern_file: Option, // -f: read patterns from file - null_terminated: bool, // -z: null-terminated lines - recursive: bool, // -r: recursive search - binary_as_text: bool, // -a: treat binary as text + show_filename: bool, // -H: always show filename + no_filename: bool, // -h: never show filename + byte_offset: bool, // -b: show byte offset + pattern_file: Option, // -f: read patterns from file + null_terminated: bool, // -z: null-terminated lines + recursive: bool, // -r: recursive search + binary_as_text: bool, // -a: treat binary as text + include_patterns: Vec, // --include=GLOB + exclude_patterns: Vec, // --exclude=GLOB } impl GrepOptions { @@ -94,6 +96,8 @@ impl GrepOptions { null_terminated: false, recursive: false, binary_as_text: false, + include_patterns: Vec::new(), + exclude_patterns: Vec::new(), }; let mut positional = Vec::new(); @@ -262,10 +266,10 @@ impl GrepOptions { // --color / --color=always/never/auto - no-op (we don't output ANSI) } else if opt == "line-buffered" { // --line-buffered - no-op (output is already line-oriented) - } else if opt.starts_with("include=") { - // --include=PATTERN - skip for now, requires -r - } else if opt.starts_with("exclude=") { - // --exclude=PATTERN - skip for now, requires -r + } else if let Some(pat) = opt.strip_prefix("include=") { + opts.include_patterns.push(strip_quotes(pat)); + } else if let Some(pat) = opt.strip_prefix("exclude=") { + opts.exclude_patterns.push(strip_quotes(pat)); } // Ignore other unknown long options } else { @@ -342,6 +346,37 @@ impl 
/// Strip one pair of matching surrounding quotes (`'...'` or `"..."`) from a value.
///
/// The value is returned unchanged when it is not wrapped in a matching pair.
/// A lone quote character (`'` or `"`) is *not* a pair: it is returned as-is
/// instead of being sliced, which previously produced an out-of-range slice
/// (`s[1..0]`) and a panic.
fn strip_quotes(s: &str) -> String {
    for quote in ['\'', '"'] {
        // `strip_suffix` runs on what is left after removing the opening quote,
        // so a single quote character can never count as both ends of the pair.
        if let Some(inner) = s
            .strip_prefix(quote)
            .and_then(|rest| rest.strip_suffix(quote))
        {
            return inner.to_string();
        }
    }
    s.to_string()
}

/// Check whether a filename matches a glob pattern.
///
/// Supported wildcards:
/// - `*` matches any run of characters, including the empty run, and may
///   appear anywhere in the pattern, any number of times (`*.txt`, `readme*`,
///   `*foo*`, `data-*.csv`).
/// - `?` matches exactly one character.
///
/// Every other character must match literally. Bracket expressions (`[a-z]`)
/// are not interpreted and are compared as literal text.
///
/// Matching works on `char`s, so `?` consumes one Unicode scalar value rather
/// than one byte.
fn glob_matches(filename: &str, pattern: &str) -> bool {
    let name: Vec<char> = filename.chars().collect();
    let pat: Vec<char> = pattern.chars().collect();

    let mut n = 0; // next unmatched index into `name`
    let mut p = 0; // next unmatched index into `pat`
    // Most recent `*`: (its index in `pat`, the `name` index it currently ends at).
    let mut last_star: Option<(usize, usize)> = None;

    while n < name.len() {
        match pat.get(p).copied() {
            Some('*') => {
                // Start by letting the star match nothing; widen it on mismatch.
                last_star = Some((p, n));
                p += 1;
            }
            Some(c) if c == '?' || c == name[n] => {
                n += 1;
                p += 1;
            }
            _ => match last_star {
                // Mismatch (or pattern exhausted): let the last star absorb one
                // more character and retry the rest of the pattern from there.
                Some((star, covered)) => {
                    last_star = Some((star, covered + 1));
                    p = star + 1;
                    n = covered + 1;
                }
                None => return false,
            },
        }
    }

    // The name is consumed; whatever is left of the pattern must be able to
    // match the empty string, i.e. consist only of `*`.
    pat[p..].iter().all(|&c| c == '*')
}

/// Decide whether a file takes part in a recursive search.
///
/// - When `include` is non-empty, the filename must match at least one of its
///   patterns.
/// - A filename matching any `exclude` pattern is rejected, even if it also
///   matches an include pattern.
/// - With both lists empty every file is accepted.
fn should_include_file(filename: &str, include: &[String], exclude: &[String]) -> bool {
    let included = include.is_empty() || include.iter().any(|p| glob_matches(filename, p));
    let excluded = exclude.iter().any(|p| glob_matches(filename, p));
    included && !excluded
}
@@ -466,12 +501,17 @@ impl Builtin for Grep { let entry_path = path.join(&entry.name); if entry.metadata.file_type.is_dir() { dirs_to_process.push(entry_path); - } else if entry.metadata.file_type.is_file() { + } else if entry.metadata.file_type.is_file() + && should_include_file( + &entry.name, + &opts.include_patterns, + &opts.exclude_patterns, + ) + { if let Ok(content) = ctx.fs.read_file(&entry_path).await { let text = process_content(content, opts.binary_as_text); inputs.push((entry_path.to_string_lossy().into_owned(), text)); } - // Skip unreadable files in recursive mode } } } else if let Ok(content) = ctx.fs.read_file(&path).await { @@ -507,13 +547,13 @@ impl Builtin for Grep { inputs }; - // -H forces filename display, -h suppresses it, otherwise show for multiple files + // -H forces filename display, -h suppresses it, otherwise show for multiple files/recursive let show_filename = if opts.no_filename { false - } else if opts.show_filename { + } else if opts.show_filename || opts.recursive { true } else { - opts.files.len() > 1 + inputs.len() > 1 }; let has_context = opts.before_context > 0 || opts.after_context > 0; @@ -753,7 +793,7 @@ impl Builtin for Grep { #[allow(clippy::unwrap_used)] mod tests { use super::*; - use crate::fs::InMemoryFs; + use crate::fs::{FileSystem, InMemoryFs}; use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; @@ -895,4 +935,106 @@ mod tests { assert_eq!(result.exit_code, 0); assert_eq!(result.stdout, "(stdin)\n"); } + + #[test] + fn test_glob_matches() { + assert!(glob_matches("file.txt", "*.txt")); + assert!(!glob_matches("file.log", "*.txt")); + assert!(glob_matches("readme.md", "readme*")); + assert!(!glob_matches("license.md", "readme*")); + assert!(glob_matches("exact.txt", "exact.txt")); + assert!(!glob_matches("other.txt", "exact.txt")); + } + + #[test] + fn test_should_include_file() { + assert!(should_include_file("foo.txt", &[], &[])); + + let inc = vec!["*.txt".to_string()]; + 
assert!(should_include_file("foo.txt", &inc, &[])); + assert!(!should_include_file("foo.log", &inc, &[])); + + let exc = vec!["*.log".to_string()]; + assert!(should_include_file("foo.txt", &[], &exc)); + assert!(!should_include_file("foo.log", &[], &exc)); + + assert!(should_include_file("foo.txt", &inc, &exc)); + assert!(!should_include_file("foo.log", &inc, &exc)); + } + + #[tokio::test] + async fn test_grep_recursive_include() { + let grep = Grep; + let fs = Arc::new(InMemoryFs::new()); + fs.mkdir(&PathBuf::from("/dir"), true).await.unwrap(); + fs.write_file(&PathBuf::from("/dir/a.txt"), b"hello\n") + .await + .unwrap(); + fs.write_file(&PathBuf::from("/dir/b.log"), b"hello\n") + .await + .unwrap(); + + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args: Vec = ["-r", "--include=*.txt", "hello", "/dir"] + .iter() + .map(|s| s.to_string()) + .collect(); + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin: None, + #[cfg(feature = "http_client")] + http_client: None, + #[cfg(feature = "git")] + git_client: None, + }; + + let result = grep.execute(ctx).await.unwrap(); + assert_eq!(result.exit_code, 0); + assert!(result.stdout.contains("/dir/a.txt:hello")); + assert!(!result.stdout.contains("b.log")); + } + + #[tokio::test] + async fn test_grep_recursive_exclude() { + let grep = Grep; + let fs = Arc::new(InMemoryFs::new()); + fs.mkdir(&PathBuf::from("/dir"), true).await.unwrap(); + fs.write_file(&PathBuf::from("/dir/a.txt"), b"hello\n") + .await + .unwrap(); + fs.write_file(&PathBuf::from("/dir/b.log"), b"hello\n") + .await + .unwrap(); + + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args: Vec = ["-r", "--exclude=*.log", "hello", "/dir"] + .iter() + .map(|s| s.to_string()) + .collect(); + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin: None, + #[cfg(feature = "http_client")] + 
http_client: None, + #[cfg(feature = "git")] + git_client: None, + }; + + let result = grep.execute(ctx).await.unwrap(); + assert_eq!(result.exit_code, 0); + assert!(result.stdout.contains("/dir/a.txt:hello")); + assert!(!result.stdout.contains("b.log")); + } } diff --git a/crates/bashkit/tests/spec_cases/grep/grep.test.sh b/crates/bashkit/tests/spec_cases/grep/grep.test.sh index a1bec5af..0c599d56 100644 --- a/crates/bashkit/tests/spec_cases/grep/grep.test.sh +++ b/crates/bashkit/tests/spec_cases/grep/grep.test.sh @@ -336,17 +336,25 @@ foo ### end ### grep_include_pattern -### skip: --include not implemented -grep --include='*.txt' pattern /some/dir -### exit_code: 1 +# Include only .txt files in recursive search +mkdir -p /tmp/grepdir && printf 'hello\n' > /tmp/grepdir/a.txt && printf 'hello\n' > /tmp/grepdir/b.log && grep -r --include='*.txt' hello /tmp/grepdir ### expect +/tmp/grepdir/a.txt:hello ### end ### grep_exclude_pattern -### skip: --exclude not implemented -grep --exclude='*.log' pattern /some/dir -### exit_code: 1 +# Exclude .log files in recursive search +mkdir -p /tmp/grepdir2 && printf 'hello\n' > /tmp/grepdir2/a.txt && printf 'hello\n' > /tmp/grepdir2/b.log && grep -r --exclude='*.log' hello /tmp/grepdir2 +### expect +/tmp/grepdir2/a.txt:hello +### end + +### grep_include_no_match +# Include pattern that matches no files +mkdir -p /tmp/grepdir3 && printf 'hello\n' > /tmp/grepdir3/a.log && grep -r --include='*.txt' hello /tmp/grepdir3 +echo $? ### expect +1 ### end ### grep_line_buffered diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index 605a4680..851c83c0 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -107,17 +107,17 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. 
See ## Spec Test Coverage -**Total spec test cases:** 1042 +**Total spec test cases:** 1043 | Category | Cases | In CI | Pass | Skip | Notes | |----------|-------|-------|------|------|-------| | Bash (core) | 640 | Yes | 592 | 48 | `bash_spec_tests` in CI | | AWK | 90 | Yes | 73 | 17 | loops, arrays, -v, ternary, field assign | -| Grep | 81 | Yes | 76 | 5 | now with -z, -r, -a, -b, -H, -h, -f, -P | +| Grep | 82 | Yes | 79 | 3 | now with -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude | | Sed | 65 | Yes | 53 | 12 | hold space, change, regex ranges, -E | | JQ | 108 | Yes | 100 | 8 | reduce, walk, regex funcs, --arg/--argjson, combined flags | | Python | 58 | Yes | 50 | 8 | **Experimental.** VFS bridging, pathlib, env vars | -| **Total** | **1042** | **Yes** | **944** | **98** | | +| **Total** | **1043** | **Yes** | **947** | **96** | | ### Bash Spec Tests Breakdown @@ -277,19 +277,19 @@ Features that may be added in the future (not intentionally excluded): ### Grep Limitations -**Skipped Tests (5):** +**Skipped Tests (3):** | Feature | Count | Notes | |---------|-------|-------| | Recursive test | 1 | Test needs VFS setup with files | | Pattern file `-f` | 1 | Requires file redirection support | -| Include/exclude | 2 | `--include`, `--exclude` patterns | | Binary detection | 1 | Auto-detect binary files | **Implemented Features:** - Basic flags: `-i`, `-v`, `-c`, `-n`, `-o`, `-l`, `-w`, `-E`, `-F`, `-q`, `-m`, `-x` - Context: `-A`, `-B`, `-C` (after/before/context lines) - Multiple patterns: `-e` +- Include/exclude: `--include=GLOB`, `--exclude=GLOB` for recursive search - Pattern file: `-f` (requires file to exist in VFS) - Filename control: `-H` (always show), `-h` (never show) - Byte offset: `-b`