From 5d07e00466f84780f4bcc958451ade751c7dcccc Mon Sep 17 00:00:00 2001 From: Claude Date: Mon, 23 Feb 2026 07:03:26 +0000 Subject: [PATCH] feat(grep): add --include/--exclude glob patterns for recursive search Implement --include=GLOB and --exclude=GLOB options for grep -r recursive search. Filenames are matched against glob patterns to filter which files are searched. Also fix recursive grep to always show filenames (matching real grep behavior) and use input count instead of CLI arg count for multi-file filename display. Remove 2 skip markers, add 1 new spec test, 4 new unit tests. https://claude.ai/code/session_012rzB3FRw7yoQWCG1mxyW7J --- crates/bashkit/src/builtins/grep.rs | 176 ++++++++++++++++-- .../tests/spec_cases/grep/grep.test.sh | 20 +- specs/009-implementation-status.md | 10 +- 3 files changed, 178 insertions(+), 28 deletions(-) diff --git a/crates/bashkit/src/builtins/grep.rs b/crates/bashkit/src/builtins/grep.rs index f87fb856..a44d0067 100644 --- a/crates/bashkit/src/builtins/grep.rs +++ b/crates/bashkit/src/builtins/grep.rs @@ -59,13 +59,15 @@ struct GrepOptions { whole_line: bool, after_context: usize, before_context: usize, - show_filename: bool, // -H: always show filename - no_filename: bool, // -h: never show filename - byte_offset: bool, // -b: show byte offset - pattern_file: Option, // -f: read patterns from file - null_terminated: bool, // -z: null-terminated lines - recursive: bool, // -r: recursive search - binary_as_text: bool, // -a: treat binary as text + show_filename: bool, // -H: always show filename + no_filename: bool, // -h: never show filename + byte_offset: bool, // -b: show byte offset + pattern_file: Option, // -f: read patterns from file + null_terminated: bool, // -z: null-terminated lines + recursive: bool, // -r: recursive search + binary_as_text: bool, // -a: treat binary as text + include_patterns: Vec, // --include=GLOB + exclude_patterns: Vec, // --exclude=GLOB } impl GrepOptions { @@ -94,6 +96,8 @@ impl GrepOptions 
{ null_terminated: false, recursive: false, binary_as_text: false, + include_patterns: Vec::new(), + exclude_patterns: Vec::new(), }; let mut positional = Vec::new(); @@ -262,10 +266,10 @@ impl GrepOptions { // --color / --color=always/never/auto - no-op (we don't output ANSI) } else if opt == "line-buffered" { // --line-buffered - no-op (output is already line-oriented) - } else if opt.starts_with("include=") { - // --include=PATTERN - skip for now, requires -r - } else if opt.starts_with("exclude=") { - // --exclude=PATTERN - skip for now, requires -r + } else if let Some(pat) = opt.strip_prefix("include=") { + opts.include_patterns.push(strip_quotes(pat)); + } else if let Some(pat) = opt.strip_prefix("exclude=") { + opts.exclude_patterns.push(strip_quotes(pat)); } // Ignore other unknown long options } else { @@ -342,6 +346,37 @@ impl GrepOptions { } } +/// Strip surrounding single or double quotes from a value +fn strip_quotes(s: &str) -> String { + if (s.starts_with('\'') && s.ends_with('\'')) || (s.starts_with('"') && s.ends_with('"')) { + s[1..s.len() - 1].to_string() + } else { + s.to_string() + } +} + +/// Check if a filename matches a simple glob pattern (e.g., "*.txt", "*.log") +fn glob_matches(filename: &str, pattern: &str) -> bool { + if let Some(suffix) = pattern.strip_prefix('*') { + filename.ends_with(suffix) + } else if let Some(prefix) = pattern.strip_suffix('*') { + filename.starts_with(prefix) + } else { + filename == pattern + } +} + +/// Check if a filename should be included based on include/exclude patterns +fn should_include_file(filename: &str, include: &[String], exclude: &[String]) -> bool { + if !include.is_empty() && !include.iter().any(|p| glob_matches(filename, p)) { + return false; + } + if exclude.iter().any(|p| glob_matches(filename, p)) { + return false; + } + true +} + /// Convert a BRE (Basic Regular Expression) pattern to ERE for the regex crate. /// In BRE: ( ) { } are literal; \( \) \{ \} \+ \? \| are metacharacters. 
/// In ERE/regex crate: ( ) { } + ? | are metacharacters. @@ -466,12 +501,17 @@ impl Builtin for Grep { let entry_path = path.join(&entry.name); if entry.metadata.file_type.is_dir() { dirs_to_process.push(entry_path); - } else if entry.metadata.file_type.is_file() { + } else if entry.metadata.file_type.is_file() + && should_include_file( + &entry.name, + &opts.include_patterns, + &opts.exclude_patterns, + ) + { if let Ok(content) = ctx.fs.read_file(&entry_path).await { let text = process_content(content, opts.binary_as_text); inputs.push((entry_path.to_string_lossy().into_owned(), text)); } - // Skip unreadable files in recursive mode } } } else if let Ok(content) = ctx.fs.read_file(&path).await { @@ -507,13 +547,13 @@ impl Builtin for Grep { inputs }; - // -H forces filename display, -h suppresses it, otherwise show for multiple files + // -H forces filename display, -h suppresses it, otherwise show for multiple files/recursive let show_filename = if opts.no_filename { false - } else if opts.show_filename { + } else if opts.show_filename || opts.recursive { true } else { - opts.files.len() > 1 + inputs.len() > 1 }; let has_context = opts.before_context > 0 || opts.after_context > 0; @@ -753,7 +793,7 @@ impl Builtin for Grep { #[allow(clippy::unwrap_used)] mod tests { use super::*; - use crate::fs::InMemoryFs; + use crate::fs::{FileSystem, InMemoryFs}; use std::collections::HashMap; use std::path::PathBuf; use std::sync::Arc; @@ -895,4 +935,106 @@ mod tests { assert_eq!(result.exit_code, 0); assert_eq!(result.stdout, "(stdin)\n"); } + + #[test] + fn test_glob_matches() { + assert!(glob_matches("file.txt", "*.txt")); + assert!(!glob_matches("file.log", "*.txt")); + assert!(glob_matches("readme.md", "readme*")); + assert!(!glob_matches("license.md", "readme*")); + assert!(glob_matches("exact.txt", "exact.txt")); + assert!(!glob_matches("other.txt", "exact.txt")); + } + + #[test] + fn test_should_include_file() { + assert!(should_include_file("foo.txt", &[], &[])); + 
+ let inc = vec!["*.txt".to_string()]; + assert!(should_include_file("foo.txt", &inc, &[])); + assert!(!should_include_file("foo.log", &inc, &[])); + + let exc = vec!["*.log".to_string()]; + assert!(should_include_file("foo.txt", &[], &exc)); + assert!(!should_include_file("foo.log", &[], &exc)); + + assert!(should_include_file("foo.txt", &inc, &exc)); + assert!(!should_include_file("foo.log", &inc, &exc)); + } + + #[tokio::test] + async fn test_grep_recursive_include() { + let grep = Grep; + let fs = Arc::new(InMemoryFs::new()); + fs.mkdir(&PathBuf::from("/dir"), true).await.unwrap(); + fs.write_file(&PathBuf::from("/dir/a.txt"), b"hello\n") + .await + .unwrap(); + fs.write_file(&PathBuf::from("/dir/b.log"), b"hello\n") + .await + .unwrap(); + + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args: Vec = ["-r", "--include=*.txt", "hello", "/dir"] + .iter() + .map(|s| s.to_string()) + .collect(); + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin: None, + #[cfg(feature = "http_client")] + http_client: None, + #[cfg(feature = "git")] + git_client: None, + }; + + let result = grep.execute(ctx).await.unwrap(); + assert_eq!(result.exit_code, 0); + assert!(result.stdout.contains("/dir/a.txt:hello")); + assert!(!result.stdout.contains("b.log")); + } + + #[tokio::test] + async fn test_grep_recursive_exclude() { + let grep = Grep; + let fs = Arc::new(InMemoryFs::new()); + fs.mkdir(&PathBuf::from("/dir"), true).await.unwrap(); + fs.write_file(&PathBuf::from("/dir/a.txt"), b"hello\n") + .await + .unwrap(); + fs.write_file(&PathBuf::from("/dir/b.log"), b"hello\n") + .await + .unwrap(); + + let mut vars = HashMap::new(); + let mut cwd = PathBuf::from("/"); + let args: Vec = ["-r", "--exclude=*.log", "hello", "/dir"] + .iter() + .map(|s| s.to_string()) + .collect(); + + let ctx = Context { + args: &args, + env: &HashMap::new(), + variables: &mut vars, + cwd: &mut cwd, + fs, + stdin: 
None, + #[cfg(feature = "http_client")] + http_client: None, + #[cfg(feature = "git")] + git_client: None, + }; + + let result = grep.execute(ctx).await.unwrap(); + assert_eq!(result.exit_code, 0); + assert!(result.stdout.contains("/dir/a.txt:hello")); + assert!(!result.stdout.contains("b.log")); + } } diff --git a/crates/bashkit/tests/spec_cases/grep/grep.test.sh b/crates/bashkit/tests/spec_cases/grep/grep.test.sh index a1bec5af..0c599d56 100644 --- a/crates/bashkit/tests/spec_cases/grep/grep.test.sh +++ b/crates/bashkit/tests/spec_cases/grep/grep.test.sh @@ -336,17 +336,25 @@ foo ### end ### grep_include_pattern -### skip: --include not implemented -grep --include='*.txt' pattern /some/dir -### exit_code: 1 +# Include only .txt files in recursive search +mkdir -p /tmp/grepdir && printf 'hello\n' > /tmp/grepdir/a.txt && printf 'hello\n' > /tmp/grepdir/b.log && grep -r --include='*.txt' hello /tmp/grepdir ### expect +/tmp/grepdir/a.txt:hello ### end ### grep_exclude_pattern -### skip: --exclude not implemented -grep --exclude='*.log' pattern /some/dir -### exit_code: 1 +# Exclude .log files in recursive search +mkdir -p /tmp/grepdir2 && printf 'hello\n' > /tmp/grepdir2/a.txt && printf 'hello\n' > /tmp/grepdir2/b.log && grep -r --exclude='*.log' hello /tmp/grepdir2 +### expect +/tmp/grepdir2/a.txt:hello +### end + +### grep_include_no_match +# Include pattern that matches no files +mkdir -p /tmp/grepdir3 && printf 'hello\n' > /tmp/grepdir3/a.log && grep -r --include='*.txt' hello /tmp/grepdir3 +echo $? ### expect +1 ### end ### grep_line_buffered diff --git a/specs/009-implementation-status.md b/specs/009-implementation-status.md index 605a4680..851c83c0 100644 --- a/specs/009-implementation-status.md +++ b/specs/009-implementation-status.md @@ -107,17 +107,17 @@ Bashkit implements IEEE 1003.1-2024 Shell Command Language. 
See ## Spec Test Coverage -**Total spec test cases:** 1042 +**Total spec test cases:** 1043 | Category | Cases | In CI | Pass | Skip | Notes | |----------|-------|-------|------|------|-------| | Bash (core) | 640 | Yes | 592 | 48 | `bash_spec_tests` in CI | | AWK | 90 | Yes | 73 | 17 | loops, arrays, -v, ternary, field assign | -| Grep | 81 | Yes | 76 | 5 | now with -z, -r, -a, -b, -H, -h, -f, -P | +| Grep | 82 | Yes | 79 | 3 | now with -z, -r, -a, -b, -H, -h, -f, -P, --include, --exclude | | Sed | 65 | Yes | 53 | 12 | hold space, change, regex ranges, -E | | JQ | 108 | Yes | 100 | 8 | reduce, walk, regex funcs, --arg/--argjson, combined flags | | Python | 58 | Yes | 50 | 8 | **Experimental.** VFS bridging, pathlib, env vars | -| **Total** | **1042** | **Yes** | **944** | **98** | | +| **Total** | **1043** | **Yes** | **947** | **96** | | ### Bash Spec Tests Breakdown @@ -277,19 +277,19 @@ Features that may be added in the future (not intentionally excluded): ### Grep Limitations -**Skipped Tests (5):** +**Skipped Tests (3):** | Feature | Count | Notes | |---------|-------|-------| | Recursive test | 1 | Test needs VFS setup with files | | Pattern file `-f` | 1 | Requires file redirection support | -| Include/exclude | 2 | `--include`, `--exclude` patterns | | Binary detection | 1 | Auto-detect binary files | **Implemented Features:** - Basic flags: `-i`, `-v`, `-c`, `-n`, `-o`, `-l`, `-w`, `-E`, `-F`, `-q`, `-m`, `-x` - Context: `-A`, `-B`, `-C` (after/before/context lines) - Multiple patterns: `-e` +- Include/exclude: `--include=GLOB`, `--exclude=GLOB` for recursive search - Pattern file: `-f` (requires file to exist in VFS) - Filename control: `-H` (always show), `-h` (never show) - Byte offset: `-b`