diff --git a/Cargo.lock b/Cargo.lock index e62c9c0..088cdeb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -340,7 +340,7 @@ checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" [[package]] name = "corgea" -version = "1.8.5" +version = "1.8.8" dependencies = [ "chrono", "clap", diff --git a/Cargo.toml b/Cargo.toml index 608ffbd..ebe8577 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "corgea" -version = "1.8.7" +version = "1.8.8" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -42,3 +42,6 @@ urlencoding = "2.1" [target.'cfg(not(target_os = "windows"))'.dependencies] openssl = { version = "0.10", features = ["vendored"] } + +[dev-dependencies] +tempfile = "3.12.0" diff --git a/src/main.rs b/src/main.rs index 5da00f9..8ab5033 100644 --- a/src/main.rs +++ b/src/main.rs @@ -100,6 +100,12 @@ enum Commands { )] target: Option, + #[arg( + long, + help = "Exclude files matching glob patterns from the scan. Accepts comma-separated glob patterns. Examples: 'tests/**', 'src/**/*.test.ts,**/*.spec.js', '*.md'." + )] + exclude: Option, + #[arg( long, help = "The name of the Corgea project. Defaults to git repository name if found, otherwise to the current directory name." @@ -259,7 +265,7 @@ fn main() { } } } - Some(Commands::Scan { scanner , fail_on, fail, only_uncommitted, scan_type, policy, out_format, out_file, target, project_name }) => { + Some(Commands::Scan { scanner , fail_on, fail, only_uncommitted, scan_type, policy, out_format, out_file, target, exclude, project_name }) => { verify_token_and_exit_when_fail(&corgea_config); if let Some(level) = fail_on { if *scanner != Scanner::Blast { @@ -339,10 +345,15 @@ fn main() { eprintln!("\nWarning: you didn't specify an only policy scan, so all other types of scans will run as well."); } } + if exclude.is_some() && *scanner != Scanner::Blast { + eprintln!("exclude is only supported with blast scanner."); + std::process::exit(1); + } + match scanner { Scanner::Snyk => scan::run_snyk(&corgea_config, project_name.clone()), Scanner::Semgrep => scan::run_semgrep(&corgea_config, project_name.clone()), - Scanner::Blast => scanners::blast::run(&corgea_config, fail_on.clone(), fail, only_uncommitted, scan_type.clone(), policy.clone(), out_format.clone(), out_file.clone(), target.clone(), project_name.clone()) + Scanner::Blast => scanners::blast::run(&corgea_config, fail_on.clone(), fail, only_uncommitted, scan_type.clone(), policy.clone(), out_format.clone(), out_file.clone(), target.clone(), exclude.clone(), project_name.clone()) } } Some(Commands::Wait { scan_id }) => { diff --git a/src/scanners/blast.rs b/src/scanners/blast.rs index d530ed8..70416ac 100644 --- a/src/scanners/blast.rs +++ b/src/scanners/blast.rs @@ -21,6 +21,7 @@ pub fn run( out_format: Option, out_file: Option, target: Option, + exclude: Option, project_name: Option, ) { // Validate that only_uncommitted and target are not used together @@ -88,8 +89,12 @@ pub fn run( target.as_deref() }; + if target_str.is_none() && exclude.is_some() { + println!("Excluding files matching: {}", exclude.as_deref().unwrap()); + } + if let Some(target_value) = target_str { - match targets::resolve_targets(target_value) { + match targets::resolve_targets_with_exclude(target_value, exclude.as_deref()) { Ok(result) => { if result.files.is_empty() { *stop_signal.lock().unwrap() = true; @@ -147,7 +152,7 @@ pub fn run( } } - match utils::generic::create_zip_from_target(target_str, &zip_path, None) { + match utils::generic::create_zip_from_target(target_str, &zip_path, None, exclude.as_deref()) { Ok(added_files) => { if added_files.is_empty() { *stop_signal.lock().unwrap() = true; diff --git a/src/targets.rs b/src/targets.rs index 81f2d47..d2cf588 100644 --- a/src/targets.rs +++ b/src/targets.rs @@ -18,7 +18,7 @@ pub struct TargetSegmentResult { pub error: Option, } -pub fn resolve_targets(target_value: &str) -> Result { +pub fn resolve_targets_with_exclude(target_value: &str, exclude: Option<&str>) -> Result { let segments: Vec = target_value .split(',') .map(|s| s.trim().to_string()) @@ -40,6 +40,8 @@ pub fn resolve_targets(target_value: &str) -> Result Result { + if is_excluded_by_glob(&normalized, &repo_root, &exclude_glob_set) { + continue; + } if seen_files.insert(normalized.clone()) { all_files.push(normalized); } @@ -100,6 +105,48 @@ pub fn resolve_targets(target_value: &str) -> Result) -> Result, String> { + let exclude_str = match exclude { + Some(s) if !s.trim().is_empty() => s, + _ => return Ok(None), + }; + + let patterns: Vec<&str> = exclude_str.split(',').map(|s| s.trim()).filter(|s| !s.is_empty()).collect(); + if patterns.is_empty() { + return Ok(None); + } + + let mut builder = GlobSetBuilder::new(); + for pattern in &patterns { + let glob = Glob::new(pattern) + .map_err(|e| format!("Invalid exclude glob pattern '{}': {}", pattern, e))?; + builder.add(glob); + } + let glob_set = builder.build() + .map_err(|e| format!("Failed to build exclude glob set: {}", e))?; + Ok(Some(glob_set)) +} + +fn is_excluded_by_glob(file: &Path, repo_root: &Path, exclude_glob_set: &Option) -> bool { + let glob_set = match exclude_glob_set { + Some(gs) => gs, + None => return false, + }; + + if let Ok(relative) = file.strip_prefix(repo_root) { + return glob_set.is_match(relative); + } + glob_set.is_match(file) +} + +pub fn build_user_exclude_glob_set(exclude: Option<&str>) -> Result, String> { + build_exclude_glob_set(exclude) +} + +pub fn is_file_excluded(file: &Path, base_dir: &Path, exclude_glob_set: &Option) -> bool { + is_excluded_by_glob(file, base_dir, exclude_glob_set) +} + fn resolve_segment(segment: &str, repo_root: &Path) -> Result, String> { if segment == "-" { return read_stdin_files(false); @@ -479,3 +526,117 @@ fn is_git_repo(dir: &Path) -> bool { Repository::discover(dir).is_ok() } +#[cfg(test)] +mod tests { + use super::*; + use std::fs; + + fn setup_test_dir() -> tempfile::TempDir { + let dir = tempfile::tempdir().unwrap(); + let base = dir.path(); + + Repository::init(base).unwrap(); + + fs::create_dir_all(base.join("src")).unwrap(); + fs::create_dir_all(base.join("tests")).unwrap(); + fs::create_dir_all(base.join("docs")).unwrap(); + + fs::write(base.join("src/main.rs"), "fn main() {}").unwrap(); + fs::write(base.join("src/lib.rs"), "pub fn hello() {}").unwrap(); + fs::write(base.join("tests/test_main.rs"), "// test").unwrap(); + fs::write(base.join("docs/readme.md"), "# readme").unwrap(); + fs::write(base.join("config.toml"), "[config]").unwrap(); + + dir + } + + #[test] + fn build_exclude_glob_set_returns_none_for_none() { + let result = build_exclude_glob_set(None).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn build_exclude_glob_set_returns_none_for_empty() { + let result = build_exclude_glob_set(Some("")).unwrap(); + assert!(result.is_none()); + } + + #[test] + fn build_exclude_glob_set_returns_some_for_valid_pattern() { + let result = build_exclude_glob_set(Some("tests/**")).unwrap(); + assert!(result.is_some()); + } + + #[test] + fn build_exclude_glob_set_handles_comma_separated() { + let result = build_exclude_glob_set(Some("tests/**,docs/**")).unwrap(); + assert!(result.is_some()); + let gs = result.unwrap(); + assert!(gs.is_match("tests/foo.rs")); + assert!(gs.is_match("docs/readme.md")); + assert!(!gs.is_match("src/main.rs")); + } + + #[test] + fn build_exclude_glob_set_returns_error_for_invalid() { + let result = build_exclude_glob_set(Some("[invalid")); + assert!(result.is_err()); + } + + #[test] + fn is_excluded_by_glob_matches_relative_path() { + let gs = build_exclude_glob_set(Some("tests/**")).unwrap(); + let repo_root = Path::new("/repo"); + let file = Path::new("/repo/tests/test_main.rs"); + assert!(is_excluded_by_glob(file, repo_root, &gs)); + } + + #[test] + fn is_excluded_by_glob_does_not_match_non_excluded() { + let gs = build_exclude_glob_set(Some("tests/**")).unwrap(); + let repo_root = Path::new("/repo"); + let file = Path::new("/repo/src/main.rs"); + assert!(!is_excluded_by_glob(file, repo_root, &gs)); + } + + #[test] + fn is_excluded_by_glob_returns_false_for_none() { + let gs: Option = None; + let file = Path::new("/repo/tests/test_main.rs"); + assert!(!is_excluded_by_glob(file, Path::new("/repo"), &gs)); + } + + #[test] + fn is_excluded_by_glob_wildcard_extension() { + let gs = build_exclude_glob_set(Some("**/*.md")).unwrap(); + let repo_root = Path::new("/repo"); + assert!(is_excluded_by_glob(Path::new("/repo/docs/readme.md"), repo_root, &gs)); + assert!(!is_excluded_by_glob(Path::new("/repo/src/main.rs"), repo_root, &gs)); + } + + #[test] + fn is_excluded_filters_directory_files_correctly() { + let dir = setup_test_dir(); + let base = dir.path(); + let gs = build_exclude_glob_set(Some("tests/**,**/*.md")).unwrap(); + + assert!(!is_excluded_by_glob(&base.join("src/main.rs"), base, &gs)); + assert!(!is_excluded_by_glob(&base.join("src/lib.rs"), base, &gs)); + assert!(!is_excluded_by_glob(&base.join("config.toml"), base, &gs)); + assert!(is_excluded_by_glob(&base.join("tests/test_main.rs"), base, &gs)); + assert!(is_excluded_by_glob(&base.join("docs/readme.md"), base, &gs)); + } + + #[test] + fn is_excluded_with_none_includes_all() { + let dir = setup_test_dir(); + let base = dir.path(); + let gs: Option = None; + + assert!(!is_excluded_by_glob(&base.join("src/main.rs"), base, &gs)); + assert!(!is_excluded_by_glob(&base.join("tests/test_main.rs"), base, &gs)); + assert!(!is_excluded_by_glob(&base.join("docs/readme.md"), base, &gs)); + } +} + diff --git a/src/utils/generic.rs b/src/utils/generic.rs index 627ddda..80e7ba8 100644 --- a/src/utils/generic.rs +++ b/src/utils/generic.rs @@ -36,10 +36,12 @@ const DEFAULT_EXCLUDE_GLOBS: &[&str] = &[ /// - If `target` is `None`, performs a full repository scan (equivalent to scanning all files). /// - If `target` is `Some(target_str)`, resolves the target using the targets module and creates zip from those files. /// The target string can be a comma-separated list of files, directories, globs, or git selectors. +/// - `user_exclude` is an optional comma-separated list of glob patterns from `--exclude`. pub fn create_zip_from_target>( target: Option<&str>, output_zip: P, exclude_globs: Option<&[&str]>, + user_exclude: Option<&str>, ) -> Result, Box> { let exclude_globs = exclude_globs.unwrap_or(DEFAULT_EXCLUDE_GLOBS); @@ -49,9 +51,12 @@ pub fn create_zip_from_target>( } let glob_set = glob_builder.build()?; + let user_exclude_glob_set = crate::targets::build_user_exclude_glob_set(user_exclude) + .map_err(|e| format!("Failed to build exclude patterns: {}", e))?; + let files_to_zip: Vec<(PathBuf, PathBuf)> = if let Some(target_str) = target { let current_dir = env::current_dir()?; - let result = crate::targets::resolve_targets(target_str) + let result = crate::targets::resolve_targets_with_exclude(target_str, user_exclude) .map_err(|e| format!("Failed to resolve targets: {}", e))?; result.files @@ -81,6 +86,9 @@ pub fn create_zip_from_target>( if path.is_file() || path.is_dir() { let relative_path = path.strip_prefix(directory)?; + if path.is_file() && crate::targets::is_file_excluded(&relative_path.to_path_buf(), Path::new(""), &user_exclude_glob_set) { + continue; + } files.push((path.to_path_buf(), relative_path.to_path_buf())); } }