|
3 | 3 | from __future__ import annotations |
4 | 4 |
|
5 | 5 | import re |
| 6 | +from typing import Iterable |
6 | 7 |
|
7 | | -from gitingest.utils.exceptions import InvalidPatternError |
8 | 8 | from gitingest.utils.ignore_patterns import DEFAULT_IGNORE_PATTERNS |
9 | 9 |
|
| 10 | +_PATTERN_SPLIT_RE = re.compile(r"[,\s]+") |
| 11 | + |
10 | 12 |
|
11 | 13 | def process_patterns( |
12 | 14 | exclude_patterns: str | set[str] | None = None, |
@@ -43,66 +45,29 @@ def process_patterns( |
43 | 45 | return ignore_patterns_set, parsed_include |
44 | 46 |
|
45 | 47 |
|
46 | | -def _parse_patterns(pattern: set[str] | str) -> set[str]: |
47 | | - """Parse and validate file/directory patterns for inclusion or exclusion. |
48 | | -
|
49 | | - Takes either a single pattern string or set of pattern strings and processes them into a normalized list. |
50 | | - Patterns are split on commas and spaces, validated for allowed characters, and normalized. |
| 48 | +def _parse_patterns(patterns: str | Iterable[str]) -> set[str]: |
| 49 | + """Normalize a collection of file or directory patterns. |
51 | 50 |
|
52 | 51 | Parameters |
53 | 52 | ---------- |
54 | | - pattern : set[str] | str |
55 | | - Pattern(s) to parse - either a single string or set of strings |
| 53 | + patterns : str | Iterable[str] |
| 54 | + One pattern string or an iterable of pattern strings. Each pattern may contain multiple comma- or |
| 55 | + whitespace-separated sub-patterns, e.g. "src/*, tests *.md". |
56 | 56 |
|
57 | 57 | Returns |
58 | 58 | ------- |
59 | 59 | set[str] |
60 | | - A set of normalized patterns. |
61 | | -
|
62 | | - Raises |
63 | | - ------ |
64 | | - InvalidPatternError |
65 | | - If any pattern contains invalid characters. Only alphanumeric characters, |
66 | | - dash (-), underscore (_), dot (.), forward slash (/), plus (+), and |
67 | | - asterisk (*) are allowed. |
68 | | -
|
69 | | - """ |
70 | | - patterns = pattern if isinstance(pattern, set) else {pattern} |
71 | | - |
72 | | - parsed_patterns: set[str] = set() |
73 | | - for p in patterns: |
74 | | - parsed_patterns = parsed_patterns.union(set(re.split(",| ", p))) |
75 | | - |
76 | | - # Remove empty string if present |
77 | | - parsed_patterns = parsed_patterns - {""} |
78 | | - |
79 | | - # Normalize Windows paths to Unix-style paths |
80 | | - parsed_patterns = {p.replace("\\", "/") for p in parsed_patterns} |
81 | | - |
82 | | - # Validate and normalize each pattern |
83 | | - for p in parsed_patterns: |
84 | | - if not _is_valid_pattern(p): |
85 | | - raise InvalidPatternError(p) |
86 | | - |
87 | | - return parsed_patterns |
88 | | - |
89 | | - |
90 | | -def _is_valid_pattern(pattern: str) -> bool: |
91 | | - """Validate if the given pattern contains only valid characters. |
92 | | -
|
93 | | - This function checks if the pattern contains only alphanumeric characters or one |
94 | | - of the following allowed characters: dash ('-'), underscore ('_'), dot ('.'), |
95 | | - forward slash ('/'), plus ('+'), asterisk ('*'), or the at sign ('@'). |
96 | | -
|
97 | | - Parameters |
98 | | - ---------- |
99 | | - pattern : str |
100 | | - The pattern to validate. |
101 | | -
|
102 | | - Returns |
103 | | - ------- |
104 | | - bool |
105 | | - ``True`` if the pattern is valid, otherwise ``False``. |
| 60 | + Normalized patterns with Windows back-slashes converted to forward-slashes and duplicates removed. |
106 | 61 |
|
107 | 62 | """ |
108 | | - return all(c.isalnum() or c in "-_./+*@" for c in pattern) |
| 63 | + # Treat a lone string as the iterable [string] |
| 64 | + if isinstance(patterns, str): |
| 65 | + patterns = [patterns] |
| 66 | + |
| 67 | + # Flatten, split on commas/whitespace, strip empties, normalize slashes |
| 68 | + return { |
| 69 | + part.replace("\\", "/") |
| 70 | + for pat in patterns |
| 71 | + for part in _PATTERN_SPLIT_RE.split(pat.strip()) |
| 72 | + if part # discard empty tokens |
| 73 | + } |
0 commit comments