Skip to content

Commit 0d859f5

Browse files
chore: delete _is_valid_pattern, InvalidPatternError, and _is_safe_symlink
1 parent 5b61ba1 commit 0d859f5

File tree

3 files changed

+20
-111
lines changed

3 files changed

+20
-111
lines changed

src/gitingest/utils/exceptions.py

Lines changed: 0 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,6 @@
11
"""Custom exceptions for the Gitingest package."""
22

33

4-
class InvalidPatternError(ValueError):
5-
"""Exception raised when a pattern contains invalid characters.
6-
7-
This exception is used to signal that a pattern provided for some operation
8-
contains characters that are not allowed. The valid characters for the pattern
9-
include alphanumeric characters, dash (-), underscore (_), dot (.), forward slash (/),
10-
plus (+), and asterisk (*).
11-
12-
Parameters
13-
----------
14-
pattern : str
15-
The invalid pattern that caused the error.
16-
17-
"""
18-
19-
def __init__(self, pattern: str) -> None:
20-
super().__init__(
21-
f"Pattern '{pattern}' contains invalid characters. Only alphanumeric characters, dash (-), "
22-
"underscore (_), dot (.), forward slash (/), plus (+), and asterisk (*) are allowed.",
23-
)
24-
25-
264
class AsyncTimeoutError(Exception):
275
"""Exception raised when an async operation exceeds its timeout limit.
286

src/gitingest/utils/path_utils.py

Lines changed: 0 additions & 34 deletions
This file was deleted.

src/gitingest/utils/pattern_utils.py

Lines changed: 20 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,12 @@
33
from __future__ import annotations
44

55
import re
6+
from typing import Iterable
67

7-
from gitingest.utils.exceptions import InvalidPatternError
88
from gitingest.utils.ignore_patterns import DEFAULT_IGNORE_PATTERNS
99

10+
_PATTERN_SPLIT_RE = re.compile(r"[,\s]+")
11+
1012

1113
def process_patterns(
1214
exclude_patterns: str | set[str] | None = None,
@@ -43,66 +45,29 @@ def process_patterns(
4345
return ignore_patterns_set, parsed_include
4446

4547

46-
def _parse_patterns(pattern: set[str] | str) -> set[str]:
47-
"""Parse and validate file/directory patterns for inclusion or exclusion.
48-
49-
Takes either a single pattern string or set of pattern strings and processes them into a normalized list.
50-
Patterns are split on commas and spaces, validated for allowed characters, and normalized.
48+
def _parse_patterns(patterns: str | Iterable[str]) -> set[str]:
49+
"""Normalize a collection of file or directory patterns.
5150
5251
Parameters
5352
----------
54-
pattern : set[str] | str
55-
Pattern(s) to parse - either a single string or set of strings
53+
patterns : str | Iterable[str]
54+
One pattern string or an iterable of pattern strings. Each pattern may contain multiple comma- or
55+
whitespace-separated sub-patterns, e.g. "src/*, tests *.md".
5656
5757
Returns
5858
-------
5959
set[str]
60-
A set of normalized patterns.
61-
62-
Raises
63-
------
64-
InvalidPatternError
65-
If any pattern contains invalid characters. Only alphanumeric characters,
66-
dash (-), underscore (_), dot (.), forward slash (/), plus (+), and
67-
asterisk (*) are allowed.
68-
69-
"""
70-
patterns = pattern if isinstance(pattern, set) else {pattern}
71-
72-
parsed_patterns: set[str] = set()
73-
for p in patterns:
74-
parsed_patterns = parsed_patterns.union(set(re.split(",| ", p)))
75-
76-
# Remove empty string if present
77-
parsed_patterns = parsed_patterns - {""}
78-
79-
# Normalize Windows paths to Unix-style paths
80-
parsed_patterns = {p.replace("\\", "/") for p in parsed_patterns}
81-
82-
# Validate and normalize each pattern
83-
for p in parsed_patterns:
84-
if not _is_valid_pattern(p):
85-
raise InvalidPatternError(p)
86-
87-
return parsed_patterns
88-
89-
90-
def _is_valid_pattern(pattern: str) -> bool:
91-
"""Validate if the given pattern contains only valid characters.
92-
93-
This function checks if the pattern contains only alphanumeric characters or one
94-
of the following allowed characters: dash ('-'), underscore ('_'), dot ('.'),
95-
forward slash ('/'), plus ('+'), asterisk ('*'), or the at sign ('@').
96-
97-
Parameters
98-
----------
99-
pattern : str
100-
The pattern to validate.
101-
102-
Returns
103-
-------
104-
bool
105-
``True`` if the pattern is valid, otherwise ``False``.
60+
Normalized patterns with Windows backslashes converted to forward slashes and duplicates removed.
10661
10762
"""
108-
return all(c.isalnum() or c in "-_./+*@" for c in pattern)
63+
# Wrap a lone string in a list; iterating a str directly would yield single characters
64+
if isinstance(patterns, str):
65+
patterns = [patterns]
66+
67+
# Flatten, split on commas/whitespace, drop empty tokens, normalize slashes
68+
return {
69+
part.replace("\\", "/")
70+
for pat in patterns
71+
for part in _PATTERN_SPLIT_RE.split(pat.strip())
72+
if part # discard empty tokens
73+
}

0 commit comments

Comments
 (0)