Skip to content

Commit 8d8134c

Browse files
Debug: glob tool fails when pattern contains path (#718)
Co-authored-by: openhands <openhands@all-hands.dev>
1 parent 74cc24f commit 8d8134c

File tree

3 files changed

+273
-126
lines changed

3 files changed

+273
-126
lines changed

openhands-tools/openhands/tools/glob/impl.py

Lines changed: 119 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -44,23 +44,38 @@ def __call__(self, action: GlobAction) -> GlobObservation:
4444
GlobObservation with matching files or error information
4545
"""
4646
try:
47-
# Determine search path
47+
original_pattern = action.pattern # Store original pattern for observation
48+
4849
if action.path:
4950
search_path = Path(action.path).resolve()
50-
if not search_path.is_dir():
51-
return GlobObservation(
52-
files=[],
53-
pattern=action.pattern,
54-
search_path=str(search_path),
55-
error=f"Search path '{action.path}' is not a valid directory",
56-
)
51+
pattern = action.pattern
5752
else:
58-
search_path = self.working_dir
53+
extracted_path, pattern = self._extract_search_path_from_pattern(
54+
action.pattern
55+
)
56+
search_path = (
57+
extracted_path if extracted_path is not None else self.working_dir
58+
)
59+
60+
if not search_path.is_dir():
61+
return GlobObservation(
62+
files=[],
63+
pattern=original_pattern,
64+
search_path=str(search_path),
65+
error=f"Search path '{search_path}' is not a valid directory",
66+
)
5967

6068
if self._ripgrep_available:
61-
return self._execute_with_ripgrep(action, search_path)
69+
files, truncated = self._execute_with_ripgrep(pattern, search_path)
6270
else:
63-
return self._execute_with_glob(action, search_path)
71+
files, truncated = self._execute_with_glob(pattern, search_path)
72+
73+
return GlobObservation(
74+
files=files,
75+
pattern=original_pattern,
76+
search_path=str(search_path),
77+
truncated=truncated,
78+
)
6479

6580
except Exception as e:
6681
# Determine search path for error reporting
@@ -80,16 +95,25 @@ def __call__(self, action: GlobAction) -> GlobObservation:
8095
)
8196

8297
def _execute_with_ripgrep(
83-
self, action: GlobAction, search_path: Path
84-
) -> GlobObservation:
85-
"""Execute glob pattern matching using ripgrep."""
98+
self, pattern: str, search_path: Path
99+
) -> tuple[list[str], bool]:
100+
"""Execute glob pattern matching using ripgrep.
101+
102+
Args:
103+
pattern: The glob pattern to match
104+
search_path: The directory to search in
105+
106+
Returns:
107+
Tuple of (file_paths, truncated) where file_paths is a list of matching files
108+
and truncated is True if results were limited to 100 files
109+
""" # noqa: E501
86110
# Build ripgrep command: rg --files {path} -g {pattern} --sortr=modified
87111
cmd = [
88112
"rg",
89113
"--files",
90114
str(search_path),
91115
"-g",
92-
action.pattern,
116+
pattern,
93117
"--sortr=modified",
94118
]
95119

@@ -110,25 +134,28 @@ def _execute_with_ripgrep(
110134

111135
truncated = len(file_paths) >= 100
112136

113-
return GlobObservation(
114-
files=file_paths,
115-
pattern=action.pattern,
116-
search_path=str(search_path),
117-
truncated=truncated,
118-
)
137+
return file_paths, truncated
119138

120139
def _execute_with_glob(
121-
self, action: GlobAction, search_path: Path
122-
) -> GlobObservation:
123-
"""Execute glob pattern matching using Python's glob module."""
140+
self, pattern: str, search_path: Path
141+
) -> tuple[list[str], bool]:
142+
"""Execute glob pattern matching using Python's glob module.
143+
144+
Args:
145+
pattern: The glob pattern to match
146+
search_path: The directory to search in
147+
148+
Returns:
149+
Tuple of (file_paths, truncated) where file_paths is a list of matching files
150+
and truncated is True if results were limited to 100 files
151+
""" # noqa: E501
124152
# Change to search directory for glob to work correctly
125153
original_cwd = os.getcwd()
126154
try:
127155
os.chdir(search_path)
128156

129157
# Ripgrep's -g flag is always recursive, so we need to make the pattern
130158
# recursive if it doesn't already contain **
131-
pattern = action.pattern
132159
if "**" not in pattern:
133160
# Convert non-recursive patterns like "*.py" to "**/*.py"
134161
# to match ripgrep's recursive behavior
@@ -137,8 +164,8 @@ def _execute_with_glob(
137164
# Use glob to find matching files
138165
matches = glob_module.glob(pattern, recursive=True)
139166

140-
# Convert to absolute paths (without resolving symlinks) a
141-
# nd sort by modification time
167+
# Convert to absolute paths (without resolving symlinks)
168+
# and sort by modification time
142169
file_paths = []
143170
for match in matches:
144171
# Use absolute() instead of resolve() to avoid resolving symlinks
@@ -152,11 +179,70 @@ def _execute_with_glob(
152179

153180
truncated = len(file_paths) > 100
154181

155-
return GlobObservation(
156-
files=sorted_files,
157-
pattern=action.pattern,
158-
search_path=str(search_path),
159-
truncated=truncated,
160-
)
182+
return sorted_files, truncated
161183
finally:
162184
os.chdir(original_cwd)
185+
186+
@staticmethod
187+
def _extract_search_path_from_pattern(pattern: str) -> tuple[Path | None, str]:
188+
"""Extract search path and relative pattern from an absolute path pattern.
189+
190+
This is needed because agents may send absolute path patterns like
191+
"/path/to/dir/**/*.py", but ripgrep's -g flag expects a search directory
192+
and a relative pattern separately. This function splits the absolute pattern
193+
into these two components.
194+
195+
For relative patterns, returns (None, pattern) to indicate the caller should
196+
use its default working directory.
197+
198+
Args:
199+
pattern: The glob pattern (may be absolute or relative)
200+
201+
Returns:
202+
Tuple of (search_path, adjusted_pattern) where:
203+
- search_path: The directory to search in (None for relative patterns)
204+
- adjusted_pattern: The pattern relative to search_path
205+
206+
Examples:
207+
>>> _extract_search_path_from_pattern("/path/to/dir/**/*.py")
208+
(Path("/path/to/dir"), "**/*.py")
209+
210+
>>> _extract_search_path_from_pattern("/path/to/*.py")
211+
(Path("/path/to"), "*.py")
212+
213+
>>> _extract_search_path_from_pattern("**/*.py")
214+
(None, "**/*.py")
215+
"""
216+
if not pattern:
217+
return None, "**/*"
218+
219+
# Expand ~ for user home directory
220+
pattern = os.path.expanduser(pattern)
221+
222+
# Check if pattern is an absolute path
223+
if not pattern.startswith("/"):
224+
# Relative pattern - caller should use default working directory
225+
return None, pattern
226+
227+
# Absolute path pattern - extract the base path
228+
path_obj = Path(pattern)
229+
parts = path_obj.parts
230+
231+
# Find where the glob characters start using glob.has_magic()
232+
search_parts = []
233+
for part in parts:
234+
if glob_module.has_magic(part):
235+
break
236+
search_parts.append(part)
237+
238+
if not search_parts:
239+
# Pattern starts with glob at root (e.g., "/*/*.py")
240+
search_path = Path("/")
241+
adjusted_pattern = pattern.lstrip("/")
242+
else:
243+
search_path = Path(*search_parts)
244+
# Get the remaining parts as the pattern
245+
remaining = parts[len(search_parts) :]
246+
adjusted_pattern = str(Path(*remaining)) if remaining else "**/*"
247+
248+
return search_path.resolve(), adjusted_pattern

0 commit comments

Comments
 (0)