Skip to content

Commit 641823e

Browse files
committed
Add more robust URL parsing
1 parent bc18554 commit 641823e

File tree

1 file changed

+39
-18
lines changed
  • stac_fastapi/core/stac_fastapi/core/extensions

1 file changed

+39
-18
lines changed

stac_fastapi/core/stac_fastapi/core/extensions/catalogs.py

Lines changed: 39 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -286,36 +286,57 @@ async def get_catalog_collections(
286286
collection_ids = []
287287
if hasattr(catalog, "links") and catalog.links:
288288
base_url = str(request.base_url).rstrip("/")
289+
base_path = urlparse(base_url).path.rstrip("/")
290+
289291
for link in catalog.links:
290292
if link.get("rel") in ["child", "item"]:
291293
# Extract collection ID from href using proper URL parsing
292294
href = link.get("href", "")
293295
if href:
294296
try:
295297
parsed_url = urlparse(href)
296-
path = parsed_url.path
297-
298-
# Verify this is our expected URL pattern by checking it starts with base_url
299-
# or is a relative path that would resolve to our server
300-
full_href = (
301-
href
302-
if href.startswith(("http://", "https://"))
303-
else f"{base_url}{href}"
304-
)
305-
if not full_href.startswith(base_url):
306-
continue
307-
308-
# Look for patterns like /collections/{id} or collections/{id}
309-
if "/collections/" in path:
310-
# Split by /collections/ and take the last segment
311-
path_parts = path.split("/collections/")
312-
if len(path_parts) > 1:
313-
collection_id = path_parts[1].split("/")[0]
298+
path = parsed_url.path.rstrip("/")
299+
300+
# Resolve relative URLs against base URL
301+
if not href.startswith(("http://", "https://")):
302+
full_path = (
303+
f"{base_path}{path}" if path else base_path
304+
)
305+
else:
306+
# For absolute URLs, ensure they belong to our base domain
307+
if parsed_url.netloc != urlparse(base_url).netloc:
308+
continue
309+
full_path = path
310+
311+
# Look for the last /collections/ segment in the path and take the ID right after it
312+
# This prevents false positives when /collections/ appears in base URL
313+
collections_pattern = "/collections/"
314+
if collections_pattern in full_path:
315+
# Find the LAST occurrence of /collections/ to avoid base URL conflicts
316+
last_collections_pos = full_path.rfind(
317+
collections_pattern
318+
)
319+
if last_collections_pos != -1:
320+
# Extract everything after the last /collections/
321+
after_collections = full_path[
322+
last_collections_pos
323+
+ len(collections_pattern) :
324+
]
325+
326+
# Handle cases where there might be additional path segments
327+
# We only want the immediate collection ID
328+
collection_id = (
329+
after_collections.split("/")[0]
330+
if after_collections
331+
else None
332+
)
333+
314334
if (
315335
collection_id
316336
and collection_id not in collection_ids
317337
):
318338
collection_ids.append(collection_id)
339+
319340
except Exception:
320341
# If URL parsing fails, skip this link
321342
continue

0 commit comments

Comments
 (0)