@@ -286,36 +286,57 @@ async def get_catalog_collections(
286286 collection_ids = []
287287 if hasattr (catalog , "links" ) and catalog .links :
288288 base_url = str (request .base_url ).rstrip ("/" )
289+ base_path = urlparse (base_url ).path .rstrip ("/" )
290+
289291 for link in catalog .links :
290292 if link .get ("rel" ) in ["child" , "item" ]:
291293 # Extract collection ID from href using proper URL parsing
292294 href = link .get ("href" , "" )
293295 if href :
294296 try :
295297 parsed_url = urlparse (href )
296- path = parsed_url .path
297-
298- # Verify this is our expected URL pattern by checking it starts with base_url
299- # or is a relative path that would resolve to our server
300- full_href = (
301- href
302- if href .startswith (("http://" , "https://" ))
303- else f"{ base_url } { href } "
304- )
305- if not full_href .startswith (base_url ):
306- continue
307-
308- # Look for patterns like /collections/{id} or collections/{id}
309- if "/collections/" in path :
310- # Split by /collections/ and take the last segment
311- path_parts = path .split ("/collections/" )
312- if len (path_parts ) > 1 :
313- collection_id = path_parts [1 ].split ("/" )[0 ]
298+ path = parsed_url .path .rstrip ("/" )
299+
300+ # Resolve relative URLs against base URL
301+ if not href .startswith (("http://" , "https://" )):
302+ full_path = (
303+ f"{ base_path } { path } " if path else base_path
304+ )
305+ else :
306+ # For absolute URLs, ensure they belong to our base domain
307+ if parsed_url .netloc != urlparse (base_url ).netloc :
308+ continue
309+ full_path = path
310+
311+ # Look for collections endpoint at the end of the path
312+ # This prevents false positives when /collections/ appears in base URL
313+ collections_pattern = "/collections/"
314+ if collections_pattern in full_path :
315+ # Find the LAST occurrence of /collections/ to avoid base URL conflicts
316+ last_collections_pos = full_path .rfind (
317+ collections_pattern
318+ )
319+ if last_collections_pos != - 1 :
320+ # Extract everything after the last /collections/
321+ after_collections = full_path [
322+ last_collections_pos
323+ + len (collections_pattern ) :
324+ ]
325+
326+ # Handle cases where there might be additional path segments
327+ # We only want the immediate collection ID
328+ collection_id = (
329+ after_collections .split ("/" )[0 ]
330+ if after_collections
331+ else None
332+ )
333+
314334 if (
315335 collection_id
316336 and collection_id not in collection_ids
317337 ):
318338 collection_ids .append (collection_id )
339+
319340 except Exception :
320341 # If URL parsing fails, skip this link
321342 continue
0 commit comments