Conversation
| info = requests.request(url=asset.api_url, method='get').json() | ||
| url = '' | ||
| for url in info['contentUrl']: | ||
| if url.startswith('https://dandiarchive.s3.amazonaws.com'): |
Check failure
Code scanning / CodeQL
Incomplete URL substring sanitization
| for url in info['contentUrl']: | ||
| if url.startswith('https://dandiarchive.s3.amazonaws.com'): | ||
| break | ||
| if not url.startswith('https://dandiarchive.s3.amazonaws.com'): |
Check failure
Code scanning / CodeQL
Incomplete URL substring sanitization
|
|
||
| def _maybe_to_s3(self, url): | ||
| url = stringify_path(url) | ||
| is_s3 = url.startswith('https://dandiarchive.s3.amazonaws.com') |
Check failure
Code scanning / CodeQL
Incomplete URL substring sanitization
|
Codecov ReportAttention:
Additional details and impacted files@@ Coverage Diff @@
## master #1395 +/- ##
==========================================
- Coverage 88.50% 86.70% -1.81%
==========================================
Files 77 78 +1
Lines 10492 10722 +230
==========================================
+ Hits 9286 9296 +10
- Misses 1206 1426 +220
Flags with carried forward coverage won't be shown. Click here to find out more. ☔ View full report in Codecov by Sentry. |
|
relates to
|
jwodder
left a comment
There was a problem hiding this comment.
Address the lint & type-checking failures.
| """ | ||
| A `fsspec` File System for (remote) DANDI | ||
| """ | ||
| __all__ = ['RemoteDandiFileSystem'] |
There was a problem hiding this comment.
Move this after the imports.
| 'Please install with: pip install fsspec[http]') | ||
| import re | ||
| import requests | ||
| from typing import Optional, Union, Tuple |
There was a problem hiding this comment.
Add from __future__ import annotations to the top of the file so that Optional[T], Union[T, U], and Tuple[T, ...] can be written T | None, T | U, and tuple[T, ...] instead.
| import requests | ||
| from typing import Optional, Union, Tuple | ||
| from urllib.parse import unquote as url_unquote | ||
| from dandi.dandiapi import ( |
There was a problem hiding this comment.
Based on the formatting, it appears that you did not run black et alii via pre-commit. Install pre-commit, run pre-commit install in your clone of this repository, and then run pre-commit run -a to format your PR.
| dandiset: Optional[Union[str, RemoteDandiset]] = None, | ||
| version: Optional[str] = None, | ||
| client: Optional[Union[str, DandiInstance, DandiAPIClient]] = None, | ||
| **http_kwargs): |
There was a problem hiding this comment.
What if a user supplies a RemoteDandiset with one version, a version string with a different version, and a client that doesn't match the RemoteDandiset's client? Such mismatches should either result in immediate errors or else be prevented in the first place via dedicated constructor classmethods.
| return self._dandiset | ||
|
|
||
| @dandiset.setter | ||
| def dandiset(self, x: Optional[RemoteDandiset]): |
There was a problem hiding this comment.
Why would we want to support replacing the dandiset attribute to begin with?
| instance, dandiset, version, *_ = split_dandi_url(url) | ||
| return cls(dandiset, version, instance) | ||
|
|
||
| def get_dandiset( |
There was a problem hiding this comment.
You may want to rewrite this using navigate from dandiarchive.py.
| info = requests.request(url=asset.api_url, method='get').json() | ||
| url = '' | ||
| for url in info['contentUrl']: |
There was a problem hiding this comment.
| info = requests.request(url=asset.api_url, method='get').json() | |
| url = '' | |
| for url in info['contentUrl']: | |
| for url in asset.get_raw_metadata().get('contentUrl', []): |
| info = requests.request(url=asset.api_url, method='get').json() | ||
| url = '' | ||
| for url in info['contentUrl']: | ||
| if url.startswith('https://dandiarchive.s3.amazonaws.com'): |
There was a problem hiding this comment.
Note that dandiarchive.s3.amazonaws.com is only used by the production dandi instance; the staging instance uses dandi-api-staging-dandisets.s3.amazonaws.com.
| break | ||
| if not url.startswith('https://dandiarchive.s3.amazonaws.com'): | ||
| return None | ||
| return url |
There was a problem hiding this comment.
| break | |
| if not url.startswith('https://dandiarchive.s3.amazonaws.com'): | |
| return None | |
| return url | |
| return url | |
| return None |
| return self._httpfs.open(s3url, *args, **kwargs) | ||
|
|
||
|
|
||
| def split_dandi_url(url: str) -> Tuple[str, str, str, str, str]: |
There was a problem hiding this comment.
Replace this with the types from dandiarchive.py.
Hi team!
This PR contains a fsspec file system to navigate files on dandiarchive. Under the hood, it uses things like
get_assets_with_path_prefix,get_assets_by_glob,get_asset_by_pathto figure out files and their directory structure.It also automatically finds the s3 url of any asset.
It allows doing things like:
However,
Here are a few examples that work, assuming
fs = RemoteDandiFileSystem():I understand there may be a bit of work to do before it gets merged, but I hope it can be useful.