Skip to content

Commit ef7debe

Browse files
authored
Add download_large_files in monai/bundle/scripts.py (#6958)
Fixes #6362.

### Description

Add `download_large_files` in `monai/bundle/scripts.py`. Refer to the one in https://github.com/Project-MONAI/model-zoo/blob/131b0da3f540ed13a8a1c02d8395e92a682d47cf/ci/utils.py#L93.

### Types of changes

<!--- Put an `x` in all the boxes that apply, and remove the not applicable items -->
- [x] Non-breaking change (fix or new feature that would not break existing functionality).
- [ ] Breaking change (fix or new feature that would cause existing functionality to change).
- [ ] New tests added to cover the changes.
- [ ] Integration tests passed locally by running `./runtests.sh -f -u --net --coverage`.
- [ ] Quick tests passed locally by running `./runtests.sh --quick --unittests --disttests`.
- [ ] In-line docstrings updated.
- [ ] Documentation updated, tested `make html` command in the `docs/` folder.

---------

Signed-off-by: KumoLiu <yunl@nvidia.com>
1 parent 6f13b8d commit ef7debe

File tree

4 files changed

+77
-1
lines changed

4 files changed

+77
-1
lines changed

monai/bundle/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
ckpt_export,
2020
create_workflow,
2121
download,
22+
download_large_files,
2223
get_all_bundles_list,
2324
get_bundle_info,
2425
get_bundle_versions,

monai/bundle/__main__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from monai.bundle.scripts import (
1515
ckpt_export,
1616
download,
17+
download_large_files,
1718
init_bundle,
1819
onnx_export,
1920
run,

monai/bundle/scripts.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1619,3 +1619,43 @@ def create_workflow(
16191619
workflow_.initialize()
16201620

16211621
return workflow_
1622+
1623+
1624+
def download_large_files(bundle_path: str | None = None, large_file_name: str | None = None) -> None:
    """
    Download the large files of a bundle, as listed in the bundle's large-files config.

    The config file must provide a ``"large_files"`` list. Each entry of that list is forwarded as
    keyword arguments to `download_url`, after its ``"path"`` value has been converted into a
    ``"filepath"`` located under `bundle_path`. Config files with the suffixes ".yml", ".yaml" and
    ".json" are supported. If `large_file_name` is not specified, every file in `bundle_path` matching
    ``large_files*`` with one of these suffixes is used.

    Typical usage examples:

    .. code-block:: bash

        # Execute this module as a CLI entry to download large files from a bundle path:
        python -m monai.bundle download_large_files --bundle_path <bundle_path>

        # Execute this module as a CLI entry to download large files from the bundle path with a specified `large_file_name`:
        python -m monai.bundle download_large_files --bundle_path <bundle_path> --large_file_name large_files.yaml

    Args:
        bundle_path: (Optional) The path to the bundle where the files are located. Default is `os.getcwd()`.
        large_file_name: (Optional) The name of the config file (resolved against `bundle_path`) that lists
            the large files to be downloaded, for example ``large_files.yaml``.

    Raises:
        FileNotFoundError: When `large_file_name` is given but does not exist under `bundle_path`, or when
            it is not given and no ``large_files.yml/yaml/json`` can be found under `bundle_path`.

    """
    bundle_path = os.getcwd() if bundle_path is None else bundle_path
    if large_file_name is None:
        supported_suffixes = (".yml", ".yaml", ".json")
        # sorted so that the order in which several matching config files are read is deterministic
        large_file_path = sorted(p for p in Path(bundle_path).glob("large_files*") if p.suffix in supported_suffixes)
        if not large_file_path:
            raise FileNotFoundError(f"Cannot find the large_files.yml/yaml/json under {bundle_path}.")
    else:
        # an explicitly named config file was previously never resolved, leaving `large_file_path` unbound
        named_config = Path(bundle_path) / large_file_name
        if not named_config.is_file():
            raise FileNotFoundError(f"Cannot find {large_file_name} under {bundle_path}.")
        large_file_path = [named_config]

    parser = ConfigParser()
    parser.read_config(large_file_path)
    large_files_list = parser.get()["large_files"]
    for lf_data in large_files_list:
        # NOTE(review): `fuzzy` is forwarded to `download_url` untouched; confirm its meaning there.
        lf_data["fuzzy"] = True
        # an empty hash value/type is dropped so that it is not passed on to `download_url`
        for hash_key in ("hash_val", "hash_type"):
            if lf_data.get(hash_key, "") == "":
                lf_data.pop(hash_key, None)
        # the config stores a bundle-relative "path"; `download_url` is called with "filepath" instead
        lf_data["filepath"] = os.path.join(bundle_path, lf_data.pop("path"))
        download_url(**lf_data)

tests/test_bundle_download.py

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,13 @@
7171
{"spatial_dims": 3, "out_channels": 5},
7272
]
7373

74+
# Parameters for `test_url_download_large_files`, in positional order:
# bundle_files, bundle_name, url, hash_val.
TEST_CASE_8 = [
    # files expected to exist in the bundle directory after the `download` command
    ["network.json", "test_output.pt", "test_input.pt", "large_files.yaml"],
    # bundle name, also used as the sub-directory of the download directory
    "test_bundle",
    # url passed to the `download` command
    "https://github.com/Project-MONAI/MONAI-extra-test-data/releases/download/0.8.1/test_bundle_v0.1.2.zip",
    # expected hash of each large file, checked with `check_hash` under "models/"
    {
        "model.pt": "27952767e2e154e3b0ee65defc5aed38",
        "model.ts": "97746870fe591f69ac09827175b00675",
    },
]
80+
7481

7582
class TestDownload(unittest.TestCase):
7683
@parameterized.expand([TEST_CASE_1, TEST_CASE_2])
@@ -148,7 +155,6 @@ def test_load_weights(self, bundle_files, bundle_name, repo, device, model_file)
148155
device=device,
149156
return_state_dict=True,
150157
)
151-
152158
# prepare network
153159
with open(os.path.join(tempdir, bundle_name, bundle_files[2])) as f:
154160
net_args = json.load(f)["network_def"]
@@ -275,5 +281,33 @@ def test_load_ts_module(self, bundle_files, bundle_name, version, repo, device,
275281
self.assertTrue("network.json" in extra_file_dict.keys())
276282

277283

284+
class TestDownloadLargefiles(unittest.TestCase):
    """Check the `download_large_files` CLI entry against a bundle fetched with the `download` entry."""

    @parameterized.expand([TEST_CASE_8])
    @skip_if_quick
    def test_url_download_large_files(self, bundle_files, bundle_name, url, hash_val):
        with skip_if_downloading_fails(), tempfile.TemporaryDirectory() as tempdir:
            bundle_path = os.path.join(tempdir, bundle_name)

            # step 1: download the bundle from `url`; the default arguments come from an `args_file`
            default_args = {"name": bundle_name, "bundle_dir": tempdir, "url": ""}
            args_file = os.path.join(tempdir, "def_args.json")
            ConfigParser().export_config_file(config=default_args, filepath=args_file)
            download_cmd = [
                "coverage",
                "run",
                "-m",
                "monai.bundle",
                "download",
                "--args_file",
                args_file,
                "--url",
                url,
                "--source",
                "github",
            ]
            command_line_tests(download_cmd)
            for name in bundle_files:
                expected_file = os.path.join(bundle_path, name)
                print(expected_file)
                self.assertTrue(os.path.exists(expected_file))

            # step 2: download the large files listed by the bundle and verify their hashes
            large_files_cmd = [
                "coverage",
                "run",
                "-m",
                "monai.bundle",
                "download_large_files",
                "--bundle_path",
                bundle_path,
            ]
            command_line_tests(large_files_cmd)
            for name in ["model.pt", "model.ts"]:
                model_file = os.path.join(bundle_path, f"models/{name}")
                self.assertTrue(check_hash(filepath=model_file, val=hash_val[name]))
310+
311+
278312
if __name__ == "__main__":
279313
unittest.main()

0 commit comments

Comments
 (0)