
Commit c235e0b

Merge pull request #210 from superannotateai/download_export
Download export
2 parents 061c077 + 5e088ed

File tree: 6 files changed (+106, -71 lines)


pytest.ini

Lines changed: 1 addition & 1 deletion
@@ -2,4 +2,4 @@
 minversion = 3.0
 log_cli=true
 python_files = test_*.py
-addopts = -n32 --dist=loadscope
+addopts = -n 32 --dist=loadscope
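Note: these addopts come from pytest-xdist, where -n 32 requests 32 parallel workers and --dist=loadscope keeps tests from the same module (or class) on the same worker; both the spaced and unspaced forms of -n should be accepted, so the change appears to be cosmetic. A minimal sketch of running the same options programmatically, assuming pytest and pytest-xdist are installed and "tests/" is a placeholder path:

# Sketch only: equivalent of the addopts above, driven through pytest's API.
import pytest

exit_code = pytest.main(["-n", "32", "--dist=loadscope", "tests/"])
print("pytest exit code:", exit_code)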

src/superannotate/lib/app/interface/sdk_interface.py

Lines changed: 14 additions & 27 deletions
@@ -28,6 +28,7 @@
 from lib.app.helpers import get_annotation_paths
 from lib.app.helpers import get_paths_and_duplicated_from_csv
 from lib.app.helpers import reformat_metrics_json
+from lib.app.interface.types import AnnotationStatuses
 from lib.app.interface.types import AnnotationType
 from lib.app.interface.types import NotEmptyStr
 from lib.app.interface.types import Status
@@ -1646,7 +1647,7 @@ def upload_images_from_s3_bucket_to_project(
 def prepare_export(
     project: Union[NotEmptyStr, dict],
     folder_names: Optional[List[NotEmptyStr]] = None,
-    annotation_statuses: Optional[List[NotEmptyStr]] = None,
+    annotation_statuses: Optional[List[AnnotationStatuses]] = None,
     include_fuse: Optional[StrictBool] = False,
     only_pinned=False,
 ):
@@ -2152,38 +2153,24 @@ def download_export(
     """
     project_name, folder_name = extract_project_folder(project)
     export_name = export["name"] if isinstance(export, dict) else export
-    response = controller.download_export(
+
+    use_case = controller.download_export(
         project_name=project_name,
         export_name=export_name,
         folder_path=folder_path,
         extract_zip_contents=extract_zip_contents,
         to_s3_bucket=to_s3_bucket,
     )
-    downloaded_folder_path = response.data
-
-    if to_s3_bucket:
-        to_s3_bucket = boto3.Session().resource("s3").Bucket(to_s3_bucket)
-
-        files_to_upload = []
-        for file in Path(downloaded_folder_path).rglob("*.*"):
-            files_to_upload.append(file)
-
-        def _upload_file_to_s3(to_s3_bucket, path, s3_key) -> None:
-            controller.upload_file_to_s3(
-                to_s3_bucket=to_s3_bucket, path=path, s3_key=s3_key
-            )
-
-        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-            results = []
-            for path in files_to_upload:
-                s3_key = f"{path.as_posix()}"
-                results.append(
-                    executor.submit(_upload_file_to_s3, to_s3_bucket, str(path), s3_key)
-                )
-
-            for future in concurrent.futures.as_completed(results):
-                future.result()
-                logger.info("Exported to AWS %s/%s", to_s3_bucket, str(path))
+    if use_case.is_valid():
+        if to_s3_bucket:
+            with tqdm(
+                total=use_case.get_upload_files_count(), desc="Uploading"
+            ) as progress_bar:
+                for _ in use_case.execute():
+                    progress_bar.update(1)
+        else:
+            for _ in use_case.execute():
+                continue


 @Trackable
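For reference, a hypothetical call to the reworked download_export from user code. The argument names are taken from the diff above; the project name, export name, paths, and bucket are placeholders, and the snippet assumes the SDK is already configured with credentials and exposes these functions at package level, as this version does:

# Sketch only: "My Project", "my_export", paths and bucket are placeholders.
import superannotate as sa

# Local download: the archive is fetched and, with extract_zip_contents=True,
# unpacked into folder_path.
sa.download_export(
    project="My Project",
    export="my_export",
    folder_path="./exports",
    extract_zip_contents=True,
)

# S3 upload: download_export drives use_case.execute() under a tqdm bar,
# one tick per uploaded file.
sa.download_export(
    project="My Project",
    export="my_export",
    folder_path="exports/2021",
    to_s3_bucket="my-annotation-exports",
)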

src/superannotate/lib/app/interface/types.py

Lines changed: 10 additions & 0 deletions
@@ -34,6 +34,16 @@ def validate(cls, value: Union[str]) -> Union[str]:
         return value


+class AnnotationStatuses(StrictStr):
+    @classmethod
+    def validate(cls, value: Union[str]) -> Union[str]:
+        if value.lower() not in AnnotationStatus.values():
+            raise TypeError(
+                f"Available annotation_statuses are {', '.join(AnnotationStatus.titles())}. "
+            )
+        return value
+
+
 def to_chunks(t, size=2):
     it = iter(t)
     return zip(*[it] * size)
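A quick illustration of how the new validator behaves (not part of the diff; the import path mirrors the internal module shown above and is not the public API, and the status strings are examples, the real set comes from AnnotationStatus):

# Hypothetical behavior sketch: validate() lowercases the input and checks it
# against AnnotationStatus.values(); the error message lists the original-case
# names via the new AnnotationStatus.titles().
from lib.app.interface.types import AnnotationStatuses

AnnotationStatuses.validate("Completed")      # accepted, returns "Completed"
AnnotationStatuses.validate("not_a_status")   # raises TypeError listing valid titles

This is also why titles() is added to the enum base class below: the membership check is case-insensitive, while the error message can show the canonical spellings.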

src/superannotate/lib/core/enums.py

Lines changed: 4 additions & 0 deletions
@@ -27,6 +27,10 @@ def get_value(cls, name):
     def values(cls):
         return [enum.name.lower() for enum in list(cls)]

+    @classmethod
+    def titles(cls):
+        return [enum.name for enum in list(cls)]
+

 class ProjectType(BaseTitledEnum):
     VECTOR = "Vector", 1
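To make the difference between the two helpers concrete, here is a small stand-in enum (not the SDK's BaseTitledEnum, which also carries a title/value pair); only the two name-based helpers from the diff are mirrored:

# Stand-in sketch, not the real BaseTitledEnum.
from enum import Enum

class Status(Enum):
    NotStarted = 1
    InProgress = 2
    Completed = 3

    @classmethod
    def values(cls):
        return [e.name.lower() for e in list(cls)]

    @classmethod
    def titles(cls):
        return [e.name for e in list(cls)]

print(Status.values())  # ['notstarted', 'inprogress', 'completed']
print(Status.titles())  # ['NotStarted', 'InProgress', 'Completed']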

src/superannotate/lib/core/usecases.py

Lines changed: 76 additions & 41 deletions
@@ -5,6 +5,7 @@
 import logging
 import os.path
 import random
+import tempfile
 import time
 import uuid
 import zipfile
@@ -4012,58 +4013,92 @@ def __init__(
         self._folder_path = folder_path
         self._extract_zip_contents = extract_zip_contents
         self._to_s3_bucket = to_s3_bucket
+        self._temp_dir = None

     def validate_project_type(self):
         if self._project.project_type in constances.LIMITED_FUNCTIONS:
             raise AppValidationException(
                 constances.LIMITED_FUNCTIONS[self._project.project_type]
             )

-    def execute(self):
-        if self.is_valid():
-            exports = self._service.get_exports(
-                team_id=self._project.team_id, project_id=self._project.uuid
-            )
-            export_id = None
-            for export in exports:
-                if export["name"] == self._export_name:
-                    export_id = export["id"]
-                    break
-            if not export_id:
-                raise AppException("Export not found.")
+    def upload_to_s3_from_folder(self, folder_path: str):
+        to_s3_bucket = boto3.Session().resource("s3").Bucket(self._to_s3_bucket)
+        files_to_upload = list(Path(folder_path).rglob("*.*"))

-            while True:
-                export = self._service.get_export(
-                    team_id=self._project.team_id,
-                    project_id=self._project.uuid,
-                    export_id=export_id,
+        def _upload_file_to_s3(_to_s3_bucket, _path, _s3_key) -> None:
+            _to_s3_bucket.upload_file(_path, _s3_key)
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            results = []
+            for path in files_to_upload:
+                s3_key = f"{self._folder_path}/{path.name}"
+                results.append(
+                    executor.submit(_upload_file_to_s3, to_s3_bucket, str(path), s3_key)
                 )
+                yield

-                if export["status"] == ExportStatus.IN_PROGRESS.value:
-                    logger.info("Waiting 5 seconds for export to finish on server.")
-                    time.sleep(5)
-                    continue
-                if export["status"] == ExportStatus.ERROR.value:
-                    raise AppException("Couldn't download export.")
-                pass
-                break
-
-            filename = Path(export["path"]).name
-            filepath = Path(self._folder_path) / filename
-            with requests.get(export["download"], stream=True) as r:
-                r.raise_for_status()
-                with open(filepath, "wb") as f:
-                    for chunk in r.iter_content(chunk_size=8192):
-                        f.write(chunk)
-            if self._extract_zip_contents:
-                with zipfile.ZipFile(filepath, "r") as f:
-                    f.extractall(self._folder_path)
-                Path.unlink(filepath)
-                logger.info(f"Extracted {filepath} to folder {self._folder_path}")
-            else:
-                logger.info(f"Downloaded export ID {export['id']} to {filepath}")
+    def download_to_local_storage(self, destination: str):
+        exports = self._service.get_exports(
+            team_id=self._project.team_id, project_id=self._project.uuid
+        )
+        export = next(filter(lambda i: i["name"] == self._export_name, exports), None)
+        export = self._service.get_export(
+            team_id=self._project.team_id,
+            project_id=self._project.uuid,
+            export_id=export["id"],
+        )
+        if not export:
+            raise AppException("Export not found.")
+        export_status = export["status"]
+
+        while export_status != ExportStatus.COMPLETE.value:
+            logger.info("Waiting 5 seconds for export to finish on server.")
+            time.sleep(5)
+
+            export = self._service.get_export(
+                team_id=self._project.team_id,
+                project_id=self._project.uuid,
+                export_id=export["id"],
+            )
+            export_status = export["status"]
+            if export_status in (ExportStatus.ERROR.value, ExportStatus.CANCELED.value):
+                raise AppException("Couldn't download export.")
+
+        filename = Path(export["path"]).name
+        filepath = Path(destination) / filename
+        with requests.get(export["download"], stream=True) as response:
+            response.raise_for_status()
+            with open(filepath, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        if self._extract_zip_contents:
+            with zipfile.ZipFile(filepath, "r") as f:
+                f.extractall(destination)
+            Path.unlink(filepath)
+        return export["id"], filepath, destination
+
+    def get_upload_files_count(self):
+        if not self._temp_dir:
+            self._temp_dir = tempfile.TemporaryDirectory()
+            self.download_to_local_storage(self._temp_dir.name)
+        return len(list(Path(self._temp_dir.name).rglob("*.*")))

-            self._response.data = self._folder_path
+    def execute(self):
+        if self.is_valid():
+            if self._to_s3_bucket:
+                self.get_upload_files_count()
+                yield from self.upload_to_s3_from_folder(self._temp_dir.name)
+                logger.info(f"Exported to AWS {self._to_s3_bucket}/{self._folder_path}")
+                self._temp_dir.cleanup()
+            else:
+                export_id, filepath, destination = self.download_to_local_storage(
+                    self._folder_path
+                )
+                if self._extract_zip_contents:
+                    logger.info(f"Extracted {filepath} to folder {destination}")
+                else:
+                    logger.info(f"Downloaded export ID {export_id} to {filepath}")
+                yield
         return self._response


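The net effect is that DownloadExportUseCase.execute() is now a generator: it yields once per uploaded file when an S3 bucket is given, so the caller owns progress reporting, and once after a plain local download. A minimal sketch of that consumption pattern, with made-up names standing in for the real use case:

# Illustrative generator-driven progress pattern (placeholder names, not the
# SDK's classes): the producer yields once per unit of work and the caller
# decides how to report progress.
from tqdm import tqdm

def fake_execute(files):
    for name in files:
        # ... upload `name` somewhere ...
        yield  # one tick per file, mirroring upload_to_s3_from_folder above

files = ["a.json", "b.json", "c.json"]
with tqdm(total=len(files), desc="Uploading") as progress_bar:
    for _ in fake_execute(files):
        progress_bar.update(1)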
src/superannotate/lib/infrastructure/controller.py

Lines changed: 1 addition & 2 deletions
@@ -1386,15 +1386,14 @@ def download_export(
         to_s3_bucket: bool,
     ):
         project = self._get_project(project_name)
-        use_case = usecases.DownloadExportUseCase(
+        return usecases.DownloadExportUseCase(
             service=self._backend_client,
             project=project,
             export_name=export_name,
             folder_path=folder_path,
             extract_zip_contents=extract_zip_contents,
             to_s3_bucket=to_s3_bucket,
         )
-        return use_case.execute()

     def download_ml_model(self, model_data: dict, download_path: str):
         model = MLModelEntity(
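Since the controller now hands back the un-executed use case instead of a response, callers decide when to validate and run it. A hypothetical caller, mirroring the pattern used in sdk_interface.py above (the controller instance and argument values are placeholders):

# Sketch of the new calling convention: nothing runs until the generator
# returned by execute() is iterated.
use_case = controller.download_export(
    project_name="My Project",
    export_name="my_export",
    folder_path="./exports",
    extract_zip_contents=True,
    to_s3_bucket=None,
)
if use_case.is_valid():              # runs validate_project_type() first
    for _ in use_case.execute():     # lazy: iterate to actually download
        pass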
