Commit 65d5dae

Fix download export

1 parent 177adc4 commit 65d5dae
File tree

2 files changed: +74, -85 lines

src/superannotate/lib/app/interface/sdk_interface.py

Lines changed: 7 additions & 44 deletions

@@ -2153,50 +2153,13 @@ def download_export(
     """
     project_name, folder_name = extract_project_folder(project)
     export_name = export["name"] if isinstance(export, dict) else export
-
-    if to_s3_bucket:
-        with tempfile.TemporaryDirectory() as tmp:
-            response = controller.download_export(
-                project_name=project_name,
-                export_name=export_name,
-                folder_path=tmp,
-                extract_zip_contents=extract_zip_contents,
-                to_s3_bucket=to_s3_bucket,
-            )
-            downloaded_folder_path = response.data
-            if to_s3_bucket:
-                to_s3_bucket = boto3.Session().resource("s3").Bucket(to_s3_bucket)
-                files_to_upload = []
-                for file in Path(downloaded_folder_path).rglob("*.*"):
-                    files_to_upload.append(file)
-
-                def _upload_file_to_s3(to_s3_bucket, path, s3_key) -> None:
-                    controller.upload_file_to_s3(
-                        to_s3_bucket=to_s3_bucket, path=path, s3_key=s3_key
-                    )
-
-                with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
-                    results = []
-                    for path in files_to_upload:
-                        s3_key = f"{folder_path}/{path.name}"
-                        results.append(
-                            executor.submit(
-                                _upload_file_to_s3, to_s3_bucket, str(path), s3_key
-                            )
-                        )
-                    for future in concurrent.futures.as_completed(results):
-                        future.result()
-                logger.info(
-                    "Exported to AWS %s/%s", to_s3_bucket.name, str(folder_path)
-                )
-    else:
-        controller.download_export(
-            project_name=project_name,
-            export_name=export_name,
-            folder_path=folder_path,
-            extract_zip_contents=extract_zip_contents,
-            to_s3_bucket=to_s3_bucket,
-        )
+    controller.download_export(
+        project_name=project_name,
+        export_name=export_name,
+        folder_path=folder_path,
+        extract_zip_contents=extract_zip_contents,
+        to_s3_bucket=to_s3_bucket,
+    )
 
 
 @Trackable
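
For context, a minimal usage sketch of the public call this diff simplifies, assuming the SDK is imported as sa and that an export has already been prepared; the project and folder names here are illustrative, not part of the commit:

import superannotate as sa

# Hypothetical project/paths; prepare_export returns an export dict with a "name" key.
export = sa.prepare_export("Example Project")
sa.download_export(
    project="Example Project",
    export=export,                 # an export dict or its name string
    folder_path="./exports",       # local destination directory
    extract_zip_contents=True,     # unzip the archive and delete it afterwards
    to_s3_bucket=None,             # or a bucket name; the use case now handles the upload
)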

src/superannotate/lib/core/usecases.py

Lines changed: 67 additions & 41 deletions

@@ -21,6 +21,7 @@
 import numpy as np
 import pandas as pd
 import requests
+import tempfile
 from boto3.exceptions import Boto3Error
 from lib.app.analytics.common import aggregate_annotations_as_df
 from lib.app.analytics.common import consensus_plot
@@ -3966,51 +3967,76 @@ def validate_project_type(self):
                 constances.LIMITED_FUNCTIONS[self._project.project_type]
             )
 
-    def execute(self):
-        if self.is_valid():
-            exports = self._service.get_exports(
-                team_id=self._project.team_id, project_id=self._project.uuid
-            )
-            export_id = None
-            for export in exports:
-                if export["name"] == self._export_name:
-                    export_id = export["id"]
-                    break
-            if not export_id:
-                raise AppException("Export not found.")
+    def upload_to_s3_from_folder(self, folder_path: str):
+        to_s3_bucket = boto3.Session().resource("s3").Bucket(self._to_s3_bucket)
+        files_to_upload = list(Path(folder_path).rglob("*.*"))
 
-            while True:
-                export = self._service.get_export(
-                    team_id=self._project.team_id,
-                    project_id=self._project.uuid,
-                    export_id=export_id,
+        def _upload_file_to_s3(_to_s3_bucket, _path, _s3_key) -> None:
+            _to_s3_bucket.upload_file(_path, _s3_key)
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
+            results = []
+            for path in files_to_upload:
+                s3_key = f"{self._folder_path}/{path.name}"
+                results.append(
+                    executor.submit(
+                        _upload_file_to_s3, to_s3_bucket, str(path), s3_key
+                    )
                 )
 
-                if export["status"] == ExportStatus.IN_PROGRESS.value:
-                    logger.info("Waiting 5 seconds for export to finish on server.")
-                    time.sleep(5)
-                    continue
-                if export["status"] == ExportStatus.ERROR.value:
-                    raise AppException("Couldn't download export.")
-                pass
-                break
-
-            filename = Path(export["path"]).name
-            filepath = Path(self._folder_path) / filename
-            with requests.get(export["download"], stream=True) as r:
-                r.raise_for_status()
-                with open(filepath, "wb") as f:
-                    for chunk in r.iter_content(chunk_size=8192):
-                        f.write(chunk)
-            if self._extract_zip_contents:
-                with zipfile.ZipFile(filepath, "r") as f:
-                    f.extractall(self._folder_path)
-                Path.unlink(filepath)
-                logger.info(f"Extracted {filepath} to folder {self._folder_path}")
-            else:
-                logger.info(f"Downloaded export ID {export['id']} to {filepath}")
+    def download_to_local_storage(self, destination: str):
+        exports = self._service.get_exports(
+            team_id=self._project.team_id, project_id=self._project.uuid
+        )
+        export = next(filter(lambda i: i["name"] == self._export_name, exports), None)
+        export = self._service.get_export(
+            team_id=self._project.team_id,
+            project_id=self._project.uuid,
+            export_id=export["id"],
+        )
+        if not export:
+            raise AppException("Export not found.")
+        export_status = export["status"]
+
+        while export_status != ExportStatus.COMPLETE.value:
+            logger.info("Waiting 5 seconds for export to finish on server.")
+            time.sleep(5)
+
+            export = self._service.get_export(
+                team_id=self._project.team_id,
+                project_id=self._project.uuid,
+                export_id=export["id"],
+            )
+            export_status = export["status"]
+            if export_status in (ExportStatus.ERROR.value, ExportStatus.CANCELED.value):
+                raise AppException("Couldn't download export.")
+
+        filename = Path(export["path"]).name
+        filepath = Path(destination) / filename
+        with requests.get(export["download"], stream=True) as response:
+            response.raise_for_status()
+            with open(filepath, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+        if self._extract_zip_contents:
+            with zipfile.ZipFile(filepath, "r") as f:
+                f.extractall(destination)
+            Path.unlink(filepath)
+        return export["id"], filepath, destination
 
-            self._response.data = self._folder_path
+    def execute(self):
+        if self.is_valid():
+            if self._to_s3_bucket:
+                with tempfile.TemporaryDirectory() as tmp:
+                    self.download_to_local_storage(tmp)
+                    self.upload_to_s3_from_folder(tmp)
+                logger.info(f"Exported to AWS {self._to_s3_bucket}/{self._folder_path}")
+            else:
+                export_id, filepath, destination = self.download_to_local_storage(self._folder_path)
+                if self._extract_zip_contents:
+                    logger.info(f"Extracted {filepath} to folder {destination}")
+                else:
+                    logger.info(f"Downloaded export ID {export_id} to {filepath}")
         return self._response
 
 

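The reworked use case splits the flow in two: download_to_local_storage polls the export status, streams the archive, and optionally extracts it, while upload_to_s3_from_folder fans the downloaded files out to S3 from a thread pool. A self-contained sketch of that download-then-upload pattern, with placeholder bucket and prefix names:

import concurrent.futures
import tempfile
from pathlib import Path

import boto3


def upload_folder_to_s3(folder: str, bucket_name: str, prefix: str) -> None:
    # Mirrors upload_to_s3_from_folder: one boto3 Bucket, ten worker threads.
    bucket = boto3.Session().resource("s3").Bucket(bucket_name)
    files = list(Path(folder).rglob("*.*"))
    with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
        futures = [
            executor.submit(bucket.upload_file, str(path), f"{prefix}/{path.name}")
            for path in files
        ]
        for future in concurrent.futures.as_completed(futures):
            future.result()  # re-raise the first failed upload, if any


with tempfile.TemporaryDirectory() as tmp:
    # ...download and extract the export into tmp here (the use case's local step)...
    upload_folder_to_s3(tmp, "my-example-bucket", "exports/example-project")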