Skip to content

Commit d4cfb5e

Browse files
committed
finishing up with add_items_to_subset
1 parent b299d21 commit d4cfb5e

File tree

4 files changed

+286
-53
lines changed

4 files changed

+286
-53
lines changed

src/superannotate/lib/app/interface/sdk_interface.py

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -3046,7 +3046,7 @@ def delete_custom_values(
30463046
raise AppException(response.errors)
30473047

30483048
def add_items_to_subset(
3049-
project: NotEmptyStr, subset: NotEmptyStr, items: List[dict]
3049+
self, project: NotEmptyStr, subset: NotEmptyStr, items: List[dict]
30503050
):
30513051
"""
30523052
@@ -3109,11 +3109,9 @@ def add_items_to_subset(
31093109

31103110
project_name, _ = extract_project_folder(project)
31113111

3112-
response = self.controller.add_items_to_subset(
3113-
project,
3114-
subset,
3115-
items
3116-
)
3112+
response = self.controller.add_items_to_subset(project_name, subset, items)
31173113

31183114
if response.errors:
31193115
raise AppException(response.errors)
3116+
3117+
return response.data

src/superannotate/lib/core/usecases/items.py

Lines changed: 197 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,12 @@
11
import copy
2+
from collections import defaultdict
3+
from concurrent.futures import as_completed
4+
from concurrent.futures import ThreadPoolExecutor
25
from typing import List
36
from typing import Optional
4-
from collections import defaultdict
57

68
import superannotate.lib.core as constants
9+
from lib.app.helpers import extract_project_folder
710
from lib.core.conditions import Condition
811
from lib.core.conditions import CONDITION_EQ as EQ
912
from lib.core.entities import AttachmentEntity
@@ -24,7 +27,7 @@
2427
from lib.core.serviceproviders import SuperannotateServiceProvider
2528
from lib.core.usecases.base import BaseReportableUseCase
2629
from lib.core.usecases.base import BaseUseCase
27-
30+
from lib.core.usecases.folders import SearchFoldersUseCase
2831
from superannotate.logger import get_default_logger
2932

3033
logger = get_default_logger()
@@ -812,33 +815,44 @@ def execute(self):
812815

813816
return self._response
814817

818+
815819
class AddItemsToSubsetUseCase(BaseUseCase):
816820
CHUNK_SIZE = 5000
817821

818-
def __init__(self, reporter, project, subset_name, items ):
822+
def __init__(
823+
self,
824+
reporter,
825+
project,
826+
subset_name,
827+
items,
828+
backend_client,
829+
folder_repo,
830+
root_folder,
831+
):
819832
self.reporter = reporter
820833
self.project = project
821834
self.subset_name = subset_name
822-
self.items=items
823-
self.results = {
824-
"failed":[],
825-
"skipped":[],
826-
"succeded": []
827-
}
835+
self.items = items
836+
self.results = {"failed": [], "skipped": [], "succeeded": []}
828837
self.item_ids = []
829-
self.path_separated = defaultdict(list)
838+
self.path_separated = defaultdict(dict)
839+
self._backend_client = backend_client
840+
self.folder_repository = folder_repo
841+
self.root_folder = root_folder
842+
super().__init__()
830843

831-
def __filter_duplicates():
844+
def __filter_duplicates(
845+
self,
846+
):
832847
def uniqueQ(item, seen):
833848
result = True
834849
if "id" in item:
835850
if item["id"] in seen:
836851
result = False
837852
else:
838853
seen.add(item["id"])
839-
840854
if "name" in item and "path" in item:
841-
unique_str = f"{item['path']}/item['name']"
855+
unique_str = f"{item['path']}/{item['name']}"
842856
if unique_str in seen:
843857
result = False
844858
else:
@@ -849,42 +863,136 @@ def uniqueQ(item, seen):
849863
uniques = [x for x in self.items if uniqueQ(x, seen)]
850864
return uniques
851865

852-
def __filter_invalid_items():
866+
def __filter_invalid_items(
867+
self,
868+
):
853869
def validQ(item):
854-
if "id" in items:
870+
if "id" in item:
855871
return True
856872
if "name" in item and "path" in item:
857873
return True
858-
self.results['skipped'].append(item)
874+
self.results["skipped"].append(item)
859875
return False
860876

861877
filtered_items = [x for x in self.items if validQ(x)]
862878

863-
864879
return filtered_items
865880

866-
def __separate_to_paths():
881+
def __separate_to_paths(
882+
self,
883+
):
867884
for item in self.items:
868-
if 'id' in item:
869-
self.item_ids.append(item['id'])
885+
if "id" in item:
886+
self.item_ids.append(item["id"])
870887
else:
871-
self.path_separated[item['path']].append(item["name"])
888+
if "items" not in self.path_separated[item["path"]]:
889+
self.path_separated[item["path"]]["items"] = []
890+
891+
self.path_separated[item["path"]]["items"].append(item)
892+
893+
# Removing paths that have incorrect folders in them
894+
# And adding their items to the "skipped" list and removing them from self.path_separated
895+
# so that we don't query them later.
896+
# Otherwise include folder in path object in order to later run a query
897+
898+
removeables = []
899+
for path, value in self.path_separated.items():
900+
901+
project, folder = extract_project_folder(path)
902+
903+
if project != self.project.name:
904+
removeables.append(path)
905+
continue
872906

907+
# If no folder was provided in the path use "root"
908+
# Problems with folders named 'root' are going to arise
873909

874-
def __query(path, item_names):
910+
if not folder:
911+
value["folder"] = self.root_folder
912+
continue
913+
folder_found = False
914+
try:
915+
folder_candidates = SearchFoldersUseCase(
916+
project=self.project,
917+
folder_name=folder,
918+
folders=self.folder_repository,
919+
condition=Condition.get_empty_condition(),
920+
).execute()
921+
922+
for f in folder_candidates.data:
923+
if f.name == folder:
924+
value["folder"] = f
925+
folder_found = True
926+
break
927+
# If the folder did not exist add to skipped
928+
if not folder_found:
929+
removeables.append(path)
930+
931+
except Exception as e:
932+
removeables.append(path)
933+
934+
## Removing completely incorrect paths and their items
935+
for item in removeables:
936+
self.results["skipped"].extend(self.path_separated[item]["items"])
937+
self.path_separated.pop(item)
938+
939+
def __build_query_string(self, path, item_names):
940+
_, folder = extract_project_folder(path)
941+
if not folder:
942+
folder = "root"
943+
query_str = f"metadata(name IN {str(item_names)}) AND folder={folder}"
944+
945+
return query_str
946+
947+
def __query(self, path, items):
875948
folder = None
949+
_, folder = extract_project_folder(path)
950+
951+
item_names = [item["name"] for item in items["items"]]
952+
query = self.__build_query_string(path, item_names)
876953
query_use_case = QueryEntitiesUseCase(
877-
reporter = self.reporter,
878-
project = self.project,
879-
folder = folder,
880-
backend_service_provider = self._backend_client,
881-
query = query
954+
reporter=self.reporter,
955+
project=self.project,
956+
backend_service_provider=self._backend_client,
957+
query=query,
958+
folder=items["folder"],
959+
subset=None,
882960
)
883961

884-
queried_items = query_use_case.execute()
885-
return [x["id"] for x in queried_items]
962+
try:
963+
queried_items = query_use_case.execute().data
964+
except Exception as e:
965+
# If we failed the query for whatever reason
966+
# Add all items of the folder to skipped
967+
self.results["skipped"].extend(items["items"])
968+
969+
# Adding the images missing from specified folder to 'skipped'
970+
tmp = {item["name"]: item for item in items["items"]}
971+
tmp_q = (x.name for x in queried_items)
972+
973+
for i, val in tmp.items():
974+
if i not in tmp_q:
975+
self.results["skipped"].append(val)
976+
977+
# Adding ids to path_separated to later see if they've succeeded
886978

887-
def validate_items():
979+
self.path_separated[path] = [
980+
{"id": x.id, "name": x.name, "path": x.path} for x in queried_items
981+
]
982+
return [x.id for x in queried_items]
983+
984+
def __distribute_to_results(self, item_id, response, item):
985+
986+
if item_id in response.data["success"]:
987+
self.results["succeeded"].append(item)
988+
elif item_id in response.data["skipped"]:
989+
self.results["skipped"].append(item)
990+
else:
991+
self.results["failed"].append(item)
992+
993+
def validate_items(
994+
self,
995+
):
888996
filtered_items = self.__filter_duplicates()
889997
if len(filtered_items) != len(self.items):
890998
self.reporter.log_info(
@@ -894,30 +1002,77 @@ def validate_items():
8941002
self.items = self.__filter_invalid_items()
8951003
self.__separate_to_paths()
8961004

897-
def execute():
1005+
def execute(
1006+
self,
1007+
):
8981008
if self.is_valid():
8991009

9001010
futures = []
9011011
with ThreadPoolExecutor(max_workers=4) as executor:
902-
for path, item_names in self.path_separated.items():
903-
future = executor.submit(self.__query, path, item_names)
1012+
for path, items in self.path_separated.items():
1013+
future = executor.submit(self.__query, path, items)
9041014
futures.append(future)
9051015

9061016
for future in as_completed(futures):
907-
ids = future.result
908-
self.item_ids.exend(ids)
1017+
ids = future.result()
1018+
self.item_ids.extend(ids)
1019+
1020+
subset = self._backend_client.get_subset(
1021+
self.project.team_id, self.project.id, self.subset_name
1022+
)
9091023

1024+
if not subset:
1025+
subset = self._backend_client.create_subset(
1026+
self.project.team_id,
1027+
self.project.id,
1028+
self.subset_name,
1029+
self.reporter,
1030+
)[0]
9101031

1032+
subset_id = subset["id"]
1033+
response = None
9111034
for i in range(0, len(self.item_ids), self.CHUNK_SIZE):
912-
response = self._backend_client.attach_items_to_subset(
913-
project = self.project.id,
914-
team_id = self.project.team,
915-
item_ids = self.item_ids[i:i+self.CHUNK_SIZE]
1035+
tmp_response = self._backend_client.add_items_to_subset(
1036+
project_id=self.project.id,
1037+
team_id=self.project.team_id,
1038+
item_ids=self.item_ids[i : i + self.CHUNK_SIZE],
1039+
subset_id=subset_id,
9161040
)
9171041

918-
if response.ok:
919-
...
1042+
if not response:
1043+
response = tmp_response
9201044
else:
921-
...
1045+
response.data["failed"] = response.data["failed"].union(
1046+
tmp_response.data["failed"]
1047+
)
1048+
response.data["skipped"] = response.data["skipped"].union(
1049+
tmp_response.data["skipped"]
1050+
)
1051+
response.data["success"] = response.data["success"].union(
1052+
tmp_response.data["success"]
1053+
)
1054+
1055+
# Iterating over all path_separated entries (which now have ids in them) and sorting them into
1056+
# "success", "failed" and "skipped"
1057+
1058+
for path, value in self.path_separated.items():
1059+
for item in value:
1060+
item_id = item.pop(
1061+
"id"
1062+
) # Need to remove it, since it's added artificially
1063+
self.__distribute_to_results(item_id, response, item)
1064+
1065+
for item in self.items:
1066+
if "id" not in item:
1067+
continue
1068+
item_id = item[
1069+
"id"
1070+
] # No need to remove id, since it was supplied by the user
9221071

1072+
self.__distribute_to_results(item_id, response, item)
9231073

1074+
self._response.data = self.results
1075+
# The function should either return something or raise an exception prior to
1076+
# returning control to the interface function that called it. So no need for
1077+
# error handling in the response
1078+
return self._response

src/superannotate/lib/infrastructure/controller.py

Lines changed: 9 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1599,15 +1599,19 @@ def delete_custom_values(
15991599
)
16001600
return use_case.execute()
16011601

1602-
def add_items_to_subset(project_name: str, subset: str, items: List[dict]):
1602+
def add_items_to_subset(self, project_name: str, subset: str, items: List[dict]):
16031603

16041604
project = self._get_project(project_name)
1605+
root_folder = FolderEntity(uuid=project.id, name="root")
16051606

16061607
use_case = usecases.AddItemsToSubsetUseCase(
1607-
reporter = self.get_default_reporter(),
1608-
project = project,
1609-
subset = subset,
1610-
items = items
1608+
reporter=self.get_default_reporter(),
1609+
project=project,
1610+
subset_name=subset,
1611+
items=items,
1612+
backend_client=self.backend_client,
1613+
folder_repo=self.folders,
1614+
root_folder=root_folder,
16111615
)
16121616

16131617
return use_case.execute()

0 commit comments

Comments
 (0)