11import copy
2+ from collections import defaultdict
3+ from concurrent .futures import as_completed
4+ from concurrent .futures import ThreadPoolExecutor
25from typing import List
36from typing import Optional
4- from collections import defaultdict
57
68import superannotate .lib .core as constants
9+ from lib .app .helpers import extract_project_folder
710from lib .core .conditions import Condition
811from lib .core .conditions import CONDITION_EQ as EQ
912from lib .core .entities import AttachmentEntity
2427from lib .core .serviceproviders import SuperannotateServiceProvider
2528from lib .core .usecases .base import BaseReportableUseCase
2629from lib .core .usecases .base import BaseUseCase
27-
30+ from lib . core . usecases . folders import SearchFoldersUseCase
2831from superannotate .logger import get_default_logger
2932
3033logger = get_default_logger ()
@@ -812,33 +815,44 @@ def execute(self):
812815
813816 return self ._response
814817
818+
815819class AddItemsToSubsetUseCase (BaseUseCase ):
816820 CHUNK_SIZE = 5000
817821
def __init__(
    self,
    reporter,
    project,
    subset_name,
    items,
    backend_client,
    folder_repo,
    root_folder,
):
    """Store the collaborators and reset the bookkeeping for one run.

    :param reporter: reporter handed to the query use case and used for
        user-facing log messages.
    :param project: project entity; its ``id``, ``team_id`` and ``name``
        are read by the other methods of this class.
    :param subset_name: name of the subset the items are added to.
    :param items: list of dicts, each identifying an item either by
        ``"id"`` or by ``"name"`` together with ``"path"``.
    :param backend_client: service provider used to query items and to
        get/create the subset and attach items to it.
    :param folder_repo: folder repository passed to the folder search.
    :param root_folder: folder used for paths that name no folder.
    """
    # Collaborators.
    self.reporter = reporter
    self.project = project
    self._backend_client = backend_client
    self.folder_repository = folder_repo
    self.root_folder = root_folder

    # Request.
    self.subset_name = subset_name
    self.items = items

    # Per-run bookkeeping.
    self.item_ids = []
    self.path_separated = defaultdict(dict)
    self.results = {
        "failed": [],
        "skipped": [],
        "succeeded": [],
    }

    super().__init__()
830843
831- def __filter_duplicates ():
844+ def __filter_duplicates (
845+ self ,
846+ ):
832847 def uniqueQ (item , seen ):
833848 result = True
834849 if "id" in item :
835850 if item ["id" ] in seen :
836851 result = False
837852 else :
838853 seen .add (item ["id" ])
839-
840854 if "name" in item and "path" in item :
841- unique_str = f"{ item ['path' ]} /item['name']"
855+ unique_str = f"{ item ['path' ]} /{ item ['name' ]} "
842856 if unique_str in seen :
843857 result = False
844858 else :
@@ -849,42 +863,136 @@ def uniqueQ(item, seen):
849863 uniques = [x for x in self .items if uniqueQ (x , seen )]
850864 return uniques
851865
def __filter_invalid_items(
    self,
):
    """Return the items of ``self.items`` that can be identified.

    An item is usable when it carries an ``"id"`` key, or both a
    ``"name"`` and a ``"path"`` key. Every other item is appended to
    ``self.results["skipped"]`` and left out of the returned list.
    """
    usable = []
    for candidate in self.items:
        has_id = "id" in candidate
        has_location = "name" in candidate and "path" in candidate
        if has_id or has_location:
            usable.append(candidate)
        else:
            self.results["skipped"].append(candidate)
    return usable
865880
def __separate_to_paths(
    self,
):
    """Split ``self.items`` into plain ids and per-path groups.

    Items carrying an ``"id"`` go straight to ``self.item_ids``. The
    remaining items are grouped by their ``"path"`` in
    ``self.path_separated`` as ``{"items": [...], "folder": <folder>}``.

    Paths that point to another project, or to a folder that cannot be
    resolved, are dropped from ``self.path_separated`` and all of their
    items are recorded in ``self.results["skipped"]`` so they are never
    queried later.
    """
    for item in self.items:
        if "id" in item:
            self.item_ids.append(item["id"])
        else:
            group = self.path_separated[item["path"]]
            group.setdefault("items", []).append(item)

    # Collected first and removed afterwards: popping while iterating the
    # dict would raise.
    invalid_paths = []
    for path, value in self.path_separated.items():
        project, folder = extract_project_folder(path)

        if project != self.project.name:
            invalid_paths.append(path)
            continue

        # No folder in the path means the project root.
        # NOTE(review): a real folder literally named "root" may clash
        # with this convention further down the line - confirm.
        if not folder:
            value["folder"] = self.root_folder
            continue

        try:
            folder_candidates = SearchFoldersUseCase(
                project=self.project,
                folder_name=folder,
                folders=self.folder_repository,
                condition=Condition.get_empty_condition(),
            ).execute()
            # Keep only an exact name match among the candidates.
            matched = next(
                (f for f in folder_candidates.data if f.name == folder),
                None,
            )
        except Exception as error:
            # Best effort: a failed lookup only skips this path, but the
            # reason is no longer lost.
            logger.debug(
                "Folder lookup failed for path %r: %s", path, error
            )
            matched = None

        if matched is None:
            invalid_paths.append(path)
        else:
            value["folder"] = matched

    # Drop unusable paths and report their items as skipped.
    for path in invalid_paths:
        dropped = self.path_separated.pop(path)
        self.results["skipped"].extend(dropped["items"])
938+
def __build_query_string(self, path, item_names):
    """Build the query text selecting ``item_names`` inside ``path``.

    The folder part of ``path`` is used as the folder filter; when the
    path names no folder, the literal ``root`` is used instead.
    """
    _, folder_name = extract_project_folder(path)
    folder_name = folder_name or "root"
    return f"metadata(name IN {item_names!s}) AND folder={folder_name}"
946+
def __query(self, path, items):
    """Resolve the items requested under ``path`` to backend ids.

    :param path: key of the group in ``self.path_separated``.
    :param items: the group itself: ``{"items": [...], "folder": ...}``.
    :return: list of ids of the items the query found.

    Side effects: requested items the query did not return (all of them
    when the query fails or returns nothing) are appended to
    ``self.results["skipped"]``, and ``self.path_separated[path]`` is
    replaced by a list of ``{"id", "name", "path"}`` dicts for the items
    that were found, so their outcome can be reported later.
    """
    requested = items["items"]
    item_names = [item["name"] for item in requested]
    query = self.__build_query_string(path, item_names)
    query_use_case = QueryEntitiesUseCase(
        reporter=self.reporter,
        project=self.project,
        backend_service_provider=self._backend_client,
        query=query,
        folder=items["folder"],
        subset=None,
    )

    try:
        # Materialised once: the result is walked several times below.
        queried_items = list(query_use_case.execute().data or ())
    except Exception as error:
        # Best effort: a failed query skips this folder's items instead
        # of aborting the whole run.
        logger.debug("Item query failed for path %r: %s", path, error)
        queried_items = []

    if not queried_items:
        # Nothing usable came back. Return here so no later code touches
        # a missing result, and leave an empty list behind so the caller
        # can iterate this entry uniformly.
        self.results["skipped"].extend(requested)
        self.path_separated[path] = []
        return []

    # A set, not a generator: membership tests on a generator consume it
    # and would misreport found items as missing.
    found_names = {entity.name for entity in queried_items}
    self.results["skipped"].extend(
        item for item in requested if item["name"] not in found_names
    )

    # Remember ids next to name/path so the outcome of each item can be
    # looked up once the backend has answered.
    self.path_separated[path] = [
        {"id": entity.id, "name": entity.name, "path": entity.path}
        for entity in queried_items
    ]
    return [entity.id for entity in queried_items]
983+
def __distribute_to_results(self, item_id, response, item):
    """File ``item`` under the outcome the backend reported for ``item_id``.

    ``response.data`` is looked up under its ``"success"`` and
    ``"skipped"`` keys; an id found in neither counts as failed. The
    item is appended to the matching list in ``self.results``.
    """
    outcome = response.data
    if item_id in outcome["success"]:
        bucket = "succeeded"
    elif item_id in outcome["skipped"]:
        bucket = "skipped"
    else:
        bucket = "failed"
    self.results[bucket].append(item)
992+
993+ def validate_items (
994+ self ,
995+ ):
888996 filtered_items = self .__filter_duplicates ()
889997 if len (filtered_items ) != len (self .items ):
890998 self .reporter .log_info (
@@ -894,30 +1002,77 @@ def validate_items():
8941002 self .items = self .__filter_invalid_items ()
8951003 self .__separate_to_paths ()
8961004
def execute(
    self,
):
    """Add the validated items to the subset and report per-item outcome.

    Steps, performed only when ``self.is_valid()`` holds:

    1. resolve every path group to item ids (queries run in a thread pool);
    2. fetch the subset by name, creating it when it does not exist;
    3. attach the ids in chunks of ``CHUNK_SIZE`` and merge the chunk
       responses;
    4. sort every item into ``self.results`` ("succeeded", "skipped",
       "failed") and expose that dict as the response data.

    :return: ``self._response``.
    """
    if self.is_valid():
        # Snapshot the groups first: each query worker replaces its own
        # entry in self.path_separated, so the live dict must not be
        # iterated while the pool is running.
        pending = list(self.path_separated.items())
        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = [
                executor.submit(self.__query, path, items)
                for path, items in pending
            ]
            for future in as_completed(futures):
                self.item_ids.extend(future.result())

        subset = self._backend_client.get_subset(
            self.project.team_id, self.project.id, self.subset_name
        )
        if not subset:
            subset = self._backend_client.create_subset(
                self.project.team_id,
                self.project.id,
                self.subset_name,
                self.reporter,
            )[0]
        subset_id = subset["id"]

        response = None
        for start in range(0, len(self.item_ids), self.CHUNK_SIZE):
            chunk_response = self._backend_client.add_items_to_subset(
                project_id=self.project.id,
                team_id=self.project.team_id,
                item_ids=self.item_ids[start : start + self.CHUNK_SIZE],
                subset_id=subset_id,
            )

            # Identity check: a response object that happens to be falsy
            # must not be mistaken for "no response yet".
            if response is None:
                response = chunk_response
            else:
                # presumably each entry is a set of ids (".union" is
                # used) - verify against the service provider.
                for key in ("failed", "skipped", "success"):
                    response.data[key] = response.data[key].union(
                        chunk_response.data[key]
                    )

        # Items addressed by path: their entries now carry the resolved id.
        for found_items in self.path_separated.values():
            for item in found_items:
                # The id was added internally, so strip it before the
                # item is reported back.
                item_id = item.pop("id")
                self.__distribute_to_results(item_id, response, item)

        # Items addressed by id: the id was supplied by the user, keep it.
        for item in self.items:
            if "id" not in item:
                continue
            self.__distribute_to_results(item["id"], response, item)

        self._response.data = self.results

    # The method either returns or raises before control goes back to the
    # calling interface function, so no error handling is attached to the
    # response here.
    return self._response
0 commit comments