55import os
66import tempfile
77import time
8- import uuid
98from collections import Counter
109from collections import namedtuple
1110from io import BytesIO
1817
1918import boto3
2019import lib .core as constances
21- import pandas as pd
2220import plotly .graph_objects as go
2321from lib .app .annotation_helpers import add_annotation_bbox_to_json
2422from lib .app .annotation_helpers import add_annotation_comment_to_json
3129from lib .app .exceptions import EmptyOutputError
3230from lib .app .helpers import extract_project_folder
3331from lib .app .helpers import get_annotation_paths
32+ from lib .app .helpers import get_paths_and_duplicated_from_csv
3433from lib .app .helpers import reformat_metrics_json
3534from lib .app .interface .types import AnnotationType
3635from lib .app .interface .types import NotEmptyStr
@@ -2287,45 +2286,26 @@ def attach_image_urls_to_project(
22872286 :rtype: tuple
22882287 """
22892288 project_name , folder_name = extract_project_folder (project )
2290- project = controller .get_project_metadata (project_name ).data
2291- if project ["project" ].project_type == constances .ProjectType .VIDEO .value :
2292- raise AppException (
2293- "The function does not support projects containing videos attached with URLs"
2294- )
2295-
2296- image_data = pd .read_csv (attachments , dtype = str )
2297- image_data = image_data [~ image_data ["url" ].isnull ()]
2298- if "name" in image_data .columns :
2299- image_data ["name" ] = (
2300- image_data ["name" ]
2301- .fillna ("" )
2302- .apply (lambda cell : cell if str (cell ).strip () else str (uuid .uuid4 ()))
2303- )
2304- else :
2305- image_data ["name" ] = [str (uuid .uuid4 ()) for _ in range (len (image_data .index ))]
2306-
2307- image_data = pd .DataFrame (image_data , columns = ["name" , "url" ])
2308- img_names_urls = image_data .rename (columns = {"url" : "path" }).to_dict (
2309- orient = "records"
2310- )
2289+ images_to_upload , duplicate_images = get_paths_and_duplicated_from_csv (attachments )
23112290 list_of_not_uploaded = []
2312- duplicate_images = []
2313- for i in range (0 , len (img_names_urls ), 500 ):
2314- response = controller .attach_urls (
2315- project_name = project_name ,
2316- folder_name = folder_name ,
2317- files = ImageSerializer .deserialize (
2318- img_names_urls [i : i + 500 ] # noqa: E203
2319- ),
2320- annotation_status = annotation_status ,
2321- )
2322- if response .errors :
2323- list_of_not_uploaded .append (response .data [0 ])
2324- duplicate_images .append (response .data [1 ])
23252291
2292+ with tqdm (total = len (images_to_upload ), desc = "Attaching urls" ) as progress_bar :
2293+ for i in range (0 , len (images_to_upload ), 500 ):
2294+ response = controller .attach_urls (
2295+ project_name = project_name ,
2296+ folder_name = folder_name ,
2297+ files = ImageSerializer .deserialize (
2298+ images_to_upload [i : i + 500 ] # noqa: E203
2299+ ),
2300+ annotation_status = annotation_status ,
2301+ )
2302+ if response .errors :
2303+ list_of_not_uploaded .append (response .data [0 ])
2304+ duplicate_images .append (response .data [1 ])
2305+ progress_bar .update (len (images_to_upload [i : i + 500 ]))
23262306 list_of_uploaded = [
23272307 image ["name" ]
2328- for image in img_names_urls
2308+ for image in images_to_upload
23292309 if image ["name" ] not in list_of_not_uploaded
23302310 ]
23312311
@@ -2349,43 +2329,26 @@ def attach_video_urls_to_project(
23492329 :rtype: (list, list, list)
23502330 """
23512331 project_name , folder_name = extract_project_folder (project )
2352- project = controller .get_project_metadata (project_name ).data
2353- if project ["project" ].project_type != constances .ProjectType .VIDEO .value :
2354- raise AppException ("The function does not support" )
2355-
2356- image_data = pd .read_csv (attachments , dtype = str )
2357- image_data = image_data [~ image_data ["url" ].isnull ()]
2358- if "name" in image_data .columns :
2359- image_data ["name" ] = (
2360- image_data ["name" ]
2361- .fillna ("" )
2362- .apply (lambda cell : cell if str (cell ).strip () else str (uuid .uuid4 ()))
2363- )
2364- else :
2365- image_data ["name" ] = [str (uuid .uuid4 ()) for _ in range (len (image_data .index ))]
2366-
2367- image_data = pd .DataFrame (image_data , columns = ["name" , "url" ])
2368- img_names_urls = image_data .rename (columns = {"url" : "path" }).to_dict (
2369- orient = "records"
2370- )
2332+ images_to_upload , duplicate_images = get_paths_and_duplicated_from_csv (attachments )
23712333 list_of_not_uploaded = []
2372- duplicate_images = []
2373- for i in range (0 , len (img_names_urls ), 500 ):
2374- response = controller .attach_urls (
2375- project_name = project_name ,
2376- folder_name = folder_name ,
2377- files = ImageSerializer .deserialize (
2378- img_names_urls [i : i + 500 ] # noqa: E203
2379- ),
2380- annotation_status = annotation_status ,
2381- )
2382- if response .errors :
2383- list_of_not_uploaded .append (response .data [0 ])
2384- duplicate_images .append (response .data [1 ])
23852334
2335+ with tqdm (total = len (images_to_upload ), desc = "Attaching urls" ) as progress_bar :
2336+ for i in range (0 , len (images_to_upload ), 500 ):
2337+ response = controller .attach_urls (
2338+ project_name = project_name ,
2339+ folder_name = folder_name ,
2340+ files = ImageSerializer .deserialize (
2341+ images_to_upload [i : i + 500 ] # noqa: E203
2342+ ),
2343+ annotation_status = annotation_status ,
2344+ )
2345+ if response .errors :
2346+ list_of_not_uploaded .append (response .data [0 ])
2347+ duplicate_images .append (response .data [1 ])
2348+ progress_bar .update (len (images_to_upload [i : i + 500 ]))
23862349 list_of_uploaded = [
23872350 image ["name" ]
2388- for image in img_names_urls
2351+ for image in images_to_upload
23892352 if image ["name" ] not in list_of_not_uploaded
23902353 ]
23912354
@@ -3642,40 +3605,26 @@ def attach_document_urls_to_project(
36423605 :rtype: tuple
36433606 """
36443607 project_name , folder_name = extract_project_folder (project )
3645-
3646- image_data = pd .read_csv (attachments , dtype = str )
3647- image_data = image_data [~ image_data ["url" ].isnull ()]
3648- if "name" in image_data .columns :
3649- image_data ["name" ] = (
3650- image_data ["name" ]
3651- .fillna ("" )
3652- .apply (lambda cell : cell if str (cell ).strip () else str (uuid .uuid4 ()))
3653- )
3654- else :
3655- image_data ["name" ] = [str (uuid .uuid4 ()) for _ in range (len (image_data .index ))]
3656-
3657- image_data = pd .DataFrame (image_data , columns = ["name" , "url" ])
3658- img_names_urls = image_data .rename (columns = {"url" : "path" }).to_dict (
3659- orient = "records"
3660- )
3608+ images_to_upload , duplicate_images = get_paths_and_duplicated_from_csv (attachments )
36613609 list_of_not_uploaded = []
3662- duplicate_images = []
3663- for i in range (0 , len (img_names_urls ), 500 ):
3664- response = controller .attach_urls (
3665- project_name = project_name ,
3666- folder_name = folder_name ,
3667- files = ImageSerializer .deserialize (
3668- img_names_urls [i : i + 500 ] # noqa: E203
3669- ),
3670- annotation_status = annotation_status ,
3671- )
3672- if response .errors :
3673- list_of_not_uploaded .append (response .data [0 ])
3674- duplicate_images .append (response .data [1 ])
36753610
3611+ with tqdm (total = len (images_to_upload ), desc = "Attaching urls" ) as progress_bar :
3612+ for i in range (0 , len (images_to_upload ), 500 ):
3613+ response = controller .attach_urls (
3614+ project_name = project_name ,
3615+ folder_name = folder_name ,
3616+ files = ImageSerializer .deserialize (
3617+ images_to_upload [i : i + 500 ] # noqa: E203
3618+ ),
3619+ annotation_status = annotation_status ,
3620+ )
3621+ if response .errors :
3622+ list_of_not_uploaded .append (response .data [0 ])
3623+ duplicate_images .append (response .data [1 ])
3624+ progress_bar .update (len (images_to_upload [i : i + 500 ]))
36763625 list_of_uploaded = [
36773626 image ["name" ]
3678- for image in img_names_urls
3627+ for image in images_to_upload
36793628 if image ["name" ] not in list_of_not_uploaded
36803629 ]
36813630
0 commit comments