Skip to content

Commit 137abc6

Browse files
committed
upload annotations fix
1 parent 0ecd8ab commit 137abc6

File tree

9 files changed

+254
-5
lines changed

9 files changed

+254
-5
lines changed

src/superannotate/lib/core/usecases/annotations.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class AnnotationToUpload:
8181
path: str
8282
data: io.StringIO = None
8383
mask: io.BytesIO = None
84+
size: int = None
8485

8586
def __init__(
8687
self,
@@ -348,20 +349,23 @@ async def upload(_chunk):
348349
self.reporter.log_debug(str(e))
349350
self._report.failed_annotations.extend([i.name for i in _chunk])
350351

352+
_size = 0
351353
while True:
352354
item = await self._small_files_queue.get()
353355
self._small_files_queue.task_done()
354356
if not item:
355357
self._small_files_queue.put_nowait(None)
356358
break
357-
chunk.append(item)
358359
if (
359-
sys.getsizeof(chunk) >= self.CHUNK_SIZE_MB
360+
_size + item.size >= self.CHUNK_SIZE_MB
360361
or sum([len(i.name) for i in chunk])
361-
>= self.URI_THRESHOLD - len(chunk) * 14
362+
>= self.URI_THRESHOLD - (len(chunk) + 1) * 14
362363
):
363364
await upload(chunk)
364365
chunk = []
366+
_size = 0
367+
chunk.append(item)
368+
_size += item.size
365369
if chunk:
366370
await upload(chunk)
367371

@@ -411,6 +415,7 @@ async def distribute_queues(self, items_to_upload: list):
411415
annotation_file.seek(0)
412416
t_item.data = annotation_file
413417
t_item.mask = mask
418+
t_item.size = size
414419
while True:
415420
if size > BIG_FILE_THRESHOLD:
416421
if self._big_files_queue.qsize() > 32:

src/superannotate/lib/infrastructure/services.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,6 +1390,7 @@ async def upload_annotations(
13901390
filename=key,
13911391
content_type="application/json",
13921392
)
1393+
13931394
_response = await session.post(
13941395
url,
13951396
params={

tests/data_set/sample_large_json_vector/aearth_mov_001.jpg.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/data_set/sample_large_json_vector/aearth_mov_002.jpg.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/data_set/sample_large_json_vector/aearth_mov_003.jpg.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/data_set/sample_large_json_vector/aearth_mov_004.jpg.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/data_set/sample_large_json_vector/aearth_mov_005.jpg.json

Lines changed: 1 addition & 0 deletions
Large diffs are not rendered by default.

tests/data_set/sample_large_json_vector/classes/classes.json

Lines changed: 216 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,216 @@
1+
[
2+
{
3+
"id": 1178455,
4+
"project_id": 1290,
5+
"name": "sdfg",
6+
"color": "#880901",
7+
"type": "tag",
8+
"createdAt": "2022-02-16T12:49:36.000Z",
9+
"updatedAt": "2022-07-14T06:32:21.000Z",
10+
"attribute_groups": [
11+
{
12+
"group_type": "radio",
13+
"id": 1815783,
14+
"class_id": 1178455,
15+
"name": "0",
16+
"createdAt": "2022-07-14T06:32:21.000Z",
17+
"updatedAt": "2022-07-14T06:32:21.000Z",
18+
"attributes": [
19+
{
20+
"id": 4143110,
21+
"group_id": 1815783,
22+
"project_id": 1290,
23+
"name": "s",
24+
"count": 0,
25+
"createdAt": "2022-07-14T06:32:21.000Z",
26+
"updatedAt": "2022-07-14T06:32:21.000Z"
27+
}
28+
],
29+
"default_value": null
30+
},
31+
{
32+
"group_type": "radio",
33+
"id": 1815784,
34+
"class_id": 1178455,
35+
"name": "1",
36+
"createdAt": "2022-07-14T06:32:21.000Z",
37+
"updatedAt": "2022-07-14T06:32:21.000Z",
38+
"attributes": [
39+
{
40+
"id": 4143111,
41+
"group_id": 1815784,
42+
"project_id": 1290,
43+
"name": "d",
44+
"count": 0,
45+
"createdAt": "2022-07-14T06:32:21.000Z",
46+
"updatedAt": "2022-07-14T06:32:21.000Z"
47+
}
48+
],
49+
"default_value": null
50+
},
51+
{
52+
"group_type": "radio",
53+
"id": 1815785,
54+
"class_id": 1178455,
55+
"name": "2",
56+
"createdAt": "2022-07-14T06:32:21.000Z",
57+
"updatedAt": "2022-07-14T06:32:21.000Z",
58+
"attributes": [
59+
{
60+
"id": 4143112,
61+
"group_id": 1815785,
62+
"project_id": 1290,
63+
"name": "d",
64+
"count": 0,
65+
"createdAt": "2022-07-14T06:32:21.000Z",
66+
"updatedAt": "2022-07-14T06:32:21.000Z"
67+
}
68+
],
69+
"default_value": null
70+
}
71+
]
72+
},
73+
{
74+
"id": 755215,
75+
"project_id": 1290,
76+
"name": "Plhayer",
77+
"color": "#0022c3",
78+
"type": "object",
79+
"createdAt": "2021-07-29T08:41:59.000Z",
80+
"updatedAt": "2021-07-29T08:42:38.000Z",
81+
"attribute_groups": []
82+
},
83+
{
84+
"id": 7399,
85+
"project_id": 1290,
86+
"name": "Shirt Number",
87+
"color": "#792568",
88+
"type": "object",
89+
"createdAt": "2020-06-03T07:33:51.000Z",
90+
"updatedAt": "2020-06-03T07:33:51.000Z",
91+
"attribute_groups": [
92+
{
93+
"group_type": "checklist",
94+
"id": 5791,
95+
"class_id": 7399,
96+
"name": "Type",
97+
"createdAt": "2020-06-03T07:33:51.000Z",
98+
"updatedAt": "2020-06-03T07:33:51.000Z",
99+
"attributes": [
100+
{
101+
"id": 13668,
102+
"group_id": 5791,
103+
"project_id": 1290,
104+
"name": "Illegible",
105+
"count": 0,
106+
"createdAt": "2020-06-03T07:33:51.000Z",
107+
"updatedAt": "2020-06-03T07:33:51.000Z"
108+
},
109+
{
110+
"id": 13667,
111+
"group_id": 5791,
112+
"project_id": 1290,
113+
"name": "Occluded",
114+
"count": 0,
115+
"createdAt": "2020-06-03T07:33:51.000Z",
116+
"updatedAt": "2020-06-03T07:33:51.000Z"
117+
}
118+
],
119+
"default_value": []
120+
}
121+
]
122+
},
123+
{
124+
"id": 7398,
125+
"project_id": 1290,
126+
"name": "Referee",
127+
"color": "#26eded",
128+
"type": "object",
129+
"createdAt": "2020-06-03T07:33:51.000Z",
130+
"updatedAt": "2020-10-01T11:06:57.000Z",
131+
"attribute_groups": [
132+
{
133+
"group_type": "radio",
134+
"id": 5790,
135+
"class_id": 7398,
136+
"name": "Type",
137+
"createdAt": "2020-06-03T07:33:51.000Z",
138+
"updatedAt": "2020-06-03T07:33:51.000Z",
139+
"attributes": [
140+
{
141+
"id": 13666,
142+
"group_id": 5790,
143+
"project_id": 1290,
144+
"name": "Occluded",
145+
"count": 0,
146+
"createdAt": "2020-06-03T07:33:51.000Z",
147+
"updatedAt": "2020-06-03T07:33:51.000Z"
148+
}
149+
],
150+
"default_value": null
151+
}
152+
]
153+
},
154+
{
155+
"id": 7397,
156+
"project_id": 1290,
157+
"name": "Goalkeeper",
158+
"color": "#d002f6",
159+
"type": "object",
160+
"createdAt": "2020-06-03T07:33:51.000Z",
161+
"updatedAt": "2020-11-09T13:01:12.000Z",
162+
"attribute_groups": [
163+
{
164+
"group_type": "radio",
165+
"id": 5789,
166+
"class_id": 7397,
167+
"name": "Type",
168+
"createdAt": "2020-06-03T07:33:51.000Z",
169+
"updatedAt": "2020-06-03T07:33:51.000Z",
170+
"attributes": [
171+
{
172+
"id": 13665,
173+
"group_id": 5789,
174+
"project_id": 1290,
175+
"name": "Occluded",
176+
"count": 0,
177+
"createdAt": "2020-06-03T07:33:51.000Z",
178+
"updatedAt": "2020-06-03T07:33:51.000Z"
179+
}
180+
],
181+
"default_value": null
182+
}
183+
]
184+
},
185+
{
186+
"id": 7396,
187+
"project_id": 1290,
188+
"name": "Player",
189+
"color": "#f90e35",
190+
"type": "object",
191+
"createdAt": "2020-06-03T07:33:51.000Z",
192+
"updatedAt": "2020-11-09T13:01:12.000Z",
193+
"attribute_groups": [
194+
{
195+
"group_type": "radio",
196+
"id": 5788,
197+
"class_id": 7396,
198+
"name": "Type",
199+
"createdAt": "2020-06-03T07:33:51.000Z",
200+
"updatedAt": "2020-06-03T07:33:51.000Z",
201+
"attributes": [
202+
{
203+
"id": 13664,
204+
"group_id": 5788,
205+
"project_id": 1290,
206+
"name": "Occluded",
207+
"count": 0,
208+
"createdAt": "2020-06-03T07:33:51.000Z",
209+
"updatedAt": "2020-11-09T13:01:12.000Z"
210+
}
211+
],
212+
"default_value": null
213+
}
214+
]
215+
}
216+
]

tests/integration/annotations/test_upload_annotations_from_folder_to_project.py

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
from src.superannotate import SAClient
55
from tests.integration.base import BaseTestCase
6+
from tests import DATA_SET_PATH
67

78
sa = SAClient()
89

@@ -13,7 +14,8 @@ class TestAnnotationUploadVector(BaseTestCase):
1314
PROJECT_TYPE = "Vector"
1415
TEST_FOLDER_PATH = "data_set/sample_vector_annotations_with_tag_classes"
1516
TEST_4_FOLDER_PATH = "data_set/sample_project_vector"
16-
TEST_BIG_FOLDER_PATH = "data_set/sample_big_json_vector"
17+
TEST_BIG_FOLDER_PATH = "sample_big_json_vector"
18+
TEST_LARGE_FOLDER_PATH = "sample_large_json_vector"
1719
IMAGE_NAME = "example_image_1.jpg"
1820

1921
@property
@@ -26,7 +28,11 @@ def folder_path(self):
2628

2729
@property
2830
def big_annotations_folder_path(self):
29-
return os.path.join(Path(__file__).parent.parent.parent, self.TEST_BIG_FOLDER_PATH)
31+
return os.path.join(DATA_SET_PATH, self.TEST_BIG_FOLDER_PATH)
32+
33+
@property
34+
def large_annotations_folder_path(self):
35+
return os.path.join(DATA_SET_PATH, self.TEST_LARGE_FOLDER_PATH)
3036

3137
def test_annotation_folder_upload_download(self):
3238
self._attach_items()
@@ -55,6 +61,22 @@ def test_4_annotation_folder_upload_download(self):
5561
)
5662
assert len(uploaded) == 4
5763

64+
def test_upload_large_annotations(self):
65+
sa.attach_items(
66+
self.PROJECT_NAME,
67+
[{"name": f"aearth_mov_00{i}.jpg", "url": f"url_{i}"} for i in range(1, 6)] # noqa
68+
)
69+
70+
sa.create_annotation_classes_from_classes_json(
71+
self.PROJECT_NAME, f"{self.large_annotations_folder_path}/classes/classes.json"
72+
)
73+
uploaded, _, _ = sa.upload_annotations_from_folder_to_project(
74+
self.PROJECT_NAME, self.large_annotations_folder_path
75+
)
76+
assert len(uploaded) == 5
77+
annotations = sa.get_annotations(self.PROJECT_NAME)
78+
assert [len(annotation["instances"]) > 1 for annotation in annotations].count(True) == 4
79+
5880
def test_upload_big_annotations(self):
5981
sa.attach_items(
6082
self.PROJECT_NAME,

0 commit comments

Comments
 (0)