Skip to content

Commit 77aa02c

Browse files
committed
Added NaN handling
1 parent 3abafc9 commit 77aa02c

File tree

21 files changed

+337
-1623
lines changed

21 files changed

+337
-1623
lines changed

pytest.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
minversion = 3.7
33
log_cli=true
44
python_files = test_*.py
5-
;addopts = -n auto --dist=loadscope
5+
addopts = -n auto --dist=loadscope

requirements_dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
superannotate_schemas>=1.0.41b1
1+
superannotate_schemas>=v1.0.42dev1
22

src/superannotate/lib/app/analytics/aggregators.py

Lines changed: 85 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,34 @@ class VideoRawData:
5656
attributeName: str = None
5757

5858

59+
class DocumentRawData:
    """One flattened row of a document-project annotation export.

    Every field defaults to ``None`` at class level. The aggregator creates
    an instance and assigns only the fields relevant to the row being built
    (document metadata, a tag, an instance, or an instance attribute).
    """

    # ---- document metadata ----
    docName: str = None
    folderName: str = None
    docStatus: str = None
    docUrl: str = None
    docAnnotator: str = None
    docQA: str = None

    # ---- tag ----
    tagId: int = None
    tag: str = None

    # ---- instance ----
    instanceId: int = None
    instanceStart: int = None
    instanceEnd: int = None
    type: str = None
    className: str = None
    createdAt: str = None
    createdBy: str = None
    creatorRole: str = None
    updatedAt: str = None
    updatedBy: str = None
    updatorRole: str = None

    # ---- instance attribute ----
    attributeId: int = None
    attributeGroupName: str = None
    attributeName: str = None
85+
86+
5987
class DataAggregator:
6088
def __init__(
6189
self,
@@ -124,6 +152,8 @@ def aggregate_annotations_as_df(self):
124152
return self.aggregate_image_annotations_as_df(annotation_paths)
125153
elif self.project_type == constances.ProjectType.VIDEO.name:
126154
return self.aggregate_video_annotations_as_df(annotation_paths)
155+
elif self.project_type == constances.ProjectType.DOCUMENT.name:
156+
return self.aggregate_document_annotations_as_df(annotation_paths)
127157

128158
def aggregate_video_annotations_as_df(self, annotation_paths: List[str]):
129159
raws = []
@@ -205,7 +235,61 @@ def aggregate_video_annotations_as_df(self, annotation_paths: List[str]):
205235
raws.append(instance_raw)
206236
if not instances:
207237
raws.append(raw_data)
208-
return pd.DataFrame([raw.__dict__ for raw in raws], dtype=object)
238+
df = pd.DataFrame([raw.__dict__ for raw in raws], dtype=object)
239+
return df.where(pd.notnull(df), None)
240+
241+
def aggregate_document_annotations_as_df(self, annotation_paths: List[str]):
    """Flatten document annotation JSON files into a pandas DataFrame.

    For every annotation file one row is emitted per tag, plus one row per
    instance attribute (or a single row for an instance that has no
    attributes). A document without instances additionally contributes one
    metadata-only row.

    :param annotation_paths: paths of the annotation JSON files to aggregate
    :type annotation_paths: list of strs

    :return: DataFrame of object dtype in which missing values are ``None``
    :rtype: pandas.DataFrame
    """
    raws = []
    for annotation_path in annotation_paths:
        annotation_path = Path(annotation_path)
        # Use a context manager so the handle is closed deterministically,
        # and read as UTF-8 rather than the platform's locale encoding.
        with open(annotation_path, encoding="utf-8") as annotation_file:
            annotation_data = json.load(annotation_file)
        metadata = annotation_data["metadata"]

        raw_data = DocumentRawData()
        # metadata
        raw_data.docName = metadata["name"]
        # Files located directly in the project root belong to no folder.
        raw_data.folderName = (
            annotation_path.parent.name
            if annotation_path.parent != self.project_root
            else None
        )
        raw_data.docStatus = metadata.get("status")
        raw_data.docUrl = metadata.get("url")
        raw_data.docAnnotator = metadata.get("annotatorEmail")
        raw_data.docQA = metadata.get("qaEmail")

        # append tags ("or []" also covers an explicit JSON null)
        for tag_id, tag in enumerate(annotation_data.get("tags") or []):
            tag_row = copy.copy(raw_data)
            tag_row.tagId = tag_id
            tag_row.tag = tag
            raws.append(tag_row)

        # append instances
        instances = annotation_data.get("instances") or []
        for instance_id, instance in enumerate(instances):
            # "createdBy"/"updatedBy" may be absent or null; fall back to an
            # empty mapping so the nested lookups yield None, not an error.
            created_by = instance.get("createdBy") or {}
            updated_by = instance.get("updatedBy") or {}

            instance_raw = copy.copy(raw_data)
            instance_raw.instanceId = instance_id
            instance_raw.instanceStart = instance.get("start")
            instance_raw.instanceEnd = instance.get("end")
            instance_raw.type = instance.get("type")
            instance_raw.className = instance.get("className")
            instance_raw.createdAt = instance.get("createdAt")
            instance_raw.createdBy = created_by.get("email")
            instance_raw.creatorRole = created_by.get("role")
            instance_raw.updatedAt = instance.get("updatedAt")
            instance_raw.updatedBy = updated_by.get("email")
            instance_raw.updatorRole = updated_by.get("role")

            # append attributes
            attributes = instance.get("attributes") or []
            for attribute_id, attribute in enumerate(attributes):
                attribute_raw = copy.copy(instance_raw)
                attribute_raw.attributeId = attribute_id
                attribute_raw.attributeGroupName = attribute.get("groupName")
                attribute_raw.attributeName = attribute.get("name")
                raws.append(attribute_raw)
            if not attributes:
                raws.append(instance_raw)

        if not instances:
            raws.append(raw_data)

    df = pd.DataFrame([raw.__dict__ for raw in raws], dtype=object)
    # Rows set different subsets of fields, so the frame contains NaN holes;
    # normalise them to None.
    return df.where(pd.notnull(df), None)
209293

210294
def aggregate_image_annotations_as_df(self, annotations_paths: List[str]):
211295
annotation_data = {

src/superannotate/lib/app/interface/sdk_interface.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2558,9 +2558,9 @@ def aggregate_annotations_as_df(
25582558
"""Aggregate annotations as pandas dataframe from project root.
25592559
25602560
:param project_root: the export path of the project
2561-
:type project_root: Pathlike (str or Path)
2561+
:type project_root: Path-like (str or Path)
25622562
2563-
:param project_type: the project type, Vector/Pixel or Video
2563+
:param project_type: the project type, Vector/Pixel, Video or Document
25642564
:type project_type: str
25652565
25662566
:param folder_names: Aggregate the specified folders from project_root.
@@ -2585,16 +2585,17 @@ def aggregate_annotations_as_df(
25852585
include_tags=True,
25862586
folder_names=folder_names,
25872587
)
2588-
elif project_type == constances.ProjectType.VIDEO.name:
2588+
elif project_type in (
2589+
constances.ProjectType.VIDEO.name,
2590+
constances.ProjectType.DOCUMENT.name,
2591+
):
25892592
from superannotate.lib.app.analytics.aggregators import DataAggregator
25902593

25912594
return DataAggregator(
25922595
project_type=project_type,
25932596
project_root=project_root,
25942597
folder_names=folder_names,
25952598
).aggregate_annotations_as_df()
2596-
else:
2597-
raise AppException(constances.DEPRECATED_DOCUMENT_PROJECTS_MESSAGE)
25982599

25992600

26002601
@Trackable

src/superannotate/lib/infrastructure/controller.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,8 @@
4646
def build_condition(**kwargs) -> Condition:
    """Combine the truthy keyword arguments into one equality condition.

    Starts from ``Condition.get_empty_condition()`` and ``&``-joins a
    ``Condition(key, value, EQ)`` for every keyword whose value is truthy.
    Falsy values are skipped; when no value is truthy the empty condition is
    returned unchanged.
    """
    combined = Condition.get_empty_condition()
    for field, expected in kwargs.items():
        if not expected:
            continue
        combined = combined & Condition(field, expected, EQ)
    return combined
5652

5753

src/superannotate/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "4.3.3dev8"
1+
__version__ = "4.3.3dev9"
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
[
2+
{
3+
"id": 873208,
4+
"project_id": 160158,
5+
"name": "vid",
6+
"color": "#0fc1c9",
7+
"count": 0,
8+
"createdAt": "2021-10-22T10:40:03.000Z",
9+
"updatedAt": "2021-10-22T10:40:03.000Z",
10+
"attribute_groups": [
11+
{
12+
"id": 347588,
13+
"class_id": 873208,
14+
"name": "attr g",
15+
"is_multiselect": 0,
16+
"createdAt": "2021-10-22T10:40:03.000Z",
17+
"updatedAt": "2021-10-22T10:40:03.000Z",
18+
"attributes": [
19+
{
20+
"id": 1203338,
21+
"group_id": 347588,
22+
"project_id": 160158,
23+
"name": "attr",
24+
"count": 0,
25+
"createdAt": "2021-10-22T10:40:03.000Z",
26+
"updatedAt": "2021-10-22T10:40:03.000Z"
27+
}
28+
]
29+
}
30+
]
31+
}
32+
]
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
{
2+
"metadata": {
3+
"name": "text_file_example_1",
4+
"status": "Completed",
5+
"url": "https://sa-public-files.s3.us-west-2.amazonaws.com/Text+project/text_file_example_1.txt",
6+
"projectId": 160158,
7+
"annotatorEmail": null,
8+
"qaEmail": null,
9+
"lastAction": {
10+
"email": "shab.prog@gmail.com",
11+
"timestamp": 1634899229953
12+
}
13+
},
14+
"instances": [
15+
{
16+
"type": "entity",
17+
"start": 253,
18+
"end": 593,
19+
"classId": 873208,
20+
"createdAt": "2021-10-22T10:40:26.151Z",
21+
"createdBy": {
22+
"email": "shab.prog@gmail.com",
23+
"role": "Admin"
24+
},
25+
"updatedAt": "2021-10-22T10:40:29.953Z",
26+
"updatedBy": {
27+
"email": "shab.prog@gmail.com",
28+
"role": "Admin"
29+
},
30+
"attributes": [],
31+
"creationType": "Manual",
32+
"className": "vid"
33+
},
34+
{
35+
"type": "entity",
36+
"start": 255,
37+
"end": 593,
38+
"classId": 873208,
39+
"createdAt": "2021-10-22T10:40:26.151Z",
40+
"createdBy": {
41+
"email": "shab.prog@gmail.com",
42+
"role": "Admin"
43+
},
44+
"updatedAt": "2021-10-22T10:40:29.953Z",
45+
"updatedBy": {
46+
"email": "shab.prog@gmail.com",
47+
"role": "Admin"
48+
},
49+
"attributes": [],
50+
"creationType": "Manual",
51+
"className": "pid"
52+
}
53+
],
54+
"tags": [
55+
"vid",
56+
"pid"
57+
],
58+
"freeText": ""
59+
}
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
{
2+
"metadata": {
3+
"name": "text_file_example_1",
4+
"status": "Completed",
5+
"url": "https://sa-public-files.s3.us-west-2.amazonaws.com/Text+project/text_file_example_1.txt",
6+
"projectId": 160158,
7+
"annotatorEmail": null,
8+
"qaEmail": null,
9+
"lastAction": {
10+
"email": "shab.prog@gmail.com",
11+
"timestamp": 1634899229953
12+
}
13+
},
14+
"instances": [
15+
{
16+
"type": "entity",
17+
"start": 253,
18+
"end": 593,
19+
"classId": 873208,
20+
"createdAt": "2021-10-22T10:40:26.151Z",
21+
"createdBy": {
22+
"email": "shab.prog@gmail.com",
23+
"role": "Admin"
24+
},
25+
"updatedAt": "2021-10-22T10:40:29.953Z",
26+
"updatedBy": {
27+
"email": "shab.prog@gmail.com",
28+
"role": "Admin"
29+
},
30+
"attributes": [],
31+
"creationType": "Manual",
32+
"className": "vid"
33+
},
34+
{
35+
"type": "entity",
36+
"start": 255,
37+
"end": 593,
38+
"classId": 873208,
39+
"createdAt": "2021-10-22T10:40:26.151Z",
40+
"createdBy": {
41+
"email": "shab.prog@gmail.com",
42+
"role": "Admin"
43+
},
44+
"updatedAt": "2021-10-22T10:40:29.953Z",
45+
"updatedBy": {
46+
"email": "shab.prog@gmail.com",
47+
"role": "Admin"
48+
},
49+
"attributes": [],
50+
"creationType": "Manual",
51+
"className": "pid"
52+
}
53+
],
54+
"tags": [
55+
"vid"
56+
],
57+
"freeText": ""
58+
}

0 commit comments

Comments
 (0)