Skip to content

Commit a9fa126

Browse files
authored
Merge pull request #316 from superannotateai/friday
Friday
2 parents 4d7f7d3 + 9e586d8 commit a9fa126

File tree

12 files changed: 909 additions and 72 deletions.

src/superannotate/__init__.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,6 @@
2222
from superannotate.lib.app.input_converters.conversion import export_annotation
2323
from superannotate.lib.app.input_converters.conversion import import_annotation
2424
from superannotate.lib.app.input_converters.df_converter import df_to_annotations
25-
2625
from superannotate.lib.app.input_converters.dicom_converter import dicom_to_rgb_sequence
2726
from superannotate.lib.app.interface.sdk_interface import add_annotation_bbox_to_image
2827
from superannotate.lib.app.interface.sdk_interface import (

src/superannotate/lib/app/analytics/aggregators.py

Lines changed: 55 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
11
import copy
22
import json
33
import logging
4+
from dataclasses import dataclass
45
from pathlib import Path
56
from typing import List
67
from typing import Optional
78
from typing import Union
89

910
import lib.core as constances
1011
import pandas as pd
11-
from dataclasses import dataclass
1212
from lib.app.exceptions import AppException
1313
from lib.core import ATTACHED_VIDEO_ANNOTATION_POSTFIX
1414
from lib.core import PIXEL_ANNOTATION_POSTFIX
@@ -58,10 +58,10 @@ class VideoRawData:
5858

5959
class DataAggregator:
6060
def __init__(
61-
self,
62-
project_type: str,
63-
project_root: Union[str, Path],
64-
folder_names: Optional[List[Union[Path, str]]] = None
61+
self,
62+
project_type: str,
63+
project_root: Union[str, Path],
64+
folder_names: Optional[List[Union[Path, str]]] = None,
6565
):
6666
self.project_type = project_type
6767
self.project_root = Path(project_root)
@@ -87,10 +87,18 @@ def get_annotation_paths(self):
8787
if path.is_file() and path.suffix == self.annotation_suffix:
8888
annotations_paths.append(path)
8989
elif path.is_dir() and path.name != "classes":
90-
annotations_paths.extend(list(path.rglob(f"*{self.annotation_suffix}")))
90+
annotations_paths.extend(
91+
list(path.rglob(f"*{self.annotation_suffix}"))
92+
)
9193
else:
9294
for folder_name in self.folder_names:
93-
annotations_paths.extend(list((self.project_root / folder_name).rglob(f"*{self.annotation_suffix:}")))
95+
annotations_paths.extend(
96+
list(
97+
(self.project_root / folder_name).rglob(
98+
f"*{self.annotation_suffix:}"
99+
)
100+
)
101+
)
94102

95103
if not annotations_paths:
96104
logger.warning(f"Could not find annotations in {self.project_root}.")
@@ -103,11 +111,16 @@ def check_classes_path(self):
103111
)
104112

105113
def aggregate_annotations_as_df(self):
106-
logger.info(f"Aggregating annotations from {self.project_root} as pandas DataFrame")
114+
logger.info(
115+
f"Aggregating annotations from {self.project_root} as pandas DataFrame"
116+
)
107117
self.check_classes_path()
108118
annotation_paths = self.get_annotation_paths()
109119

110-
if self.project_type in (constances.ProjectType.VECTOR.name, constances.ProjectType.PIXEL.name):
120+
if self.project_type in (
121+
constances.ProjectType.VECTOR.name,
122+
constances.ProjectType.PIXEL.name,
123+
):
111124
return self.aggregate_image_annotations_as_df(annotation_paths)
112125
elif self.project_type == constances.ProjectType.VIDEO.name:
113126
return self.aggregate_video_annotations_as_df(annotation_paths)
@@ -120,7 +133,11 @@ def aggregate_video_annotations_as_df(self, annotation_paths: List[str]):
120133
raw_data = VideoRawData()
121134
# metadata
122135
raw_data.videoName = annotation_data["metadata"]["name"]
123-
raw_data.folderName = annotation_path.parent.name if annotation_path.parent != self.project_root else None
136+
raw_data.folderName = (
137+
annotation_path.parent.name
138+
if annotation_path.parent != self.project_root
139+
else None
140+
)
124141
raw_data.videoHeight = annotation_data["metadata"].get("height")
125142
raw_data.videoWidth = annotation_data["metadata"].get("width")
126143
raw_data.videoStatus = annotation_data["metadata"].get("status")
@@ -146,11 +163,19 @@ def aggregate_video_annotations_as_df(self, annotation_paths: List[str]):
146163
instance_raw.type = instance["meta"].get("type")
147164
instance_raw.className = instance["meta"].get("className")
148165
instance_raw.createdAt = instance["meta"].get("createdAt")
149-
instance_raw.createdBy = instance["meta"].get("createdBy", {}).get("email")
150-
instance_raw.creatorRole = instance["meta"].get("createdBy", {}).get("role")
166+
instance_raw.createdBy = (
167+
instance["meta"].get("createdBy", {}).get("email")
168+
)
169+
instance_raw.creatorRole = (
170+
instance["meta"].get("createdBy", {}).get("role")
171+
)
151172
instance_raw.updatedAt = instance["meta"].get("updatedAt")
152-
instance_raw.updatedBy = instance["meta"].get("updatedBy", {}).get("email")
153-
instance_raw.updatorRole = instance["meta"].get("updatedBy", {}).get("role")
173+
instance_raw.updatedBy = (
174+
instance["meta"].get("updatedBy", {}).get("email")
175+
)
176+
instance_raw.updatorRole = (
177+
instance["meta"].get("updatedBy", {}).get("role")
178+
)
154179
instance_raw.pointLabels = instance["meta"].get("pointLabels")
155180
parameters = instance.get("parameters", [])
156181
for parameter_id, parameter in enumerate(parameters):
@@ -167,7 +192,9 @@ def aggregate_video_annotations_as_df(self, annotation_paths: List[str]):
167192
for attribute_id, attribute in enumerate(attributes):
168193
attribute_raw = copy.copy(timestamp_raw)
169194
attribute_raw.attributeId = attribute_id
170-
attribute_raw.attributeGroupName = attribute.get("groupName")
195+
attribute_raw.attributeGroupName = attribute.get(
196+
"groupName"
197+
)
171198
attribute_raw.attributeName = attribute.get("name")
172199
raws.append(attribute_raw)
173200
if not attributes:
@@ -212,7 +239,7 @@ def aggregate_image_annotations_as_df(self, annotations_paths: List[str]):
212239
"imageAnnotator": [],
213240
"imageQA": [],
214241
"commentResolved": [],
215-
"tag": []
242+
"tag": [],
216243
}
217244

218245
classes_json = json.load(open(self.classes_path))
@@ -233,7 +260,9 @@ def aggregate_image_annotations_as_df(self, annotations_paths: List[str]):
233260
def __append_annotation(annotation_dict):
234261
for annotation_key in annotation_data:
235262
if annotation_key in annotation_dict:
236-
annotation_data[annotation_key].append(annotation_dict[annotation_key])
263+
annotation_data[annotation_key].append(
264+
annotation_dict[annotation_key]
265+
)
237266
else:
238267
annotation_data[annotation_key].append(None)
239268

@@ -271,8 +300,8 @@ def __append_annotation(annotation_dict):
271300
annotation_type = annotation.get("type", "mask")
272301
annotation_class_name = annotation.get("className")
273302
if (
274-
annotation_class_name is None
275-
or annotation_class_name not in class_name_to_color
303+
annotation_class_name is None
304+
or annotation_class_name not in class_name_to_color
276305
):
277306
logger.warning(
278307
"Annotation class %s not found in classes json. Skipping.",
@@ -308,7 +337,7 @@ def __append_annotation(annotation_dict):
308337
annotation_probability = annotation.get("probability")
309338
annotation_point_labels = annotation.get("pointLabels")
310339
attributes = annotation.get("attributes")
311-
user_metadata =self.__get_user_metadata(annotation)
340+
user_metadata = self.__get_user_metadata(annotation)
312341
folder_name = None
313342
if annotation_path.parent != Path(self.project_root):
314343
folder_name = annotation_path.parent.name
@@ -339,19 +368,19 @@ def __append_annotation(annotation_dict):
339368
attribute_group = attribute.get("groupName")
340369
attribute_name = attribute.get("name")
341370
if (
342-
attribute_group
343-
not in class_group_name_to_values[annotation_class_name]
371+
attribute_group
372+
not in class_group_name_to_values[annotation_class_name]
344373
):
345374
logger.warning(
346375
"Annotation class group %s not in classes json. Skipping.",
347376
attribute_group,
348377
)
349378
continue
350379
if (
351-
attribute_name
352-
not in class_group_name_to_values[annotation_class_name][
353-
attribute_group
354-
]
380+
attribute_name
381+
not in class_group_name_to_values[annotation_class_name][
382+
attribute_group
383+
]
355384
):
356385
logger.warning(
357386
"Annotation class group value %s not in classes json. Skipping.",

src/superannotate/lib/app/analytics/class_analytics.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -67,4 +67,3 @@ def class_distribution(export_root, project_names, visualize=False):
6767
fig.show()
6868

6969
return df
70-

src/superannotate/lib/app/interface/sdk_interface.py

Lines changed: 10 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -3528,8 +3528,14 @@ def aggregate_annotations_as_df(
35283528
:return: DataFrame on annotations
35293529
:rtype: pandas DataFrame
35303530
"""
3531-
if project_type in (constances.ProjectType.VECTOR.name, constances.ProjectType.PIXEL.name):
3532-
from superannotate.lib.app.analytics.common import aggregate_image_annotations_as_df
3531+
if project_type in (
3532+
constances.ProjectType.VECTOR.name,
3533+
constances.ProjectType.PIXEL.name,
3534+
):
3535+
from superannotate.lib.app.analytics.common import (
3536+
aggregate_image_annotations_as_df,
3537+
)
3538+
35333539
return aggregate_image_annotations_as_df(
35343540
project_root=project_root,
35353541
include_classes_wo_annotations=False,
@@ -3539,10 +3545,11 @@ def aggregate_annotations_as_df(
35393545
)
35403546
elif project_type == constances.ProjectType.VIDEO.name:
35413547
from superannotate.lib.app.analytics.aggregators import DataAggregator
3548+
35423549
return DataAggregator(
35433550
project_type=project_type,
35443551
project_root=project_root,
3545-
folder_names=folder_names
3552+
folder_names=folder_names,
35463553
).aggregate_annotations_as_df()
35473554
else:
35483555
raise AppException(constances.DEPRECATED_DOCUMENT_PROJECTS_MESSAGE)

src/superannotate/lib/core/entities/document.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,16 +7,18 @@
77
from lib.core.entities.utils import MetadataBase
88
from lib.core.entities.utils import Tag
99
from pydantic import Field
10+
from pydantic import StrictInt
11+
from pydantic import StrictStr
1012

1113

1214
class DocumentInstance(BaseInstance):
13-
start: int
14-
end: int
15+
start: StrictInt
16+
end: StrictInt
1517
attributes: Optional[List[Attribute]] = Field(list())
1618

1719

1820
class DocumentAnnotation(BaseModel):
1921
metadata: MetadataBase
2022
instances: Optional[List[DocumentInstance]] = Field(list())
2123
tags: Optional[List[Tag]] = Field(list())
22-
free_text: Optional[str] = Field(None, alias="freeText")
24+
free_text: Optional[StrictStr] = Field(None, alias="freeText")

src/superannotate/lib/core/entities/pixel.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,13 +7,14 @@
77
from lib.core.entities.utils import Metadata
88
from lib.core.entities.utils import Tag
99
from pydantic import Field
10+
from pydantic import StrictBool
1011
from pydantic import validator
1112
from pydantic.color import Color
1213
from pydantic.color import ColorType
1314

1415

1516
class PixelMetaData(Metadata):
16-
is_segmented: Optional[bool] = Field(None, alias="isSegmented")
17+
is_segmented: Optional[StrictBool] = Field(None, alias="isSegmented")
1718

1819

1920
class PixelAnnotationPart(BaseModel):

src/superannotate/lib/core/entities/utils.py

Lines changed: 60 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -9,8 +9,11 @@
99
from pydantic import EmailStr
1010
from pydantic import Extra
1111
from pydantic import Field
12+
from pydantic import StrictStr
1213
from pydantic import StrRegexError
14+
from pydantic import ValidationError
1315
from pydantic import validator
16+
from pydantic.error_wrappers import ErrorWrapper
1417
from pydantic.errors import EnumMemberError
1518

1619

@@ -21,16 +24,20 @@ def enum_error_handling(self) -> str:
2124

2225
EnumMemberError.__str__ = enum_error_handling
2326

24-
2527
NotEmptyStr = constr(strict=True, min_length=1)
2628

27-
2829
DATE_REGEX = r"\d{4}-[01]\d-[0-3]\dT[0-2]\d:[0-5]\d:[0-5]\d(?:\.\d{3})Z"
2930

3031
DATE_TIME_FORMAT_ERROR_MESSAGE = (
3132
"does not match expected format YYYY-MM-DDTHH:MM:SS.fffZ"
3233
)
3334

35+
POINT_LABEL_KEY_FORMAT_ERROR_MESSAGE = "does not match expected format ^[0-9]*$"
36+
37+
POINT_LABEL_VALUE_FORMAT_ERROR_MESSAGE = "str type expected"
38+
39+
INVALID_DICT_MESSAGE = "value is not a valid dict"
40+
3441

3542
class BaseModel(PyDanticBaseModel):
3643
class Config:
@@ -144,7 +151,7 @@ class LastUserAction(BaseModel):
144151

145152
class BaseInstance(TrackableModel, TimedBaseModel):
146153
class_id: Optional[int] = Field(None, alias="classId")
147-
class_name: NotEmptyStr = Field(alias="className")
154+
class_name: Optional[NotEmptyStr] = Field(None, alias="className")
148155

149156

150157
class MetadataBase(BaseModel):
@@ -159,10 +166,6 @@ class MetadataBase(BaseModel):
159166
status: Optional[AnnotationStatusEnum]
160167

161168

162-
class PointLabels(BaseModel):
163-
__root__: Dict[constr(regex=r"^[0-9]*$"), NotEmptyStr] # noqa: F722 E261
164-
165-
166169
class Correspondence(BaseModel):
167170
text: NotEmptyStr
168171
email: EmailStr
@@ -188,6 +191,56 @@ class Config:
188191
}
189192

190193

194+
class StringA(BaseModel):
195+
string: StrictStr
196+
197+
198+
class PointLabels(BaseModel):
199+
__root__: Dict[constr(regex=r"^[0-9]*$"), str]
200+
201+
@classmethod
202+
def __get_validators__(cls):
203+
yield cls.validate_type
204+
yield cls.validate_value
205+
206+
@validator("__root__", pre=True)
207+
def validate_value(cls, values):
208+
result = {}
209+
errors = []
210+
validate_key = None
211+
validate_value = None
212+
for key, value in values.items():
213+
try:
214+
validate_key = constr(regex=r"^[0-9]*$", min_length=1).validate(key)
215+
except ValueError:
216+
errors.append(
217+
ErrorWrapper(
218+
ValueError(POINT_LABEL_KEY_FORMAT_ERROR_MESSAGE), str(key)
219+
)
220+
)
221+
try:
222+
validate_value = StringA(string=value)
223+
except ValueError:
224+
errors.append(
225+
ErrorWrapper(
226+
ValueError(POINT_LABEL_VALUE_FORMAT_ERROR_MESSAGE), str(key)
227+
)
228+
)
229+
230+
if validate_key and validate_value:
231+
result.update({key: value})
232+
233+
if errors:
234+
raise ValidationError(errors, cls)
235+
return result
236+
237+
@classmethod
238+
def validate_type(cls, values):
239+
if not issubclass(type(values), dict):
240+
raise TypeError(INVALID_DICT_MESSAGE)
241+
return values
242+
243+
191244
class BaseVectorInstance(BaseImageInstance):
192245
type: VectorAnnotationTypeEnum
193246
point_labels: Optional[PointLabels] = Field(None, alias="pointLabels")

0 commit comments

Comments
 (0)