Skip to content

Commit bb4012a

Browse files
committed
new converters
1 parent 79164bd commit bb4012a

File tree

15 files changed

+970
-38
lines changed

15 files changed

+970
-38
lines changed

superannotate/input_converters/conversion.py

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@
1111
from ..exceptions import SABaseException
1212

1313
AVAILABLE_ANNOTATION_FORMATS = [
14-
"COCO", "VOC", "LabelBox", "DataLoop", 'Supervisely'
14+
"COCO", "VOC", "LabelBox", "DataLoop", 'Supervisely', 'VoTT', 'SageMaker',
15+
'VGG', 'GoogleCloud'
1516
]
1617

1718
AVAILABLE_PLATFORMS = ["Desktop", "Web"]
@@ -51,6 +52,20 @@
5152

5253
ALLOWED_CONVERSIONS_SUPERVISELY_TO_SA = [('Vector', 'vector_annotation')]
5354

55+
# Allow-lists consulted by `_passes_converter_sanity` for each newly supported
# dataset format. Entries are two-element tuples (presumably
# (project_type, task) -- confirm against how `converter_values` is built).
ALLOWED_CONVERSIONS_VOTT_TO_SA = [
    ('Vector', task) for task in
    ('object_detection', 'instance_segmentation', 'vector_annotation')
]

ALLOWED_CONVERSIONS_SAGEMAKER_TO_SA = [
    ('Vector', 'object_detection'),
]

ALLOWED_CONVERSIONS_VGG_TO_SA = [
    ('Vector', task) for task in
    ('object_detection', 'instance_segmentation', 'vector_annotation')
]

ALLOWED_CONVERSIONS_GOOGLECLOUD_TO_SA = [
    ('Vector', 'object_detection'),
]
68+
5469

5570
def _passes_sanity_checks(args):
5671
if not isinstance(args.input_dir, (str, Path)):
@@ -110,6 +125,14 @@ def _passes_converter_sanity(args, direction):
110125
return True
111126
elif args.dataset_format == "Supervisely" and converter_values in ALLOWED_CONVERSIONS_SUPERVISELY_TO_SA:
112127
return True
128+
elif args.dataset_format == 'VoTT' and converter_values in ALLOWED_CONVERSIONS_VOTT_TO_SA:
129+
return True
130+
elif args.dataset_format == 'SageMaker' and converter_values in ALLOWED_CONVERSIONS_SAGEMAKER_TO_SA:
131+
return True
132+
elif args.dataset_format == 'VGG' and converter_values in ALLOWED_CONVERSIONS_VGG_TO_SA:
133+
return True
134+
elif args.dataset_format == 'GoogleCloud' and converter_values in ALLOWED_CONVERSIONS_GOOGLECLOUD_TO_SA:
135+
return True
113136
else:
114137
if args.dataset_format == "COCO" and converter_values in ALLOWED_CONVERSIONS_SA_TO_COCO:
115138
return True
@@ -251,7 +274,7 @@ def import_annotation_format(
251274
:type input_dir: str
252275
:param output_dir: Path to the folder, where you want to have converted dataset.
253276
:type output_dir: str
254-
:param dataset_format: Annotation format to convert SuperAnnotate annotation format. Available candidates are: ["COCO", "VOC", "LabelBox"]
277+
:param dataset_format: Annotation format to convert to SuperAnnotate annotation format. Available candidates are: ["COCO", "VOC", "LabelBox", "DataLoop", "Supervisely", "VoTT", "SageMaker", "VGG", "GoogleCloud"]
255278
:type dataset_format: str
256279
:param dataset_name: Name of the json file in the input_dir, which should be converted.
257280
:type dataset_name: str

superannotate/input_converters/converters/converters.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@
1818
from .labelbox_converters.labelbox_strategies import LabelBoxObjectDetectionStrategy
1919
from .dataloop_converters.dataloop_strategies import DataLoopObjectDetectionStrategy
2020
from .supervisely_converters.supervisely_strategies import SuperviselyObjectDetectionStrategy
21+
from .vott_converters.vott_strategies import VoTTObjectDetectionStrategy
22+
from .sagemaker_converters.sagemaker_strategies import SageMakerObjectDetectionStrategy
23+
from .vgg_converters.vgg_strategies import VGGObjectDetectionStrategy
24+
from .googlecloud_converters.googlecloud_strategies import GoogleCloudObjectDetectionStrategy
2125

2226

2327
class Converter(object):
@@ -56,6 +60,18 @@ def _select_strategy(self, args):
5660
elif args.dataset_format == "Supervisely":
5761
if args.task == 'vector_annotation':
5862
c_strategy = SuperviselyObjectDetectionStrategy(args)
63+
elif args.dataset_format == "VoTT":
64+
if args.task == 'object_detection' or args.task == 'instance_segmentation' or args.task == 'vector_annotation':
65+
c_strategy = VoTTObjectDetectionStrategy(args)
66+
elif args.dataset_format == "SageMaker":
67+
if args.task == 'object_detection' or args.task == 'instance_segmentation' or args.task == 'vector_annotation':
68+
c_strategy = SageMakerObjectDetectionStrategy(args)
69+
elif args.dataset_format == "VGG":
70+
if args.task == 'object_detection' or args.task == 'instance_segmentation' or args.task == 'vector_annotation':
71+
c_strategy = VGGObjectDetectionStrategy(args)
72+
elif args.dataset_format == "GoogleCloud":
73+
if args.task == 'object_detection':
74+
c_strategy = GoogleCloudObjectDetectionStrategy(args)
5975
else:
6076
pass
6177

superannotate/input_converters/converters/googlecloud_converters/__init__.py

Whitespace-only changes.
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
from pathlib import Path
2+
import json
3+
4+
5+
class GoogleCloudConverter(object):
    """Shared state and output helpers for Google Cloud format converters.

    The constructor copies the conversion settings from ``args``; the
    ``save_*`` helpers serialize conversion results as JSON files under
    ``output_dir``.
    """

    def __init__(self, args):
        """Copy the conversion settings from ``args``.

        :param args: object exposing ``dataset_name``, ``project_type``,
            ``task``, ``output_dir``, ``export_root`` and ``direction``
            attributes.
        """
        self.dataset_name = args.dataset_name
        self.project_type = args.project_type
        self.task = args.task
        self.output_dir = args.output_dir
        self.export_root = args.export_root
        self.direction = args.direction

    def set_output_dir(self, output_dir_):
        """Override the directory that converted files are written to."""
        self.output_dir = output_dir_

    def set_export_root(self, export_root_):
        """Override the directory that the source dataset is read from."""
        self.export_root = export_root_

    def set_dataset_name(self, dname):
        """Override the dataset name."""
        self.dataset_name = dname

    def save_objects(self, files_dict):
        """Write each value of ``files_dict`` as an indented JSON file.

        :param files_dict: mapping of file name (relative to ``output_dir``)
            to a JSON-serializable object.
        """
        # The output directory does not change between files, so resolve it
        # once. No path is printed here: library code should not write
        # debug output to stdout.
        out_dir = Path(self.output_dir)
        for file_name, content in files_dict.items():
            with open(out_dir.joinpath(file_name), 'w') as fw:
                json.dump(content, fw, indent=2)

    def save_classes(self, classes):
        """Write ``classes`` to ``<output_dir>/classes/classes.json``.

        The ``classes`` sub-directory is not created here; it must already
        exist.

        :param classes: JSON-serializable class description.
        """
        out_dir = Path(self.output_dir)
        with open(out_dir.joinpath('classes', 'classes.json'), 'w') as fw:
            json.dump(classes, fw)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
import os
2+
from pathlib import Path
3+
4+
from .googlecloud_converter import GoogleCloudConverter
5+
from .googlecloud_to_sa_vector import googlecloud_object_detection_to_sa_vector
6+
7+
8+
class GoogleCloudObjectDetectionStrategy(GoogleCloudConverter):
    """Strategy that imports a Google Cloud object-detection CSV export.

    Only the import direction, the ``Vector`` project type and the
    ``object_detection`` task are implemented; other combinations raise
    ``NotImplementedError`` at construction time.
    """
    name = "ObjectDetection converter"

    def __init__(self, args):
        """Store the settings from ``args`` and pick the conversion function.

        :raises NotImplementedError: for an unsupported direction, task or
            project type (see ``__setup_conversion_algorithm``).
        """
        super().__init__(args)
        self.__setup_conversion_algorithm()

    def __setup_conversion_algorithm(self):
        """Bind ``self.conversion_algorithm`` for the configured conversion."""
        if self.direction == "to":
            raise NotImplementedError("Doesn't support yet")

        if self.project_type == "Vector":
            if self.task == "object_detection":
                self.conversion_algorithm = googlecloud_object_detection_to_sa_vector
                # Backward-compatible alias for the historical misspelling
                # of this attribute.
                self.converion_algorithm = self.conversion_algorithm
            else:
                raise NotImplementedError("Doesn't support yet")
        elif self.project_type == "Pixel":
            raise NotImplementedError("Doesn't support yet")

    def __str__(self):
        return '{} object'.format(self.name)

    def from_sa_format(self):
        """Export from SuperAnnotate format; currently a no-op."""
        pass

    def to_sa_format(self):
        """Convert ``<export_root>/<dataset_name>.csv`` and save the results.

        The per-image annotation JSONs and the class list returned by the
        conversion function are written through ``save_objects`` and
        ``save_classes``.
        """
        path = Path(self.export_root).joinpath(self.dataset_name + '.csv')
        id_generator = self._make_id_generator()
        sa_jsons, sa_classes = self.conversion_algorithm(path, id_generator)
        self.save_objects(sa_jsons)
        self.save_classes(sa_classes)

    def _make_id_generator(self):
        """Yield consecutive integer ids 1, 2, 3, ... without end."""
        cur_id = 0
        while True:
            cur_id += 1
            yield cur_id
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import os
2+
import pandas as pd
3+
import numpy as np
4+
import cv2
5+
6+
7+
def _create_classes(classes):
8+
classes_loader = []
9+
for class_, id_ in classes.items():
10+
color = np.random.choice(range(256), size=3)
11+
hexcolor = "#%02x%02x%02x" % tuple(color)
12+
sa_classes = {
13+
'id': id_,
14+
'name': class_,
15+
'color': hexcolor,
16+
'attribute_groups': []
17+
}
18+
classes_loader.append(sa_classes)
19+
return classes_loader
20+
21+
22+
def googlecloud_object_detection_to_sa_vector(path, id_generator):
    """Convert a Google Cloud object-detection CSV into SuperAnnotate bboxes.

    The CSV is read without a header. Column 1 is the image path (only its
    last ``/``-separated component is used, and the image is looked up next
    to the CSV file), column 2 is the label, and columns 3/4 and 7/8 are the
    relative ``x_min``/``y_min`` and ``x_max``/``y_max`` coordinates, which
    are scaled by the image width and height.

    :param path: path to the CSV file.
    :param id_generator: iterator yielding a fresh class id per ``next()``.
    :return: tuple ``(sa_jsons, sa_classes)`` where ``sa_jsons`` maps
        ``<image name>___objects.json`` to a list of bbox dicts and
        ``sa_classes`` is the list produced by ``_create_classes``.
    :raises OSError: if an image referenced by the CSV cannot be read.
    """
    df = pd.read_csv(path, header=None)
    dir_name = os.path.dirname(path)

    sa_jsons = {}
    classes = {}
    # (height, width) per image, so an image with several boxes is decoded
    # only once.
    image_sizes = {}
    for _, row in df.iterrows():
        class_name = row[2]
        if class_name not in classes:
            classes[class_name] = next(id_generator)

        file_name = row[1].split('/')[-1]
        if file_name not in image_sizes:
            image_path = os.path.join(dir_name, file_name)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals a missing/undecodable file by returning
                # None instead of raising.
                raise OSError("Could not read image '%s'" % image_path)
            image_sizes[file_name] = img.shape[:2]
        height, width = image_sizes[file_name]

        sa_file_name = os.path.basename(file_name) + '___objects.json'

        # In the two-vertex CSV form columns 5 and 6 are empty and x_max
        # lives in column 7; in the four-vertex form column 7 holds x_max as
        # well. Fall back to column 5 only if column 7 is empty.
        x_max_rel = row[7] if pd.notna(row[7]) else row[5]
        xmin = row[3] * width
        xmax = x_max_rel * width
        ymin = row[4] * height
        ymax = row[8] * height

        sa_obj = {
            'type': 'bbox',
            'points': {
                'x1': xmin,
                'y1': ymin,
                'x2': xmax,
                'y2': ymax
            },
            'className': class_name,
            'classId': classes[class_name],
            'attributes': [],
            'probability': 100,
            'locked': False,
            'visible': True,
            'groupId': 0
        }

        sa_jsons.setdefault(sa_file_name, []).append(sa_obj)

    return sa_jsons, _create_classes(classes)

superannotate/input_converters/converters/vgg_converters/__init__.py

Whitespace-only changes.
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
from pathlib import Path
2+
import json
3+
4+
5+
class VGGConverter(object):
    """Shared state, input discovery and output helpers for VGG converters."""

    def __init__(self, args):
        """Copy the conversion settings from ``args`` onto the instance.

        :param args: object exposing ``dataset_name``, ``project_type``,
            ``task``, ``output_dir``, ``export_root`` and ``direction``
            attributes.
        """
        for field in (
            'dataset_name', 'project_type', 'task', 'output_dir',
            'export_root', 'direction'
        ):
            setattr(self, field, getattr(args, field))

    def set_output_dir(self, output_dir_):
        """Replace the directory that converted files are written to."""
        self.output_dir = output_dir_

    def set_export_root(self, export_root_):
        """Replace the directory that input JSON files are read from."""
        self.export_root = export_root_

    def set_dataset_name(self, dname):
        """Replace the dataset name."""
        self.dataset_name = dname

    def get_file_list(self):
        """Return the input JSON paths under ``export_root``.

        With an empty ``dataset_name`` every ``*.json`` directly inside
        ``export_root`` is returned; otherwise only
        ``<dataset_name>.json`` (whether or not it exists).
        """
        root = Path(self.export_root)
        if self.dataset_name == '':
            return list(root.glob('*.json'))
        return [root.joinpath(self.dataset_name + '.json')]

    def save_objects(self, files_dict):
        """Dump each value of ``files_dict`` as indented JSON.

        :param files_dict: mapping of file name (relative to ``output_dir``)
            to a JSON-serializable object.
        """
        for file_name, content in files_dict.items():
            target = Path(self.output_dir) / file_name
            with open(target, 'w') as fw:
                json.dump(content, fw, indent=2)

    def save_classes(self, classes):
        """Dump ``classes`` to ``<output_dir>/classes/classes.json``."""
        target = Path(self.output_dir).joinpath('classes', 'classes.json')
        with open(target, 'w') as fw:
            json.dump(classes, fw)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
import json
2+
3+
from .vgg_converter import VGGConverter
4+
from .vgg_to_sa_vector import vgg_object_detection_to_sa_vector, vgg_instance_segmentation_to_sa_vector, vgg_to_sa
5+
6+
7+
class VGGObjectDetectionStrategy(VGGConverter):
    """Strategy that imports VGG JSON annotations into SuperAnnotate format.

    Only the import direction and the ``Vector`` project type are
    implemented; the conversion function is chosen by ``task``.
    """
    name = "ObjectDetection converter"

    def __init__(self, args):
        """Store the settings from ``args`` and pick the conversion function.

        :raises NotImplementedError: for an unsupported direction, Vector
            task, or the ``Pixel`` project type.
        """
        super().__init__(args)
        self.__setup_conversion_algorithm()

    def __setup_conversion_algorithm(self):
        """Bind ``self.conversion_algorithm`` for the configured task."""
        if self.direction == "to":
            raise NotImplementedError("Doesn't support yet")

        if self.project_type == "Vector":
            if self.task == "object_detection":
                self.conversion_algorithm = vgg_object_detection_to_sa_vector
            elif self.task == 'instance_segmentation':
                self.conversion_algorithm = vgg_instance_segmentation_to_sa_vector
            elif self.task == 'vector_annotation':
                self.conversion_algorithm = vgg_to_sa
            else:
                # Fail at construction time rather than leaving
                # conversion_algorithm unset, which would only surface later
                # as an AttributeError in to_sa_format.
                raise NotImplementedError("Doesn't support yet")
        elif self.project_type == "Pixel":
            raise NotImplementedError("Doesn't support yet")

    def __str__(self):
        return '{} object'.format(self.name)

    def from_sa_format(self):
        """Export from SuperAnnotate format; currently a no-op."""
        pass

    def to_sa_format(self):
        """Convert the files from ``get_file_list`` and save the results.

        The list of input JSON paths and a fresh id generator are passed to
        the selected conversion function; the returned annotation JSONs and
        class list are written through ``save_objects`` and ``save_classes``.
        """
        json_data = self.get_file_list()
        id_generator = self._make_id_generator()
        sa_jsons, sa_classes = self.conversion_algorithm(
            json_data, id_generator
        )
        self.save_objects(sa_jsons)
        self.save_classes(sa_classes)

    def _make_id_generator(self):
        """Yield consecutive integer ids 1, 2, 3, ... without end."""
        cur_id = 0
        while True:
            cur_id += 1
            yield cur_id

0 commit comments

Comments
 (0)