Skip to content

Commit f558d4e

Browse files
authored
Merge pull request #1 from superannotateai/consensus_dev
Consensus implementation added
2 parents 6c8bd5d + 97083b1 commit f558d4e

22 files changed

+346
-0
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,3 +130,6 @@ dmypy.json
130130

131131
sample_large_files
132132
sample_tif_files
133+
134+
# Vscode settings folder
135+
.vscode/

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,4 @@ packaging>=20.4
1414
pandas>=1.1.2
1515
plotly>=4.1.0
1616
ffmpeg-python>=0.2.0
17+
Shapely>=1.7.1

superannotate/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@
6262
SAExistingProjectNameException, SANonExistingAnnotationClassNameException,
6363
SANonExistingProjectNameException
6464
)
65+
from .consensus_benchmark.consensus import consensus
6566
from .input_converters.conversion import (
6667
convert_platform, convert_project_type, export_annotation_format,
6768
import_annotation_format

superannotate/consensus_benchmark/__init__.py

Whitespace-only changes.
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
"""
2+
Main module for consensus computation
3+
"""
4+
import logging
5+
import tempfile
6+
import pandas as pd
7+
from pathlib import Path
8+
9+
from .helpers import image_consensus, consensus_plot
10+
from ..db.exports import prepare_export, download_export
11+
from ..analytics.common import aggregate_annotations_as_df
12+
13+
logger = logging.getLogger("superannotate-python-sdk")
14+
15+
16+
def consensus(
    project_names,
    export_root=None,
    image_list=None,
    annot_type='bbox',
    show_plots=False
):
    """Computes consensus score for each instance of given images that are present in at least 2 of the given projects:

    :param project_names: list of project names to aggregate through
    :type project_names: list of str
    :param export_root: root export path of the projects. If None, every project is exported and downloaded into a temporary directory first.
    :type export_root: Pathlike (str or Path)
    :param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
    :type image_list: list
    :param annot_type: Type of annotation instances to consider. Available candidates are: ["bbox", "polygon", "point"]
    :type annot_type: str
    :param show_plots: If True, show plots based on results of consensus computation. Default: False
    :type show_plots: bool

    :return: one row per instance and project with its consensus score; empty (but with the result columns) when there is nothing to compare
    :rtype: pandas.DataFrame
    :raises NotImplementedError: if annot_type is not one of the supported types
    """
    supported_types = ['polygon', 'bbox', 'point']
    if annot_type not in supported_types:
        raise NotImplementedError(
            "Unsupported annotation type '%s', expected one of %s" %
            (annot_type, supported_types)
        )

    # Columns of the returned frame; mirrors the keys of the dict that
    # image_consensus builds, so an empty result has the same shape.
    consensus_columns = [
        "creatorEmail", "imageName", "instanceId", "area", "className",
        "attributes", "projectName", "score"
    ]
    group_keys = ["imageName", "instanceId", "project"]

    project_dfs = []
    for project_name in project_names:
        if export_root is None:
            # The annotations must be read while the temporary directory
            # still exists, hence the aggregation inside the with-block.
            with tempfile.TemporaryDirectory() as export_dir:
                proj_export_meta = prepare_export(project_name)
                download_export(project_name, proj_export_meta, export_dir)
                project_df = aggregate_annotations_as_df(export_dir)
        else:
            export_dir = Path(export_root) / project_name
            project_df = aggregate_annotations_as_df(export_dir)
        project_df["project"] = project_name
        project_dfs.append(project_df)

    # pd.concat raises on an empty list; no projects means no consensus.
    if not project_dfs:
        return pd.DataFrame(columns=consensus_columns)

    all_projects_df = pd.concat(project_dfs)
    all_projects_df = all_projects_df[all_projects_df["instanceId"].notna()]

    if image_list is not None:
        all_projects_df = all_projects_df[
            all_projects_df["imageName"].isin(image_list)]

    all_projects_df = all_projects_df[all_projects_df["type"] == annot_type]

    def aggregate_attributes(instance_df):
        """Collapse the per-attribute rows of one instance into one row.

        The result carries an "attributes" column holding a dict
        {attribute group name: [attribute names]}, or None when the
        instance has no attribute group at all.
        """
        attributes = None
        if not instance_df["attributeGroupName"].isna().all():
            attributes = {
                group_name: list(names)
                for group_name, names in
                instance_df.groupby("attributeGroupName")["attributeName"]
            }

        # Build a new frame rather than mutating the group in place.
        single_row = instance_df.drop(
            columns=["attributeGroupName", "attributeName"]
        ).drop_duplicates(subset=group_keys)
        return single_row.assign(attributes=[attributes])

    # Iterate the groups explicitly so each sub-frame keeps its key columns.
    aggregated = [
        aggregate_attributes(instance_df)
        for _, instance_df in all_projects_df.groupby(group_keys)
    ]
    if not aggregated:
        logger.warning(
            "No %s instances found for consensus computation.", annot_type
        )
        return pd.DataFrame(columns=consensus_columns)
    all_projects_df = pd.concat(aggregated, ignore_index=True)

    # unique() keeps first-appearance order, so the row order of the
    # result is deterministic (a set would not guarantee that).
    all_consensus_data = [
        pd.DataFrame(image_consensus(all_projects_df, image_name, annot_type))
        for image_name in all_projects_df["imageName"].unique()
    ]

    consensus_df = pd.concat(all_consensus_data, ignore_index=True)

    if show_plots:
        consensus_plot(consensus_df, project_names)

    return consensus_df
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
from shapely.geometry import Polygon, box, Point
2+
import plotly.express as px
3+
from plotly.subplots import make_subplots
4+
import plotly.graph_objects as go
5+
import plotly.colors as col
6+
import logging
7+
import numpy as np
8+
logger = logging.getLogger("superannotate-python-sdk")
9+
10+
11+
def instance_consensus(inst_1, inst_2):
    """Helper function that computes consensus score between two instances:

    For two polygons the score is intersection-over-union of their areas
    (0 when the union has no area). For two points the score is the
    negated distance between them, so closer points score higher.

    :param inst_1: First instance for consensus score.
    :type inst_1: shapely object
    :param inst_2: Second instance for consensus score.
    :type inst_2: shapely object

    :return: consensus score of the two instances
    :rtype: float
    :raises NotImplementedError: if the instances are not both polygons or both points
    """
    # geom_type is used instead of the deprecated `type` attribute.
    type_1, type_2 = inst_1.geom_type, inst_2.geom_type

    if type_1 == type_2 == 'Polygon':
        union_area = inst_1.union(inst_2).area
        # Empty/degenerate polygons have no area to compare; report no
        # overlap instead of dividing by zero.
        if union_area == 0:
            return 0.
        return inst_1.intersection(inst_2).area / union_area

    if type_1 == type_2 == 'Point':
        return -1 * inst_1.distance(inst_2)

    raise NotImplementedError(
        "Consensus is not implemented for instance types '%s' and '%s'" %
        (type_1, type_2)
    )
30+
31+
32+
def image_consensus(df, image_name, annot_type):
    """Helper function that computes consensus score for instances of a single image:

    Every instance of a project is greedily matched with the best scoring
    still-unmatched instance of the same class in each other project. Each
    member of such a match becomes one output row; its score is the sum of
    its pairwise scores against the other members divided by
    (number of projects in df - 1). An instance without any match gets
    score 0.

    :param df: Annotation data of all images
    :type df: pandas.DataFrame
    :param image_name: The image name for which the consensus score will be computed
    :type image_name: str
    :param annot_type: Type of annotation instances to consider. Available candidates are: ["bbox", "polygon", "point"]
    :type annot_type: str

    :return: column name -> list of values, one entry per reported instance
    :rtype: dict
    :raises NotImplementedError: if annot_type is not one of the supported types
    """
    image_df = df[df["imageName"] == image_name]
    # Normalisation uses every project in df, not only those having this image.
    n_projects = len(set(df["project"]))
    column_names = [
        "creatorEmail", "imageName", "instanceId", "area", "className",
        "attributes", "projectName", "score"
    ]
    image_data = {column_name: [] for column_name in column_names}
    instance_id = 0

    projects_shaply_objs = {}
    # generate shapely objects of instances
    for _, row in image_df.iterrows():
        proj_instances = projects_shaply_objs.setdefault(row["project"], [])
        try:
            inst = _instance_to_shapely(row["meta"], annot_type)
        except ValueError:
            # NOTE(review): shapely is expected to raise ValueError for a
            # polygon with too few points; treated like any invalid instance.
            inst = None
        if inst is not None and inst.is_valid:
            proj_instances.append(
                (
                    inst, row["className"], row["creatorEmail"],
                    row["attributes"]
                )
            )
        else:
            logger.info(
                "Invalid %s instance occured, skipping to the next one.",
                annot_type
            )

    # A candidate must score strictly above this to count as a match:
    # areas must overlap, while points always match their nearest neighbour.
    if annot_type in ['polygon', 'bbox']:
        min_score = 0
    else:
        min_score = float('-inf')

    # match instances
    for curr_proj, curr_proj_instances in projects_shaply_objs.items():
        # Take a snapshot and empty the project's list up front. Removing
        # items from the list while iterating it skips every second
        # instance, which could then be lost from the result.
        pending = list(curr_proj_instances)
        curr_proj_instances.clear()
        for curr_inst_data in pending:
            curr_inst, curr_class = curr_inst_data[0], curr_inst_data[1]
            max_instances = []
            for other_proj, other_proj_instances in projects_shaply_objs.items(
            ):
                if curr_proj == other_proj:
                    max_instances.append((curr_proj, *curr_inst_data))
                    continue

                max_score = min_score
                max_idx = None
                for idx, other_inst_data in enumerate(other_proj_instances):
                    if other_inst_data[1] != curr_class:
                        continue
                    score = instance_consensus(curr_inst, other_inst_data[0])
                    if score > max_score:
                        max_score = score
                        max_idx = idx
                if max_idx is not None:
                    # Pop by position so exactly the matched instance is
                    # consumed and cannot be matched a second time.
                    max_instances.append(
                        (other_proj, *other_proj_instances.pop(max_idx))
                    )

            for curr_match_data in max_instances:
                if len(max_instances) == 1:
                    # No counterpart in any other project.
                    final_score = 0
                else:
                    proj_cons = 0
                    for other_match_data in max_instances:
                        if curr_match_data[0] != other_match_data[0]:
                            score = instance_consensus(
                                curr_match_data[1], other_match_data[1]
                            )
                            # Point scores are negated distances (<= 0), so
                            # a matched point counts as full agreement.
                            # NOTE(review): a zero-overlap polygon pair also
                            # counts as 1 here - confirm this is intended.
                            proj_cons += (1. if score <= 0 else score)
                    final_score = proj_cons / (n_projects - 1)

                match_proj, match_inst, match_class, match_email, \
                    match_attributes = curr_match_data
                image_data["creatorEmail"].append(match_email)
                image_data["attributes"].append(match_attributes)
                image_data["area"].append(match_inst.area)
                image_data["imageName"].append(image_name)
                image_data["instanceId"].append(instance_id)
                image_data["className"].append(match_class)
                image_data["projectName"].append(match_proj)
                image_data["score"].append(final_score)
            instance_id += 1

    return image_data


def _instance_to_shapely(inst_data, annot_type):
    """Build the shapely object described by one instance's "meta" dict."""
    if annot_type == 'bbox':
        inst_coords = inst_data["points"]
        x1, x2 = inst_coords["x1"], inst_coords["x2"]
        y1, y2 = inst_coords["y1"], inst_coords["y2"]
        return box(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
    if annot_type == 'polygon':
        # Points are stored flat as [x0, y0, x1, y1, ...]; a trailing
        # unpaired value is ignored.
        inst_coords = inst_data["points"]
        return Polygon(list(zip(inst_coords[::2], inst_coords[1::2])))
    if annot_type == 'point':
        return Point(inst_data["x"], inst_data["y"])
    raise NotImplementedError(
        "Unsupported annotation type '%s'" % annot_type
    )
142+
143+
144+
def consensus_plot(consensus_df, projects):
    """Show plotly figures summarising consensus scores.

    Three figures are shown in order: a box plot of score per annotator
    ("creatorEmail"), a box plot of score per project ("projectName") and
    a scatter plot of score against instance area with one facet per
    project.

    :param consensus_df: consensus data with columns "creatorEmail", "projectName", "className", "imageName", "area" and "score"
    :type consensus_df: pandas.DataFrame
    :param projects: project names; not used by the function body
    :type projects: list of str
    """
    data = consensus_df.copy()

    # annotator-wise boxplot
    px.box(data, x="creatorEmail", y="score", points="all").show()

    # project-wise boxplot
    px.box(data, x="projectName", y="score", points="all").show()

    # scatter plot of score vs area; only the image name is shown on hover
    hover_columns = {
        "className": False,
        "imageName": True,
        "projectName": False,
        "area": False,
        "score": False
    }
    scatter_fig = px.scatter(
        data,
        x="area",
        y="score",
        color="className",
        symbol="creatorEmail",
        facet_col="projectName",
        hover_data=hover_columns
    )
    scatter_fig.show()

0 commit comments

Comments
 (0)