superannotateai
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎requirements.txt‎
Lines changed: 1 addition & 0 deletions b/‎requirements.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎superannotate/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎superannotate/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎superannotate/consensus_benchmark/__init__.py‎ b/‎superannotate/consensus_benchmark/__init__.py‎
diff --git a/‎superannotate/consensus_benchmark/consensus.py‎
Lines changed: 110 additions & 0 deletions b/‎superannotate/consensus_benchmark/consensus.py‎
Lines changed: 110 additions & 0 deletions
diff --git a/‎superannotate/consensus_benchmark/helpers.py‎
Lines changed: 173 additions & 0 deletions b/‎superannotate/consensus_benchmark/helpers.py‎
Lines changed: 173 additions & 0 deletions
@@ -130,3 +130,6 @@ dmypy.json
 
 sample_large_files
 sample_tif_files
+
+# Vscode settings folder
+.vscode/
@@ -14,3 +14,4 @@ packaging>=20.4
 pandas>=1.1.2
 plotly>=4.1.0
 ffmpeg-python>=0.2.0
+Shapely>=1.7.1
@@ -62,6 +62,7 @@
     SAExistingProjectNameException, SANonExistingAnnotationClassNameException,
     SANonExistingProjectNameException
 )
+from .consensus_benchmark.consensus import consensus
 from .input_converters.conversion import (
     convert_platform, convert_project_type, export_annotation_format,
     import_annotation_format
 
@@ -0,0 +1,110 @@
+"""
+Main module for consensus computation
+"""
+import logging
+import tempfile
+import pandas as pd
+from pathlib import Path
+
+from .helpers import image_consensus, consensus_plot
+from ..db.exports import prepare_export, download_export
+from ..analytics.common import aggregate_annotations_as_df
+
+logger = logging.getLogger("superannotate-python-sdk")
+
+
+def consensus(
+    project_names,
+    export_root=None,
+    image_list=None,
+    annot_type='bbox',
+    show_plots=False
+):
+    """Computes consensus score for each instance of given images that are present in at least 2 of the given projects:
+
+    :param project_names: list of project names to aggregate through
+    :type project_names: list of str
+    :param export_root: root export path of the projects. If None, every project is exported and downloaded into a temporary directory. Default: None
+    :type export_root: Pathlike (str or Path)
+    :param image_list: List of image names from the projects list that must be used. If None, then all images from the projects list will be used. Default: None
+    :type image_list: list
+    :param annot_type: Type of annotation instances to consider. Available candidates are: ["bbox", "polygon", "point"]
+    :type annot_type: str
+    :param show_plots: If True, show plots based on results of consensus computation. Default: False
+    :type show_plots: bool
+
+    :return: the per-image results of image_consensus concatenated into a single DataFrame
+    :rtype: pandas.DataFrame
+    :raises NotImplementedError: if annot_type is not one of the supported types
+    """
+    supported_types = ['polygon', 'bbox', 'point']
+    if annot_type not in supported_types:
+        raise NotImplementedError
+
+    # Load the annotations of every project into its own DataFrame
+    project_dfs = []
+    for project_name in project_names:
+        if export_root is None:
+            # No local export given: fetch one into a temporary directory and
+            # read it before the directory is removed
+            with tempfile.TemporaryDirectory() as export_dir:
+                proj_export_meta = prepare_export(project_name)
+                download_export(project_name, proj_export_meta, export_dir)
+                project_df = aggregate_annotations_as_df(export_dir)
+        else:
+            # Existing exports are looked up under <export_root>/<project_name>
+            export_dir = Path(export_root) / project_name
+            project_df = aggregate_annotations_as_df(export_dir)
+        # Tag every row with the project it came from
+        project_df["project"] = project_name
+        project_dfs.append(project_df)
+
+    all_projects_df = pd.concat(project_dfs)
+    # Keep only rows that belong to an annotation instance
+    all_projects_df = all_projects_df[all_projects_df["instanceId"].notna()]
+
+    if image_list is not None:
+        all_projects_df = all_projects_df.loc[
+            all_projects_df["imageName"].isin(image_list)]
+
+    # annot_type was validated against supported_types above, so it is safe
+    # to embed it in the query string
+    all_projects_df.query("type == '" + annot_type + "'", inplace=True)
+
+    def aggregate_attributes(instance_df):
+        """Collapse the rows of one instance into a single row.
+
+        The attribute columns are replaced by an "attributes" column holding
+        a dict {attribute group name: list of attribute names}, or None when
+        the instance has no attribute groups.
+        """
+        def attribute_to_list(attribute_df):
+            # Store the full list of attribute names of the group on each
+            # row of that group
+            attribute_names = list(attribute_df["attributeName"])
+            attribute_df["attributeNames"] = len(attribute_df) * [
+                attribute_names
+            ]
+            return attribute_df
+
+        attributes = None
+        if not instance_df["attributeGroupName"].isna().all():
+            attrib_group_name = instance_df.groupby("attributeGroupName")[[
+                "attributeGroupName", "attributeName"
+            ]].apply(attribute_to_list)
+            attributes = dict(
+                zip(
+                    attrib_group_name["attributeGroupName"],
+                    attrib_group_name["attributeNames"]
+                )
+            )
+
+        # NOTE(review): the group frame is modified in place here; presumably
+        # this relies on how pandas hands groups to apply() - confirm when
+        # upgrading pandas
+        instance_df.drop(
+            ["attributeGroupName", "attributeName"], axis=1, inplace=True
+        )
+        instance_df.drop_duplicates(
+            subset=["imageName", "instanceId", "project"], inplace=True
+        )
+        instance_df["attributes"] = [attributes]
+        return instance_df
+
+    # One group per annotation instance of an image within a project
+    all_projects_df = all_projects_df.groupby(
+        ["imageName", "instanceId", "project"]
+    )
+    all_projects_df = all_projects_df.apply(aggregate_attributes).reset_index(
+        drop=True
+    )
+
+    # Compute the consensus data image by image
+    unique_images = set(all_projects_df["imageName"])
+    all_consensus_data = []
+    for image_name in unique_images:
+        image_data = image_consensus(all_projects_df, image_name, annot_type)
+        all_consensus_data.append(pd.DataFrame(image_data))
+
+    consensus_df = pd.concat(all_consensus_data, ignore_index=True)
+
+    if show_plots:
+        consensus_plot(consensus_df, project_names)
+
+    return consensus_df
@@ -0,0 +1,173 @@
+from shapely.geometry import Polygon, box, Point
+import plotly.express as px
+from plotly.subplots import make_subplots
+import plotly.graph_objects as go
+import plotly.colors as col
+import logging
+import numpy as np
+logger = logging.getLogger("superannotate-python-sdk")
+
+
+def instance_consensus(inst_1, inst_2):
+    """Helper function that computes consensus score between two instances:
+
+    Two polygons are scored by their intersection-over-union; two points are
+    scored by the negated distance between them, so that a larger score
+    always means a better agreement.
+
+    :param inst_1: First instance for consensus score.
+    :type inst_1: shapely object
+    :param inst_2: Second instance for consensus score.
+    :type inst_2: shapely object
+
+    :return: consensus score of the two instances
+    :rtype: float
+    :raises NotImplementedError: if the instances are not both polygons or both points
+    """
+    # geom_type is the documented accessor; the "type" alias is deprecated
+    # in newer Shapely releases
+    geom_types = (inst_1.geom_type, inst_2.geom_type)
+
+    if geom_types == ('Polygon', 'Polygon'):
+        union_area = inst_1.union(inst_2).area
+        if union_area == 0:
+            # Degenerate (zero-area) shapes cannot overlap; avoid dividing
+            # by zero
+            return 0.0
+        return inst_1.intersection(inst_2).area / union_area
+
+    if geom_types == ('Point', 'Point'):
+        return -1 * inst_1.distance(inst_2)
+
+    raise NotImplementedError(
+        "Consensus score is not implemented for geometry types %s and %s" %
+        geom_types
+    )
+
+
+def image_consensus(df, image_name, annot_type):
+    """Helper function that computes consensus score for instances of a single image:
+
+    Every instance is matched with the best scoring instance of the same
+    class in each other project. Every matched group gets a common
+    instanceId and each of its members gets a score; an instance without
+    any match gets score 0.
+
+    :param df: Annotation data of all images
+    :type df: pandas.DataFrame
+    :param image_name: The image name for which the consensus score will be computed
+    :type image_name: str
+    :param annot_type: Type of annotation instances to consider. Available candidates are: ["bbox", "polygon", "point"]
+    :type annot_type: str
+
+    :return: column name -> list of values, one entry per scored instance
+    :rtype: dict
+    :raises NotImplementedError: if annot_type is not one of the available candidates
+    """
+    image_df = df[df["imageName"] == image_name]
+    # The score is normalized by the number of projects in the whole data,
+    # not only by the projects that contain this image
+    all_projects = list(set(df["project"]))
+    column_names = [
+        "creatorEmail", "imageName", "instanceId", "area", "className",
+        "attributes", "projectName", "score"
+    ]
+    image_data = {column_name: [] for column_name in column_names}
+
+    def add_row(match_data, inst_id, score):
+        # match_data is (project, shapely object, class, creator, attributes)
+        proj_name, inst, class_name, creator_email, attributes = match_data
+        image_data["creatorEmail"].append(creator_email)
+        image_data["attributes"].append(attributes)
+        image_data["area"].append(inst.area)
+        image_data["imageName"].append(image_name)
+        image_data["instanceId"].append(inst_id)
+        image_data["className"].append(class_name)
+        image_data["projectName"].append(proj_name)
+        image_data["score"].append(score)
+
+    projects_shaply_objs = {}
+    # generate shapely objects of instances
+    for _, row in image_df.iterrows():
+        proj_instances = projects_shaply_objs.setdefault(row["project"], [])
+        inst_data = row["meta"]
+        if annot_type == 'bbox':
+            inst_coords = inst_data["points"]
+            x1, x2 = inst_coords["x1"], inst_coords["x2"]
+            y1, y2 = inst_coords["y1"], inst_coords["y2"]
+            inst = box(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
+        elif annot_type == 'polygon':
+            # points are stored as a flat list [x0, y0, x1, y1, ...]
+            inst_coords = inst_data["points"]
+            shapely_format = [
+                (inst_coords[i], inst_coords[i + 1])
+                for i in range(0, len(inst_coords) - 1, 2)
+            ]
+            # a polygon needs at least three vertices; treat anything
+            # shorter as an invalid instance instead of failing
+            inst = Polygon(shapely_format) if len(shapely_format) >= 3 else None
+        elif annot_type == 'point':
+            inst = Point(inst_data["x"], inst_data["y"])
+        else:
+            raise NotImplementedError(
+                "Unsupported annotation type: %s" % annot_type
+            )
+
+        if inst is not None and inst.is_valid:
+            proj_instances.append(
+                (
+                    inst, row["className"], row["creatorEmail"],
+                    row["attributes"]
+                )
+            )
+        else:
+            logger.info(
+                "Invalid %s instance occured, skipping to the next one.",
+                annot_type
+            )
+
+    # polygons and boxes must overlap to be matched, points always match
+    # the nearest point of the same class
+    if annot_type in ['polygon', 'bbox']:
+        min_score = 0
+    else:
+        min_score = float('-inf')
+
+    # match instances
+    instance_id = 0
+    for curr_proj, curr_proj_instances in projects_shaply_objs.items():
+        # The list being iterated is never modified inside the loop (only
+        # the lists of the other projects are), so no instance is skipped
+        for curr_inst_data in curr_proj_instances:
+            curr_inst, curr_class = curr_inst_data[0], curr_inst_data[1]
+            max_instances = []
+            for other_proj, other_proj_instances in projects_shaply_objs.items(
+            ):
+                if curr_proj == other_proj:
+                    max_instances.append((curr_proj, *curr_inst_data))
+                    continue
+                max_score = min_score
+                max_index = None
+                for index, other_inst_data in enumerate(other_proj_instances):
+                    other_inst, other_class = other_inst_data[:2]
+                    if other_class != curr_class:
+                        continue
+                    score = instance_consensus(curr_inst, other_inst)
+                    if score > max_score:
+                        max_score = score
+                        max_index = index
+                if max_index is not None:
+                    # a matched instance must not be matched a second time
+                    max_inst_data = other_proj_instances.pop(max_index)
+                    max_instances.append((other_proj, *max_inst_data))
+
+            if len(max_instances) == 1:
+                add_row(max_instances[0], instance_id, 0)
+            else:
+                for curr_match_data in max_instances:
+                    proj_cons = 0
+                    for other_match_data in max_instances:
+                        if curr_match_data[0] != other_match_data[0]:
+                            score = instance_consensus(
+                                curr_match_data[1], other_match_data[1]
+                            )
+                            # NOTE(review): non-positive scores (always the
+                            # case for points) count as full agreement -
+                            # confirm this is also intended for polygons
+                            # with zero overlap
+                            proj_cons += (1. if score <= 0 else score)
+                    add_row(
+                        curr_match_data, instance_id,
+                        proj_cons / (len(all_projects) - 1)
+                    )
+            instance_id += 1
+
+        # All instances of this project are processed now; drop them so that
+        # the following projects cannot match against them again
+        curr_proj_instances.clear()
+
+    return image_data
+
+
+def consensus_plot(consensus_df, projects):
+    """Helper function that shows plots of the consensus scores:
+
+    Shows a box plot of the scores per annotator, a box plot of the scores
+    per project and a scatter plot of score versus instance area.
+
+    :param consensus_df: Consensus data with one row per instance
+    :type consensus_df: pandas.DataFrame
+    :param projects: list of project names (currently not used by the plots)
+    :type projects: list of str
+
+    """
+    plot_data = consensus_df.copy()
+
+    # box plots of the score: first annotator-wise, then project-wise
+    for group_column in ("creatorEmail", "projectName"):
+        box_fig = px.box(plot_data, x=group_column, y="score", points="all")
+        box_fig.show()
+
+    # scatter plot of score vs area, one facet per project; only the image
+    # name is shown as additional hover information
+    hover_columns = {
+        "className": False,
+        "imageName": True,
+        "projectName": False,
+        "area": False,
+        "score": False
+    }
+    scatter_fig = px.scatter(
+        plot_data,
+        x="area",
+        y="score",
+        color="className",
+        symbol="creatorEmail",
+        facet_col="projectName",
+        hover_data=hover_columns
+    )
+    scatter_fig.show()
Original file line number	Diff line number	Diff line change
`@@ -62,6 +62,7 @@`
`62`	`62`	`SAExistingProjectNameException, SANonExistingAnnotationClassNameException,`
`63`	`63`	`SANonExistingProjectNameException`
`64`	`64`	`)`
	`65`	`+from .consensus_benchmark.consensus import consensus`
`65`	`66`	`from .input_converters.conversion import (`
`66`	`67`	`convert_platform, convert_project_type, export_annotation_format,`
`67`	`68`	`import_annotation_format`