-
Notifications
You must be signed in to change notification settings - Fork 4
feat(keypoint-detection): add COCO OKS-AP evaluation #949
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: feat/keypoint-detection-enablement
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,154 @@ | ||
| # ------------------------------------------------------------------------- | ||
| # Copyright (c) Microsoft Corporation. All rights reserved. | ||
| # Licensed under the MIT License. | ||
| # -------------------------------------------------------------------------- | ||
| """Build a local COCO keypoints dataset for ``winml eval`` keypoint-detection. | ||
|
|
||
| Downloads the COCO person-keypoints annotations (cached once) and a chosen | ||
| number of validation images individually (so a small subset does not require | ||
| the full ~780 MB image zip), then writes an Arrow dataset to disk via | ||
| ``datasets.Dataset.save_to_disk``. Point ``winml eval --dataset-path`` at the | ||
| output directory. | ||
|
|
||
| Each record has: | ||
| - ``image``: the RGB image (datasets ``Image`` feature) | ||
| - ``objects``: dict with parallel per-person lists ``keypoints`` (flat | ||
| ``[x, y, v]`` triplets), ``bbox`` (COCO ``[x, y, w, h]``) and ``area``. | ||
|
|
||
| Only images containing at least one labeled-keypoint person are included. | ||
|
|
||
| Usage: | ||
| uv run python scripts/build_coco_keypoints.py --output-dir ~/.cache/winml/datasets/coco_keypoints_val2017 | ||
| uv run python scripts/build_coco_keypoints.py --output-dir <dir> --num-images 100 | ||
| uv run python scripts/build_coco_keypoints.py --output-dir <dir> --num-images 0 # all images | ||
| """ | ||
|
|
||
| import argparse | ||
| import io | ||
| import json | ||
| import shutil | ||
| import sys | ||
| import urllib.request | ||
| import zipfile | ||
| from pathlib import Path | ||
|
|
||
|
|
||
| ANNOTATIONS_URL = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip" | ||
| ANNOTATION_MEMBER = "annotations/person_keypoints_val2017.json" | ||
| IMAGE_URL_TEMPLATE = "http://images.cocodataset.org/val2017/{file_name}" | ||
|
|
||
| DEFAULT_CACHE = Path.home() / ".cache" / "winml" / "coco_build" | ||
|
|
||
|
|
||
| def _download(url: str, dest: Path) -> None: | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this dataset very enormous? If so, you can consider downloading 5000 samples in steaming, shuffle and take 1000 samples. We did this for other large dataset in build_ai4privacy.py It would be fine if the dataset is not huge like below 5GB. |
||
| """Download ``url`` to ``dest`` (skips if it already exists).""" | ||
| if dest.exists(): | ||
| return | ||
| dest.parent.mkdir(parents=True, exist_ok=True) | ||
| print(f"Downloading {url}") | ||
| with urllib.request.urlopen(url) as resp, dest.open("wb") as fh: # noqa: S310 | ||
| shutil.copyfileobj(resp, fh) | ||
|
|
||
|
|
||
| def _load_annotations(cache_dir: Path) -> dict: | ||
| """Return the parsed person-keypoints annotation JSON, downloading once.""" | ||
| ann_zip = cache_dir / "annotations_trainval2017.zip" | ||
| _download(ANNOTATIONS_URL, ann_zip) | ||
| print("Reading keypoint annotations...") | ||
| with zipfile.ZipFile(ann_zip) as zf, zf.open(ANNOTATION_MEMBER) as fh: | ||
| return json.load(fh) | ||
|
|
||
|
|
||
| def _group_annotations_by_image(annotations: list[dict]) -> dict[int, list[dict]]: | ||
| """Group person annotations by image id, keeping only labeled-keypoint people.""" | ||
| by_image: dict[int, list[dict]] = {} | ||
| for ann in annotations: | ||
| if ann.get("num_keypoints", 0) <= 0 or ann.get("iscrowd", 0): | ||
| continue | ||
| by_image.setdefault(ann["image_id"], []).append(ann) | ||
| return by_image | ||
|
|
||
|
|
||
| def _fetch_image(file_name: str) -> bytes: | ||
| """Download one validation image and return its raw bytes.""" | ||
| url = IMAGE_URL_TEMPLATE.format(file_name=file_name) | ||
| with urllib.request.urlopen(url) as resp: # noqa: S310 | ||
| return resp.read() | ||
|
|
||
|
|
||
| def build(output_dir: Path, num_images: int, cache_dir: Path) -> None: | ||
| """Build and save the COCO keypoints dataset to ``output_dir``.""" | ||
| from datasets import Dataset, Features, Image, Sequence, Value | ||
| from PIL import Image as PILImage | ||
|
|
||
| coco = _load_annotations(cache_dir) | ||
| images_by_id = {img["id"]: img for img in coco["images"]} | ||
| by_image = _group_annotations_by_image(coco["annotations"]) | ||
|
|
||
| image_ids = sorted(by_image) | ||
| if num_images > 0: | ||
| image_ids = image_ids[:num_images] | ||
| print(f"Building {len(image_ids)} images with keypoint annotations...") | ||
|
|
||
| records = [] | ||
| for idx, image_id in enumerate(image_ids, start=1): | ||
| info = images_by_id[image_id] | ||
| try: | ||
| raw = _fetch_image(info["file_name"]) | ||
| image = PILImage.open(io.BytesIO(raw)).convert("RGB") | ||
| except Exception as exc: | ||
| print(f" skip {info['file_name']}: {exc}") | ||
| continue | ||
|
|
||
| persons = by_image[image_id] | ||
| records.append( | ||
| { | ||
| "image": image, | ||
| "objects": { | ||
| "keypoints": [[float(v) for v in p["keypoints"]] for p in persons], | ||
| "bbox": [[float(v) for v in p["bbox"]] for p in persons], | ||
| "area": [float(p["area"]) for p in persons], | ||
| }, | ||
| } | ||
| ) | ||
| if idx % 50 == 0: | ||
| print(f" {idx}/{len(image_ids)}") | ||
|
|
||
| features = Features( | ||
| { | ||
| "image": Image(), | ||
| "objects": { | ||
| "keypoints": Sequence(Sequence(Value("float32"))), | ||
| "bbox": Sequence(Sequence(Value("float32"))), | ||
| "area": Sequence(Value("float32")), | ||
| }, | ||
| } | ||
| ) | ||
| dataset = Dataset.from_list(records, features=features) | ||
| output_dir.mkdir(parents=True, exist_ok=True) | ||
| dataset.save_to_disk(str(output_dir)) | ||
| print(f"Saved {len(dataset)} samples to {output_dir}") | ||
|
|
||
|
|
||
| def main() -> int: | ||
| parser = argparse.ArgumentParser(description="Build a local COCO keypoints dataset.") | ||
| parser.add_argument("--output-dir", required=True, type=Path, help="Dataset output directory.") | ||
| parser.add_argument( | ||
| "--num-images", | ||
| type=int, | ||
| default=100, | ||
| help="Number of images to include (0 = all images with keypoints).", | ||
| ) | ||
| parser.add_argument( | ||
| "--cache-dir", | ||
| type=Path, | ||
| default=DEFAULT_CACHE, | ||
| help="Where to cache the downloaded annotations zip.", | ||
| ) | ||
| args = parser.parse_args() | ||
| build(args.output_dir, args.num_images, args.cache_dir) | ||
| return 0 | ||
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| sys.exit(main()) | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -62,6 +62,8 @@ | |
| "winml.modelkit.eval.zero_shot_image_classification_evaluator:WinMLZeroShotImageClassificationEvaluator", | ||
| "depth-estimation": | ||
| "winml.modelkit.eval.depth_estimation_evaluator:WinMLDepthEstimationEvaluator", | ||
| "keypoint-detection": | ||
| "winml.modelkit.eval.keypoint_detection_evaluator:WinMLKeypointDetectionEvaluator", | ||
| "compare-tensor": | ||
| "winml.modelkit.eval.tensor_similarity_evaluator:TensorSimilarityEvaluator", | ||
| } | ||
|
|
@@ -172,6 +174,21 @@ def get_evaluator_class(config: WinMLEvaluationConfig) -> type[WinMLEvaluator]: | |
| # the legacy `nyu_depth_v2.py` loader script. | ||
| "revision": "refs/convert/parquet", | ||
| }, | ||
| "keypoint-detection": { | ||
| # Built locally by scripts/build_coco_keypoints.py (COCO has no | ||
| # script-free HF mirror for person keypoints). Run that script first, | ||
| # or pass --dataset-path to point at your own build. | ||
| "path": "~/.cache/winml/datasets/coco_keypoints_val2017", | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If no existing HF dataset can be used directly, it is OK not to document a default dataset |
||
| "split": "validation", | ||
| "columns_mapping": { | ||
| "input_column": "image", | ||
| "annotation_column": "objects", | ||
| "keypoints_key": "keypoints", | ||
| "bbox_key": "bbox", | ||
| "area_key": "area", | ||
| "box_format": "xywh", | ||
| }, | ||
| }, | ||
| } | ||
|
|
||
|
|
||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the models which you verified and can be evaluated, can you help to update C:\Users\zhenni\repos\wmk\scripts\e2e_eval\testsets\models_with_acc.json
and post the evaluation data you run locally?