From 7e24d7179f61442a1a02582be2a08ac0b8826152 Mon Sep 17 00:00:00 2001 From: Rishi Jat Date: Fri, 13 Mar 2026 14:17:03 +0530 Subject: [PATCH 1/3] feat: add auto-generated Python data structures from ModelPack schema Signed-off-by: Rishi Jat --- Makefile | 4 ++ py/README.md | 28 ++++++++++++ py/model_spec/v1/__init__.py | 17 +++++++ py/model_spec/v1/models.py | 79 +++++++++++++++++++++++++++++++++ tools/generate_python_models.py | 54 ++++++++++++++++++++++ 5 files changed, 182 insertions(+) create mode 100644 py/README.md create mode 100644 py/model_spec/v1/__init__.py create mode 100644 py/model_spec/v1/models.py create mode 100644 tools/generate_python_models.py diff --git a/Makefile b/Makefile index 803e878..4e5f48f 100644 --- a/Makefile +++ b/Makefile @@ -5,3 +5,7 @@ validate-examples: ## validate examples in the specification markdown files .PHONY: test test: go test ./... + +.PHONY: generate-python-api +generate-python-api: ## generate Python API models from JSON schema + python3 tools/generate_python_models.py diff --git a/py/README.md b/py/README.md new file mode 100644 index 0000000..887e4a1 --- /dev/null +++ b/py/README.md @@ -0,0 +1,28 @@ +# Python ModelPack Types + +This directory provides auto-generated Python data structures for the ModelPack specification. + +The models are generated from the canonical JSON Schema at `schema/config-schema.json` and are intended for downstream projects that need importable spec-aligned types. + +## Usage + +```python +from model_spec.v1 import Model + +model = Model.model_validate_json(json_payload) +print(model.descriptor.name) +``` + +## Regenerate + +Run: + +```bash +make generate-python-api +``` + +This executes `tools/generate_python_models.py`, which uses `datamodel-codegen` to regenerate `py/model_spec/v1/models.py`. + +## Important + +Do not edit generated models manually. Update the schema and regenerate instead. 
diff --git a/py/model_spec/v1/__init__.py b/py/model_spec/v1/__init__.py new file mode 100644 index 0000000..94d0d74 --- /dev/null +++ b/py/model_spec/v1/__init__.py @@ -0,0 +1,17 @@ +from .models import ( + Model, + ModelCapabilities, + ModelConfig, + ModelDescriptor, + ModelFS, + Modality, +) + +__all__ = [ + "Model", + "ModelCapabilities", + "ModelConfig", + "ModelDescriptor", + "ModelFS", + "Modality", +] diff --git a/py/model_spec/v1/models.py b/py/model_spec/v1/models.py new file mode 100644 index 0000000..4f73b36 --- /dev/null +++ b/py/model_spec/v1/models.py @@ -0,0 +1,79 @@ +# generated by datamodel-codegen: +# filename: config-schema.json + +from __future__ import annotations + +from typing import Literal + +from pydantic import AwareDatetime, BaseModel, ConfigDict, Field, RootModel + + +class ModelDescriptor(BaseModel): + model_config = ConfigDict( + extra='forbid', + ) + createdAt: AwareDatetime | None = None + authors: list[str] | None = None + family: str | None = None + name: str | None = Field(None, min_length=1) + docURL: str | None = None + sourceURL: str | None = None + datasetsURL: list[str] | None = None + version: str | None = None + revision: str | None = None + vendor: str | None = None + licenses: list[str] | None = None + title: str | None = None + description: str | None = None + + +class ModelFS(BaseModel): + model_config = ConfigDict( + extra='forbid', + ) + type: Literal['layers'] + diffIds: list[str] = Field(..., min_length=1) + + +class Language(RootModel[str]): + root: str = Field(..., pattern='^[a-z]{2}$') + + +class Modality( + RootModel[Literal['text', 'image', 'audio', 'video', 'embedding', 'other']] +): + root: Literal['text', 'image', 'audio', 'video', 'embedding', 'other'] + + +class ModelCapabilities(BaseModel): + model_config = ConfigDict( + extra='forbid', + ) + inputTypes: list[Modality] | None = None + outputTypes: list[Modality] | None = None + knowledgeCutoff: AwareDatetime | None = None + reasoning: bool | None = None 
+ toolUsage: bool | None = None + reward: bool | None = None + languages: list[Language] | None = None + + +class ModelConfig(BaseModel): + model_config = ConfigDict( + extra='forbid', + ) + architecture: str | None = None + format: str | None = None + paramSize: str | None = None + precision: str | None = None + quantization: str | None = None + capabilities: ModelCapabilities | None = None + + +class Model(BaseModel): + model_config = ConfigDict( + extra='forbid', + ) + descriptor: ModelDescriptor + modelfs: ModelFS + config: ModelConfig diff --git a/tools/generate_python_models.py b/tools/generate_python_models.py new file mode 100644 index 0000000..99c9fbc --- /dev/null +++ b/tools/generate_python_models.py @@ -0,0 +1,54 @@ +#!/usr/bin/env python3 +"""Generate Python models from the canonical ModelPack JSON Schema.""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +SCHEMA_PATH = ROOT / "schema" / "config-schema.json" +OUTPUT_PATH = ROOT / "py" / "model_spec" / "v1" / "models.py" + + +def main() -> int: + try: + import datamodel_code_generator # noqa: F401 + except ModuleNotFoundError: + print( + "error: datamodel-code-generator is not installed for this Python interpreter. 
" + "Install it with: python -m pip install datamodel-code-generator", + file=sys.stderr, + ) + return 1 + + OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) + + cmd = [ + sys.executable, + "-m", + "datamodel_code_generator", + "--input", + str(SCHEMA_PATH), + "--output", + str(OUTPUT_PATH), + "--input-file-type", + "jsonschema", + "--output-model-type", + "pydantic_v2.BaseModel", + "--target-python-version", + "3.10", + "--enum-field-as-literal", + "all", + "--field-constraints", + "--disable-timestamp", + ] + + subprocess.run(cmd, check=True) + print(f"Generated: {OUTPUT_PATH}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From 90878bce41f8bbbb0d95e1e1e3044f15dd3da9fc Mon Sep 17 00:00:00 2001 From: Rishi Jat Date: Fri, 13 Mar 2026 18:16:51 +0530 Subject: [PATCH 2/3] copilot suggestion Signed-off-by: Rishi Jat --- py/README.md | 19 ++++++++++++++++++- py/model_spec/v1/__init__.py | 2 ++ tools/generate_python_models.py | 23 ++++++++++++++++++++--- 3 files changed, 40 insertions(+), 4 deletions(-) diff --git a/py/README.md b/py/README.md index 887e4a1..79ef4c5 100644 --- a/py/README.md +++ b/py/README.md @@ -4,13 +4,28 @@ This directory provides auto-generated Python data structures for the ModelPack The models are generated from the canonical JSON Schema at `schema/config-schema.json` and are intended for downstream projects that need importable spec-aligned types. +## Requirements + +- Python >= 3.10 +- Pydantic >= 2 + +## Installation / Import setup + +These models live under the `py/` directory. 
+ +To make `model_spec.v1` importable locally: + +```bash +export PYTHONPATH="$(pwd)/py:${PYTHONPATH}" +``` + ## Usage ```python from model_spec.v1 import Model model = Model.model_validate_json(json_payload) -print(model.descriptor.name) +print(model.descriptor.docURL) ``` ## Regenerate @@ -18,9 +33,11 @@ print(model.descriptor.name) Run: ```bash +pip install datamodel-code-generator make generate-python-api ``` + This executes `tools/generate_python_models.py`, which uses `datamodel-codegen` to regenerate `py/model_spec/v1/models.py`. ## Important diff --git a/py/model_spec/v1/__init__.py b/py/model_spec/v1/__init__.py index 94d0d74..a044e87 100644 --- a/py/model_spec/v1/__init__.py +++ b/py/model_spec/v1/__init__.py @@ -5,6 +5,7 @@ ModelDescriptor, ModelFS, Modality, + Language, ) __all__ = [ @@ -14,4 +15,5 @@ "ModelDescriptor", "ModelFS", "Modality", + "Language", ] diff --git a/tools/generate_python_models.py b/tools/generate_python_models.py index 99c9fbc..c6aa31b 100644 --- a/tools/generate_python_models.py +++ b/tools/generate_python_models.py @@ -23,6 +23,13 @@ def main() -> int: ) return 1 + if not SCHEMA_PATH.is_file(): + print( + f"error: JSON Schema file not found or not a file at expected path: {SCHEMA_PATH}", + file=sys.stderr, + ) + return 1 + OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) cmd = [ @@ -45,9 +52,19 @@ def main() -> int: "--disable-timestamp", ] - subprocess.run(cmd, check=True) - print(f"Generated: {OUTPUT_PATH}") - return 0 + try: + subprocess.run(cmd, check=True) + except subprocess.CalledProcessError as exc: + cmd_str = " ".join(exc.cmd) if getattr(exc, "cmd", None) else " ".join(cmd) + print( + f"error: datamodel-code-generator failed with exit code {exc.returncode}.", + file=sys.stderr, + ) + print(f"command: {cmd_str}", file=sys.stderr) + return exc.returncode or 1 + else: + print(f"Generated: {OUTPUT_PATH}") + return 0 if __name__ == "__main__": From 33ee298f1d47db9b3904bd68aa8756a810313c8c Mon Sep 17 00:00:00 2001 From: 
Rishi Jat Date: Fri, 13 Mar 2026 19:08:41 +0530 Subject: [PATCH 3/3] docs: remove stray blank line from py/README.md Signed-off-by: Rishi Jat --- py/README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/py/README.md b/py/README.md index 79ef4c5..3db7269 100644 --- a/py/README.md +++ b/py/README.md @@ -37,7 +37,6 @@ pip install datamodel-code-generator make generate-python-api ``` - This executes `tools/generate_python_models.py`, which uses `datamodel-codegen` to regenerate `py/model_spec/v1/models.py`. ## Important