diff --git a/.gitignore b/.gitignore index 71cce7a..5bb9fb7 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,13 @@ # Dependency directories (remove the comment below to include it) vendor/ + +# Python +__pycache__/ +*.pyc +*.egg-info/ +.venv/ +.pytest_cache/ .idea .vscode .cache diff --git a/Makefile b/Makefile index 803e878..bdc16d2 100644 --- a/Makefile +++ b/Makefile @@ -5,3 +5,7 @@ validate-examples: ## validate examples in the specification markdown files .PHONY: test test: go test ./... + +.PHONY: generate-python-models +generate-python-models: ## generate Python models from JSON schema + python3 tools/generate_python_models.py diff --git a/specs-python/README.md b/specs-python/README.md new file mode 100644 index 0000000..d1cdcc4 --- /dev/null +++ b/specs-python/README.md @@ -0,0 +1,58 @@ +# Python ModelPack Types + +This directory provides Python data structures for the CNCF ModelPack specification. + +The core model types are **auto-generated** from the canonical JSON Schema at `schema/config-schema.json` using [datamodel-code-generator](https://github.com/koxudaxi/datamodel-code-generator), ensuring they stay in sync with the specification automatically. + +## Requirements + +- Python >= 3.10 +- pydantic >= 2 +- jsonschema >= 4.20.0 + +## Installation + +```bash +cd specs-python +pip install -e . 
+``` + +For development: + +```bash +pip install -e ".[dev]" +``` + +## Usage + +```python +from modelpack.v1 import Model, ModelDescriptor, ModelFS, ModelConfig + +# Create a model from a JSON payload +model = Model.model_validate_json(json_payload) +print(model.descriptor.name) + +# Validate a config dict against the JSON schema +from modelpack.v1 import validate_config +validate_config(config_dict) +``` + +## Regenerate Models + +If the schema changes, regenerate the Pydantic models: + +```bash +pip install datamodel-code-generator +make generate-python-models +``` + +This runs `tools/generate_python_models.py`, which regenerates `specs-python/modelpack/v1/models.py`. + +**Do not edit `models.py` manually.** Update the schema and regenerate instead. + +## Run Tests + +```bash +cd specs-python +pytest +``` diff --git a/specs-python/modelpack/__init__.py b/specs-python/modelpack/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/specs-python/modelpack/v1/__init__.py b/specs-python/modelpack/v1/__init__.py new file mode 100644 index 0000000..3c9b164 --- /dev/null +++ b/specs-python/modelpack/v1/__init__.py @@ -0,0 +1,97 @@ +# Copyright 2025 The CNCF ModelPack Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""ModelPack Python SDK - CNCF standard for packaging and distributing AI models. + +Types are auto-generated from schema/config-schema.json using datamodel-code-generator. 
+Do not edit models.py manually — regenerate with: make generate-python-models +""" + +from modelpack.v1.annotations import ( + ANNOTATION_FILE_METADATA, + ANNOTATION_FILEPATH, + ANNOTATION_MEDIA_TYPE_UNTESTED, + FileMetadata, +) +from modelpack.v1.mediatype import ( + ARTIFACT_TYPE_MODEL_MANIFEST, + MEDIA_TYPE_MODEL_CODE, + MEDIA_TYPE_MODEL_CODE_GZIP, + MEDIA_TYPE_MODEL_CODE_RAW, + MEDIA_TYPE_MODEL_CODE_ZSTD, + MEDIA_TYPE_MODEL_CONFIG, + MEDIA_TYPE_MODEL_DATASET, + MEDIA_TYPE_MODEL_DATASET_GZIP, + MEDIA_TYPE_MODEL_DATASET_RAW, + MEDIA_TYPE_MODEL_DATASET_ZSTD, + MEDIA_TYPE_MODEL_DOC, + MEDIA_TYPE_MODEL_DOC_GZIP, + MEDIA_TYPE_MODEL_DOC_RAW, + MEDIA_TYPE_MODEL_DOC_ZSTD, + MEDIA_TYPE_MODEL_WEIGHT, + MEDIA_TYPE_MODEL_WEIGHT_CONFIG, + MEDIA_TYPE_MODEL_WEIGHT_CONFIG_GZIP, + MEDIA_TYPE_MODEL_WEIGHT_CONFIG_RAW, + MEDIA_TYPE_MODEL_WEIGHT_CONFIG_ZSTD, + MEDIA_TYPE_MODEL_WEIGHT_GZIP, + MEDIA_TYPE_MODEL_WEIGHT_RAW, + MEDIA_TYPE_MODEL_WEIGHT_ZSTD, +) +from modelpack.v1.models import ( + Language, + Modality, + Model, + ModelCapabilities, + ModelConfig, + ModelDescriptor, + ModelFS, +) +from modelpack.v1.validator import validate_config + +__all__ = [ + "Model", + "ModelCapabilities", + "ModelConfig", + "ModelDescriptor", + "ModelFS", + "Modality", + "Language", + "FileMetadata", + "ANNOTATION_FILEPATH", + "ANNOTATION_FILE_METADATA", + "ANNOTATION_MEDIA_TYPE_UNTESTED", + "ARTIFACT_TYPE_MODEL_MANIFEST", + "MEDIA_TYPE_MODEL_CONFIG", + "MEDIA_TYPE_MODEL_WEIGHT_RAW", + "MEDIA_TYPE_MODEL_WEIGHT", + "MEDIA_TYPE_MODEL_WEIGHT_GZIP", + "MEDIA_TYPE_MODEL_WEIGHT_ZSTD", + "MEDIA_TYPE_MODEL_WEIGHT_CONFIG_RAW", + "MEDIA_TYPE_MODEL_WEIGHT_CONFIG", + "MEDIA_TYPE_MODEL_WEIGHT_CONFIG_GZIP", + "MEDIA_TYPE_MODEL_WEIGHT_CONFIG_ZSTD", + "MEDIA_TYPE_MODEL_DOC_RAW", + "MEDIA_TYPE_MODEL_DOC", + "MEDIA_TYPE_MODEL_DOC_GZIP", + "MEDIA_TYPE_MODEL_DOC_ZSTD", + "MEDIA_TYPE_MODEL_CODE_RAW", + "MEDIA_TYPE_MODEL_CODE", + "MEDIA_TYPE_MODEL_CODE_GZIP", + "MEDIA_TYPE_MODEL_CODE_ZSTD", + 
# Annotation key for the file path of the layer.
ANNOTATION_FILEPATH = "org.cncf.model.filepath"

# Annotation key for the file metadata of the layer.
ANNOTATION_FILE_METADATA = "org.cncf.model.file.metadata+json"

# Annotation key for file media type untested flag of the layer.
ANNOTATION_MEDIA_TYPE_UNTESTED = "org.cncf.model.file.mediatype.untested"


def _zero_time() -> datetime:
    """Return the default modification time: 0001-01-01T00:00:00 UTC."""
    return datetime(1, 1, 1, tzinfo=timezone.utc)


def _format_datetime(dt: datetime) -> str:
    """Format a datetime as RFC 3339 with 'Z' suffix for UTC, matching Go.

    Only a literal ``+00:00`` offset is rewritten to ``Z``; any other offset
    (or a naive datetime, which has no offset at all) is emitted exactly as
    ``datetime.isoformat()`` produces it.
    """
    s = dt.isoformat()
    if s.endswith("+00:00"):
        s = s[:-6] + "Z"
    return s


def _parse_datetime(value: str) -> datetime:
    """Parse an RFC 3339 timestamp into an aware ``datetime``.

    ``datetime.fromisoformat`` on Python 3.10 accepts neither a ``Z`` suffix
    nor fractional seconds that are not exactly 3 or 6 digits long, so the
    text is normalised first:

    * a trailing ``Z``/``z`` becomes ``+00:00``;
    * the fractional part is truncated or zero-padded to microseconds
      (``datetime`` cannot represent anything finer).

    Raises:
        ValueError: If the normalised text is still not a valid ISO timestamp.
    """
    text = value
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    head, dot, tail = text.partition(".")
    if dot:
        # Count the leading digits of the fraction; what follows is the offset.
        n_digits = len(tail) - len(tail.lstrip("0123456789"))
        if n_digits:
            fraction = tail[:n_digits][:6].ljust(6, "0")
            text = f"{head}.{fraction}{tail[n_digits:]}"
    return datetime.fromisoformat(text)


@dataclass
class FileMetadata:
    """Represents the metadata of a file.

    Mirrors the Go FileMetadata struct in specs-go/v1/annotations.go.
    Every field has a zero-like default, so ``FileMetadata()`` is valid and
    serializes with all keys present.
    """

    name: str = ""
    mode: int = 0
    uid: int = 0
    gid: int = 0
    size: int = 0
    # Serialized under the JSON key "mtime".
    mod_time: datetime = field(default_factory=_zero_time)
    typeflag: int = 0

    def to_dict(self) -> dict:
        """Serialize to a dict matching the JSON field names.

        All fields are always present, matching Go's FileMetadata
        which has no omitempty tags.
        """
        return {
            "name": self.name,
            "mode": self.mode,
            "uid": self.uid,
            "gid": self.gid,
            "size": self.size,
            "mtime": _format_datetime(self.mod_time),
            "typeflag": self.typeflag,
        }

    @classmethod
    def from_dict(cls, data: dict) -> FileMetadata:
        """Deserialize from a dict with JSON field names.

        Missing keys fall back to the same defaults as the constructor. In
        particular a missing (or null) ``"mtime"`` yields the zero time
        instead of ``None``, so the result can always be passed back through
        ``to_dict()``.

        Raises:
            ValueError: If ``"mtime"`` is present but not a parseable timestamp.
        """
        raw_mtime = data.get("mtime")
        mod_time = _zero_time() if raw_mtime is None else _parse_datetime(raw_mtime)
        return cls(
            name=data.get("name", ""),
            mode=data.get("mode", 0),
            uid=data.get("uid", 0),
            gid=data.get("gid", 0),
            size=data.get("size", 0),
            mod_time=mod_time,
            typeflag=data.get("typeflag", 0),
        )
# Naming convention used by every family below, as visible in the values:
#   <NAME>_RAW  -> "...v1.raw"
#   <NAME>      -> "...v1.tar"
#   <NAME>_GZIP -> "...v1.tar+gzip"
#   <NAME>_ZSTD -> "...v1.tar+zstd"

# Artifact type for a model manifest.
ARTIFACT_TYPE_MODEL_MANIFEST = "application/vnd.cncf.model.manifest.v1+json"

# Media type for a model configuration.
MEDIA_TYPE_MODEL_CONFIG = "application/vnd.cncf.model.config.v1+json"

# Model weight media types.
MEDIA_TYPE_MODEL_WEIGHT_RAW = "application/vnd.cncf.model.weight.v1.raw"
MEDIA_TYPE_MODEL_WEIGHT = "application/vnd.cncf.model.weight.v1.tar"
MEDIA_TYPE_MODEL_WEIGHT_GZIP = "application/vnd.cncf.model.weight.v1.tar+gzip"
MEDIA_TYPE_MODEL_WEIGHT_ZSTD = "application/vnd.cncf.model.weight.v1.tar+zstd"

# Model weight config media types.
MEDIA_TYPE_MODEL_WEIGHT_CONFIG_RAW = "application/vnd.cncf.model.weight.config.v1.raw"
MEDIA_TYPE_MODEL_WEIGHT_CONFIG = "application/vnd.cncf.model.weight.config.v1.tar"
# The two values below are wrapped in parentheses only to stay within the
# project's 88-column line limit.
MEDIA_TYPE_MODEL_WEIGHT_CONFIG_GZIP = (
    "application/vnd.cncf.model.weight.config.v1.tar+gzip"
)
MEDIA_TYPE_MODEL_WEIGHT_CONFIG_ZSTD = (
    "application/vnd.cncf.model.weight.config.v1.tar+zstd"
)

# Model documentation media types.
MEDIA_TYPE_MODEL_DOC_RAW = "application/vnd.cncf.model.doc.v1.raw"
MEDIA_TYPE_MODEL_DOC = "application/vnd.cncf.model.doc.v1.tar"
MEDIA_TYPE_MODEL_DOC_GZIP = "application/vnd.cncf.model.doc.v1.tar+gzip"
MEDIA_TYPE_MODEL_DOC_ZSTD = "application/vnd.cncf.model.doc.v1.tar+zstd"

# Model code media types.
MEDIA_TYPE_MODEL_CODE_RAW = "application/vnd.cncf.model.code.v1.raw"
MEDIA_TYPE_MODEL_CODE = "application/vnd.cncf.model.code.v1.tar"
MEDIA_TYPE_MODEL_CODE_GZIP = "application/vnd.cncf.model.code.v1.tar+gzip"
MEDIA_TYPE_MODEL_CODE_ZSTD = "application/vnd.cncf.model.code.v1.tar+zstd"

# Model dataset media types.
MEDIA_TYPE_MODEL_DATASET_RAW = "application/vnd.cncf.model.dataset.v1.raw"
MEDIA_TYPE_MODEL_DATASET = "application/vnd.cncf.model.dataset.v1.tar"
MEDIA_TYPE_MODEL_DATASET_GZIP = "application/vnd.cncf.model.dataset.v1.tar+gzip"
MEDIA_TYPE_MODEL_DATASET_ZSTD = "application/vnd.cncf.model.dataset.v1.tar+zstd"


# generated by datamodel-codegen:
#   filename:  config-schema.json
#
# NOTE: the classes below are generated output. The `#` comments are reading
# aids only and will be lost on regeneration; change the JSON schema, not
# this code. Field names are camelCase because they are the JSON keys.


# Descriptive metadata about a model. Every field is optional (defaults to
# None); keys not declared here are rejected because of extra="forbid".
class ModelDescriptor(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
    )
    createdAt: AwareDatetime | None = None
    authors: list[str] | None = None
    family: str | None = None
    # When given, the name must contain at least one character.
    name: str | None = Field(None, min_length=1)
    docURL: str | None = None
    sourceURL: str | None = None
    datasetsURL: list[str] | None = None
    version: str | None = None
    revision: str | None = None
    vendor: str | None = None
    licenses: list[str] | None = None
    title: str | None = None
    description: str | None = None


# Layer listing of the model. Both fields are required.
class ModelFS(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
    )
    # The only accepted value is the literal string "layers".
    type: Literal["layers"]
    # Must contain at least one entry.
    diffIds: list[str] = Field(..., min_length=1)


# A language code: exactly two lowercase ASCII letters (e.g. "en").
class Language(RootModel[str]):
    root: str = Field(..., pattern="^[a-z]{2}$")


# An input/output modality, restricted to the listed literal values.
class Modality(
    RootModel[Literal["text", "image", "audio", "video", "embedding", "other"]]
):
    root: Literal["text", "image", "audio", "video", "embedding", "other"]


# Optional capability flags and lists; unknown keys are rejected.
class ModelCapabilities(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
    )
    inputTypes: list[Modality] | None = None
    outputTypes: list[Modality] | None = None
    knowledgeCutoff: AwareDatetime | None = None
    reasoning: bool | None = None
    toolUsage: bool | None = None
    reward: bool | None = None
    languages: list[Language] | None = None


# Technical configuration of the model; every field is optional.
class ModelConfig(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
    )
    architecture: str | None = None
    format: str | None = None
    paramSize: str | None = None
    precision: str | None = None
    quantization: str | None = None
    capabilities: ModelCapabilities | None = None


# Top-level document: all three sections are required, nothing else allowed.
class Model(BaseModel):
    model_config = ConfigDict(
        extra="forbid",
    )
    descriptor: ModelDescriptor
    modelfs: ModelFS
    config: ModelConfig
# Validator built on first use and then reused, so the schema file is read,
# parsed and checked only once per process.
_validator = None


def _load_schema() -> dict:
    """Load and return the config JSON schema from the repo root.

    The path is resolved relative to this file (four directories up, then
    ``schema/config-schema.json``), so it only exists when the package is
    used from a repository checkout.

    Raises:
        OSError: If the schema file cannot be opened.
        json.JSONDecodeError: If the schema file is not valid JSON.
    """
    schema_path = (
        Path(__file__).resolve().parent.parent.parent.parent
        / "schema"
        / "config-schema.json"
    )
    with schema_path.open(encoding="utf-8") as f:
        return json.load(f)


def _get_validator() -> Draft4Validator:
    """Return the shared validator, building it on first use."""
    global _validator
    if _validator is None:
        schema = _load_schema()
        # Validator.validate() does not verify the schema itself; check it
        # explicitly so a broken schema surfaces as SchemaError.
        Draft4Validator.check_schema(schema)
        _validator = Draft4Validator(schema, format_checker=FormatChecker())
    return _validator


def validate_config(data: dict | str) -> None:
    """Validate a model config against the JSON schema.

    Args:
        data: Either a dict or a JSON document (``str``, or UTF-8 encoded
            ``bytes``/``bytearray``) representing the model config.

    Raises:
        jsonschema.ValidationError: If the config is invalid.
        jsonschema.SchemaError: If the schema itself is invalid.
        json.JSONDecodeError: If data is a string that is not valid JSON.
        OSError: If the schema file cannot be read.
    """
    if isinstance(data, (str, bytes, bytearray)):
        data = json.loads(data)

    _get_validator().validate(data)
class TestAnnotationConstants:
    """Verify annotation constants match Go definitions exactly."""

    def test_filepath(self):
        assert ANNOTATION_FILEPATH == "org.cncf.model.filepath"

    def test_file_metadata(self):
        assert ANNOTATION_FILE_METADATA == "org.cncf.model.file.metadata+json"

    def test_media_type_untested(self):
        assert (
            ANNOTATION_MEDIA_TYPE_UNTESTED == "org.cncf.model.file.mediatype.untested"
        )


class TestFileMetadata:
    """Tests for FileMetadata serialization."""

    def test_round_trip(self):
        dt = datetime(2025, 1, 1, 12, 0, 0, tzinfo=timezone.utc)
        meta = FileMetadata(
            name="model.bin",
            mode=0o644,
            uid=1000,
            gid=1000,
            size=1024,
            mod_time=dt,
            typeflag=0,
        )
        d = meta.to_dict()
        # Pin the complete wire format, including the 'Z'-suffixed timestamp,
        # rather than a handful of keys.
        assert d == {
            "name": "model.bin",
            "mode": 0o644,
            "uid": 1000,
            "gid": 1000,
            "size": 1024,
            "mtime": "2025-01-01T12:00:00Z",
            "typeflag": 0,
        }

        # Dataclass equality compares every field, so this also covers
        # uid, gid, typeflag and the restored modification time.
        restored = FileMetadata.from_dict(d)
        assert restored == meta
        assert restored.mod_time == dt

    def test_empty(self):
        meta = FileMetadata()
        d = meta.to_dict()
        assert d["name"] == ""
        assert d["size"] == 0
        # The default modification time serializes as the year-1 UTC instant.
        assert d["mtime"] == "0001-01-01T00:00:00Z"


class TestModality:
    """Tests for the Modality RootModel."""

    def test_all_values(self):
        for val in ("text", "image", "audio", "video", "embedding", "other"):
            m = Modality(root=val)
            assert m.root == val

    def test_from_string(self):
        m = Modality.model_validate("text")
        assert m.root == "text"

    def test_invalid_value(self):
        with pytest.raises(ValidationError):
            Modality.model_validate("invalid")
class TestLanguage:
    """Checks for the two-letter ``Language`` root model."""

    def test_valid(self):
        assert Language.model_validate("en").root == "en"

    def test_invalid_three_letter(self):
        # Three letters do not match the two-letter pattern.
        with pytest.raises(ValidationError):
            Language.model_validate("fra")

    def test_invalid_uppercase(self):
        # The pattern only admits lowercase letters.
        with pytest.raises(ValidationError):
            Language.model_validate("EN")


class TestModelCapabilities:
    """Checks for the ``ModelCapabilities`` pydantic model."""

    def test_empty(self):
        assert ModelCapabilities().model_dump(exclude_none=True) == {}

    def test_round_trip(self):
        original = ModelCapabilities(
            inputTypes=[Modality(root=kind) for kind in ("text", "image")],
            outputTypes=[Modality(root="text")],
            reasoning=True,
            toolUsage=False,
            reward=True,
            languages=[Language(root=code) for code in ("en", "fr")],
        )

        dumped = original.model_dump(exclude_none=True)
        assert dumped["inputTypes"] == ["text", "image"]
        assert dumped["outputTypes"] == ["text"]
        assert dumped["reasoning"] is True
        assert dumped["toolUsage"] is False
        assert dumped["reward"] is True
        assert dumped["languages"] == ["en", "fr"]

        reloaded = ModelCapabilities.model_validate(dumped)
        assert reloaded.inputTypes[0].root == "text"
        assert reloaded.reasoning is True
        assert reloaded.toolUsage is False

    def test_knowledge_cutoff(self):
        cutoff = datetime(2025, 1, 1, tzinfo=timezone.utc)
        dumped = ModelCapabilities(knowledgeCutoff=cutoff).model_dump(
            exclude_none=True, mode="json"
        )
        assert "knowledgeCutoff" in dumped

        reloaded = ModelCapabilities.model_validate(dumped)
        assert reloaded.knowledgeCutoff is not None


class TestModelConfig:
    """Checks for the ``ModelConfig`` pydantic model."""

    def test_empty(self):
        assert ModelConfig().model_dump(exclude_none=True) == {}

    def test_round_trip(self):
        original = ModelConfig(
            architecture="transformer",
            format="safetensors",
            paramSize="8b",
            precision="float16",
            quantization="awq",
        )

        dumped = original.model_dump(exclude_none=True)
        assert dumped["architecture"] == "transformer"
        assert dumped["paramSize"] == "8b"

        reloaded = ModelConfig.model_validate(dumped)
        assert reloaded.architecture == "transformer"
        assert reloaded.paramSize == "8b"

    def test_with_capabilities(self):
        text_only = ModelCapabilities(
            inputTypes=[Modality(root="text")],
            outputTypes=[Modality(root="text")],
        )
        dumped = ModelConfig(paramSize="8b", capabilities=text_only).model_dump(
            exclude_none=True
        )
        assert "capabilities" in dumped
        assert dumped["capabilities"]["inputTypes"] == ["text"]


class TestModelFS:
    """Checks for the ``ModelFS`` pydantic model."""

    def test_round_trip(self):
        original = ModelFS(type="layers", diffIds=["sha256:abc123"])

        dumped = original.model_dump()
        assert dumped["type"] == "layers"
        assert dumped["diffIds"] == ["sha256:abc123"]

        reloaded = ModelFS.model_validate(dumped)
        assert reloaded.type == "layers"
        assert reloaded.diffIds == ["sha256:abc123"]

    def test_invalid_type(self):
        # Anything other than the literal "layers" is refused.
        with pytest.raises(ValidationError):
            ModelFS(type="invalid", diffIds=["sha256:abc"])

    def test_empty_diff_ids(self):
        # At least one diff id is required.
        with pytest.raises(ValidationError):
            ModelFS(type="layers", diffIds=[])
class TestModelDescriptor:
    """Checks for the ``ModelDescriptor`` pydantic model."""

    def test_empty(self):
        assert ModelDescriptor().model_dump(exclude_none=True) == {}

    def test_round_trip(self):
        original = ModelDescriptor(
            name="llama3-8b-instruct",
            version="3.1",
            family="llama3",
            authors=["Meta"],
            licenses=["Apache-2.0"],
        )

        dumped = original.model_dump(exclude_none=True)
        assert dumped["name"] == "llama3-8b-instruct"
        assert dumped["version"] == "3.1"

        reloaded = ModelDescriptor.model_validate(dumped)
        assert reloaded.name == "llama3-8b-instruct"
        assert reloaded.authors == ["Meta"]

    def test_created_at(self):
        created = datetime(2025, 6, 15, 10, 30, 0, tzinfo=timezone.utc)
        dumped = ModelDescriptor(name="test", createdAt=created).model_dump(
            exclude_none=True, mode="json"
        )
        assert "createdAt" in dumped

        reloaded = ModelDescriptor.model_validate(dumped)
        assert reloaded.createdAt is not None

    def test_empty_name_rejected(self):
        with pytest.raises(ValidationError):
            ModelDescriptor(name="")

    def test_extra_field_rejected(self):
        payload = {"name": "test", "unknownField": "value"}
        with pytest.raises(ValidationError):
            ModelDescriptor.model_validate(payload)


class TestModel:
    """Checks for the top-level ``Model`` pydantic model."""

    def test_minimal(self):
        minimal = Model(
            descriptor=ModelDescriptor(name="test-model"),
            modelfs=ModelFS(type="layers", diffIds=["sha256:abc"]),
            config=ModelConfig(paramSize="8b"),
        )
        dumped = minimal.model_dump(exclude_none=True)
        assert dumped["descriptor"]["name"] == "test-model"
        assert dumped["modelfs"]["type"] == "layers"
        assert dumped["config"]["paramSize"] == "8b"

    def test_json_round_trip(self):
        digest = (
            "sha256:1234567890abcdef1234567890abcdef"
            "1234567890abcdef1234567890abcdef"
        )
        original = Model(
            descriptor=ModelDescriptor(name="llama3-8b-instruct", version="3.1"),
            modelfs=ModelFS(type="layers", diffIds=[digest]),
            config=ModelConfig(paramSize="8b"),
        )

        reloaded = Model.model_validate_json(original.model_dump_json())
        assert reloaded.descriptor.name == "llama3-8b-instruct"
        assert reloaded.modelfs.type == "layers"
        assert reloaded.config.paramSize == "8b"

    def test_from_json_string(self):
        payload = {
            "descriptor": {"name": "test"},
            "modelfs": {"type": "layers", "diffIds": ["sha256:abc"]},
            "config": {"paramSize": "1b"},
        }
        parsed = Model.model_validate_json(json.dumps(payload))
        assert parsed.descriptor.name == "test"
        assert parsed.config.paramSize == "1b"

    def test_full_model(self):
        descriptor = ModelDescriptor(
            name="qwen2-vl-72b-instruct",
            version="2.0",
            family="qwen2",
            vendor="Alibaba",
            authors=["Qwen Team"],
            licenses=["Apache-2.0"],
            title="Qwen2 VL 72B Instruct",
            description="A vision-language model",
            docURL="https://example.com/docs",
            sourceURL="https://github.com/example/qwen2",
            datasetsURL=["https://example.com/dataset"],
        )
        capabilities = ModelCapabilities(
            inputTypes=[Modality(root="text"), Modality(root="image")],
            outputTypes=[Modality(root="text")],
            reasoning=True,
            toolUsage=True,
            languages=[Language(root="en"), Language(root="zh")],
        )
        full = Model(
            descriptor=descriptor,
            modelfs=ModelFS(
                type="layers",
                diffIds=["sha256:aabbcc", "sha256:ddeeff"],
            ),
            config=ModelConfig(
                architecture="transformer",
                format="safetensors",
                paramSize="72b",
                precision="bfloat16",
                capabilities=capabilities,
            ),
        )

        dumped = full.model_dump(exclude_none=True)
        assert dumped["descriptor"]["vendor"] == "Alibaba"
        assert dumped["config"]["capabilities"]["inputTypes"] == ["text", "image"]
        assert dumped["config"]["capabilities"]["languages"] == ["en", "zh"]

        reloaded_caps = Model.model_validate_json(
            full.model_dump_json()
        ).config.capabilities
        assert reloaded_caps.inputTypes[0].root == "text"
        assert reloaded_caps.inputTypes[1].root == "image"
        assert reloaded_caps.languages[0].root == "en"
        assert reloaded_caps.languages[1].root == "zh"

    def test_missing_required_fields(self):
        with pytest.raises(ValidationError):
            Model.model_validate({})

    def test_extra_field_at_root(self):
        payload = {
            "descriptor": {"name": "test"},
            "modelfs": {"type": "layers", "diffIds": ["sha256:abc"]},
            "config": {"paramSize": "8b"},
            "extraField": "should fail",
        }
        with pytest.raises(ValidationError):
            Model.model_validate(payload)
class TestMediaTypes:
    """Verify media type constants match Go definitions exactly.

    Each test lists ``(constant, expected string)`` pairs and compares them
    in order, so the first mismatch fails the test.
    """

    def test_artifact_type(self):
        expected = "application/vnd.cncf.model.manifest.v1+json"
        assert mediatype.ARTIFACT_TYPE_MODEL_MANIFEST == expected

    def test_config(self):
        expected = "application/vnd.cncf.model.config.v1+json"
        assert mediatype.MEDIA_TYPE_MODEL_CONFIG == expected

    def test_weight_types(self):
        pairs = [
            (
                mediatype.MEDIA_TYPE_MODEL_WEIGHT_RAW,
                "application/vnd.cncf.model.weight.v1.raw",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_WEIGHT,
                "application/vnd.cncf.model.weight.v1.tar",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_WEIGHT_GZIP,
                "application/vnd.cncf.model.weight.v1.tar+gzip",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_WEIGHT_ZSTD,
                "application/vnd.cncf.model.weight.v1.tar+zstd",
            ),
        ]
        for actual, expected in pairs:
            assert actual == expected

    def test_weight_config_types(self):
        pairs = [
            (
                mediatype.MEDIA_TYPE_MODEL_WEIGHT_CONFIG_RAW,
                "application/vnd.cncf.model.weight.config.v1.raw",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_WEIGHT_CONFIG,
                "application/vnd.cncf.model.weight.config.v1.tar",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_WEIGHT_CONFIG_GZIP,
                "application/vnd.cncf.model.weight.config.v1.tar+gzip",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_WEIGHT_CONFIG_ZSTD,
                "application/vnd.cncf.model.weight.config.v1.tar+zstd",
            ),
        ]
        for actual, expected in pairs:
            assert actual == expected

    def test_doc_types(self):
        pairs = [
            (
                mediatype.MEDIA_TYPE_MODEL_DOC_RAW,
                "application/vnd.cncf.model.doc.v1.raw",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_DOC,
                "application/vnd.cncf.model.doc.v1.tar",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_DOC_GZIP,
                "application/vnd.cncf.model.doc.v1.tar+gzip",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_DOC_ZSTD,
                "application/vnd.cncf.model.doc.v1.tar+zstd",
            ),
        ]
        for actual, expected in pairs:
            assert actual == expected

    def test_code_types(self):
        pairs = [
            (
                mediatype.MEDIA_TYPE_MODEL_CODE_RAW,
                "application/vnd.cncf.model.code.v1.raw",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_CODE,
                "application/vnd.cncf.model.code.v1.tar",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_CODE_GZIP,
                "application/vnd.cncf.model.code.v1.tar+gzip",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_CODE_ZSTD,
                "application/vnd.cncf.model.code.v1.tar+zstd",
            ),
        ]
        for actual, expected in pairs:
            assert actual == expected

    def test_dataset_types(self):
        pairs = [
            (
                mediatype.MEDIA_TYPE_MODEL_DATASET_RAW,
                "application/vnd.cncf.model.dataset.v1.raw",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_DATASET,
                "application/vnd.cncf.model.dataset.v1.tar",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_DATASET_GZIP,
                "application/vnd.cncf.model.dataset.v1.tar+gzip",
            ),
            (
                mediatype.MEDIA_TYPE_MODEL_DATASET_ZSTD,
                "application/vnd.cncf.model.dataset.v1.tar+zstd",
            ),
        ]
        for actual, expected in pairs:
            assert actual == expected
+VALID_CONFIG = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef1234567890abcdef1234567890abcdef" + ], + }, +} + + +class TestValidConfigCases: + """Tests that valid configs pass validation.""" + + def test_minimal_valid(self): + validate_config(VALID_CONFIG) + + def test_valid_as_json_string(self): + validate_config(json.dumps(VALID_CONFIG)) + + def test_valid_with_all_fields(self): + config = { + "descriptor": { + "name": "llama3-8b-instruct", + "version": "3.1", + "family": "llama3", + "authors": ["Meta"], + "vendor": "Meta", + "licenses": ["Apache-2.0"], + "title": "Llama 3 8B Instruct", + "description": "An instruction-tuned LLM", + "createdAt": "2025-01-01T00:00:00Z", + "docURL": "https://example.com/docs", + "sourceURL": "https://github.com/meta/llama3", + "datasetsURL": ["https://example.com/data"], + "revision": "abc123", + }, + "config": { + "architecture": "transformer", + "format": "safetensors", + "paramSize": "8b", + "precision": "float16", + "quantization": "awq", + "capabilities": { + "inputTypes": ["text"], + "outputTypes": ["text"], + "knowledgeCutoff": "2025-01-01T00:00:00Z", + "reasoning": True, + "toolUsage": True, + "reward": False, + "languages": ["en", "fr"], + }, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:abcdef1234567890abcdef1234567890" + "abcdef1234567890abcdef1234567890" + ], + }, + } + validate_config(config) + + +class TestFailureConfigCases: + """Tests mirroring Go config_test.go failure cases. + + Each test corresponds to a numbered test case in the Go file. 
+ """ + + def test_config_missing(self): + """Go test 0: config is missing.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_version_is_number(self): + """Go test 1: version is a number.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": 3.1, + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_revision_is_number(self): + """Go test 2: revision is a number.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + "revision": 1234567890, + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_created_at_not_rfc3339(self): + """Go test 3: createdAt is not RFC3339 format.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + "createdAt": "2025/01/01T00:00:00Z", + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_authors_not_array(self): + """Go test 4: authors is not an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + "authors": "John Doe", + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [ + 
"sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_licenses_not_array(self): + """Go test 5: licenses is not an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + "licenses": "Apache-2.0", + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_doc_url_is_array(self): + """Go test 6: docURL is an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + "docURL": ["https://example.com/doc"], + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_source_url_is_array(self): + """Go test 7: sourceURL is an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + "sourceURL": ["https://github.com/xyz/xyz3"], + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_datasets_url_not_array(self): + """Go test 8: datasetsURL is not an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + "sourceURL": "https://github.com/xyz/xyz3", + "datasetsURL": "https://example.com/dataset", + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + 
validate_config(config) + + def test_param_size_is_number(self): + """Go test 9: paramSize is a number.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": {"paramSize": 8000000}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_precision_is_number(self): + """Go test 10: precision is a number.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": {"precision": 16}, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_type_not_layers(self): + """Go test 11: type is not 'layers'.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layer", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_diff_ids_not_array(self): + """Go test 12: diffIds is not an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": ( + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ), + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_diff_ids_empty(self): + """Go test 13: diffIds is empty.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": [], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def 
test_input_types_not_array(self): + """Go test 14: inputTypes is not an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": {"inputTypes": "text"}, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_output_types_not_array(self): + """Go test 15: outputTypes is not an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": {"outputTypes": "text"}, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_invalid_modality(self): + """Go test 16: invalid modality value.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": {"inputTypes": ["img"]}, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_knowledge_cutoff_not_rfc3339(self): + """Go test 17: knowledgeCutoff is not RFC3339 format.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": { + "inputTypes": ["text"], + "outputTypes": ["text"], + "knowledgeCutoff": "2025-01-01", + }, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_reasoning_not_boolean(self): + 
"""Go test 18: reasoning is not boolean.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": { + "inputTypes": ["text"], + "outputTypes": ["text"], + "reasoning": "true", + }, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_tool_usage_not_boolean(self): + """Go test 19: toolUsage is not boolean.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": { + "inputTypes": ["text"], + "outputTypes": ["text"], + "toolUsage": "true", + }, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_reward_not_boolean(self): + """Go test 20: reward is not boolean.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": { + "inputTypes": ["text"], + "outputTypes": ["text"], + "reward": "true", + }, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_languages_not_array(self): + """Go test 21: languages is not an array.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": { + "inputTypes": ["text"], + "outputTypes": ["text"], + "languages": "en", + }, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with 
pytest.raises(ValidationError): + validate_config(config) + + def test_language_code_not_iso639(self): + """Go test 22: language code is not a two-letter ISO 639 code.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": { + "inputTypes": ["text"], + "outputTypes": ["text"], + "languages": ["fra"], + }, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_unknown_field_in_capabilities(self): + """Go test 23: unknown field in capabilities.""" + config = { + "descriptor": { + "name": "xyz-3-8B-Instruct", + "version": "3.1", + }, + "config": { + "paramSize": "8b", + "capabilities": { + "inputTypes": ["text"], + "unknownField": True, + }, + }, + "modelfs": { + "type": "layers", + "diffIds": [ + "sha256:1234567890abcdef1234567890abcdef" + "1234567890abcdef1234567890abcdef" + ], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + +class TestEdgeCases: + """Additional edge case tests.""" + + def test_empty_dict(self): + with pytest.raises(ValidationError): + validate_config({}) + + def test_invalid_json_string(self): + with pytest.raises(Exception): + validate_config("{invalid json") + + def test_empty_name(self): + """Name with minLength: 1 should reject empty string.""" + config = { + "descriptor": {"name": "", "version": "1.0"}, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": ["sha256:abc"], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_unknown_field_at_root(self): + config = { + "descriptor": {"name": "test", "version": "1.0"}, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": ["sha256:abc"], + }, + "extraField": "should fail", + } + with pytest.raises(ValidationError): + 
validate_config(config) + + def test_unknown_field_in_descriptor(self): + config = { + "descriptor": { + "name": "test", + "version": "1.0", + "unknownField": "value", + }, + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": ["sha256:abc"], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_unknown_field_in_config(self): + config = { + "descriptor": {"name": "test", "version": "1.0"}, + "config": {"paramSize": "8b", "unknownField": "value"}, + "modelfs": { + "type": "layers", + "diffIds": ["sha256:abc"], + }, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_modelfs_missing(self): + config = { + "descriptor": {"name": "test", "version": "1.0"}, + "config": {"paramSize": "8b"}, + } + with pytest.raises(ValidationError): + validate_config(config) + + def test_descriptor_missing(self): + config = { + "config": {"paramSize": "8b"}, + "modelfs": { + "type": "layers", + "diffIds": ["sha256:abc"], + }, + } + with pytest.raises(ValidationError): + validate_config(config) diff --git a/tools/generate_python_models.py b/tools/generate_python_models.py new file mode 100644 index 0000000..95bbc30 --- /dev/null +++ b/tools/generate_python_models.py @@ -0,0 +1,69 @@ +#!/usr/bin/env python3 +"""Generate Python models from the canonical ModelPack JSON Schema.""" + +from __future__ import annotations + +import subprocess +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +SCHEMA_PATH = ROOT / "schema" / "config-schema.json" +OUTPUT_PATH = ROOT / "specs-python" / "modelpack" / "v1" / "models.py" + + +def main() -> int: + try: + import datamodel_code_generator # noqa: F401 + except ModuleNotFoundError: + print( + "error: datamodel-code-generator is not installed. 
" + "Install it with: pip install datamodel-code-generator", + file=sys.stderr, + ) + return 1 + + if not SCHEMA_PATH.is_file(): + print( + f"error: JSON Schema not found at: {SCHEMA_PATH}", + file=sys.stderr, + ) + return 1 + + OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) + + cmd = [ + sys.executable, + "-m", + "datamodel_code_generator", + "--input", + str(SCHEMA_PATH), + "--output", + str(OUTPUT_PATH), + "--input-file-type", + "jsonschema", + "--output-model-type", + "pydantic_v2.BaseModel", + "--target-python-version", + "3.10", + "--enum-field-as-literal", + "all", + "--field-constraints", + "--disable-timestamp", + ] + + try: + subprocess.run(cmd, check=True) + except subprocess.CalledProcessError as exc: + print( + f"error: datamodel-code-generator failed with exit code {exc.returncode}.", + file=sys.stderr, + ) + return exc.returncode or 1 + else: + print(f"Generated: {OUTPUT_PATH}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())