From aefc54bf9bf3357442a2f69f75e9ee91082f6435 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 15 May 2026 19:20:47 +0200 Subject: [PATCH 1/2] fix: correct bug in bytes + gzip codec metadata object types --- .../src/zarr_metadata/v3/codec/bytes.py | 13 +++++++++--- .../src/zarr_metadata/v3/codec/gzip.py | 21 ++++++++++++------- .../tests/v3/codec/bytes/cases.json | 3 +++ .../tests/v3/codec/gzip/cases.json | 7 +------ 4 files changed, 27 insertions(+), 17 deletions(-) diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py index d72f9ac6ea..6ac857cdfd 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py @@ -29,10 +29,16 @@ class BytesCodecConfiguration(TypedDict): class BytesCodecObject(TypedDict): - """`bytes` codec metadata in object form.""" + """`bytes` codec metadata in object form. + + `configuration` is itself optional — when no configuration fields are + set, the entire `configuration` key may be omitted. This matches the + bare-string short-hand form (`BytesCodecName`) at the canonical data + level; both encodings describe a `bytes` codec with default settings. + """ name: BytesCodecName - configuration: BytesCodecConfiguration + configuration: NotRequired[BytesCodecConfiguration] BytesCodecMetadata = BytesCodecObject | BytesCodecName @@ -40,7 +46,8 @@ class BytesCodecObject(TypedDict): The configuration has no required keys (`endian` is conditionally required at runtime based on data type), so the spec's short-hand-name form is -permitted in addition to the object form. +permitted in addition to the object form, and the object form may itself +omit `configuration` entirely. """ __all__ = [ diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/gzip.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/gzip.py index fb0c2faf3e..3b9936f8cd 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/gzip.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/gzip.py @@ -4,7 +4,7 @@ See https://zarr-specs.readthedocs.io/en/latest/v3/codecs/gzip/index.html """ -from typing import Final, Literal, NotRequired +from typing import Final, Literal from typing_extensions import TypedDict @@ -20,11 +20,15 @@ class GzipCodecConfiguration(TypedDict): Configuration for the Zarr v3 `gzip` codec. `level` is an integer in the range 0-9; 0 disables compression and 9 - is slowest with the best compression ratio. The spec does not mandate - a default. + is slowest with the best compression ratio. The codec's compressed + output depends on `level`, so metadata that omits it cannot + reproducibly identify the chunk bytes produced by a writer — `level` + is required for the metadata to fulfill its reproducibility role, + even though the spec text does not mark it required with RFC 2119 + keywords. """ - level: NotRequired[int] + level: int class GzipCodecObject(TypedDict): @@ -34,11 +38,12 @@ class GzipCodecObject(TypedDict): configuration: GzipCodecConfiguration -GzipCodecMetadata = GzipCodecObject | GzipCodecName -"""Permitted JSON shapes for `gzip` codec metadata. +GzipCodecMetadata = GzipCodecObject +"""Permitted JSON shape for `gzip` codec metadata. -The configuration has no required keys (`level` has no spec-mandated -default but is `NotRequired`), so the short-hand-name form is permitted. +`configuration.level` is required (it determines the codec's output bytes +and is therefore part of the metadata's reproducibility contract), so +only the object form is valid; the short-hand-name form is not permitted. """ __all__ = [ diff --git a/packages/zarr-metadata/tests/v3/codec/bytes/cases.json b/packages/zarr-metadata/tests/v3/codec/bytes/cases.json index f3c8b978f8..0c30d70a67 100644 --- a/packages/zarr-metadata/tests/v3/codec/bytes/cases.json +++ b/packages/zarr-metadata/tests/v3/codec/bytes/cases.json @@ -11,5 +11,8 @@ "name": "bytes", "configuration": {} }, + "no_configuration": { + "name": "bytes" + }, "short_hand_name": "bytes" } diff --git a/packages/zarr-metadata/tests/v3/codec/gzip/cases.json b/packages/zarr-metadata/tests/v3/codec/gzip/cases.json index b40cbeeedb..7d5e1e6f94 100644 --- a/packages/zarr-metadata/tests/v3/codec/gzip/cases.json +++ b/packages/zarr-metadata/tests/v3/codec/gzip/cases.json @@ -2,10 +2,5 @@ "with_level": { "name": "gzip", "configuration": {"level": 5} - }, - "no_level": { - "name": "gzip", - "configuration": {} - }, - "short_hand_name": "gzip" + } } From ea89b47f5e8a6c187bca6892160a3a15af38ad21 Mon Sep 17 00:00:00 2001 From: Davis Vann Bennett Date: Fri, 15 May 2026 22:04:53 +0200 Subject: [PATCH 2/2] feat: add typed constants --- .../src/zarr_metadata/v2/array.py | 14 +++++++++++--- .../v3/chunk_key_encoding/default.py | 6 +++++- .../zarr_metadata/v3/chunk_key_encoding/v2.py | 6 +++++- .../src/zarr_metadata/v3/codec/blosc.py | 12 ++++++++++-- .../src/zarr_metadata/v3/codec/bytes.py | 6 +++++- .../src/zarr_metadata/v3/codec/cast_value.py | 18 ++++++++++++++++-- .../zarr_metadata/v3/codec/sharding_indexed.py | 6 +++++- 7 files changed, 57 insertions(+), 11 deletions(-) diff --git a/packages/zarr-metadata/src/zarr_metadata/v2/array.py b/packages/zarr-metadata/src/zarr_metadata/v2/array.py index 9043fd1893..04f76c264f 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v2/array.py +++ b/packages/zarr-metadata/src/zarr_metadata/v2/array.py @@ -1,7 +1,7 @@ """Zarr v2 array metadata types.""" from collections.abc import Mapping -from typing import Literal, NotRequired +from typing import Final, Literal, NotRequired from typing_extensions import TypedDict @@ -22,15 +22,18 @@ """ ArrayOrderV2 = Literal["C", "F"] -"""Permitted values for the `order` field of v2 array metadata. +"""Literal type of permitted values for the `order` field of v2 array metadata. `"C"` (row-major) or `"F"` (column-major) — the in-chunk byte layout. See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html """ +ARRAY_ORDER_V2: Final = ("C", "F") +"""Tuple of permitted values for the `order` field of v2 array metadata.""" + ArrayDimensionSeparatorV2 = Literal[".", "/"] -"""Permitted values for the `dimension_separator` field of v2 array metadata. +"""Literal type of permitted values for the `dimension_separator` field of v2 array metadata. `"."` (legacy default) joins chunk grid coordinates as `0.0`, `0.1`, ... `"/"` joins them as `0/0`, `0/1`, ... yielding nested directories. @@ -38,6 +41,9 @@ See https://zarr-specs.readthedocs.io/en/latest/v2/v2.0.html """ +ARRAY_DIMENSION_SEPARATOR_V2: Final = (".", "/") +"""Tuple of permitted values for the `dimension_separator` field of v2 array metadata.""" + class ZArrayMetadata(TypedDict): """ @@ -93,6 +99,8 @@ class ArrayMetadataV2(TypedDict): __all__ = [ + "ARRAY_DIMENSION_SEPARATOR_V2", + "ARRAY_ORDER_V2", "ArrayDimensionSeparatorV2", "ArrayMetadataV2", "ArrayOrderV2", diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/default.py b/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/default.py index d69d6af23f..c783861b34 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/default.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/default.py @@ -18,11 +18,14 @@ """Literal type of the `name` field of the default chunk key encoding.""" DefaultChunkKeyEncodingSeparator = Literal["/", "."] -"""Permitted `separator` values for the default chunk key encoding. +"""Literal type of permitted `separator` values for the default chunk key encoding. Defaults to `"/"` if absent. """ +DEFAULT_CHUNK_KEY_ENCODING_SEPARATOR: Final = ("/", ".") +"""Tuple of permitted values for the `separator` field of the default chunk key encoding.""" + class DefaultChunkKeyEncodingConfiguration(TypedDict): """Configuration for the default chunk key encoding. @@ -49,6 +52,7 @@ class DefaultChunkKeyEncodingObject(TypedDict): __all__ = [ "DEFAULT_CHUNK_KEY_ENCODING_NAME", + "DEFAULT_CHUNK_KEY_ENCODING_SEPARATOR", "DefaultChunkKeyEncodingConfiguration", "DefaultChunkKeyEncodingMetadata", "DefaultChunkKeyEncodingName", diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/v2.py b/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/v2.py index ce247ca12c..fef5793626 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/v2.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/chunk_key_encoding/v2.py @@ -18,11 +18,14 @@ """Literal type of the `name` field of the v2 chunk key encoding.""" V2ChunkKeyEncodingSeparator = Literal["/", "."] -"""Permitted `separator` values for the v2 chunk key encoding. +"""Literal type of permitted `separator` values for the v2 chunk key encoding. Defaults to `"."` if absent. """ +V2_CHUNK_KEY_ENCODING_SEPARATOR: Final = ("/", ".") +"""Tuple of permitted values for the `separator` field of the v2 chunk key encoding.""" + class V2ChunkKeyEncodingConfiguration(TypedDict): """Configuration for the v2 chunk key encoding. @@ -49,6 +52,7 @@ class V2ChunkKeyEncodingObject(TypedDict): __all__ = [ "V2_CHUNK_KEY_ENCODING_NAME", + "V2_CHUNK_KEY_ENCODING_SEPARATOR", "V2ChunkKeyEncodingConfiguration", "V2ChunkKeyEncodingMetadata", "V2ChunkKeyEncodingName", diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/blosc.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/blosc.py index 69152cee50..5a986c8260 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/blosc.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/blosc.py @@ -15,10 +15,16 @@ """Literal type of the `name` field of the `blosc` codec.""" BloscShuffle = Literal["noshuffle", "shuffle", "bitshuffle"] -"""Blosc shuffle mode names.""" +"""Literal type of blosc shuffle mode names.""" + +BLOSC_SHUFFLE: Final = ("noshuffle", "shuffle", "bitshuffle") +"""Tuple of permitted values for the `shuffle` field of the `blosc` codec.""" BloscCName = Literal["lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd"] -"""Blosc compressor identifiers.""" +"""Literal type of blosc compressor identifiers.""" + +BLOSC_CNAME: Final = ("lz4", "lz4hc", "blosclz", "snappy", "zlib", "zstd") +"""Tuple of permitted values for the `cname` field of the `blosc` codec.""" class BloscCodecConfiguration(TypedDict): @@ -47,7 +53,9 @@ class BloscCodecObject(TypedDict): """ __all__ = [ + "BLOSC_CNAME", "BLOSC_CODEC_NAME", + "BLOSC_SHUFFLE", "BloscCName", "BloscCodecConfiguration", "BloscCodecMetadata", diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py index 6ac857cdfd..522cbe10f5 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/bytes.py @@ -15,7 +15,10 @@ """Literal type of the `name` field of the `bytes` codec.""" Endian = Literal["little", "big"] -"""Byte order of multi-byte numeric data.""" +"""Literal type of byte order of multi-byte numeric data.""" + +ENDIAN: Final = ("little", "big") +"""Tuple of permitted values for the `endian` field of the `bytes` codec.""" class BytesCodecConfiguration(TypedDict): @@ -52,6 +55,7 @@ class BytesCodecObject(TypedDict): __all__ = [ "BYTES_CODEC_NAME", + "ENDIAN", "BytesCodecConfiguration", "BytesCodecMetadata", "BytesCodecName", diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py index 468dab9587..17905bf38a 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/cast_value.py @@ -23,17 +23,29 @@ "towards-negative", "nearest-away", ] -"""Permitted values for the `rounding` configuration field. +"""Literal type of permitted values for the `rounding` configuration field. Defaults to `"nearest-even"` if absent. """ +ROUNDING_MODE: Final = ( + "nearest-even", + "towards-zero", + "towards-positive", + "towards-negative", + "nearest-away", +) +"""Tuple of permitted values for the `rounding` field of the `cast_value` codec.""" + OutOfRangeMode = Literal["clamp", "wrap"] -"""Permitted values for the `out_of_range` configuration field. +"""Literal type of permitted values for the `out_of_range` configuration field. If absent, out-of-range values are an encoding/decoding error. """ +OUT_OF_RANGE_MODE: Final = ("clamp", "wrap") +"""Tuple of permitted values for the `out_of_range` field of the `cast_value` codec.""" + ScalarMapEntry = tuple[object, object] """A single `[input, output]` mapping in a `scalar_map` direction. @@ -81,6 +93,8 @@ class CastValueCodecObject(TypedDict): __all__ = [ "CAST_VALUE_CODEC_NAME", + "OUT_OF_RANGE_MODE", + "ROUNDING_MODE", "CastValueCodecConfiguration", "CastValueCodecMetadata", "CastValueCodecName", diff --git a/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py b/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py index c8dd954e3f..93a0774e4e 100644 --- a/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py +++ b/packages/zarr-metadata/src/zarr_metadata/v3/codec/sharding_indexed.py @@ -17,7 +17,10 @@ """Literal type of the `name` field of the `sharding_indexed` codec.""" IndexLocation = Literal["start", "end"] -"""Position of the shard index within the encoded shard.""" +"""Literal type of the position of the shard index within the encoded shard.""" + +INDEX_LOCATION: Final = ("start", "end") +"""Tuple of permitted values for the `index_location` field of the `sharding_indexed` codec.""" class ShardingIndexedCodecConfiguration(TypedDict): @@ -58,6 +61,7 @@ class ShardingIndexedCodecObject(TypedDict): """ __all__ = [ + "INDEX_LOCATION", "SHARDING_INDEXED_CODEC_NAME", "IndexLocation", "ShardingIndexedCodecConfiguration",