diff --git a/changes/3004.feature.md b/changes/3004.feature.md new file mode 100644 index 0000000000..9d3816a50c --- /dev/null +++ b/changes/3004.feature.md @@ -0,0 +1,4 @@ +Optimizes reading multiple chunks from a shard. Serial calls to `Store.get()` +in the sharding codec have been replaced with a single call to +`Store.get_ranges()`, which coalesces nearby byte ranges and fetches them +concurrently. diff --git a/src/zarr/codecs/sharding.py b/src/zarr/codecs/sharding.py index d0b2cec285..d25ac6fac0 100644 --- a/src/zarr/codecs/sharding.py +++ b/src/zarr/codecs/sharding.py @@ -60,6 +60,7 @@ parse_codecs, ) from zarr.registry import get_ndbuffer_class, get_pipeline_class +from zarr.storage._common import StorePath from zarr.storage._utils import _normalize_byte_range_index if TYPE_CHECKING: @@ -495,7 +496,7 @@ async def _decode_partial_single( all_chunk_coords = {chunk_coords for chunk_coords, *_ in indexed_chunks} # reading bytes of all requested chunks - shard_dict: ShardMapping = {} + shard_dict_maybe: ShardMapping | None if self._is_total_shard(all_chunk_coords, chunks_per_shard): # read entire shard shard_dict_maybe = await self._load_full_shard_maybe( @@ -503,24 +504,18 @@ async def _decode_partial_single( prototype=chunk_spec.prototype, chunks_per_shard=chunks_per_shard, ) - if shard_dict_maybe is None: - return None - shard_dict = shard_dict_maybe else: # read some chunks within the shard - shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard) - if shard_index is None: - return None - shard_dict = {} - for chunk_coords in all_chunk_coords: - chunk_byte_slice = shard_index.get_chunk_slice(chunk_coords) - if chunk_byte_slice: - chunk_bytes = await byte_getter.get( - prototype=chunk_spec.prototype, - byte_range=RangeByteRequest(chunk_byte_slice[0], chunk_byte_slice[1]), - ) - if chunk_bytes: - shard_dict[chunk_coords] = chunk_bytes + shard_dict_maybe = await self._load_partial_shard_maybe( + byte_getter, + chunk_spec.prototype, + 
chunks_per_shard, + all_chunk_coords, + ) + + if shard_dict_maybe is None: + return None + shard_dict = shard_dict_maybe # decoding chunks and writing them into the output buffer await self.codec_pipeline.read( @@ -807,6 +802,66 @@ async def _load_full_shard_maybe( else None ) + async def _load_partial_shard_maybe( + self, + byte_getter: ByteGetter, + prototype: BufferPrototype, + chunks_per_shard: tuple[int, ...], + all_chunk_coords: set[tuple[int, ...]], + ) -> ShardMapping | None: + """ + Read chunks from `byte_getter` for the case where the read is less than a full shard. + Returns a mapping of chunk coordinates to bytes or None. + """ + shard_index = await self._load_shard_index_maybe(byte_getter, chunks_per_shard) + if shard_index is None: + return None + + # Pair up chunks and their byte ranges as list[tuple[chunk_coord, byte_range]] + chunk_coord_byte_ranges: list[tuple[tuple[int, ...], RangeByteRequest]] = [] + for chunk_coord in all_chunk_coords: + chunk_byte_slice = shard_index.get_chunk_slice(chunk_coord) + if chunk_byte_slice is not None: + chunk_coord_byte_ranges.append( + (chunk_coord, RangeByteRequest(chunk_byte_slice[0], chunk_byte_slice[1])) + ) + + if not chunk_coord_byte_ranges: + return {} + + shard_dict: ShardMutableMapping = {} + if isinstance(byte_getter, StorePath): + # External store: use Store.get_ranges for coalescing + concurrency. + byte_ranges = [byte_range for _, byte_range in chunk_coord_byte_ranges] + try: + async for group in byte_getter.store.get_ranges( + byte_getter.path, byte_ranges, prototype=prototype + ): + for idx, buf in group: + if buf is not None: + chunk_coord, _ = chunk_coord_byte_ranges[idx] + shard_dict[chunk_coord] = buf + except BaseExceptionGroup as eg: + # `Store.get_ranges` raises FileNotFoundError (wrapped in a + # BaseExceptionGroup) if any underlying fetch indicates the key is + # absent. 
The shard index loaded above, so this typically means a + # race where the shard was deleted mid-read; treat it as "shard + # gone" to match the index-missing branch (return None). Anything + # else in the group (e.g. IO errors) is re-raised. + _, rest = eg.split(FileNotFoundError) + if rest is not None: + raise rest from None + return None + else: + # Any other ByteGetter. In practice only `_ShardingByteGetter` for + # nested sharding, which slices an in-memory buffer (no I/O to coalesce). + for chunk_coord, byte_range in chunk_coord_byte_ranges: + buf = await byte_getter.get(prototype, byte_range) + if buf is not None: + shard_dict[chunk_coord] = buf + + return shard_dict + def compute_encoded_size(self, input_byte_length: int, shard_spec: ArraySpec) -> int: chunks_per_shard = self._get_chunks_per_shard(shard_spec) return input_byte_length + self._shard_index_size(chunks_per_shard) diff --git a/tests/test_codecs/test_sharding.py b/tests/test_codecs/test_sharding.py index 2325069dd0..9c4f90792e 100644 --- a/tests/test_codecs/test_sharding.py +++ b/tests/test_codecs/test_sharding.py @@ -1,5 +1,6 @@ import pickle from typing import Any +from unittest.mock import AsyncMock import numpy as np import numpy.typing as npt @@ -198,6 +199,269 @@ def test_sharding_partial_read( assert np.all(read_data == 1) +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_multiple_chunks_partial_shard_read( + store: Store, + index_location: ShardingCodecIndexLocation, +) -> None: + array_shape = (16, 64) + shard_shape = (8, 32) + chunk_shape = (2, 4) + data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape) + + store_mock = AsyncMock(wraps=store, spec=store.__class__) + a = zarr.create_array( + StorePath(store_mock), + shape=data.shape, + chunks=chunk_shape, + shards={"shape": shard_shape, "index_location": index_location}, + 
compressors=BloscCodec(cname="lz4"), + dtype=data.dtype, + fill_value=1, + ) + a[:] = data + + store_mock.reset_mock() # ignore store calls during array creation + + # Reads 3 (2 full, 1 partial) chunks each from 2 shards (a subset of both shards) + # for a total of 6 chunks accessed + assert np.allclose(a[0, 22:42], np.arange(22, 42, dtype="float32")) + + # 2 shard index reads via store.get() + 2 get_ranges calls (one per shard) + assert store_mock.get.call_count == 2 + assert store_mock.get_ranges.call_count == 2 + + store_mock.reset_mock() + + # Reads 4 chunks from both shards along dimension 0 for a total of 8 chunks accessed + assert np.allclose(a[:, 0], np.arange(0, data.size, array_shape[1], dtype="float32")) + + # 2 shard index reads via store.get() + 2 get_ranges calls (one per shard) + assert store_mock.get.call_count == 2 + assert store_mock.get_ranges.call_count == 2 + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_duplicate_read_indexes( + store: Store, + index_location: ShardingCodecIndexLocation, +) -> None: + """ + Check that duplicate index reads are handled correctly when + using get_ranges for chunk data. 
+ """ + array_shape = (15,) + shard_shape = (8,) + chunk_shape = (2,) + data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape) + + store_mock = AsyncMock(wraps=store, spec=store.__class__) + a = zarr.create_array( + StorePath(store_mock), + shape=data.shape, + chunks=chunk_shape, + shards={"shape": shard_shape, "index_location": index_location}, + compressors=BloscCodec(cname="lz4"), + dtype=data.dtype, + fill_value=-1, + ) + a[:] = data + + store_mock.reset_mock() # ignore store calls during array creation + + # Read the same index multiple times from two chunks + indexer = [8, 8, 12, 12] + assert np.array_equal(a[indexer], data[indexer]) + + # 1 shard index read via store.get() + 1 get_ranges call + assert store_mock.get.call_count == 1 + assert store_mock.get_ranges.call_count == 1 + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_read_empty_chunks_within_non_empty_shard_write_empty_false( + store: Store, index_location: ShardingCodecIndexLocation +) -> None: + """ + Case where + - some, but not all, chunks in the last shard are empty + - the last shard is not complete (array length is not a multiple of shard shape), + this takes us down the partial shard read path + - write_empty_chunks=False so the shard index will have fewer entries than chunks in the shard + """ + # array with mixed empty and non-empty chunks in second shard + data = np.array([ + # shard 0. full 8 elements, all chunks have some non-fill data + 0, 1, 2, 3, 4, 5, 6, 7, + # shard 1. 
6 elements (< shard shape) + 2, 0, # chunk 0, written + -9, -9, # chunk 1, all fill, not written + 4, 5 # chunk 2, written + ], dtype="int32") # fmt: off + + spath = StorePath(store) + a = zarr.create_array( + spath, + shape=(14,), + chunks=(2,), + shards={"shape": (8,), "index_location": index_location}, + dtype="int32", + fill_value=-9, + filters=None, + compressors=None, + config={"write_empty_chunks": False}, + ) + a[:] = data + + assert np.array_equal(a[:], data) + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_read_empty_chunks_within_empty_shard_write_empty_false( + store: Store, index_location: ShardingCodecIndexLocation +) -> None: + """ + Case where + - all chunks in last shard are empty + - the last shard is not complete (array length is not a multiple of shard shape), + this takes us down the partial shard read path + - write_empty_chunks=False so the shard index will have no entries + """ + fill_value = -99 + shard_size = 8 + data = np.arange(14, dtype="int32") + data[shard_size:] = fill_value # 2nd shard is all fill value + + spath = StorePath(store) + a = zarr.create_array( + spath, + shape=(14,), + chunks=(2,), + shards={"shape": (shard_size,), "index_location": index_location}, + dtype="int32", + fill_value=fill_value, + filters=None, + compressors=None, + config={"write_empty_chunks": False}, + ) + a[:] = data + + assert np.array_equal(a[:], data) + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_partial_shard_read__index_load_fails( + store: Store, index_location: ShardingCodecIndexLocation +) -> None: + """Test fill value is returned when the call to the store to load the bytes of the shard's chunk index fails.""" + array_shape = (16,) + shard_shape = (16,) + chunk_shape = (8,) + data = 
np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape) + fill_value = -999 + + store_mock = AsyncMock(wraps=store, spec=store.__class__) + # loading the index is the first call to .get() so returning None will simulate an index load failure + store_mock.get.return_value = None + + a = zarr.create_array( + StorePath(store_mock), + shape=data.shape, + chunks=chunk_shape, + shards={"shape": shard_shape, "index_location": index_location}, + compressors=BloscCodec(cname="lz4"), + dtype=data.dtype, + fill_value=fill_value, + ) + a[:] = data + + # Read from one of two chunks in a shard to test the partial shard read path + assert a[0] == fill_value + assert a[0] != data[0] + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_partial_shard_read__index_chunk_slice_fails( + store: Store, + index_location: ShardingCodecIndexLocation, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Test fill value is returned when looking up a chunk's byte slice within a shard fails.""" + array_shape = (16,) + shard_shape = (16,) + chunk_shape = (8,) + data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape) + fill_value = -999 + + monkeypatch.setattr( + "zarr.codecs.sharding._ShardIndex.get_chunk_slice", + lambda self, chunk_coords: None, + ) + + a = zarr.create_array( + StorePath(store), + shape=data.shape, + chunks=chunk_shape, + shards={"shape": shard_shape, "index_location": index_location}, + compressors=BloscCodec(cname="lz4"), + dtype=data.dtype, + fill_value=fill_value, + ) + a[:] = data + + # Read from one of two chunks in a shard to test the partial shard read path + assert a[0] == fill_value + assert a[0] != data[0] + + +@pytest.mark.parametrize("index_location", ["start", "end"]) +@pytest.mark.parametrize("store", ["local", "memory", "zip"], indirect=["store"]) +def test_sharding_partial_shard_read__chunk_load_fails( + store: 
Store, index_location: ShardingCodecIndexLocation +) -> None: + """Test fill value is returned when the call to the store to load a chunk's bytes fails.""" + array_shape = (16,) + shard_shape = (16,) + chunk_shape = (8,) + data = np.arange(np.prod(array_shape), dtype="float32").reshape(array_shape) + fill_value = -999 + + store_mock = AsyncMock(wraps=store, spec=store.__class__) + + a = zarr.create_array( + StorePath(store_mock), + shape=data.shape, + chunks=chunk_shape, + shards={"shape": shard_shape, "index_location": index_location}, + compressors=BloscCodec(cname="lz4"), + dtype=data.dtype, + fill_value=fill_value, + ) + a[:] = data + + # Set up store mock after array creation to simulate chunk load failure. + # Index loads still succeed (via store.get), but chunk-byte loads fail + # (via store.get_ranges raising BaseExceptionGroup containing FileNotFoundError — + # the same shape Store.get_ranges produces when a key is absent). + store_mock.reset_mock() + + async def fail_chunk_reads(key: str, byte_ranges: Any, **kwargs: Any) -> Any: + raise BaseExceptionGroup("chunk read failed", [FileNotFoundError(key)]) + yield # type: ignore[unreachable] # marks this as an async generator + + store_mock.get_ranges = fail_chunk_reads + + # Read from one of two chunks in a shard to test the partial shard read path + assert a[0] == fill_value + assert a[0] != data[0] + + @pytest.mark.parametrize( "array_fixture", [ diff --git a/tests/test_codecs/test_sharding_unit.py b/tests/test_codecs/test_sharding_unit.py new file mode 100644 index 0000000000..e8da18a1ce --- /dev/null +++ b/tests/test_codecs/test_sharding_unit.py @@ -0,0 +1,538 @@ +import numpy as np +import pytest + +from zarr.codecs.sharding import ( + MAX_UINT_64, + ShardingCodec, + _ShardIndex, + _ShardingByteGetter, + _ShardReader, +) +from zarr.core.buffer import default_buffer_prototype +from zarr.core.buffer.cpu import Buffer +from zarr.storage._common import StorePath +from zarr.storage._memory import MemoryStore 
+ +# ============================================================================ +# _ShardIndex tests +# ============================================================================ + + +def test_shard_index_create_empty() -> None: + """Test that create_empty creates an index filled with MAX_UINT_64.""" + chunks_per_shard = (2, 3) + index = _ShardIndex.create_empty(chunks_per_shard) + + assert index.chunks_per_shard == chunks_per_shard + assert index.offsets_and_lengths.shape == (2, 3, 2) + assert index.offsets_and_lengths.dtype == np.dtype("<u8") + assert np.all(index.offsets_and_lengths == MAX_UINT_64) + + +def test_shard_index_create_empty_1d() -> None: + """Test create_empty with 1D chunks_per_shard.""" + chunks_per_shard = (4,) + index = _ShardIndex.create_empty(chunks_per_shard) + + assert index.chunks_per_shard == chunks_per_shard + assert index.offsets_and_lengths.shape == (4, 2) + + +def test_shard_index_is_all_empty_true() -> None: + """Test is_all_empty returns True for a freshly created empty index.""" + index = _ShardIndex.create_empty((2, 2)) + assert index.is_all_empty() is True + + +def test_shard_index_is_all_empty_false() -> None: + """Test is_all_empty returns False when at least one chunk is set.""" + index = _ShardIndex.create_empty((2, 2)) + index.set_chunk_slice((0, 0), slice(0, 100)) + assert index.is_all_empty() is False + + +def test_shard_index_get_chunk_slice_empty() -> None: + """Test get_chunk_slice returns None for empty chunks.""" + index = _ShardIndex.create_empty((2, 2)) + assert index.get_chunk_slice((0, 0)) is None + assert index.get_chunk_slice((1, 1)) is None + + +def test_shard_index_get_chunk_slice_set() -> None: + """Test get_chunk_slice returns correct (start, end) tuple after setting.""" + index = _ShardIndex.create_empty((2, 2)) + index.set_chunk_slice((0, 1), slice(100, 200)) + + result = index.get_chunk_slice((0, 1)) + assert result == (100, 200) + + +def test_shard_index_set_chunk_slice() -> None: + """Test set_chunk_slice correctly sets offset and length.""" + index = _ShardIndex.create_empty((3, 3)) + + # Set a chunk slice 
+ index.set_chunk_slice((1, 2), slice(50, 150)) + + # Verify the underlying array + assert index.offsets_and_lengths[1, 2, 0] == 50 # offset + assert index.offsets_and_lengths[1, 2, 1] == 100 # length (150 - 50) + + +def test_shard_index_set_chunk_slice_none() -> None: + """Test set_chunk_slice with None marks chunk as empty.""" + index = _ShardIndex.create_empty((2, 2)) + + # First set a value + index.set_chunk_slice((0, 0), slice(0, 100)) + assert index.get_chunk_slice((0, 0)) == (0, 100) + + # Then clear it + index.set_chunk_slice((0, 0), None) + assert index.get_chunk_slice((0, 0)) is None + assert index.offsets_and_lengths[0, 0, 0] == MAX_UINT_64 + assert index.offsets_and_lengths[0, 0, 1] == MAX_UINT_64 + + +def test_shard_index_get_full_chunk_map() -> None: + """Test get_full_chunk_map returns correct boolean array.""" + index = _ShardIndex.create_empty((2, 3)) + + # Set some chunks + index.set_chunk_slice((0, 0), slice(0, 10)) + index.set_chunk_slice((1, 2), slice(10, 20)) + + chunk_map = index.get_full_chunk_map() + + assert chunk_map.shape == (2, 3) + assert chunk_map.dtype == np.bool_ + assert chunk_map[0, 0] is np.True_ + assert chunk_map[0, 1] is np.False_ + assert chunk_map[0, 2] is np.False_ + assert chunk_map[1, 0] is np.False_ + assert chunk_map[1, 1] is np.False_ + assert chunk_map[1, 2] is np.True_ + + +def test_shard_index_localize_chunk() -> None: + """Test _localize_chunk maps global coords to local shard coords via modulo.""" + index = _ShardIndex.create_empty((2, 3)) + + # Within bounds - should return same coords + assert index._localize_chunk((0, 0)) == (0, 0) + assert index._localize_chunk((1, 2)) == (1, 2) + + # Out of bounds - should wrap via modulo + assert index._localize_chunk((2, 0)) == (0, 0) # 2 % 2 = 0 + assert index._localize_chunk((3, 5)) == (1, 2) # 3 % 2 = 1, 5 % 3 = 2 + assert index._localize_chunk((4, 6)) == (0, 0) # 4 % 2 = 0, 6 % 3 = 0 + + +def test_shard_index_is_dense_true() -> None: + """Test is_dense returns True when 
chunks are contiguously packed.""" + index = _ShardIndex.create_empty((2,)) + chunk_byte_length = 100 + + # Set chunks contiguously: [0-100), [100-200) + index.set_chunk_slice((0,), slice(0, 100)) + index.set_chunk_slice((1,), slice(100, 200)) + + assert index.is_dense(chunk_byte_length) is True + + +def test_shard_index_is_dense_false_duplicate_offsets() -> None: + """Test is_dense returns False when chunks have duplicate offsets.""" + index = _ShardIndex.create_empty((2,)) + chunk_byte_length = 100 + + # Set both chunks to same offset (duplicate) + index.set_chunk_slice((0,), slice(0, 100)) + index.set_chunk_slice((1,), slice(0, 100)) + + assert index.is_dense(chunk_byte_length) is False + + +def test_shard_index_is_dense_false_wrong_alignment() -> None: + """Test is_dense returns False when chunks are not aligned to chunk_byte_length.""" + index = _ShardIndex.create_empty((2,)) + chunk_byte_length = 100 + + # Set chunks not aligned: [0-100), [150-250) + index.set_chunk_slice((0,), slice(0, 100)) + index.set_chunk_slice((1,), slice(150, 250)) + + assert index.is_dense(chunk_byte_length) is False + + +def test_shard_index_is_dense_with_empty_chunks() -> None: + """Test is_dense handles empty chunks correctly.""" + index = _ShardIndex.create_empty((3,)) + chunk_byte_length = 100 + + # Only set first and third chunk, skip middle + index.set_chunk_slice((0,), slice(0, 100)) + # (1,) is empty + index.set_chunk_slice((2,), slice(100, 200)) + + # Should still be dense since only non-empty chunks are considered + assert index.is_dense(chunk_byte_length) is True + + +# ============================================================================ +# _load_partial_shard_maybe tests +# +# These exercise the partial-shard read path against a real MemoryStore wrapped +# in a StorePath (the external-store branch in `_load_partial_shard_maybe`), +# plus one test against a real `_ShardingByteGetter` (the in-memory branch used +# by nested sharding). 
+# ============================================================================ + + +async def _store_path_with_blob(key: str, blob: bytes) -> StorePath: + """Build a `StorePath` over a fresh `MemoryStore` containing `blob` at `key`.""" + store = MemoryStore() + await store.set(key, Buffer.from_bytes(blob)) + return StorePath(store, key) + + +async def test_load_partial_shard_maybe_index_load_fails() -> None: + """Returns None when the shard key is absent (index load fails).""" + codec = ShardingCodec(chunk_shape=(8,)) + byte_getter = StorePath(MemoryStore(), "missing") + + result = await codec._load_partial_shard_maybe( + byte_getter=byte_getter, + prototype=default_buffer_prototype(), + chunks_per_shard=(2,), + all_chunk_coords={(0,)}, + ) + + assert result is None + + +async def test_load_partial_shard_maybe_with_empty_chunks( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Chunks whose index entry is empty are silently skipped.""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (4,) + + # Index where chunk (1,) is empty; the others point into the stored blob. + index = _ShardIndex.create_empty(chunks_per_shard) + index.set_chunk_slice((0,), slice(0, 100)) + index.set_chunk_slice((2,), slice(100, 200)) + index.set_chunk_slice((3,), slice(200, 300)) + + async def mock_load_index( + self: ShardingCodec, byte_getter: StorePath, cps: tuple[int, ...] 
+ ) -> _ShardIndex: + return index + + monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index) + + byte_getter = await _store_path_with_blob("shard", b"x" * 300) + + result = await codec._load_partial_shard_maybe( + byte_getter=byte_getter, + prototype=default_buffer_prototype(), + chunks_per_shard=chunks_per_shard, + all_chunk_coords={(0,), (1,), (2,)}, + ) + + assert result is not None + assert (0,) in result + assert (1,) not in result # empty in index + assert (2,) in result + + +async def test_load_partial_shard_maybe_all_chunks_empty( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Returns an empty dict when all requested chunks are empty (no I/O issued).""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (4,) + + # Fully-empty index — `get_chunk_slice` returns None for every coord. + index = _ShardIndex.create_empty(chunks_per_shard) + + async def mock_load_index( + self: ShardingCodec, byte_getter: StorePath, cps: tuple[int, ...] + ) -> _ShardIndex: + return index + + monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index) + + # Empty store is fine — we never reach the chunk-read path when all are empty. + byte_getter = StorePath(MemoryStore(), "shard") + + result = await codec._load_partial_shard_maybe( + byte_getter=byte_getter, + prototype=default_buffer_prototype(), + chunks_per_shard=chunks_per_shard, + all_chunk_coords={(0,), (1,), (2,)}, + ) + + assert result == {} + + +async def test_load_partial_shard_returns_chunk_contents( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Returns the correct bytes for each requested chunk.""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (4,) + + index = _ShardIndex.create_empty(chunks_per_shard) + index.set_chunk_slice((0,), slice(0, 100)) + index.set_chunk_slice((1,), slice(100, 200)) + + async def mock_load_index( + self: ShardingCodec, byte_getter: StorePath, cps: tuple[int, ...] 
+ ) -> _ShardIndex: + return index + + monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index) + + blob = b"A" * 100 + b"B" * 100 + byte_getter = await _store_path_with_blob("shard", blob) + + result = await codec._load_partial_shard_maybe( + byte_getter=byte_getter, + prototype=default_buffer_prototype(), + chunks_per_shard=chunks_per_shard, + all_chunk_coords={(0,), (1,)}, + ) + + assert result is not None + buf_0, buf_1 = result[(0,)], result[(1,)] + assert buf_0 is not None + assert buf_1 is not None + assert buf_0.to_bytes() == b"A" * 100 + assert buf_1.to_bytes() == b"B" * 100 + + +async def test_load_partial_shard_shard_disappears_returns_none( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """If the shard key is missing when chunk reads run, returns None. + + This models a race: the index loaded successfully, but the shard was deleted + before the chunk-byte fetches landed. `Store.get_ranges` surfaces this as a + `BaseExceptionGroup` containing `FileNotFoundError`, which the codec catches + and converts to None to match the index-missing branch's behavior. + """ + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (4,) + + index = _ShardIndex.create_empty(chunks_per_shard) + index.set_chunk_slice((0,), slice(0, 100)) + + async def mock_load_index( + self: ShardingCodec, byte_getter: StorePath, cps: tuple[int, ...] + ) -> _ShardIndex: + return index + + monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index) + + # Store has no value for "shard" — `get_ranges` will raise FileNotFoundError. 
+ byte_getter = StorePath(MemoryStore(), "shard") + + result = await codec._load_partial_shard_maybe( + byte_getter=byte_getter, + prototype=default_buffer_prototype(), + chunks_per_shard=chunks_per_shard, + all_chunk_coords={(0,)}, + ) + + assert result is None + + +async def test_load_partial_shard_non_fnf_error_propagates( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Non-FileNotFoundError errors from get_ranges are re-raised, not swallowed. + + Our `BaseExceptionGroup.split(FileNotFoundError)` keeps the "shard gone" + behavior for FNF only; anything else (e.g. an OSError from the underlying + fetch) must bubble up. + """ + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (4,) + + index = _ShardIndex.create_empty(chunks_per_shard) + index.set_chunk_slice((0,), slice(0, 100)) + + async def mock_load_index( + self: ShardingCodec, byte_getter: StorePath, cps: tuple[int, ...] + ) -> _ShardIndex: + return index + + monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index) + + # Make the underlying store.get raise OSError. The default Store.get_ranges + # impl routes through self.get; coalesced_get wraps the failure in a + # BaseExceptionGroup, which our code re-raises (minus FNF leaves, of which + # there are none here). 
+ async def boom(*args: object, **kwargs: object) -> Buffer | None: + raise OSError("injected disk error") + + store = MemoryStore() + monkeypatch.setattr(store, "get", boom) + byte_getter = StorePath(store, "shard") + + with pytest.RaisesGroup(pytest.RaisesExc(OSError, match="injected disk error")): + await codec._load_partial_shard_maybe( + byte_getter=byte_getter, + prototype=default_buffer_prototype(), + chunks_per_shard=chunks_per_shard, + all_chunk_coords={(0,)}, + ) + + +async def test_load_partial_shard_nested_sharding_path( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Nested sharding: byte_getter is a `_ShardingByteGetter` over an in-memory dict.""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (4,) + + index = _ShardIndex.create_empty(chunks_per_shard) + index.set_chunk_slice((0,), slice(0, 100)) + index.set_chunk_slice((1,), slice(100, 200)) + + async def mock_load_index( + self: ShardingCodec, byte_getter: _ShardingByteGetter, cps: tuple[int, ...] + ) -> _ShardIndex: + return index + + monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index) + + # The "store" for an inner shard is a dict keyed by outer-chunk coords; the + # byte_getter reads ranges out of one entry of that dict. 
+ blob = b"A" * 100 + b"B" * 100 + shard_dict: dict[tuple[int, ...], Buffer | None] = {(0,): Buffer.from_bytes(blob)} + byte_getter = _ShardingByteGetter(shard_dict, (0,)) + + result = await codec._load_partial_shard_maybe( + byte_getter=byte_getter, + prototype=default_buffer_prototype(), + chunks_per_shard=chunks_per_shard, + all_chunk_coords={(0,), (1,)}, + ) + + assert result is not None + buf_0, buf_1 = result[(0,)], result[(1,)] + assert buf_0 is not None + assert buf_1 is not None + assert buf_0.to_bytes() == b"A" * 100 + assert buf_1.to_bytes() == b"B" * 100 + + +async def test_load_partial_shard_nested_sharding_missing_outer_chunk( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Nested sharding: outer chunk absent → `_ShardingByteGetter.get` returns None + → chunks are silently skipped, yielding an empty shard_dict.""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (4,) + + index = _ShardIndex.create_empty(chunks_per_shard) + index.set_chunk_slice((0,), slice(0, 100)) + + async def mock_load_index( + self: ShardingCodec, byte_getter: _ShardingByteGetter, cps: tuple[int, ...] + ) -> _ShardIndex: + return index + + monkeypatch.setattr(ShardingCodec, "_load_shard_index_maybe", mock_load_index) + + # Empty outer dict — _ShardingByteGetter.get(...) returns None for any range. 
+ shard_dict: dict[tuple[int, ...], Buffer | None] = {} + byte_getter = _ShardingByteGetter(shard_dict, (0,)) + + result = await codec._load_partial_shard_maybe( + byte_getter=byte_getter, + prototype=default_buffer_prototype(), + chunks_per_shard=chunks_per_shard, + all_chunk_coords={(0,)}, + ) + + assert result == {} + + +# ============================================================================ +# Supporting class tests (_ShardReader, _is_total_shard) +# ============================================================================ + + +def test_shard_reader_create_empty() -> None: + """Test _ShardReader.create_empty creates reader with empty index.""" + chunks_per_shard = (2, 3) + reader = _ShardReader.create_empty(chunks_per_shard) + + assert reader.index.is_all_empty() + assert len(reader.buf) == 0 + assert len(reader) == 2 * 3 + + +def test_shard_reader_iteration() -> None: + """Test _ShardReader iteration yields all chunk coordinates.""" + chunks_per_shard = (2, 2) + reader = _ShardReader.create_empty(chunks_per_shard) + + coords = list(reader) + + assert len(coords) == 4 + assert (0, 0) in coords + assert (0, 1) in coords + assert (1, 0) in coords + assert (1, 1) in coords + + +def test_shard_reader_getitem_raises_for_empty() -> None: + """Test _ShardReader.__getitem__ raises KeyError for empty chunks.""" + chunks_per_shard = (2,) + reader = _ShardReader.create_empty(chunks_per_shard) + + with pytest.raises(KeyError): + _ = reader[(0,)] + + +def test_is_total_shard_full() -> None: + """Test _is_total_shard returns True when all chunk coords are present.""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (2, 2) + all_chunk_coords: set[tuple[int, ...]] = {(0, 0), (0, 1), (1, 0), (1, 1)} + + assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is True + + +def test_is_total_shard_partial() -> None: + """Test _is_total_shard returns False for partial chunk coords.""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = 
(2, 2) + all_chunk_coords: set[tuple[int, ...]] = {(0, 0), (1, 1)} # Missing (0, 1) and (1, 0) + + assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is False + + +def test_is_total_shard_empty() -> None: + """Test _is_total_shard returns False for empty chunk coords.""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (2, 2) + all_chunk_coords: set[tuple[int, ...]] = set() + + assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is False + + +def test_is_total_shard_1d() -> None: + """Test _is_total_shard works with 1D shards.""" + codec = ShardingCodec(chunk_shape=(8,)) + chunks_per_shard = (4,) + all_chunk_coords: set[tuple[int, ...]] = {(0,), (1,), (2,), (3,)} + + assert codec._is_total_shard(all_chunk_coords, chunks_per_shard) is True + + # Partial + partial_coords: set[tuple[int, ...]] = {(0,), (2,)} + assert codec._is_total_shard(partial_coords, chunks_per_shard) is False