From 7677b25056a47b0c53700a306d2f07f4ab7f560f Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 12 Feb 2026 20:53:35 +0000 Subject: [PATCH 1/3] Add Seismic3DOffsetTilesTemplate and Seismic3DReceiverGathersTemplate Add two new seismic dataset templates: - Seismic3DOffsetTilesTemplate: 5D template for wide-azimuth data organized by CDP location with offset vector tile decomposition. Supports time and depth domains. - Seismic3DReceiverGathersTemplate: Template for fixed receiver surveys (OBN, OBC, land) organized by receiver position. Time domain only. Also fixes duplicate entries in the expected template names test list. Co-authored-by: Cursor --- src/mdio/builder/template_registry.py | 9 + .../templates/seismic_3d_offset_tiles.py | 111 +++++++++++ .../templates/seismic_3d_receiver_gathers.py | 119 ++++++++++++ .../templates/test_seismic_3d_offset_tiles.py | 177 ++++++++++++++++++ .../test_seismic_3d_receiver_gathers.py | 168 +++++++++++++++++ .../v1/templates/test_template_registry.py | 19 +- 6 files changed, 595 insertions(+), 8 deletions(-) create mode 100644 src/mdio/builder/templates/seismic_3d_offset_tiles.py create mode 100644 src/mdio/builder/templates/seismic_3d_receiver_gathers.py create mode 100644 tests/unit/v1/templates/test_seismic_3d_offset_tiles.py create mode 100644 tests/unit/v1/templates/test_seismic_3d_receiver_gathers.py diff --git a/src/mdio/builder/template_registry.py b/src/mdio/builder/template_registry.py index 3cb09da5..970af1a2 100644 --- a/src/mdio/builder/template_registry.py +++ b/src/mdio/builder/template_registry.py @@ -26,7 +26,9 @@ from mdio.builder.templates.seismic_3d_cdp import Seismic3DCdpGathersTemplate from mdio.builder.templates.seismic_3d_coca import Seismic3DCocaGathersTemplate from mdio.builder.templates.seismic_3d_obn import Seismic3DObnReceiverGathersTemplate +from mdio.builder.templates.seismic_3d_offset_tiles import Seismic3DOffsetTilesTemplate from mdio.builder.templates.seismic_3d_poststack import Seismic3DPostStackTemplate +from mdio.builder.templates.seismic_3d_receiver_gathers import Seismic3DReceiverGathersTemplate from mdio.builder.templates.seismic_3d_shot_receiver_line import Seismic3DShotReceiverLineGathersTemplate from mdio.builder.templates.seismic_3d_streamer_field import Seismic3DStreamerFieldRecordsTemplate from mdio.builder.templates.seismic_3d_streamer_shot import Seismic3DStreamerShotGathersTemplate @@ -135,6 +137,13 @@ def _register_default_templates(self) -> None: self.register(Seismic3DCocaGathersTemplate("time")) self.register(Seismic3DCocaGathersTemplate("depth")) + # Receiver Gathers (OBN, OBC, land fixed-spread) - time domain only + self.register(Seismic3DReceiverGathersTemplate()) + + # Offset Tiles + self.register(Seismic3DOffsetTilesTemplate("time")) + self.register(Seismic3DOffsetTilesTemplate("depth")) + # Field (shot) data self.register(Seismic2DStreamerShotGathersTemplate()) self.register(Seismic3DStreamerShotGathersTemplate()) diff --git a/src/mdio/builder/templates/seismic_3d_offset_tiles.py b/src/mdio/builder/templates/seismic_3d_offset_tiles.py new file mode 100644 index 00000000..f056f846 --- /dev/null +++ b/src/mdio/builder/templates/seismic_3d_offset_tiles.py @@ -0,0 +1,111 @@ +"""Seismic3DOffsetTilesTemplate MDIO v1 dataset templates.""" + +from typing import Any + +from mdio.builder.schemas import compressors +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.base import AbstractDatasetTemplate +from mdio.builder.templates.types import SeismicDataDomain + + +class Seismic3DOffsetTilesTemplate(AbstractDatasetTemplate): + """Seismic Offset Tiles pre-stack 3D Dataset template. + + A 5D template for wide-azimuth seismic data organized by offset vector tiles. + Data is binned by CDP location (inline, crossline) with offset vector + decomposition into inline and crossline offset tile components. + + Dimensions: + - inline: Inline bin position + - crossline: Crossline bin position + - inline_offset_tile: Inline component of the offset vector tile + - crossline_offset_tile: Crossline component of the offset vector tile + - time/depth: Sample dimension + + This organization is optimal for: + - Wide-azimuth data preservation + - 5D interpolation and regularization + - Azimuthal analysis and processing + - Offset vector filtering + + The offset vector tiles partition the offset-azimuth space into a regular grid, + preserving both offset magnitude and azimuth information in a format suitable + for modern wide-azimuth processing workflows. + + Args: + data_domain: The domain of the dataset ('time' or 'depth'). + """ + + def __init__(self, data_domain: SeismicDataDomain = "time"): + super().__init__(data_domain=data_domain) + + self._dim_names = ( + "inline", + "crossline", + "inline_offset_tile", + "crossline_offset_tile", + self._data_domain, + ) + self._physical_coord_names = ("cdp_x", "cdp_y") + self._logical_coord_names = () + self._var_chunk_shape = (4, 4, 6, 6, 4096) + + @property + def _name(self) -> str: + return f"OffsetTiles3D{self._data_domain.capitalize()}" + + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyType": "3D", "gatherType": "offset_tiles"} + + def _add_coordinates(self) -> None: + # Add dimension coordinates + self._builder.add_coordinate( + "inline", + dimensions=("inline",), + data_type=ScalarType.INT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("inline")), + ) + self._builder.add_coordinate( + "crossline", + dimensions=("crossline",), + data_type=ScalarType.INT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("crossline")), + ) + self._builder.add_coordinate( + "inline_offset_tile", + dimensions=("inline_offset_tile",), + data_type=ScalarType.INT16, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("inline_offset_tile")), + ) + self._builder.add_coordinate( + "crossline_offset_tile", + dimensions=("crossline_offset_tile",), + data_type=ScalarType.INT16, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("crossline_offset_tile")), + ) + self._builder.add_coordinate( + self.trace_domain, + dimensions=(self.trace_domain,), + data_type=ScalarType.INT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self.trace_domain)), + ) + + # Add non-dimension coordinates + compressor = compressors.Blosc(cname=compressors.BloscCname.zstd) + + # CDP coordinates (vary by inline, crossline) + self._builder.add_coordinate( + "cdp_x", + dimensions=("inline", "crossline"), + data_type=ScalarType.FLOAT64, + compressor=compressor, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("cdp_x")), + ) + self._builder.add_coordinate( + "cdp_y", + dimensions=("inline", "crossline"), + data_type=ScalarType.FLOAT64, + compressor=compressor, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("cdp_y")), + ) diff --git a/src/mdio/builder/templates/seismic_3d_receiver_gathers.py b/src/mdio/builder/templates/seismic_3d_receiver_gathers.py new file mode 100644 index 00000000..4b7ba40a --- /dev/null +++ b/src/mdio/builder/templates/seismic_3d_receiver_gathers.py @@ -0,0 +1,119 @@ +"""Seismic3DReceiverGathersTemplate MDIO v1 dataset templates.""" + +from typing import Any + +from mdio.builder.schemas import compressors +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.v1.variable import CoordinateMetadata +from mdio.builder.templates.base import AbstractDatasetTemplate + + +class Seismic3DReceiverGathersTemplate(AbstractDatasetTemplate): + """Seismic receiver gather pre-stack 3D Dataset template. + + A template for surveys with fixed receiver positions (OBN, OBC, land fixed-spread) + where data is organized by receiver position for receiver-side processing. + Shots are organized by shot lines with a calculated shot index. + + This template is time-domain only as receiver gathers are typically used + for early-stage processing before depth conversion. + + Dimensions: + - receiver: Index of the receiver node/station + - shot_line: Shot line or swath identifier + - shot_index: Sequential index of shots within the line (calculated, 0-N) + - time: Sample dimension + + This organization is optimal for: + - Receiver-side wavefield separation (up/down) + - Receiver-consistent deconvolution + - Multi-component processing + - Mirror imaging from OBN + """ + + def __init__(self) -> None: + super().__init__(data_domain="time") + + self._dim_names = ("receiver", "shot_line", "shot_index", "time") + self._calculated_dims = ("shot_index",) + self._physical_coord_names = ( + "receiver_x", + "receiver_y", + "source_coord_x", + "source_coord_y", + ) + self._logical_coord_names = ("shot_point",) + self._var_chunk_shape = (1, 1, 512, 4096) + + @property + def _name(self) -> str: + return "ReceiverGathers3D" + + def _load_dataset_attributes(self) -> dict[str, Any]: + return {"surveyType": "3D", "gatherType": "receiver_gathers"} + + def _add_coordinates(self) -> None: + # Add dimension coordinates + # Note: shot_index is calculated (0-N), so we don't add a coordinate for it + self._builder.add_coordinate( + "receiver", + dimensions=("receiver",), + data_type=ScalarType.UINT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("receiver")), + ) + self._builder.add_coordinate( + "shot_line", + dimensions=("shot_line",), + data_type=ScalarType.UINT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("shot_line")), + ) + self._builder.add_coordinate( + self.trace_domain, + dimensions=(self.trace_domain,), + data_type=ScalarType.INT32, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key(self.trace_domain)), + ) + + # Add non-dimension coordinates + compressor = compressors.Blosc(cname=compressors.BloscCname.zstd) + + # Receiver coordinates (fixed per receiver) + self._builder.add_coordinate( + "receiver_x", + dimensions=("receiver",), + data_type=ScalarType.FLOAT64, + compressor=compressor, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("receiver_x")), + ) + self._builder.add_coordinate( + "receiver_y", + dimensions=("receiver",), + data_type=ScalarType.FLOAT64, + compressor=compressor, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("receiver_y")), + ) + + # Shot point coordinate (actual shot point numbers, varies by shot_line and shot_index) + self._builder.add_coordinate( + "shot_point", + dimensions=("shot_line", "shot_index"), + data_type=ScalarType.UINT32, + compressor=compressor, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("shot_point")), + ) + + # Source coordinates (vary by shot_line and shot_index) + self._builder.add_coordinate( + "source_coord_x", + dimensions=("shot_line", "shot_index"), + data_type=ScalarType.FLOAT64, + compressor=compressor, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_x")), + ) + self._builder.add_coordinate( + "source_coord_y", + dimensions=("shot_line", "shot_index"), + data_type=ScalarType.FLOAT64, + compressor=compressor, + metadata=CoordinateMetadata(units_v1=self.get_unit_by_key("source_coord_y")), + ) diff --git a/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py b/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py new file mode 100644 index 00000000..291d67ee --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py @@ -0,0 +1,177 @@ +"""Unit tests for Seismic3DOffsetTilesTemplate.""" + +import pytest +from tests.unit.v1.helpers import validate_variable + +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_3d_offset_tiles import Seismic3DOffsetTilesTemplate +from mdio.builder.templates.types import SeismicDataDomain + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) + + +def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType, domain: str) -> None: + """Validate the coordinate, headers, trace_mask variables in the dataset.""" + # Verify variables + # 5 dim coords + 2 non-dim coords + 1 data + 1 trace mask + 1 headers = 10 variables + assert len(dataset.variables) == 10 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[("inline", 256), ("crossline", 256), ("inline_offset_tile", 12), ("crossline_offset_tile", 12)], + coords=["cdp_x", "cdp_y"], + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[("inline", 256), ("crossline", 256), ("inline_offset_tile", 12), ("crossline_offset_tile", 12)], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables + validate_variable( + dataset, + name="inline", + dims=[("inline", 256)], + coords=["inline"], + dtype=ScalarType.INT32, + ) + + validate_variable( + dataset, + name="crossline", + dims=[("crossline", 256)], + coords=["crossline"], + dtype=ScalarType.INT32, + ) + + validate_variable( + dataset, + name="inline_offset_tile", + dims=[("inline_offset_tile", 12)], + coords=["inline_offset_tile"], + dtype=ScalarType.INT16, + ) + + validate_variable( + dataset, + name="crossline_offset_tile", + dims=[("crossline_offset_tile", 12)], + coords=["crossline_offset_tile"], + dtype=ScalarType.INT16, + ) + + domain_var = validate_variable( + dataset, + name=domain, + dims=[(domain, 2048)], + coords=[domain], + dtype=ScalarType.INT32, + ) + assert domain_var.metadata.units_v1 in (UNITS_METER, UNITS_SECOND) + + # Verify non-dimension coordinate variables + cdp_x = validate_variable( + dataset, + name="cdp_x", + dims=[("inline", 256), ("crossline", 256)], + coords=["cdp_x"], + dtype=ScalarType.FLOAT64, + ) + assert cdp_x.metadata.units_v1 == UNITS_METER + + cdp_y = validate_variable( + dataset, + name="cdp_y", + dims=[("inline", 256), ("crossline", 256)], + coords=["cdp_y"], + dtype=ScalarType.FLOAT64, + ) + assert cdp_y.metadata.units_v1 == UNITS_METER + + +@pytest.mark.parametrize("data_domain", ["depth", "time"]) +class TestSeismic3DOffsetTilesTemplate: + """Unit tests for Seismic3DOffsetTilesTemplate.""" + + def test_configuration(self, data_domain: SeismicDataDomain) -> None: + """Unit tests for Seismic3DOffsetTilesTemplate configuration.""" + t = Seismic3DOffsetTilesTemplate(data_domain=data_domain) + + # Template attributes + assert t._dim_names == ("inline", "crossline", "inline_offset_tile", "crossline_offset_tile", data_domain) + assert t._physical_coord_names == ("cdp_x", "cdp_y") + assert t._logical_coord_names == () + assert t.full_chunk_shape == (4, 4, 6, 6, 4096) + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == () + + # Verify dataset attributes + attrs = t._load_dataset_attributes() + assert attrs == {"surveyType": "3D", "gatherType": "offset_tiles"} + assert t.default_variable_name == "amplitude" + + def test_build_dataset(self, data_domain: SeismicDataDomain, structured_headers: StructuredType) -> None: + """Unit tests for Seismic3DOffsetTilesTemplate build.""" + t = Seismic3DOffsetTilesTemplate(data_domain=data_domain) + t.add_units({"cdp_x": UNITS_METER, "cdp_y": UNITS_METER}) + t.add_units({"time": UNITS_SECOND, "depth": UNITS_METER}) + + dataset = t.build_dataset( + "Wide Azimuth Offset Tiles", + sizes=(256, 256, 12, 12, 2048), + header_dtype=structured_headers, + ) + + assert dataset.metadata.name == "Wide Azimuth Offset Tiles" + assert dataset.metadata.attributes["surveyType"] == "3D" + assert dataset.metadata.attributes["gatherType"] == "offset_tiles" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers, data_domain) + + # Verify seismic variable + seismic = validate_variable( + dataset, + name="amplitude", + dims=[ + ("inline", 256), + ("crossline", 256), + ("inline_offset_tile", 12), + ("crossline_offset_tile", 12), + (data_domain, 2048), + ], + coords=["cdp_x", "cdp_y"], + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.cname == BloscCname.zstd + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == (4, 4, 6, 6, 4096) + assert seismic.metadata.stats_v1 is None + + +@pytest.mark.parametrize("data_domain", ["Time", "DePTh"]) +def test_domain_case_handling(data_domain: str) -> None: + """Test that domain parameter handles different cases correctly.""" + template = Seismic3DOffsetTilesTemplate(data_domain=data_domain) + assert template._data_domain == data_domain.lower() + + data_domain_suffix = data_domain.lower().capitalize() + assert template.name == f"OffsetTiles3D{data_domain_suffix}" diff --git a/tests/unit/v1/templates/test_seismic_3d_receiver_gathers.py b/tests/unit/v1/templates/test_seismic_3d_receiver_gathers.py new file mode 100644 index 00000000..df7bb32f --- /dev/null +++ b/tests/unit/v1/templates/test_seismic_3d_receiver_gathers.py @@ -0,0 +1,168 @@ +"""Unit tests for Seismic3DReceiverGathersTemplate.""" + +from tests.unit.v1.helpers import validate_variable + +from mdio.builder.schemas.chunk_grid import RegularChunkGrid +from mdio.builder.schemas.compressors import Blosc +from mdio.builder.schemas.compressors import BloscCname +from mdio.builder.schemas.dtype import ScalarType +from mdio.builder.schemas.dtype import StructuredType +from mdio.builder.schemas.v1.dataset import Dataset +from mdio.builder.schemas.v1.units import LengthUnitEnum +from mdio.builder.schemas.v1.units import LengthUnitModel +from mdio.builder.schemas.v1.units import TimeUnitEnum +from mdio.builder.schemas.v1.units import TimeUnitModel +from mdio.builder.templates.seismic_3d_receiver_gathers import Seismic3DReceiverGathersTemplate + +UNITS_METER = LengthUnitModel(length=LengthUnitEnum.METER) +UNITS_SECOND = TimeUnitModel(time=TimeUnitEnum.SECOND) + + +EXPECTED_COORDINATES = [ + "receiver_x", + "receiver_y", + "shot_point", + "source_coord_x", + "source_coord_y", +] + +DATASET_SIZE_MAP = {"receiver": 100, "shot_line": 10, "shot_index": 500, "time": 2048} + + +def _validate_coordinates_headers_trace_mask(dataset: Dataset, headers: StructuredType) -> None: + """Validate the coordinate, headers, trace_mask variables in the dataset.""" + dataset_dtype_map = {"receiver": "uint32", "shot_line": "uint32", "time": "int32"} + + # Verify variables + # 3 dim coords (excluding shot_index) + 5 non-dim coords + 1 data + 1 trace mask + 1 headers = 11 variables + assert len(dataset.variables) == 11 + + # Verify trace headers + validate_variable( + dataset, + name="headers", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != "time"], + coords=EXPECTED_COORDINATES, + dtype=headers, + ) + + validate_variable( + dataset, + name="trace_mask", + dims=[(k, v) for k, v in DATASET_SIZE_MAP.items() if k != "time"], + coords=EXPECTED_COORDINATES, + dtype=ScalarType.BOOL, + ) + + # Verify dimension coordinate variables (excluding shot_index which is calculated) + for dim_name, dim_size in DATASET_SIZE_MAP.items(): + if dim_name == "shot_index": + continue + + validate_variable( + dataset, + name=dim_name, + dims=[(dim_name, dim_size)], + coords=[dim_name], + dtype=ScalarType(dataset_dtype_map[dim_name]), + ) + + # Verify non-dimension coordinate variables - receiver coordinates + receiver_x = validate_variable( + dataset, + name="receiver_x", + dims=[("receiver", DATASET_SIZE_MAP["receiver"])], + coords=["receiver_x"], + dtype=ScalarType.FLOAT64, + ) + assert receiver_x.metadata.units_v1.length == LengthUnitEnum.METER + + receiver_y = validate_variable( + dataset, + name="receiver_y", + dims=[("receiver", DATASET_SIZE_MAP["receiver"])], + coords=["receiver_y"], + dtype=ScalarType.FLOAT64, + ) + assert receiver_y.metadata.units_v1.length == LengthUnitEnum.METER + + # Verify shot_point coordinate (logical) + validate_variable( + dataset, + name="shot_point", + dims=[ + ("shot_line", DATASET_SIZE_MAP["shot_line"]), + ("shot_index", DATASET_SIZE_MAP["shot_index"]), + ], + coords=["shot_point"], + dtype=ScalarType.UINT32, + ) + + # Verify source coordinate variables + for coord_name in ["source_coord_x", "source_coord_y"]: + coord = validate_variable( + dataset, + name=coord_name, + dims=[ + ("shot_line", DATASET_SIZE_MAP["shot_line"]), + ("shot_index", DATASET_SIZE_MAP["shot_index"]), + ], + coords=[coord_name], + dtype=ScalarType.FLOAT64, + ) + assert coord.metadata.units_v1.length == LengthUnitEnum.METER + + +class TestSeismic3DReceiverGathersTemplate: + """Unit tests for Seismic3DReceiverGathersTemplate.""" + + def test_configuration(self) -> None: + """Unit tests for Seismic3DReceiverGathersTemplate configuration.""" + t = Seismic3DReceiverGathersTemplate() + + # Template attributes + assert t.name == "ReceiverGathers3D" + assert t._dim_names == ("receiver", "shot_line", "shot_index", "time") + assert t._calculated_dims == ("shot_index",) + assert t._physical_coord_names == ("receiver_x", "receiver_y", "source_coord_x", "source_coord_y") + assert t._logical_coord_names == ("shot_point",) + assert t.full_chunk_shape == (1, 1, 512, 4096) + + # Variables instantiated when build_dataset() is called + assert t._builder is None + assert t._dim_sizes == () + + # Verify dataset attributes + attrs = t._load_dataset_attributes() + assert attrs == {"surveyType": "3D", "gatherType": "receiver_gathers"} + assert t.default_variable_name == "amplitude" + + def test_build_dataset(self, structured_headers: StructuredType) -> None: + """Unit tests for Seismic3DReceiverGathersTemplate build.""" + t = Seismic3DReceiverGathersTemplate() + t.add_units({"receiver_x": UNITS_METER, "receiver_y": UNITS_METER}) + t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) + t.add_units({"time": UNITS_SECOND}) + + sizes = tuple(DATASET_SIZE_MAP.values()) + dataset = t.build_dataset("OBN Survey Receiver Gathers", sizes=sizes, header_dtype=structured_headers) + + assert dataset.metadata.name == "OBN Survey Receiver Gathers" + assert dataset.metadata.attributes["surveyType"] == "3D" + assert dataset.metadata.attributes["gatherType"] == "receiver_gathers" + + _validate_coordinates_headers_trace_mask(dataset, structured_headers) + + # Verify seismic variable + seismic = validate_variable( + dataset, + name="amplitude", + dims=list(DATASET_SIZE_MAP.items()), + coords=EXPECTED_COORDINATES, + dtype=ScalarType.FLOAT32, + ) + assert isinstance(seismic.compressor, Blosc) + assert seismic.compressor.cname == BloscCname.zstd + assert isinstance(seismic.metadata.chunk_grid, RegularChunkGrid) + assert seismic.metadata.chunk_grid.configuration.chunk_shape == (1, 1, 512, 4096) + assert seismic.metadata.stats_v1 is None diff --git a/tests/unit/v1/templates/test_template_registry.py b/tests/unit/v1/templates/test_template_registry.py index 418c343c..37d63ec2 100644 --- a/tests/unit/v1/templates/test_template_registry.py +++ b/tests/unit/v1/templates/test_template_registry.py @@ -22,15 +22,18 @@ "PostStack3DTime", "PostStack3DDepth", "CdpOffsetGathers2DTime", - "CdpAngleGathers2DDepth", - "CdpOffsetGathers2DTime", + "CdpOffsetGathers2DDepth", + "CdpAngleGathers2DTime", "CdpAngleGathers2DDepth", "CdpOffsetGathers3DTime", - "CdpAngleGathers3DTime", "CdpOffsetGathers3DDepth", + "CdpAngleGathers3DTime", "CdpAngleGathers3DDepth", "CocaGathers3DTime", "CocaGathers3DDepth", + "ReceiverGathers3D", + "OffsetTiles3DTime", + "OffsetTiles3DDepth", "StreamerShotGathers2D", "StreamerShotGathers3D", "StreamerFieldRecords3D", @@ -242,7 +245,7 @@ def test_list_all_templates(self) -> None: registry.register(template2) templates = registry.list_all_templates() - assert len(templates) == 19 + 2 # 19 default + 2 custom + assert len(templates) == 22 + 2 # 22 default + 2 custom assert "Template_One" in templates assert "Template_Two" in templates @@ -252,7 +255,7 @@ def test_clear_templates(self) -> None: # Default templates are always installed templates = list_templates() - assert len(templates) == 19 + assert len(templates) == 22 # Add some templates template1 = MockDatasetTemplate("Template1") @@ -261,7 +264,7 @@ def test_clear_templates(self) -> None: registry.register(template1) registry.register(template2) - assert len(registry.list_all_templates()) == 19 + 2 # 19 default + 2 custom + assert len(registry.list_all_templates()) == 22 + 2 # 22 default + 2 custom # Clear all registry.clear() @@ -394,7 +397,7 @@ def test_list_templates_global(self) -> None: register_template(template2) templates = list_templates() - assert len(templates) == 21 # 19 default + 2 custom + assert len(templates) == 24 # 22 default + 2 custom assert "template1" in templates assert "template2" in templates @@ -437,7 +440,7 @@ def register_template_worker(template_id: int) -> None: assert len(errors) == 0 assert len(results) == 10 # Including default templates - assert len(registry.list_all_templates()) == 29 # 19 default + 10 registered + assert len(registry.list_all_templates()) == 32 # 22 default + 10 registered # Check all templates are registered for i in range(10): From cd6d59054ddd47e32856b906c17a0b047595ec65 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Thu, 12 Feb 2026 21:30:38 +0000 Subject: [PATCH 2/3] Refactor Seismic Dataset Templates Documentation and Tests - Updated docstrings for Seismic3DOffsetTilesTemplate and Seismic3DReceiverGathersTemplate to provide concise descriptions. - Added a new test for chunk size calculation in both templates to ensure proper memory management. - Adjusted existing tests to reflect changes in dataset naming conventions and template attributes. These changes enhance clarity and maintainability of the codebase. --- .../templates/seismic_3d_offset_tiles.py | 27 +------------------ .../templates/seismic_3d_receiver_gathers.py | 22 +-------------- .../templates/test_seismic_3d_offset_tiles.py | 27 ++++++++++++++++--- .../test_seismic_3d_receiver_gathers.py | 23 ++++++++++++++-- 4 files changed, 46 insertions(+), 53 deletions(-) diff --git a/src/mdio/builder/templates/seismic_3d_offset_tiles.py b/src/mdio/builder/templates/seismic_3d_offset_tiles.py index f056f846..f68ed9bb 100644 --- a/src/mdio/builder/templates/seismic_3d_offset_tiles.py +++ b/src/mdio/builder/templates/seismic_3d_offset_tiles.py @@ -10,32 +10,7 @@ class Seismic3DOffsetTilesTemplate(AbstractDatasetTemplate): - """Seismic Offset Tiles pre-stack 3D Dataset template. - - A 5D template for wide-azimuth seismic data organized by offset vector tiles. - Data is binned by CDP location (inline, crossline) with offset vector - decomposition into inline and crossline offset tile components. - - Dimensions: - - inline: Inline bin position - - crossline: Crossline bin position - - inline_offset_tile: Inline component of the offset vector tile - - crossline_offset_tile: Crossline component of the offset vector tile - - time/depth: Sample dimension - - This organization is optimal for: - - Wide-azimuth data preservation - - 5D interpolation and regularization - - Azimuthal analysis and processing - - Offset vector filtering - - The offset vector tiles partition the offset-azimuth space into a regular grid, - preserving both offset magnitude and azimuth information in a format suitable - for modern wide-azimuth processing workflows. - - Args: - data_domain: The domain of the dataset ('time' or 'depth'). - """ + """Seismic 3D template for offset vector tile (OVT) binned gathers.""" def __init__(self, data_domain: SeismicDataDomain = "time"): super().__init__(data_domain=data_domain) diff --git a/src/mdio/builder/templates/seismic_3d_receiver_gathers.py b/src/mdio/builder/templates/seismic_3d_receiver_gathers.py index 4b7ba40a..4f5ef5f1 100644 --- a/src/mdio/builder/templates/seismic_3d_receiver_gathers.py +++ b/src/mdio/builder/templates/seismic_3d_receiver_gathers.py @@ -9,27 +9,7 @@ class Seismic3DReceiverGathersTemplate(AbstractDatasetTemplate): - """Seismic receiver gather pre-stack 3D Dataset template. - - A template for surveys with fixed receiver positions (OBN, OBC, land fixed-spread) - where data is organized by receiver position for receiver-side processing. - Shots are organized by shot lines with a calculated shot index. - - This template is time-domain only as receiver gathers are typically used - for early-stage processing before depth conversion. - - Dimensions: - - receiver: Index of the receiver node/station - - shot_line: Shot line or swath identifier - - shot_index: Sequential index of shots within the line (calculated, 0-N) - - time: Sample dimension - - This organization is optimal for: - - Receiver-side wavefield separation (up/down) - - Receiver-consistent deconvolution - - Multi-component processing - - Mirror imaging from OBN - """ + """Seismic 3D receiver gathers template with calculated shot index.""" def __init__(self) -> None: super().__init__(data_domain="time") diff --git a/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py b/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py index 291d67ee..d2ec8cb8 100644 --- a/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py +++ b/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py @@ -110,7 +110,7 @@ class TestSeismic3DOffsetTilesTemplate: """Unit tests for Seismic3DOffsetTilesTemplate.""" def test_configuration(self, data_domain: SeismicDataDomain) -> None: - """Unit tests for Seismic3DOffsetTilesTemplate configuration.""" + """Test template configuration and attributes.""" t = Seismic3DOffsetTilesTemplate(data_domain=data_domain) # Template attributes @@ -128,19 +128,38 @@ def test_configuration(self, data_domain: SeismicDataDomain) -> None: assert attrs == {"surveyType": "3D", "gatherType": "offset_tiles"} assert t.default_variable_name == "amplitude" + def test_chunk_size_calculation(self, data_domain: SeismicDataDomain) -> None: + """Test that chunk shape produces approximately 9 MiB chunks. + + The chunk shape (4, 4, 6, 6, 4096) produces: + 4 * 4 * 6 * 6 * 4096 = 2,359,296 samples. + With float32 (4 bytes): 2,359,296 * 4 = 9,437,184 bytes = 9 MiB. + """ + t = Seismic3DOffsetTilesTemplate(data_domain=data_domain) + + chunk_shape = t.full_chunk_shape + assert chunk_shape == (4, 4, 6, 6, 4096) + + samples_per_chunk = 1 + for dim_size in chunk_shape: + samples_per_chunk *= dim_size + + bytes_per_chunk = samples_per_chunk * 4 + assert bytes_per_chunk == 9 * 1024 * 1024 # 9 MiB + def test_build_dataset(self, data_domain: SeismicDataDomain, structured_headers: StructuredType) -> None: - """Unit tests for Seismic3DOffsetTilesTemplate build.""" + """Test building a complete dataset with the template.""" t = Seismic3DOffsetTilesTemplate(data_domain=data_domain) t.add_units({"cdp_x": UNITS_METER, "cdp_y": UNITS_METER}) t.add_units({"time": UNITS_SECOND, "depth": UNITS_METER}) dataset = t.build_dataset( - "Wide Azimuth Offset Tiles", + "OVT Gathers", sizes=(256, 256, 12, 12, 2048), header_dtype=structured_headers, ) - assert dataset.metadata.name == "Wide Azimuth Offset Tiles" + assert dataset.metadata.name == "OVT Gathers" assert dataset.metadata.attributes["surveyType"] == "3D" assert dataset.metadata.attributes["gatherType"] == "offset_tiles" diff --git a/tests/unit/v1/templates/test_seismic_3d_receiver_gathers.py b/tests/unit/v1/templates/test_seismic_3d_receiver_gathers.py index df7bb32f..36f966d4 100644 --- a/tests/unit/v1/templates/test_seismic_3d_receiver_gathers.py +++ b/tests/unit/v1/templates/test_seismic_3d_receiver_gathers.py @@ -117,7 +117,7 @@ class TestSeismic3DReceiverGathersTemplate: """Unit tests for Seismic3DReceiverGathersTemplate.""" def test_configuration(self) -> None: - """Unit tests for Seismic3DReceiverGathersTemplate configuration.""" + """Test template configuration and attributes.""" t = Seismic3DReceiverGathersTemplate() # Template attributes @@ -137,8 +137,27 @@ def test_configuration(self) -> None: assert attrs == {"surveyType": "3D", "gatherType": "receiver_gathers"} assert t.default_variable_name == "amplitude" + def test_chunk_size_calculation(self) -> None: + """Test that chunk shape produces approximately 8 MiB chunks. + + The chunk shape (1, 1, 512, 4096) produces: + 1 * 1 * 512 * 4096 = 2,097,152 samples. + With float32 (4 bytes): 2,097,152 * 4 = 8,388,608 bytes = 8 MiB. + """ + t = Seismic3DReceiverGathersTemplate() + + chunk_shape = t.full_chunk_shape + assert chunk_shape == (1, 1, 512, 4096) + + samples_per_chunk = 1 + for dim_size in chunk_shape: + samples_per_chunk *= dim_size + + bytes_per_chunk = samples_per_chunk * 4 + assert bytes_per_chunk == 8 * 1024 * 1024 # 8 MiB + def test_build_dataset(self, structured_headers: StructuredType) -> None: - """Unit tests for Seismic3DReceiverGathersTemplate build.""" + """Test building a complete dataset with the template.""" t = Seismic3DReceiverGathersTemplate() t.add_units({"receiver_x": UNITS_METER, "receiver_y": UNITS_METER}) t.add_units({"source_coord_x": UNITS_METER, "source_coord_y": UNITS_METER}) From 6c22cccfa6775ef545777f2a1de476ef3214aee4 Mon Sep 17 00:00:00 2001 From: BrianMichell Date: Fri, 13 Feb 2026 15:41:37 +0000 Subject: [PATCH 3/3] Use a more correct name for offset tile gathers --- src/mdio/builder/templates/seismic_3d_offset_tiles.py | 2 +- tests/unit/v1/templates/test_seismic_3d_offset_tiles.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/mdio/builder/templates/seismic_3d_offset_tiles.py b/src/mdio/builder/templates/seismic_3d_offset_tiles.py index f68ed9bb..65567a5a 100644 --- a/src/mdio/builder/templates/seismic_3d_offset_tiles.py +++ b/src/mdio/builder/templates/seismic_3d_offset_tiles.py @@ -10,7 +10,7 @@ class Seismic3DOffsetTilesTemplate(AbstractDatasetTemplate): - """Seismic 3D template for offset vector tile (OVT) binned gathers.""" + """Seismic 3D template for rectangular offset tile gathers.""" def __init__(self, data_domain: SeismicDataDomain = "time"): super().__init__(data_domain=data_domain) diff --git a/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py b/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py index d2ec8cb8..5d0056c9 100644 --- a/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py +++ b/tests/unit/v1/templates/test_seismic_3d_offset_tiles.py @@ -154,12 +154,12 @@ def test_build_dataset(self, data_domain: SeismicDataDomain, structured_headers: t.add_units({"time": UNITS_SECOND, "depth": UNITS_METER}) dataset = t.build_dataset( - "OVT Gathers", + "Offset Tile Gathers", sizes=(256, 256, 12, 12, 2048), header_dtype=structured_headers, ) - assert dataset.metadata.name == "OVT Gathers" + assert dataset.metadata.name == "Offset Tile Gathers" assert dataset.metadata.attributes["surveyType"] == "3D" assert dataset.metadata.attributes["gatherType"] == "offset_tiles"