From 22d1490cf1368065d09a14eed1f8974aa6e9ddb2 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 20 Jan 2025 17:20:39 +0100 Subject: [PATCH 01/17] new plugin module --- xrlint/plugins/xcube/__init__.py | 3 ++- xrlint/plugins/xcube/plugin.py | 8 ++++++++ xrlint/plugins/xcube/rules/__init__.py | 8 -------- xrlint/plugins/xcube/rules/any_spatial_data_var.py | 2 +- xrlint/plugins/xcube/rules/cube_dims_order.py | 2 +- xrlint/plugins/xcube/rules/data_var_colors.py | 2 +- xrlint/plugins/xcube/rules/grid_mapping_naming.py | 2 +- xrlint/plugins/xcube/rules/increasing_time.py | 2 +- xrlint/plugins/xcube/rules/lat_lon_naming.py | 2 +- xrlint/plugins/xcube/rules/single_grid_mapping.py | 2 +- xrlint/plugins/xcube/rules/time_naming.py | 2 +- 11 files changed, 18 insertions(+), 17 deletions(-) create mode 100644 xrlint/plugins/xcube/plugin.py diff --git a/xrlint/plugins/xcube/__init__.py b/xrlint/plugins/xcube/__init__.py index ee60293..31b23c4 100644 --- a/xrlint/plugins/xcube/__init__.py +++ b/xrlint/plugins/xcube/__init__.py @@ -4,9 +4,10 @@ def export_plugin() -> Plugin: - from .rules import plugin + from .plugin import plugin import_submodules("xrlint.plugins.xcube.rules") + # import_submodules("xrlint.plugins.xcube.processors") plugin.configs["recommended"] = Config.from_value( { diff --git a/xrlint/plugins/xcube/plugin.py b/xrlint/plugins/xcube/plugin.py new file mode 100644 index 0000000..1de8247 --- /dev/null +++ b/xrlint/plugins/xcube/plugin.py @@ -0,0 +1,8 @@ +from xrlint.plugin import new_plugin +from xrlint.version import version + +plugin = new_plugin( + name="xcube", + version=version, + ref="xrlint.plugins.xcube:export_plugin", +) diff --git a/xrlint/plugins/xcube/rules/__init__.py b/xrlint/plugins/xcube/rules/__init__.py index 1de8247..e69de29 100644 --- a/xrlint/plugins/xcube/rules/__init__.py +++ b/xrlint/plugins/xcube/rules/__init__.py @@ -1,8 +0,0 @@ -from xrlint.plugin import new_plugin -from xrlint.version import version - -plugin = new_plugin( - name="xcube", - version=version, - ref="xrlint.plugins.xcube:export_plugin", -) diff --git a/xrlint/plugins/xcube/rules/any_spatial_data_var.py b/xrlint/plugins/xcube/rules/any_spatial_data_var.py index 697d2df..26f9375 100644 --- a/xrlint/plugins/xcube/rules/any_spatial_data_var.py +++ b/xrlint/plugins/xcube/rules/any_spatial_data_var.py @@ -1,5 +1,5 @@ from xrlint.node import DatasetNode -from xrlint.plugins.xcube.rules import plugin +from xrlint.plugins.xcube.plugin import plugin from xrlint.plugins.xcube.util import is_spatial_var from xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/xcube/rules/cube_dims_order.py b/xrlint/plugins/xcube/rules/cube_dims_order.py index 103ed42..aee767e 100644 --- a/xrlint/plugins/xcube/rules/cube_dims_order.py +++ b/xrlint/plugins/xcube/rules/cube_dims_order.py @@ -1,6 +1,6 @@ from xrlint.node import DataArrayNode from xrlint.plugins.xcube.constants import LAT_NAME, LON_NAME, X_NAME, Y_NAME, TIME_NAME -from xrlint.plugins.xcube.rules import plugin +from xrlint.plugins.xcube.plugin import plugin from xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/xcube/rules/data_var_colors.py b/xrlint/plugins/xcube/rules/data_var_colors.py index 39225d2..48d8fc1 100644 --- a/xrlint/plugins/xcube/rules/data_var_colors.py +++ b/xrlint/plugins/xcube/rules/data_var_colors.py @@ -1,5 +1,5 @@ from xrlint.node import DataArrayNode -from xrlint.plugins.xcube.rules import plugin +from xrlint.plugins.xcube.plugin import plugin from xrlint.plugins.xcube.util import is_spatial_var from 
xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/xcube/rules/grid_mapping_naming.py b/xrlint/plugins/xcube/rules/grid_mapping_naming.py index dc079a3..cca1f8d 100644 --- a/xrlint/plugins/xcube/rules/grid_mapping_naming.py +++ b/xrlint/plugins/xcube/rules/grid_mapping_naming.py @@ -1,6 +1,6 @@ from xrlint.node import DatasetNode from xrlint.plugins.xcube.constants import GM_NAMES, GM_NAMES_TEXT -from xrlint.plugins.xcube.rules import plugin +from xrlint.plugins.xcube.plugin import plugin from xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/xcube/rules/increasing_time.py b/xrlint/plugins/xcube/rules/increasing_time.py index 331fb05..ee38997 100644 --- a/xrlint/plugins/xcube/rules/increasing_time.py +++ b/xrlint/plugins/xcube/rules/increasing_time.py @@ -1,7 +1,7 @@ import numpy as np from xrlint.node import DataArrayNode -from xrlint.plugins.xcube.rules import plugin +from xrlint.plugins.xcube.plugin import plugin from xrlint.rule import RuleContext, RuleExit, RuleOp from xrlint.util.formatting import format_count, format_seq diff --git a/xrlint/plugins/xcube/rules/lat_lon_naming.py b/xrlint/plugins/xcube/rules/lat_lon_naming.py index 9ae2c98..e3d4a14 100644 --- a/xrlint/plugins/xcube/rules/lat_lon_naming.py +++ b/xrlint/plugins/xcube/rules/lat_lon_naming.py @@ -1,6 +1,6 @@ from xrlint.node import DatasetNode from xrlint.plugins.xcube.constants import LAT_NAME, LON_NAME -from xrlint.plugins.xcube.rules import plugin +from xrlint.plugins.xcube.plugin import plugin from xrlint.rule import RuleContext, RuleOp INVALID_LAT_NAMES = {"ltd", "latitude"} diff --git a/xrlint/plugins/xcube/rules/single_grid_mapping.py b/xrlint/plugins/xcube/rules/single_grid_mapping.py index 0e75e5c..cabb7c8 100644 --- a/xrlint/plugins/xcube/rules/single_grid_mapping.py +++ b/xrlint/plugins/xcube/rules/single_grid_mapping.py @@ -1,6 +1,6 @@ from xrlint.node import DatasetNode from xrlint.plugins.xcube.constants import GM_NAMES_TEXT, LAT_NAME, LON_NAME -from xrlint.plugins.xcube.rules import plugin +from xrlint.plugins.xcube.plugin import plugin from xrlint.plugins.xcube.util import is_spatial_var from xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/xcube/rules/time_naming.py b/xrlint/plugins/xcube/rules/time_naming.py index 5c2db10..08d3f5e 100644 --- a/xrlint/plugins/xcube/rules/time_naming.py +++ b/xrlint/plugins/xcube/rules/time_naming.py @@ -5,7 +5,7 @@ from xrlint.node import DatasetNode from xrlint.plugins.xcube.constants import TIME_NAME -from xrlint.plugins.xcube.rules import plugin +from xrlint.plugins.xcube.plugin import plugin from xrlint.rule import RuleOp, RuleContext From 19ae644563f8a9f6bdf16983d24db1733c0449b6 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 20 Jan 2025 17:21:34 +0100 Subject: [PATCH 02/17] added xcube processor --- xrlint/plugins/xcube/processors/__init__.py | 0 xrlint/plugins/xcube/processors/mldataset.py | 29 ++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 xrlint/plugins/xcube/processors/__init__.py create mode 100644 xrlint/plugins/xcube/processors/mldataset.py diff --git a/xrlint/plugins/xcube/processors/__init__.py b/xrlint/plugins/xcube/processors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/xrlint/plugins/xcube/processors/mldataset.py b/xrlint/plugins/xcube/processors/mldataset.py new file mode 100644 index 0000000..603b720 --- /dev/null +++ b/xrlint/plugins/xcube/processors/mldataset.py @@ -0,0 +1,29 @@ +from typing import Any + +import xarray as xr + +from 
xrlint.plugins.xcube.plugin import plugin +from xrlint.processor import ProcessorOp +from xrlint.result import Message + + +@plugin.define_processor("multi-level-dataset") +class MultiLevelDatasetProcessor(ProcessorOp): + """This processor should be used with `files: ["**/*.levels"]`.""" + + def preprocess( + self, file_path: str, opener_options: dict[str, Any] + ) -> list[tuple[xr.Dataset, str]]: + engine = opener_options.pop("engine", "zarr") + new_file_path = file_path + "/" + "0.zarr" + return [ + ( + xr.open_dataset(new_file_path, engine=engine, **opener_options), + new_file_path, + ) + ] + + def postprocess( + self, messages: list[list[Message]], file_path: str + ) -> list[Message]: + return messages[0] From 1b2c78c497c6524bd415e384022e6ffc3c5754a6 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 20 Jan 2025 17:24:48 +0100 Subject: [PATCH 03/17] new plugin module --- xrlint/plugins/core/__init__.py | 2 +- xrlint/plugins/core/plugin.py | 9 +++++++++ xrlint/plugins/core/rules/__init__.py | 9 --------- xrlint/plugins/core/rules/coords_for_dims.py | 2 +- xrlint/plugins/core/rules/dataset_title_attr.py | 2 +- xrlint/plugins/core/rules/flags.py | 2 +- xrlint/plugins/core/rules/grid_mappings.py | 2 +- xrlint/plugins/core/rules/lat_lon_coordinate.py | 2 +- xrlint/plugins/core/rules/no_empty_attrs.py | 2 +- xrlint/plugins/core/rules/no_empty_chunks.py | 2 +- xrlint/plugins/core/rules/time_coordinate.py | 2 +- xrlint/plugins/core/rules/var_units_attr.py | 2 +- 12 files changed, 19 insertions(+), 19 deletions(-) create mode 100644 xrlint/plugins/core/plugin.py diff --git a/xrlint/plugins/core/__init__.py b/xrlint/plugins/core/__init__.py index 2457614..1dc64ed 100644 --- a/xrlint/plugins/core/__init__.py +++ b/xrlint/plugins/core/__init__.py @@ -4,7 +4,7 @@ def export_plugin() -> Plugin: - from .rules import plugin + from .plugin import plugin import_submodules("xrlint.plugins.core.rules") diff --git a/xrlint/plugins/core/plugin.py b/xrlint/plugins/core/plugin.py new file mode 100644 index 0000000..ab64c5d --- /dev/null +++ b/xrlint/plugins/core/plugin.py @@ -0,0 +1,9 @@ +from xrlint.constants import CORE_PLUGIN_NAME +from xrlint.plugin import new_plugin +from xrlint.version import version + +plugin = new_plugin( + name=CORE_PLUGIN_NAME, + version=version, + ref="xrlint.plugins.core:export_plugin", +) diff --git a/xrlint/plugins/core/rules/__init__.py b/xrlint/plugins/core/rules/__init__.py index ab64c5d..e69de29 100644 --- a/xrlint/plugins/core/rules/__init__.py +++ b/xrlint/plugins/core/rules/__init__.py @@ -1,9 +0,0 @@ -from xrlint.constants import CORE_PLUGIN_NAME -from xrlint.plugin import new_plugin -from xrlint.version import version - -plugin = new_plugin( - name=CORE_PLUGIN_NAME, - version=version, - ref="xrlint.plugins.core:export_plugin", -) diff --git a/xrlint/plugins/core/rules/coords_for_dims.py b/xrlint/plugins/core/rules/coords_for_dims.py index 8a4a056..b6c5f44 100644 --- a/xrlint/plugins/core/rules/coords_for_dims.py +++ b/xrlint/plugins/core/rules/coords_for_dims.py @@ -1,5 +1,5 @@ from xrlint.node import DatasetNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp from xrlint.util.formatting import format_item diff --git a/xrlint/plugins/core/rules/dataset_title_attr.py b/xrlint/plugins/core/rules/dataset_title_attr.py index cd27ddc..4abb5e2 100644 --- a/xrlint/plugins/core/rules/dataset_title_attr.py +++ b/xrlint/plugins/core/rules/dataset_title_attr.py @@ -1,5 +1,5 @@ from 
xrlint.node import DatasetNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/core/rules/flags.py b/xrlint/plugins/core/rules/flags.py index 0138680..7d88213 100644 --- a/xrlint/plugins/core/rules/flags.py +++ b/xrlint/plugins/core/rules/flags.py @@ -3,7 +3,7 @@ import numpy as np from xrlint.node import DataArrayNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleOp, RuleContext diff --git a/xrlint/plugins/core/rules/grid_mappings.py b/xrlint/plugins/core/rules/grid_mappings.py index 0f85c76..2050c3e 100644 --- a/xrlint/plugins/core/rules/grid_mappings.py +++ b/xrlint/plugins/core/rules/grid_mappings.py @@ -1,5 +1,5 @@ from xrlint.node import DatasetNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/core/rules/lat_lon_coordinate.py b/xrlint/plugins/core/rules/lat_lon_coordinate.py index bdcb6e1..903cf15 100644 --- a/xrlint/plugins/core/rules/lat_lon_coordinate.py +++ b/xrlint/plugins/core/rules/lat_lon_coordinate.py @@ -3,7 +3,7 @@ import xarray as xr from xrlint.node import DataArrayNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext from xrlint.rule import RuleOp diff --git a/xrlint/plugins/core/rules/no_empty_attrs.py b/xrlint/plugins/core/rules/no_empty_attrs.py index 22c3ad3..7405c8e 100644 --- a/xrlint/plugins/core/rules/no_empty_attrs.py +++ b/xrlint/plugins/core/rules/no_empty_attrs.py @@ -1,5 +1,5 @@ from xrlint.node import AttrsNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.result import Suggestion from xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/core/rules/no_empty_chunks.py b/xrlint/plugins/core/rules/no_empty_chunks.py index a4b7186..0e842f9 100644 --- a/xrlint/plugins/core/rules/no_empty_chunks.py +++ b/xrlint/plugins/core/rules/no_empty_chunks.py @@ -1,5 +1,5 @@ from xrlint.node import DataArrayNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleExit, RuleOp diff --git a/xrlint/plugins/core/rules/time_coordinate.py b/xrlint/plugins/core/rules/time_coordinate.py index 6842ca9..c2156c3 100644 --- a/xrlint/plugins/core/rules/time_coordinate.py +++ b/xrlint/plugins/core/rules/time_coordinate.py @@ -1,5 +1,5 @@ from xrlint.node import DataArrayNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp diff --git a/xrlint/plugins/core/rules/var_units_attr.py b/xrlint/plugins/core/rules/var_units_attr.py index cec977f..81d2ba7 100644 --- a/xrlint/plugins/core/rules/var_units_attr.py +++ b/xrlint/plugins/core/rules/var_units_attr.py @@ -1,5 +1,5 @@ from xrlint.node import DataArrayNode -from xrlint.plugins.core.rules import plugin +from xrlint.plugins.core.plugin import plugin from xrlint.rule import RuleContext, RuleOp From 5db6ee5ffdb6791d4679bcea868d6a95b1f0685a Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 20 Jan 2025 18:37:57 +0100 Subject: [PATCH 04/17] experimenting with ml-dataset config --- xrlint/plugins/xcube/__init__.py | 12 +++++++- xrlint/plugins/xcube/processors/mldataset.py | 30 ++++++++++++++------ 2 files changed, 
33 insertions(+), 9 deletions(-) diff --git a/xrlint/plugins/xcube/__init__.py b/xrlint/plugins/xcube/__init__.py index 31b23c4..762eb86 100644 --- a/xrlint/plugins/xcube/__init__.py +++ b/xrlint/plugins/xcube/__init__.py @@ -7,11 +7,12 @@ def export_plugin() -> Plugin: from .plugin import plugin import_submodules("xrlint.plugins.xcube.rules") - # import_submodules("xrlint.plugins.xcube.processors") + import_submodules("xrlint.plugins.xcube.processors") plugin.configs["recommended"] = Config.from_value( { "name": "xcube-recommended", + "ignores": ["**/*.levels"], "plugins": { "xcube": plugin, }, @@ -30,11 +31,20 @@ def export_plugin() -> Plugin: plugin.configs["all"] = Config.from_value( { "name": "xcube-all", + "ignores": ["**/*.levels"], "plugins": { "xcube": plugin, }, "rules": {f"xcube/{rule_id}": "error" for rule_id in plugin.rules.keys()}, } ) + plugin.configs["multi-level-datasets"] = Config.from_value( + { + "name": "xcube-multi-level-datasets", + "files": ["**/*.levels"], + "ignores": ["**/*.zarr", "**/*.nc"], + "processor": "xcube/multi-level-dataset", + } + ) return plugin diff --git a/xrlint/plugins/xcube/processors/mldataset.py b/xrlint/plugins/xcube/processors/mldataset.py index 603b720..ecaaa2b 100644 --- a/xrlint/plugins/xcube/processors/mldataset.py +++ b/xrlint/plugins/xcube/processors/mldataset.py @@ -1,3 +1,4 @@ +import itertools from typing import Any import xarray as xr @@ -15,15 +16,28 @@ def preprocess( self, file_path: str, opener_options: dict[str, Any] ) -> list[tuple[xr.Dataset, str]]: engine = opener_options.pop("engine", "zarr") - new_file_path = file_path + "/" + "0.zarr" - return [ - ( - xr.open_dataset(new_file_path, engine=engine, **opener_options), - new_file_path, - ) - ] + + level_datasets = [] + for level in itertools.count(): + level_path = f"{file_path}/{level}.zarr" + try: + level_dataset = xr.open_dataset( + level_path, engine=engine, **opener_options + ) + except FileNotFoundError: + break + level_datasets.append((level_dataset, level_path)) + + for level, (level_dataset, level_path) in enumerate(level_datasets): + level_dataset.attrs["_LEVEL_INFO"] = { + "index": level, + "count": len(level_datasets), + "datasets": [ds for ds, _ in level_datasets], + } + + return level_datasets def postprocess( self, messages: list[list[Message]], file_path: str ) -> list[Message]: - return messages[0] + return list(itertools.chain(*messages)) From 2eca8d41203f043003bf67a16eccb0e358f0b773 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Mon, 20 Jan 2025 18:51:55 +0100 Subject: [PATCH 05/17] Node paths now contain the split index if dataset has been opened by a processor --- CHANGES.md | 6 +++++- tests/_linter/test_rulectx.py | 5 +++-- tests/test_linter.py | 4 ++-- xrlint/_linter/apply.py | 10 +++++++++- xrlint/_linter/rulectx.py | 9 +++++++-- xrlint/_linter/verify.py | 12 ++++++++---- 6 files changed, 34 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 59c884d..f8cc5bc 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,11 @@ ## Version 0.4.0 (in development) - +- Internal changes: + - Inbuilt plugin now get their `plugin` instance from + `xrlint.plugins..plugin` module. + - Node paths now contain the split index if dataset + has been opened by a processor. 
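[Editor's note] For readers unfamiliar with the xcube `*.levels` layout handled in PATCH 04 above: a `.levels` entry is a directory containing one Zarr dataset per resolution level, named `0.zarr`, `1.zarr`, and so on. A minimal sketch of the level-probing loop, mirroring `MultiLevelDatasetProcessor.preprocess()` (the `FileNotFoundError` stop condition comes from the patch; the helper name and the hardcoded engine are illustrative):

    import itertools

    import xarray as xr

    def open_level_datasets(levels_path: str) -> list[tuple[xr.Dataset, str]]:
        # Probe <levels_path>/0.zarr, 1.zarr, ... until a level is missing.
        level_datasets = []
        for level in itertools.count():
            level_path = f"{levels_path}/{level}.zarr"
            try:
                dataset = xr.open_dataset(level_path, engine="zarr")
            except FileNotFoundError:
                break
            level_datasets.append((dataset, level_path))
        return level_datasets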
## Version 0.3.0 (from 2025-01-20) diff --git a/tests/_linter/test_rulectx.py b/tests/_linter/test_rulectx.py index a74600b..d7ce5fc 100644 --- a/tests/_linter/test_rulectx.py +++ b/tests/_linter/test_rulectx.py @@ -12,14 +12,15 @@ class RuleContextImplTest(TestCase): def test_defaults(self): config = Config() dataset = xr.Dataset() - context = RuleContextImpl(config, dataset, "./ds.zarr") + context = RuleContextImpl(config, dataset, "./ds.zarr", None) self.assertIs(config, context.config) self.assertIs(dataset, context.dataset) self.assertEqual({}, context.settings) self.assertEqual("./ds.zarr", context.file_path) + self.assertEqual(None, context.file_index) def test_report(self): - context = RuleContextImpl(Config(), xr.Dataset(), "./ds.zarr") + context = RuleContextImpl(Config(), xr.Dataset(), "./ds.zarr", None) with context.use_state(rule_id="no-xxx"): context.report( "What the heck do you mean?", diff --git a/tests/test_linter.py b/tests/test_linter.py index 1dfd196..f3b5a29 100644 --- a/tests/test_linter.py +++ b/tests/test_linter.py @@ -277,13 +277,13 @@ def test_processor(self): [ Message( message="Dataset does not have data variables", - node_path="dataset", + node_path="dataset[0]", rule_id="test/dataset-without-data-vars", severity=1, ), Message( message="Dataset does not have data variables", - node_path="dataset", + node_path="dataset[1]", rule_id="test/dataset-without-data-vars", severity=1, ), diff --git a/xrlint/_linter/apply.py b/xrlint/_linter/apply.py index 7c0993c..0080eb8 100644 --- a/xrlint/_linter/apply.py +++ b/xrlint/_linter/apply.py @@ -30,7 +30,15 @@ def apply_rule( _visit_dataset_node( rule_op, context, - DatasetNode(parent=None, path="dataset", dataset=context.dataset), + DatasetNode( + parent=None, + path=( + "dataset" + if context.file_index is None + else f"dataset[{context.file_index}]" + ), + dataset=context.dataset, + ), ) except RuleExit: # This is ok, the rule requested it. 
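[Editor's note] The `file_index` threaded through in PATCH 05 makes messages from processor-produced datasets distinguishable in a report. A tiny sketch of the resulting root-node path, copying the expression from `apply_rule()` above (the helper function itself is illustrative):

    def dataset_node_path(file_index: int | None) -> str:
        # Plain "dataset" for a directly opened dataset,
        # "dataset[<i>]" for the i-th dataset yielded by a processor.
        return "dataset" if file_index is None else f"dataset[{file_index}]"

    assert dataset_node_path(None) == "dataset"
    assert dataset_node_path(1) == "dataset[1]"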
diff --git a/xrlint/_linter/rulectx.py b/xrlint/_linter/rulectx.py index d86e8e2..db3252f 100644 --- a/xrlint/_linter/rulectx.py +++ b/xrlint/_linter/rulectx.py @@ -16,13 +16,16 @@ def __init__( config: Config, dataset: xr.Dataset, file_path: str, + file_index: int | None, ): assert config is not None assert dataset is not None assert file_path is not None + assert file_index is None or isinstance(file_index, int) self._config = config self._dataset = dataset self._file_path = file_path + self._file_index = file_index self.messages: list[Message] = [] self.rule_id: str | None = None self.severity: Literal[1, 2] = SEVERITY_ERROR @@ -34,7 +37,6 @@ def config(self) -> Config: @property def settings(self) -> dict[str, Any]: - assert self._config is not None return self._config.settings or {} @property @@ -43,9 +45,12 @@ def dataset(self) -> xr.Dataset: @property def file_path(self) -> str: - assert self._file_path is not None return self._file_path + @property + def file_index(self) -> int | None: + return self._file_index + def report( self, message: str, diff --git a/xrlint/_linter/verify.py b/xrlint/_linter/verify.py index fa95148..328bb9c 100644 --- a/xrlint/_linter/verify.py +++ b/xrlint/_linter/verify.py @@ -18,7 +18,7 @@ def verify_dataset(config: Config, dataset: Any, file_path: str | None): assert isinstance(file_path, (str, type(None))) if isinstance(dataset, xr.Dataset): file_path = file_path or _get_file_path_for_dataset(dataset) - messages = _verify_dataset(config, dataset, file_path) + messages = _verify_dataset(config, dataset, file_path, None) else: file_path = file_path or _get_file_path_for_source(dataset) messages = _open_and_verify_dataset(config, dataset, file_path) @@ -29,12 +29,13 @@ def _verify_dataset( config: Config, dataset: xr.Dataset, file_path: str, + file_index: int | None, ) -> list[Message]: assert isinstance(config, Config) assert isinstance(dataset, xr.Dataset) assert isinstance(file_path, str) - context = RuleContextImpl(config, dataset, file_path) + context = RuleContextImpl(config, dataset, file_path, file_index) if not config.rules: context.report("No rules configured or applicable.", fatal=True) @@ -61,7 +62,10 @@ def _open_and_verify_dataset( except (OSError, ValueError, TypeError) as e: return [Message(message=str(e), fatal=True, severity=2)] return processor_op.postprocess( - [_verify_dataset(config, ds, path) for ds, path in ds_path_list], + [ + _verify_dataset(config, ds, path, i) + for i, (ds, path) in enumerate(ds_path_list) + ], file_path, ) else: @@ -70,7 +74,7 @@ def _open_and_verify_dataset( except (OSError, ValueError, TypeError) as e: return [Message(message=str(e), fatal=True, severity=2)] with dataset: - return _verify_dataset(config, dataset, file_path) + return _verify_dataset(config, dataset, file_path, None) def _open_dataset( From a81298660d11ba64b2d733b153fab0d1f01063e0 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 22 Jan 2025 12:52:28 +0100 Subject: [PATCH 06/17] experimenting with ml-dataset config --- tests/plugins/xcube/test_plugin.py | 4 +- xrlint/plugins/xcube/__init__.py | 66 +++++++++++--------- xrlint/plugins/xcube/processors/mldataset.py | 13 ++++ 3 files changed, 52 insertions(+), 31 deletions(-) diff --git a/tests/plugins/xcube/test_plugin.py b/tests/plugins/xcube/test_plugin.py index e0f4654..240d074 100644 --- a/tests/plugins/xcube/test_plugin.py +++ b/tests/plugins/xcube/test_plugin.py @@ -32,9 +32,9 @@ def test_configs_complete(self): all_rule_names = set(f"xcube/{k}" for k in plugin.rules.keys()) 
self.assertEqual( all_rule_names, - set(plugin.configs["all"].rules.keys()), + set(plugin.configs["all"].configs[-1].rules.keys()), ) self.assertEqual( all_rule_names, - set(plugin.configs["recommended"].rules.keys()), + set(plugin.configs["recommended"].configs[-1].rules.keys()), ) diff --git a/xrlint/plugins/xcube/__init__.py b/xrlint/plugins/xcube/__init__.py index 762eb86..ee30ee4 100644 --- a/xrlint/plugins/xcube/__init__.py +++ b/xrlint/plugins/xcube/__init__.py @@ -1,4 +1,4 @@ -from xrlint.config import Config +from xrlint.config import Config, ConfigList from xrlint.plugin import Plugin from xrlint.util.importutil import import_submodules @@ -9,42 +9,50 @@ def export_plugin() -> Plugin: import_submodules("xrlint.plugins.xcube.rules") import_submodules("xrlint.plugins.xcube.processors") - plugin.configs["recommended"] = Config.from_value( + common_configs = [ { - "name": "xcube-recommended", - "ignores": ["**/*.levels"], "plugins": { "xcube": plugin, }, - "rules": { - "xcube/any-spatial-data-var": "error", - "xcube/cube-dims-order": "error", - "xcube/data-var-colors": "warn", - "xcube/grid-mapping-naming": "warn", - "xcube/increasing-time": "error", - "xcube/lat-lon-naming": "error", - "xcube/single-grid-mapping": "error", - "xcube/time-naming": "error", - }, - } - ) - plugin.configs["all"] = Config.from_value( + }, { - "name": "xcube-all", - "ignores": ["**/*.levels"], - "plugins": { - "xcube": plugin, - }, - "rules": {f"xcube/{rule_id}": "error" for rule_id in plugin.rules.keys()}, - } - ) - plugin.configs["multi-level-datasets"] = Config.from_value( + # Add *.levels to globally included list of file types + "files": ["**/*.levels"], + }, { - "name": "xcube-multi-level-datasets", + # Specify a processor for *.levels files "files": ["**/*.levels"], - "ignores": ["**/*.zarr", "**/*.nc"], "processor": "xcube/multi-level-dataset", - } + }, + ] + + plugin.configs["recommended"] = ConfigList.from_value( + [ + *common_configs, + { + "rules": { + "xcube/any-spatial-data-var": "error", + "xcube/cube-dims-order": "error", + "xcube/data-var-colors": "warn", + "xcube/grid-mapping-naming": "warn", + "xcube/increasing-time": "error", + "xcube/lat-lon-naming": "error", + "xcube/single-grid-mapping": "error", + "xcube/time-naming": "error", + }, + }, + ] + ) + + plugin.configs["all"] = ConfigList.from_value( + [ + *common_configs, + { + "rules": { + f"xcube/{rule_id}": "error" for rule_id in plugin.rules.keys() + }, + }, + ] ) return plugin diff --git a/xrlint/plugins/xcube/processors/mldataset.py b/xrlint/plugins/xcube/processors/mldataset.py index ecaaa2b..edcb918 100644 --- a/xrlint/plugins/xcube/processors/mldataset.py +++ b/xrlint/plugins/xcube/processors/mldataset.py @@ -1,6 +1,8 @@ import itertools +import json from typing import Any +import fsspec import xarray as xr from xrlint.plugins.xcube.plugin import plugin @@ -17,6 +19,15 @@ def preprocess( ) -> list[tuple[xr.Dataset, str]]: engine = opener_options.pop("engine", "zarr") + levels_meta_path = f"{file_path}/.zlevels" + try: + with fsspec.open(levels_meta_path, mode="wt") as stream: + levels_meta_content = json.load(stream.read()) + except FileNotFoundError: + levels_meta_path = None + levels_meta_content = None + pass + level_datasets = [] for level in itertools.count(): level_path = f"{file_path}/{level}.zarr" @@ -30,6 +41,8 @@ def preprocess( for level, (level_dataset, level_path) in enumerate(level_datasets): level_dataset.attrs["_LEVEL_INFO"] = { + "meta_path": levels_meta_path, + "meta_content": levels_meta_content, "index": 
level, "count": len(level_datasets), "datasets": [ds for ds, _ in level_datasets], From b879fa7d7d7612d85982f251b205a4648e3b732d Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 22 Jan 2025 13:04:12 +0100 Subject: [PATCH 07/17] plugin configs are always lists --- examples/virtual_plugin_config.py | 14 ++++++++------ tests/cli/configs/recommended.py | 2 +- tests/cli/test_config.py | 18 +++++++++--------- tests/test_config.py | 13 +++++++++++-- tests/test_plugin.py | 14 ++++++++------ xrlint/config.py | 24 ++++++++++++++---------- xrlint/plugin.py | 15 ++++++++++++--- 7 files changed, 63 insertions(+), 37 deletions(-) diff --git a/examples/virtual_plugin_config.py b/examples/virtual_plugin_config.py index 858d878..2fe5546 100644 --- a/examples/virtual_plugin_config.py +++ b/examples/virtual_plugin_config.py @@ -37,12 +37,14 @@ def export_configs(): # Add more rules here... }, "configs": { - "recommended": { - "rules": { - "hello/good-title": "warn", - # Configure more rules here... - }, - }, + "recommended": [ + { + "rules": { + "hello/good-title": "warn", + # Configure more rules here... + }, + } + ], # Add more configurations here... }, }, diff --git a/tests/cli/configs/recommended.py b/tests/cli/configs/recommended.py index 23bc763..6d1b3f8 100644 --- a/tests/cli/configs/recommended.py +++ b/tests/cli/configs/recommended.py @@ -11,7 +11,7 @@ def export_configs(): } }, core.configs["recommended"], - xcube.configs["recommended"], + *xcube.configs["recommended"].configs, { "rules": { "dataset-title-attr": "error", diff --git a/tests/cli/test_config.py b/tests/cli/test_config.py index ce70c6b..bd50f1d 100644 --- a/tests/cli/test_config.py +++ b/tests/cli/test_config.py @@ -117,13 +117,13 @@ def test_read_config_yaml_with_format_error(self): read_config_list(config_path) def test_read_config_yaml_with_type_error(self): - with text_file("config.yaml", "prime: 97") as config_path: + with text_file("config.yaml", "97") as config_path: with pytest.raises( ConfigError, match=( - "'config.yaml: configuration list must be of" - " type ConfigList|list\\[Config|dict|str\\]," - " but got dict'" + r"config\.yaml\: config_list must be of" + r" type ConfigList \| list\[Config \| dict \| str\]," + r" but got int" ), ): read_config_list(config_path) @@ -171,10 +171,10 @@ def test_read_config_py_with_invalid_config_list(self): with pytest.raises( ConfigError, match=( - ".py: return value of export_configs\\(\\):" - " configuration list must be of type" - " ConfigList|list\\[Config|dict|str\\]," - " but got int" + r"\.py: return value of export_configs\(\):" + r" config_list must be of type" + r" ConfigList \| list\[Config\ | dict \| str\]," + r" but got int" ), ): read_config_list(config_path) @@ -198,7 +198,7 @@ def test_read_config_yaml(self): def assert_ok(self, config_list: ConfigList): self.assertIsInstance(config_list, ConfigList) - self.assertEqual(4, len(config_list.configs)) + self.assertEqual(7, len(config_list.configs)) config = config_list.compute_config("test.zarr") self.assertIsInstance(config, Config) self.assertEqual("", config.name) diff --git a/tests/test_config.py b/tests/test_config.py index dcdfd5b..6ac9575 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -140,16 +140,25 @@ def test_from_value_ok(self): self.assertIsInstance(config_list, ConfigList) self.assertEqual([Config()], config_list.configs) + config_list = ConfigList.from_value({}) + self.assertIsInstance(config_list, ConfigList) + self.assertEqual([Config()], config_list.configs) + + config = 
Config.from_value({}) + config_list = ConfigList.from_value(config) + self.assertIsInstance(config_list, ConfigList) + self.assertIs(config, config_list.configs[0]) + # noinspection PyMethodMayBeStatic def test_from_value_fail(self): with pytest.raises( TypeError, match=( r"config_list must be of type" - r" ConfigList \| list\[Config \| dict\], but got dict" + r" ConfigList \| list\[Config \| dict \| str\], but got int" ), ): - ConfigList.from_value({}) + ConfigList.from_value(264) def test_compute_config(self): config_list = ConfigList([Config()]) diff --git a/tests/test_plugin.py b/tests/test_plugin.py index 3c3bafb..e909050 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -38,12 +38,14 @@ class MyRule2(RuleOp): "r2": MyRule2, }, "configs": { - "recommended": { - "rules": { - "hello/r1": "warn", - "hello/r2": "error", - }, - }, + "recommended": [ + { + "rules": { + "hello/r1": "warn", + "hello/r2": "error", + }, + } + ], }, } ) diff --git a/xrlint/config.py b/xrlint/config.py index 6d26d72..75d66b4 100644 --- a/xrlint/config.py +++ b/xrlint/config.py @@ -359,12 +359,14 @@ def from_value(cls, value: Any, value_name: str | None = None) -> "ConfigList": Args: value: A `ConfigList` object or `list` of items which can be converted into `Config` objects including configuration - names of tyype `str`. The latter are resolved against + names of type `str`. The latter are resolved against the plugin configurations seen so far in the list. value_name: A value's name. Returns: A `ConfigList` object. """ + if isinstance(value, (Config, dict)): + return ConfigList(configs=[Config.from_value(value)]) return super().from_value(value, value_name=value_name) @classmethod @@ -375,12 +377,13 @@ def _from_sequence(cls, value: Sequence, value_name: str) -> "ConfigList": if isinstance(item, str): if CORE_PLUGIN_NAME not in plugins: plugins.update({CORE_PLUGIN_NAME: get_core_plugin()}) - config = cls._get_named_config(item, plugins) + new_configs = cls._get_named_config_list(item, plugins) else: - config = Config.from_value(item) - configs.append(config) - plugins.update(config.plugins if config.plugins else {}) - return ConfigList(configs) + new_configs = [Config.from_value(item)] + for config in new_configs: + configs.append(config) + plugins.update(config.plugins if config.plugins else {}) + return ConfigList(configs=configs) @classmethod def value_name(cls) -> str: @@ -388,10 +391,12 @@ def value_name(cls) -> str: @classmethod def value_type_name(cls) -> str: - return "ConfigList | list[Config | dict]" + return "ConfigList | list[Config | dict | str]" @classmethod - def _get_named_config(cls, config_spec: str, plugins: dict[str, "Plugin"]): + def _get_named_config_list( + cls, config_spec: str, plugins: dict[str, "Plugin"] + ) -> list[Config]: plugin_name, config_name = ( config_spec.split("/", maxsplit=1) if "/" in config_spec @@ -400,5 +405,4 @@ def _get_named_config(cls, config_spec: str, plugins: dict[str, "Plugin"]): plugin: Plugin | None = plugins.get(plugin_name) if plugin is None or not plugin.configs or config_name not in plugin.configs: raise ValueError(f"configuration {config_spec!r} not found") - config_value = plugin.configs[config_name] - return config_value + return ConfigList.from_value(plugin.configs[config_name]).configs diff --git a/xrlint/plugin.py b/xrlint/plugin.py index 1900d79..12bc0a7 100644 --- a/xrlint/plugin.py +++ b/xrlint/plugin.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from typing import Any, Callable, Literal, Type -from xrlint.config import 
Config +from xrlint.config import Config, ConfigList from xrlint.processor import Processor, ProcessorOp, define_processor from xrlint.rule import Rule, RuleOp, define_rule from xrlint.util.constructible import MappingConstructible @@ -50,8 +50,8 @@ class Plugin(MappingConstructible, JsonSerializable): """A dictionary containing named processors. """ - configs: dict[str, Config] = field(default_factory=dict) - """A dictionary containing named configurations.""" + configs: dict[str, list[Config]] = field(default_factory=dict) + """A dictionary containing named configuration lists.""" def define_rule( self, @@ -99,6 +99,15 @@ def define_processor( registry=self.processors, ) + def define_config( + self, + name: str, + config_value: list[Config | dict[str, Any]] | Config | dict[str, Any], + ) -> list[Config]: + config_list = ConfigList.from_value(config_value) + self.configs[name] = config_list.configs + return config_list.configs + @classmethod def _from_str(cls, value: str, value_name: str) -> "Plugin": plugin, plugin_ref = import_value( From e6f36a260c994c70bbab293009cdea55f512cbdb Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Wed, 22 Jan 2025 16:38:59 +0100 Subject: [PATCH 08/17] plugin configs are always lists; included xcube processor in named config --- CHANGES.md | 17 ++++++++++ docs/config.md | 2 +- docs/todo.md | 4 --- examples/plugin_config.py | 35 ++++++++++---------- examples/virtual_plugin_config.py | 2 +- tests/cli/configs/recommended.py | 6 ++-- tests/cli/test_config.py | 14 ++++---- tests/cli/test_main.py | 1 - tests/plugins/core/test_plugin.py | 4 +-- tests/plugins/xcube/test_plugin.py | 4 +-- tests/test_config.py | 8 ++--- tests/test_examples.py | 4 +-- xrlint/cli/config.py | 2 +- xrlint/config.py | 8 ++--- xrlint/plugin.py | 20 ++++++++--- xrlint/plugins/core/__init__.py | 12 ++++--- xrlint/plugins/xcube/__init__.py | 11 +++--- xrlint/plugins/xcube/processors/mldataset.py | 5 ++- xrlint/util/importutil.py | 2 +- 19 files changed, 92 insertions(+), 69 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index f8cc5bc..f166403 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,23 @@ ## Version 0.4.0 (in development) +- Now supporting xcube multi-level datasets `*.levels`: + - Added xcube plugin processor `"xcube/multi-level-dataset"` that is used + inside the predefined xcube configurations "all" and "recommended". + +- Changed type of `Plugin.configs` from `dict[str, Config]` to + `dict[str, list[Config]]`. +- Introduced method `Plugin.define_config` which defines a named plugin + configuration. It takes a name and a configuration object or list of + configuration objects. +- Changed they way how configuration is defined and exported from + Python configuration files: + - Renamed function that exports configuration from `export_configs` + into `export_config`. + - The returned value should be a list of values that can be + converted into configuration objects: mixed `Config` instances, + dictionary, or a name that refers to a named configuration of a plugin. + - Internal changes: - Inbuilt plugin now get their `plugin` instance from `xrlint.plugins..plugin` module. 
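[Editor's note] A hedged usage sketch of the `Plugin.define_config()` method added in PATCH 07 and described in the changelog above. Per the `ConfigList.from_value()` change in the same patch, a single config-like dict is normalized to a one-element list; the plugin and rule names here are purely illustrative:

    from xrlint.plugin import new_plugin

    plugin = new_plugin(name="demo", version="0.0.0")
    configs = plugin.define_config(
        "recommended",
        {"rules": {"demo/some-rule": "warn"}},  # single dict -> [Config]
    )
    assert plugin.configs["recommended"] is configs
    assert len(configs) == 1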
diff --git a/docs/config.md b/docs/config.md index c3819fb..3061703 100644 --- a/docs/config.md +++ b/docs/config.md @@ -43,7 +43,7 @@ Same using JSON: And as Python script: ```python -def export_configs(): +def export_config(): return [ {"files": ["**/*.zarr", "**/*.nc"]}, { diff --git a/docs/todo.md b/docs/todo.md index b6e1afa..8931dfd 100644 --- a/docs/todo.md +++ b/docs/todo.md @@ -14,10 +14,6 @@ ## Desired - project logo -- support validating xcube 'levels' format. Options: - - implement xarray backend so we can open them using `xr.open_dataset` - with `opener_options: {"engine": "xc-levels"}`. - - implement a `xrlint.processor.Processor` for that purpose. - add some more tests so we reach 99% coverage - support rule op args/kwargs schema validation - Support `RuleTest.expected`, it is currently unused diff --git a/examples/plugin_config.py b/examples/plugin_config.py index b08139e..e98e8b1 100644 --- a/examples/plugin_config.py +++ b/examples/plugin_config.py @@ -3,27 +3,11 @@ using the `Plugin` class and its `define_rule()` decorator method. """ -from xrlint.config import Config from xrlint.node import DatasetNode from xrlint.plugin import new_plugin from xrlint.rule import RuleContext, RuleOp -plugin = new_plugin( - name="hello-plugin", - version="1.0.0", - configs={ - # "configs" entries must be `Config` objects! - "recommended": Config.from_value( - { - "rules": { - "hello/good-title": "warn", - # Configure more rules here... - }, - } - ), - # Add more configurations here... - }, -) +plugin = new_plugin(name="hello-plugin", version="1.0.0") @plugin.define_rule("good-title") @@ -42,7 +26,22 @@ def dataset(self, ctx: RuleContext, node: DatasetNode): # Define more rules here... -def export_configs(): +plugin.define_config( + "recommended", + [ + { + "rules": { + "hello/good-title": "warn", + # Configure more rules here... + }, + } + ], +) + +# Add more configurations here... + + +def export_config(): return [ # Use "hello" plugin { diff --git a/examples/virtual_plugin_config.py b/examples/virtual_plugin_config.py index 2fe5546..da9a3a8 100644 --- a/examples/virtual_plugin_config.py +++ b/examples/virtual_plugin_config.py @@ -22,7 +22,7 @@ def dataset(self, ctx: RuleContext, node: DatasetNode): # Define more rules here... 
-def export_configs(): +def export_config(): return [ # Define and use "hello" plugin { diff --git a/tests/cli/configs/recommended.py b/tests/cli/configs/recommended.py index 6d1b3f8..f998c12 100644 --- a/tests/cli/configs/recommended.py +++ b/tests/cli/configs/recommended.py @@ -1,4 +1,4 @@ -def export_configs(): +def export_config(): import xrlint.plugins.core import xrlint.plugins.xcube @@ -10,8 +10,8 @@ def export_configs(): "xcube": xcube, } }, - core.configs["recommended"], - *xcube.configs["recommended"].configs, + *core.configs["recommended"], + *xcube.configs["recommended"], { "rules": { "dataset-title-attr": "error", diff --git a/tests/cli/test_config.py b/tests/cli/test_config.py index bd50f1d..6bd49a5 100644 --- a/tests/cli/test_config.py +++ b/tests/cli/test_config.py @@ -34,7 +34,7 @@ """ py_text = """ -def export_configs(): +def export_config(): return [ { "name": "py-test", @@ -141,14 +141,14 @@ def test_read_config_py_no_export(self): with pytest.raises( ConfigError, match=( - "config_1002.py: attribute 'export_configs'" + "config_1002.py: attribute 'export_config'" " not found in module 'config_1002'" ), ): read_config_list(config_path) def test_read_config_py_with_value_error(self): - py_code = "def export_configs():\n raise ValueError('value is useless!')\n" + py_code = "def export_config():\n raise ValueError('value is useless!')\n" with text_file(self.new_config_py(), py_code) as config_path: with pytest.raises( ValueError, @@ -157,7 +157,7 @@ def test_read_config_py_with_value_error(self): read_config_list(config_path) def test_read_config_py_with_os_error(self): - py_code = "def export_configs():\n raise OSError('where is my hat?')\n" + py_code = "def export_config():\n raise OSError('where is my hat?')\n" with text_file(self.new_config_py(), py_code) as config_path: with pytest.raises( ConfigError, @@ -166,12 +166,12 @@ def test_read_config_py_with_os_error(self): read_config_list(config_path) def test_read_config_py_with_invalid_config_list(self): - py_code = "def export_configs():\n return 42\n" + py_code = "def export_config():\n return 42\n" with text_file(self.new_config_py(), py_code) as config_path: with pytest.raises( ConfigError, match=( - r"\.py: return value of export_configs\(\):" + r"\.py: return value of export_config\(\):" r" config_list must be of type" r" ConfigList \| list\[Config\ | dict \| str\]," r" but got int" @@ -201,7 +201,7 @@ def assert_ok(self, config_list: ConfigList): self.assertEqual(7, len(config_list.configs)) config = config_list.compute_config("test.zarr") self.assertIsInstance(config, Config) - self.assertEqual("", config.name) + self.assertEqual(None, config.name) self.assertIsInstance(config.plugins, dict) self.assertEqual({"xcube"}, set(config.plugins.keys())) self.assertIsInstance(config.rules, dict) diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index bf0fb53..5aab920 100644 --- a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -191,7 +191,6 @@ def test_print_config_option(self): self.assertEqual( ( "{\n" - ' "name": "",\n' ' "plugins": {\n' ' "__core__": "xrlint.plugins.core:export_plugin"\n' " },\n" diff --git a/tests/plugins/core/test_plugin.py b/tests/plugins/core/test_plugin.py index 0c7baed..057ef4d 100644 --- a/tests/plugins/core/test_plugin.py +++ b/tests/plugins/core/test_plugin.py @@ -34,9 +34,9 @@ def test_configs_complete(self): all_rule_names = set(plugin.rules.keys()) self.assertEqual( all_rule_names, - set(plugin.configs["all"].rules.keys()), + set(plugin.configs["all"][-1].rules.keys()), 
) self.assertEqual( all_rule_names, - set(plugin.configs["recommended"].rules.keys()), + set(plugin.configs["recommended"][-1].rules.keys()), ) diff --git a/tests/plugins/xcube/test_plugin.py b/tests/plugins/xcube/test_plugin.py index 240d074..4f4deaa 100644 --- a/tests/plugins/xcube/test_plugin.py +++ b/tests/plugins/xcube/test_plugin.py @@ -32,9 +32,9 @@ def test_configs_complete(self): all_rule_names = set(f"xcube/{k}" for k in plugin.rules.keys()) self.assertEqual( all_rule_names, - set(plugin.configs["all"].configs[-1].rules.keys()), + set(plugin.configs["all"][-1].rules.keys()), ) self.assertEqual( all_rule_names, - set(plugin.configs["recommended"].configs[-1].rules.keys()), + set(plugin.configs["recommended"][-1].rules.keys()), ) diff --git a/tests/test_config.py b/tests/test_config.py index 6ac9575..37b678a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -163,9 +163,7 @@ def test_from_value_fail(self): def test_compute_config(self): config_list = ConfigList([Config()]) file_path = "s3://wq-services/datacubes/chl-2.zarr" - self.assertEqual( - Config(name=""), config_list.compute_config(file_path) - ) + self.assertEqual(Config(), config_list.compute_config(file_path)) config_list = ConfigList( [ @@ -176,14 +174,14 @@ def test_compute_config(self): ) file_path = "s3://wq-services/datacubes/chl-2.zarr" self.assertEqual( - Config(name="", settings={"a": 1, "b": 2}), + Config(settings={"a": 1, "b": 2}), config_list.compute_config(file_path), ) # global ignores file_path = "s3://wq-services/datacubes/chl-2.txt" self.assertEqual( - Config(name="", settings={"a": 2, "b": 1}), + Config(settings={"a": 2, "b": 1}), config_list.compute_config(file_path), ) diff --git a/tests/test_examples.py b/tests/test_examples.py index bf1650c..feb684e 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -9,7 +9,7 @@ class ExamplesTest(TestCase): def test_plugin_config(self): config_list, _ = import_value( - "examples.plugin_config", "export_configs", factory=ConfigList.from_value + "examples.plugin_config", "export_config", factory=ConfigList.from_value ) self.assertIsInstance(config_list, ConfigList) self.assertEqual(3, len(config_list.configs)) @@ -17,7 +17,7 @@ def test_plugin_config(self): def test_virtual_plugin_config(self): config_list, _ = import_value( "examples.virtual_plugin_config", - "export_configs", + "export_config", factory=ConfigList.from_value, ) self.assertIsInstance(config_list, ConfigList) diff --git a/xrlint/cli/config.py b/xrlint/cli/config.py index 25c2aaf..f9cf0b5 100644 --- a/xrlint/cli/config.py +++ b/xrlint/cli/config.py @@ -87,7 +87,7 @@ def _read_config_python(config_path: str) -> Any: try: return import_value( module_name, - "export_configs", + "export_config", factory=ConfigList.from_value, )[0] except ValueImportError as e: diff --git a/xrlint/config.py b/xrlint/config.py index 75d66b4..0a45318 100644 --- a/xrlint/config.py +++ b/xrlint/config.py @@ -45,10 +45,11 @@ def get_core_config( CORE_PLUGIN_NAME: core_plugin, }, ) - if config_name: - return config.merge(core_plugin.configs[config_name]) - else: + if config_name is None: return config + config_list = core_plugin.configs[config_name] + assert len(config_list) == 1 + return config.merge(config_list[0]) def split_config_spec(config_spec: str) -> tuple[str, str]: @@ -341,7 +342,6 @@ def compute_config(self, file_path: str) -> Config | None: # Note, computed configurations do not have "files" and "ignores" return Config( - name="", linter_options=config.linter_options, 
opener_options=config.opener_options, processor=config.processor, diff --git a/xrlint/plugin.py b/xrlint/plugin.py index 12bc0a7..a7cab18 100644 --- a/xrlint/plugin.py +++ b/xrlint/plugin.py @@ -102,11 +102,23 @@ def define_processor( def define_config( self, name: str, - config_value: list[Config | dict[str, Any]] | Config | dict[str, Any], + value: list[Config | dict[str, Any]] | Config | dict[str, Any], ) -> list[Config]: - config_list = ConfigList.from_value(config_value) - self.configs[name] = config_list.configs - return config_list.configs + """Define a named configuration. + + Args: + name: The name of the configuration. + value: The configuration-like object or list. + A configuration-like object is either a + [Config][xrlint.config.Config] or a `dict` that + represents a configuration. + + Returns: + A list of `Config` objects. + """ + configs = ConfigList.from_value(value).configs + self.configs[name] = configs + return configs @classmethod def _from_str(cls, value: str, value_name: str) -> "Plugin": diff --git a/xrlint/plugins/core/__init__.py b/xrlint/plugins/core/__init__.py index 1dc64ed..5a69302 100644 --- a/xrlint/plugins/core/__init__.py +++ b/xrlint/plugins/core/__init__.py @@ -1,4 +1,3 @@ -from xrlint.config import Config from xrlint.plugin import Plugin from xrlint.util.importutil import import_submodules @@ -8,7 +7,8 @@ def export_plugin() -> Plugin: import_submodules("xrlint.plugins.core.rules") - plugin.configs["recommended"] = Config.from_value( + plugin.define_config( + "recommended", { "name": "recommended", "rules": { @@ -23,13 +23,15 @@ def export_plugin() -> Plugin: "time-coordinate": "error", "var-units-attr": "warn", }, - } + }, ) - plugin.configs["all"] = Config.from_value( + + plugin.define_config( + "all", { "name": "all", "rules": {rule_id: "error" for rule_id in plugin.rules.keys()}, - } + }, ) return plugin diff --git a/xrlint/plugins/xcube/__init__.py b/xrlint/plugins/xcube/__init__.py index ee30ee4..dcfc4d9 100644 --- a/xrlint/plugins/xcube/__init__.py +++ b/xrlint/plugins/xcube/__init__.py @@ -1,4 +1,3 @@ -from xrlint.config import Config, ConfigList from xrlint.plugin import Plugin from xrlint.util.importutil import import_submodules @@ -26,7 +25,8 @@ def export_plugin() -> Plugin: }, ] - plugin.configs["recommended"] = ConfigList.from_value( + plugin.define_config( + "recommended", [ *common_configs, { @@ -41,10 +41,11 @@ def export_plugin() -> Plugin: "xcube/time-naming": "error", }, }, - ] + ], ) - plugin.configs["all"] = ConfigList.from_value( + plugin.define_config( + "all", [ *common_configs, { @@ -52,7 +53,7 @@ def export_plugin() -> Plugin: f"xcube/{rule_id}": "error" for rule_id in plugin.rules.keys() }, }, - ] + ], ) return plugin diff --git a/xrlint/plugins/xcube/processors/mldataset.py b/xrlint/plugins/xcube/processors/mldataset.py index edcb918..3943101 100644 --- a/xrlint/plugins/xcube/processors/mldataset.py +++ b/xrlint/plugins/xcube/processors/mldataset.py @@ -21,12 +21,11 @@ def preprocess( levels_meta_path = f"{file_path}/.zlevels" try: - with fsspec.open(levels_meta_path, mode="wt") as stream: - levels_meta_content = json.load(stream.read()) + with fsspec.open(levels_meta_path, mode="rt", encoding="utf-8") as stream: + levels_meta_content = json.load(stream) except FileNotFoundError: levels_meta_path = None levels_meta_content = None - pass level_datasets = [] for level in itertools.count(): diff --git a/xrlint/util/importutil.py b/xrlint/util/importutil.py index 7430ab0..48ccd39 100644 --- a/xrlint/util/importutil.py +++ 
b/xrlint/util/importutil.py @@ -57,7 +57,7 @@ If it is not given, the module itself will be the exported value. attr_ref: Attribute reference. Should be given in the case where `module_ref` does not contain an attribute reference. - Example values are "export_plugin", "export_configs". + Example values are "export_plugin", "export_config". constant: If `True` the value is expected to be a constant. If `False`, the default, the referenced attribute can be a no-arg callable that yields the actual exported value. From bf734837e9b18c883f4359c54769d5bb6158864c Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 23 Jan 2025 07:48:23 +0100 Subject: [PATCH 09/17] `JsonSerializable` now recognizes `dataclass` instances; no longer traverse dirs that have config --- CHANGES.md | 23 +++++---- tests/test_config.py | 8 +-- tests/test_operation.py | 11 ++--- tests/test_rule.py | 12 ++--- tests/util/test_codec.py | 56 ++------------------- xrlint/cli/engine.py | 97 +++++++++++++++++-------------------- xrlint/config.py | 15 +++--- xrlint/operation.py | 7 --- xrlint/util/serializable.py | 16 ++++-- 9 files changed, 94 insertions(+), 151 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index f166403..624d274 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,25 +5,30 @@ - Now supporting xcube multi-level datasets `*.levels`: - Added xcube plugin processor `"xcube/multi-level-dataset"` that is used inside the predefined xcube configurations "all" and "recommended". - -- Changed type of `Plugin.configs` from `dict[str, Config]` to - `dict[str, list[Config]]`. +- Directories that are recognized by file patterns associated with a non-empty + configuration object are no longer recursively + traversed. - Introduced method `Plugin.define_config` which defines a named plugin configuration. It takes a name and a configuration object or list of configuration objects. -- Changed they way how configuration is defined and exported from +- Changed the way configuration is defined and exported from + Python configuration files: - Renamed function that exports configuration from `export_configs` into `export_config`. - The returned value should be a list of values that can be converted into configuration objects: mixed `Config` instances, dictionary, or a name that refers to a named configuration of a plugin. +- Node path names now contain the dataset index if a file path + has been opened by a processor that produced multiple + datasets to validate. + +- Other changes: + - Changed type of `Plugin.configs` from `dict[str, Config]` to + `dict[str, list[Config]]`. - - Inbuilt plugin now get their `plugin` instance from + - Inbuilt plugin rules now import their `plugin` instance from `xrlint.plugins.<name>.plugin` module. - - Node paths now contain the split index if dataset - has been opened by a processor. + - `JsonSerializable` now recognizes `dataclass` instances and no longer + serializes property values that are also default values.
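[Editor's note] The serialization change in PATCH 09 means dataclass fields holding their default value are omitted from JSON output, which is why the expected dictionaries in the test diffs below shrink considerably. A minimal sketch of the value-vs-default check (class and function names are illustrative; fields using `default_factory` compare against `MISSING` and are therefore kept, as in the patch):

    from dataclasses import dataclass, fields
    from typing import Any

    @dataclass
    class MetaSketch:
        name: str = ""
        version: str = "0.0.0"
        description: str | None = None

    def to_json_sketch(obj: Any) -> dict[str, Any]:
        # Keep only fields whose current value differs from the declared
        # default, mirroring JsonSerializable._object_to_json() in PATCH 09.
        return {
            f.name: getattr(obj, f.name)
            for f in fields(obj)
            if getattr(obj, f.name) != f.default
        }

    assert to_json_sketch(MetaSketch(name="r1")) == {"name": "r1"}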
## Version 0.3.0 (from 2025-01-20) diff --git a/tests/test_config.py b/tests/test_config.py index 37b678a..8fab8c3 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -185,7 +185,7 @@ def test_compute_config(self): config_list.compute_config(file_path), ) - def test_get_global_filter(self): + def test_split_global_filter(self): config_list = ConfigList( [ Config(files=["**/*.hdf"]), # global file @@ -196,16 +196,18 @@ def test_get_global_filter(self): ] ) - file_filter = config_list.get_global_filter() + new_config_list, file_filter = config_list.split_global_filter() self.assertEqual( FileFilter.from_patterns(["**/*.hdf"], ["**/chl-?.txt"]), file_filter, ) + self.assertEqual(3, len(new_config_list.configs)) - file_filter = config_list.get_global_filter( + new_config_list, file_filter = config_list.split_global_filter( default=FileFilter.from_patterns(["**/*.h5"], None) ) self.assertEqual( FileFilter.from_patterns(["**/*.h5", "**/*.hdf"], ["**/chl-?.txt"]), file_filter, ) + self.assertEqual(3, len(new_config_list.configs)) diff --git a/tests/test_operation.py b/tests/test_operation.py index f0abc79..43c5fc5 100644 --- a/tests/test_operation.py +++ b/tests/test_operation.py @@ -146,14 +146,9 @@ class MyThingOp3(ThingOp): ) self.assertEqual( { - "meta": { - "name": "t3", - "version": "0.0.0", - "description": "What a thing.", - }, - "op_class": ".MyThingOp3'>", + "meta": {"description": "What a thing.", "name": "t3"}, + "op_class": ".MyThingOp3'>", }, rule.to_json(), ) diff --git a/tests/test_rule.py b/tests/test_rule.py index fc20fee..e794f6c 100644 --- a/tests/test_rule.py +++ b/tests/test_rule.py @@ -64,9 +64,8 @@ class MyRule3(RuleOp): rule = Rule(meta=RuleMeta(name="r3"), op_class=MyRule3) self.assertEqual( { - "meta": {"name": "r3", "version": "0.0.0", "type": "problem"}, - "op_class": ".MyRule3'>", + "meta": {"name": "r3"}, + "op_class": ".MyRule3'>", }, rule.to_json(), ) @@ -95,12 +94,7 @@ def test_from_value(self): def test_to_json(self): rule_meta = RuleMeta(name="r1", version="0.1.2", description="Nice one.") self.assertEqual( - { - "name": "r1", - "version": "0.1.2", - "type": "problem", - "description": "Nice one.", - }, + {"description": "Nice one.", "name": "r1", "version": "0.1.2"}, rule_meta.to_json(), ) diff --git a/tests/util/test_codec.py b/tests/util/test_codec.py index a0cdec1..6761723 100644 --- a/tests/util/test_codec.py +++ b/tests/util/test_codec.py @@ -124,15 +124,7 @@ def test_assumptions(self): class JsonSerializableTest(TestCase): def test_simple_ok(self): self.assertEqual( - { - "a": None, - "b": False, - "c": 0, - "d": 0.0, - "e": "abc", - "f": "", - "g": 0, - }, + {}, SimpleTypesContainer().to_json(), ) self.assertEqual( @@ -162,58 +154,18 @@ def test_complex_ok(self): ) self.assertEqual( { - "p": { - "a": None, - "b": False, - "c": 0, - "d": 0.0, - "e": "abc", - "f": "", - "g": 0, - }, + "p": {}, "q": {"p": True, "q": False}, - "r": { - "u": { - "a": None, - "b": False, - "c": 0, - "d": 0.0, - "e": "abc", - "f": "", - "g": 0, - }, - "v": { - "a": None, - "b": False, - "c": 0, - "d": 0.0, - "e": "abc", - "f": "", - "g": 0, - }, - }, + "r": {"u": {}, "v": {}}, "s": [1, 2, 3], "t": [ + {"c": 5, "d": 6.7}, { - "a": None, - "b": False, - "c": 5, - "d": 6.7, - "e": "abc", - "f": "", - "g": 0, - }, - { - "a": None, - "b": False, "c": 8, "d": 9.1, - "e": "abc", "f": "", - "g": 0, }, ], - "u": None, }, container.to_json(), ) diff --git a/xrlint/cli/engine.py b/xrlint/cli/engine.py index 71d44bd..f8eccf9 100644 --- a/xrlint/cli/engine.py +++ 
b/xrlint/cli/engine.py @@ -21,7 +21,7 @@ from xrlint.formatters import export_formatters from xrlint.linter import Linter from xrlint.plugin import Plugin -from xrlint.result import Message, Result, ResultStats +from xrlint.result import Result, ResultStats from xrlint.util.filefilter import FileFilter DEFAULT_GLOBAL_FILTER = FileFilter.from_patterns( @@ -147,25 +147,53 @@ def verify_datasets(self, files: Iterable[str]) -> Iterator[Result]: Returns: Iterator of reports. """ - global_filter = self.config_list.get_global_filter( + linter = Linter() + for file_path, config in self.get_files(files): + yield linter.verify_dataset(file_path, config=config) + + def get_files(self, file_paths: Iterable[str]) -> Iterator[tuple[str, Config]]: + """Provide an iterator for the list of files or directories. + + Directories in `files` that are not filtered out will be + recursively traversed. + + Args: + file_paths: Iterable of files or directory. + + Returns: + An iterator of filtered files or directories. + """ + config_list, global_filter = self.config_list.split_global_filter( default=DEFAULT_GLOBAL_FILTER ) - linter = Linter() - for file_path, is_dir in get_files(files, global_filter): - config = self.get_config_for_file(file_path) + + def compute_config(p: str): + return config_list.compute_config(p) if global_filter.accept(p) else None + + for file_path in file_paths: + _fs, root = fsspec.url_to_fs(file_path) + fs: fsspec.AbstractFileSystem = _fs + + config = compute_config(file_path) if config is not None: - yield linter.verify_dataset(file_path, config=config) - else: - yield Result.new( - config=config, - file_path=file_path, - messages=[ - Message( - message="No configuration matches this file.", - severity=2, - ) - ], - ) + yield file_path, config + continue + + if fs.isdir(root): + for path, dirs, files in fs.walk(root, topdown=True): + + for d in list(dirs): + d_path = f"{path}/{d}" + c = compute_config(d_path) + if c is not None: + dirs.remove(d) + yield d_path, c + + for f in files: + f_path = f"{path}/{f}" + c = compute_config(f_path) + if c is not None: + yield f_path, c def format_results(self, results: Iterable[Result]) -> str: """Format the given results. @@ -220,38 +248,3 @@ def init_config_file(cls) -> None: with open(file_path, "w") as f: f.write(INIT_CONFIG_YAML) click.echo(f"Configuration template written to {file_path}") - - -def get_files( - file_paths: Iterable[str], global_filter: FileFilter -) -> Iterator[tuple[str, bool | None]]: - """Provide an iterator for the list of files or directories. - - Directories in `files` that are not filtered out will be - recursively traversed. - - Args: - file_paths: Iterable of files or directory. - global_filter: A file filter that includes files that - covered by global file patterns and not excluded - by global ignore patterns. - - Returns: - An iterator of filtered files or directories. 
- """ - for file_path in file_paths: - _fs, root = fsspec.url_to_fs(file_path) - fs: fsspec.AbstractFileSystem = _fs - _dir = fs.isdir(root) - if global_filter.accept(file_path): - yield file_path, _dir - elif _dir: - for path, dirs, files in fs.walk(root): - for d in dirs: - d_path = f"{path}/{d}" - if global_filter.accept(d_path): - yield d_path, True - for f in files: - f_path = f"{path}/{f}" - if global_filter.accept(f_path): - yield f_path, False diff --git a/xrlint/config.py b/xrlint/config.py index 0a45318..ee294d4 100644 --- a/xrlint/config.py +++ b/xrlint/config.py @@ -6,7 +6,7 @@ from xrlint.util.constructible import MappingConstructible, ValueConstructible from xrlint.util.filefilter import FileFilter from xrlint.util.merge import merge_arrays, merge_dicts, merge_set_lists, merge_values -from xrlint.util.serializable import JsonSerializable, JsonValue +from xrlint.util.serializable import JsonSerializable if TYPE_CHECKING: # pragma: no cover # make IDEs and flake8 happy @@ -155,6 +155,7 @@ def empty(self) -> bool: return not ( self.linter_options or self.opener_options + or self.processor or self.plugins or self.rules or self.settings @@ -293,9 +294,6 @@ def value_name(cls) -> str: def value_type_name(cls) -> str: return "Config | dict | None" - def to_dict(self, value_name: str | None = None) -> dict[str, JsonValue]: - return {k: v for k, v in super().to_dict().items() if v is not None} - @dataclass(frozen=True) class ConfigList(ValueConstructible, JsonSerializable): @@ -309,16 +307,21 @@ class ConfigList(ValueConstructible, JsonSerializable): configs: list[Config] = field(default_factory=list) """The list of configuration objects.""" - def get_global_filter(self, default: FileFilter | None = None) -> FileFilter: + def split_global_filter( + self, default: FileFilter | None = None + ) -> tuple["ConfigList", FileFilter]: """Get a global file filter for this configuration list.""" global_filter = FileFilter( default.files if default else (), default.ignores if default else (), ) + configs = [] for c in self.configs: if c.empty and not c.file_filter.empty: global_filter = global_filter.merge(c.file_filter) - return global_filter + else: + configs.append(c) + return ConfigList(configs=configs), global_filter def compute_config(self, file_path: str) -> Config | None: """Compute the configuration object for the given file path. diff --git a/xrlint/operation.py b/xrlint/operation.py index 9f1c35f..87332bd 100644 --- a/xrlint/operation.py +++ b/xrlint/operation.py @@ -46,13 +46,6 @@ class OperationMeta(MappingConstructible["OpMetadata"], JsonSerializable): Must have the form ":", if given. """ - def to_dict(self, value_name: str | None = None) -> dict[str, JsonValue]: - return { - k: v - for k, v in super().to_dict(value_name=value_name).items() - if v is not None - } - class Operation(MappingConstructible["Operation"], JsonSerializable): """A mixin class that is used by operation classes. 
diff --git a/xrlint/util/serializable.py b/xrlint/util/serializable.py index b8b7415..51e305e 100644 --- a/xrlint/util/serializable.py +++ b/xrlint/util/serializable.py @@ -1,3 +1,4 @@ +from dataclasses import is_dataclass, fields from typing import Any, Final, Mapping, Sequence, TypeAlias from xrlint.util.formatting import format_message_type_of @@ -61,11 +62,16 @@ def _value_to_json(cls, value: Any, value_name: str) -> JsonValue: @classmethod def _object_to_json(cls, value: Any, value_name: str) -> dict[str, JsonValue]: - return { - k: cls._value_to_json(v, f"{value_name}.{k}") - for k, v in vars(value).items() - if cls._is_non_protected_property_name(k) - } + if is_dataclass(value): + _d = {f.name: (f, getattr(value, f.name)) for f in fields(value)} + d = {k: v for k, (f, v) in _d.items() if v != f.default} + else: + d = { + k: v + for k, v in vars(value).items() + if cls._is_non_protected_property_name(k) + } + return {k: cls._value_to_json(v, f"{value_name}.{k}") for k, v in d.items()} @classmethod def _mapping_to_json( From 80fe7195609403272d671ed57da302959ac9f20b Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 23 Jan 2025 08:37:38 +0100 Subject: [PATCH 10/17] missing util tests --- .../{test_codec.py => test_constructible.py} | 64 ----- tests/util/test_serializable.py | 237 ++++++++++++++++++ 2 files changed, 237 insertions(+), 64 deletions(-) rename tests/util/{test_codec.py => test_constructible.py} (87%) create mode 100644 tests/util/test_serializable.py diff --git a/tests/util/test_codec.py b/tests/util/test_constructible.py similarity index 87% rename from tests/util/test_codec.py rename to tests/util/test_constructible.py index 6761723..f5dac61 100644 --- a/tests/util/test_codec.py +++ b/tests/util/test_constructible.py @@ -121,70 +121,6 @@ def test_assumptions(self): # noinspection PyMethodMayBeStatic -class JsonSerializableTest(TestCase): - def test_simple_ok(self): - self.assertEqual( - {}, - SimpleTypesContainer().to_json(), - ) - self.assertEqual( - { - "a": "?", - "b": True, - "c": 12, - "d": 34.56, - "e": "uvw", - "f": "", - "g": "off", - }, - SimpleTypesContainer( - a="?", b=True, c=12, d=34.56, e="uvw", f=bool, g="off" - ).to_json(), - ) - - def test_complex_ok(self): - container = ComplexTypesContainer( - q=dict(p=True, q=False), - r=dict(u=SimpleTypesContainer(), v=SimpleTypesContainer()), - s=[1, 2, 3], - t=[ - SimpleTypesContainer(c=5, d=6.7), - SimpleTypesContainer(c=8, d=9.1, f=SimpleTypesContainer), - ], - ) - self.assertEqual( - { - "p": {}, - "q": {"p": True, "q": False}, - "r": {"u": {}, "v": {}}, - "s": [1, 2, 3], - "t": [ - {"c": 5, "d": 6.7}, - { - "c": 8, - "d": 9.1, - "f": "", - }, - ], - }, - container.to_json(), - ) - - def test_fail(self): - @dataclass() - class Problematic(JsonSerializable): - data: Any - - with pytest.raises( - TypeError, - match=( - "problematic.data must be of type" - " None|bool|int|float|str|dict|list|tuple, but got object" - ), - ): - Problematic(data=object()).to_json(value_name="problematic") - - class ValueConstructibleTest(TestCase): def test_useless_ok(self): container = UselessContainer() diff --git a/tests/util/test_serializable.py b/tests/util/test_serializable.py new file mode 100644 index 0000000..4206af9 --- /dev/null +++ b/tests/util/test_serializable.py @@ -0,0 +1,237 @@ +from dataclasses import dataclass, field +from typing import Any, Literal + +from unittest import TestCase + +import pytest + +from xrlint.util.serializable import JsonSerializable + + +class PlainSimpleTypesContainer(JsonSerializable): 
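+    # Being a plain class rather than a dataclass, this container is
+    # serialized via vars(self): to_json() emits every non-protected
+    # attribute, defaults included. The dataclass containers further
+    # below only emit fields that differ from their declared defaults.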
+ def __init__( + self, + a: Any = None, + b: bool = False, + c: int = 0, + d: float = 0.0, + e: str = "abc", + f: type = int, + g: Literal[0, 1, True, False, "on", "off"] = 0, + ): + self.a = a + self.b = b + self.c = c + self.d = d + self.e = e + self.f = f + self.g = g + + +class PlainComplexTypesContainer(JsonSerializable): + def __init__( + self, + p: PlainSimpleTypesContainer = PlainSimpleTypesContainer(), + q: dict[str, bool] = None, + r: dict[str, PlainSimpleTypesContainer] = None, + s: list[int] = None, + t: list[PlainSimpleTypesContainer] = None, + u: int | float | None = None, + ): + self.p = p + self.q = q or {} + self.r = r or {} + self.s = s or [] + self.t = t or [] + self.u = u + + +@dataclass() +class DataclassSimpleTypesContainer(JsonSerializable): + a: Any = None + b: bool = False + c: int = 0 + d: float = 0.0 + e: str = "abc" + f: type = int + g: Literal[0, 1, True, False, "on", "off"] = 0 + + +@dataclass() +class DataclassComplexTypesContainer(JsonSerializable): + p: DataclassSimpleTypesContainer = field( + default_factory=DataclassSimpleTypesContainer + ) + q: dict[str, bool] = field(default_factory=dict) + r: dict[str, DataclassSimpleTypesContainer] = field(default_factory=dict) + s: list[int] = field(default_factory=list) + t: list[DataclassSimpleTypesContainer] = field(default_factory=list) + u: int | float | None = None + + +# noinspection PyMethodMayBeStatic +class JsonSerializableTest(TestCase): + def test_plain_simple_ok(self): + self.assertEqual( + { + "a": None, + "b": False, + "c": 0, + "d": 0.0, + "e": "abc", + "f": "", + "g": 0, + }, + PlainSimpleTypesContainer().to_json(), + ) + self.assertEqual( + { + "a": "?", + "b": True, + "c": 12, + "d": 34.56, + "e": "uvw", + "f": "", + "g": "off", + }, + PlainSimpleTypesContainer( + a="?", b=True, c=12, d=34.56, e="uvw", f=bool, g="off" + ).to_json(), + ) + + def test_plain_complex_ok(self): + container = PlainComplexTypesContainer( + q=dict(p=True, q=False), + r=dict(u=PlainSimpleTypesContainer(), v=PlainSimpleTypesContainer()), + s=[1, 2, 3], + t=[ + PlainSimpleTypesContainer(c=5, d=6.7), + PlainSimpleTypesContainer(c=8, d=9.1, f=PlainSimpleTypesContainer), + ], + ) + self.assertEqual( + { + "p": { + "a": None, + "b": False, + "c": 0, + "d": 0.0, + "e": "abc", + "f": "", + "g": 0, + }, + "q": {"p": True, "q": False}, + "r": { + "u": { + "a": None, + "b": False, + "c": 0, + "d": 0.0, + "e": "abc", + "f": "", + "g": 0, + }, + "v": { + "a": None, + "b": False, + "c": 0, + "d": 0.0, + "e": "abc", + "f": "", + "g": 0, + }, + }, + "s": [1, 2, 3], + "t": [ + { + "a": None, + "b": False, + "c": 5, + "d": 6.7, + "e": "abc", + "f": "", + "g": 0, + }, + { + "a": None, + "b": False, + "c": 8, + "d": 9.1, + "e": "abc", + "f": "", + "g": 0, + }, + ], + "u": None, + }, + container.to_json(), + ) + + def test_dataclass_simple_ok(self): + self.assertEqual( + {}, + DataclassSimpleTypesContainer().to_json(), + ) + self.assertEqual( + { + "a": "?", + "b": True, + "c": 12, + "d": 34.56, + "e": "uvw", + "f": "", + "g": "off", + }, + DataclassSimpleTypesContainer( + a="?", b=True, c=12, d=34.56, e="uvw", f=bool, g="off" + ).to_json(), + ) + + def test_dataclass_complex_ok(self): + container = DataclassComplexTypesContainer( + q=dict(p=True, q=False), + r=dict( + u=DataclassSimpleTypesContainer(), v=DataclassSimpleTypesContainer() + ), + s=[1, 2, 3], + t=[ + DataclassSimpleTypesContainer(c=5, d=6.7), + DataclassSimpleTypesContainer( + c=8, d=9.1, f=DataclassSimpleTypesContainer + ), + ], + ) + self.assertEqual( + { + "p": {}, + "q": 
{"p": True, "q": False}, + "r": {"u": {}, "v": {}}, + "s": [1, 2, 3], + "t": [ + {"c": 5, "d": 6.7}, + { + "c": 8, + "d": 9.1, + "f": ( + "" + ), + }, + ], + }, + container.to_json(), + ) + + def test_fail(self): + @dataclass() + class Problematic(JsonSerializable): + data: Any + + with pytest.raises( + TypeError, + match=( + "problematic.data must be of type" + " None|bool|int|float|str|dict|list|tuple, but got object" + ), + ): + Problematic(data=object()).to_json(value_name="problematic") From e4a48d702d41aade218e9b918b12f26fdad38200 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 23 Jan 2025 14:07:06 +0100 Subject: [PATCH 11/17] testing MultiLevelDatasetProcessor --- environment.yml | 1 + pyproject.toml | 1 + tests/plugins/xcube/helpers.py | 63 ++++++++++++++ tests/plugins/xcube/processors/__init__.py | 0 .../xcube/processors/test_mldataset.py | 87 +++++++++++++++++++ 5 files changed, 152 insertions(+) create mode 100644 tests/plugins/xcube/helpers.py create mode 100644 tests/plugins/xcube/processors/__init__.py create mode 100644 tests/plugins/xcube/processors/test_mldataset.py diff --git a/environment.yml b/environment.yml index 5b3fd1b..9d45b3f 100644 --- a/environment.yml +++ b/environment.yml @@ -20,6 +20,7 @@ dependencies: - requests-mock - ruff # Testing Datasets + - dask - pandas - netcdf4 - numpy diff --git a/pyproject.toml b/pyproject.toml index 8981b1d..5097449 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -73,6 +73,7 @@ dev = [ "ruff", "twine", # Dataset testing + "dask", "netcdf4", "numpy", "pandas", diff --git a/tests/plugins/xcube/helpers.py b/tests/plugins/xcube/helpers.py new file mode 100644 index 0000000..77bbfb6 --- /dev/null +++ b/tests/plugins/xcube/helpers.py @@ -0,0 +1,63 @@ +import numpy as np +import xarray as xr + + +def make_cube(nx: int, ny: int, nt: int) -> xr.Dataset: + """Make an in-memory dataset that should pass all xcube rules. + + Args: + nx: length of the lon-dimension + ny: length of the lat-dimension + nt: length of the time-dimension + + Returns: + an in-memory dataset with one 3-d data variable "chl" + with dimensions "time", "lat", "lon". 
+ """ + dx = 180.0 / nx + dy = 90.0 / ny + return xr.Dataset( + coords=dict( + lon=xr.DataArray( + np.linspace(-180 + dx, 180 - dx, nx), + dims="lon", + attrs=dict( + long_name="longitude", + standard_name="longitude", + units="degrees_east", + ), + ), + lat=xr.DataArray( + np.linspace(-90 + dy, 90 - dy, ny), + dims="lat", + attrs=dict( + long_name="latitude", + standard_name="latitude", + units="degrees_north", + ), + ), + time=xr.DataArray( + range(nt), + dims="time", + attrs=dict( + long_name="time", + standard_name="time", + units="days since 2024-06-10:12:00:00 utc", + calendar="gregorian", + ), + ), + ), + data_vars=dict( + chl=xr.DataArray( + np.zeros((nt, ny, nx)), + dims=["time", "lat", "lon"], + attrs=dict( + long_name="chlorophyll concentration", + standard_name="chlorophyll_concentration", + units="mg/m^3", + _FillValue=0, + ), + ).chunk(time=1, lat=min(ny, 90), lon=min(nx, 90)), + ), + attrs=dict(), + ) diff --git a/tests/plugins/xcube/processors/__init__.py b/tests/plugins/xcube/processors/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/plugins/xcube/processors/test_mldataset.py b/tests/plugins/xcube/processors/test_mldataset.py new file mode 100644 index 0000000..757d7e6 --- /dev/null +++ b/tests/plugins/xcube/processors/test_mldataset.py @@ -0,0 +1,87 @@ +import json +from unittest import TestCase + +import fsspec + +from tests.plugins.xcube.helpers import make_cube +from xrlint.plugins.xcube.processors.mldataset import MultiLevelDatasetProcessor +from xrlint.result import Message + + +class MultiLevelDatasetProcessorTest(TestCase): + @classmethod + def setUpClass(cls): + cls.levels_dir = "memory://xrlint-test.levels" + cls.fs, _ = fsspec.core.url_to_fs(cls.levels_dir) + cls.fs.mkdir(cls.levels_dir) + + cls.nx = nx = 720 + cls.ny = ny = nx // 2 + cls.nt = 3 + cls.nl = 4 + + for level in range(cls.nl): + dataset = make_cube(nx, ny, cls.nt) + dataset.to_zarr(f"{cls.levels_dir}/{level}.zarr", write_empty_chunks=False) + nx //= 2 + ny //= 2 + + cls.meta_path = f"{cls.levels_dir}/.zlevels" + cls.meta_content = { + "version": "1.0", + "num_levels": cls.nl, + "use_saved_levels": False, + "agg_methods": { + "chl": "mean", + }, + } + + def test_preprocess(self): + with self.fs.open(self.meta_path, mode="wt") as stream: + json.dump(self.meta_content, stream, indent=2) + + processor = MultiLevelDatasetProcessor() + datasets = processor.preprocess(self.levels_dir, {}) + self.assertIsInstance(datasets, list) + self.assertEqual(self.nl, len(datasets)) + for i, (dataset, file_path) in enumerate(datasets): + self.assertEqual(f"{self.levels_dir}/{i}.zarr", file_path) + self.assertIn("_LEVEL_INFO", dataset.attrs) + level_info = dataset.attrs.get("_LEVEL_INFO") + self.assertIsInstance(level_info, dict) + self.assertEqual(i, level_info.get("index")) + self.assertEqual(self.nl, level_info.get("count")) + self.assertEqual(self.meta_path, level_info.get("meta_path")) + self.assertEqual(self.meta_content, level_info.get("meta_content")) + self.assertIsInstance(level_info.get("datasets"), list) + + self.fs.delete(self.meta_path) + + def test_preprocess_no_meta(self): + processor = MultiLevelDatasetProcessor() + datasets = processor.preprocess(self.levels_dir, {}) + self.assertIsInstance(datasets, list) + self.assertEqual(self.nl, len(datasets)) + for i, (dataset, file_path) in enumerate(datasets): + self.assertEqual(f"{self.levels_dir}/{i}.zarr", file_path) + self.assertIn("_LEVEL_INFO", dataset.attrs) + level_info = dataset.attrs.get("_LEVEL_INFO") + 
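+            # without a ".zlevels" file, the processor still attaches its
+            # "_LEVEL_INFO" bookkeeping, but the meta entries remain None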
self.assertIsInstance(level_info, dict) + self.assertEqual(i, level_info.get("index")) + self.assertEqual(self.nl, level_info.get("count")) + self.assertEqual(None, level_info.get("meta_path")) + self.assertEqual(None, level_info.get("meta_content")) + self.assertIsInstance(level_info.get("datasets"), list) + + def test_postprocess(self): + processor = MultiLevelDatasetProcessor() + ml0 = [Message("m00"), Message("m01")] + ml1 = [Message("10"), Message("m11"), Message("m12")] + messages = processor.postprocess( + [ + ml0, + ml1, + ], + self.levels_dir, + ) + self.assertEqual([*ml0, *ml1], messages) From 299686ab46896af3fe9d1556e08447a7a7205aad Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Thu, 23 Jan 2025 20:39:48 +0100 Subject: [PATCH 12/17] new multi-level dataset rules --- CHANGES.md | 3 + .../xcube/processors/test_mldataset.py | 37 ++--- tests/plugins/xcube/test_plugin.py | 2 + xrlint/plugins/xcube/__init__.py | 7 +- xrlint/plugins/xcube/constants.py | 20 ++- xrlint/plugins/xcube/processors/mldataset.py | 132 ++++++++++++++---- xrlint/plugins/xcube/rules/ml_dataset_meta.py | 56 ++++++++ .../xcube/rules/ml_dataset_resolution.py | 57 ++++++++ xrlint/plugins/xcube/util.py | 67 ++++++++- 9 files changed, 325 insertions(+), 56 deletions(-) create mode 100644 xrlint/plugins/xcube/rules/ml_dataset_meta.py create mode 100644 xrlint/plugins/xcube/rules/ml_dataset_resolution.py diff --git a/CHANGES.md b/CHANGES.md index 624d274..a3b9b84 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,9 @@ ## Version 0.4.0 (in development) +- New xcube multi-level dataset rules: + - `ml-dataset-meta`: verifies that a meta info file exists and is consistent + - `ml-dataset-resolution`: verifies that the levels have expected resolutions - Now supporting xcube multi-level datasets `*.levels`: - Added xcube plugin processor `"xcube/multi-level-dataset"` that is used inside the predefined xcube configurations "all" and "recommended". 
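For orientation, the `*.levels` layout consumed by the new `xcube/multi-level-dataset` processor might look like the following sketch (the name `chl.levels` and the `chl` variable are illustrative):

```
chl.levels/
    .zlevels    # optional JSON meta-info, e.g.
                # {"version": "1.0", "num_levels": 4,
                #  "use_saved_levels": false, "agg_methods": {"chl": "mean"}}
    0.zarr      # level 0, full spatial resolution; alternatively, a text
                # file "0.link" holding the path of a dataset stored elsewhere
    1.zarr      # level 1, half the spatial resolution of level 0
    2.zarr      # level 2, a quarter of the spatial resolution of level 0
    3.zarr      # level 3, an eighth of the spatial resolution of level 0
```

The processor opens every level as a separate dataset and attaches level bookkeeping to each of them, so that the rules below can cross-check the levels against one another and against the `.zlevels` meta-info.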
diff --git a/tests/plugins/xcube/processors/test_mldataset.py b/tests/plugins/xcube/processors/test_mldataset.py index 757d7e6..aee7bf4 100644 --- a/tests/plugins/xcube/processors/test_mldataset.py +++ b/tests/plugins/xcube/processors/test_mldataset.py @@ -4,7 +4,12 @@ import fsspec from tests.plugins.xcube.helpers import make_cube -from xrlint.plugins.xcube.processors.mldataset import MultiLevelDatasetProcessor +from xrlint.plugins.xcube.constants import ML_INFO_ATTR +from xrlint.plugins.xcube.processors.mldataset import ( + MultiLevelDatasetProcessor, + LevelInfo, + MultiLevelDatasetMeta, +) from xrlint.result import Message @@ -46,14 +51,13 @@ def test_preprocess(self): self.assertEqual(self.nl, len(datasets)) for i, (dataset, file_path) in enumerate(datasets): self.assertEqual(f"{self.levels_dir}/{i}.zarr", file_path) - self.assertIn("_LEVEL_INFO", dataset.attrs) - level_info = dataset.attrs.get("_LEVEL_INFO") - self.assertIsInstance(level_info, dict) - self.assertEqual(i, level_info.get("index")) - self.assertEqual(self.nl, level_info.get("count")) - self.assertEqual(self.meta_path, level_info.get("meta_path")) - self.assertEqual(self.meta_content, level_info.get("meta_content")) - self.assertIsInstance(level_info.get("datasets"), list) + level_info = dataset.attrs.get(ML_INFO_ATTR) + self.assertIsInstance(level_info, LevelInfo) + self.assertEqual(i, level_info.level) + self.assertEqual(self.nl, level_info.num_levels) + self.assertIsInstance(level_info.meta, MultiLevelDatasetMeta) + self.assertIsInstance(level_info.datasets, list) + self.assertEqual(self.nl, len(level_info.datasets)) self.fs.delete(self.meta_path) @@ -64,14 +68,13 @@ def test_preprocess_no_meta(self): self.assertEqual(self.nl, len(datasets)) for i, (dataset, file_path) in enumerate(datasets): self.assertEqual(f"{self.levels_dir}/{i}.zarr", file_path) - self.assertIn("_LEVEL_INFO", dataset.attrs) - level_info = dataset.attrs.get("_LEVEL_INFO") - self.assertIsInstance(level_info, dict) - self.assertEqual(i, level_info.get("index")) - self.assertEqual(self.nl, level_info.get("count")) - self.assertEqual(None, level_info.get("meta_path")) - self.assertEqual(None, level_info.get("meta_content")) - self.assertIsInstance(level_info.get("datasets"), list) + level_info = dataset.attrs.get(ML_INFO_ATTR) + self.assertIsInstance(level_info, LevelInfo) + self.assertEqual(i, level_info.level) + self.assertEqual(self.nl, level_info.num_levels) + self.assertEqual(None, level_info.meta) + self.assertIsInstance(level_info.datasets, list) + self.assertEqual(self.nl, len(level_info.datasets)) def test_postprocess(self): processor = MultiLevelDatasetProcessor() diff --git a/tests/plugins/xcube/test_plugin.py b/tests/plugins/xcube/test_plugin.py index 4f4deaa..a7cd0c6 100644 --- a/tests/plugins/xcube/test_plugin.py +++ b/tests/plugins/xcube/test_plugin.py @@ -14,6 +14,8 @@ def test_rules_complete(self): "grid-mapping-naming", "increasing-time", "lat-lon-naming", + "ml-dataset-meta", + "ml-dataset-resolution", "single-grid-mapping", "time-naming", }, diff --git a/xrlint/plugins/xcube/__init__.py b/xrlint/plugins/xcube/__init__.py index dcfc4d9..3a1a917 100644 --- a/xrlint/plugins/xcube/__init__.py +++ b/xrlint/plugins/xcube/__init__.py @@ -1,4 +1,5 @@ from xrlint.plugin import Plugin +from xrlint.plugins.xcube.constants import ML_FILE_PATTERN from xrlint.util.importutil import import_submodules @@ -16,11 +17,11 @@ def export_plugin() -> Plugin: }, { # Add *.levels to globally included list of file types - "files": ["**/*.levels"], + 
"files": [ML_FILE_PATTERN], }, { # Specify a processor for *.levels files - "files": ["**/*.levels"], + "files": [ML_FILE_PATTERN], "processor": "xcube/multi-level-dataset", }, ] @@ -37,6 +38,8 @@ def export_plugin() -> Plugin: "xcube/grid-mapping-naming": "warn", "xcube/increasing-time": "error", "xcube/lat-lon-naming": "error", + "xcube/ml-dataset-meta": "warn", + "xcube/ml-dataset-resolution": "error", "xcube/single-grid-mapping": "error", "xcube/time-naming": "error", }, diff --git a/xrlint/plugins/xcube/constants.py b/xrlint/plugins/xcube/constants.py index d3727d7..b10b532 100644 --- a/xrlint/plugins/xcube/constants.py +++ b/xrlint/plugins/xcube/constants.py @@ -1,8 +1,14 @@ -LON_NAME = "lon" -LAT_NAME = "lat" -X_NAME = "x" -Y_NAME = "y" -TIME_NAME = "time" +from typing import Final -GM_NAMES = "spatial_ref", "crs" -GM_NAMES_TEXT = " or ".join(repr(gm_name) for gm_name in GM_NAMES) +LON_NAME: Final = "lon" +LAT_NAME: Final = "lat" +X_NAME: Final = "x" +Y_NAME: Final = "y" +TIME_NAME: Final = "time" + +GM_NAMES: Final = "spatial_ref", "crs" +GM_NAMES_TEXT: Final = " or ".join(repr(gm_name) for gm_name in GM_NAMES) + +ML_FILE_PATTERN: Final = "**/*.levels" +ML_META_FILENAME: Final = ".zlevels" +ML_INFO_ATTR: Final = "_LEVEL_INFO" diff --git a/xrlint/plugins/xcube/processors/mldataset.py b/xrlint/plugins/xcube/processors/mldataset.py index 3943101..3e4c3ee 100644 --- a/xrlint/plugins/xcube/processors/mldataset.py +++ b/xrlint/plugins/xcube/processors/mldataset.py @@ -1,51 +1,78 @@ import itertools import json +import re from typing import Any import fsspec import xarray as xr +from xrlint.plugins.xcube.constants import ( + ML_FILE_PATTERN, + ML_META_FILENAME, +) from xrlint.plugins.xcube.plugin import plugin +from xrlint.plugins.xcube.util import ( + set_dataset_level_info, + LevelInfo, + MultiLevelDatasetMeta, +) from xrlint.processor import ProcessorOp from xrlint.result import Message +level_pattern = re.compile(r"^(\d+)(?:\.zarr)?$") + @plugin.define_processor("multi-level-dataset") class MultiLevelDatasetProcessor(ProcessorOp): - """This processor should be used with `files: ["**/*.levels"]`.""" + f"""This processor should be used with `files: [{ML_FILE_PATTERN}"]`.""" def preprocess( self, file_path: str, opener_options: dict[str, Any] ) -> list[tuple[xr.Dataset, str]]: - engine = opener_options.pop("engine", "zarr") + fs, fs_path = get_filesystem(file_path, opener_options) + + file_names = [ + # extracting the filename could be done more robustly + f.replace(fs_path, "").strip("/") + for f in fs.listdir(fs_path, detail=False) + ] + + meta = None + if ML_META_FILENAME in file_names: + try: + with fs.open(f"{fs_path}/{ML_META_FILENAME}") as stream: + meta = parse_multi_level_dataset_meta(stream) + except FileNotFoundError: + pass - levels_meta_path = f"{file_path}/.zlevels" - try: - with fsspec.open(levels_meta_path, mode="rt", encoding="utf-8") as stream: - levels_meta_content = json.load(stream) - except FileNotFoundError: - levels_meta_path = None - levels_meta_content = None - - level_datasets = [] - for level in itertools.count(): - level_path = f"{file_path}/{level}.zarr" + level_0_path = None + if "0.link" in file_names: try: - level_dataset = xr.open_dataset( - level_path, engine=engine, **opener_options - ) + with fs.open(f"{fs_path}/0.link") as stream: + level_0_path = stream.read() except FileNotFoundError: - break + pass + + level_names, num_levels = parse_levels(file_names, level_0_path) + + engine = opener_options.pop("engine", "zarr") + + level_datasets: 
list[xr.Dataset | None] = [] + for level, level_name in level_names.items(): + level_path = f"{file_path}/{level_name}" + level_dataset = xr.open_dataset(level_path, engine=engine, **opener_options) level_datasets.append((level_dataset, level_path)) - for level, (level_dataset, level_path) in enumerate(level_datasets): - level_dataset.attrs["_LEVEL_INFO"] = { - "meta_path": levels_meta_path, - "meta_content": levels_meta_content, - "index": level, - "count": len(level_datasets), - "datasets": [ds for ds, _ in level_datasets], - } + for level, (level_dataset, _) in enumerate(level_datasets): + set_dataset_level_info( + level_dataset, + LevelInfo( + level=level, + num_levels=num_levels, + meta=meta, + datasets=level_datasets, + ), + ) return level_datasets @@ -53,3 +80,58 @@ def postprocess( self, messages: list[list[Message]], file_path: str ) -> list[Message]: return list(itertools.chain(*messages)) + + +def get_filesystem(file_path: str, opener_options: dict[str, Any]): + storage_options = ( + opener_options.get( + "storage_options", + opener_options.get("backend_kwargs", {}).get("storage_options"), + ) + or {} + ) + _fs, fs_path = fsspec.core.url_to_fs(file_path, **storage_options) + fs: fsspec.AbstractFileSystem = _fs + fs_path: str = fs_path.replace("\\", "/") + return fs, fs_path + + +def parse_levels( + file_names: list[str], level_0_path: str | None +) -> tuple[dict[int, str], int]: + level_names: dict[int, str] = {0: level_0_path} if level_0_path else {} + num_levels = 0 + for file_name in file_names: + m = level_pattern.match(file_name) + if m is not None: + level = int(m.group(1)) + level_names[level] = file_name + num_levels = max(num_levels, level + 1) + if not level_names: + raise ValueError("empty multi-level dataset") + num_levels = max(level_names.keys()) + 1 + for level in range(num_levels): + if level not in level_names: + raise ValueError( + f"missing dataset for level {level} in multi-level dataset" + ) + return level_names, num_levels + + +def parse_multi_level_dataset_meta(stream: Any) -> MultiLevelDatasetMeta: + meta_content = json.load(stream) + + msg_prefix = f"illegal xcube {ML_META_FILENAME!r} file" + + if not isinstance(meta_content, dict): + raise ValueError(f"{msg_prefix}, JSON object expected") + + version = meta_content.get("version") + if not isinstance(version, str) or not version.startswith("1."): + raise ValueError(f"{msg_prefix}, missing or invalid 'version'") + + num_levels = meta_content.get("num_levels") + if not isinstance(num_levels, int) or num_levels < 1: + raise ValueError(f"{msg_prefix}, missing or invalid 'num_levels'") + + return MultiLevelDatasetMeta(**meta_content) diff --git a/xrlint/plugins/xcube/rules/ml_dataset_meta.py b/xrlint/plugins/xcube/rules/ml_dataset_meta.py new file mode 100644 index 0000000..99d0aeb --- /dev/null +++ b/xrlint/plugins/xcube/rules/ml_dataset_meta.py @@ -0,0 +1,56 @@ +from xrlint.node import DatasetNode +from xrlint.plugins.xcube.constants import ML_META_FILENAME +from xrlint.plugins.xcube.plugin import plugin +from xrlint.plugins.xcube.util import get_dataset_level_info +from xrlint.rule import RuleContext, RuleOp +from xrlint.util.formatting import format_item + + +@plugin.define_rule( + "ml-dataset-meta", + version="1.0.0", + type="suggestion", + description=( + f"Multi-level datasets should provide {ML_META_FILENAME!r} meta information file" + f" and if so, it should be consistent." 
+ ), + docs_url=( + "https://xcube.readthedocs.io/en/latest/cubespec.html#encoding-of-colors" + ), +) +class MultiLevelDatasetMeta(RuleOp): + def dataset(self, ctx: RuleContext, node: DatasetNode): + level_info = get_dataset_level_info(node.dataset) + if level_info is None: + # ok, this rule applies only to level datasets opened + # by the xcube multi-level processor + return + + level = level_info.level + if level > 0: + # ok, this rule only applies to level 0 + return + + meta = level_info.meta + if meta is None: + ctx.report( + f"Missing {ML_META_FILENAME!r} file," + f" therefore dataset cannot be extended." + ) + return + + actual_count: int = level_info.num_levels + meta_count = meta.num_levels + if meta_count != actual_count: + ctx.report( + f"Expected {format_item(meta_count, 'level')}, but found {actual_count}" + ) + + actual_count: int = level_info.num_levels + meta_count = meta.use_saved_levels + if meta_count != actual_count: + ctx.report( + f"Expected {format_item(meta_count, 'level')}, but found {actual_count}" + ) + + # TODO: verify variables listed in meta diff --git a/xrlint/plugins/xcube/rules/ml_dataset_resolution.py b/xrlint/plugins/xcube/rules/ml_dataset_resolution.py new file mode 100644 index 0000000..ce00558 --- /dev/null +++ b/xrlint/plugins/xcube/rules/ml_dataset_resolution.py @@ -0,0 +1,57 @@ +import math + +from xrlint.node import DatasetNode +from xrlint.plugins.xcube.plugin import plugin +from xrlint.plugins.xcube.util import get_spatial_size, get_dataset_level_info +from xrlint.rule import RuleContext, RuleOp + + +@plugin.define_rule( + "ml-dataset-resolution", + version="1.0.0", + type="problem", + description=( + "Multi-level dataset levels should provide spatial resolutions" + " decreasing by powers of two." + ), + docs_url=( + "https://xcube.readthedocs.io/en/latest/cubespec.html#encoding-of-colors" + ), +) +class MultiLevelDatasetMeta(RuleOp): + def dataset(self, ctx: RuleContext, node: DatasetNode): + level_info = get_dataset_level_info(node.dataset) + if level_info is None: + # ok, this rule applies only to level datasets opened + # by the xcube multi-level processor + return + + level = level_info.level + if level == 0: + # ok, this rule only applies to levels > 0 + return + + datasets = level_info.datasets + level_0_dataset, _ = datasets[0] + l0_size = get_spatial_size(level_0_dataset) + if l0_size is None: + # weird - maybe no data vars?
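+            # (get_spatial_size() returns None if no data variable has
+            # spatial dimensions, in which case there is nothing to check)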
+ return + + (x_name, level_0_width), (y_name, level_0_height) = l0_size + level_width = node.dataset.sizes.get(x_name) + level_height = node.dataset.sizes.get(y_name) + expected_level_width = math.ceil(level_0_width >> level) + expected_level_height = math.ceil(level_0_height >> level) + + if level_width != expected_level_width: + ctx.report( + f"Expected size of dimension {x_name!r} in level {level}" + f" to be {expected_level_width}, but was {level_width}" + ) + + if level_height != expected_level_height: + ctx.report( + f"Expected size of dimension {y_name!r} in level {level}" + f" to be {expected_level_height}, but was {level_height}" + ) diff --git a/xrlint/plugins/xcube/util.py b/xrlint/plugins/xcube/util.py index 09a4e20..1cb2002 100644 --- a/xrlint/plugins/xcube/util.py +++ b/xrlint/plugins/xcube/util.py @@ -1,11 +1,68 @@ +from collections.abc import Hashable +from dataclasses import dataclass + import xarray as xr -from .constants import LAT_NAME, LON_NAME, X_NAME, Y_NAME +from .constants import LAT_NAME, LON_NAME, X_NAME, Y_NAME, ML_INFO_ATTR + + +@dataclass(frozen=True, kw_only=True) +class MultiLevelDatasetMeta: + version: str + num_levels: int + use_saved_levels: bool | None = None + agg_methods: dict[str, str] | None = None + + +@dataclass(frozen=True, kw_only=True) +class LevelInfo: + level: int + num_levels: int + datasets: list[tuple[xr.Dataset, str]] + meta: MultiLevelDatasetMeta | None = None + + +def get_dataset_level_info(dataset: xr.Dataset) -> LevelInfo | None: + return dataset.attrs.get(ML_INFO_ATTR) + + +def set_dataset_level_info(dataset: xr.Dataset, level_info: LevelInfo): + dataset.attrs[ML_INFO_ATTR] = level_info def is_spatial_var(var: xr.DataArray) -> bool: - """Return 'True' if `var` looks like a spatial variable.""" - dims = var.dims - return (X_NAME in dims and Y_NAME in dims) or ( - LON_NAME in dims and LAT_NAME in dims + """Return 'True' if `var` looks like a spatial 2+d variable.""" + if var.ndim < 2: + return False + y_name, x_name = var.dims[-2:] + return (x_name == X_NAME and y_name == Y_NAME) or ( + x_name == LON_NAME and y_name == LAT_NAME ) + + +def get_spatial_size( + dataset: xr.Dataset, +) -> tuple[tuple[Hashable, int], tuple[Hashable, int]] | None: + """Return (x_size, y_size) for given dataset.""" + for k, v in dataset.data_vars.items(): + if is_spatial_var(v): + y_name, x_name = v.dims[-2:] + x_size = dataset.sizes[x_name] + y_size = dataset.sizes[y_name] + if x_size and y_size: + return (x_name, x_size), (y_name, y_size) + return None + + +def get_spatial_size_for_dims( + dataset: xr.Dataset, x_name: str, y_name: str +) -> tuple[int, int]: + """Return (x_size, y_size) for given dataset and dim names.""" + x_size: int = 0 + y_size: int = 0 + for k, v in dataset.sizes.items(): + if k == x_name: + x_size = v + elif k == y_name: + y_size = v + return x_size, y_size From 24c0b8d3a3bc9c869e73498c357fe384f5d37455 Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Fri, 24 Jan 2025 12:35:27 +0100 Subject: [PATCH 13/17] testing MultiLevelDatasetProcessor --- tests/plugins/xcube/helpers.py | 9 ++ .../xcube/processors/test_mldataset.py | 141 ++++++++++++------ xrlint/cli/engine.py | 1 - xrlint/plugins/xcube/processors/mldataset.py | 37 +---- xrlint/plugins/xcube/rules/ml_dataset_meta.py | 52 +++++-- .../xcube/rules/ml_dataset_resolution.py | 6 +- xrlint/plugins/xcube/util.py | 32 +++- 7 files changed, 184 insertions(+), 94 deletions(-) diff --git a/tests/plugins/xcube/helpers.py b/tests/plugins/xcube/helpers.py index 77bbfb6..f397500 100644 --- 
a/tests/plugins/xcube/helpers.py +++ b/tests/plugins/xcube/helpers.py @@ -1,7 +1,16 @@ +import math + import numpy as np import xarray as xr +def make_cube_levels(nx: int, ny: int, nt: int, nl: int) -> list[xr.Dataset]: + return [ + make_cube(math.ceil(nx >> level), math.ceil(ny >> level), nt) + for level in range(nl) + ] + + def make_cube(nx: int, ny: int, nt: int) -> xr.Dataset: """Make an in-memory dataset that should pass all xcube rules. diff --git a/tests/plugins/xcube/processors/test_mldataset.py b/tests/plugins/xcube/processors/test_mldataset.py index aee7bf4..8889f6c 100644 --- a/tests/plugins/xcube/processors/test_mldataset.py +++ b/tests/plugins/xcube/processors/test_mldataset.py @@ -1,9 +1,11 @@ import json +from typing import Any from unittest import TestCase import fsspec +import pytest -from tests.plugins.xcube.helpers import make_cube +from tests.plugins.xcube.helpers import make_cube_levels from xrlint.plugins.xcube.constants import ML_INFO_ATTR from xrlint.plugins.xcube.processors.mldataset import ( MultiLevelDatasetProcessor, @@ -14,67 +16,116 @@ class MultiLevelDatasetProcessorTest(TestCase): + levels_name = "xrlint-test" + levels_dir = f"memory://{levels_name}.levels" + + x_size = nx = 720 + y_size = ny = 360 + time_size = 3 + num_levels = 4 + + meta_path = f"{levels_dir}/.zlevels" + meta_content = { + "version": "1.0", + "num_levels": num_levels, + "use_saved_levels": False, + # tile_size is optional + "agg_methods": { + "chl": "mean", + }, + } + @classmethod def setUpClass(cls): - cls.levels_dir = "memory://xrlint-test.levels" - cls.fs, _ = fsspec.core.url_to_fs(cls.levels_dir) - cls.fs.mkdir(cls.levels_dir) - - cls.nx = nx = 720 - cls.ny = ny = nx // 2 - cls.nt = 3 - cls.nl = 4 - - for level in range(cls.nl): - dataset = make_cube(nx, ny, cls.nt) - dataset.to_zarr(f"{cls.levels_dir}/{level}.zarr", write_empty_chunks=False) - nx //= 2 - ny //= 2 - - cls.meta_path = f"{cls.levels_dir}/.zlevels" - cls.meta_content = { - "version": "1.0", - "num_levels": cls.nl, - "use_saved_levels": False, - "agg_methods": { - "chl": "mean", - }, - } + fs, _ = fsspec.core.url_to_fs(cls.levels_dir) + cls.fs: fsspec.AbstractFileSystem = fs + + def setUp(self): + self._delete_files() + self.fs.mkdir(self.levels_dir) + + level_datasets = make_cube_levels( + self.x_size, self.y_size, self.time_size, self.num_levels + ) + for level, dataset in enumerate(level_datasets): + dataset.to_zarr(f"{self.levels_dir}/{level}.zarr", write_empty_chunks=False) - def test_preprocess(self): with self.fs.open(self.meta_path, mode="wt") as stream: json.dump(self.meta_content, stream, indent=2) - processor = MultiLevelDatasetProcessor() - datasets = processor.preprocess(self.levels_dir, {}) + def tearDown(self): + self._delete_files() + + def _delete_files(self): + if self.fs.exists(self.levels_dir): + self.fs.delete(self.levels_dir, recursive=True) + + def assert_levels_ok( + self, datasets: Any, expect_meta: bool = False, expect_link: bool = False + ): self.assertIsInstance(datasets, list) - self.assertEqual(self.nl, len(datasets)) + self.assertEqual(self.num_levels, len(datasets)) for i, (dataset, file_path) in enumerate(datasets): - self.assertEqual(f"{self.levels_dir}/{i}.zarr", file_path) + if expect_link and i == 0: + self.assertEqual(f"memory://{self.levels_name}.zarr", file_path) + else: + self.assertEqual( + f"memory://{self.levels_name}.levels/{i}.zarr", file_path + ) level_info = dataset.attrs.get(ML_INFO_ATTR) self.assertIsInstance(level_info, LevelInfo) self.assertEqual(i, level_info.level) - 
self.assertEqual(self.nl, level_info.num_levels) - self.assertIsInstance(level_info.meta, MultiLevelDatasetMeta) + self.assertEqual(self.num_levels, level_info.num_levels) self.assertIsInstance(level_info.datasets, list) - self.assertEqual(self.nl, len(level_info.datasets)) + self.assertEqual(self.num_levels, len(level_info.datasets)) + meta = level_info.meta + if expect_meta: + self.assertIsInstance(meta, MultiLevelDatasetMeta) + self.assertEqual("1.0", meta.version) + self.assertEqual(self.num_levels, meta.num_levels) + self.assertEqual(False, meta.use_saved_levels) + self.assertEqual(None, meta.tile_size) + self.assertEqual({"chl": "mean"}, meta.agg_methods) + else: + self.assertIsNone(meta) - self.fs.delete(self.meta_path) + def test_preprocess(self): + processor = MultiLevelDatasetProcessor() + datasets = processor.preprocess(self.levels_dir, {}) + self.assert_levels_ok(datasets, expect_meta=True) def test_preprocess_no_meta(self): + self.fs.delete(self.meta_path) processor = MultiLevelDatasetProcessor() datasets = processor.preprocess(self.levels_dir, {}) - self.assertIsInstance(datasets, list) - self.assertEqual(self.nl, len(datasets)) - for i, (dataset, file_path) in enumerate(datasets): - self.assertEqual(f"{self.levels_dir}/{i}.zarr", file_path) - level_info = dataset.attrs.get(ML_INFO_ATTR) - self.assertIsInstance(level_info, LevelInfo) - self.assertEqual(i, level_info.level) - self.assertEqual(self.nl, level_info.num_levels) - self.assertEqual(None, level_info.meta) - self.assertIsInstance(level_info.datasets, list) - self.assertEqual(self.nl, len(level_info.datasets)) + self.assert_levels_ok(datasets, expect_meta=False) + + def test_preprocess_with_link(self): + self.fs.copy( + f"{self.levels_dir}/0.zarr", + f"memory://{self.levels_name}.zarr", + recursive=True, + ) + self.fs.delete(f"{self.levels_dir}/0.zarr", recursive=True) + self.fs.write_text(f"{self.levels_dir}/0.link", f"../{self.levels_name}.zarr") + processor = MultiLevelDatasetProcessor() + datasets = processor.preprocess(self.levels_dir, {}) + self.assert_levels_ok(datasets, expect_meta=True, expect_link=True) + + def test_preprocess_fail_empty(self): + for i in range(self.num_levels): + self.fs.delete(f"{self.levels_dir}/{i}.zarr", recursive=True) + processor = MultiLevelDatasetProcessor() + with pytest.raises(ValueError, match="empty multi-level dataset"): + processor.preprocess(self.levels_dir, {}) + + def test_preprocess_fail_missing(self): + self.fs.delete(f"{self.levels_dir}/1.zarr", recursive=True) + processor = MultiLevelDatasetProcessor() + with pytest.raises( + ValueError, match="missing dataset for level 1 in multi-level dataset" + ): + processor.preprocess(self.levels_dir, {}) def test_postprocess(self): processor = MultiLevelDatasetProcessor() diff --git a/xrlint/cli/engine.py b/xrlint/cli/engine.py index f8eccf9..b40d99c 100644 --- a/xrlint/cli/engine.py +++ b/xrlint/cli/engine.py @@ -181,7 +181,6 @@ def compute_config(p: str): if fs.isdir(root): for path, dirs, files in fs.walk(root, topdown=True): - for d in list(dirs): d_path = f"{path}/{d}" c = compute_config(d_path) diff --git a/xrlint/plugins/xcube/processors/mldataset.py b/xrlint/plugins/xcube/processors/mldataset.py index 3e4c3ee..67580a2 100644 --- a/xrlint/plugins/xcube/processors/mldataset.py +++ b/xrlint/plugins/xcube/processors/mldataset.py @@ -15,6 +15,7 @@ set_dataset_level_info, LevelInfo, MultiLevelDatasetMeta, + norm_path, ) from xrlint.processor import ProcessorOp from xrlint.result import Message @@ -37,21 +38,16 @@ def preprocess( 
for f in fs.listdir(fs_path, detail=False) ] + # check for optional ".zlevels" that provides meta-info meta = None if ML_META_FILENAME in file_names: - try: - with fs.open(f"{fs_path}/{ML_META_FILENAME}") as stream: - meta = parse_multi_level_dataset_meta(stream) - except FileNotFoundError: - pass + with fs.open(f"{fs_path}/{ML_META_FILENAME}") as stream: + meta = MultiLevelDatasetMeta.from_value(json.load(stream)) + # check for optional ".0.link" that locates level 0 somewhere else level_0_path = None if "0.link" in file_names: - try: - with fs.open(f"{fs_path}/0.link") as stream: - level_0_path = stream.read() - except FileNotFoundError: - pass + level_0_path = fs.read_text(f"{fs_path}/0.link") level_names, num_levels = parse_levels(file_names, level_0_path) @@ -59,7 +55,7 @@ def preprocess( level_datasets: list[xr.Dataset | None] = [] for level, level_name in level_names.items(): - level_path = f"{file_path}/{level_name}" + level_path = norm_path(f"{file_path}/{level_name}") level_dataset = xr.open_dataset(level_path, engine=engine, **opener_options) level_datasets.append((level_dataset, level_path)) @@ -116,22 +112,3 @@ def parse_levels( f"missing dataset for level {level} in multi-level dataset" ) return level_names, num_levels - - -def parse_multi_level_dataset_meta(stream: Any) -> MultiLevelDatasetMeta: - meta_content = json.load(stream) - - msg_prefix = f"illegal xcube {ML_META_FILENAME!r} file" - - if not isinstance(meta_content, dict): - raise ValueError(f"{msg_prefix}, JSON object expected") - - version = meta_content.get("version") - if not isinstance(version, str) or not version.startswith("1."): - raise ValueError(f"{msg_prefix}, missing or invalid 'version'") - - num_levels = meta_content.get("num_levels") - if not isinstance(num_levels, int) or num_levels < 1: - raise ValueError(f"{msg_prefix}, missing or invalid 'num_levels'") - - return MultiLevelDatasetMeta(**meta_content) diff --git a/xrlint/plugins/xcube/rules/ml_dataset_meta.py b/xrlint/plugins/xcube/rules/ml_dataset_meta.py index 99d0aeb..a77c1c1 100644 --- a/xrlint/plugins/xcube/rules/ml_dataset_meta.py +++ b/xrlint/plugins/xcube/rules/ml_dataset_meta.py @@ -1,7 +1,7 @@ from xrlint.node import DatasetNode from xrlint.plugins.xcube.constants import ML_META_FILENAME from xrlint.plugins.xcube.plugin import plugin -from xrlint.plugins.xcube.util import get_dataset_level_info +from xrlint.plugins.xcube.util import get_dataset_level_info, is_spatial_var from xrlint.rule import RuleContext, RuleOp from xrlint.util.formatting import format_item @@ -11,11 +11,11 @@ version="1.0.0", type="suggestion", description=( - f"Multi-level datasets should provide {ML_META_FILENAME!r} meta information file" - f" and if so, it should be consistent." + f"Multi-level datasets should provide {ML_META_FILENAME!r}" + f" meta information file and if so, it should be consistent." ), docs_url=( - "https://xcube.readthedocs.io/en/latest/cubespec.html#encoding-of-colors" + "https://xcube.readthedocs.io/en/latest/mldatasets.html#the-xcube-levels-format" ), ) class MultiLevelDatasetMeta(RuleOp): @@ -34,23 +34,47 @@ def dataset(self, ctx: RuleContext, node: DatasetNode): meta = level_info.meta if meta is None: ctx.report( - f"Missing {ML_META_FILENAME!r} file," + f"Missing {ML_META_FILENAME!r} meta-info file," f" therefore dataset cannot be extended." 
) return - actual_count: int = level_info.num_levels - meta_count = meta.num_levels - if meta_count != actual_count: + if not meta.version.startswith("1."): + ctx.report(f"Unsupported {ML_META_FILENAME!r} meta-info version") + + if meta.num_levels < 0: + ctx.report( + f"Invalid 'num_levels' in {ML_META_FILENAME!r} meta-info:" + f" {meta.num_levels}" + ) + elif meta.num_levels != level_info.num_levels: + ctx.report( + f"Expected {format_item(meta.num_levels, 'level')}," + f" but found {level_info.num_levels}" + ) + + if meta.use_saved_levels is None: ctx.report( - f"Expected {format_item(meta_count, 'level')}, but found {actual_count}" + f"Missing value for 'use_saved_levels'" + f" in {ML_META_FILENAME!r} meta-info." ) - actual_count: int = level_info.num_levels - meta_count = meta.use_saved_levels - if meta_count != actual_count: + if not meta.agg_methods: ctx.report( - f"Expected {format_item(meta_count, 'level')}, but found {actual_count}" + f"Missing value for 'agg_methods' in {ML_META_FILENAME!r} meta-info." ) + else: + for var_name, var in node.dataset.data_vars.items(): + if is_spatial_var(var) and not meta.agg_methods.get(var_name): + ctx.report( + f"Missing value for variable {var_name!r}" + f" in 'agg_methods' of {ML_META_FILENAME!r} meta-info." + ) + for var_name in meta.agg_methods.keys(): + if var_name not in node.dataset: + ctx.report( + f"Variable {var_name!r} not found in dataset, but specified" + f" in 'agg_methods' of {ML_META_FILENAME!r} meta-info." + ) - # TODO: verify variables listed in meta + # Later: check meta.tile_size as well... diff --git a/xrlint/plugins/xcube/rules/ml_dataset_resolution.py b/xrlint/plugins/xcube/rules/ml_dataset_resolution.py index ce00558..b58c886 100644 --- a/xrlint/plugins/xcube/rules/ml_dataset_resolution.py +++ b/xrlint/plugins/xcube/rules/ml_dataset_resolution.py @@ -14,9 +14,7 @@ "Multi-level dataset levels should provide spatial resolutions" " decreasing by powers of two." ), - docs_url=( - "https://xcube.readthedocs.io/en/latest/cubespec.html#encoding-of-colors" - ), + docs_url="https://xcube.readthedocs.io/en/latest/mldatasets.html#definition", ) class MultiLevelDatasetMeta(RuleOp): def dataset(self, ctx: RuleContext, node: DatasetNode): @@ -55,3 +53,5 @@ def dataset(self, ctx: RuleContext, node: DatasetNode): f"Expected size of dimension {y_name!r} in level {level}" f" to be {expected_level_height}, but was {level_height}" ) + + # Here: check spatial coordinates... diff --git a/xrlint/plugins/xcube/util.py b/xrlint/plugins/xcube/util.py index 1cb2002..373c75e 100644 --- a/xrlint/plugins/xcube/util.py +++ b/xrlint/plugins/xcube/util.py @@ -3,23 +3,43 @@ import xarray as xr +from xrlint.util.constructible import MappingConstructible from .constants import LAT_NAME, LON_NAME, X_NAME, Y_NAME, ML_INFO_ATTR @dataclass(frozen=True, kw_only=True) -class MultiLevelDatasetMeta: +class MultiLevelDatasetMeta(MappingConstructible): + """The contents of a xcube `.zlevels` meta-info file.""" + version: str num_levels: int + tile_size: tuple[int, int] | None = None use_saved_levels: bool | None = None agg_methods: dict[str, str] | None = None @dataclass(frozen=True, kw_only=True) class LevelInfo: + """Level info added to each level dataset to allow for validation + of multi-level datasets. 
+ + Note, this needs to be kept in sync with the multi-level dataset processor, + which constructs and attaches these objects. + """ + level: int + """Current level index.""" + num_levels: int + """Actual number of levels found.""" + datasets: list[tuple[xr.Dataset, str]] + """A list of `num_levels` pairs, each comprising + a level dataset and its file path. + """ + meta: MultiLevelDatasetMeta | None = None + """Content of a `.zlevels` file, if the file was found.""" def get_dataset_level_info(dataset: xr.Dataset) -> LevelInfo | None: @@ -66,3 +86,13 @@ def get_spatial_size_for_dims( elif k == y_name: y_size = v return x_size, y_size + + +def norm_path(level_path: str) -> str: + parts = level_path.replace("\\", "/").split("/") + level_path = "/".join( + p + for i, p in enumerate(parts) + if p not in (".", "..") and (i == len(parts) - 1 or parts[i + 1] != "..") + ) + return level_path From f44c132bfaa6b8514ee1c1aa50beb1c4a9a96dbd Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Fri, 24 Jan 2025 15:48:38 +0100 Subject: [PATCH 14/17] added final tests --- CHANGES.md | 3 +- docs/rule-ref.md | 21 +++ mkruleref.py | 24 +++- tests/plugins/xcube/helpers.py | 121 +++++++++++------- .../xcube/processors/test_mldataset.py | 10 +- .../xcube/rules/test_ml_dataset_meta.py | 87 +++++++++++++ .../xcube/rules/test_ml_dataset_time.py | 39 ++++++ .../plugins/xcube/rules/test_ml_dataset_xy.py | 57 +++++++++ tests/plugins/xcube/test_plugin.py | 3 +- xrlint/plugins/xcube/__init__.py | 5 +- xrlint/plugins/xcube/processors/mldataset.py | 18 +-- xrlint/plugins/xcube/rules/ml_dataset_meta.py | 4 +- xrlint/plugins/xcube/rules/ml_dataset_time.py | 39 ++++++ ...dataset_resolution.py => ml_dataset_xy.py} | 6 +- xrlint/plugins/xcube/util.py | 20 ++- 15 files changed, 375 insertions(+), 82 deletions(-) create mode 100644 tests/plugins/xcube/rules/test_ml_dataset_meta.py create mode 100644 tests/plugins/xcube/rules/test_ml_dataset_time.py create mode 100644 tests/plugins/xcube/rules/test_ml_dataset_xy.py create mode 100644 xrlint/plugins/xcube/rules/ml_dataset_time.py rename xrlint/plugins/xcube/rules/{ml_dataset_resolution.py => ml_dataset_xy.py} (94%) diff --git a/CHANGES.md b/CHANGES.md index a3b9b84..43c0291 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -4,7 +4,8 @@ - New xcube multi-level dataset rules: - `ml-dataset-meta`: verifies that a meta info file exists and is consistent - - `ml-dataset-resolution`: verifies that the levels have expected resolutions + - `ml-dataset-xy`: verifies that the levels have the expected spatial resolutions + - `ml-dataset-time`: verifies that the levels have the expected time dimension, if any - Now supporting xcube multi-level datasets `*.levels`: - Added xcube plugin processor `"xcube/multi-level-dataset"` that is used inside the predefined xcube configurations "all" and "recommended". diff --git a/docs/rule-ref.md b/docs/rule-ref.md index d9aaf0b..bb4219d 100644 --- a/docs/rule-ref.md +++ b/docs/rule-ref.md @@ -114,6 +114,27 @@ Latitude and longitude coordinates and dimensions should be called 'lat' and 'lo Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: +### :material-lightbulb: `ml-dataset-meta` + +Multi-level datasets should provide '.zlevels' meta information file and if so, it should be consistent. +[:material-information-variant:](https://xcube.readthedocs.io/en/latest/mldatasets.html#the-xcube-levels-format) + +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: + +### :material-bug: `ml-dataset-time` + +The `time` dimension of multi-level datasets should use a chunk size of 1.
This allows for faster image tile generation for visualisation. +[:material-information-variant:](https://xcube.readthedocs.io/en/latest/mldatasets.html#definition) + +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-alert: + +### :material-bug: `ml-dataset-xy` + +Multi-level dataset levels should provide spatial resolutions decreasing by powers of two. +[:material-information-variant:](https://xcube.readthedocs.io/en/latest/mldatasets.html#definition) + +Contained in: `all`-:material-lightning-bolt: `recommended`-:material-lightning-bolt: + ### :material-bug: `single-grid-mapping` A single grid mapping shall be used for all spatial data variables of a datacube. diff --git a/mkruleref.py b/mkruleref.py index ebd1a31..6e2d396 100644 --- a/mkruleref.py +++ b/mkruleref.py @@ -1,4 +1,5 @@ from xrlint.plugin import Plugin +from xrlint.rule import RuleConfig # for icons, see # https://squidfunk.github.io/mkdocs-material/reference/icons-emojis/ @@ -39,7 +40,7 @@ def write_rule_ref_page(): def write_plugin_rules(stream, plugin: Plugin): - configs = plugin.configs + config_rules = get_plugin_rule_configs(plugin) for rule_id in sorted(plugin.rules.keys()): rule_meta = plugin.rules[rule_id].meta stream.write( @@ -51,9 +52,8 @@ def write_plugin_rules(stream, plugin: Plugin): stream.write("\n\n") # List the predefined configurations that contain the rule stream.write("Contained in: ") - for config_id in sorted(configs.keys()): - config = configs[config_id] - rule_configs = config.rules or {} + for config_id in sorted(config_rules.keys()): + rule_configs = config_rules[config_id] rule_config = rule_configs.get(rule_id) or rule_configs.get( f"{plugin.meta.name}/{rule_id}" ) @@ -62,5 +62,21 @@ def write_plugin_rules(stream, plugin: Plugin): stream.write("\n\n") +def get_plugin_rule_configs(plugin): + configs = plugin.configs + config_rules: dict[str, dict[str, RuleConfig]] = {} + for config_name, config_list in configs.items(): + # note, here we assume most plugins configure their rules + # in one dedicated config object only. However, this is not + # the general case as file patterns may be used to make the + # rules configurations specific. + rule_configs = {} + for config in config_list: + if config.rules: + rule_configs.update(config.rules) + config_rules[config_name] = rule_configs + return config_rules + + if __name__ == "__main__": write_rule_ref_page() diff --git a/tests/plugins/xcube/helpers.py b/tests/plugins/xcube/helpers.py index f397500..181f0e2 100644 --- a/tests/plugins/xcube/helpers.py +++ b/tests/plugins/xcube/helpers.py @@ -3,70 +3,95 @@ import numpy as np import xarray as xr +from xrlint.plugins.xcube.util import LevelsMeta, attach_dataset_level_infos -def make_cube_levels(nx: int, ny: int, nt: int, nl: int) -> list[xr.Dataset]: - return [ + +def make_cube_levels( + nl: int, + nx: int, + ny: int, + nt: int | None = None, + meta: LevelsMeta | None = None, + force_infos: bool = False, +) -> list[xr.Dataset]: + levels = [ make_cube(math.ceil(nx >> level), math.ceil(ny >> level), nt) for level in range(nl) ] + if meta is not None or force_infos: + attach_dataset_level_infos( + [(ds, f"{i}.zarr") for i, ds in enumerate(levels)], meta=meta + ) + return levels -def make_cube(nx: int, ny: int, nt: int) -> xr.Dataset: +def make_cube(nx: int, ny: int, nt: int | None = None) -> xr.Dataset: """Make an in-memory dataset that should pass all xcube rules. 
     Args:
         nx: length of the lon-dimension
         ny: length of the lat-dimension
-        nt: length of the time-dimension
+        nt: length of the time-dimension, optional

     Returns:
         an in-memory dataset with one 3-d data variable "chl"
-        with dimensions "time", "lat", "lon".
+        with dimensions ["time",] "lat", "lon".
     """
+    x_attrs = dict(
+        long_name="longitude",
+        standard_name="longitude",
+        units="degrees_east",
+    )
+    y_attrs = dict(
+        long_name="latitude",
+        standard_name="latitude",
+        units="degrees_north",
+    )
+
     dx = 180.0 / nx
     dy = 90.0 / ny
-    return xr.Dataset(
-        coords=dict(
-            lon=xr.DataArray(
-                np.linspace(-180 + dx, 180 - dx, nx),
-                dims="lon",
-                attrs=dict(
-                    long_name="longitude",
-                    standard_name="longitude",
-                    units="degrees_east",
-                ),
-            ),
-            lat=xr.DataArray(
-                np.linspace(-90 + dy, 90 - dy, ny),
-                dims="lat",
-                attrs=dict(
-                    long_name="latitude",
-                    standard_name="latitude",
-                    units="degrees_north",
-                ),
+    x_data = np.linspace(-180 + dx, 180 - dx, nx)
+    y_data = np.linspace(-90 + dy, 90 - dy, ny)
+
+    chl_attrs = dict(
+        long_name="chlorophyll concentration",
+        standard_name="chlorophyll_concentration",
+        units="mg/m^3",
+        _FillValue=0,
+    )
+    chl_chunks = dict(lat=min(ny, 90), lon=min(nx, 90))
+
+    ds_attrs = dict(title="Chlorophyll")
+
+    coords = dict(
+        lon=xr.DataArray(x_data, dims="lon", attrs=x_attrs),
+        lat=xr.DataArray(y_data, dims="lat", attrs=y_attrs),
+    )
+
+    if nt is None:
+        return xr.Dataset(
+            data_vars=dict(
+                chl=xr.DataArray(
+                    np.zeros((ny, nx)), dims=["lat", "lon"], attrs=chl_attrs
+                ).chunk(**chl_chunks),
             ),
-            time=xr.DataArray(
-                range(nt),
-                dims="time",
-                attrs=dict(
-                    long_name="time",
-                    standard_name="time",
-                    units="days since 2024-06-10:12:00:00 utc",
-                    calendar="gregorian",
-                ),
+            coords=coords,
+            attrs=ds_attrs,
+        )
+    else:
+        time_attrs = dict(
+            long_name="time",
+            standard_name="time",
+            units="days since 2024-06-10:12:00:00 utc",
+            calendar="gregorian",
+        )
+        coords.update(time=xr.DataArray(range(nt), dims="time", attrs=time_attrs))
+        return xr.Dataset(
+            data_vars=dict(
+                chl=xr.DataArray(
+                    np.zeros((nt, ny, nx)), dims=["time", "lat", "lon"], attrs=chl_attrs
+                ).chunk(time=1, **chl_chunks),
             ),
-        ),
-        data_vars=dict(
-            chl=xr.DataArray(
-                np.zeros((nt, ny, nx)),
-                dims=["time", "lat", "lon"],
-                attrs=dict(
-                    long_name="chlorophyll concentration",
-                    standard_name="chlorophyll_concentration",
-                    units="mg/m^3",
-                    _FillValue=0,
-                ),
-            ).chunk(time=1, lat=min(ny, 90), lon=min(nx, 90)),
-        ),
-        attrs=dict(),
-    )
+            coords=coords,
+            attrs=ds_attrs,
+        )
diff --git a/tests/plugins/xcube/processors/test_mldataset.py b/tests/plugins/xcube/processors/test_mldataset.py
index 8889f6c..f0511a6 100644
--- a/tests/plugins/xcube/processors/test_mldataset.py
+++ b/tests/plugins/xcube/processors/test_mldataset.py
@@ -7,10 +7,10 @@
 from tests.plugins.xcube.helpers import make_cube_levels
 from xrlint.plugins.xcube.constants import ML_INFO_ATTR
-from xrlint.plugins.xcube.processors.mldataset import (
-    MultiLevelDatasetProcessor,
+from xrlint.plugins.xcube.processors.mldataset import MultiLevelDatasetProcessor
+from xrlint.plugins.xcube.util import (
     LevelInfo,
-    MultiLevelDatasetMeta,
+    LevelsMeta,
 )
 from xrlint.result import Message

@@ -45,7 +45,7 @@ def setUp(self):
         self.fs.mkdir(self.levels_dir)

         level_datasets = make_cube_levels(
-            self.x_size, self.y_size, self.time_size, self.num_levels
+            self.num_levels, self.x_size, self.y_size, self.time_size
         )
         for level, dataset in enumerate(level_datasets):
             dataset.to_zarr(f"{self.levels_dir}/{level}.zarr", write_empty_chunks=False)
@@ -80,7 +80,7 @@ def assert_levels_ok(
         self.assertEqual(self.num_levels, len(level_info.datasets))
         meta = level_info.meta
         if expect_meta:
-            self.assertIsInstance(meta, MultiLevelDatasetMeta)
+            self.assertIsInstance(meta, LevelsMeta)
             self.assertEqual("1.0", meta.version)
             self.assertEqual(self.num_levels, meta.num_levels)
             self.assertEqual(False, meta.use_saved_levels)
diff --git a/tests/plugins/xcube/rules/test_ml_dataset_meta.py b/tests/plugins/xcube/rules/test_ml_dataset_meta.py
new file mode 100644
index 0000000..5ad27a0
--- /dev/null
+++ b/tests/plugins/xcube/rules/test_ml_dataset_meta.py
@@ -0,0 +1,87 @@
+import xarray as xr
+from tests.plugins.xcube.helpers import make_cube_levels
+from xrlint.plugins.xcube.rules.ml_dataset_meta import MLDatasetMeta
+from xrlint.plugins.xcube.util import (
+    LevelsMeta,
+    set_dataset_level_info,
+    LevelInfo,
+    get_dataset_level_info,
+)
+from xrlint.testing import RuleTest, RuleTester
+
+
+def _replace_meta(dataset: xr.Dataset, meta: LevelsMeta) -> xr.Dataset:
+    dataset = dataset.copy()
+    old_level_info = get_dataset_level_info(dataset)
+    new_level_info = LevelInfo(
+        level=old_level_info.level,
+        num_levels=old_level_info.num_levels,
+        datasets=old_level_info.datasets,
+        meta=meta,
+    )
+    set_dataset_level_info(dataset, new_level_info)
+    return dataset
+
+
+levels_with_meta = make_cube_levels(
+    4,
+    720,
+    360,
+    meta=LevelsMeta(
+        version="1.0",
+        num_levels=4,
+        use_saved_levels=True,
+        agg_methods={"chl": "mean"},
+    ),
+)
+
+valid_dataset_0 = levels_with_meta[0]
+valid_dataset_1 = levels_with_meta[1]
+valid_dataset_2 = levels_with_meta[2]
+valid_dataset_3 = xr.Dataset()
+
+levels_wo_meta = make_cube_levels(4, 720, 360, force_infos=True)
+invalid_dataset_0 = levels_wo_meta[0]
+invalid_dataset_1 = _replace_meta(
+    levels_wo_meta[0].copy(),
+    meta=LevelsMeta(
+        version="2.0",  # error: != "1.x"
+        num_levels=0,  # error: < 1
+        # error: missing use_saved_levels=False
+        # error: missing agg_methods={"chl": "mean"}
+    ),
+)
+invalid_dataset_2 = _replace_meta(
+    levels_wo_meta[0],
+    meta=LevelsMeta(
+        version="1.0",  # ok
+        num_levels=3,  # error: != level_info.num_levels
+    ),
+)
+
+invalid_dataset_3 = _replace_meta(
+    levels_wo_meta[0],
+    meta=LevelsMeta(
+        version="1.0",  # ok
+        num_levels=4,  # ok
+        use_saved_levels=False,  # ok
+        agg_methods={"tsm": "median"},  # error: where is "chl"?
+    ),
+)
+
+MLDatasetMetaTest = RuleTester.define_test(
+    "ml-dataset-meta",
+    MLDatasetMeta,
+    valid=[
+        RuleTest(dataset=valid_dataset_0),
+        RuleTest(dataset=valid_dataset_1),
+        RuleTest(dataset=valid_dataset_2),
+        RuleTest(dataset=valid_dataset_3),
+    ],
+    invalid=[
+        RuleTest(dataset=invalid_dataset_0),
+        RuleTest(dataset=invalid_dataset_1),
+        RuleTest(dataset=invalid_dataset_2),
+        RuleTest(dataset=invalid_dataset_3),
+    ],
+)
diff --git a/tests/plugins/xcube/rules/test_ml_dataset_time.py b/tests/plugins/xcube/rules/test_ml_dataset_time.py
new file mode 100644
index 0000000..8d3d6f4
--- /dev/null
+++ b/tests/plugins/xcube/rules/test_ml_dataset_time.py
@@ -0,0 +1,39 @@
+import xarray as xr
+from tests.plugins.xcube.helpers import make_cube_levels
+from xrlint.plugins.xcube.rules.ml_dataset_time import MLDatasetTime
+from xrlint.plugins.xcube.util import (
+    LevelsMeta,
+)
+from xrlint.testing import RuleTest, RuleTester
+
+
+meta = LevelsMeta(
+    version="1.0",
+    num_levels=4,
+    use_saved_levels=True,
+    agg_methods={"chl": "mean"},
+)
+levels_with_time = make_cube_levels(4, 720, 360, nt=6, meta=meta)
+levels_wo_time = make_cube_levels(4, 720, 360, meta=meta)
+
+valid_dataset_0 = levels_with_time[0]
+valid_dataset_1 = levels_with_time[1]
+valid_dataset_2 = levels_wo_time[0]
+valid_dataset_3 = xr.Dataset()
+
+invalid_dataset_0 = levels_with_time[0].copy()
+invalid_dataset_0["chl"] = invalid_dataset_0["chl"].chunk(time=3)
+
+MLDatasetTimeTest = RuleTester.define_test(
+    "ml-dataset-time",
+    MLDatasetTime,
+    valid=[
+        RuleTest(dataset=valid_dataset_0),
+        RuleTest(dataset=valid_dataset_1),
+        RuleTest(dataset=valid_dataset_2),
+        RuleTest(dataset=valid_dataset_3),
+    ],
+    invalid=[
+        RuleTest(dataset=invalid_dataset_0),
+    ],
+)
diff --git a/tests/plugins/xcube/rules/test_ml_dataset_xy.py b/tests/plugins/xcube/rules/test_ml_dataset_xy.py
new file mode 100644
index 0000000..6faa58a
--- /dev/null
+++ b/tests/plugins/xcube/rules/test_ml_dataset_xy.py
@@ -0,0 +1,57 @@
+import xarray as xr
+from tests.plugins.xcube.helpers import make_cube_levels
+from xrlint.plugins.xcube.rules.ml_dataset_xy import MLDatasetXY
+from xrlint.plugins.xcube.util import (
+    LevelsMeta,
+    get_dataset_level_info,
+)
+from xrlint.testing import RuleTest, RuleTester
+
+
+meta = LevelsMeta(
+    version="1.0",
+    num_levels=4,
+    use_saved_levels=True,
+    agg_methods={"chl": "mean"},
+)
+levels = make_cube_levels(4, 720, 360, nt=3, meta=meta)
+
+valid_dataset_0 = levels[0]
+valid_dataset_1 = levels[1]
+valid_dataset_2 = levels[2]
+valid_dataset_3 = levels[3]
+valid_dataset_4 = xr.Dataset()
+
+levels = make_cube_levels(4, 720, 360, meta=meta)
+valid_dataset_5 = levels[2]
+level_info = get_dataset_level_info(valid_dataset_5)
+for ds, _ in level_info.datasets:
+    # remove spatial vars
+    del ds["chl"]
+
+levels = make_cube_levels(4, 720, 360, meta=meta)
+invalid_dataset_0 = levels[2].copy()
+# simulate resolution mismatch by exchanging 2 levels
+level_info = get_dataset_level_info(invalid_dataset_0)
+level_datasets = level_info.datasets
+level_0_dataset = level_datasets[0]
+level_1_dataset = level_datasets[1]
+level_info.datasets[0] = level_1_dataset
+level_info.datasets[1] = level_0_dataset
+
+
+MLDatasetXYTest = RuleTester.define_test(
+    "ml-dataset-xy",
+    MLDatasetXY,
+    valid=[
+        RuleTest(dataset=valid_dataset_0),
+        RuleTest(dataset=valid_dataset_1),
+        RuleTest(dataset=valid_dataset_2),
+        RuleTest(dataset=valid_dataset_3),
+        RuleTest(dataset=valid_dataset_4),
+        RuleTest(dataset=valid_dataset_5),
+    ],
+    invalid=[
+        RuleTest(dataset=invalid_dataset_0),
+    ],
+)
diff --git a/tests/plugins/xcube/test_plugin.py b/tests/plugins/xcube/test_plugin.py
index a7cd0c6..e469d74 100644
--- a/tests/plugins/xcube/test_plugin.py
+++ b/tests/plugins/xcube/test_plugin.py
@@ -15,7 +15,8 @@ def test_rules_complete(self):
                 "increasing-time",
                 "lat-lon-naming",
                 "ml-dataset-meta",
-                "ml-dataset-resolution",
+                "ml-dataset-time",
+                "ml-dataset-xy",
                 "single-grid-mapping",
                 "time-naming",
             },
diff --git a/xrlint/plugins/xcube/__init__.py b/xrlint/plugins/xcube/__init__.py
index 3a1a917..0c008a9 100644
--- a/xrlint/plugins/xcube/__init__.py
+++ b/xrlint/plugins/xcube/__init__.py
@@ -38,8 +38,9 @@ def export_plugin() -> Plugin:
             "xcube/grid-mapping-naming": "warn",
             "xcube/increasing-time": "error",
             "xcube/lat-lon-naming": "error",
-            "xcube/ml-dataset-meta": "warn",
-            "xcube/ml-dataset-resolution": "error",
+            "xcube/ml-dataset-meta": "error",
+            "xcube/ml-dataset-time": "warn",
+            "xcube/ml-dataset-xy": "error",
             "xcube/single-grid-mapping": "error",
             "xcube/time-naming": "error",
         },
diff --git a/xrlint/plugins/xcube/processors/mldataset.py b/xrlint/plugins/xcube/processors/mldataset.py
index 67580a2..fc44eaf 100644
--- a/xrlint/plugins/xcube/processors/mldataset.py
+++ b/xrlint/plugins/xcube/processors/mldataset.py
@@ -12,10 +12,9 @@
 )
 from xrlint.plugins.xcube.plugin import plugin
 from xrlint.plugins.xcube.util import (
-    set_dataset_level_info,
-    LevelInfo,
-    MultiLevelDatasetMeta,
+    LevelsMeta,
     norm_path,
+    attach_dataset_level_infos,
 )
 from xrlint.processor import ProcessorOp
 from xrlint.result import Message
@@ -42,7 +41,7 @@ def preprocess(
         meta = None
         if ML_META_FILENAME in file_names:
             with fs.open(f"{fs_path}/{ML_META_FILENAME}") as stream:
-                meta = MultiLevelDatasetMeta.from_value(json.load(stream))
+                meta = LevelsMeta.from_value(json.load(stream))

         # check for optional ".0.link" that locates level 0 somewhere else
         level_0_path = None
@@ -59,16 +58,7 @@
             level_dataset = xr.open_dataset(level_path, engine=engine, **opener_options)
             level_datasets.append((level_dataset, level_path))

-        for level, (level_dataset, _) in enumerate(level_datasets):
-            set_dataset_level_info(
-                level_dataset,
-                LevelInfo(
-                    level=level,
-                    num_levels=num_levels,
-                    meta=meta,
-                    datasets=level_datasets,
-                ),
-            )
+        attach_dataset_level_infos(level_datasets, meta=meta)

         return level_datasets

diff --git a/xrlint/plugins/xcube/rules/ml_dataset_meta.py b/xrlint/plugins/xcube/rules/ml_dataset_meta.py
index a77c1c1..7d19ab3 100644
--- a/xrlint/plugins/xcube/rules/ml_dataset_meta.py
+++ b/xrlint/plugins/xcube/rules/ml_dataset_meta.py
@@ -18,7 +18,7 @@
         "https://xcube.readthedocs.io/en/latest/mldatasets.html#the-xcube-levels-format"
     ),
 )
-class MultiLevelDatasetMeta(RuleOp):
+class MLDatasetMeta(RuleOp):
     def dataset(self, ctx: RuleContext, node: DatasetNode):
         level_info = get_dataset_level_info(node.dataset)
         if level_info is None:
@@ -42,7 +42,7 @@ def dataset(self, ctx: RuleContext, node: DatasetNode):
         if not meta.version.startswith("1."):
             ctx.report(f"Unsupported {ML_META_FILENAME!r} meta-info version")

-        if meta.num_levels < 0:
+        if meta.num_levels <= 0:
             ctx.report(
                 f"Invalid 'num_levels' in {ML_META_FILENAME!r} meta-info:"
                 f" {meta.num_levels}"
             )
diff --git a/xrlint/plugins/xcube/rules/ml_dataset_time.py b/xrlint/plugins/xcube/rules/ml_dataset_time.py
new file mode 100644
index 0000000..0273af4
--- /dev/null
+++ b/xrlint/plugins/xcube/rules/ml_dataset_time.py
@@ -0,0 +1,39 @@
+from xrlint.node import DatasetNode
+from xrlint.plugins.xcube.constants import TIME_NAME
+from xrlint.plugins.xcube.plugin import plugin
+from xrlint.plugins.xcube.util import get_dataset_level_info, is_spatial_var
+from xrlint.rule import RuleContext, RuleOp
+from xrlint.util.formatting import format_seq
+
+
+@plugin.define_rule(
+    "ml-dataset-time",
+    version="1.0.0",
+    type="problem",
+    description=(
+        "The `time` dimension of multi-level datasets should use a chunk size of 1."
+        " This allows for faster image tile generation for visualisation."
+    ),
+    docs_url="https://xcube.readthedocs.io/en/latest/mldatasets.html#definition",
+)
+class MLDatasetTime(RuleOp):
+    def dataset(self, ctx: RuleContext, node: DatasetNode):
+        level_info = get_dataset_level_info(node.dataset)
+        if level_info is None:
+            # ok, this rule applies only to level datasets opened
+            # by the xcube multi-level processor
+            return
+
+        if TIME_NAME not in node.dataset.sizes or node.dataset.sizes[TIME_NAME] <= 1:
+            # ok, no time dimension used or no time extent
+            return
+
+        for var_name, var in node.dataset.data_vars.items():
+            if is_spatial_var(var) and TIME_NAME in var.dims and var.chunks is not None:
+                time_index = var.dims.index(TIME_NAME)
+                time_chunks = var.chunks[time_index]
+                if not all(c == 1 for c in time_chunks):
+                    ctx.report(
+                        f"Variable {var_name!r} uses chunking for {TIME_NAME!r}"
+                        f" that differs from one: {format_seq(time_chunks)}"
+                    )
diff --git a/xrlint/plugins/xcube/rules/ml_dataset_resolution.py b/xrlint/plugins/xcube/rules/ml_dataset_xy.py
similarity index 94%
rename from xrlint/plugins/xcube/rules/ml_dataset_resolution.py
rename to xrlint/plugins/xcube/rules/ml_dataset_xy.py
index b58c886..863f9a7 100644
--- a/xrlint/plugins/xcube/rules/ml_dataset_resolution.py
+++ b/xrlint/plugins/xcube/rules/ml_dataset_xy.py
@@ -7,7 +7,7 @@


 @plugin.define_rule(
-    "ml-dataset-resolution",
+    "ml-dataset-xy",
     version="1.0.0",
     type="problem",
     description=(
@@ -16,7 +16,7 @@
     ),
     docs_url="https://xcube.readthedocs.io/en/latest/mldatasets.html#definition",
 )
-class MultiLevelDatasetMeta(RuleOp):
+class MLDatasetXY(RuleOp):
     def dataset(self, ctx: RuleContext, node: DatasetNode):
         level_info = get_dataset_level_info(node.dataset)
         if level_info is None:
@@ -33,7 +33,7 @@ def dataset(self, ctx: RuleContext, node: DatasetNode):
         level_0_dataset, _ = datasets[0]
         l0_size = get_spatial_size(level_0_dataset)
         if l0_size is None:
-            # weird - maybe no data vars?
+            # ok, maybe no spatial data vars?
             return

         (x_name, level_0_width), (y_name, level_0_height) = l0_size
diff --git a/xrlint/plugins/xcube/util.py b/xrlint/plugins/xcube/util.py
index 373c75e..cb66758 100644
--- a/xrlint/plugins/xcube/util.py
+++ b/xrlint/plugins/xcube/util.py
@@ -8,7 +8,7 @@


 @dataclass(frozen=True, kw_only=True)
-class MultiLevelDatasetMeta(MappingConstructible):
+class LevelsMeta(MappingConstructible):
     """The contents of a xcube `.zlevels` meta-info file."""

     version: str
@@ -38,7 +38,7 @@ class LevelInfo:
     level dataset and level file path.
""" - meta: MultiLevelDatasetMeta | None = None + meta: LevelsMeta | None = None """Content of a `.zlevels` file, if file was found.""" @@ -50,6 +50,22 @@ def set_dataset_level_info(dataset: xr.Dataset, level_info: LevelInfo): dataset.attrs[ML_INFO_ATTR] = level_info +def attach_dataset_level_infos( + level_datasets: list[tuple[xr.Dataset, str]], + meta: LevelsMeta | None = None, +): + for level, (level_dataset, _) in enumerate(level_datasets): + set_dataset_level_info( + level_dataset, + LevelInfo( + level=level, + num_levels=len(level_datasets), + meta=meta, + datasets=level_datasets, + ), + ) + + def is_spatial_var(var: xr.DataArray) -> bool: """Return 'True' if `var` looks like a spatial 2+d variable.""" if var.ndim < 2: From 60f32af0881dd21503d29cbddac31fb13e5016cd Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Fri, 24 Jan 2025 15:56:24 +0100 Subject: [PATCH 15/17] removed unused fn --- xrlint/plugins/xcube/util.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/xrlint/plugins/xcube/util.py b/xrlint/plugins/xcube/util.py index cb66758..2d56d18 100644 --- a/xrlint/plugins/xcube/util.py +++ b/xrlint/plugins/xcube/util.py @@ -90,20 +90,6 @@ def get_spatial_size( return None -def get_spatial_size_for_dims( - dataset: xr.Dataset, x_name: str, y_name: str -) -> tuple[int, int]: - """Return (x_size, y_size) for given dataset and dim names.""" - x_size: int = 0 - y_size: int = 0 - for k, v in dataset.sizes.items(): - if k == x_name: - x_size = v - elif k == y_name: - y_size = v - return x_size, y_size - - def norm_path(level_path: str) -> str: parts = level_path.replace("\\", "/").split("/") level_path = "/".join( From 6df703516c44c6bdbf3d991de001d219f3e5189b Mon Sep 17 00:00:00 2001 From: Norman Fomferra Date: Fri, 24 Jan 2025 16:12:05 +0100 Subject: [PATCH 16/17] try making FS asynchronous --- .../xcube/processors/test_mldataset.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/tests/plugins/xcube/processors/test_mldataset.py b/tests/plugins/xcube/processors/test_mldataset.py index f0511a6..7a9b005 100644 --- a/tests/plugins/xcube/processors/test_mldataset.py +++ b/tests/plugins/xcube/processors/test_mldataset.py @@ -4,6 +4,7 @@ import fsspec import pytest +import xarray as xr from tests.plugins.xcube.helpers import make_cube_levels from xrlint.plugins.xcube.constants import ML_INFO_ATTR @@ -89,15 +90,20 @@ def assert_levels_ok( else: self.assertIsNone(meta) - def test_preprocess(self): + def preprocess(self) -> list[tuple[xr.Dataset, str]]: processor = MultiLevelDatasetProcessor() - datasets = processor.preprocess(self.levels_dir, {}) + return processor.preprocess( + self.levels_dir, + {"backend_kwargs": {"storage_options": {"asynchronous": True}}}, + ) + + def test_preprocess(self): + datasets = self.preprocess() self.assert_levels_ok(datasets, expect_meta=True) def test_preprocess_no_meta(self): self.fs.delete(self.meta_path) - processor = MultiLevelDatasetProcessor() - datasets = processor.preprocess(self.levels_dir, {}) + datasets = self.preprocess() self.assert_levels_ok(datasets, expect_meta=False) def test_preprocess_with_link(self): @@ -108,24 +114,21 @@ def test_preprocess_with_link(self): ) self.fs.delete(f"{self.levels_dir}/0.zarr", recursive=True) self.fs.write_text(f"{self.levels_dir}/0.link", f"../{self.levels_name}.zarr") - processor = MultiLevelDatasetProcessor() - datasets = processor.preprocess(self.levels_dir, {}) + datasets = self.preprocess() self.assert_levels_ok(datasets, expect_meta=True, 

     def test_preprocess_fail_empty(self):
         for i in range(self.num_levels):
             self.fs.delete(f"{self.levels_dir}/{i}.zarr", recursive=True)
-        processor = MultiLevelDatasetProcessor()
         with pytest.raises(ValueError, match="empty multi-level dataset"):
-            processor.preprocess(self.levels_dir, {})
+            self.preprocess()

     def test_preprocess_fail_missing(self):
         self.fs.delete(f"{self.levels_dir}/1.zarr", recursive=True)
-        processor = MultiLevelDatasetProcessor()
         with pytest.raises(
             ValueError, match="missing dataset for level 1 in multi-level dataset"
         ):
-            processor.preprocess(self.levels_dir, {})
+            self.preprocess()

     def test_postprocess(self):
         processor = MultiLevelDatasetProcessor()

From 0955eda3e2f3acaca61da6e24ccbe4f4c1e1c3e7 Mon Sep 17 00:00:00 2001
From: Norman Fomferra
Date: Fri, 24 Jan 2025 16:34:42 +0100
Subject: [PATCH 17/17] pinned zarr

---
 CHANGES.md                                       | 3 +++
 environment.yml                                  | 2 +-
 pyproject.toml                                   | 2 +-
 tests/plugins/xcube/processors/test_mldataset.py | 9 +++++----
 4 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 43c0291..a4c104e 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -33,6 +33,9 @@
   `xrlint.plugins..plugin` module.
 - `JsonSerializable` now recognizes `dataclass` instances and no longer
   serializes property values that are also default values.
+- Pinned the zarr dependency to >=2.18, <3 until the test
+  `tests.plugins.xcube.processors.test_mldataset.MultiLevelDatasetProcessorTest`
+  is adjusted or fsspec's memory filesystem is updated.

 ## Version 0.3.0 (from 2025-01-20)
diff --git a/environment.yml b/environment.yml
index 9d45b3f..e5334fc 100644
--- a/environment.yml
+++ b/environment.yml
@@ -24,4 +24,4 @@ dependencies:
   - pandas
   - netcdf4
   - numpy
-  - zarr
+  - zarr >=2.18,<3 # tests fail with zarr 3+
diff --git a/pyproject.toml b/pyproject.toml
index 5097449..5b03f43 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,7 +77,7 @@ dev = [
     "netcdf4",
     "numpy",
     "pandas",
-    "zarr",
+    "zarr >=2.18,<3",
 ]
 doc = [
     "mkdocs",
diff --git a/tests/plugins/xcube/processors/test_mldataset.py b/tests/plugins/xcube/processors/test_mldataset.py
index 7a9b005..69005e4 100644
--- a/tests/plugins/xcube/processors/test_mldataset.py
+++ b/tests/plugins/xcube/processors/test_mldataset.py
@@ -15,6 +15,10 @@
 )
 from xrlint.result import Message

+# TODO: This test requires zarr >=2, <3, because it uses fsspec's
+# memory filesystem, which is not async, but zarr 3 expects all
+# filesystems to be async now.
+

 class MultiLevelDatasetProcessorTest(TestCase):
     levels_name = "xrlint-test"
@@ -92,10 +96,7 @@ def assert_levels_ok(
     def preprocess(self) -> list[tuple[xr.Dataset, str]]:
         processor = MultiLevelDatasetProcessor()
-        return processor.preprocess(
-            self.levels_dir,
-            {"backend_kwargs": {"storage_options": {"asynchronous": True}}},
-        )
+        return processor.preprocess(self.levels_dir, {})

     def test_preprocess(self):
         datasets = self.preprocess()
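
A few notes on the levels layout these patches lint. `MultiLevelDatasetProcessor.preprocess`
walks a directory of level datasets like the one `MultiLevelDatasetProcessorTest` builds
above; roughly the following structure (directory names are illustrative; the `.zlevels`
file name is taken from the `LevelsMeta` docstring, and the `.levels` suffix from the
xcube levels format linked in the rules' `docs_url`):

    xrlint-test.levels/
        .zlevels     # optional JSON meta-info, parsed into LevelsMeta
        0.zarr/      # level 0, full spatial resolution
        1.zarr/      # level 1, downsampled in x and y
        2.zarr/
        3.zarr/

If level 0 is stored elsewhere, `0.zarr` is replaced by a `0.link` text file holding a
relative path such as `../xrlint-test.zarr`, as exercised by `test_preprocess_with_link`.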
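The `.zlevels` file itself is plain JSON that `preprocess` feeds through
`LevelsMeta.from_value(json.load(stream))`. A minimal sketch of that round trip,
assuming JSON field names exactly matching the `LevelsMeta` dataclass attributes and
the values used by the test fixtures above:

    import json

    from xrlint.plugins.xcube.util import LevelsMeta

    text = """
    {
        "version": "1.0",
        "num_levels": 4,
        "use_saved_levels": true,
        "agg_methods": {"chl": "mean"}
    }
    """
    meta = LevelsMeta.from_value(json.loads(text))

    # ml-dataset-meta accepts such meta-info: the version starts with "1.",
    # num_levels is positive, and agg_methods covers the "chl" data variable.
    assert meta.version.startswith("1.")
    assert meta.num_levels > 0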
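Finally, what the new `xcube/ml-dataset-time` rule asks for can be achieved with plain
xarray/dask chunking; a minimal sketch (the cube below is illustrative, not one of the
test fixtures):

    import numpy as np
    import xarray as xr

    # A variable chunked with a time chunk size of 3 -- ml-dataset-time
    # would report this chunking.
    chl = xr.DataArray(
        np.zeros((6, 360, 720)), dims=["time", "lat", "lon"]
    ).chunk(time=3, lat=90, lon=90)
    ds = xr.Dataset(data_vars=dict(chl=chl))

    # Rechunk the time dimension to size 1, the layout the rule expects,
    # so single time steps can be read when generating image tiles.
    ds["chl"] = ds["chl"].chunk(time=1)

    time_index = ds["chl"].dims.index("time")
    assert all(c == 1 for c in ds["chl"].chunks[time_index])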