OpenFreeEnergy · hannahbaumann · Dec 18, 2025 · Dec 18, 2025 · Dec 18, 2025 · Dec 19, 2025
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -52,6 +52,33 @@ jobs:
         run: |
           python -m pip install --no-deps .
 
+      - name: Cache Pooch data
+        uses: actions/cache@v4
+        with:
+          path: |
+            # Linux cache location
+            ~/.cache/openfe_analysis
+            # macOS cache location
+            ~/Library/Caches/openfe_analysis
+          key: pooch-${{ matrix.os }}-v2
+
+      - name: "Download Zenodo data"
+        run: |
+          python - <<'EOF'
+          import pooch
+          from openfe_analysis.tests.conftest import ZENODO_DOI, ZENODO_FILES
+
+          zenodo = pooch.create(
+              path=pooch.os_cache('openfe_analysis'),
+              base_url=ZENODO_DOI,
+              registry=ZENODO_FILES,
+          )
+
+          for fname in ZENODO_FILES:
+              zenodo.fetch(fname, processor=pooch.Untar())
+
+          EOF
+
       - name: "Test imports"
         run: |
           python -Ic "import openfe_analysis; print(openfe_analysis.__version__)"

diff --git a/src/openfe_analysis/__init__.py b/src/openfe_analysis/__init__.py
@@ -3,6 +3,6 @@
 from .reader import FEReader
 from .transformations import (
     Aligner,
-    Minimiser,
+    ClosestImageShift,
     NoJump,
 )
diff --git a/src/openfe_analysis/reader.py b/src/openfe_analysis/reader.py
@@ -1,4 +1,5 @@
-from typing import Optional
+import pathlib
+from typing import Literal, Optional
 
 import netCDF4 as nc
 import numpy as np
@@ -52,16 +53,20 @@ def _determine_iteration_dt(dataset) -> float:
 
 
 class FEReader(ReaderBase):
-    """A MDAnalysis Reader for NetCDF files created by
+    """
+    MDAnalysis Reader for NetCDF files created by
     `openmmtools.multistate.MultiStateReporter`
 
-    Looks along a multistate NetCDF file along one of two axes:
-      - constant state/lambda (varying replica)
-      - constant replica (varying lambda)
+    Provides a 1D trajectory view along either:
+
+    - constant Hamiltonian state (`view="state"`)
+    - constant replica (`view="replica"`)
+
+    selected via the `index` argument.
     """
 
-    _state_id: Optional[int]
-    _replica_id: Optional[int]
+    _index: Optional[int]
+    _view: Optional[str]
     _frame_index: int
     _dataset: nc.Dataset
     _dataset_owner: bool
@@ -70,35 +75,27 @@ class FEReader(ReaderBase):
 
     units = {"time": "ps", "length": "nanometer"}
 
-    def __init__(self, filename, convert_units=True, state_id=None, replica_id=None, **kwargs):
+    def __init__(
+        self,
+        filename: str | pathlib.Path | nc.Dataset,
+        *,
+        index: int,
+        view: Literal["state", "replica"] = "state",
+        convert_units: bool = True,
+        **kwargs,
+    ):
         """
         Parameters
         ----------
         filename : pathlike or nc.Dataset
-          path to the .nc file
+            Path to the .nc file or an open Dataset.
+        index : int
+            Index of the state or replica to extract. May be negative.
+        view : {"state", "replica"}, default "state"
+            Whether `index` refers to a Hamiltonian state or a replica.
         convert_units : bool
-          convert positions to Angstrom
-        state_id : Optional[int]
-          The Hamiltonian state index to extract. Must be defined if
-          ``replica_id`` is not defined. May be negative (see notes below).
-        replica_id : Optional[int]
-          The replica index to extract. Must be defined if ``state_id``
-          is not defined. May be negative (see notes below).
-
-        Notes
-        -----
-        A negative index may be passed to either ``state_id`` or
-        ``replica_id``. This will be interpreted as indexing in reverse
-        starting from the last state/replica. For example, passing a
-        value of -2 for ``replica_id`` will select the before last replica.
+            Convert positions to Angstrom.
         """
-        if not ((state_id is None) ^ (replica_id is None)):
-            raise ValueError(
-                "Specify one and only one of state or replica, "
-                f"got state id={state_id} "
-                f"replica_id={replica_id}"
-            )
-
         super().__init__(filename, convert_units, **kwargs)
 
         if isinstance(filename, nc.Dataset):
@@ -108,29 +105,37 @@ def __init__(self, filename, convert_units=True, state_id=None, replica_id=None,
             self._dataset = nc.Dataset(filename)
             self._dataset_owner = True
 
-        # Handle the negative ID case
-        if state_id is not None and state_id < 0:
-            state_id = range(self._dataset.dimensions["state"].size)[state_id]
+        if view not in {"state", "replica"}:
+            raise ValueError(f"View must be 'state' or 'replica', got {view}")
 
-        if replica_id is not None and replica_id < 0:
-            replica_id = range(self._dataset.dimensions["replica"].size)[replica_id]
+        self._view = view
 
-        self._state_id = state_id
-        self._replica_id = replica_id
+        # Handle the negative ID case
+        if view == "state":
+            size = self._dataset.dimensions["state"].size
+        else:
+            size = self._dataset.dimensions["replica"].size
+
+        self._index = index % size
 
         self._n_atoms = self._dataset.dimensions["atom"].size
         self.ts = Timestep(self._n_atoms)
         self._frames = _determine_position_indices(self._dataset)
         # The MDAnalysis trajectory "dt" is the iteration dt
         # multiplied by the number of iterations between frames.
         self._dt = _determine_iteration_dt(self._dataset) * np.diff(self._frames)[0]
+        self._frame_index = -1
         self._read_frame(0)
 
     @staticmethod
     def _format_hint(thing) -> bool:
         # can pass raw nc datasets through to reduce open/close operations
         return isinstance(thing, nc.Dataset)
 
+    @property
+    def index(self) -> int:
+        return self._index
+
     @property
     def n_atoms(self) -> int:
         return self._n_atoms
@@ -139,6 +144,10 @@ def n_atoms(self) -> int:
     def n_frames(self) -> int:
         return len(self._frames)
 
+    @property
+    def view(self) -> str:
+        return self._view
+
     @staticmethod
     def parse_n_atoms(filename, **kwargs) -> int:
         with nc.Dataset(filename) as ds:
@@ -153,17 +162,19 @@ def _read_next_timestep(self, ts=None) -> Timestep:
     def _read_frame(self, frame: int) -> Timestep:
         self._frame_index = frame
 
-        if self._state_id is not None:
+        frame = self._frames[self._frame_index]
+
+        if self._view == "state":
             rep = multistate._state_to_replica(
-                self._dataset, self._state_id, self._frames[self._frame_index]
+                self._dataset,
+                self._index,
+                frame,
             )
         else:
-            rep = self._replica_id
+            rep = self._index
 
-        pos = multistate._replica_positions_at_frame(
-            self._dataset, rep, self._frames[self._frame_index]
-        )
-        dim = multistate._get_unitcell(self._dataset, rep, self._frames[self._frame_index])
+        pos = multistate._replica_positions_at_frame(self._dataset, rep, frame)
+        dim = multistate._get_unitcell(self._dataset, rep, frame)
 
         if pos is None:
             errmsg = (
@@ -193,5 +204,7 @@ def _reopen(self):
         self._frame_index = -1
 
     def close(self):
-        if self._dataset_owner:
-            self._dataset.close()
+        if self._dataset is not None:
+            if self._dataset_owner:
+                self._dataset.close()
+            self._dataset = None