Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b528ca2
Move some tests to use skipped smaller data
hannahbaumann Jan 16, 2026
a477bc1
Test out zenodo dealings
hannahbaumann Jan 16, 2026
ad84082
Try to improbe speed
hannahbaumann Jan 16, 2026
8ba8087
Try removing locking
hannahbaumann Jan 16, 2026
ead7951
Run downloads before the testing to have a single download for all th…
hannahbaumann Jan 19, 2026
f898a35
add import pooch
hannahbaumann Jan 19, 2026
c675a5c
Test out more
hannahbaumann Jan 19, 2026
88e456d
Ensure datasets get closed
hannahbaumann Jan 19, 2026
73a8e4d
Move to per test download again
hannahbaumann Jan 19, 2026
43aaca2
Remove commented out lines
hannahbaumann Jan 21, 2026
c165525
Test out adding an extra slash
hannahbaumann Jan 21, 2026
5f17770
Switch to all version doi
hannahbaumann Jan 21, 2026
c28286e
Download url directly
hannahbaumann Jan 21, 2026
197b6ba
Small fix
hannahbaumann Jan 21, 2026
b45390a
Change url
hannahbaumann Jan 21, 2026
1d70936
Add missing s
hannahbaumann Jan 21, 2026
20084c3
Switch to api url
hannahbaumann Jan 21, 2026
a9a8780
Update tests for new results
hannahbaumann Jan 23, 2026
c34c97c
Update conftest
hannahbaumann Jan 26, 2026
0161673
Update to v2
hannahbaumann Jan 26, 2026
9b6ca69
Update tests
hannahbaumann Jan 26, 2026
1d5c849
Update rmsd test, currently large rmsd till rmsd fix comes in
hannahbaumann Jan 26, 2026
f4e88e2
Make last test pass
hannahbaumann Jan 26, 2026
bd0c8ee
Switch to zenodo fetch
hannahbaumann Jan 26, 2026
ba4c912
remove lines
hannahbaumann Jan 26, 2026
157c02f
Update tests with large errors multichain failure
hannahbaumann Jan 27, 2026
98ea023
Apply suggestion from @hannahbaumann
hannahbaumann Jan 28, 2026
c5b2d70
Reuse zenodo specification
hannahbaumann Jan 28, 2026
54576ab
reorder install
hannahbaumann Jan 28, 2026
3aa52a5
Small fix
hannahbaumann Jan 28, 2026
ff6991a
Remove flaky retries
hannahbaumann Jan 28, 2026
7a30f69
Small fix
hannahbaumann Jan 28, 2026
1b2c488
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 29, 2026
2a1d130
Apply review suggestions
hannahbaumann Jan 30, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,33 @@ jobs:
run: |
python -m pip install --no-deps .

- name: Cache Pooch data
uses: actions/cache@v4
with:
path: |
# Linux cache location
~/.cache/openfe_analysis
# macOS cache location
~/Library/Caches/openfe_analysis
key: pooch-${{ matrix.os }}-v2

- name: "Download Zenodo data"
run: |
python - <<'EOF'
import pooch
from openfe_analysis.tests.conftest import ZENODO_DOI, ZENODO_FILES

zenodo = pooch.create(
path=pooch.os_cache('openfe_analysis'),
base_url=ZENODO_DOI,
registry=ZENODO_FILES,
Comment on lines +73 to +74
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

glad you're doing this! it's what I'm moving us toward on the openfe side.

)

for fname in ZENODO_FILES:
zenodo.fetch(fname, processor=pooch.Untar())

EOF

- name: "Test imports"
run: |
python -Ic "import openfe_analysis; print(openfe_analysis.__version__)"
Expand Down
3 changes: 2 additions & 1 deletion environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,9 @@ dependencies:
- pyyaml
# for testing
- coverage
- pooch
- pytest
- pytest-cov
- pytest-xdist
- pytest-rerunfailures
- pip:
- git+https://github.com/fatiando/pooch@main # related to https://github.com/fatiando/pooch/issues/502
6 changes: 4 additions & 2 deletions src/openfe_analysis/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,5 +193,7 @@ def _reopen(self):
self._frame_index = -1

def close(self):
    """Close the underlying NetCDF dataset, if any.

    Idempotent: safe to call multiple times. The dataset is only closed
    when this reader owns it (``self._dataset_owner``); either way the
    reference is dropped so later calls are no-ops and the handle cannot
    be used again through this object.
    """
    if self._dataset is not None:
        if self._dataset_owner:
            # Only the owner may close; shared datasets are left open for
            # their other users.
            self._dataset.close()
        # Drop the reference so a second close() is a harmless no-op.
        self._dataset = None
157 changes: 79 additions & 78 deletions src/openfe_analysis/rmsd.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,87 +97,88 @@ def gather_rms_data(
"protein_2D_RMSD": [],
}

ds = nc.Dataset(dataset)
n_lambda = ds.dimensions["state"].size

# If you're using a new multistate nc file, you need to account for
# the position skip rate.
if hasattr(ds, "PositionInterval"):
n_frames = len(range(0, ds.dimensions["iteration"].size, ds.PositionInterval))
else:
n_frames = ds.dimensions["iteration"].size

if skip is None:
# find skip that would give ~500 frames of output
# max against 1 to avoid skip=0 case
skip = max(n_frames // 500, 1)

pb = tqdm.tqdm(total=int(n_frames / skip) * n_lambda)

u_top = mda.Universe(pdb_topology)

for i in range(n_lambda):
# cheeky, but we can read the PDB topology once and reuse per universe
# this then only hits the PDB file once for all replicas
u = make_Universe(u_top._topology, ds, state=i)

prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

# save coordinates for 2D RMSD matrix
# TODO: Some smart guard to avoid allocating a silly amount of memory?
prot2d = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32)

prot_start = prot.positions
# prot_weights = prot.masses / np.mean(prot.masses)
ligand_start = ligand.positions
ligand_initial_com = ligand.center_of_mass()
ligand_weights = ligand.masses / np.mean(ligand.masses)

this_protein_rmsd = []
this_ligand_rmsd = []
this_ligand_wander = []

for ts_i, ts in enumerate(u.trajectory[::skip]):
pb.update()
# Open the NetCDF file safely using a context manager
with nc.Dataset(dataset) as ds:
n_lambda = ds.dimensions["state"].size

# If you're using a new multistate nc file, you need to account for
# the position skip rate.
if hasattr(ds, "PositionInterval"):
n_frames = len(range(0, ds.dimensions["iteration"].size, ds.PositionInterval))
else:
n_frames = ds.dimensions["iteration"].size

if skip is None:
# find skip that would give ~500 frames of output
# max against 1 to avoid skip=0 case
skip = max(n_frames // 500, 1)

pb = tqdm.tqdm(total=int(n_frames / skip) * n_lambda)

u_top = mda.Universe(pdb_topology)

for i in range(n_lambda):
# cheeky, but we can read the PDB topology once and reuse per universe
# this then only hits the PDB file once for all replicas
u = make_Universe(u_top._topology, ds, state=i)

prot = u.select_atoms("protein and name CA")
ligand = u.select_atoms("resname UNK")

# save coordinates for 2D RMSD matrix
# TODO: Some smart guard to avoid allocating a silly amount of memory?
prot2d = np.empty((len(u.trajectory[::skip]), len(prot), 3), dtype=np.float32)

prot_start = prot.positions
# prot_weights = prot.masses / np.mean(prot.masses)
ligand_start = ligand.positions
ligand_initial_com = ligand.center_of_mass()
ligand_weights = ligand.masses / np.mean(ligand.masses)

this_protein_rmsd = []
this_ligand_rmsd = []
this_ligand_wander = []

for ts_i, ts in enumerate(u.trajectory[::skip]):
pb.update()

if prot:
prot2d[ts_i, :, :] = prot.positions
this_protein_rmsd.append(
rms.rmsd(
prot.positions,
prot_start,
None, # prot_weights,
center=False,
superposition=False,
)
)
if ligand:
this_ligand_rmsd.append(
rms.rmsd(
ligand.positions,
ligand_start,
ligand_weights,
center=False,
superposition=False,
)
)
this_ligand_wander.append(
# distance between start and current ligand position
# ignores PBC, but we've already centered the traj
mda.lib.distances.calc_bonds(ligand.center_of_mass(), ligand_initial_com)
)

if prot:
prot2d[ts_i, :, :] = prot.positions
this_protein_rmsd.append(
rms.rmsd(
prot.positions,
prot_start,
None, # prot_weights,
center=False,
superposition=False,
)
)
# can ignore weights here as it's all Ca
rmsd2d = twoD_RMSD(prot2d, w=None) # prot_weights)
output["protein_RMSD"].append(this_protein_rmsd)
output["protein_2D_RMSD"].append(rmsd2d)
if ligand:
this_ligand_rmsd.append(
rms.rmsd(
ligand.positions,
ligand_start,
ligand_weights,
center=False,
superposition=False,
)
)
this_ligand_wander.append(
# distance between start and current ligand position
# ignores PBC, but we've already centered the traj
mda.lib.distances.calc_bonds(ligand.center_of_mass(), ligand_initial_com)
)

if prot:
# can ignore weights here as it's all Ca
rmsd2d = twoD_RMSD(prot2d, w=None) # prot_weights)
output["protein_RMSD"].append(this_protein_rmsd)
output["protein_2D_RMSD"].append(rmsd2d)
if ligand:
output["ligand_RMSD"].append(this_ligand_rmsd)
output["ligand_wander"].append(this_ligand_wander)

output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt)
output["ligand_RMSD"].append(this_ligand_rmsd)
output["ligand_wander"].append(this_ligand_wander)

output["time(ps)"] = list(np.arange(len(u.trajectory))[::skip] * u.trajectory.dt)

return output

Expand Down
49 changes: 31 additions & 18 deletions src/openfe_analysis/tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,49 +1,62 @@
import pathlib
from importlib import resources

import pathlib
import pooch
import pytest

# Zenodo concept DOI ("all versions") for the test datasets. Also imported
# by the CI workflow to pre-warm the download cache.
ZENODO_DOI = "doi:10.5281/zenodo.18378051"

# Archive name -> md5 checksum; pooch uses this registry to verify downloads.
ZENODO_FILES = {
    "openfe_analysis_simulation_output.tar.gz": "md5:7f0babaac3dc8f7dd2db63cb79dff00f",
    "openfe_analysis_skipped.tar.gz": "md5:ac42219bde9da3641375adf3a9ddffbf",
}

# Per-user OS cache directory; created eagerly so pooch can write into it.
POOCH_CACHE = pathlib.Path(pooch.os_cache("openfe_analysis"))
POOCH_CACHE.mkdir(parents=True, exist_ok=True)

# Shared downloader for the RBFE test data.
ZENODO_RBFE_DATA = pooch.create(
    path=POOCH_CACHE,
    base_url=ZENODO_DOI,
    registry=ZENODO_FILES,
)


def _fetch_and_untar(dirname: str) -> pathlib.Path:
    """Fetch ``{dirname}.tar.gz`` from Zenodo (cached) and return the
    unpacked directory.

    ``pooch.Untar`` extracts into ``<archive>.tar.gz.untar/`` inside the
    cache; the archive is assumed to contain a single top-level directory
    named ``dirname`` — TODO confirm against the Zenodo record layout.
    """
    ZENODO_RBFE_DATA.fetch(f"{dirname}.tar.gz", processor=pooch.Untar())
    # Join with pathlib rather than an f-string so separators are handled
    # portably; POOCH_CACHE is already a pathlib.Path.
    return POOCH_CACHE / f"{dirname}.tar.gz.untar" / dirname


@pytest.fixture(scope="session")
def rbfe_output_data_dir() -> pathlib.Path:
    """Session-scoped path to the untarred full simulation output dataset.

    Downloads from Zenodo on first use; subsequent uses hit the pooch cache.
    """
    return _fetch_and_untar("openfe_analysis_simulation_output")


@pytest.fixture(scope="session")
def rbfe_skipped_data_dir() -> pathlib.Path:
    """Session-scoped path to the untarred "skipped" (smaller) dataset.

    Downloads from Zenodo on first use; subsequent uses hit the pooch cache.
    """
    return _fetch_and_untar("openfe_analysis_skipped")


@pytest.fixture(scope="session")
def simulation_nc(rbfe_output_data_dir) -> pathlib.Path:
    """Path to ``simulation.nc`` inside the full output dataset."""
    return rbfe_output_data_dir / "simulation.nc"


@pytest.fixture(scope="session")
def simulation_skipped_nc(rbfe_skipped_data_dir) -> pathlib.Path:
    """Path to ``simulation.nc`` inside the skipped dataset."""
    return rbfe_skipped_data_dir / "simulation.nc"


@pytest.fixture(scope="session")
def hybrid_system_pdb(rbfe_output_data_dir) -> pathlib.Path:
    """Path to ``hybrid_system.pdb`` inside the full output dataset."""
    return rbfe_output_data_dir / "hybrid_system.pdb"


@pytest.fixture(scope="session")
def hybrid_system_skipped_pdb(rbfe_skipped_data_dir) -> pathlib.Path:
    """Path to ``hybrid_system.pdb`` inside the skipped dataset."""
    return rbfe_skipped_data_dir / "hybrid_system.pdb"


@pytest.fixture(scope="session")
Expand Down
Loading