diff --git a/.pre-commit-hooks.yaml b/.pre-commit-hooks.yaml index 29bab0b..35a4ba0 100644 --- a/.pre-commit-hooks.yaml +++ b/.pre-commit-hooks.yaml @@ -5,3 +5,10 @@ language: python pass_filenames: false always_run: true +- id: pre-executed-nb-never-execute + name: Check pre-executed notebooks + description: Confirm that notebooks that are pre-executed contain "nbsphinx" metadata where "execute"="never" + entry: pre-executed-nb-never-execute + language: python + pass_filenames: true + always_run: true diff --git a/README.md b/README.md index 25612f9..b8931bf 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,20 @@ [![Template](https://img.shields.io/badge/Template-LINCC%20Frameworks%20Python%20Project%20Template-brightgreen)](https://lincc-ppt.readthedocs.io/en/latest/) +## Check template version + +### Motivation + +The [LINCC Frameworks Python Project Template](https://lincc-ppt.readthedocs.io/en/latest/) +is a reference implementation for our team's +python best practices. These best practices change over time (or we adapt to changes in +other systems), and the PPT copier template is updated and released. + +This pre-commit check determines if your current PPT copier template is up-to-date +and provides a warning if it's not. You can still continue with your commit if the +template is stale, but we recommend running the update process. + +### Configuration Add the following to your `.pre-commit-config.yaml` file to check for newer versions of the template: @@ -12,7 +26,7 @@ repos: ... - repo: https://github.com/lincc-frameworks/pre-commit-hooks - rev: v0.1 + rev: v0.1.2 hooks: - id: check-lincc-frameworks-template-version name: Check template version @@ -22,6 +36,8 @@ repos: ... ``` +### Execution + If the template version in the `.copier-answers.yml` file matches the most recent version available, you'll see this: @@ -44,11 +60,79 @@ For more information see the documentation: https://lincc-ppt.readthedocs.io/en/ ``` +## Check pre-executed notebooks + +### Motivation + +Often, tutorial notebooks require large datasets, access to third party APIs, large CPU or GPU requirements. + +In our projects, we want to include these in rendered readthedocs sites, but do NOT want them to be +executed as part of the docs workflow. + +To ensure that the notebooks are not run by the notebook conversion process, you can add the following metadata block to the notebook: + +``` + "nbsphinx": { + "execute": "never" + }, +``` + +The `pre-executed-nb-never-execute` hook was built to check all notebooks inside some `pre_executed` directory +for this metadata block. If it is not present, we add the block. We also confirm that the metadata is well-formatted. + +### Configuration + +Add the following to your `.pre-commit-config.yaml` file to check notebook metadata: + +``` +repos: + +... + - repo: https://github.com/lincc-frameworks/pre-commit-hooks + rev: v0.1.2 + hooks: + - id: pre-executed-nb-never-execute + name: Check pre-executed notebooks + files: ^docs/pre_executed/.*\.ipynb$ + verbose: true + args: + ["docs/pre_executed/",] +... +``` + +### Execution + +If all notebooks are found to be in good order, expect the following output +in your pre-commit logs: + +``` +Check pre-executed notebooks.............................................Passed +``` + +If some notebooks are modified by this hook, the hook will fail, with a message that +includes the paths to the modified notebooks. +``` +Check pre-executed notebooks.............................................Failed +- hook id: pre-executed-nb-never-execute +- exit code: 1 +- files were modified by this hook + +Modified notebook to set nbsphinx.execute='never': docs/pre_executed/intro.ipynb +``` + +Further, if notebooks are malformed, the errors will appear in the pre-commit +logs. + +``` +Check pre-executed notebooks.............................................Failed +- hook id: pre-executed-nb-never-execute +- exit code: 1 + +Expecting property name enclosed in double quotes: line 85 column 1 (char 2195) +``` + ## Attribution This project was automatically generated using the LINCC-Frameworks [python-project-template](https://github.com/lincc-frameworks/python-project-template). -A repository badge was added to show that this project uses the python-project-template, however it's up to -you whether or not you'd like to display it! - -For more information about the project template see the For more information about the project template see the [documentation](https://lincc-ppt.readthedocs.io/en/latest/). +For more information about the project template see the [documentation](https://lincc-ppt.readthedocs.io/en/latest/). diff --git a/src/pre_commit_hooks/__init__.py b/src/pre_commit_hooks/__init__.py index 3741e2f..da0ee7b 100644 --- a/src/pre_commit_hooks/__init__.py +++ b/src/pre_commit_hooks/__init__.py @@ -1,4 +1,5 @@ from ._version import __version__ from .check_template_version import * +from .pre_executed_nb_never_execute import * __all__ = ["__version__"] diff --git a/src/pre_commit_hooks/pre_executed_nb_never_execute.py b/src/pre_commit_hooks/pre_executed_nb_never_execute.py new file mode 100644 index 0000000..d4041ac --- /dev/null +++ b/src/pre_commit_hooks/pre_executed_nb_never_execute.py @@ -0,0 +1,99 @@ +#!/usr/bin/env python3 +""" +Ensure every notebook under docs/pre_executed has metadata: +"nbsphinx": {"execute": "never"} + +This script is safe to run as a pre-commit hook. It will modify notebooks in-place +and print modified file paths. It raises exceptions on hard errors (instead of +return codes). If any notebook has an explicit `nbsphinx.execute` value other than +`"never"`, the script fails without making changes to that notebook. +""" + +from __future__ import annotations + +import json +import logging +import sys +from pathlib import Path + +ROOT = Path(__file__).resolve().parent.parent +TARGET_DIR = ROOT / "docs" / "pre_executed" + + +def _find_notebooks(dirpath: Path) -> list[Path]: + if not dirpath.exists(): + return [] + return sorted(dirpath.glob("*.ipynb")) + + +def ensure_nbsphinx_execute_never(notebook_path: Path) -> bool: + """Return True if the file was modified. + + Raises RuntimeError when the notebook explicitly sets `nbsphinx.execute` + to a value other than "never" (the script should fail with no fix). + """ + with notebook_path.open("r") as fh: + nb = json.load(fh) + + if not isinstance(nb, dict): + raise RuntimeError(f"Unexpected notebook format for {notebook_path}") + + metadata = nb.setdefault("metadata", {}) + nbsphinx = metadata.setdefault("nbsphinx", {}) + + if not isinstance(nbsphinx, dict): + raise RuntimeError( + f"Notebook {notebook_path} has metadata.nbsphinx " + "of type {type(nbsphinx).__name__}; expected a mapping" + ) + + if "execute" in nbsphinx: + if nbsphinx["execute"] == "never": + return False + raise RuntimeError( + f"Notebook {notebook_path} has " + "metadata.nbsphinx.execute={nbsphinx.get('execute')!r}; expected 'never'" + ) + + nbsphinx["execute"] = "never" + + with notebook_path.open("w") as fh: + json.dump(nb, fh, indent=1, sort_keys=True, ensure_ascii=False) + + logging.warning("Modified notebook to set nbsphinx.execute='never': %s", notebook_path) + + return True + + +def main(argv=None) -> int: + """Main method for execution. + + Finds notebooks in the target directory, and scans each for nbsphinx execute metadata. + """ + logging.basicConfig(level=logging.WARNING, format="%(message)s") + + if argv is None and len(sys.argv) > 1: + argv = sys.argv[1] + if argv is None: + argv = TARGET_DIR + + notebooks = _find_notebooks(Path(argv)) + if len(notebooks) == 0: + logging.warning("No notebooks found at path %s.", str(argv)) + return 0 + + # If pre-scan passed, perform modifications safely. + return_code = 0 + for nb in notebooks: + try: + if ensure_nbsphinx_execute_never(nb): + return_code = 1 + except Exception as e: + logging.error(str(e)) + return_code = 1 + + return return_code + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tests/pre_commit_hooks/test_pre_executed_nb_never_execute.py b/tests/pre_commit_hooks/test_pre_executed_nb_never_execute.py new file mode 100644 index 0000000..8332e98 --- /dev/null +++ b/tests/pre_commit_hooks/test_pre_executed_nb_never_execute.py @@ -0,0 +1,143 @@ +from pre_commit_hooks import pre_executed_nb_never_execute + + +def test_nb_never_metadata(caplog, capsys, tmp_path): + notebook_path = tmp_path / "empty_metadata.ipynb" + with notebook_path.open("w") as fh: + fh.write( + """{ +"metadata": { + "nbsphinx": { + "execute": "never" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} +""" + ) + result = pre_executed_nb_never_execute.main(tmp_path) + assert result == 0 + + captured = caplog.text + assert captured == "" + + captured = capsys.readouterr().out + assert captured == "" + + +def test_nb_empty_metadata(caplog, tmp_path): + notebook_path = tmp_path / "empty_metadata.ipynb" + with notebook_path.open("w") as fh: + fh.write( + """{ +"metadata": { + }, + "nbformat": 4, + "nbformat_minor": 5 +} +""" + ) + result = pre_executed_nb_never_execute.main(tmp_path) + assert result == 1 + + captured = caplog.text + assert "Modified notebook to set" in captured + assert "empty_metadata.ipynb" in captured + + +def test_nb_no_metadata(caplog, tmp_path): + notebook_path = tmp_path / "no_metadata.ipynb" + with notebook_path.open("w") as fh: + fh.write( + """{ + "nbformat": 4, + "nbformat_minor": 5 +} +""" + ) + result = pre_executed_nb_never_execute.main(tmp_path) + assert result == 1 + + captured = caplog.text + assert "Modified notebook to set" in captured + assert "no_metadata.ipynb" in captured + + +def test_nb_always_executes(caplog, tmp_path): + notebook_path = tmp_path / "always_execute.ipynb" + with notebook_path.open("w") as fh: + fh.write( + """{ +"metadata": { + "nbsphinx": { + "execute": "always" + } + } +} +""" + ) + result = pre_executed_nb_never_execute.main(tmp_path) + assert result == 1 + + captured = caplog.text + assert "always_execute.ipynb" in captured + assert "expected 'never'" in captured + + +def test_nb_metadata_wrong_type(caplog, tmp_path): + notebook_path = tmp_path / "wrong_metadata.ipynb" + with notebook_path.open("w") as fh: + fh.write( + """{ +"metadata": { + "nbsphinx": "never" + } +} +""" + ) + result = pre_executed_nb_never_execute.main(tmp_path) + assert result == 1 + + captured = caplog.text + assert "wrong_metadata.ipynb" in captured + assert "expected a mapping" in captured + + +def test_nb_not_a_nb(caplog, tmp_path): + notebook_path = tmp_path / "not_a_nb.ipynb" + with notebook_path.open("w") as fh: + fh.write( + """# README +Hey man, this is just a readme. +""" + ) + result = pre_executed_nb_never_execute.main(tmp_path) + assert result == 1 + + captured = caplog.text + assert "ERROR" in captured + + +def test_nb_no_argument(caplog): + result = pre_executed_nb_never_execute.main() + assert result == 0 + + captured = caplog.text + assert "No notebooks found at path" in captured + + +def test_nb_no_notebooks(caplog, tmp_path): + result = pre_executed_nb_never_execute.main(tmp_path) + assert result == 0 + + captured = caplog.text + assert "No notebooks found at path" in captured + + +def test_nb_dir_does_not_exist(caplog, tmp_path): + result = pre_executed_nb_never_execute.main(tmp_path / "not_there") + assert result == 0 + + captured = caplog.text + assert "No notebooks found at path" in captured