diff --git a/.gitignore b/.gitignore index 55ee328..775f939 100644 --- a/.gitignore +++ b/.gitignore @@ -70,6 +70,7 @@ instance/ # Sphinx documentation docs/_build/ +.DS_Store # PyBuilder .pybuilder/ diff --git a/README.md b/README.md index 349f9b5..a09d3d4 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,11 @@ biosimdb-interface ================== + +

+ biosimdb logo + biosimdb logo +

+ A repository for extracting and uploading simulation data via a web interface to the PSDI hosted BioSimDB community data-collection. ## Project Status diff --git a/biosimdb_interface/form/extract.py b/biosimdb_interface/form/extract.py index 71f0c01..eeddc64 100644 --- a/biosimdb_interface/form/extract.py +++ b/biosimdb_interface/form/extract.py @@ -33,6 +33,7 @@ def extract_files_validate(top_file, traj_file): schema_path=os.getenv("ENGINE_MAPPING_SCHEMA_PATH", ""), top_file=top_file, traj_file=traj_file, + store_file_metadata=False, # not on webform so exclude ) result = populator.populate() diff --git a/biosimdb_interface/form/upload.py b/biosimdb_interface/form/upload.py index ef39059..7a73cbf 100644 --- a/biosimdb_interface/form/upload.py +++ b/biosimdb_interface/form/upload.py @@ -3,8 +3,10 @@ import glob import json import os +import shutil import tempfile +from biosim_extractor.metadata.filemetadata import files_metadata from flask import current_app, request, session from werkzeug.utils import secure_filename @@ -12,6 +14,57 @@ from .utils import fill_invenio_metadata, form_to_json +def _save_request_files(tmpdir): + """Save uploaded request files into a temporary directory grouped by role. + + Maps the trajectory[] field to trajectory and keeps other field names as roles. + + Args: + tmpdir: Path to the temporary directory where uploaded files are written. + + Returns: + Dictionary mapping file roles to lists of saved file paths. + """ + saved_files = {"topology": [], "trajectory": []} + for field in request.files: + role = "trajectory" if field == "trajectory[]" else field + for file in request.files.getlist(field): + if file.filename: + path = os.path.join(tmpdir, secure_filename(file.filename)) + file.save(path) + saved_files.setdefault(role, []).append(path) + return saved_files + + +def _save_files_and_extract_metadata(tmpdir): + """Save uploaded request files and compute file metadata in one step. + + Args: + tmpdir: Path to the temporary directory where uploaded files are written. + + Returns: + Tuple of: + - saved_files: Dictionary of role to saved file paths. + - file_meta: List of extracted file metadata dictionaries. + """ + saved_files = _save_request_files(tmpdir) + file_meta = files_metadata(saved_files) + return saved_files, file_meta + + +def extract_uploaded_file_metadata(): + """Extract file metadata from the current request's uploaded files.""" + tmpdir = tempfile.mkdtemp(prefix="biosimdb_file_metadata_") + try: + _, file_meta = _save_files_and_extract_metadata(tmpdir) + return file_meta + finally: + for field in request.files: + for file in request.files.getlist(field): + file.stream.seek(0) + shutil.rmtree(tmpdir, ignore_errors=True) + + def _data_collections_upload(metadata_path, files_path): """Upload metadata as a draft PSDI data-collections record. @@ -35,6 +88,40 @@ def _data_collections_upload(metadata_path, files_path): return repository, draft_id +def save_pending_submission(json_form=None): + """Save uploaded files and form data for deferred post-login submission. + + Writes uploaded request files to a new temporary directory, computes file + metadata from those saved files, and stores pending submission state in the + Flask session so submission can resume after OAuth login. + + If ``json_form`` is provided, this function attaches the computed file + metadata under ``json_form["files"]`` and writes the result to + ``simulation_metadata.json`` in the temporary directory. + + Args: + json_form: Optional converted/validated BioSim metadata dictionary to + persist alongside uploaded files. When provided, file metadata is + added before writing. + + Side effects: + session["pending_form_data"]: Set to submitted form data (dict of lists). + session["pending_files_dir"]: Set to temporary directory path containing + uploaded files and optional ``simulation_metadata.json``. + """ + tmpdir = tempfile.mkdtemp(prefix="biosimdb_pending_") + _, file_meta = _save_files_and_extract_metadata(tmpdir) + + if json_form is not None: + json_form["files"] = file_meta + json_path = os.path.join(tmpdir, "simulation_metadata.json") + with open(json_path, "w") as f: + json.dump(json_form, f, indent=2) + + session["pending_form_data"] = request.form.to_dict(flat=False) + session["pending_files_dir"] = tmpdir + + def prepare_for_invenio(form_data, tmpdir): """Convert form data and upload files from tmpdir to Invenio. Cleans up tmpdir. @@ -45,8 +132,6 @@ def prepare_for_invenio(form_data, tmpdir): Returns: draft_id: The Invenio draft record ID of the created upload. """ - import shutil - try: json_form = form_to_json(form_data) invenio_data = fill_invenio_metadata(json_form) @@ -60,22 +145,3 @@ def prepare_for_invenio(form_data, tmpdir): finally: shutil.rmtree(tmpdir, ignore_errors=True) return draft_id - - -def save_pending_submission(): - """Save uploaded files and form data to a temp directory for post-login submission. - - Stores uploaded files in a new temporary directory and saves the form data and - directory path in the Flask session so the submission can be resumed after OAuth login. - - Side effects: - session["pending_form_data"]: Set to the submitted form data as a dict. - session["pending_files_dir"]: Set to the path of the temporary directory. - """ - tmpdir = tempfile.mkdtemp(prefix="biosimdb_pending_") - for field in request.files: - for file in request.files.getlist(field): - if file.filename: - file.save(os.path.join(tmpdir, secure_filename(file.filename))) - session["pending_form_data"] = request.form.to_dict(flat=False) - session["pending_files_dir"] = tmpdir diff --git a/biosimdb_interface/form/utils.py b/biosimdb_interface/form/utils.py index 81fb8c2..aab21ba 100644 --- a/biosimdb_interface/form/utils.py +++ b/biosimdb_interface/form/utils.py @@ -3,7 +3,7 @@ import copy import re -from biosimdb_interface.schema.invenio import INVENIO_DSMD_TEMPLATE, INVENIO_FORM_EMPTY +from biosimdb_interface.schema.invenio import INVENIO_FORM_EMPTY from biosimdb_interface.schema.webform import get_simulation_metadata @@ -18,8 +18,8 @@ def fill_invenio_metadata(form_data): """ invenio_data = copy.deepcopy(INVENIO_FORM_EMPTY) # invenio_data["custom_fields"]["dsmd"] = [form_data] - # temporarily use dsmd template for testing - invenio_data["custom_fields"]["dsmd"] = [INVENIO_DSMD_TEMPLATE] + # exclude dsmd template until updated to new schema + # invenio_data["custom_fields"]["dsmd"] = [INVENIO_DSMD_TEMPLATE] # add generated keywords # add generated subjects diff --git a/biosimdb_interface/form/webform.py b/biosimdb_interface/form/webform.py index 52931cc..941f730 100644 --- a/biosimdb_interface/form/webform.py +++ b/biosimdb_interface/form/webform.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import os +import requests from biosim_extractor.metadata.convertpopulated import convert_populated_metadata_units from biosim_extractor.metadata.validatemetadata import validate_metadata from flask import ( @@ -13,22 +14,29 @@ session, url_for, ) +from werkzeug.datastructures import ImmutableMultiDict from biosimdb_interface.schema.webform import WEBFORM_SCHEMA, get_simulation_metadata from . import form_bp -from .upload import prepare_for_invenio, save_pending_submission +from .upload import ( + extract_uploaded_file_metadata, + prepare_for_invenio, + save_pending_submission, +) from .utils import form_to_json, remove_empty_fields from .validation import validate_with_mdanalysis @form_bp.route("/webform", methods=["GET", "POST"]) def webform(): - """Render the metadata submission form and handle save/submit actions. + """Render the metadata form and handle save/submit actions. - On GET, renders the empty form. On POST, validates uploaded files with - MDAnalysis, then either downloads form data as JSON (save) or initiates - submission to BioSimDB (submit). + On POST, validates uploaded files, converts submitted metadata to standard + units, removes empty fields, and validates the result against the BioSim + schema. ``save`` returns the validated JSON to the browser. ``submit`` saves + uploaded files plus the validated JSON for deferred Invenio upload, then + starts login if needed. """ token = session.get("access_token") @@ -55,6 +63,9 @@ def webform(): # convert to standard units json_form = convert_populated_metadata_units(json_form) + if action == "save": + json_form["files"] = extract_uploaded_file_metadata() + # NOTE: note used yet, could be used to validate extracted fields are matching what is returned from json_form # extracted = session.get("extracted_metadata") @@ -74,7 +85,7 @@ def webform(): ) if action == "submit": - save_pending_submission() + save_pending_submission(json_form) if not token: session["post_login_redirect"] = url_for("form.resume_submit") return redirect(url_for("login.login")) @@ -117,14 +128,42 @@ def do_submit(): Called automatically by the loading page after login. Clears pending session data after upload and renders the success page with the record URL. """ - from werkzeug.datastructures import ImmutableMultiDict - form_data = session.pop("pending_form_data", None) tmpdir = session.pop("pending_files_dir", None) + + if not form_data or not tmpdir: + flash("No pending submission found. Please submit again.", "warning") + return redirect(url_for("form.webform")) + flat_form = ImmutableMultiDict( [(k, v) for k, vals in form_data.items() for v in vals] ) - draft_id = prepare_for_invenio(flat_form, tmpdir) + + try: + draft_id = prepare_for_invenio(flat_form, tmpdir) + except requests.HTTPError as exc: + status = exc.response.status_code if exc.response is not None else None + + if status in (401, 403): + session.pop("access_token", None) # force fresh login + session["post_login_redirect"] = url_for("form.resume_submit") + flash( + "Your login session is no longer valid for upload. " + "Please sign in again and the submission will resume automatically.", + "warning", + ) + return redirect(url_for("login.login")) + else: + session.pop("access_token", None) # force fresh login + flash("Upload failed unexpectedly. Please try again.", "danger") + + # keep pending_form_data and pending_files_dir for retry + return redirect(url_for("form.webform")) + + # success: now clear pending state + session.pop("pending_form_data", None) + session.pop("pending_files_dir", None) + BASE_URL = current_app.config["BASE_URL"] record_url = f"{BASE_URL}/uploads/{draft_id}" return render_template("form/submit_success.html", record_url=record_url) diff --git a/biosimdb_interface/schema/invenio.py b/biosimdb_interface/schema/invenio.py index 4f25c35..4774c92 100644 --- a/biosimdb_interface/schema/invenio.py +++ b/biosimdb_interface/schema/invenio.py @@ -42,21 +42,21 @@ INVENIO_DSMD_TEMPLATE = { - "software": "GROMACS", + "software": "", "software_version": "", "molecular_model": "", "simulation_method": "", - "timestep": "2", - "framestep": "2", + "timestep": "", + "framestep": "", "length": "", - "temperature": "298", + "temperature": "", "pressure": "", "ensemble": "", "box_type": "", "trajectories": "", "force_fields": "", "experimental_structures": "", - "pH": "7", + "pH": "", "membrane": "", "ligands": "", "sequences": "", @@ -65,6 +65,6 @@ "long_range_cutoff": "", "thermostat": "", "barostat": "", - "atom_count": "100", + "atom_count": "", "wall_time": "", } diff --git a/biosimdb_interface/schema/webform.py b/biosimdb_interface/schema/webform.py index 3f6c9bc..186dac0 100644 --- a/biosimdb_interface/schema/webform.py +++ b/biosimdb_interface/schema/webform.py @@ -51,10 +51,10 @@ def get_simulation_metadata(): "guidance": { "label": "Please Note:", "options": [ - "To extract simulation metadata, please upload a topology and corresponding trajectory file/s. Optionally, upload the aiida archive file containing the simulation provenance to extract further information about the simulation protocol.", + "To extract simulation metadata, please upload a topology and corresponding trajectory file/s. Optionally, upload a aiida archive file containing the simulation provenance to extract further information about the simulation protocol.", "Multiple trajectory files can be uploaded with a single associated topology file, both file types are required.", 'Various simulation file formats are accepted and read using MDAnalysis, please ensure your files are compatible.', - "This form uses the following schema to define and group BioSim metadata: XXXX. Missing terms or units? Please considering raising an issue or contributing to the schema.", + 'This form uses the biosim-schema to define and group BioSim metadata. Missing terms or units? Please considering raising an issue or contributing to the schema.', 'Automatically add metadata associated with each entry using the "Extract metadata" button. Please fill in any missing metadata fields manually where applicable, click on each field name for further information about data field requirements.', "Two options are available for your extracted simulation metadata:
  1. Download the simulation metadata as a json file and share alongside simulation files.
  2. Submit your simulation files and metadata in one step to BioSimDB, you will be directed to a login page and asked to authorize this application. Please continue and complete the data submission process in BioSimDB.
", ], diff --git a/docs/source/_static/logos/CCPBioSim-logo.png b/docs/source/_static/logos/CCPBioSim-logo.png new file mode 100644 index 0000000..7707537 Binary files /dev/null and b/docs/source/_static/logos/CCPBioSim-logo.png differ diff --git a/docs/source/_static/logos/biosimdb-logo-black.png b/docs/source/_static/logos/biosimdb-logo-black.png new file mode 100644 index 0000000..315ca6c Binary files /dev/null and b/docs/source/_static/logos/biosimdb-logo-black.png differ diff --git a/docs/source/_static/logos/biosimdb-logo-white.png b/docs/source/_static/logos/biosimdb-logo-white.png new file mode 100644 index 0000000..013ad7c Binary files /dev/null and b/docs/source/_static/logos/biosimdb-logo-white.png differ diff --git a/docs/source/_static/logos/cosec-logo.png b/docs/source/_static/logos/cosec-logo.png new file mode 100644 index 0000000..f46ec6f Binary files /dev/null and b/docs/source/_static/logos/cosec-logo.png differ diff --git a/docs/source/_static/logos/psdi-logo.png b/docs/source/_static/logos/psdi-logo.png new file mode 100644 index 0000000..5513421 Binary files /dev/null and b/docs/source/_static/logos/psdi-logo.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 0ad4df3..e4afdb6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -40,6 +40,10 @@ html_theme = 'furo' html_static_path = ['_static'] +html_theme_options = { + "dark_logo": "logos/biosimdb-logo-white.png", + "light_logo": "logos/biosimdb-logo-black.png", +} import warnings warnings.filterwarnings("ignore", message="netCDF4 is not available") diff --git a/docs/source/index.rst b/docs/source/index.rst index 4910ba1..65248f2 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,6 +6,18 @@ BioSimDB Interface ================== +``biosimdb-interface`` is available at http://github.com/CCPBioSim/biosimdb-interface + +.. image:: /_static/logos/biosimdb-logo-black.png + :width: 250px + :align: center + :class: only-light + +.. image:: /_static/logos/biosimdb-logo-white.png + :width: 250px + :align: center + :class: only-dark + A web interface for extracting simulation metadata and uploading it to the `BioSimDB `_ community data collection, hosted by PSDI. @@ -20,3 +32,26 @@ hosted by PSDI. usage contributing api + +Funding +======= + +Contributors to biosim-schema were supported by + +.. image:: _static/logos/psdi-logo.png + :height: 100px + :target: https://www.psdi.ac.uk/ + +.. image:: _static/logos/cosec-logo.png + :height: 100px + :target: https://www.cosec.ac.uk/ + +.. image:: _static/logos/CCPBioSim-logo.png + :height: 100px + :target: https://www.ccpbiosim.ac.uk/ + +Indices and tables +------------------ + +* :ref:`genindex` +* :ref:`modindex` diff --git a/tests/test_form/test_invenio.py b/tests/test_form/test_invenio.py new file mode 100644 index 0000000..e785e95 --- /dev/null +++ b/tests/test_form/test_invenio.py @@ -0,0 +1,66 @@ +from pathlib import Path +from unittest.mock import MagicMock + +from biosimdb_interface.form import invenio + + +def test_create_files_dict_expands_globs(tmp_path): + file_a = tmp_path / "a.file" + file_a.write_text("a") + + subdir = tmp_path / "example" + subdir.mkdir() + file_b = subdir / "b.cif" + file_b.write_text("b") + + patterns = [str(tmp_path / "*.file"), str(tmp_path / "example" / "*.cif")] + result = invenio.create_files_dict(patterns) + + assert result["a.file"] == file_a + assert result["b.cif"] == file_b + assert set(result.keys()) == {"a.file", "b.cif"} + + +def test_create_files_dict_returns_empty_for_no_matches(tmp_path): + patterns = [str(tmp_path / "*.does_not_exist")] + result = invenio.create_files_dict(patterns) + assert result == {} + + +def test_run_record_upload_calls_repository_flow(monkeypatch, tmp_path): + metadata_path = tmp_path / "metadata.json" + metadata_path.write_text("{}") + + loaded_metadata = {"metadata": {"title": "test"}} + files_dict = {"traj.xtc": Path("/tmp/traj.xtc")} + + loader = MagicMock(return_value=loaded_metadata) + repo = MagicMock() + + created_draft = MagicMock() + created_draft.get.return_value = {"id": "draft-123"} + repo.depositions.create.return_value = created_draft + + draft_api = repo.depositions.draft.return_value + + monkeypatch.setattr(invenio, "InvenioRepository", lambda url, api_key: repo) + monkeypatch.setattr(invenio, "get_loader", lambda _fmt: loader) + monkeypatch.setattr(invenio, "create_files_dict", lambda _files: files_dict) + + returned_repo, draft_id = invenio.run_record_upload( + api_url="https://example.org", + api_key="secret", + metadata_path=metadata_path, + metadata_format="json", + files=["*.xtc"], + community="my-community", + ) + + assert returned_repo is repo + assert draft_id == "draft-123" + + loader.assert_called_once_with(metadata_path) + repo.depositions.create.assert_called_once_with() + draft_api.update.assert_called_once_with(loaded_metadata) + draft_api.files.upload.assert_called_once_with(files_dict) + draft_api.bind.assert_called_once_with("my-community")