Skip to content
Merged

Dev #11

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@ instance/

# Sphinx documentation
docs/_build/
.DS_Store

# PyBuilder
.pybuilder/
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
biosimdb-interface
==================

<p align="center">
<img src="docs/source/_static/logos/biosimdb-logo-white.png#gh-dark-mode-only" alt="biosimdb logo" width="300"/>
<img src="docs/source/_static/logos/biosimdb-logo-black.png#gh-light-mode-only" alt="biosimdb logo" width="300"/>
</p>

A repository for extracting and uploading simulation data via a web interface to the PSDI hosted BioSimDB community data-collection.

## Project Status
Expand Down
1 change: 1 addition & 0 deletions biosimdb_interface/form/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def extract_files_validate(top_file, traj_file):
schema_path=os.getenv("ENGINE_MAPPING_SCHEMA_PATH", ""),
top_file=top_file,
traj_file=traj_file,
store_file_metadata=False, # not on webform so exclude
)

result = populator.populate()
Expand Down
108 changes: 87 additions & 21 deletions biosimdb_interface/form/upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,68 @@
import glob
import json
import os
import shutil
import tempfile

from biosim_extractor.metadata.filemetadata import files_metadata
from flask import current_app, request, session
from werkzeug.utils import secure_filename

from .invenio import run_record_upload
from .utils import fill_invenio_metadata, form_to_json


def _save_request_files(tmpdir):
    """Save uploaded request files into a temporary directory grouped by role.

    Maps the trajectory[] field to trajectory and keeps other field names as roles.

    Filenames are sanitised with ``secure_filename``. Sanitising can return an
    empty string or map two different uploads onto the same name, so every
    file is written to a path that does not exist yet; no upload overwrites
    another and every recorded path refers to its own file.

    Args:
        tmpdir: Path to the temporary directory where uploaded files are written.

    Returns:
        Dictionary mapping file roles to lists of saved file paths. The
        ``topology`` and ``trajectory`` roles are always present, possibly
        with empty lists.
    """
    saved_files = {"topology": [], "trajectory": []}
    for field in request.files:
        role = "trajectory" if field == "trajectory[]" else field
        for file in request.files.getlist(field):
            if not file.filename:
                # Empty file inputs are submitted with a blank filename.
                continue

            # secure_filename() may return "" (a name made only of stripped
            # characters). Joining "" onto tmpdir yields the directory itself,
            # which file.save() cannot write to, so use a placeholder name.
            name = secure_filename(file.filename) or "upload"
            stem, ext = os.path.splitext(name)

            # Pick the first free path so same-named uploads do not clobber
            # each other (e.g. "traj.xtc", then "traj_1.xtc").
            path = os.path.join(tmpdir, name)
            counter = 1
            while os.path.exists(path):
                path = os.path.join(tmpdir, f"{stem}_{counter}{ext}")
                counter += 1

            file.save(path)
            saved_files.setdefault(role, []).append(path)
    return saved_files


def _save_files_and_extract_metadata(tmpdir):
    """Write the request's uploads to ``tmpdir`` and compute their metadata.

    Args:
        tmpdir: Path to the temporary directory where uploaded files are written.

    Returns:
        Tuple ``(saved_files, file_meta)`` where ``saved_files`` is the
        role-to-paths dictionary returned by ``_save_request_files`` and
        ``file_meta`` is the list of file metadata dictionaries returned by
        ``files_metadata`` for that dictionary.
    """
    files_by_role = _save_request_files(tmpdir)
    return files_by_role, files_metadata(files_by_role)


def extract_uploaded_file_metadata():
    """Extract file metadata from the current request's uploaded files.

    The uploads are written to a scratch directory only for the duration of
    the call; the directory is always removed afterwards and every upload
    stream is rewound to its start.

    Returns:
        The file metadata computed by ``_save_files_and_extract_metadata``.
    """
    tmpdir = tempfile.mkdtemp(prefix="biosimdb_file_metadata_")
    try:
        _, file_meta = _save_files_and_extract_metadata(tmpdir)
        return file_meta
    finally:
        # Delete the scratch directory first: rmtree with ignore_errors=True
        # does not raise, so the directory is removed even if rewinding one of
        # the streams below fails.
        shutil.rmtree(tmpdir, ignore_errors=True)
        # Rewind the upload streams so the files can be read again later in
        # the same request.
        for field in request.files:
            for file in request.files.getlist(field):
                file.stream.seek(0)


def _data_collections_upload(metadata_path, files_path):
"""Upload metadata as a draft PSDI data-collections record.

Expand All @@ -35,6 +88,40 @@ def _data_collections_upload(metadata_path, files_path):
return repository, draft_id


def save_pending_submission(json_form=None):
    """Save uploaded files and form data for deferred post-login submission.

    Writes uploaded request files to a new temporary directory, computes file
    metadata from those saved files, and stores pending submission state in the
    Flask session so submission can resume after OAuth login.

    If ``json_form`` is provided, this function attaches the computed file
    metadata under ``json_form["files"]`` (mutating the passed dictionary) and
    writes the result to ``simulation_metadata.json`` in the temporary
    directory.

    If saving the files, extracting metadata or writing the JSON fails, the
    temporary directory is removed and the exception is re-raised; the session
    is left untouched in that case.

    Args:
        json_form: Optional converted/validated BioSim metadata dictionary to
            persist alongside uploaded files. When provided, file metadata is
            added before writing.

    Side effects:
        session["pending_form_data"]: Set to submitted form data (dict of lists).
        session["pending_files_dir"]: Set to temporary directory path containing
            uploaded files and optional ``simulation_metadata.json``.
    """
    tmpdir = tempfile.mkdtemp(prefix="biosimdb_pending_")
    try:
        _, file_meta = _save_files_and_extract_metadata(tmpdir)

        if json_form is not None:
            json_form["files"] = file_meta
            json_path = os.path.join(tmpdir, "simulation_metadata.json")
            with open(json_path, "w", encoding="utf-8") as f:
                json.dump(json_form, f, indent=2)
    except Exception:
        # The session does not reference tmpdir yet, so nothing else would
        # ever clean it up; remove it here before propagating the error.
        shutil.rmtree(tmpdir, ignore_errors=True)
        raise

    session["pending_form_data"] = request.form.to_dict(flat=False)
    session["pending_files_dir"] = tmpdir


def prepare_for_invenio(form_data, tmpdir):
"""Convert form data and upload files from tmpdir to Invenio. Cleans up tmpdir.

Expand All @@ -45,8 +132,6 @@ def prepare_for_invenio(form_data, tmpdir):
Returns:
draft_id: The Invenio draft record ID of the created upload.
"""
import shutil

try:
json_form = form_to_json(form_data)
invenio_data = fill_invenio_metadata(json_form)
Expand All @@ -60,22 +145,3 @@ def prepare_for_invenio(form_data, tmpdir):
finally:
shutil.rmtree(tmpdir, ignore_errors=True)
return draft_id


def save_pending_submission():
    """Stash the request's uploads and form data for post-login submission.

    Every uploaded file that has a filename is written, under its
    ``secure_filename``-sanitised name, into a freshly created temporary
    directory. The form data and the directory path are then stored in the
    Flask session so the submission can be resumed after OAuth login.

    Side effects:
        session["pending_form_data"]: Set to the submitted form data as a dict.
        session["pending_files_dir"]: Set to the path of the temporary directory.
    """
    pending_dir = tempfile.mkdtemp(prefix="biosimdb_pending_")

    uploads = (
        upload
        for field_name in request.files
        for upload in request.files.getlist(field_name)
    )
    for upload in uploads:
        if not upload.filename:
            continue
        target = os.path.join(pending_dir, secure_filename(upload.filename))
        upload.save(target)

    session["pending_form_data"] = request.form.to_dict(flat=False)
    session["pending_files_dir"] = pending_dir
6 changes: 3 additions & 3 deletions biosimdb_interface/form/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import copy
import re

from biosimdb_interface.schema.invenio import INVENIO_DSMD_TEMPLATE, INVENIO_FORM_EMPTY
from biosimdb_interface.schema.invenio import INVENIO_FORM_EMPTY
from biosimdb_interface.schema.webform import get_simulation_metadata


Expand All @@ -18,8 +18,8 @@ def fill_invenio_metadata(form_data):
"""
invenio_data = copy.deepcopy(INVENIO_FORM_EMPTY)
# invenio_data["custom_fields"]["dsmd"] = [form_data]
# temporarily use dsmd template for testing
invenio_data["custom_fields"]["dsmd"] = [INVENIO_DSMD_TEMPLATE]
# exclude dsmd template until updated to new schema
# invenio_data["custom_fields"]["dsmd"] = [INVENIO_DSMD_TEMPLATE]
# add generated keywords
# add generated subjects

Expand Down
57 changes: 48 additions & 9 deletions biosimdb_interface/form/webform.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/env python
import os

import requests
from biosim_extractor.metadata.convertpopulated import convert_populated_metadata_units
from biosim_extractor.metadata.validatemetadata import validate_metadata
from flask import (
Expand All @@ -13,22 +14,29 @@
session,
url_for,
)
from werkzeug.datastructures import ImmutableMultiDict

from biosimdb_interface.schema.webform import WEBFORM_SCHEMA, get_simulation_metadata

from . import form_bp
from .upload import prepare_for_invenio, save_pending_submission
from .upload import (
extract_uploaded_file_metadata,
prepare_for_invenio,
save_pending_submission,
)
from .utils import form_to_json, remove_empty_fields
from .validation import validate_with_mdanalysis


@form_bp.route("/webform", methods=["GET", "POST"])
def webform():
"""Render the metadata submission form and handle save/submit actions.
"""Render the metadata form and handle save/submit actions.

On GET, renders the empty form. On POST, validates uploaded files with
MDAnalysis, then either downloads form data as JSON (save) or initiates
submission to BioSimDB (submit).
On POST, validates uploaded files, converts submitted metadata to standard
units, removes empty fields, and validates the result against the BioSim
schema. ``save`` returns the validated JSON to the browser. ``submit`` saves
uploaded files plus the validated JSON for deferred Invenio upload, then
starts login if needed.
"""
token = session.get("access_token")

Expand All @@ -55,6 +63,9 @@ def webform():
# convert to standard units
json_form = convert_populated_metadata_units(json_form)

if action == "save":
json_form["files"] = extract_uploaded_file_metadata()

# NOTE: not used yet; could be used to validate that the extracted fields match what is returned from json_form
# extracted = session.get("extracted_metadata")

Expand All @@ -74,7 +85,7 @@ def webform():
)

if action == "submit":
save_pending_submission()
save_pending_submission(json_form)
if not token:
session["post_login_redirect"] = url_for("form.resume_submit")
return redirect(url_for("login.login"))
Expand Down Expand Up @@ -117,14 +128,42 @@ def do_submit():
Called automatically by the loading page after login. Clears pending
session data after upload and renders the success page with the record URL.
"""
from werkzeug.datastructures import ImmutableMultiDict

form_data = session.pop("pending_form_data", None)
tmpdir = session.pop("pending_files_dir", None)

if not form_data or not tmpdir:
flash("No pending submission found. Please submit again.", "warning")
return redirect(url_for("form.webform"))

flat_form = ImmutableMultiDict(
[(k, v) for k, vals in form_data.items() for v in vals]
)
draft_id = prepare_for_invenio(flat_form, tmpdir)

try:
draft_id = prepare_for_invenio(flat_form, tmpdir)
except requests.HTTPError as exc:
status = exc.response.status_code if exc.response is not None else None

if status in (401, 403):
session.pop("access_token", None) # force fresh login
session["post_login_redirect"] = url_for("form.resume_submit")
flash(
"Your login session is no longer valid for upload. "
"Please sign in again and the submission will resume automatically.",
"warning",
)
return redirect(url_for("login.login"))
else:
session.pop("access_token", None) # force fresh login
flash("Upload failed unexpectedly. Please try again.", "danger")

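# NOTE(review): pending_form_data and pending_files_dir were already popped from the session at the top of this view, and prepare_for_invenio removes tmpdir in its finally block, so no pending state is actually kept for retry here - confirm, and restore the session keys / defer cleanup if retry is intended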
return redirect(url_for("form.webform"))

# success: now clear pending state
session.pop("pending_form_data", None)
session.pop("pending_files_dir", None)

BASE_URL = current_app.config["BASE_URL"]
record_url = f"{BASE_URL}/uploads/{draft_id}"
return render_template("form/submit_success.html", record_url=record_url)
12 changes: 6 additions & 6 deletions biosimdb_interface/schema/invenio.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,21 @@


INVENIO_DSMD_TEMPLATE = {
"software": "GROMACS",
"software": "",
"software_version": "",
"molecular_model": "",
"simulation_method": "",
"timestep": "2",
"framestep": "2",
"timestep": "",
"framestep": "",
"length": "",
"temperature": "298",
"temperature": "",
"pressure": "",
"ensemble": "",
"box_type": "",
"trajectories": "",
"force_fields": "",
"experimental_structures": "",
"pH": "7",
"pH": "",
"membrane": "",
"ligands": "",
"sequences": "",
Expand All @@ -65,6 +65,6 @@
"long_range_cutoff": "",
"thermostat": "",
"barostat": "",
"atom_count": "100",
"atom_count": "",
"wall_time": "",
}
4 changes: 2 additions & 2 deletions biosimdb_interface/schema/webform.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,10 @@ def get_simulation_metadata():
"guidance": {
"label": "Please Note:",
"options": [
"To extract simulation metadata, please upload a topology and corresponding trajectory file/s. Optionally, upload the aiida archive file containing the simulation provenance to extract further information about the simulation protocol.",
"To extract simulation metadata, please upload a topology and corresponding trajectory file/s. Optionally, upload an AiiDA archive file containing the simulation provenance to extract further information about the simulation protocol.",
"Multiple trajectory files can be uploaded with a single associated topology file, both file types are required.",
'Various simulation file formats are accepted and read using <a href="https://www.mdanalysis.org/" target="_blank" class="text-reset"> MDAnalysis</a>, please ensure your files are compatible.',
"This form uses the following schema to define and group BioSim metadata: XXXX. Missing terms or units? Please considering raising an issue or contributing to the schema.",
'This form uses the <a href="https://biosim-schema.readthedocs.io/en/latest/index.html" target="_blank" class="text-reset"> biosim-schema</a> to define and group BioSim metadata. Missing terms or units? Please consider raising an issue or contributing to the schema.',
'Automatically add metadata associated with each entry using the "Extract metadata" button. Please fill in any missing metadata fields manually where applicable, click on each field name for further information about data field requirements.',
"Two options are available for your extracted simulation metadata: <ol> <li> Download the simulation metadata as a json file and share alongside simulation files. </li> <li> Submit your simulation files and metadata in one step to BioSimDB, you will be directed to a login page and asked to authorize this application. Please continue and complete the data submission process in BioSimDB. </li> </ol>",
],
Expand Down
Binary file added docs/source/_static/logos/CCPBioSim-logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/source/_static/logos/cosec-logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/source/_static/logos/psdi-logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
4 changes: 4 additions & 0 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@

html_theme = 'furo'
html_static_path = ['_static']
html_theme_options = {
"dark_logo": "logos/biosimdb-logo-white.png",
"light_logo": "logos/biosimdb-logo-black.png",
}

import warnings
warnings.filterwarnings("ignore", message="netCDF4 is not available")
Loading
Loading