diff --git a/.rtd_pip_reqs.txt b/.rtd_pip_reqs.txt index d7f3843e..8caa4dc8 100644 --- a/.rtd_pip_reqs.txt +++ b/.rtd_pip_reqs.txt @@ -3,6 +3,7 @@ # sphinx-markdown-tables # sphinx-reredirects # sphinx-rtd-theme +# sphinxcontrib-bibtex # sphinxcontrib-napoleon # m2r2 alabaster==1.0.0 @@ -41,6 +42,7 @@ sphinx-markdown-tables==0.0.17 sphinx-reredirects==1.0.0 sphinx-rtd-theme==3.0.2 sphinxcontrib-applehelp==2.0.0 +sphinxcontrib-bibtex==2.6.5 sphinxcontrib-devhelp==2.0.0 sphinxcontrib-htmlhelp==2.1.0 sphinxcontrib-jquery==4.1 diff --git a/doc/conf.py b/doc/conf.py index f2380ab0..e824e5d3 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -41,6 +41,8 @@ 'sphinx.ext.autosummary', 'sphinx_markdown_tables', 'sphinx_reredirects', + # bibtex references + "sphinxcontrib.bibtex", ] # Add any paths that contain templates here, relative to this directory. @@ -77,6 +79,8 @@ "documentation_data_format": "v1/documentation_data_format.html", } +bibtex_bibfiles = ["references.bib"] + # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for diff --git a/doc/index.rst b/doc/index.rst index ab645524..8b39a60f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -14,7 +14,7 @@ :maxdepth: 3 :caption: PEtab v2 - PEtab v2 draft + PEtab v2 specs .. 
toctree:: :maxdepth: 2 diff --git a/doc/references.bib b/doc/references.bib new file mode 100644 index 00000000..972b5404 --- /dev/null +++ b/doc/references.bib @@ -0,0 +1,151 @@ +@Article{LopezMuh2013, + author = {Lopez, Carlos F and Muhlich, Jeremy L and Bachman, John A and Sorger, Peter K}, + journal = {Molecular systems biology}, + title = {Programming biological models in Python using PySB.}, + year = {2013}, + issn = {1744-4292}, + pages = {646}, + volume = {9}, + abstract = {Mathematical equations are fundamental to modeling biological networks, but as networks get large and revisions frequent, it becomes difficult to manage equations directly or to combine previously developed models. Multiple simultaneous efforts to create graphical standards, rule-based languages, and integrated software workbenches aim to simplify biological modeling but none fully meets the need for transparent, extensible, and reusable models. In this paper we describe PySB, an approach in which models are not only created using programs, they are programs. PySB draws on programmatic modeling concepts from little b and ProMot, the rule-based languages BioNetGen and Kappa and the growing library of Python numerical tools. Central to PySB is a library of macros encoding familiar biochemical actions such as binding, catalysis, and polymerization, making it possible to use a high-level, action-oriented vocabulary to construct detailed models. As Python programs, PySB models leverage tools and practices from the open-source software community, substantially advancing our ability to distribute and manage the work of testing biochemical hypotheses. 
We illustrate these ideas using new and previously published models of apoptosis.}, + chemicals = {Proto-Oncogene Proteins c-bcl-2}, + citation-subset = {IM}, + completed = {2013-09-16}, + country = {England}, + creationdate = {2026-01-05T08:55:01}, + doi = {10.1038/msb.2013.1}, + issn-linking = {1744-4292}, + keywords = {Apoptosis, physiology; Computer Simulation; Mitochondria, physiology; Models, Biological; Programming Languages; Proto-Oncogene Proteins c-bcl-2, physiology; Software}, + modificationdate = {2026-01-05T08:55:01}, + nlm-id = {101235389}, + owner = {NLM}, + pii = {msb20131}, + pmc = {PMC3588907}, + pmid = {23423320}, + pubmodel = {Print}, + pubstatus = {ppublish}, + revised = {2016-10-19}, +} + +@Article{HuckaFin2003, + author = {Hucka, M. and Finney, A. and Sauro, H. M. and Bolouri, H. and Doyle, J. C. and Kitano, H. and Arkin, A. P. and Bornstein, B. J. and Bray, D. and Cornish-Bowden, A. and Cuellar, A. A. and Dronov, S. and Gilles, E. D. and Ginkel, M. and Gor, V. and Goryanin, I. I. and Hedley, W. J. and Hodgman, T. C. and Hofmeyr, J.-H. and Hunter, P. J. and Juty, N. S. and Kasberger, J. L. and Kremling, A. and Kummer, U. and {Le Nov\`{e}re}, N. and Loew, L. M. and Lucio, D. and Mendes, P. and Minch, E. and Mjolsness, E. D. and Nakayama, Y. and Nelson, M. R. and Nielsen, P. F. and Sakurada, T. and Schaff, J. C. and Shapiro, B. E. and Shimizu, T. S. and Spence, H. D. and Stelling, J. and Takahashi, K. and Tomita, M. and Wagner, J. 
and Wang, J.}, + journal = {Bioinformatics}, + title = {The systems biology markup language {(SBML):} {A} medium for representation and exchange of biochemical network models}, + year = {2003}, + number = {4}, + pages = {524--531}, + volume = {19}, + bdsk-url-1 = {http://dx.doi.org/10.1093/bioinformatics/btg015}, + creationdate = {2026-01-05T08:55:12}, + date-added = {2016-08-17 16:49:31 +0000}, + date-modified = {2018-04-13 20:51:14 +0000}, + doi = {10.1093/bioinformatics/btg015}, + file = {:Papers/HuckaFin2003a.pdf:}, + journal-full = {Bioinformatics}, + keywords = {SBML}, + modificationdate = {2026-01-05T08:55:12}, + month = {March}, +} + +@Article{SchmiesterSch2021, + author = {Schmiester, Leonard AND Sch{\"a}lte, Yannik AND Bergmann, Frank T. AND Camba, Tacio AND Dudkin, Erika AND Egert, Janine AND Fr{\"o}hlich, Fabian AND Fuhrmann, Lara AND Hauber, Adrian L. AND Kemmer, Svenja AND Lakrisenko, Polina AND Loos, Carolin AND Merkt, Simon AND Müller, Wolfgang AND Pathirana, Dilan AND Raimúndez, Elba AND Refisch, Lukas AND Rosenblatt, Marcus AND Stapor, Paul L. AND St{\"a}dter, Philipp AND Wang, Dantong AND Wieland, Franz-Georg AND Banga, Julio R. AND Timmer, Jens AND Villaverde, Alejandro F. AND Sahle, Sven AND Kreutz, Clemens AND Hasenauer, Jan AND Weindl, Daniel}, + journal = {PLOS Computational Biology}, + title = {{PEtab}—Interoperable specification of parameter estimation problems in systems biology}, + year = {2021}, + month = {01}, + number = {1}, + pages = {1-10}, + volume = {17}, + abstract = {Author summary Parameter estimation is a common and crucial task in modeling, as many models depend on unknown parameters which need to be inferred from data. There exist various tools for tasks like model development, model simulation, optimization, or uncertainty analysis, each with different capabilities and strengths. 
In order to be able to easily combine tools in an interoperable manner, but also to make results accessible and reusable for other researchers, it is valuable to define parameter estimation problems in a standardized form. Here, we introduce PEtab, a parameter estimation problem definition format which integrates with established systems biology standards for model and data specification. As the novel format is already supported by eight software tools with hundreds of users in total, we expect it to be of great use and impact in the community, both for modeling and algorithm development.}, + creationdate = {2026-01-05T08:56:19}, + doi = {10.1371/journal.pcbi.1008646}, + modificationdate = {2026-01-05T08:56:19}, + publisher = {Public Library of Science}, + timestamp = {2021-01-30}, + url = {https://doi.org/10.1371/journal.pcbi.1008646}, +} +@Misc{Lindner1993tsv, + author = {Paul Lindner}, + note = {Retrieved 5 January 2026}, + title = {text/tab-separated-values}, + year = {1993}, + creationdate = {2026-01-05T19:15:51}, + modificationdate = {2026-01-05T19:18:22}, + howpublished = {\url{https://www.iana.org/assignments/media-types/text/tab-separated-values}}, +} +@Article{ClerxCoo2020, + author = {Michael Clerx and Michael T. Cooling and Jonathan Cooper and Alan Garny and Keri Moyle and David P. Nickerson and Poul M. F. Nielsen and Hugh Sorby}, + journal = {Journal of Integrative Bioinformatics}, + title = {{CellML 2.0}}, + year = {2020}, + number = {2-3}, + pages = {20200021}, + volume = {17}, + creationdate = {2026-01-05T19:22:35}, + doi = {10.1515/jib-2020-0021}, + lastchecked = {2026-01-05}, + modificationdate = {2026-01-05T19:22:45}, + url = {https://doi.org/10.1515/jib-2020-0021}, +} + +@InBook{Faeder2009, + author = {Faeder, James R. and Blinov, Michael L. 
and Hlavacek, William S.}, + editor = {Maly, Ivan V.}, + pages = {113--167}, + publisher = {Humana Press}, + title = {Rule-Based Modeling of Biochemical Systems with {BioNetGen}}, + year = {2009}, + address = {Totowa, NJ}, + isbn = {978-1-59745-525-1}, + abstract = {Rule-based modeling involves the representation of molecules as structured objects and molecular interactions as rules for transforming the attributes of these objects. The approach is notable in that it allows one to systematically incorporate site-specific details about protein---protein interactions into a model for the dynamics of a signal-transduction system, but the method has other applications as well, such as following the fates of individual carbon atoms in metabolic reactions. The consequences of protein---protein interactions are difficult to specify and track with a conventional modeling approach because of the large number of protein phosphoforms and protein complexes that these interactions potentially generate. Here, we focus on how a rule-based model is specified in the BioNetGen language (BNGL) and how a model specification is analyzed using the BioNetGen software tool. 
We also discuss new developments in rule-based modeling that should enable the construction and analyses of comprehensive models for signal transduction pathways and similarly large-scale models for other biochemical systems.}, + booktitle = {Systems Biology}, + doi = {10.1007/978-1-59745-525-1_5}, + modificationdate = {2026-01-05T19:23:44}, + timestamp = {2020-12-11}, + url = {https://doi.org/10.1007/978-1-59745-525-1_5}, +} + +@Article{VillaverdePat2021, + author = {Villaverde, Alejandro F and Pathirana, Dilan and Fröhlich, Fabian and Hasenauer, Jan and Banga, Julio R}, + journal = {Briefings in Bioinformatics}, + title = {A protocol for dynamic model calibration}, + year = {2021}, + issn = {1477-4054}, + month = {10}, + number = {1}, + pages = {bbab387}, + volume = {23}, + abstract = {Ordinary differential equation models are nowadays widely used for the mechanistic description of biological processes and their temporal evolution. These models typically have many unknown and nonmeasurable parameters, which have to be determined by fitting the model to experimental data. In order to perform this task, known as parameter estimation or model calibration, the modeller faces challenges such as poor parameter identifiability, lack of sufficiently informative experimental data and the existence of local minima in the objective function landscape. These issues tend to worsen with larger model sizes, increasing the computational complexity and the number of unknown parameters. An incorrectly calibrated model is problematic because it may result in inaccurate predictions and misleading conclusions. For nonexpert users, there are a large number of potential pitfalls. Here, we provide a protocol that guides the user through all the steps involved in the calibration of dynamic models. We illustrate the methodology with two models and provide all the code required to reproduce the results and perform the same analysis on new models. 
Our protocol provides practitioners and researchers in biological modelling with a one-stop guide that is at the same time compact and sufficiently comprehensive to cover all aspects of the problem.}, + creationdate = {2025-02-03T09:42:24}, + doi = {10.1093/bib/bbab387}, + modificationdate = {2025-02-03T09:42:24}, +} + +@misc{yaml, + title = {{YAML Ain't Markup Language} ({YAML}\texttrademark) Version 1.2, Revision 1.2.2}, + howpublished = {\url{https://yaml.org/spec/1.2.2/}}, + year = {2021}, + note = {Accessed 2026-01-16} +} + +@misc{jsonschema-spec-2020-12, + title = {{JSON} Schema: A Media Type for Describing {JSON} Documents (Draft 2020-12)}, + author = {Wright, Austin and Andrews, Henry and Hutton, Ben and Dennis, Greg}, + institution = {Internet Engineering Task Force}, + year = {2022}, + howpublished = {\url{https://json-schema.org/draft/2020-12/json-schema-core.html}} +} +@Article{KeatingWal2020, + author = {Keating, Sarah M and Waltemath, Dagmar and König, Matthias and Zhang, Fengkai and Dräger, Andreas and Chaouiya, Claudine and Bergmann, Frank T and Finney, Andrew and Gillespie, Colin S and Helikar, Tomáš and Hoops, Stefan and Malik‐Sheriff, Rahuman S and Moodie, Stuart L and Moraru, Ion I and Myers, Chris J and Naldi, Aurélien and Olivier, Brett G and Sahle, Sven and Schaff, James C and Smith, Lucian P and Swat, Maciej J and Thieffry, Denis and Watanabe, Leandro and Wilkinson, Darren J and Blinov, Michael L and Begley, Kimberly and Faeder, James R and Gómez, Harold F and Hamm, Thomas M and Inagaki, Yuichiro and Liebermeister, Wolfram and Lister, Allyson L and Lucio, Daniel and Mjolsness, Eric and Proctor, Carole J and Raman, Karthik and Rodriguez, Nicolas and Shaffer, Clifford A and Shapiro, Bruce E and Stelling, Joerg and Swainston, Neil and Tanimura, Naoki and Wagner, John and Meier‐Schellersheim, Martin and Sauro, Herbert M and Palsson, Bernhard and Bolouri, Hamid and Kitano, Hiroaki and Funahashi, Akira and Hermjakob, Henning and Doyle, John C and 
Hucka, Michael and Adams, Richard R and Allen, Nicholas A and Angermann, Bastian R and Antoniotti, Marco and Bader, Gary D and Červený, Jan and Courtot, Mélanie and Cox, Chris D and Dalle Pezze, Piero and Demir, Emek and Denney, William S and Dharuri, Harish and Dorier, Julien and Drasdo, Dirk and Ebrahim, Ali and Eichner, Johannes and Elf, Johan and Endler, Lukas and Evelo, Chris T and Flamm, Christoph and Fleming, Ronan MT and Fröhlich, Martina and Glont, Mihai and Gonçalves, Emanuel and Golebiewski, Martin and Grabski, Hovakim and Gutteridge, Alex and Hachmeister, Damon and Harris, Leonard A and Heavner, Benjamin D and Henkel, Ron and Hlavacek, William S and Hu, Bin and Hyduke, Daniel R and de Jong, Hidde and Juty, Nick and Karp, Peter D and Karr, Jonathan R and Kell, Douglas B and Keller, Roland and Kiselev, Ilya and Klamt, Steffen and Klipp, Edda and Knüpfer, Christian and Kolpakov, Fedor and Krause, Falko and Kutmon, Martina and Laibe, Camille and Lawless, Conor and Li, Lu and Loew, Leslie M and Machne, Rainer and Matsuoka, Yukiko and Mendes, Pedro and Mi, Huaiyu and Mittag, Florian and Monteiro, Pedro T and Natarajan, Kedar Nath and Nielsen, Poul MF and Nguyen, Tramy and Palmisano, Alida and Pettit, Jean‐Baptiste and Pfau, Thomas and Phair, Robert D and Radivoyevitch, Tomas and Rohwer, Johann M and Ruebenacker, Oliver A and Saez‐Rodriguez, Julio and Scharm, Martin and Schmidt, Henning and Schreiber, Falk and Schubert, Michael and Schulte, Roman and Sealfon, Stuart C and Smallbone, Kieran and Soliman, Sylvain and Stefan, Melanie I and Sullivan, Devin P and Takahashi, Koichi and Teusink, Bas and Tolnay, David and Vazirabad, Ibrahim and von Kamp, Axel and Wittig, Ulrike and Wrzodek, Clemens and Wrzodek, Finja and Xenarios, Ioannis and Zhukova, Anna and Zucker, Jeremy}, + journal = {Molecular Systems Biology}, + title = {{SBML} Level 3: an extensible format for the exchange and reuse of biological models}, + year = {2020}, + issn = {1744-4292}, + month = aug, + 
number = {8}, + volume = {16}, + creationdate = {2026-01-26T09:35:11}, + doi = {10.15252/msb.20199110}, + modificationdate = {2026-01-26T09:35:27}, + publisher = {Springer Science and Business Media LLC}, +} +@Comment{jabref-meta: databaseType:bibtex;} diff --git a/doc/v2/documentation_data_format.rst b/doc/v2/documentation_data_format.rst index b9b8dcba..ff7adcce 100644 --- a/doc/v2/documentation_data_format.rst +++ b/doc/v2/documentation_data_format.rst @@ -3,10 +3,6 @@ PEtab data format specification 2.0 =================================== -.. warning:: - - This document is a draft and subject to change. - Format version: 2.0.0 This document explains the PEtab data format. @@ -27,48 +23,58 @@ The scope of PEtab is the complete specification of parameter estimation problems in typical systems biology applications. In practise, data-driven modeling often begins with either (i) a computational model of a biological system that requires calibration or (ii) experimental data that need -integration and analysis through a computational model. +integration and analysis through a computational model +:cite:p:`VillaverdePat2021`. Measurements are linked to the biological model by an observation and noise model. Often, measurements are taken after some experimental perturbations have been applied, which are represented as derivations from a generic model -(Figure 1A). Therefore, one goal was to specify such a setup in the least -redundant way. Furthermore, we wanted to establish an intuitive, modular, -machine- and human-readable and -writable format that makes use of existing -standards. +(Figure 1A). Therefore, a goal of PEtab is to specify +such a setup in the least redundant way. Furthermore, PEtab aims to provide an +intuitive, modular, machine- and human-readable and -writable format that makes +use of existing standards. .. figure:: gfx/petab_scope_and_files.png - :alt: A typical setup for data-based modeling studies and its - representation in PEtab. 
+ :alt: Structure of typical parameter estimation problems in systems biology + and their representation in PEtab. :scale: 80% - **Figure 1: A typical setup for data-based modeling studies and its - representation in PEtab.** + **Figure 1: Structure of typical parameter estimation problems in systems + biology and their representation in PEtab.** + **A:** Different experiments are conducted and measurements are taken. + The different experiments are described by different instances of a generic + model. These experiment-specific models are simulated to evaluate an + objective function. + **B:** How the different elements of A are represented in PEtab. + Corresponding elements are indicated by the same background color. Overview --------- -The PEtab data format defines a parameter estimation problem using multiple -text-based files in `YAML `_ and `Tab-Separated Values (TSV) +PEtab builds on existing standards for model specification and defines a +parameter estimation problem using multiple text-based files in +`YAML `_ and `Tab-Separated Values (TSV) `_ -format (Figure 2), including: +format (Figure 2). +A PEtab problem consists of the following types of files: -- A :ref:`grouping file ` that lists all of the following - files and provides additional information including +- A :ref:`problem configuration file ` that lists all of the + following files and provides additional information including :ref:`extensions ` [YAML]. - :ref:`Parameter file(s) ` to set parameter values - globally, and to specify the parameters to be estimated as well as their - parameter bounds and prior distributions [TSV]. + globally (across all experiments), and to specify the parameters to be + estimated as well as their parameter bounds and prior distributions [TSV]. - :ref:`Model ` file(s) specifying the base model(s) - [SBML, CELLML, BNGL, PYSB, ...]. + [SBML :cite:p:`HuckaFin2003,KeatingWal2020`, CellML :cite:p:`ClerxCoo2020`, + BNGL :cite:p:`Faeder2009`, ...]. 
- :ref:`Observable file(s) ` defining the observation model [TSV]. - :ref:`Measurement file(s) ` containing experimental - data used for model fitting [TSV]. + data used for model calibration [TSV]. - (optional) :ref:`Condition file(s) ` specifying model inputs and condition-specific parameters [TSV]. @@ -83,7 +89,10 @@ format (Figure 2), including: .. figure:: gfx/petab_files.png :alt: Files constituting a PEtab problem - **Figure 2: Files constituting a PEtab problem.** + **Figure 2: Files constituting a PEtab 2.0 problem.** + A single YAML file links the different file types. + There can be one or more files of each type; the grayed-out files are + optional. Figure 1B shows how those files relate to a typical data-based modeling setup. @@ -111,10 +120,13 @@ they are part of a :ref:`PEtab extension `. - ``NON_PARAMETER_TABLE_ID``: A valid PEtab ID referring to a constant or differential entity (:ref:`v2_model_entities`), including PEtab output parameters, but excluding parameters listed in the - :ref:`v2_parameter_table` (independent of their ``estimate`` value). + :ref:`parameter tables ` + (independent of their ``estimate`` value). - ``LIST[...]``: A ``STRING`` that is a semicolon-delimited list of values, where each value can be interpreted as the type or value inside the brackets. + - ``OPTIONAL`` indicates that a column is optional whereas ``NULL`` indicates + that individual cells in a column may be empty. .. _v2_changes: @@ -128,12 +140,14 @@ PEtab 2.0.0 is a major update of the PEtab format. The main changes are: * Support for models in other formats than SBML (:ref:`v2_model`). * The use of different models for different measurements is now supported via the optional ``modelId`` column in the - :ref:`v2_measurement_table`, see also :ref:`v2_multiple_models`. - This was poorly defined in PEtab 1.0.0 and probably not used in practice. + :ref:`measurement table `, + see also :ref:`v2_multiple_models`. + This was poorly defined in PEtab 1.0.0. 
* The (now optional) condition table format changed from wide to long (:ref:`v2_condition_table`). * ``simulationConditionId`` and ``preequilibrationConditionId`` in the - :ref:`v2_measurement_table` are replaced by ``experimentId`` and a more + :ref:`measurement table ` + are replaced by ``experimentId`` and a more flexible way for defining experiments and time courses. This allows arbitrary sequences of conditions and combinations of conditions to be applied to the model (Figure 3 and :ref:`v2_experiment_table`). @@ -141,43 +155,50 @@ PEtab 2.0.0 is a major update of the PEtab format. The main changes are: .. figure:: gfx/v2_experiment.png :width: 80 % :align: center - :alt: A comparison of simulations in PEtab v1 and v2. + :alt: A comparison of simulations in PEtab 1.0 and 2.0. - **Figure 3: A comparison of simulations in PEtab v1 and v2.** + **Figure 3: A comparison of simulations in PEtab 1.0 and 2.0.** + While in PEtab 1.0, a simulation consisted of one or two periods, + PEtab 2.0 supports an arbitrary number of periods. + Furthermore, PEtab 2.0 allows specifying the initial time of the simulation. * Support for math expressions in the condition table - (:ref:`v2_condition_table`, :ref:`v2_math_expressions`). + (:ref:`condition table `, :ref:`v2_math_expressions`). * Clarification and specification of various previously underspecified aspects, including overriding values via the condition table (:ref:`v2_initialization_semantics`, :ref:`v2_reinitialization_semantics`). * Support for format :ref:`extensions `. * Observable IDs can now be used in observable and noise formulas (:ref:`v2_observable_table`). -* The ``parameterScale`` column of the :ref:`v2_parameter_table` is removed. +* The ``parameterScale`` column of the + :ref:`parameter table ` is removed. This change was made to simplify the PEtab format. This feature was a constant source of confusion and the interaction with parameter priors was not well-defined. 
To obtain the same effect, the model parameters can be transformed in the model file. * The ``initializationPriorType`` and ``initializationPriorParameters`` - columns of the :ref:`v2_parameter_table` are removed. Initialization - priors are outside the definition of the parameter estimation problem - and were a source of confusion. + columns of the :ref:`parameter table ` are removed. + Initialization priors are outside the definition of the parameter estimation + problem and were a source of confusion. * ``objectivePriorType`` and ``objectivePriorParameters`` in the - :ref:`v2_parameter_table` are renamed to ``priorDistribution`` and - ``priorParameters``, respectively. This change was made to simplify - the PEtab format. -* The admissible values for ``estimate`` in the :ref:`v2_parameter_table` + :ref:`parameter table ` are renamed to + ``priorDistribution`` and ``priorParameters``, respectively. + This change was made to simplify the PEtab format. +* The admissible values for ``estimate`` in the + :ref:`parameter table ` are now ``true`` and ``false`` instead of ``1`` and ``0``. * Support for new parameter prior distributions in the :ref:`v2_parameter_table`, and clarification that bounds truncate the prior distributions. -* The ``observableTransformation`` column of the :ref:`v2_observable_table` +* The ``observableTransformation`` column of the + :ref:`observable table ` has been combined with the ``noiseDistribution`` column to make its intent clearer. The ``log10`` transformation has been removed, since this was mostly relevant for visualization purposes, and the same effect can be achieved by rescaling the parameters of the respective (natural) log-distributions. -* The ``observableFormula`` field in the :ref:`v2_observable_table` must not +* The ``observableFormula`` field in the + :ref:`observable table ` must not contain any observable IDs. This was previously allowed, but it was not well-defined how to deal with placeholder parameters in this case. 
The ``noiseFormula`` field may contain only the observable ID of the @@ -186,11 +207,11 @@ PEtab 2.0.0 is a major update of the PEtab format. The main changes are: * Placeholders for measurement-specific parameters in ``observableFormula`` and ``noiseFormula`` are now declared using the ``observablePlaceholders`` and ``noisePlaceholders`` fields in the - :ref:`v2_observable_table`. This replaces the previous + :ref:`observable table `. This replaces the previous ``observableParameter${n}_${observableId}`` syntax. The new approach is more explicit and allows for more descriptive and shorter names for the placeholders. -* The visualization table has been removed. The PEtab v1 visualization table +* The visualization table has been removed. The PEtab 1.0 visualization table was not well-defined and not widely used. Visualization is handled by the PEtab Python library which also provides documentation on the respective input format. @@ -201,25 +222,26 @@ PEtab 2.0.0 is a major update of the PEtab format. The main changes are: Model definition ---------------- -PEtab 2.0.0 is **model format agnostic**, meaning it does not depend on a +PEtab 2.0 is **model-format–agnostic**, meaning it does not depend on a specific model description. The model file is referenced in the :ref:`PEtab -problem description (YAML) ` by its file name or a URL. +problem configuration file ` by its file name or a URL. PEtab distinguishes between three types of entities: -* **Differential entities**: Entities that are defined in terms of a - time-derivative, e.g., the targets of SBML rate rules or species that change - due to participation in reactions (reactants or products). +* **Differential entities**: Entities whose time evolution is defined in terms + of a time-derivative, e.g., the targets of SBML rate rules or species + that change due to participation in reactions (reactants or products). 
* **Algebraic entities**: Entities that are defined in terms of algebraic assignments, rather than time derivatives, that are in effect throughout the simulation. They are not necessarily constant, for example, the targets of SBML assignment rules. -* **Constant entities**: Entities are that not differential or algebraic +* **Constant entities**: Entities that are not differential or algebraic entities. They are defined in terms of an at least piecewise constant value but may be subject to event assignments, e.g., parameters of an SBML - model that are not targets of rate rules or assignment rules. + model that are not targets of rate rules or assignment rules + or determined by algebraic rules. .. _v2_condition_table: @@ -367,7 +389,7 @@ are applied in five consecutive phases: Experiment table ---------------- -The optional experiments table defines a sequence (Figure 3, lower) of +The optional experiment table defines a sequence (Figure 3, lower) of experimental conditions (i.e., discrete changes; see :ref:`v2_condition_table`) applied to the model. @@ -420,27 +442,25 @@ The experiment table has three mandatory columns ``experimentId``, .. note:: - In PEtab, the steady state definition is that *all* differential entities - are at steady state, meaning that all differential entities have reached, - and will remain at, a constant value. + In PEtab, a steady state is defined as a state in which *all* + differential entities have reached, and will remain at, a constant value. Determining whether differential entities are at steady state is left to - the simulator and user. Reasonable numerical criteria should be used - to determine whether a steady state is reached. Users should - share their chosen numerical criteria when sharing their model, for - reproducibility. - - It can be difficult to determine whether the differential entities are - at steady state. For example, events and other discontinuities may - occur after an apparent steady state is reached. 
It is left to the user to - avoid situations where this issue is problematic. - - If the simulation of a condition with steady state fails to reach a steady state, - and the condition is required for the evaluation of simulation at - measurement points, the evaluation of the model is not well-defined. + the simulator and user. Reasonable numerical criteria should be used, + and users are encouraged to share their chosen criteria when sharing their + model to ensure reproducibility. + + Determining steady state can be nontrivial; for example, events or other + discontinuities may occur after an apparent steady state has been reached. + It is the user’s responsibility to avoid situations where this ambiguity + is problematic. + + If the simulation of an experiment requiring steady state fails to reach a + steady state, the evaluation of the model at measurement points is not + well-defined. In such cases, PEtab interpreters should notify the user, for example, by returning ``NaN`` or ``inf`` values for the objective function. - PEtab does not specify a numerical criterion for steady states. + PEtab does not prescribe a numerical criterion for steady state. Any event triggers defined in the model must also be checked during this pre-simulation. @@ -469,7 +489,7 @@ The experiment table has three mandatory columns ``experimentId``, Measurement table ----------------- -A tab-separated values files containing all measurements to be used for +A tab-separated values file containing all measurements to be used for model training or validation. Expected to have the following named columns in any (but preferably this) @@ -542,7 +562,7 @@ Detailed field description the condition change is applied before the observable is evaluated (see :ref:`v2_reinitialization_semantics` for details). 
-- ``observableParameters`` [NUMERIC, STRING OR NULL, OPTIONAL] +- ``observableParameters`` [LIST[parameterId, NUMERIC], NULL, OPTIONAL] Measurement-specific overrides for placeholder parameters in the `observableFormula` declared in the @@ -567,7 +587,7 @@ Detailed field description If none of the observables referenced in a given measurement table use any noise placeholders, this column may be omitted there. -- ``noiseParameters`` [NUMERIC, STRING OR NULL, OPTIONAL] +- ``noiseParameters`` [LIST[parameterId, NUMERIC], NULL, OPTIONAL] Measurement-specific overrides for placeholder parameters in the `noiseFormula` declared in the @@ -640,24 +660,24 @@ The observable table has the following columns: +-----------------------+--------------------------------+-----------------------------------------------------------------------------+ | e.g. | | | +-----------------------+--------------------------------+-----------------------------------------------------------------------------+ -| relativeTotalProtein1 | Relative abundance of Protein1 | observableParameter1_relativeTotalProtein1 * (protein1 + phospho_protein1 ) | +| relativeTotalProtein1 | Relative abundance of Protein1 | scale_relTotProt1 * (protein1 + phospho_protein1 ) | +-----------------------+--------------------------------+-----------------------------------------------------------------------------+ | ... | ... | ... | +-----------------------+--------------------------------+-----------------------------------------------------------------------------+ *(wrapped for readability)* -+-----+---------------------------------------+-----------------------+ -| ... | noiseFormula | [noiseDistribution] | -+=====+=======================================+=======================+ -| ... | STRING\|NUMBER | *see below* | -+-----+---------------------------------------+-----------------------+ -| ... | | | -+-----+---------------------------------------+-----------------------+ -| ... 
| noiseParameter1_relativeTotalProtein1 | normal | -+-----+---------------------------------------+-----------------------+ -| ... | ... | ... | -+-----+---------------------------------------+-----------------------+ ++-----+---------------------------------------+-----------------------+--------------------------+---------------------+ +| ... | noiseFormula | [noiseDistribution] | [observablePlaceholders] | [noisePlaceholders] | ++=====+=======================================+=======================+==========================+=====================+ +| ... | STRING\|NUMBER | *see below* | *see below* | *see below* | ++-----+---------------------------------------+-----------------------+--------------------------+---------------------+ +| ... | | | | | ++-----+---------------------------------------+-----------------------+--------------------------+---------------------+ +| ... | sd_relTotProt1 | normal | scale_relTotProt1 | sd_relTotProt1 | ++-----+---------------------------------------+-----------------------+--------------------------+---------------------+ +| ... | ... | ... | ... | ... | ++-----+---------------------------------------+-----------------------+--------------------------+---------------------+ Detailed field description @@ -818,7 +838,8 @@ and *must not* include: - Placeholder parameters (see ``observableParameters`` and ``noiseParameters`` above) -- Parameters occurring as ``targetId`` in the *condition table* +- Parameters occurring as ``targetId`` in the + :ref:`condition table ` - "Parameters" that are not *constant* entities (e.g., in an SBML model, the targets of *AssignmentRules* or *EventAssignments*) - Any parameters that do not have valid PEtab IDs. @@ -872,8 +893,7 @@ Detailed field description - ``parameterName`` [STRING, OPTIONAL] - Parameter name to be used e.g. for plotting etc. Can be chosen freely. May - or may not coincide with the SBML parameter name. + Parameter name to be used, e.g., for plotting. Can be chosen freely.
- ``lowerBound`` [NUMERIC] @@ -1080,20 +1100,20 @@ Detailed field description .. _v2_problem_yaml: -YAML file for grouping files ----------------------------- +Problem configuration file +-------------------------- To link the model, measurement table, condition table, etc. in an unambiguous way, we use a `YAML <https://yaml.org>`_ file. - -This file also allows specifying a PEtab version and employed PEtab extensions. +This file also allows specifying a PEtab version and employed PEtab +:ref:`extensions `. Furthermore, this can be used to describe parameter estimation problems comprising multiple models (more details below). -The format is described in the -`jsonschema <../_static/petab_schema_v2.yaml>`_, which allows for -easy validation: +The format is described by the following JSON +`schema <../_static/petab_schema_v2.yaml>`_ :cite:p:`jsonschema-spec-2020-12`, +which allows for easy validation: .. literalinclude:: _static/petab_schema_v2.yaml :language: yaml @@ -1144,9 +1164,9 @@ This design has several implications: - The number of conditions to be simulated for a model-specific instance of an experiment may vary across models. - Each parameter defined in the :ref:`v2_parameter_table` has a shared value - across all models. Parameters not listed in the parameter table do not share - values, which can result in model-specific instantiations of model observables - referencing these parameters. + across all models. Parameters not listed in the parameter table(s) do not + share values, which can result in model-specific instantiations of model + observables referencing these parameters. Validation Rules ++++++++++++++++ @@ -1172,7 +1192,7 @@ defined in the PEtab problem. 1. Pre-initialization - 1. Parameters values for parameters that occur in the parameter table are + 1. Parameter values for parameters that occur in the parameter table are applied to the uninitialized model.
*Uninitialized* means that no model-internal initial values have been computed yet (e.g., in SBML models, no initial assignments have been @@ -1392,7 +1412,7 @@ The supported operators are: - | float * - ``!`` - 3 - - not + - logical ``not`` - - bool - bool @@ -1600,7 +1620,7 @@ the expression is interpreted as ``true && true = true``. Identifiers ----------- -* All identifiers in PEtab may only contain upper and lower case letters, +* All identifiers in PEtab may only contain upper and lower case ASCII letters, digits and underscores, and must not start with a digit. In PCRE2 regex, they must match ``[a-zA-Z_][a-zA-Z_\d]*``. @@ -1659,3 +1679,9 @@ Rules for extensions: support. * Toolboxes must reject PEtab problems that use extensions with ``required: true`` that they do not support. + + +References +---------- + +.. bibliography::