diff --git a/.github/workflows/level_1.yml b/.github/workflows/level_1.yml index 2ad0d935..4fc3ba8c 100644 --- a/.github/workflows/level_1.yml +++ b/.github/workflows/level_1.yml @@ -17,7 +17,7 @@ jobs: runs-on: ubuntu-latest environment: ${{ matrix.env }} strategy: - fail-fast: true + fail-fast: false matrix: python-version: ["3.13"] env: ${{ fromJSON( ((github.event_name == 'workflow_call' || github.event_name == 'release') && inputs.env != '') && format('["{0}"]', inputs.env) || '["dev","staging","prod"]' ) }} diff --git a/.vscode/settings.json b/.vscode/settings.json index 726711f3..272888ac 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -37,10 +37,15 @@ "drugability", "dtos", "emeq", + "ensembl", + "fasta", "finalizer", "FMCS", + "hbond", + "herg", "HETATM", "inchi", + "interpro", "isin", "isoparse", "kabsch", @@ -48,7 +53,10 @@ "Kekulize", "Konnektor", "kwargs", + "ligandability", "ligandset", + "logd", + "logp", "marimo", "mbar", "molblock", @@ -70,6 +78,7 @@ "rcsbapi", "rdchem", "rdkit", + "refseq", "replex", "resnames", "retryable", @@ -78,7 +87,10 @@ "SASA", "softcore", "Substruct", + "subtable", "textea", + "tpsa", + "uniprot", "venv" ] } \ No newline at end of file diff --git a/docs/dd/how-to/proteins.md b/docs/dd/how-to/proteins.md index 04966243..ce757475 100644 --- a/docs/dd/how-to/proteins.md +++ b/docs/dd/how-to/proteins.md @@ -37,6 +37,37 @@ from deeporigin.drug_discovery import Protein protein = Protein.from_name("insulin") ``` +### From a Deep Origin Data Platform ID + +You can create a Protein instance directly from a Deep Origin Data Platform ID. This method fetches the protein data from the platform, downloads the structure file, and creates a Protein instance with metadata from the platform: + +```python +from deeporigin.drug_discovery import Protein + +protein = Protein.from_id("08AD337N5YV4Y") +``` + +The method automatically: +- Downloads the structure file from the Deep Origin Data Platform +- Sets the protein's ID, name, and PDB ID (if available) from the platform metadata +- Creates a Protein instance from the downloaded file + +You can optionally provide a custom `DeepOriginClient` instance: + +```python +from deeporigin.drug_discovery import Protein +from deeporigin.platform.client import DeepOriginClient + +client = DeepOriginClient() +protein = Protein.from_id("08AD337N5YV4Y", client=client) +``` + +!!! warning "Requires file_path" + The protein data in the platform must contain a `file_path` field. If the protein data does not have a file_path, a `ValueError` will be raised. + +!!! note "Automatic metadata" + The method automatically populates the protein's `name` field from the platform data, preferring `protein_name`, then `pdb_id`, then `gene_symbol` (in that order). 
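+
+For example, after loading you can inspect the metadata fields populated from the platform record (a minimal sketch, reusing the example ID from above):
+
+```python
+protein = Protein.from_id("08AD337N5YV4Y")
+
+print(protein.id)      # the Deep Origin Data Platform ID
+print(protein.name)    # taken from protein_name, pdb_id, or gene_symbol
+print(protein.pdb_id)  # set only when the platform record includes a PDB ID
+```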
+
 ## Inspecting the Protein
diff --git a/docs/notebooks/clean/docking-single-ligand.ipynb b/docs/notebooks/clean/docking-single-ligand.ipynb
index 1ed9748b..babb5c8a 100644
--- a/docs/notebooks/clean/docking-single-ligand.ipynb
+++ b/docs/notebooks/clean/docking-single-ligand.ipynb
@@ -94,6 +94,16 @@
     "sim"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2d8bfa9a-c142-4deb-ac3d-d2a4f581df55",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "protein._remote_path"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "e2aa58aa",
@@ -115,6 +125,16 @@
     "ligands"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "95f716a7-4a4c-4c23-9d61-130fc3e9a72f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ligands.to_smiles()"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "94e19bec-6bee-4dbf-9c47-1490c41fdbd0",
diff --git a/docs/platform/ref/data.md b/docs/platform/ref/data.md
new file mode 100644
index 00000000..90a610b0
--- /dev/null
+++ b/docs/platform/ref/data.md
@@ -0,0 +1,68 @@
+# Data Platform API
+
+The Data Platform API is accessed through the `data` attribute of a `DeepOriginClient`:
+
+```{.python notest}
+from deeporigin.platform.client import DeepOriginClient
+
+client = DeepOriginClient()
+```
+
+The following methods are then available, for example:
+
+```{.python notest}
+# Check the health status of the data platform
+health_status = client.data.health()
+
+# Search ligands joined with tool results
+results = client.data.search_ligands_with_results(
+    limit=10,
+    experiments=[{"toolId": "deeporigin.docking"}],
+)
+
+# Search an entity (e.g., ligands)
+results = client.data.search("ligands")
+
+# Search ligands using convenience method
+results = client.data.search_ligands(limit=10)
+
+# Search proteins using convenience method
+results = client.data.search_proteins(limit=10)
+
+# Create a new ligand
+ligand = client.data.create_ligand(
+    project_id="\\x0011223344556677",
+    smiles="CCOc1ccc2nc(S(=O)(=O)N3CCN(CC3)C)c(N)c2c1",
+    name="Compound-12345",
+    formal_charge=0,
+    hbond_donor_count=1,
+    hbond_acceptor_count=6,
+    rotatable_bond_count=5,
+    tpsa=85.12,
+    molecular_weight=447.5,
+)
+
+# List projects
+projects = client.data.list_projects()
+
+# List public models
+models = client.data.list_models()
+```
+
+
+::: src.platform.data.Data
+    options:
+      heading_level: 2
+      docstring_style: google
+      show_root_heading: true
+      show_category_heading: true
+      show_object_full_path: false
+      show_root_toc_entry: false
+      inherited_members: true
+      members_order: alphabetical
+      filters:
+        - "!^_" # Exclude private members (names starting with "_")
+      show_signature: true
+      show_signature_annotations: true
+      show_if_no_docstring: true
+      group_by_category: true
diff --git a/mkdocs.yaml b/mkdocs.yaml
index 3fe49e68..d1b1d235 100644
--- a/mkdocs.yaml
+++ b/mkdocs.yaml
@@ -87,6 +87,7 @@ nav:
       - functions: platform/ref/functions.md
       - organizations: platform/ref/organizations.md
       - billing: platform/ref/billing.md
+      - data: platform/ref/data.md
   - Developing:
       - Installing: dev/install.md
       - Clients: dev/clients.md
diff --git a/src/drug_discovery/structures/entity.py b/src/drug_discovery/structures/entity.py
index f1378dfd..56c416ea 100644
--- a/src/drug_discovery/structures/entity.py
+++ b/src/drug_discovery/structures/entity.py
@@ -5,7 +5,7 @@
 """
 
 from abc import ABC, abstractmethod
-from dataclasses import dataclass
+from 
dataclasses import dataclass, field from pathlib import Path from typing import Optional @@ -20,6 +20,8 @@ class Entity(ABC): This class manages the remote path and provides an upload method to ensure that the entity's file is uploaded to the remote storage if it does not already exist there. It uses the DeepOrigin FilesClient for remote file operations. """ + id: str | None = field(default=None, kw_only=True) + @abstractmethod def to_hash(self) -> str: """computes a hash of the entity""" diff --git a/src/drug_discovery/structures/ligand.py b/src/drug_discovery/structures/ligand.py index 121b1394..59be5200 100644 --- a/src/drug_discovery/structures/ligand.py +++ b/src/drug_discovery/structures/ligand.py @@ -692,6 +692,115 @@ def coordinates(self): def atom_types(self): return self.get_species() + @property + def formal_charge(self) -> int: + """Compute the formal charge of the ligand molecule. + + Returns: + int: The sum of formal charges of all atoms in the molecule. + """ + if self.mol is None: + raise DeepOriginException("Cannot compute formal charge: molecule is None") + return sum(atom.GetFormalCharge() for atom in self.mol.GetAtoms()) + + @property + def molecular_weight(self) -> float: + """Compute the exact molecular weight of the ligand molecule. + + Returns: + float: The exact molecular weight in atomic mass units. + """ + if self.mol is None: + raise DeepOriginException( + "Cannot compute molecular weight: molecule is None" + ) + return rdMolDescriptors.CalcExactMolWt(self.mol) + + @property + def hbond_donor_count(self) -> int: + """Compute the number of hydrogen bond donors in the ligand molecule. + + Returns: + int: The number of hydrogen bond donors. + """ + if self.mol is None: + raise DeepOriginException( + "Cannot compute H-bond donor count: molecule is None" + ) + return rdMolDescriptors.CalcNumHBD(self.mol) + + @property + def hbond_acceptor_count(self) -> int: + """Compute the number of hydrogen bond acceptors in the ligand molecule. + + Returns: + int: The number of hydrogen bond acceptors. + """ + if self.mol is None: + raise DeepOriginException( + "Cannot compute H-bond acceptor count: molecule is None" + ) + return rdMolDescriptors.CalcNumHBA(self.mol) + + @property + def rotatable_bond_count(self) -> int: + """Compute the number of rotatable bonds in the ligand molecule. + + Returns: + int: The number of rotatable bonds. + """ + if self.mol is None: + raise DeepOriginException( + "Cannot compute rotatable bond count: molecule is None" + ) + return rdMolDescriptors.CalcNumRotatableBonds(self.mol) + + @property + def tpsa(self) -> float: + """Compute the Topological Polar Surface Area (TPSA) of the ligand molecule. + + Returns: + float: The TPSA value in square Angstroms. + """ + if self.mol is None: + raise DeepOriginException("Cannot compute TPSA: molecule is None") + return rdMolDescriptors.CalcTPSA(self.mol) + + @property + def canonical_smiles(self) -> str: + """ + Canonical (RDKit) SMILES for this ligand. + + Notes: + - Canonicalization is RDKit-specific. + - Returns implicit-H SMILES by default (explicit Hs removed). + - Preserves stereochemistry if present. 
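+            - Raises DeepOriginException if no RDKit mol is available and the stored SMILES is missing or cannot be parsed.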
+ """ + mol = None + + if self.mol is not None: + mol = self.mol + elif self.smiles is not None: + mol = Chem.MolFromSmiles(self.smiles) + if mol is None: + raise DeepOriginException(f"Invalid SMILES: {self.smiles!r}") + else: + raise DeepOriginException( + "Cannot compute canonical SMILES: missing mol and smiles" + ) + + # Remove explicit Hs so we don't emit `[H]...` everywhere + mol = Chem.RemoveHs(mol) + + # ensure sanitization: + Chem.SanitizeMol(mol) + + return Chem.MolToSmiles( + mol, + canonical=True, + isomericSmiles=True, # keep stereochem + ) + def set_property(self, prop_name: str, prop_value): """ Set a property for the ligand molecule. @@ -881,6 +990,79 @@ def to_hash(self) -> str: return hash_hex + @beartype + def sync(self, client: Optional[DeepOriginClient] = None) -> None: + """Sync the ligand to the data platform. + + This method uploads the ligand file to remote storage (if available) and creates a ligand + record in the data platform. If a ligand with the same canonical_smiles already exists, + it returns the existing ligand data instead of creating a new one. + + Args: + client: DeepOriginClient instance. If None, uses DeepOriginClient.get(). + + Note: + If the ligand was created from a SMILES string without an SDF file, only the SMILES + will be used for syncing (no file upload will occur). + """ + if client is None: + client = DeepOriginClient.get() + + # If ligand has a file_path, upload it to remote storage + # (Note: ligands in the data platform are identified by canonical_smiles, not file_path) + mol_file: str | None = None + if self.file_path is not None: + # Upload the ligand file first + self.upload(client=client) + # Use the remote path as the mol_file + mol_file = self._remote_path + + # Search for existing ligands by canonical_smiles + response = client.data.search_ligands(canonical_smiles=self.canonical_smiles) + data = response["data"] + + # If a ligand with this canonical_smiles already exists, update self.id and return + if data: + existing_ligand = data[0] + if "id" in existing_ligand: + self.id = existing_ligand["id"] + return + + # No existing ligand found, create a new one + # Prepare parameters for create_ligand + # Note: canonical_smiles is read-only and computed by the platform + kwargs: dict[str, Any] = { + "smiles": self.smiles if self.smiles is not None else self.canonical_smiles, + } + + # Add mol_file if available + if mol_file is not None: + kwargs["mol_file"] = mol_file + + # Add optional fields if available + if self.name is not None: + kwargs["name"] = self.name + + # Add computed molecular properties if mol is available + if self.mol is not None: + try: + kwargs["formal_charge"] = self.formal_charge + kwargs["molecular_weight"] = self.molecular_weight + kwargs["hbond_donor_count"] = self.hbond_donor_count + kwargs["hbond_acceptor_count"] = self.hbond_acceptor_count + kwargs["rotatable_bond_count"] = self.rotatable_bond_count + kwargs["tpsa"] = self.tpsa + except Exception: + # If property computation fails, continue without those properties + pass + + # Call create_ligand through the client + result = client.data.create_ligand(**kwargs) + + # Update self.id with the newly created ligand's ID + if "data" in result and "id" in result["data"]: + self.id = result["data"]["id"] + @beartype def to_pdb(self, output_path: Optional[str] = None) -> str | Path: """Write the ligand to a PDB file.""" diff --git a/src/drug_discovery/structures/protein.py b/src/drug_discovery/structures/protein.py index 56898000..ce44d660 100644 --- 
a/src/drug_discovery/structures/protein.py +++ b/src/drug_discovery/structures/protein.py @@ -67,6 +67,56 @@ def from_name(cls, name: str) -> Self: return cls.from_pdb_id(pdb_id) + @classmethod + def from_id(cls, id: str, *, client: Optional[DeepOriginClient] = None) -> Self: + """ + Create a Protein instance from a Deep Origin Data Platform ID. + + Args: + id: The Deep Origin Data Platform ID of the protein. + client: Optional DeepOriginClient instance. If not provided, uses the default client. + + Returns: + Protein: A new Protein instance. + + Raises: + ValueError: If the protein data does not contain a file_path. + RuntimeError: If the file cannot be downloaded or loaded. + """ + if client is None: + client = DeepOriginClient.get() + + data = client.data.get_protein(id=id) + + # Check if file_path exists + file_path = data.get("file_path") + if not file_path: + raise ValueError( + f"Protein {id} does not have a file_path. Cannot create Protein instance without structure file." + ) + + # Download the file + local_file_path = client.files.download_file(remote_path=file_path) + + # Create Protein instance from the downloaded file + protein = cls.from_file(file_path=local_file_path) + + # Set the ID from the data + protein.id = data.get("id") + + # Update fields from the data + if data.get("protein_name"): + protein.name = data["protein_name"] + elif data.get("pdb_id"): + protein.name = data["pdb_id"] + elif data.get("gene_symbol"): + protein.name = data["gene_symbol"] + + if data.get("pdb_id"): + protein.pdb_id = data["pdb_id"] + + return protein + @classmethod def from_pdb_id(cls, pdb_id: str, struct_ind: int = 0) -> Self: """ @@ -1308,11 +1358,75 @@ def __str__(self): info_str += f"Info: {self.info}\n" return f"Protein:\n {info_str}" + @beartype + def sync(self, client: Optional[DeepOriginClient] = None) -> None: + """Sync the protein to the data platform. + + This method uploads the protein file to remote storage and creates a protein + record in the data platform. If a protein with the same file_path already exists, + it updates the current instance with the existing protein's ID instead of + creating a new one. + + Args: + client: DeepOriginClient instance. If None, uses DeepOriginClient.get(). + + Returns: + None. As a side effect, uploads the protein (if necessary) and updates + ``self.id`` with the ID of the existing or newly created protein record. 
+ """ + if client is None: + client = DeepOriginClient.get() + + # Upload the protein file first + self.upload(client=client) + + # Use the remote path as the file_path + file_path = self._remote_path + + # Search for existing proteins with the same file_path + response = client.data.search_proteins(file_path=file_path) + data = response["data"] + + # If a protein with this file_path already exists, return the first one + if data: + existing_protein = data[0] + # Update self.id with the existing protein's ID + if "id" in existing_protein: + self.id = existing_protein["id"] + return + + # No existing protein found, create a new one + # Prepare parameters for create_protein + kwargs: dict[str, Any] = { + "file_path": file_path, + } + + # Pass pdb_id if available + if self.pdb_id is not None: + kwargs["pdb_id"] = self.pdb_id + + # Only compute and include protein_length when a local file_path is available + if getattr(self, "file_path", None) is not None: + kwargs["protein_length"] = self.length + kwargs["protein_name"] = self.name + + # Call create_protein through the client + result = client.data.create_protein(**kwargs) + + # Update self.id with the newly created protein's ID + if "data" in result and "id" in result["data"]: + self.id = result["data"]["id"] + def update_coordinates(self, coords: np.ndarray): """update coordinates of the protein structure""" self.structure.coord = coords + @property + def length(self) -> int: + """get the length of the protein structure""" + return sum([len(seq) for seq in self.sequence]) + def validate_pdb_file(file_path: str | Path) -> None: """validate a PDB file by checking if it can be parsed by RDKit diff --git a/src/platform/client.py b/src/platform/client.py index ec6c533a..fb66388c 100644 --- a/src/platform/client.py +++ b/src/platform/client.py @@ -22,6 +22,7 @@ from deeporigin.exceptions import DeepOriginException from deeporigin.platform.billing import Billing from deeporigin.platform.clusters import Clusters +from deeporigin.platform.data import Data from deeporigin.platform.executions import Executions from deeporigin.platform.files import Files from deeporigin.platform.functions import Functions @@ -306,6 +307,7 @@ def __init__( self.executions = Executions(self) self.organizations = Organizations(self) self.billing = Billing(self) + self.data = Data(self) # Retry configuration self.max_retries = max_retries diff --git a/src/platform/data.py b/src/platform/data.py new file mode 100644 index 00000000..242dd6c0 --- /dev/null +++ b/src/platform/data.py @@ -0,0 +1,550 @@ +"""Data Platform API wrapper for DeepOriginClient.""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +if TYPE_CHECKING: + from deeporigin.platform.client import DeepOriginClient + + +class Data: + """Data Platform API wrapper. + + Provides access to data platform-related endpoints through the DeepOriginClient. + """ + + def __init__(self, client: DeepOriginClient) -> None: + """Initialize Data wrapper. + + Args: + client: The DeepOriginClient instance to use for API calls. + """ + self._c = client + self._models: dict | None = None + + def health(self) -> dict: + """Check the health status of the data platform. + + Returns: + Dictionary containing the health status response. + """ + return self._c.get_json("/data-platform/health") + + def list_models(self) -> dict: + """List public models. + + The result is cached per instance. + + Returns: + Dictionary containing the list of models. 
+ """ + if self._models is None: + self._models = self._c.get_json( + f"/data-platform/{self._c.org_key}/meta/models" + ) + return self._models + + def search_ligands_with_results( + self, + *, + cursor: str | None = None, + experiments: list[dict[str, str]] | None = None, + filter_dict: dict[str, Any] | None = None, + limit: int | None = None, + offset: int | None = None, + select: list[str] | None = None, + sort: dict[str, str] | None = None, + ) -> dict: + """Search ligands joined with tool results (wide pivot view). + + Args: + cursor: Cursor for pagination. + experiments: List of experiment filters, each containing toolId and + optionally toolVersion. + filter_dict: Additional filter criteria as a dictionary. + limit: Maximum number of results to return. Defaults to 100. + offset: Number of results to skip. + select: List of fields to select in the response. + sort: Dictionary mapping field names to sort order ("asc" or "desc"). + + Returns: + Dictionary containing the search results. + """ + # Ensure deleted=False is always set in filter_dict + if filter_dict is None: + filter_dict = {"deleted": False} + else: + filter_dict = filter_dict.copy() + filter_dict["deleted"] = False + + body: dict[str, Any] = {} + if cursor is not None: + body["cursor"] = cursor + if experiments is not None: + body["experiments"] = experiments + body["filter"] = filter_dict + + if limit is not None: + body["limit"] = limit + if offset is not None: + body["offset"] = offset + if select is not None: + body["select"] = select + if sort is not None: + body["sort"] = sort + + return self._c.post_json( + f"/data-platform/{self._c.org_key}/ligands_with_results/search", + body=body, + ) + + def search( + self, + entity: str, + *, + cursor: str | None = None, + filter_dict: dict[str, Any] | None = None, + limit: int | None = None, + offset: int | None = None, + select: list[str] | None = None, + sort: dict[str, str] | None = None, + ) -> dict: + """Search an entity (table). + + Args: + entity: Entity (table) name to search (e.g., "ligands"). + cursor: Cursor for pagination. + filter_dict: Additional filter criteria as a dictionary. + limit: Maximum number of results to return. Defaults to 100. + offset: Number of results to skip. + select: List of fields to select in the response. + sort: Dictionary mapping field names to sort order ("asc" or "desc"). + + Returns: + Dictionary containing the search results. + + Raises: + ValueError: If the entity is not a valid table name. + """ + # Validate entity against list of available models + models_response = self.list_models() + valid_table_names = { + model["tableName"] for model in models_response.get("models", []) + } + if entity not in valid_table_names: + raise ValueError( + f"Invalid entity '{entity}'. 
Valid entities are: {', '.join(sorted(valid_table_names))}" + ) + + if filter_dict is None: + filter_dict = {"deleted": False} + else: + filter_dict = filter_dict.copy() + filter_dict["deleted"] = False + + body: dict[str, Any] = {} + if cursor is not None: + body["cursor"] = cursor + + body["filter"] = filter_dict + if limit is not None: + body["limit"] = limit + if offset is not None: + body["offset"] = offset + if select is not None: + body["select"] = select + if sort is not None: + body["sort"] = sort + + return self._c.post_json( + f"/data-platform/{self._c.org_key}/{entity}/search", + body=body, + ) + + def search_ligands( + self, + *, + cursor: str | None = None, + filter_dict: dict[str, Any] | None = None, + smiles: str | None = None, + canonical_smiles: str | None = None, + min_molecular_weight: float | int | None = None, + max_molecular_weight: float | int | None = None, + limit: int | None = None, + offset: int | None = None, + select: list[str] | None = None, + sort: dict[str, str] | None = None, + ) -> dict: + """Search ligands entity. + + Convenience method that calls search(entity="ligands"). + + Args: + cursor: Cursor for pagination. + filter_dict: Additional filter criteria as a dictionary. + smiles: Filter by SMILES string. + canonical_smiles: Filter by canonical SMILES string. + min_molecular_weight: Minimum molecular weight filter (inclusive). + max_molecular_weight: Maximum molecular weight filter (inclusive). + limit: Maximum number of results to return. Defaults to 100. + offset: Number of results to skip. + select: List of fields to select in the response. + sort: Dictionary mapping field names to sort order ("asc" or "desc"). + + Returns: + Dictionary containing the search results. + + Raises: + ValueError: If ligands is not a valid table name (should not happen). + """ + # Build filter dict, starting with provided filter_dict or empty dict + filter_dict = filter_dict.copy() if filter_dict is not None else {} + filter_dict.setdefault("deleted", False) + + # Add smiles filter if provided + if smiles is not None: + filter_dict["smiles"] = smiles + + # Add canonical_smiles filter if provided + if canonical_smiles is not None: + filter_dict["canonical_smiles"] = canonical_smiles + + # Build molecular weight filters + props = [] + if min_molecular_weight is not None: + props.append( + { + "column": "molecular_weight", + "op": "gte", + "value": min_molecular_weight, + } + ) + if max_molecular_weight is not None: + props.append( + { + "column": "molecular_weight", + "op": "lte", + "value": max_molecular_weight, + } + ) + + if props: + # Merge with existing props if any + existing_props = filter_dict.get("props", []) + filter_dict["props"] = existing_props + props + + return self.search( + "ligands", + cursor=cursor, + filter_dict=filter_dict, + limit=limit, + offset=offset, + select=select, + sort=sort, + ) + + def get_entity(self, *, entity: str, entity_id: str) -> dict: + """Get an entity by ID. + + Args: + entity: The entity type (e.g., "ligands", "proteins"). + entity_id: The ID of the entity to retrieve. + + Returns: + Dictionary containing the entity data. + """ + return self._c.get_json( + f"/data-platform/{self._c.org_key}/{entity}/{entity_id}" + ) + + def get_ligand(self, id: str) -> dict: + """Get a ligand by ID. + + Args: + id: The ID of the ligand to retrieve. + + Returns: + Dictionary containing the ligand data. + """ + return self.get_entity(entity="ligands", entity_id=id) + + def get_protein(self, id: str) -> dict: + """Get a protein by ID. 
+ + Args: + id: The ID of the protein to retrieve. + + Returns: + Dictionary containing the protein data. + """ + return self.get_entity(entity="proteins", entity_id=id) + + def search_proteins( + self, + *, + cursor: str | None = None, + pdb_id: str | None = None, + file_path: str | None = None, + min_molecular_weight: float | int | None = None, + max_molecular_weight: float | int | None = None, + sequence: str | None = None, + limit: int | None = None, + offset: int | None = None, + select: list[str] | None = None, + sort: dict[str, str] | None = None, + ) -> dict: + """Search proteins entity. + + Convenience method that calls search(entity="proteins"). + + Args: + cursor: Cursor for pagination. + pdb_id: Filter by PDB ID. + file_path: Filter by file path. + min_molecular_weight: Minimum molecular weight filter (inclusive). + max_molecular_weight: Maximum molecular weight filter (inclusive). + sequence: Filter by FASTA sequence (exact match). + limit: Maximum number of results to return. Defaults to 100. + offset: Number of results to skip. + select: List of fields to select in the response. + sort: Dictionary mapping field names to sort order ("asc" or "desc"). + + Returns: + Dictionary containing the search results. + + Raises: + ValueError: If proteins is not a valid table name (should not happen). + """ + + filter_dict = {"deleted": False} + if pdb_id is not None: + filter_dict["pdb_id"] = pdb_id + if file_path is not None: + filter_dict["file_path"] = file_path + + # Build molecular weight filters + props = [] + if min_molecular_weight is not None: + props.append( + { + "column": "molecular_weight", + "op": "gte", + "value": min_molecular_weight, + } + ) + if max_molecular_weight is not None: + props.append( + { + "column": "molecular_weight", + "op": "lte", + "value": max_molecular_weight, + } + ) + if sequence is not None: + props.append( + { + "column": "fasta_sequence", + "op": "eq", + "value": sequence, + } + ) + + if props: + filter_dict["props"] = props + + return self.search( + "proteins", + cursor=cursor, + filter_dict=filter_dict, + limit=limit, + offset=offset, + select=select, + sort=sort, + ) + + def create_ligand( + self, + *, + smiles: str, + project_id: str | None = None, + name: str | None = None, + mol_file: str | None = None, + formal_charge: int = 0, + hbond_donor_count: int | None = None, + hbond_acceptor_count: int | None = None, + rotatable_bond_count: int | None = None, + tpsa: float | None = None, + molecular_weight: float | None = None, + variant_name_tag: str = "", + ) -> dict: + """Create a new ligand. + + Args: + smiles: SMILES string (required). + project_id: Project ID for the ligand. + name: Name of the ligand. + mol_file: Path to the molecule file (e.g., SDF file) in remote storage. + formal_charge: Formal charge. Defaults to 0. + hbond_donor_count: Number of hydrogen bond donors. + hbond_acceptor_count: Number of hydrogen bond acceptors. + rotatable_bond_count: Number of rotatable bonds. + tpsa: Topological polar surface area. + molecular_weight: Molecular weight. + variant_name_tag: Variant name tag. Defaults to empty string. + + Returns: + Dictionary containing the created ligand data. 
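+
+        Note:
+            The platform computes ``canonical_smiles`` from the submitted SMILES;
+            it is returned in the response but cannot be passed as an input here.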
+ """ + # Build the set object with all ligand properties + set_dict: dict[str, Any] = { + "subtable_name": "ligands", + "smiles": smiles, + "formal_charge": formal_charge, + "variant_name_tag": variant_name_tag, + } + + # Add optional fields only if provided + if project_id is not None: + set_dict["project_id"] = project_id + if name is not None: + set_dict["name"] = name + if mol_file is not None: + set_dict["mol_file"] = mol_file + if hbond_donor_count is not None: + set_dict["hbond_donor_count"] = hbond_donor_count + if hbond_acceptor_count is not None: + set_dict["hbond_acceptor_count"] = hbond_acceptor_count + if rotatable_bond_count is not None: + set_dict["rotatable_bond_count"] = rotatable_bond_count + if tpsa is not None: + set_dict["tpsa"] = tpsa + if molecular_weight is not None: + set_dict["molecular_weight"] = molecular_weight + + body: dict[str, Any] = { + "set": set_dict, + "returning": [ + "id", + "version", + "valid_from", + "valid_to", + "modified_by", + "deleted", + "mol_file", + "project_id", + "subtable_name", + "canonical_smiles", + "smiles", + "inchi_key", + "inchi", + "name", + "formal_charge", + "hbond_donor_count", + "hbond_acceptor_count", + "rotatable_bond_count", + "tpsa", + "molecular_weight", + "log_p", + "structure_key", + ], + } + + return self._c.post_json( + f"/data-platform/{self._c.org_key}/ligands", + body=body, + ) + + def create_protein( + self, + *, + file_path: str, + gene_symbol: str | None = None, + pdb_id: str | None = None, + fasta_sequence: str | None = None, + protein_name: str | None = None, + protein_length: int | None = None, + project_id: str | None = None, + ) -> dict: + """Create a new protein. + + Args: + file_path: Path to the protein file (required). + gene_symbol: Gene symbol. + pdb_id: PDB ID. + fasta_sequence: FASTA sequence. + protein_name: Protein name. + protein_length: Protein length. + project_id: Project ID for the protein. + + Returns: + Dictionary containing the created protein data. + """ + # Build the set object with all protein properties + set_dict: dict[str, Any] = { + "file_path": file_path, + } + + # Add optional fields only if provided + if project_id is not None: + set_dict["project_id"] = project_id + if gene_symbol is not None: + set_dict["gene_symbol"] = gene_symbol + if pdb_id is not None: + set_dict["pdb_id"] = pdb_id + if fasta_sequence is not None: + set_dict["fasta_sequence"] = fasta_sequence + if protein_name is not None: + set_dict["protein_name"] = protein_name + if protein_length is not None: + set_dict["protein_length"] = protein_length + + body: dict[str, Any] = { + "set": set_dict, + "returning": [ + "id", + "version", + "valid_from", + "valid_to", + "modified_by", + "deleted", + "project_id", + "subtable_name", + "uniprot_accession", + "file_path", + "gene_symbol", + "pdb_id", + "refseq_protein_id", + "ensembl_protein_id", + "alpha_fold_id", + "fasta_sequence", + "protein_name", + "kegg_gene_id", + "chembl_target_id", + "binding_db_target_id", + "drugbank_target_id", + "pfam_id", + "interpro_id", + "ec_number", + "ncbi_taxonomy_id", + "protein_family", + "ligandability_score", + "protein_length", + ], + } + + return self._c.post_json( + f"/data-platform/{self._c.org_key}/proteins", + body=body, + ) + + def list_projects(self) -> dict: + """List projects. + + Returns: + Dictionary containing the list of projects. 
+ """ + return self._c.post_json( + f"/data-platform/{self._c.org_key}/projects/search", + body={}, + ) diff --git a/tests/fixtures/ligand_08B05B1GDYWJR.json b/tests/fixtures/ligand_08B05B1GDYWJR.json new file mode 100644 index 00000000..a4b101cd --- /dev/null +++ b/tests/fixtures/ligand_08B05B1GDYWJR.json @@ -0,0 +1,70 @@ +{ + "id": "08B05B1GDYWJR", + "created_at": "2026-02-19T18:40:33.979Z", + "updated_at": "2026-02-19T18:40:33.979Z", + "version": 1, + "valid_from": "2026-02-19T18:40:33.979Z", + "valid_to": null, + "modified_by": "6b96d8f8-0f55-474c-a86c-e09651ba4b20", + "deleted": false, + "project_id": null, + "project_scope_key": "__unscoped__", + "mol_file": null, + "rdkit_mol": "C/C=C/Cn1cc(-c2cccc(C(=O)N(C)C)c2)c2cc[nH]c2c1=O", + "smiles": "C/C=C/Cn1cc(-c2cccc(C(=O)N(C)C)c2)c2cc[nH]c2c1=O", + "canonical_smiles": "C/C=C/Cn1cc(-c2cccc(C(=O)N(C)C)c2)c2cc[nH]c2c1=O", + "inchi_key": "RJEMCUZKQLRUIS-SNAWJCMRSA-N", + "inchi": "InChI=1S/C20H21N3O2/c1-4-5-11-23-13-17(16-9-10-21-18(16)20(23)25)14-7-6-8-15(12-14)19(24)22(2)3/h4-10,12-13,21H,11H2,1-3H3/b5-4+", + "subtable_name": "ligands", + "variant_name_tag": "", + "structure_key": "RJEMCUZKQLRUIS-SNAWJCMRSA-N", + "smirks": null, + "name": "cmpd 4 (Crotyl)", + "sa_score": null, + "qed_score": null, + "topological_fingerprint": "\\xaf3c9d0b3481ef0669925e6bdd530c7d9c79493f5a6e9693ce1c583fa9055f7bf9b5327ec30cfe64da3d60ee21e9e0c15ef6e1358c96974e63ffca31a9d8b5223a9f1e547162192515752b2b13948da71b15cfcc0c365182b1c88af7e731d3a91049d899971ef44ff157c152f77df9d2289e26c5d7821287439edb9d4c23e287", + "morgan_fingerprint": "\\x06800010224000000100010400002000000d000000004000a00041000100004000042000c000000408301200910a122000000000808000100300018004000005", + "formal_charge": 0, + "hbond_donor_count": 1, + "hbond_acceptor_count": 3, + "rotatable_bond_count": 7, + "tpsa": 58.1, + "molecular_weight": 335.16337691200056, + "aromatic_ring_count": null, + "log_p": null, + "external_id": null, + "cas_registry_number": null, + "chembl_id": null, + "pdb_ligand_id": null, + "drugbank_id": null, + "zinc_id": null, + "pubchem_cid": null, + "binding_db_id": null, + "rule_of5_violations": null, + "bemis_murcko_scaffold": null, + "canonical_tautomer": null, + "charge_state": null, + "pka_values": null, + "bioavailability_score": null, + "rotamer_state_count": null, + "topological_diameter": null, + "chebi_id": null, + "kegg_compound_id": null, + "uni_chem_id": null, + "chem_spider_id": null, + "iuphar_ligand_id": null, + "sure_chembl_id": null, + "hmdb_id": null, + "nsc_number": null, + "embl_compound_id": null, + "lincs_id": null, + "maccs_keys": null, + "atom_pair_fingerprint": null, + "selfies": null, + "polarizability": null, + "refractivity": null, + "conformer_count": null, + "electrostatic_potential_map": null, + "pains_flag": null, + "unii": null +} diff --git a/tests/fixtures/protein_08AD337N5YV4Y.json b/tests/fixtures/protein_08AD337N5YV4Y.json new file mode 100644 index 00000000..33f82d8e --- /dev/null +++ b/tests/fixtures/protein_08AD337N5YV4Y.json @@ -0,0 +1,46 @@ +{ + "id": "08AD337N5YV4Y", + "created_at": "2026-02-18T20:27:28.073Z", + "updated_at": "2026-02-18T20:27:28.073Z", + "version": 1, + "valid_from": "2026-02-18T20:27:28.073Z", + "valid_to": null, + "modified_by": "6b96d8f8-0f55-474c-a86c-e09651ba4b20", + "deleted": false, + "project_id": null, + "subtable_name": "proteins", + "uniprot_accession": null, + "file_path": "entities/proteins/db4aa32e2e8ffa976a60004a8361b86427a2e5653a6623bb60b7913445902549.pdb", + "gene_symbol": null, + "pdb_id": null, + 
"refseq_protein_id": null, + "ensembl_protein_id": null, + "alpha_fold_id": null, + "fasta_sequence": null, + "protein_name": null, + "external_id": null, + "kegg_gene_id": null, + "chembl_target_id": null, + "binding_db_target_id": null, + "drugbank_target_id": null, + "pfam_id": null, + "interpro_id": null, + "ec_number": null, + "ncbi_taxonomy_id": null, + "go_term_id": null, + "uniprot_entry_name": null, + "uni_parc": null, + "reactome_protein_id": null, + "hgnc_id": null, + "orth_db_id": null, + "ensembl_transcript_id": null, + "tcr_bcr_ids": null, + "protein_family": null, + "disordered_regions": null, + "ligandability_score": null, + "protein_length": null, + "half_life": null, + "molecular_weight": null, + "isoelectric_point": null, + "subcellular_location": null +} \ No newline at end of file diff --git a/tests/mock_server/server.py b/tests/mock_server/server.py index 5ab6a86f..6763e07f 100644 --- a/tests/mock_server/server.py +++ b/tests/mock_server/server.py @@ -47,6 +47,8 @@ def __init__(self, port: int = 0, docking_speed: float = 0.5): # In-memory storage for executions self._executions: dict[str, dict[str, Any]] = {} self._execution_start_times: dict[str, datetime] = {} + self._ligands: dict[str, dict[str, Any]] = {} + self._proteins: dict[str, dict[str, Any]] = {} # Tool-specific mock execution durations (in seconds) self._mock_execution_durations: dict[str, float] = { "deeporigin.abfe-end-to-end": 30.0, # seconds @@ -957,6 +959,193 @@ def health() -> dict[str, str]: """Health check endpoint.""" return {"status": "ok"} + @self.app.get("/data-platform/health") + def data_platform_health() -> dict[str, str]: + """Data platform health check endpoint.""" + return {"status": "ok"} + + @self.app.post("/data-platform/{org_key}/ligands_with_results/search") + async def search_ligands_with_results( + org_key: str, request: Request + ) -> dict[str, Any]: + """Search ligands joined with tool results.""" + await request.json() # Consume request body + # Return a mock response with empty data list + return { + "data": [], + "count": 0, + } + + @self.app.post("/data-platform/{org_key}/{entity}/search") + async def search_entity( + org_key: str, entity: str, request: Request + ) -> dict[str, Any]: + """Search an entity.""" + await request.json() # Consume request body + # Return a mock response with empty data list + return { + "data": [], + "count": 0, + } + + @self.app.post("/data-platform/{org_key}/projects/search") + async def list_projects(org_key: str, request: Request) -> dict[str, Any]: + """List projects.""" + await request.json() # Consume request body + # Return a mock response with empty projects list + return { + "data": [], + "count": 0, + } + + @self.app.post("/data-platform/{org_key}/ligands") + async def create_ligand(org_key: str, request: Request) -> dict[str, Any]: + """Create a new ligand.""" + body = await request.json() + set_data = body.get("set", {}) + returning = body.get("returning", []) + + # Generate mock response matching the real API format + now = datetime.now(timezone.utc) + ligand_id = "08" + str(uuid.uuid4()).replace("-", "").upper()[:11] + smiles = set_data.get("smiles", "") + response_data: dict[str, Any] = { + "id": ligand_id, + "version": 1, + "valid_from": now.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z", + "valid_to": None, + "modified_by": "test-user", + "deleted": False, + "project_id": None, + "subtable_name": "ligands", + "canonical_smiles": smiles, + "smiles": smiles, + "inchi_key": None, + "inchi": None, + "log_p": None, + "structure_key": 
None, + } + + # Include all fields from set_data + response_data.update(set_data) + + # Store full record in memory before filtering + self._ligands[ligand_id] = response_data.copy() + + # Filter to only return requested fields if specified + if returning: + response_data = { + k: v for k, v in response_data.items() if k in returning + } + + return {"data": response_data, "meta": {"inserted": 1}} + + @self.app.post("/data-platform/{org_key}/proteins") + async def create_protein(org_key: str, request: Request) -> dict[str, Any]: + """Create a new protein.""" + body = await request.json() + set_data = body.get("set", {}) + returning = body.get("returning", []) + + # Generate mock response matching the real API format + now = datetime.now(timezone.utc) + protein_id = "08" + str(uuid.uuid4()).replace("-", "").upper()[:11] + modified_by = "6b96d8f8-0f55-474c-a86c-e09651ba4b20" + + # Build response data with all fields matching the real API + response_data: dict[str, Any] = { + "id": protein_id, + "version": 1, + "valid_from": now.strftime("%Y-%m-%dT%H:%M:%S.%f")[:-3] + "Z", + "valid_to": None, + "modified_by": modified_by, + "deleted": False, + "project_id": None, + "subtable_name": "proteins", + "uniprot_accession": None, + "file_path": set_data.get("file_path", ""), + "gene_symbol": None, + "pdb_id": None, + "refseq_protein_id": None, + "ensembl_protein_id": None, + "alpha_fold_id": None, + "fasta_sequence": None, + "protein_name": None, + "kegg_gene_id": None, + "chembl_target_id": None, + "binding_db_target_id": None, + "drugbank_target_id": None, + "pfam_id": None, + "interpro_id": None, + "ec_number": None, + "ncbi_taxonomy_id": None, + "protein_family": None, + "ligandability_score": None, + "protein_length": None, + } + + # Override with any fields provided in set_data + response_data.update(set_data) + + # Store full record in memory before filtering + self._proteins[protein_id] = response_data.copy() + + # Filter to only return requested fields if specified + if returning: + response_data = { + k: v for k, v in response_data.items() if k in returning + } + + return { + "data": response_data, + "meta": {"inserted": 1}, + } + + @self.app.get("/data-platform/{org_key}/ligands/{ligand_id}") + def get_ligand(org_key: str, ligand_id: str) -> dict[str, Any]: + """Get a ligand by ID.""" + if ligand_id in self._ligands: + return self._ligands[ligand_id] + try: + return self._load_fixture(f"ligand_{ligand_id}") + except FileNotFoundError: + from fastapi import HTTPException + + raise HTTPException( + status_code=404, detail=f"Ligand {ligand_id} not found" + ) from None + + @self.app.get("/data-platform/{org_key}/proteins/{protein_id}") + def get_protein(org_key: str, protein_id: str) -> dict[str, Any]: + """Get a protein by ID.""" + if protein_id in self._proteins: + return self._proteins[protein_id] + try: + return self._load_fixture(f"protein_{protein_id}") + except FileNotFoundError: + from fastapi import HTTPException + + raise HTTPException( + status_code=404, detail=f"Protein {protein_id} not found" + ) from None + + @self.app.get("/data-platform/{org_key}/meta/models") + def list_models(org_key: str) -> dict[str, Any]: + """List public models.""" + return { + "models": [ + {"tableName": "ligands", "visibility": "public"}, + {"tableName": "proteins", "visibility": "public"}, + {"tableName": "patents", "visibility": "public"}, + {"tableName": "projects", "visibility": "public"}, + {"tableName": "ui_settings", "visibility": "public"}, + {"tableName": "executions", "visibility": "public"}, + 
{"tableName": "execution_subjects", "visibility": "public"}, + {"tableName": "results", "visibility": "public"}, + {"tableName": "result_table_catalog", "visibility": "public"}, + ] + } + def start(self) -> tuple[str, int]: """Start the test server. diff --git a/tests/test_data.py b/tests/test_data.py new file mode 100644 index 00000000..5d6410bd --- /dev/null +++ b/tests/test_data.py @@ -0,0 +1,213 @@ +"""Tests for the Data Platform API wrapper.""" + +import uuid + +import pytest + +from deeporigin.platform.client import DeepOriginClient + + +def test_data_platform_health_lv1(): + """Test the data platform health endpoint.""" + client = DeepOriginClient() + response = client.data.health() + + assert isinstance(response, dict), "Expected a dictionary response" + assert "status" in response, "Expected 'status' key in response" + assert response["status"] == "ok", "Expected status to be 'ok'" + + +def test_search_entity_lv1(): + """Test searching an entity.""" + client = DeepOriginClient() + response = client.data.search("ligands") + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + assert isinstance(response["data"], list), "Expected 'data' to be a list" + + +def test_search_entity_invalid_entity(): + """Test searching with an invalid entity raises ValueError.""" + client = DeepOriginClient() + with pytest.raises(ValueError, match="Invalid entity 'invalid_table'"): + client.data.search("invalid_table") + + +def test_search_ligands_lv1(): + """Test searching ligands using convenience method.""" + client = DeepOriginClient() + response = client.data.search_ligands() + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + assert isinstance(response["data"], list), "Expected 'data' to be a list" + + +def test_search_ligands_molecular_weight_lv1(): + """Test searching ligands with molecular weight filters.""" + client = DeepOriginClient() + response = client.data.search_ligands( + min_molecular_weight=250, + max_molecular_weight=550, + ) + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + assert isinstance(response["data"], list), "Expected 'data' to be a list" + + +def test_search_proteins_lv1(): + """Test searching proteins using convenience method.""" + client = DeepOriginClient() + response = client.data.search_proteins() + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + assert isinstance(response["data"], list), "Expected 'data' to be a list" + + +def test_search_proteins_molecular_weight_lv1(): + """Test searching proteins with molecular weight filters.""" + client = DeepOriginClient() + response = client.data.search_proteins( + min_molecular_weight=250, + max_molecular_weight=550, + ) + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + assert isinstance(response["data"], list), "Expected 'data' to be a list" + + +def test_search_proteins_sequence_lv1(): + """Test searching proteins with sequence filter.""" + client = DeepOriginClient() + response = client.data.search_proteins( + 
sequence="MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQAPILSRVGDGTQDNLSGAEKAVQVKVKALPDAQFEVVHSLAKWKRQTLGQHDFSAGEGLYTHMKALRPDEDRLSPLHSVYVDQWDWERVMGDGERQFSTLKSTVEAIWAGIKATEAAVSEEFGLAPFLPDQIHFVHSQELLSRYPDLDAKGRERAIAKDLGAVFLVGIGGKLSDGHRHDVRAPDYDDWSTPSELGHAGLNGDILVWNPVLEDAFELSSMGIRVDADTLKHQLALTGDEDRLELEWHQALLRGEMPQTIGGGIGQSRLTMLLLQLPHIGQVQAGVWPAAVRESVPSLL" + ) + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + assert isinstance(response["data"], list), "Expected 'data' to be a list" + + +def test_list_models_lv1(): + """Test listing models.""" + client = DeepOriginClient() + response = client.data.list_models() + + assert isinstance(response, dict), "Expected a dictionary response" + assert "models" in response, "Expected 'models' key in response" + assert isinstance(response["models"], list), "Expected 'models' to be a list" + assert len(response["models"]) > 0, "Expected at least one model" + # Verify structure of first model + model = response["models"][0] + assert "tableName" in model, "Expected 'tableName' key in model" + assert "visibility" in model, "Expected 'visibility' key in model" + assert model["visibility"] == "public", "Expected visibility to be 'public'" + + +def test_create_ligand_lv1(): + """Test creating a ligand.""" + client = DeepOriginClient() + smiles = "Fc1c(-c2cccc3ccccc23)ncc2c(N3C[C@H]4CC[C@@H](C3)N4)nc(OCC34CCCN3CCC4)nc12" + unique_tag = str(uuid.uuid4()) + response = client.data.create_ligand( + smiles=smiles, + name="Compound-12345", + formal_charge=0, + hbond_donor_count=1, + hbond_acceptor_count=6, + rotatable_bond_count=5, + tpsa=85.12, + molecular_weight=447.5, + variant_name_tag=unique_tag, + ) + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + data = response["data"] + assert isinstance(data, dict), "Expected 'data' to be a dictionary" + assert "id" in data, "Expected 'id' key in data" + assert "version" in data, "Expected 'version' key in data" + assert data["version"] == 1, "Expected version to be 1" + assert "name" in data, "Expected 'name' key in data" + assert data["name"] == "Compound-12345", "Expected name to match" + assert "canonical_smiles" in data, "Expected 'canonical_smiles' key in data" + assert "meta" in response, "Expected 'meta' key in response" + assert response["meta"]["inserted"] == 1, "Expected inserted to be 1" + + +def test_create_protein_lv1(): + """Test creating a protein.""" + client = DeepOriginClient() + response = client.data.create_protein( + file_path="entities/proteins/db4aa32e2e8ffa976a60004a8361b86427a2e5653a6623bb60b7913445902549.pdb", + ) + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + assert isinstance(response["data"], dict), "Expected 'data' to be a dictionary" + assert "id" in response["data"], "Expected 'id' key in response data" + assert "version" in response["data"], "Expected 'version' key in response data" + assert response["data"]["version"] == 1, "Expected version to be 1" + assert "file_path" in response["data"], "Expected 'file_path' key in response data" + assert ( + response["data"]["file_path"] + == "entities/proteins/db4aa32e2e8ffa976a60004a8361b86427a2e5653a6623bb60b7913445902549.pdb" + ), "Expected file_path to match" + assert "meta" in response, "Expected 'meta' key in response" + assert "inserted" in response["meta"], "Expected 'inserted' key in meta" + 
assert response["meta"]["inserted"] == 1, "Expected inserted to be 1" + + +def test_list_projects_lv1(): + """Test listing projects.""" + client = DeepOriginClient() + response = client.data.list_projects() + + assert isinstance(response, dict), "Expected a dictionary response" + assert "data" in response, "Expected 'data' key in response" + assert isinstance(response["data"], list), "Expected 'data' to be a list" + + +def test_get_ligand_lv1(): + """Test getting a ligand by ID.""" + client = DeepOriginClient() + smiles = "Fc1c(-c2cccc3ccccc23)ncc2c(N3C[C@H]4CC[C@@H](C3)N4)nc(OCC34CCCN3CCC4)nc12" + created = client.data.create_ligand( + smiles=smiles, + name="GetLigandTest", + molecular_weight=447.5, + variant_name_tag=str(uuid.uuid4()), + ) + ligand_id = created["data"]["id"] + + response = client.data.get_ligand(id=ligand_id) + + assert isinstance(response, dict), "Expected a dictionary response" + assert "id" in response, "Expected 'id' key in response" + assert response["id"] == ligand_id, "Expected id to match" + assert "smiles" in response, "Expected 'smiles' key in response" + assert "name" in response, "Expected 'name' key in response" + assert response["name"] == "GetLigandTest", "Expected name to match" + assert "molecular_weight" in response, "Expected 'molecular_weight' key in response" + + +def test_get_protein_lv1(): + """Test getting a protein by ID.""" + client = DeepOriginClient() + file_path = "entities/proteins/db4aa32e2e8ffa976a60004a8361b86427a2e5653a6623bb60b7913445902549.pdb" + created = client.data.create_protein(file_path=file_path) + protein_id = created["data"]["id"] + + response = client.data.get_protein(id=protein_id) + + assert isinstance(response, dict), "Expected a dictionary response" + assert "id" in response, "Expected 'id' key in response" + assert response["id"] == protein_id, "Expected id to match" + assert "file_path" in response, "Expected 'file_path' key in response" + assert response["file_path"] == file_path, "Expected file_path to match" + assert "subtable_name" in response, "Expected 'subtable_name' key in response" + assert response["subtable_name"] == "proteins", "Expected subtable_name to match" diff --git a/tests/test_protein.py b/tests/test_protein.py index a1db7ba8..2df2cd37 100644 --- a/tests/test_protein.py +++ b/tests/test_protein.py @@ -7,6 +7,7 @@ from deeporigin.drug_discovery import BRD_DATA_DIR, Protein from deeporigin.exceptions import DeepOriginException +from deeporigin.platform.client import DeepOriginClient def test_load_protein_from_cif_structure_factor(): @@ -401,3 +402,15 @@ def test_load_structure_from_block_invalid_type(): """Test that load_structure_from_block raises ValueError for unsupported types.""" with pytest.raises(ValueError, match=r".*Unsupported block type.*"): Protein.load_structure_from_block("test content", "xyz") + + +def test_from_id_lv1(): + """Test creating a protein from a Deep Origin Data Platform ID.""" + client = DeepOriginClient() + protein = Protein.from_id("08AD337N5YV4Y", client=client) + + assert protein.id == "08AD337N5YV4Y" + assert protein.file_path is not None + assert protein.file_path.exists() + assert len(protein.structure) > 0 + assert protein.block_content is not None