Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions atomsci/ddm/test/unit/test_struct_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,13 @@ def test_get_rdkit_smiles():

def test_rdkit_smiles_from_smiles():
result = su.rdkit_smiles_from_smiles(test_smiles, useCanonicalTautomers=True)
assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1',
'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1',
assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1',
'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1',
'Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1']

result = su.rdkit_smiles_from_smiles(test_smiles, useCanonicalTautomers=False)
assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1',
'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1',
'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1',
'Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1']

result = su.rdkit_smiles_from_smiles(test_smiles, useIsomericSmiles=False)
Expand Down Expand Up @@ -157,7 +157,7 @@ def test_mol_wt_from_smiles():

def test_canonical_tautomers_from_smiles():
canonical_tautomers = [su.canonical_tautomers_from_smiles(s) for s in ['asdf']+test_smiles]
assert canonical_tautomers==[[''],
assert canonical_tautomers==[[''],
['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1'],
['Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1'],
['Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1']]
Expand All @@ -177,4 +177,4 @@ def test_canonical_tautomers_from_smiles():
test_draw_structure()
test_smiles_to_inchi_key()
test_mol_wt_from_smiles()
test_canonical_tautomers_from_smiles()
test_canonical_tautomers_from_smiles()
45 changes: 25 additions & 20 deletions atomsci/ddm/utils/struct_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,13 @@

import re
import numpy as np
import molvs
import logging

from rdkit import Chem
from rdkit.Chem import AllChem, Draw, Descriptors
from rdkit.Chem.MolStandardize import rdMolStandardize

stdizer = molvs.standardize.Standardizer(prefer_organic=True)
uncharger = molvs.charge.Uncharger()
uncharger = rdMolStandardize.Uncharger()


def get_rdkit_smiles(orig_smiles, useIsomericSmiles=True):
Expand All @@ -34,13 +32,12 @@ def get_rdkit_smiles(orig_smiles, useIsomericSmiles=True):
mol = Chem.MolFromSmiles(orig_smiles)
if mol is None:
return ""
else:
return Chem.MolToSmiles(mol, isomericSmiles=useIsomericSmiles)
return Chem.MolToSmiles(mol, isomericSmiles=useIsomericSmiles)


def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTautomers=False, workers=1):
"""Parallel version of get_rdkit_smiles. If orig_smiles is a list and workers is > 1, spawn 'workers'
threads to convert input SMILES strings to standardized RDKit format.
processes to convert input SMILES strings to standardized RDKit format.

Args:
orig_smiles (list or str): List of SMILES strings to canonicalize.
Expand All @@ -59,7 +56,7 @@ def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTa

if isinstance(orig_smiles, list):
from functools import partial
func = partial(rdkit_smiles_from_smiles, useIsomericSmiles=useIsomericSmiles,
func = partial(rdkit_smiles_from_smiles, useIsomericSmiles=useIsomericSmiles,
useCanonicalTautomers=useCanonicalTautomers)
if workers > 1:
from multiprocessing import pool
Expand All @@ -85,7 +82,7 @@ def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTa

def mols_from_smiles(orig_smiles, workers=1):
"""Parallel function to create RDKit Mol objects for a list of SMILES strings. If orig_smiles is a list
and workers is > 1, spawn 'workers' threads to convert input SMILES strings to Mol objects.
and workers is > 1, spawn 'workers' processes to convert input SMILES strings to Mol objects.

Args:
orig_smiles (list or str): List of SMILES strings to convert to Mol objects.
Expand Down Expand Up @@ -115,7 +112,7 @@ def mols_from_smiles(orig_smiles, workers=1):
return mols


def base_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=False,
def base_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=False,
useCanonicalTautomers=False, workers=1):
"""Generate standardized SMILES strings for the largest fragments of each molecule specified by
orig_smiles. Strips salt groups and replaces any rare isotopes with the most common ones for each element.
Expand Down Expand Up @@ -225,9 +222,14 @@ def base_mol_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=Fals
cmpd_mol = Chem.MolFromSmiles(orig_smiles)
if cmpd_mol is None:
return None
std_mol = stdizer.isotope_parent(stdizer.fragment_parent(cmpd_mol), skip_standardize=True)
if removeCharges:
std_mol = uncharger(std_mol)
try:
std_mol = rdMolStandardize.IsotopeParent(
rdMolStandardize.FragmentParent(cmpd_mol),
skipStandardize=True)
if removeCharges:
std_mol = uncharger.uncharge(std_mol)
except Exception:
std_mol = None
return std_mol


Expand Down Expand Up @@ -294,12 +296,17 @@ def base_mol_from_inchi(inchi_str, useIsomericSmiles=True, removeCharges=False):
cmpd_mol = Chem.inchi.MolFromInchi(inchi_str)
if cmpd_mol is None:
return None
std_mol = stdizer.isotope_parent(stdizer.fragment_parent(cmpd_mol), skip_standardize=True)
if removeCharges:
std_mol = uncharger(std_mol)
try:
std_mol = rdMolStandardize.IsotopeParent(
rdMolStandardize.FragmentParent(cmpd_mol),
skipStandardize=True)
if removeCharges:
std_mol = uncharger.uncharge(std_mol)
except Exception as e:
print(f"Error standardizing InChI {inchi_str}: {e}")
std_mol = None
return std_mol


def draw_structure(smiles_str, image_path, image_size=500):
"""Draw structure for the compound with the given SMILES string as a PNG file.

Expand Down Expand Up @@ -418,7 +425,7 @@ def _merge_dataframes_by_smiles(dataframes, smiles_col='rdkit_smiles', id_col='c
new_df = new_df.drop([lCol, rCol], axis=1)
left_df = new_df

return new_df
return left_df


def smiles_to_inchi_key(smiles):
Expand Down Expand Up @@ -514,6 +521,4 @@ def canonical_tautomers_from_smiles(smiles):
smiles = [smiles]
mols = [Chem.MolFromSmiles(smi) for smi in smiles]
canon_tautomers = [taut_enum.Canonicalize(m) if m is not None else None for m in mols]
return [Chem.MolToSmiles(m) if m is not None else '' for m in canon_tautomers]


return [Chem.MolToSmiles(m) if m is not None else '' for m in canon_tautomers]
94 changes: 42 additions & 52 deletions install-ampl/pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,40 +1,21 @@

[project]
name = "ampl"
version = "1.0"
name = "atomsci-ampl"
description = "ATOM Modeling PipeLine"
version = "1.8.0"
requires-python = ">=3.9,<3.12"

# NOTE:
# * deepchem only works with python <= 3.10 (and possibly 3.11)
# - seems to install in python 3.11 but the website says it supports only
# python <= 3.10
# - does not work with python 3.12+ due to tensorflow-addons dependency
# - does NOT work with python 3.12+ due to tensorflow-addons dependency
# which is no longer supported and only has support up to python 3.11
# * ampl only works with python 3.9, 3.10, 3.11 (due to deepchem)
# - encoders / decoders need python 3.9
# - everything else works with python 3.9 and 3.10 (and possibly 3.11)
# * this will install on python 3.12 but won't have deepchem
requires-python = ">=3.9,<3.13"

# Dependencies for all Python versions
dependencies = [
"setuptools",
"pip",

# core dependencies
"torch_cluster",
"torch_scatter",
"torch_sparse",
"torch_spline_conv",
"torch-geometric",

# deepchem 2.8.0 deps [NOTE: may not work on python >= 3.11]
"deepchem[torch,tensorflow]==2.8.0; python_version >= '3.10' and python_version < '3.12'",

# deepchem[tensorflow] deps
"tensorflow-cpu==2.14.0; python_version >= '3.10' and python_version < '3.12'",

# atomsci-ampl deps [need to be added to atomsci-ampl]
"molvs",
"xgboost",
"umap-learn",
"imbalanced-learn",
Expand All @@ -43,28 +24,41 @@ dependencies = [
"pytest",
"seaborn",
"matplotcheck",

# code repos
"atomsci-ampl; python_version >= '3.10' and python_version < '3.12'",
"numpy<2",
"atomsci-clients",

# rdkit
"rdkit>=2024.3.5; python_version >= '3.10'",
# deepchem 2.7.1 for Python 3.9 version
"deepchem[torch,tensorflow]==2.7.1; python_version < '3.10'",
# deepchem 2.8.0 deps [NOTE: may not work on python 3.11]
"deepchem[torch,tensorflow]==2.8.0; python_version >= '3.10'",

"numpy<2",
# deepchem[tensorflow] deps
"tensorflow-cpu==2.14.0",

# python specific versions [dgl / pyg dependent]
"rdkit>=2024.3.5; python_version >= '3.10'",
"dgl==2.1.0; python_version >= '3.10'",

"pyg_lib==0.3.1; python_version >= '3.10' and python_version < '3.12'",
"pyg_lib==0.4.0; python_version >= '3.12'",
"pyg_lib==0.2.0; python_version < '3.10'",
"pyg_lib==0.3.1; python_version >= '3.10'",

"torch==2.1.2+cpu; python_version >= '3.10' and python_version < '3.12'",
"torch==2.3.1+cpu; python_version >= '3.12'"
"torch==2.0.1+cpu; python_version < '3.10'",
"torch==2.1.2+cpu; python_version >= '3.10'",
"torch_cluster",
"torch_scatter",
"torch_sparse",
"torch_spline_conv",
]

[build-system]
requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[tool.setuptools.packages.find]
include = ["atomsci*"]
namespaces = true

[tool.uv]
managed = true
allow-insecure-host = [
"download.pytorch.org",
"data.dgl.ai",
Expand All @@ -82,9 +76,8 @@ url = "https://data.dgl.ai/wheels/repo.html"
format = "flat"

[[tool.uv.index]]
# NOTE: pyg libraries for torch >= 2.2 seem to have GLIBC incompatibilities on LC
name = "pyg_lib_py312"
url = "https://data.pyg.org/whl/torch-2.3.1+cpu.html"
name = "pyg_lib_py39"
url = "https://data.pyg.org/whl/torch-2.0.1+cpu.html"
format = "flat"

[[tool.uv.index]]
Expand All @@ -94,34 +87,31 @@ format = "flat"

[tool.uv.sources]
# code repos
atomsci-ampl = [
{ git = "https://github.com/ATOMScience-org/AMPL.git", rev = "py310", marker = "python_version >= '3.10'"}
]
atomsci-clients = { git = "ssh://git@czgitlab.llnl.gov:7999/atom/clients.git", rev = "master" }

# dgl
dgl = { index = "dgl" }

# torch
torch = { index = "pytorch" }

pyg_lib = [
{ index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" },
{ index = "pyg_lib_py312", marker = "python_version >= '3.12'" },
{ index = "pyg_lib_py39", marker = "python_version < '3.10'" },
{ index = "pyg_lib_py310", marker = "python_version >= '3.10'" },
]
torch_cluster = [
{ index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" },
{ index = "pyg_lib_py312", marker = "python_version >= '3.12'" },
{ index = "pyg_lib_py39", marker = "python_version < '3.10'" },
{ index = "pyg_lib_py310", marker = "python_version >= '3.10'" },
]
torch_scatter = [
{ index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" },
{ index = "pyg_lib_py312", marker = "python_version >= '3.12'" },
{ index = "pyg_lib_py39", marker = "python_version < '3.10'" },
{ index = "pyg_lib_py310", marker = "python_version >= '3.10'" },
]
torch_sparse = [
{ index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" },
{ index = "pyg_lib_py312", marker = "python_version >= '3.12'" },
{ index = "pyg_lib_py39", marker = "python_version < '3.10'" },
{ index = "pyg_lib_py310", marker = "python_version >= '3.10'" },
]
torch_spline_conv = [
{ index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" },
{ index = "pyg_lib_py312", marker = "python_version >= '3.12'" },
{ index = "pyg_lib_py39", marker = "python_version < '3.10'" },
{ index = "pyg_lib_py310", marker = "python_version >= '3.10'" },
]

1 change: 0 additions & 1 deletion pip/cpu_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ maestrowf

deepchem==2.8
rdkit==2024.3.5
MolVS
mordred

# ==========================
Expand Down
1 change: 0 additions & 1 deletion pip/cuda_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ imblearn
# pycairo

maestrowf
MolVS
mordred

pytest
Expand Down
1 change: 0 additions & 1 deletion pip/docker_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ bravado
# pycairo

maestrowf
MolVS
mordred
pytest
ipykernel
Expand Down
1 change: 0 additions & 1 deletion pip/mchip_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ imblearn
# pycairo

maestrowf
MolVS
mordred

pytest
Expand Down
1 change: 0 additions & 1 deletion pip/readthedocs_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ hyperopt
IPython
joblib
matplotlib
molvs
numpy
pandas
pygments
Expand Down
1 change: 0 additions & 1 deletion pip/rocm_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@ imblearn


maestrowf
MolVS
mordred

pytest
Expand Down
Loading