diff --git a/atomsci/ddm/test/unit/test_struct_utils.py b/atomsci/ddm/test/unit/test_struct_utils.py index e9ac3004..5ce7463e 100644 --- a/atomsci/ddm/test/unit/test_struct_utils.py +++ b/atomsci/ddm/test/unit/test_struct_utils.py @@ -20,13 +20,13 @@ def test_get_rdkit_smiles(): def test_rdkit_smiles_from_smiles(): result = su.rdkit_smiles_from_smiles(test_smiles, useCanonicalTautomers=True) - assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1', - 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', + assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1', + 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', 'Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1'] result = su.rdkit_smiles_from_smiles(test_smiles, useCanonicalTautomers=False) assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1', - 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', + 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', 'Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1'] result = su.rdkit_smiles_from_smiles(test_smiles, useIsomericSmiles=False) @@ -157,7 +157,7 @@ def test_mol_wt_from_smiles(): def test_canonical_tautomers_from_smiles(): canonical_tautomers = [su.canonical_tautomers_from_smiles(s) for s in ['asdf']+test_smiles] - assert canonical_tautomers==[[''], + assert canonical_tautomers==[[''], ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1'], ['Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1'], ['Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1']] @@ -177,4 +177,4 @@ def test_canonical_tautomers_from_smiles(): test_draw_structure() test_smiles_to_inchi_key() test_mol_wt_from_smiles() - test_canonical_tautomers_from_smiles() \ No newline at end of file + test_canonical_tautomers_from_smiles() diff --git a/atomsci/ddm/utils/struct_utils.py b/atomsci/ddm/utils/struct_utils.py index 5cafe4f7..1624ee9c 100644 --- a/atomsci/ddm/utils/struct_utils.py +++ b/atomsci/ddm/utils/struct_utils.py @@ -7,15 +7,13 @@ import re import numpy as np -import molvs import logging from rdkit import Chem from rdkit.Chem import AllChem, Draw, Descriptors from rdkit.Chem.MolStandardize import rdMolStandardize -stdizer = molvs.standardize.Standardizer(prefer_organic=True) -uncharger = molvs.charge.Uncharger() +uncharger = rdMolStandardize.Uncharger() def get_rdkit_smiles(orig_smiles, useIsomericSmiles=True): @@ -34,13 +32,12 @@ def get_rdkit_smiles(orig_smiles, useIsomericSmiles=True): mol = Chem.MolFromSmiles(orig_smiles) if mol is None: return "" - else: - return Chem.MolToSmiles(mol, isomericSmiles=useIsomericSmiles) + return Chem.MolToSmiles(mol, isomericSmiles=useIsomericSmiles) def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTautomers=False, workers=1): """Parallel version of get_rdkit_smiles. If orig_smiles is a list and workers is > 1, spawn 'workers' - threads to convert input SMILES strings to standardized RDKit format. + processes to convert input SMILES strings to standardized RDKit format. Args: orig_smiles (list or str): List of SMILES strings to canonicalize. @@ -59,7 +56,7 @@ def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTa if isinstance(orig_smiles, list): from functools import partial - func = partial(rdkit_smiles_from_smiles, useIsomericSmiles=useIsomericSmiles, + func = partial(rdkit_smiles_from_smiles, useIsomericSmiles=useIsomericSmiles, useCanonicalTautomers=useCanonicalTautomers) if workers > 1: from multiprocessing import pool @@ -85,7 +82,7 @@ def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTa def mols_from_smiles(orig_smiles, workers=1): """Parallel function to create RDKit Mol objects for a list of SMILES strings. If orig_smiles is a list - and workers is > 1, spawn 'workers' threads to convert input SMILES strings to Mol objects. + and workers is > 1, spawn 'workers' processes to convert input SMILES strings to Mol objects. Args: orig_smiles (list or str): List of SMILES strings to convert to Mol objects. @@ -115,7 +112,7 @@ def mols_from_smiles(orig_smiles, workers=1): return mols -def base_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=False, +def base_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=False, useCanonicalTautomers=False, workers=1): """Generate standardized SMILES strings for the largest fragments of each molecule specified by orig_smiles. Strips salt groups and replaces any rare isotopes with the most common ones for each element. @@ -225,9 +222,14 @@ def base_mol_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=Fals cmpd_mol = Chem.MolFromSmiles(orig_smiles) if cmpd_mol is None: return None - std_mol = stdizer.isotope_parent(stdizer.fragment_parent(cmpd_mol), skip_standardize=True) - if removeCharges: - std_mol = uncharger(std_mol) + try: + std_mol = rdMolStandardize.IsotopeParent( + rdMolStandardize.FragmentParent(cmpd_mol), + skipStandardize=True) + if removeCharges: + std_mol = uncharger.uncharge(std_mol) + except Exception: + std_mol = None return std_mol @@ -294,12 +296,17 @@ def base_mol_from_inchi(inchi_str, useIsomericSmiles=True, removeCharges=False): cmpd_mol = Chem.inchi.MolFromInchi(inchi_str) if cmpd_mol is None: return None - std_mol = stdizer.isotope_parent(stdizer.fragment_parent(cmpd_mol), skip_standardize=True) - if removeCharges: - std_mol = uncharger(std_mol) + try: + std_mol = rdMolStandardize.IsotopeParent( + rdMolStandardize.FragmentParent(cmpd_mol), + skipStandardize=True) + if removeCharges: + std_mol = uncharger.uncharge(std_mol) + except Exception as e: + print(f"Error standardizing InChI {inchi_str}: {e}") + std_mol = None return std_mol - def draw_structure(smiles_str, image_path, image_size=500): """Draw structure for the compound with the given SMILES string as a PNG file. @@ -418,7 +425,7 @@ def _merge_dataframes_by_smiles(dataframes, smiles_col='rdkit_smiles', id_col='c new_df = new_df.drop([lCol, rCol], axis=1) left_df = new_df - return new_df + return left_df def smiles_to_inchi_key(smiles): @@ -514,6 +521,4 @@ def canonical_tautomers_from_smiles(smiles): smiles = [smiles] mols = [Chem.MolFromSmiles(smi) for smi in smiles] canon_tautomers = [taut_enum.Canonicalize(m) if m is not None else None for m in mols] - return [Chem.MolToSmiles(m) if m is not None else '' for m in canon_tautomers] - - + return [Chem.MolToSmiles(m) if m is not None else '' for m in canon_tautomers] \ No newline at end of file diff --git a/install-ampl/pyproject.toml b/install-ampl/pyproject.toml index 28662fbc..93b2860f 100644 --- a/install-ampl/pyproject.toml +++ b/install-ampl/pyproject.toml @@ -1,40 +1,21 @@ [project] -name = "ampl" -version = "1.0" +name = "atomsci-ampl" +description = "ATOM Modeling PipeLine" +version = "1.8.0" +requires-python = ">=3.9,<3.12" # NOTE: # * deepchem only works with python <= 3.10 (and possibly 3.11) # - seems to install in python 3.11 but website says suports only # python <= 3.10 -# - does not work with python 3.12+ due to tensorflow-addons dependency +# - does NOT work with python 3.12+ due to tensorflow-addons dependency # which is no longer supported and only has support up to python 3.11 # * ampl only works with python 3.9, 3.10, 3.11 (due to deepchem) -# - encoders / decoders need python 3.9 -# - everthing else works with python 3.9 and 3.10 (and possibly 3.11) -# * this will install on python 3.12 but won't have deepchem -requires-python = ">=3.9,<3.13" # Dependencies for all Python versions dependencies = [ - "setuptools", - "pip", - - # core dependencies - "torch_cluster", - "torch_scatter", - "torch_sparse", - "torch_spline_conv", "torch-geometric", - - # deepchem 2.8.0 deps [NOTE: may not work on python >= 3.11] - "deepchem[torch,tensorflow]==2.8.0; python_version >= '3.10' and python_version < '3.12'", - - # deepchem[tensorflow] deps - "tensorflow-cpu==2.14.0; python_version >= '3.10' and python_version < '3.12'", - - # atomsci-ampl deps [need to be added to atomsci-ampl] - "molvs", "xgboost", "umap-learn", "imbalanced-learn", @@ -43,28 +24,41 @@ dependencies = [ "pytest", "seaborn", "matplotcheck", - - # code repos - "atomsci-ampl; python_version >= '3.10' and python_version < '3.12'", + "numpy<2", "atomsci-clients", - # rdkit - "rdkit>=2024.3.5; python_version >= '3.10'", + # deepchem 2.7.1 for Python 3.9 version + "deepchem[torch,tensorflow]==2.7.1; python_version < '3.10'", + # deepchem 2.8.0 deps [NOTE: may not work on python 3.11] + "deepchem[torch,tensorflow]==2.8.0; python_version >= '3.10'", - "numpy<2", + # deepchem[tensorflow] deps + "tensorflow-cpu==2.14.0", - # python specific versions [dgl / pyg dependent] + "rdkit>=2024.3.5; python_version >= '3.10'", "dgl==2.1.0; python_version >= '3.10'", - "pyg_lib==0.3.1; python_version >= '3.10' and python_version < '3.12'", - "pyg_lib==0.4.0; python_version >= '3.12'", + "pyg_lib==0.2.0; python_version < '3.10'", + "pyg_lib==0.3.1; python_version >= '3.10'", - "torch==2.1.2+cpu; python_version >= '3.10' and python_version < '3.12'", - "torch==2.3.1+cpu; python_version >= '3.12'" + "torch==2.0.1+cpu; python_version < '3.10'", + "torch==2.1.2+cpu; python_version >= '3.10'", + "torch_cluster", + "torch_scatter", + "torch_sparse", + "torch_spline_conv", ] +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +include = ["atomsci*"] +namespaces = true [tool.uv] +managed = true allow-insecure-host = [ "download.pytorch.org", "data.dgl.ai", @@ -82,9 +76,8 @@ url = "https://data.dgl.ai/wheels/repo.html" format = "flat" [[tool.uv.index]] -# NOTE: pyg libraries for torch >= 2.2 seem to have GLIBC incompatibilities on LC -name = "pyg_lib_py312" -url = "https://data.pyg.org/whl/torch-2.3.1+cpu.html" +name = "pyg_lib_py39" +url = "https://data.pyg.org/whl/torch-2.0.1+cpu.html" format = "flat" [[tool.uv.index]] @@ -94,9 +87,6 @@ format = "flat" [tool.uv.sources] # code repos -atomsci-ampl = [ - { git = "https://github.com/ATOMScience-org/AMPL.git", rev = "py310", marker = "python_version >= '3.10'"} -] atomsci-clients = { git = "ssh://git@czgitlab.llnl.gov:7999/atom/clients.git", rev = "master" } # dgl @@ -104,24 +94,24 @@ dgl = { index = "dgl" } # torch torch = { index = "pytorch" } + pyg_lib = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] torch_cluster = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] torch_scatter = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] torch_sparse = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] torch_spline_conv = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] - diff --git a/pip/cpu_requirements.txt b/pip/cpu_requirements.txt index 93621c84..e989d122 100644 --- a/pip/cpu_requirements.txt +++ b/pip/cpu_requirements.txt @@ -93,7 +93,6 @@ maestrowf deepchem==2.8 rdkit==2024.3.5 -MolVS mordred # ========================== diff --git a/pip/cuda_requirements.txt b/pip/cuda_requirements.txt index 6640fa87..491d35bf 100644 --- a/pip/cuda_requirements.txt +++ b/pip/cuda_requirements.txt @@ -39,7 +39,6 @@ imblearn # pycairo maestrowf -MolVS mordred pytest diff --git a/pip/docker_requirements.txt b/pip/docker_requirements.txt index bfd74efb..722c537f 100644 --- a/pip/docker_requirements.txt +++ b/pip/docker_requirements.txt @@ -36,7 +36,6 @@ bravado # pycairo maestrowf -MolVS mordred pytest ipykernel diff --git a/pip/mchip_requirements.txt b/pip/mchip_requirements.txt index fcc857a9..b6aba47c 100644 --- a/pip/mchip_requirements.txt +++ b/pip/mchip_requirements.txt @@ -38,7 +38,6 @@ imblearn # pycairo maestrowf -MolVS mordred pytest diff --git a/pip/readthedocs_requirements.txt b/pip/readthedocs_requirements.txt index 02f83b92..4af65729 100644 --- a/pip/readthedocs_requirements.txt +++ b/pip/readthedocs_requirements.txt @@ -4,7 +4,6 @@ hyperopt IPython joblib matplotlib -molvs numpy pandas pygments diff --git a/pip/rocm_requirements.txt b/pip/rocm_requirements.txt index d3bbe1d5..78f1ac63 100644 --- a/pip/rocm_requirements.txt +++ b/pip/rocm_requirements.txt @@ -27,7 +27,6 @@ imblearn maestrowf -MolVS mordred pytest