From 94983853bc912bb73440146e8c837187466b599e Mon Sep 17 00:00:00 2001 From: kmelough Date: Thu, 12 Mar 2026 12:04:06 -0700 Subject: [PATCH 1/6] Remove molvs package dependency in SMILES standardization code. --- atomsci/ddm/utils/struct_utils.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/atomsci/ddm/utils/struct_utils.py b/atomsci/ddm/utils/struct_utils.py index f5bbbf81..c6e01b1a 100644 --- a/atomsci/ddm/utils/struct_utils.py +++ b/atomsci/ddm/utils/struct_utils.py @@ -9,14 +9,12 @@ import re import pdb import numpy as np -import molvs from rdkit import Chem from rdkit.Chem import AllChem, Draw, Descriptors from rdkit.Chem.MolStandardize import rdMolStandardize -stdizer = molvs.standardize.Standardizer(prefer_organic=True) -uncharger = molvs.charge.Uncharger() +uncharger = rdMolStandardize.Uncharger() def get_rdkit_smiles(orig_smiles, useIsomericSmiles=True): @@ -226,9 +224,13 @@ def base_mol_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=Fals cmpd_mol = Chem.MolFromSmiles(orig_smiles) if cmpd_mol is None: return None - std_mol = stdizer.isotope_parent(stdizer.fragment_parent(cmpd_mol), skip_standardize=True) - if removeCharges: - std_mol = uncharger(std_mol) + try: + std_mol = rdMolStandardize.IsotopeParent(rdMolStandardize.FragmentParent(cmpd_mol), skipStandardize=True) + if removeCharges: + std_mol = uncharger(std_mol) + #except Chem.rdchem.MolSanitizeException: + except: + std_mol = None return std_mol @@ -295,9 +297,13 @@ def base_mol_from_inchi(inchi_str, useIsomericSmiles=True, removeCharges=False): cmpd_mol = Chem.inchi.MolFromInchi(inchi_str) if cmpd_mol is None: return None - std_mol = stdizer.isotope_parent(stdizer.fragment_parent(cmpd_mol), skip_standardize=True) - if removeCharges: - std_mol = uncharger(std_mol) + try: + std_mol = rdMolStandardize.IsotopeParent(rdMolStandardize.FragmentParent(cmpd_mol), skipStandardize=True) + if removeCharges: + std_mol = uncharger(std_mol) + #except Chem.rdchem.MolSanitizeException: + except: + std_mol = None return std_mol From 3e4c23f2e679edb46a1ab707e8fe4ef3a85dcb79 Mon Sep 17 00:00:00 2001 From: "Kevin S. McLoughlin" Date: Wed, 1 Apr 2026 12:51:59 -0700 Subject: [PATCH 2/6] Removed molvs from pyproject and pip requirements files. --- install-ampl/pyproject.toml | 1 - pip/cpu_requirements.txt | 1 - pip/cuda_requirements.txt | 1 - pip/docker_requirements.txt | 1 - pip/mchip_requirements.txt | 1 - pip/readthedocs_requirements.txt | 1 - pip/rocm_requirements.txt | 1 - 7 files changed, 7 deletions(-) diff --git a/install-ampl/pyproject.toml b/install-ampl/pyproject.toml index 28662fbc..a046ff43 100644 --- a/install-ampl/pyproject.toml +++ b/install-ampl/pyproject.toml @@ -34,7 +34,6 @@ dependencies = [ "tensorflow-cpu==2.14.0; python_version >= '3.10' and python_version < '3.12'", # atomsci-ampl deps [need to be added to atomsci-ampl] - "molvs", "xgboost", "umap-learn", "imbalanced-learn", diff --git a/pip/cpu_requirements.txt b/pip/cpu_requirements.txt index 93621c84..e989d122 100644 --- a/pip/cpu_requirements.txt +++ b/pip/cpu_requirements.txt @@ -93,7 +93,6 @@ maestrowf deepchem==2.8 rdkit==2024.3.5 -MolVS mordred # ========================== diff --git a/pip/cuda_requirements.txt b/pip/cuda_requirements.txt index 6640fa87..491d35bf 100644 --- a/pip/cuda_requirements.txt +++ b/pip/cuda_requirements.txt @@ -39,7 +39,6 @@ imblearn # pycairo maestrowf -MolVS mordred pytest diff --git a/pip/docker_requirements.txt b/pip/docker_requirements.txt index bfd74efb..722c537f 100644 --- a/pip/docker_requirements.txt +++ b/pip/docker_requirements.txt @@ -36,7 +36,6 @@ bravado # pycairo maestrowf -MolVS mordred pytest ipykernel diff --git a/pip/mchip_requirements.txt b/pip/mchip_requirements.txt index fcc857a9..b6aba47c 100644 --- a/pip/mchip_requirements.txt +++ b/pip/mchip_requirements.txt @@ -38,7 +38,6 @@ imblearn # pycairo maestrowf -MolVS mordred pytest diff --git a/pip/readthedocs_requirements.txt b/pip/readthedocs_requirements.txt index 02f83b92..4af65729 100644 --- a/pip/readthedocs_requirements.txt +++ b/pip/readthedocs_requirements.txt @@ -4,7 +4,6 @@ hyperopt IPython joblib matplotlib -molvs numpy pandas pygments diff --git a/pip/rocm_requirements.txt b/pip/rocm_requirements.txt index d3bbe1d5..78f1ac63 100644 --- a/pip/rocm_requirements.txt +++ b/pip/rocm_requirements.txt @@ -27,7 +27,6 @@ imblearn maestrowf -MolVS mordred pytest From 1ddf4efd11af9d00422c7c399714dd87134944e0 Mon Sep 17 00:00:00 2001 From: "Kevin S. McLoughlin" Date: Wed, 1 Apr 2026 14:03:19 -0700 Subject: [PATCH 3/6] Replaced bare 'except' statements that Ruff doesn't like. --- atomsci/ddm/utils/struct_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/atomsci/ddm/utils/struct_utils.py b/atomsci/ddm/utils/struct_utils.py index 6d15c528..d1740c0e 100644 --- a/atomsci/ddm/utils/struct_utils.py +++ b/atomsci/ddm/utils/struct_utils.py @@ -227,8 +227,7 @@ def base_mol_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=Fals std_mol = rdMolStandardize.IsotopeParent(rdMolStandardize.FragmentParent(cmpd_mol), skipStandardize=True) if removeCharges: std_mol = uncharger(std_mol) - #except Chem.rdchem.MolSanitizeException: - except: + except Exception: std_mol = None return std_mol @@ -300,8 +299,7 @@ def base_mol_from_inchi(inchi_str, useIsomericSmiles=True, removeCharges=False): std_mol = rdMolStandardize.IsotopeParent(rdMolStandardize.FragmentParent(cmpd_mol), skipStandardize=True) if removeCharges: std_mol = uncharger(std_mol) - #except Chem.rdchem.MolSanitizeException: - except: + except Exception: std_mol = None return std_mol From c52125deeaa5dde877ce167d5ea608fe0b276bee Mon Sep 17 00:00:00 2001 From: "Kevin S. McLoughlin" Date: Wed, 1 Apr 2026 17:39:45 -0700 Subject: [PATCH 4/6] Removed recursive dependency of atomsci-ampl on itself. Fixed some issues with torch and pyg versions: support Python 3.9 through 11, but not 3.12+ which doesn't work with DeepChem. Removed pip dependency and moved setuptools to [build-system] block, since it's no longer required for AMPL runtime. --- install-ampl/pyproject.toml | 93 +++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 51 deletions(-) diff --git a/install-ampl/pyproject.toml b/install-ampl/pyproject.toml index a046ff43..93b2860f 100644 --- a/install-ampl/pyproject.toml +++ b/install-ampl/pyproject.toml @@ -1,39 +1,21 @@ [project] -name = "ampl" -version = "1.0" +name = "atomsci-ampl" +description = "ATOM Modeling PipeLine" +version = "1.8.0" +requires-python = ">=3.9,<3.12" # NOTE: # * deepchem only works with python <= 3.10 (and possibly 3.11) # - seems to install in python 3.11 but website says suports only # python <= 3.10 -# - does not work with python 3.12+ due to tensorflow-addons dependency +# - does NOT work with python 3.12+ due to tensorflow-addons dependency # which is no longer supported and only has support up to python 3.11 # * ampl only works with python 3.9, 3.10, 3.11 (due to deepchem) -# - encoders / decoders need python 3.9 -# - everthing else works with python 3.9 and 3.10 (and possibly 3.11) -# * this will install on python 3.12 but won't have deepchem -requires-python = ">=3.9,<3.13" # Dependencies for all Python versions dependencies = [ - "setuptools", - "pip", - - # core dependencies - "torch_cluster", - "torch_scatter", - "torch_sparse", - "torch_spline_conv", "torch-geometric", - - # deepchem 2.8.0 deps [NOTE: may not work on python >= 3.11] - "deepchem[torch,tensorflow]==2.8.0; python_version >= '3.10' and python_version < '3.12'", - - # deepchem[tensorflow] deps - "tensorflow-cpu==2.14.0; python_version >= '3.10' and python_version < '3.12'", - - # atomsci-ampl deps [need to be added to atomsci-ampl] "xgboost", "umap-learn", "imbalanced-learn", @@ -42,28 +24,41 @@ dependencies = [ "pytest", "seaborn", "matplotcheck", - - # code repos - "atomsci-ampl; python_version >= '3.10' and python_version < '3.12'", + "numpy<2", "atomsci-clients", - # rdkit - "rdkit>=2024.3.5; python_version >= '3.10'", + # deepchem 2.7.1 for Python 3.9 version + "deepchem[torch,tensorflow]==2.7.1; python_version < '3.10'", + # deepchem 2.8.0 deps [NOTE: may not work on python 3.11] + "deepchem[torch,tensorflow]==2.8.0; python_version >= '3.10'", - "numpy<2", + # deepchem[tensorflow] deps + "tensorflow-cpu==2.14.0", - # python specific versions [dgl / pyg dependent] + "rdkit>=2024.3.5; python_version >= '3.10'", "dgl==2.1.0; python_version >= '3.10'", - "pyg_lib==0.3.1; python_version >= '3.10' and python_version < '3.12'", - "pyg_lib==0.4.0; python_version >= '3.12'", + "pyg_lib==0.2.0; python_version < '3.10'", + "pyg_lib==0.3.1; python_version >= '3.10'", - "torch==2.1.2+cpu; python_version >= '3.10' and python_version < '3.12'", - "torch==2.3.1+cpu; python_version >= '3.12'" + "torch==2.0.1+cpu; python_version < '3.10'", + "torch==2.1.2+cpu; python_version >= '3.10'", + "torch_cluster", + "torch_scatter", + "torch_sparse", + "torch_spline_conv", ] +[build-system] +requires = ["setuptools>=61.0", "wheel"] +build-backend = "setuptools.build_meta" + +[tool.setuptools.packages.find] +include = ["atomsci*"] +namespaces = true [tool.uv] +managed = true allow-insecure-host = [ "download.pytorch.org", "data.dgl.ai", @@ -81,9 +76,8 @@ url = "https://data.dgl.ai/wheels/repo.html" format = "flat" [[tool.uv.index]] -# NOTE: pyg libraries for torch >= 2.2 seem to have GLIBC incompatibilities on LC -name = "pyg_lib_py312" -url = "https://data.pyg.org/whl/torch-2.3.1+cpu.html" +name = "pyg_lib_py39" +url = "https://data.pyg.org/whl/torch-2.0.1+cpu.html" format = "flat" [[tool.uv.index]] @@ -93,9 +87,6 @@ format = "flat" [tool.uv.sources] # code repos -atomsci-ampl = [ - { git = "https://github.com/ATOMScience-org/AMPL.git", rev = "py310", marker = "python_version >= '3.10'"} -] atomsci-clients = { git = "ssh://git@czgitlab.llnl.gov:7999/atom/clients.git", rev = "master" } # dgl @@ -103,24 +94,24 @@ dgl = { index = "dgl" } # torch torch = { index = "pytorch" } + pyg_lib = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] torch_cluster = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] torch_scatter = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] torch_sparse = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] torch_spline_conv = [ - { index = "pyg_lib_py310", marker = "python_version >= '3.10' and python_version < '3.12'" }, - { index = "pyg_lib_py312", marker = "python_version >= '3.12'" }, + { index = "pyg_lib_py39", marker = "python_version < '3.10'" }, + { index = "pyg_lib_py310", marker = "python_version >= '3.10'" }, ] - From 1baeaba2b21014450365fef3fd75da8b867389aa Mon Sep 17 00:00:00 2001 From: Jessica Mauvais Date: Thu, 2 Apr 2026 10:27:30 -0700 Subject: [PATCH 5/6] changed to use uncharger.uncharge() instead of uncharger(std_mol) --- atomsci/ddm/test/unit/test_struct_utils.py | 12 ++++---- atomsci/ddm/utils/struct_utils.py | 33 +++++++++++----------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/atomsci/ddm/test/unit/test_struct_utils.py b/atomsci/ddm/test/unit/test_struct_utils.py index e9ac3004..4084a091 100644 --- a/atomsci/ddm/test/unit/test_struct_utils.py +++ b/atomsci/ddm/test/unit/test_struct_utils.py @@ -20,13 +20,13 @@ def test_get_rdkit_smiles(): def test_rdkit_smiles_from_smiles(): result = su.rdkit_smiles_from_smiles(test_smiles, useCanonicalTautomers=True) - assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1', - 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', + assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1', + 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', 'Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1'] result = su.rdkit_smiles_from_smiles(test_smiles, useCanonicalTautomers=False) assert result == ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1', - 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', + 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', 'Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1'] result = su.rdkit_smiles_from_smiles(test_smiles, useIsomericSmiles=False) @@ -65,7 +65,7 @@ def test_base_smiles_from_smiles(): useCanonicalTautomers=True, useIsomericSmiles=False, removeCharges=True, - workers=2) + workers=1) assert results == ['Brc1cc(OC2CC3CCC(C2)N3)cc(-c2ccccc2)c1', 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', 'Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1'] @@ -74,7 +74,7 @@ def test_base_smiles_from_smiles(): useCanonicalTautomers=True, useIsomericSmiles=False, removeCharges=True, - workers=2) + workers=1) assert results == ['', 'Brc1cc(OC2CC3CCC(C2)N3)cc(-c2ccccc2)c1', 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', @@ -157,7 +157,7 @@ def test_mol_wt_from_smiles(): def test_canonical_tautomers_from_smiles(): canonical_tautomers = [su.canonical_tautomers_from_smiles(s) for s in ['asdf']+test_smiles] - assert canonical_tautomers==[[''], + assert canonical_tautomers==[[''], ['Brc1cc(O[C@@H]2C[C@H]3CC[C@@H](C2)N3)cc(-c2ccccc2)c1'], ['Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1'], ['Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1']] diff --git a/atomsci/ddm/utils/struct_utils.py b/atomsci/ddm/utils/struct_utils.py index d1740c0e..1624ee9c 100644 --- a/atomsci/ddm/utils/struct_utils.py +++ b/atomsci/ddm/utils/struct_utils.py @@ -32,13 +32,12 @@ def get_rdkit_smiles(orig_smiles, useIsomericSmiles=True): mol = Chem.MolFromSmiles(orig_smiles) if mol is None: return "" - else: - return Chem.MolToSmiles(mol, isomericSmiles=useIsomericSmiles) + return Chem.MolToSmiles(mol, isomericSmiles=useIsomericSmiles) def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTautomers=False, workers=1): """Parallel version of get_rdkit_smiles. If orig_smiles is a list and workers is > 1, spawn 'workers' - threads to convert input SMILES strings to standardized RDKit format. + processes to convert input SMILES strings to standardized RDKit format. Args: orig_smiles (list or str): List of SMILES strings to canonicalize. @@ -57,7 +56,7 @@ def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTa if isinstance(orig_smiles, list): from functools import partial - func = partial(rdkit_smiles_from_smiles, useIsomericSmiles=useIsomericSmiles, + func = partial(rdkit_smiles_from_smiles, useIsomericSmiles=useIsomericSmiles, useCanonicalTautomers=useCanonicalTautomers) if workers > 1: from multiprocessing import pool @@ -83,7 +82,7 @@ def rdkit_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, useCanonicalTa def mols_from_smiles(orig_smiles, workers=1): """Parallel function to create RDKit Mol objects for a list of SMILES strings. If orig_smiles is a list - and workers is > 1, spawn 'workers' threads to convert input SMILES strings to Mol objects. + and workers is > 1, spawn 'workers' processes to convert input SMILES strings to Mol objects. Args: orig_smiles (list or str): List of SMILES strings to convert to Mol objects. @@ -113,7 +112,7 @@ def mols_from_smiles(orig_smiles, workers=1): return mols -def base_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=False, +def base_smiles_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=False, useCanonicalTautomers=False, workers=1): """Generate standardized SMILES strings for the largest fragments of each molecule specified by orig_smiles. Strips salt groups and replaces any rare isotopes with the most common ones for each element. @@ -224,9 +223,11 @@ def base_mol_from_smiles(orig_smiles, useIsomericSmiles=True, removeCharges=Fals if cmpd_mol is None: return None try: - std_mol = rdMolStandardize.IsotopeParent(rdMolStandardize.FragmentParent(cmpd_mol), skipStandardize=True) + std_mol = rdMolStandardize.IsotopeParent( + rdMolStandardize.FragmentParent(cmpd_mol), + skipStandardize=True) if removeCharges: - std_mol = uncharger(std_mol) + std_mol = uncharger.uncharge(std_mol) except Exception: std_mol = None return std_mol @@ -296,14 +297,16 @@ def base_mol_from_inchi(inchi_str, useIsomericSmiles=True, removeCharges=False): if cmpd_mol is None: return None try: - std_mol = rdMolStandardize.IsotopeParent(rdMolStandardize.FragmentParent(cmpd_mol), skipStandardize=True) + std_mol = rdMolStandardize.IsotopeParent( + rdMolStandardize.FragmentParent(cmpd_mol), + skipStandardize=True) if removeCharges: - std_mol = uncharger(std_mol) - except Exception: + std_mol = uncharger.uncharge(std_mol) + except Exception as e: + print(f"Error standardizing InChI {inchi_str}: {e}") std_mol = None return std_mol - def draw_structure(smiles_str, image_path, image_size=500): """Draw structure for the compound with the given SMILES string as a PNG file. @@ -422,7 +425,7 @@ def _merge_dataframes_by_smiles(dataframes, smiles_col='rdkit_smiles', id_col='c new_df = new_df.drop([lCol, rCol], axis=1) left_df = new_df - return new_df + return left_df def smiles_to_inchi_key(smiles): @@ -518,6 +521,4 @@ def canonical_tautomers_from_smiles(smiles): smiles = [smiles] mols = [Chem.MolFromSmiles(smi) for smi in smiles] canon_tautomers = [taut_enum.Canonicalize(m) if m is not None else None for m in mols] - return [Chem.MolToSmiles(m) if m is not None else '' for m in canon_tautomers] - - + return [Chem.MolToSmiles(m) if m is not None else '' for m in canon_tautomers] \ No newline at end of file From f9be6d78ef4ed85684ec75ae10fd7fd58fefcd79 Mon Sep 17 00:00:00 2001 From: Jessica Mauvais Date: Thu, 2 Apr 2026 13:02:50 -0700 Subject: [PATCH 6/6] put workers=2 back --- atomsci/ddm/test/unit/test_struct_utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/atomsci/ddm/test/unit/test_struct_utils.py b/atomsci/ddm/test/unit/test_struct_utils.py index 4084a091..5ce7463e 100644 --- a/atomsci/ddm/test/unit/test_struct_utils.py +++ b/atomsci/ddm/test/unit/test_struct_utils.py @@ -65,7 +65,7 @@ def test_base_smiles_from_smiles(): useCanonicalTautomers=True, useIsomericSmiles=False, removeCharges=True, - workers=1) + workers=2) assert results == ['Brc1cc(OC2CC3CCC(C2)N3)cc(-c2ccccc2)c1', 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', 'Brc1ccc(N2CCN(CCCN3CCC(Cc4ccccc4)CC3)CC2)cc1'] @@ -74,7 +74,7 @@ def test_base_smiles_from_smiles(): useCanonicalTautomers=True, useIsomericSmiles=False, removeCharges=True, - workers=1) + workers=2) assert results == ['', 'Brc1cc(OC2CC3CCC(C2)N3)cc(-c2ccccc2)c1', 'Brc1ccc(N2CCN(CCCCOc3ccc4ccccc4c3)CC2)cc1', @@ -177,4 +177,4 @@ def test_canonical_tautomers_from_smiles(): test_draw_structure() test_smiles_to_inchi_key() test_mol_wt_from_smiles() - test_canonical_tautomers_from_smiles() \ No newline at end of file + test_canonical_tautomers_from_smiles()