From b8f2d1c1cd5f196c824cf27ea7f0350b5c697132 Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 14:30:25 +0100 Subject: [PATCH 1/2] feat: more output files labelled temp --- workflow/rules/docking.smk | 8 ++++---- workflow/rules/preparation.smk | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/workflow/rules/docking.smk b/workflow/rules/docking.smk index bee23f9..980b8ad 100644 --- a/workflow/rules/docking.smk +++ b/workflow/rules/docking.smk @@ -63,7 +63,7 @@ rule prepare_docking_ligand: input: ligands=path.join("library", "{database}_{dataset}_{name}_{i}.txt"), output: - lig=path.join( + temp(path.join( "docking", "{receptorID}", "{dataset}", @@ -78,7 +78,7 @@ rule prepare_docking_ligand: outdir = os.path.join(params.directory, wildcards.receptorID, wildcards.dataset) os.makedirs(outdir, exist_ok=True) - shutil.copy(input.ligands, output.lig) + shutil.copy(input.ligands, output[0]) rule docking: @@ -137,10 +137,10 @@ rule mergeDocking: input: unpack(aggregate_in), output: - path.join( + temp(path.join( "docking", "{receptorID}", "{receptorID}_{database}_{dataset}_{name}.pdbqt.gz", - ), + )), script: "../scripts/mergeOutput.py" diff --git a/workflow/rules/preparation.smk b/workflow/rules/preparation.smk index 45c6eb7..a918578 100644 --- a/workflow/rules/preparation.smk +++ b/workflow/rules/preparation.smk @@ -121,7 +121,7 @@ rule makeReceptorPDBQT: input: path.join("scratch", "PDB", "receptor", "{receptorID}.pdb"), output: - path.join("prepared", "receptor", "{receptorID}.pdbqt"), + temp(path.join("prepared", "receptor", "{receptorID}.pdbqt")), conda: "../envs/openbabel.yml" envmodules: @@ -215,7 +215,7 @@ rule prepareGeometry: input: path.join(config["GRID_DIR"], "{receptorID}.gpf"), output: - path.join("grid", "{receptorID}_grid.txt"), + temp(path.join("grid", "{receptorID}_grid.txt")), log: "logs/prepareGeometry/{receptorID}.log", run: @@ -253,7 +253,7 @@ rule prepareDocking: input: rules.makeReceptorPDBQT.output, output: - path.join("receptor", "{receptorID}.txt"), + temp(path.join("receptor", "{receptorID}.txt")), log: "logs/prepareDocking/{receptorID}.log", shell: From 83b37a84f8c348a47ae15ae7a3a9d4d241dc9bf2 Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 14:35:16 +0100 Subject: [PATCH 2/2] fix: formatting --- workflow/rules/analyse.smk | 82 +++++++++++++++++++--------------- workflow/rules/docking.smk | 50 ++++++++++++--------- workflow/rules/preparation.smk | 6 ++- 3 files changed, 79 insertions(+), 59 deletions(-) diff --git a/workflow/rules/analyse.smk b/workflow/rules/analyse.smk index 1ace296..77c8194 100644 --- a/workflow/rules/analyse.smk +++ b/workflow/rules/analyse.smk @@ -7,8 +7,15 @@ import builtins import importlib from urllib.parse import urlparse + # all rules in this file are local rules -localrules: dockingResults, dockingResultsTxt, bestLigands, makeHistogram, mergeDocking +localrules: + dockingResults, + dockingResultsTxt, + bestLigands, + makeHistogram, + mergeDocking, + def url_reachable(url): """ @@ -47,7 +54,9 @@ def check_zinc_url(url): return bool(mod.zinc_available(url)) except Exception: # if the user function errors, fall back to default checks - logger.warning(f"user zinc_available in {modname} raised an exception; falling back to default checks") + logger.warning( + f"user zinc_available in {modname} raised an exception; falling back to default checks" + ) except Exception: continue @@ -83,11 +92,13 @@ def library_files(wildcards): out = [] # Use ZINC_MIRROR from config if available zinc_mirror = config.get("ZINC_MIRROR", "files.docking.org") - if not zinc_mirror.startswith("http://") and not zinc_mirror.startswith("https://"): + if not zinc_mirror.startswith("http://") and not zinc_mirror.startswith( + "https://" + ): zinc_mirror = "http://" + zinc_mirror zinc_mirror = zinc_mirror.rstrip("/") zinc_test_url = f"{zinc_mirror}/3D/" - + # test for ZINC reachability (robust): if not check_zinc_url(zinc_test_url): logger.info( @@ -123,20 +134,27 @@ def library_files(wildcards): ) sys.exit(1) rawOut = expand( - path.join( + path.join( "docking", "{receptorID}", "{receptorID}_{database}_{dataset}_{name}.pdbqt.gz", ), receptorID=config["TARGETS"][0].split(",")[0], database=config["DATABASE"], - dataset=[w+l for w in config["ZINC_INPUT"]["WEIGHT"] for l in config["ZINC_INPUT"]["LOGP"]], - name=[w+l+r+p+ph+c for w in config["ZINC_INPUT"]["WEIGHT"] - for l in config["ZINC_INPUT"]["LOGP"] - for r in config["ZINC_INPUT"]["REACT"] - for p in config["ZINC_INPUT"]["PURCHASE"] - for ph in config["ZINC_INPUT"]["PH"] - for c in config["ZINC_INPUT"]["CHARGE"]], + dataset=[ + w + l + for w in config["ZINC_INPUT"]["WEIGHT"] + for l in config["ZINC_INPUT"]["LOGP"] + ], + name=[ + w + l + r + p + ph + c + for w in config["ZINC_INPUT"]["WEIGHT"] + for l in config["ZINC_INPUT"]["LOGP"] + for r in config["ZINC_INPUT"]["REACT"] + for p in config["ZINC_INPUT"]["PURCHASE"] + for ph in config["ZINC_INPUT"]["PH"] + for c in config["ZINC_INPUT"]["CHARGE"] + ], ) for i in rawOut: weighLog = i.split("_")[-2] @@ -183,7 +201,9 @@ def library_files(wildcards): logger.warning(f"Could not connect to ZINC to validate subset: {e}") try: - r_zinc = requests.get("https://zinc15.docking.org/", allow_redirects=True, timeout=10) + r_zinc = requests.get( + "https://zinc15.docking.org/", allow_redirects=True, timeout=10 + ) if r_zinc.status_code != 200: # test if ZINC database is available logger.info( "The ZINC database is not accessible right now. Perhaps it is temporarily down?" @@ -291,9 +311,7 @@ rule removeDuplicateLigands: input: path.join("results", "{receptorID}_{percentage}.pdbqt"), output: - path.join( - "rescreening", "unique", "{receptorID}_{percentage}.pdbqt" - ), + path.join("rescreening", "unique", "{receptorID}_{percentage}.pdbqt"), log: "logs/removeDuplicateLigands_{receptorID}_{percentage}.log", shell: @@ -302,13 +320,15 @@ rule removeDuplicateLigands: checkpoint split2: input: - path.join( - "rescreening", "unique", "{receptorID}_{percentage}.pdbqt" - ), + path.join("rescreening", "unique", "{receptorID}_{percentage}.pdbqt"), output: - temp(directory( - os.path.join("scratch", "rescreening_ligands_{percentage}", "{receptorID}") - )), + temp( + directory( + os.path.join( + "scratch", "rescreening_ligands_{percentage}", "{receptorID}" + ) + ) + ), log: "logs/split2_{receptorID}_{percentage}.log", script: @@ -321,9 +341,7 @@ rule prepareLigands2: "scratch", "rescreening_ligands_{percentage}", "{receptorID}", "{i}.pdbqt" ), output: - ligands=path.join( - "rescreening_{percentage}", "{name}_{receptorID}", "{i}.txt" - ), + ligands=path.join("rescreening_{percentage}", "{name}_{receptorID}", "{i}.txt"), log: "logs/prepareLigands2_{receptorID}_{percentage}_{name}_{i}.log", shell: @@ -335,11 +353,9 @@ rule prepareSecondDocking: grid=path.join("grid", "{name}_grid.txt"), receptor=path.join("prepared", "receptor", "{name}.pdbqt"), output: - grid=path.join( - "rescreening_{percentage}", "{name}_{receptorID}", "{name}.grd" - ), + grid=path.join("rescreening_{percentage}", "{name}_{receptorID}", "{name}.grd"), receptor=path.join( - "rescreening_{percentage}", "{name}_{receptorID}", "{name}.rec" + "rescreening_{percentage}", "{name}_{receptorID}", "{name}.rec" ), log: "logs/prepareSecondDocking_{name}_{receptorID}_{percentage}.log", @@ -352,12 +368,8 @@ rule prepareSecondDocking: rule docking2: input: - ligands=path.join( - "rescreening_{percentage}", "{name}_{receptorID}", "{i}.txt" - ), - grid=path.join( - "rescreening_{percentage}", "{name}_{receptorID}", "{name}.grd" - ), + ligands=path.join("rescreening_{percentage}", "{name}_{receptorID}", "{i}.txt"), + grid=path.join("rescreening_{percentage}", "{name}_{receptorID}", "{name}.grd"), receptor=path.join( "rescreening_{percentage}", "{name}_{receptorID}", "{name}.rec" ), diff --git a/workflow/rules/docking.smk b/workflow/rules/docking.smk index 980b8ad..6b62f16 100644 --- a/workflow/rules/docking.smk +++ b/workflow/rules/docking.smk @@ -1,7 +1,11 @@ -localrules: prepare_docking_local, prepare_docking_ligand +localrules: + prepare_docking_local, + prepare_docking_ligand, + from snakemake.exceptions import WorkflowError + def get_spacing(gridfile): """Return spacing as float parsed from a .gpf grid file. @@ -35,24 +39,20 @@ rule prepare_docking_local: receptor=rules.makeReceptorPDBQT.output, geometry=path.join("grid", "{receptorID}_grid.txt"), output: - temp(path.join( - "docking", "{receptorID}", "{dataset}", "{receptorID}.txt" - )), - temp(path.join( - "docking", "{receptorID}", "{dataset}", "{receptorID}_grid.txt" - )), + temp(path.join("docking", "{receptorID}", "{dataset}", "{receptorID}.txt")), + temp(path.join("docking", "{receptorID}", "{dataset}", "{receptorID}_grid.txt")), message: ( f" Copying receptor from {str(input.receptor)} to {str(output[0])}; " f"Copying geometry from {str(input.geometry)} to {str(output[1])}" - ), + ) run: import shutil + shutil.copy(str(input.receptor), str(output[0])) shutil.copy(str(input.geometry), str(output[1])) - rule prepare_docking_ligand: """Copy a single ligand-list entry into the per-job output directory. @@ -63,11 +63,13 @@ rule prepare_docking_ligand: input: ligands=path.join("library", "{database}_{dataset}_{name}_{i}.txt"), output: - temp(path.join( - "docking", - "{receptorID}", - "{dataset}", - "{database}_{dataset}_{name}_{i}.txt", + temp( + path.join( + "docking", + "{receptorID}", + "{dataset}", + "{database}_{dataset}_{name}_{i}.txt", + ) ), params: directory=path.join("docking"), @@ -105,18 +107,20 @@ rule docking: config["VINALC"], params: # get spacing from the receptor's .gpf at runtime using wildcards - space=lambda wildcards: get_spacing(os.path.join(config['GRID_DIR'], f'{wildcards.receptorID}.gpf')), + space=lambda wildcards: get_spacing( + os.path.join(config["GRID_DIR"], f"{wildcards.receptorID}.gpf") + ), log: "logs/docking/{receptorID}_{dataset}_{database}_{name}_{i}.log", resources: - mpi="mpiexec" + mpi="mpiexec", shell: ( "cd docking/{wildcards.receptorID}/{wildcards.dataset} ; " "{resources.mpi} vinalc --recList {wildcards.receptorID}.txt " "--ligList {wildcards.database}_{wildcards.dataset}_{wildcards.name}_{wildcards.i}.txt " "--geoList {wildcards.receptorID}_grid.txt --granularity {params.space} " - ) + ) def aggregate_in(wildcards): @@ -137,10 +141,12 @@ rule mergeDocking: input: unpack(aggregate_in), output: - temp(path.join( - "docking", - "{receptorID}", - "{receptorID}_{database}_{dataset}_{name}.pdbqt.gz", - )), + temp( + path.join( + "docking", + "{receptorID}", + "{receptorID}_{database}_{dataset}_{name}.pdbqt.gz", + ) + ), script: "../scripts/mergeOutput.py" diff --git a/workflow/rules/preparation.smk b/workflow/rules/preparation.smk index a918578..4091ad2 100644 --- a/workflow/rules/preparation.smk +++ b/workflow/rules/preparation.smk @@ -32,9 +32,10 @@ rule targetProtein: rule getZINCdata: output: temp(path.join(DATABASE, "{dataset}", "{name}.pdbqt.gz")), - log: "logs/downloadZINC/{dataset}_{name}.log", + log: + "logs/downloadZINC/{dataset}_{name}.log", message: - "Downloading ZINC data for {wildcards.name} from ZINC database {wildcards.dataset}...", + "Downloading ZINC data for {wildcards.name} from ZINC database {wildcards.dataset}..." script: "../scripts/ZINCdownload.py" @@ -147,6 +148,7 @@ rule gunzip: "logs/gunzip/{database}_{dataset}_{name}_{filetype}.log", run: import gzip, shutil, os + try: with gzip.open(input[0], "rb") as src, open(output[0], "wb") as dst: shutil.copyfileobj(src, dst)