diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index 9592691..df2fa62 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -33,7 +33,7 @@ jobs: # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --indent-size 3 --statistics - name: Test with pytest run: | pytest diff --git a/pyproject.toml b/pyproject.toml index dcd1ebd..945715b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "rda_python_miscs" -version = "1.0.7" +version = "2.0.0" authors = [ { name="Zaihua Ji", email="zji@ucar.edu" }, ] @@ -24,16 +24,10 @@ dependencies = [ "rda_python_setuid", ] -[tool.setuptools] -include-package-data = true - -[tool.setuptools.packages.find] -where = ["src"] - -[tool.setuptools.package-data] -"rda_python_miscs" = ["bashqsub.usg", "tcshqsub.usg", "rdasub.usg", "rdacp.usg", - "rdakill.usg", "rdals.usg", "rdamod.usg", "rdaown.usg", - "rdaps.usg", "rdazip.usg"] +[tool.pytest.ini_options] +pythonpath = [ + "src" +] [project.urls] "Homepage" = "https://github.com/NCAR/rda-python-miscs" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d2294aa --- /dev/null +++ b/requirements.txt @@ -0,0 +1,6 @@ +psycopg2-binary==2.9.10 +pytest +rda-python-globus +unidecode +hvac +rda_python_common diff --git a/src/rda_python_miscs/bash_qsub.py b/src/rda_python_miscs/bash_qsub.py new file mode 100644 index 0000000..021d327 --- /dev/null +++ b/src/rda_python_miscs/bash_qsub.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +# +################################################################################## +# +# 
# default environment modules (comma separated), keyed by the value of option -res
DEFMODS = {
    'default': "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2",
}

# shell commands (comma separated) appended after the module loads, keyed by -res
DEFLIBS = {
    'default': "conda activate /glade/work/gdexdata/conda-envs/pg-gdex",
}

# module name -> module to unload before loading it (currently none)
SWAPMODS = {
}

RESOURCES = {   # resource list for option -l
    'walltime': '6:00:00',   # if this is changed, change defpbstime in PgCheck.py too
    'select': '1:ncpus=1:mem=1gb',
}

SOPTIONS = {    # single-dash option values
    'o': None,   # will set to default if not provided
    'e': None,
    'A': "P43713000",
    'q': "gdex@casper-pbs",
    # 'm': 'a',
    'm': 'n',
}


def _module_base(spec):
    """Return the module name of *spec* without a trailing '/version' part."""
    ms = re.match(r'^(.+)/', spec)
    return ms.group(1) if ms else spec


#
# main function to execute this script
#
def main():
    """Parse the command line, build the PBS bash script and submit it.

    Single-letter options (-A, -q, -l, ...) are stored in SOPTIONS and later
    written out as '#PBS -<letter> <value>' directives; -b only sets
    PgLOG.PGLOG['BCKGRND']. The customized options -cmd, -cwd, -mod and -res
    are kept in a local dict, and -env is stored as single-letter option 'v'.
    Parsing stops at the value of -cmd; all remaining arguments are appended
    to the command line of the submitted job.
    """
    aname = 'bashqsub'
    pname = 'gdexqsub'
    PgLOG.set_help_path(__file__)
    gdexsub = PgLOG.BCHCMDS['PBS']
    coptions = {'cmd': None, 'cwd': None, 'env': None, 'mod': None, 'res': 'default'}   # customized options
    copts = '|'.join(coptions)
    option = None
    argv = sys.argv[1:]
    if not argv: PgLOG.show_usage(aname)
    PgLOG.PGLOG['LOGFILE'] = pname + ".log"
    PgLOG.cmdlog("{} {}".format(aname, ' '.join(argv)))
    if not PgLOG.valid_command(gdexsub):
        PgLOG.pglog("{}: miss {} command to submit batch job".format(gdexsub, PgLOG.PGLOG['PBSNAME']), PgLOG.LGWNEX)

    while argv:
        arg = argv.pop(0)
        ms = re.match(r'^-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option == "b":
                PgLOG.PGLOG['BCKGRND'] = 1
                option = None
            else:
                SOPTIONS[option] = ''   # value-less until the next argument fills it in
            continue
        ms = re.match(r'^-({})$'.format(copts), arg)
        if ms:
            option = ms.group(1)
            if option == "env": option = 'v'
            continue

        if not option:
            PgLOG.pglog("{}: Value passed in without leading option for {}".format(arg, gdexsub), PgLOG.LGEREX)
        if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg):   # quote string with space but not quoted yet
            if arg.find("'") > -1:
                arg = '"{}"'.format(arg)
            else:
                arg = "'{}'".format(arg)

        if option in coptions:
            coptions[option] = arg
            if option == "cmd": break   # everything left in argv belongs to the command
        else:
            SOPTIONS[option] = arg
        option = None

    if not coptions['cmd']: PgLOG.pglog(aname + ": specify command via option -cmd to run", PgLOG.LGWNEX)
    args = PgLOG.argv_to_string(argv, 0)   # append command options
    if not SOPTIONS['o']: SOPTIONS['o'] = "{}/{}/".format(PgLOG.PGLOG['LOGPATH'], pname)
    if not SOPTIONS['e']: SOPTIONS['e'] = "{}/{}/".format(PgLOG.PGLOG['LOGPATH'], pname)
    if 'N' not in SOPTIONS: SOPTIONS['N'] = op.basename(coptions['cmd'])

    if coptions['cwd']:
        # expand environment variables only when the path references one;
        # the test must look for '$' (it used to look for the letter 's')
        if '$' in coptions['cwd']:
            coptions['cwd'] = PgLOG.replace_environments(coptions['cwd'], '', PgLOG.LGWNEX)
        os.chdir(coptions['cwd'])

    cmd = PgLOG.valid_command(coptions['cmd'])
    if not cmd and not re.match(r'^/', coptions['cmd']): cmd = PgLOG.valid_command('./' + coptions['cmd'])
    if not cmd: PgLOG.pglog(coptions['cmd'] + ": Cannot find given command to run", PgLOG.LGWNEX)
    if args: cmd += " " + args

    sbuf = build_bash_script(cmd, coptions, gdexsub)
    PgLOG.pglog(sbuf, PgLOG.MSGLOG)
    # NOTE(review): 'bind mouting' looks like a misspelling of 'bind mounting';
    # left untouched because it is matched against runtime output -- confirm
    PgLOG.PGLOG['ERR2STD'] = ['bind mouting']
    PgLOG.pgsystem(gdexsub, PgLOG.LOGWRN, 6, sbuf)
    PgLOG.PGLOG['ERR2STD'] = []

    sys.exit(0)


#
# build bash script to submit a PBS batch job
#
def build_bash_script(cmd, coptions, gdexsub):
    """Return the text of the bash script handed to the PBS submit command.

    cmd      -- full command line to run inside the batch job
    coptions -- customized options; only 'res' and 'mod' are read here
    gdexsub  -- submit command name; accepted for interface compatibility, unused
    """
    buf = "#!/usr/bin/bash\n\n"   # qsub starting bash script

    if 'l' in SOPTIONS: add_resources()   # folds -l values into RESOURCES
    # add options to bash script for qsub
    for option in SOPTIONS:
        buf += "#PBS -" + option
        if SOPTIONS[option]: buf += " {}".format(SOPTIONS[option])
        buf += "\n"
    for option in RESOURCES:
        buf += "#PBS -l"
        if RESOURCES[option]: buf += " {}={}".format(option, RESOURCES[option])
        buf += "\n"

    # always include the login user's bash resource file
    homedir = "{}/{}".format(PgLOG.PGLOG['USRHOME'], PgLOG.PGLOG['CURUID'])
    buf += "export HOME={}\n".format(homedir)
    buf += "source /etc/profile.d/z00_modules.sh\n"
    buf += "source /glade/u/apps/opt/conda/etc/profile.d/conda.sh\n"
    buf += "source {}/.bashrc\n".format(homedir)
    buf += "pwd; hostname; date\n"
    buf += add_modules(coptions['res'], coptions['mod'])
    buf += set_vm_libs(coptions['res'])
    buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd)

    return buf


#
# check and add resource options
#
def add_resources():
    """Merge the comma separated name=value pairs of option -l into RESOURCES.

    The 'l' entry is removed from SOPTIONS afterwards so that it is not
    emitted a second time as a plain '#PBS -l' directive.
    """
    for res in SOPTIONS['l'].split(','):
        ms = re.match(r'^([^=]+)=(.+)$', res)
        if ms:
            RESOURCES[ms.group(1)] = ms.group(2)
        else:
            PgLOG.pglog(res + ": use '=' to separate resource name & value", PgLOG.LGEREX)
    del SOPTIONS['l']


#
# add module loads for modules provided
#
def add_modules(res, mods):
    """Return the 'module ...' lines for the default and the requested modules.

    res  -- key into DEFMODS; unknown keys fall back to 'default'
    mods -- comma separated modules from option -mod, or None; an entry with
            a leading '/' is added via 'module use' instead of 'module load'
    """
    mbuf = "\n"
    defmods = DEFMODS[res] if res in DEFMODS else DEFMODS['default']

    dmods = defmods.split(',')
    loaded = set()   # version-less names of the default modules
    for dmod in dmods:
        smod = _module_base(dmod)
        loaded.add(smod)
        if smod in SWAPMODS: mbuf += "module unload {}\n".format(SWAPMODS[smod])
        mbuf += "module load {}\n".format(dmod)

    if mods:
        for amod in mods.split(','):
            if not amod: continue   # tolerate empty entries such as 'a,,b'
            if amod.startswith('/'):
                mbuf += "module use {}\n".format(amod)
                continue
            smod = _module_base(amod)
            # compare version-less names on both sides, so a default given
            # with a version (e.g. 'netcdf/4.9.2') is still seen as loaded
            if smod in loaded: continue
            if smod in SWAPMODS: mbuf += "module unload {}\n".format(SWAPMODS[smod])
            mbuf += "module load {}\n".format(amod)

    return mbuf


#
# set virtual machine libraries
#
def set_vm_libs(res):
    """Return the DEFLIBS commands for *res*, one per line, or '' if none."""
    deflibs = DEFLIBS[res] if res in DEFLIBS else DEFLIBS['default']
    if not deflibs: return ''

    return "\n" + "".join(dlib + "\n" for dlib in deflibs.split(','))


#
# call main() to start program
#
if __name__ == "__main__": main()
class BashQsub(PgLOG):
    """Submit a command as a PBS batch job wrapped in a generated bash script."""

    def __init__(self):
        super().__init__()
        # default environment modules (comma separated), keyed by option -res
        self.DEFMODS = {
            'default': "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2",
        }
        # shell commands (comma separated) appended after the module loads
        self.DEFLIBS = {
            'default': "conda activate /glade/work/gdexdata/conda-envs/pg-gdex",
        }
        # module name -> module to unload before loading it (currently none)
        self.SWAPMODS = {
        }
        self.RESOURCES = {   # resource list for option -l
            'walltime': '6:00:00',   # if this is changed, change defpbstime in PgCheck.py too
            'select': '1:ncpus=1:mem=1gb',
        }
        self.SOPTIONS = {    # single-dash option values
            'o': None,   # will set to default if not provided
            'e': None,
            'A': "P43713000",
            'q': "gdex@casper-pbs",
            # 'm': 'a',
            'm': 'n',
        }
        self.coptions = {'cmd': None, 'cwd': None, 'env': None, 'mod': None, 'res': 'default'}   # customized options
        self.gdexsub = self.BCHCMDS['PBS']
        self.args = None   # trailing arguments of the command given via -cmd

    # function to read parameters
    def read_parameters(self):
        """Parse sys.argv into self.SOPTIONS, self.coptions and self.args.

        Single-letter options are stored in self.SOPTIONS (-b only sets
        self.PGLOG['BCKGRND']); -cmd, -cwd, -mod and -res go to self.coptions
        and -env is stored as single-letter option 'v'. Parsing stops at the
        value of -cmd; the remaining arguments become self.args. If -cwd is
        given, the process changes into that directory.
        """
        aname = 'bashqsub'
        pname = 'gdexqsub'
        self.set_help_path(__file__)
        copts = '|'.join(self.coptions)
        option = None
        argv = sys.argv[1:]
        if not argv: self.show_usage(aname)
        self.PGLOG['LOGFILE'] = pname + ".log"
        self.cmdlog("{} {}".format(aname, ' '.join(argv)))
        if not self.valid_command(self.gdexsub):
            self.pglog("{}: miss {} command to submit batch job".format(self.gdexsub, self.PGLOG['PBSNAME']), self.LGWNEX)
        while argv:
            arg = argv.pop(0)
            ms = re.match(r'^-(\w)$', arg)
            if ms:
                option = ms.group(1)
                if option == "b":
                    self.PGLOG['BCKGRND'] = 1
                    option = None
                else:
                    self.SOPTIONS[option] = ''   # value-less until the next argument fills it in
                continue
            ms = re.match(r'^-({})$'.format(copts), arg)
            if ms:
                option = ms.group(1)
                if option == "env": option = 'v'
                continue
            if not option:
                self.pglog("{}: Value passed in without leading option for {}".format(arg, self.gdexsub), self.LGEREX)
            if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg):   # quote string with space but not quoted yet
                if arg.find("'") > -1:
                    arg = '"{}"'.format(arg)
                else:
                    arg = "'{}'".format(arg)
            if option in self.coptions:
                self.coptions[option] = arg
                if option == "cmd": break   # everything left in argv belongs to the command
            else:
                self.SOPTIONS[option] = arg
            option = None
        self.args = self.argv_to_string(argv, 0)   # append command options
        if not self.coptions['cmd']: self.pglog(aname + ": specify command via option -cmd to run", self.LGWNEX)
        if not self.SOPTIONS['o']: self.SOPTIONS['o'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname)
        if not self.SOPTIONS['e']: self.SOPTIONS['e'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname)
        if 'N' not in self.SOPTIONS: self.SOPTIONS['N'] = op.basename(self.coptions['cmd'])
        if self.coptions['cwd']:
            # expand environment variables only when the path references one;
            # the test must look for '$' (it used to look for the letter 's')
            if '$' in self.coptions['cwd']:
                self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX)
            os.chdir(self.coptions['cwd'])

    # function to start actions
    def start_actions(self):
        """Locate the command, build the bash script and pass it to the submit command."""
        cmd = self.valid_command(self.coptions['cmd'])
        if not cmd and not re.match(r'^/', self.coptions['cmd']): cmd = self.valid_command('./' + self.coptions['cmd'])
        if not cmd: self.pglog(self.coptions['cmd'] + ": Cannot find given command to run", self.LGWNEX)
        if self.args: cmd += " " + self.args
        sbuf = self.build_bash_script(cmd)
        self.pglog(sbuf, self.MSGLOG)
        # NOTE(review): 'bind mouting' looks like a misspelling of 'bind mounting';
        # left untouched because it is matched against runtime output -- confirm
        self.PGLOG['ERR2STD'] = ['bind mouting']
        self.pgsystem(self.gdexsub, self.LOGWRN, 6, sbuf)
        self.PGLOG['ERR2STD'] = []

    # build bash script to submit a PBS batch job
    def build_bash_script(self, cmd):
        """Return the text of the bash script that runs *cmd* as a PBS job."""
        buf = "#!/usr/bin/bash\n\n"   # qsub starting bash script
        if 'l' in self.SOPTIONS: self.add_resources()   # folds -l values into RESOURCES
        # add options to bash script for qsub
        for option in self.SOPTIONS:
            buf += "#PBS -" + option
            if self.SOPTIONS[option]: buf += " {}".format(self.SOPTIONS[option])
            buf += "\n"
        for option in self.RESOURCES:
            buf += "#PBS -l"
            if self.RESOURCES[option]: buf += " {}={}".format(option, self.RESOURCES[option])
            buf += "\n"
        # always include the login user's bash resource file
        homedir = "{}/{}".format(self.PGLOG['USRHOME'], self.PGLOG['CURUID'])
        buf += "export HOME={}\n".format(homedir)
        buf += "source /etc/profile.d/z00_modules.sh\n"
        buf += "source /glade/u/apps/opt/conda/etc/profile.d/conda.sh\n"
        buf += "source {}/.bashrc\n".format(homedir)
        buf += "pwd; hostname; date\n"
        buf += self.add_modules(self.coptions['res'], self.coptions['mod'])
        buf += self.set_vm_libs(self.coptions['res'])
        buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd)
        return buf

    # check and add resource options
    def add_resources(self):
        """Merge the name=value pairs of option -l into self.RESOURCES.

        The 'l' entry is removed from self.SOPTIONS afterwards so that it is
        not emitted a second time as a plain '#PBS -l' directive.
        """
        for res in self.SOPTIONS['l'].split(','):
            ms = re.match(r'^([^=]+)=(.+)$', res)
            if ms:
                self.RESOURCES[ms.group(1)] = ms.group(2)
            else:
                self.pglog(res + ": use '=' to separate resource name & value", self.LGEREX)
        del self.SOPTIONS['l']

    # add module loads for modules provided
    def add_modules(self, res, mods):
        """Return the 'module ...' lines for the default and the requested modules.

        res  -- key into self.DEFMODS; unknown keys fall back to 'default'
        mods -- comma separated modules from option -mod, or None; an entry
                with a leading '/' is added via 'module use'
        """
        def base_name(spec):
            # module name without a trailing '/version' part
            ms = re.match(r'^(.+)/', spec)
            return ms.group(1) if ms else spec

        mbuf = "\n"
        defmods = self.DEFMODS[res] if res in self.DEFMODS else self.DEFMODS['default']
        loaded = set()   # version-less names of the default modules
        for dmod in defmods.split(','):
            smod = base_name(dmod)
            loaded.add(smod)
            if smod in self.SWAPMODS: mbuf += "module unload {}\n".format(self.SWAPMODS[smod])
            mbuf += "module load {}\n".format(dmod)
        if mods:
            for amod in mods.split(','):
                if not amod: continue   # tolerate empty entries such as 'a,,b'
                if amod.startswith('/'):
                    mbuf += "module use {}\n".format(amod)
                    continue
                smod = base_name(amod)
                # compare version-less names on both sides, so a default given
                # with a version (e.g. 'netcdf/4.9.2') is still seen as loaded
                if smod in loaded: continue
                if smod in self.SWAPMODS: mbuf += "module unload {}\n".format(self.SWAPMODS[smod])
                mbuf += "module load {}\n".format(amod)
        return mbuf

    # set virtual machine libraries
    def set_vm_libs(self, res):
        """Return the DEFLIBS commands for *res*, one per line, or '' if none."""
        deflibs = self.DEFLIBS[res] if res in self.DEFLIBS else self.DEFLIBS['default']
        if not deflibs: return ''
        return "\n" + "".join(dlib + "\n" for dlib in deflibs.split(','))


# main function to execute this script
def main():
    """Create a BashQsub instance, read the parameters and submit the job."""
    qsub = BashQsub()   # not named 'object', which would shadow the builtin
    qsub.read_parameters()
    qsub.start_actions()
    qsub.pgexit(0)


# call main() to start program
if __name__ == "__main__": main()
# define some constants for gdexls actions
DIDX = 3             # description column index
CLMT = 500           # reformat list if count reach this limit
WIDTHS = [0, 0, 0]   # WIDTHS for formatted display
ALIGNS = [0, 1, 1]   # alignment, 0 - left; 1 - right

GDEXLS = {
    'd': 0,      # 1 to list directory information only
    'f': 0,      # 1 to list file information only
    'N': 0,      # 1 to list files unformatted
    'r': 0,      # 1 if recursive all
    'R': 0,      # > 0 to set recursive limit
    'D': None,   # specify delimiting symbols, default to '  '
}

LINFO = {
    'files': [],
    'curdir': None,
    'tpath': None,
    'dhome': None,
    'dsid': None,
    'dcnt': 0,
    'gcnt': 0,
    'fcnt': 0,
    'pcnt': 0,
    'pgrecs': []
}


def _strip_prefix(path, prefix):
    """Return *path* without a leading *prefix* (unchanged if it does not start with it)."""
    if prefix and path.startswith(prefix): return path[len(prefix):]
    return path


def _web_path(file):
    """Return *file* relative to the cached dataset home, or None if it is outside."""
    prefix = LINFO['dhome'] + "/"
    return file[len(prefix):] if file.startswith(prefix) else None


def _sql_quote(value):
    """Double single quotes so *value* can sit inside a quoted SQL string literal."""
    return value.replace("'", "''")


#
# main function to run the application
#
def main():
    """Parse the command line, list the requested paths and print a summary.

    Flags -d, -f, -N and -r take no value; -R (integer recursion limit) and
    -D (column delimiter) take one. Every other argument is a path to list;
    without any path all entries of the current directory are listed.
    """
    PgDBI.view_dbinfo()
    PgLOG.set_help_path(__file__)
    PgLOG.PGLOG['LOGFILE'] = "gdexls.log"   # set different log file
    LINFO['curdir'] = get_real_path(os.getcwd())
    argv = sys.argv[1:]
    PgLOG.pglog("gdexls {} ({})".format(' '.join(argv), LINFO['curdir']))
    option = defopt = 'l'
    for arg in argv:
        if re.match(r'-(h|-*help|\?)$', arg): PgLOG.show_usage("gdexls")
        ms = re.match(r'-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option not in GDEXLS: PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
            if option in 'dfNr':
                GDEXLS[option] = 1
                option = defopt
            continue
        if not option: PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX)
        if option == 'l':
            path = get_real_path(arg)
            # realpath() drops a trailing '/', but display_top_list() relies on
            # it to suppress the directory's own line, so put it back
            if arg.endswith('/') and not path.endswith('/'): path += '/'
            LINFO['files'].append(path)
            defopt = None
        else:
            if option == 'R':
                try:
                    GDEXLS[option] = int(arg)
                except ValueError:
                    PgLOG.pglog(arg + ": Integer value expected for Option -R", PgLOG.LGEREX)
            else:
                GDEXLS[option] = arg
            option = defopt

    if not LINFO['files']:
        LINFO['files'] = sorted(glob.glob('*'))   # view all files in current directory
        if not LINFO['files']:
            sys.stderr.write(LINFO['curdir'] + ": Empty directory\n")
            PgLOG.pgexit(1)

    if not (GDEXLS['d'] or GDEXLS['f']):
        GDEXLS['d'] = GDEXLS['f'] = 1   # list both directories and files as default
    if not GDEXLS['D']: GDEXLS['D'] = '|' if GDEXLS['N'] else "  "   # default delimiter for no format display
    if not GDEXLS['R'] and GDEXLS['r']: GDEXLS['R'] = 1000

    display_top_list(LINFO['files'])   # display or cache file/directory list
    if LINFO['pcnt'] > 0: display_format_list()   # if some left over

    total = LINFO['dcnt'] + LINFO['gcnt'] + LINFO['fcnt']
    if total > 1:
        parts = []
        for key, label in (('dcnt', 'Dataset'), ('gcnt', 'Group'), ('fcnt', 'File')):
            cnt = LINFO[key]
            if cnt > 0: parts.append("{} {}{}".format(cnt, label, 's' if cnt > 1 else ''))
        print("Total {} displayed".format(" & ".join(parts)))
    elif total == 0:
        sys.stderr.write((LINFO['tpath'] if LINFO['tpath'] else LINFO['curdir']) + ": No GDEX data information found\n")
        PgLOG.pgexit(1)

    PgLOG.pgexit(0)


#
# display the top level list
#
def display_top_list(files):
    """Display each path in *files*; descend into directories as requested.

    A directory given with a trailing '/' is not displayed itself, only its
    content. LINFO['tpath'] is set to the prefix removed from displayed names.
    """
    for file in files:
        if not op.exists(file):
            sys.stderr.write(file + ": NOT exists\n")
            continue

        isdir = 1 if op.isdir(file) else 0
        display = 1
        if isdir and file.endswith('/'):
            display = 0   # do not display the directory info if it is ended by '/'
            file = file[:-1]

        if not file.startswith('/'): file = PgLOG.join_paths(LINFO['curdir'], file)
        LINFO['tpath'] = (op.dirname(file) if display else file) + "/"
        if display: display_line(file, isdir)
        if isdir and (GDEXLS['R'] or not display or not LINFO['dsid']):
            display_list(sorted(glob.glob(file + "/*")), 1)
        if LINFO['pcnt'] > CLMT: display_format_list()


#
# recursively display directory/file info
#
def display_list(files, level):
    """Display *files* and recurse into directories while level < GDEXLS['R']."""
    for file in files:
        isdir = 1 if op.isdir(file) else 0
        display_line(file, isdir)
        if isdir and level < GDEXLS['R']:
            display_list(sorted(glob.glob(file + "/*")), level + 1)
        if LINFO['pcnt'] > CLMT: display_format_list()


#
# find dataset/group info; display or cache file
#
def display_line(file, isdir):
    """Look up *file* in the database and display its dataset/group/file record.

    The dataset id and dataset home of the previous call are cached in LINFO
    and reused while *file* stays below that home; otherwise the dataset is
    looked up again. Nothing is shown for paths without database information.
    """
    wfile = None
    if LINFO['dsid'] and LINFO['dhome']:
        wfile = _web_path(file)
    if wfile is None:
        LINFO['dsid'] = PgUtil.find_dataset_id(file)
        # drop the stale home so it can never be paired with a different dsid
        LINFO['dhome'] = None
        if LINFO['dsid'] is None: return   # skip for missing dsid

        pgrec = PgDBI.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns",
                            "dsid = '{}'".format(LINFO['dsid']), PgLOG.LGEREX)
        if not pgrec: return None

        LINFO['dhome'] = "{}/{}".format(PgLOG.PGLOG['DSDHOME'], LINFO['dsid'])
        if LINFO['dhome'] == file:
            file = _strip_prefix(file, LINFO['tpath'])
            if GDEXLS['d']:
                title = pgrec['title'] if pgrec['title'] else ''
                display_record(["D" + file, pgrec['ns'], str(pgrec['nc']), title])
                LINFO['dcnt'] += 1
            return

        wfile = _web_path(file)
        if wfile is None: return

    if isdir:
        if GDEXLS['d']:   # check and display group info for directory
            pgrec = PgDBI.pgget("dsgroup", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns",
                                "dsid = '{}' AND webpath = '{}'".format(LINFO['dsid'], _sql_quote(wfile)), PgLOG.LGEREX)
            if pgrec:
                file = _strip_prefix(file, LINFO['tpath'])
                title = pgrec['title'] if pgrec['title'] else ''
                display_record(["G" + file, pgrec['ns'], str(pgrec['nc']), title])
                LINFO['gcnt'] += 1

    elif GDEXLS['f']:   # check and display file info
        pgrec = PgSplit.pgget_wfile(LINFO['dsid'], "data_size, data_format, note",
                                    "wfile = '{}'".format(_sql_quote(wfile)), PgLOG.LGEREX)
        if pgrec:
            note = pgrec['note'].replace('\n', ' ') if pgrec['note'] else ''   # remove '\n' in note
            file = _strip_prefix(file, LINFO['tpath'])
            display_record(["F" + file, pgrec['data_size'], pgrec['data_format'], note])
            LINFO['fcnt'] += 1


#
# display one file info
#
def display_record(disp):
    """Print *disp* right away (option -N) or cache it for formatted output.

    disp -- [name, size, count-or-format, description]; the size is replaced
            in place by its human readable string and None fields by ''.
    """
    disp[1] = get_float_string(disp[1])
    for i, val in enumerate(disp):
        if val is None: disp[i] = ''   # NULL database field
    if GDEXLS['N']:
        print(GDEXLS['D'].join(disp))
    else:
        LINFO['pgrecs'].append(disp)
        LINFO['pcnt'] += 1
        for i in range(DIDX):
            dlen = len(disp[i])
            if dlen > WIDTHS[i]: WIDTHS[i] = dlen


#
# display cached list with format
#
def display_format_list():
    """Print all cached records padded to the current column widths.

    The cache is emptied afterwards; resetting only the counter would make
    the next flush print the already displayed records again.
    """
    delim = GDEXLS['D']
    for disp in LINFO['pgrecs']:
        for i in range(DIDX):
            if ALIGNS[i] == 1:
                disp[i] = "{:>{}}".format(disp[i], WIDTHS[i])
            else:
                disp[i] = "{:{}}".format(disp[i], WIDTHS[i])
        print(delim.join(disp))

    del LINFO['pgrecs'][:]
    LINFO['pcnt'] = 0


#
# change size to floating point value with unit
#
def get_float_string(val):
    """Return byte count *val* as a string with unit B, K, M, G, T or P.

    Values are scaled by 1000 while they exceed 1000; scaled values are shown
    with two decimals. None (e.g. a NULL database sum) is treated as 0.
    """
    units = ['B', 'K', 'M', 'G', 'T', 'P']
    if val is None: val = 0

    idx = 0
    while val > 1000 and idx < 5:
        val /= 1000
        idx += 1

    if idx > 0:
        return "{:.2f}{}".format(val, units[idx])
    return "{}{}".format(val, units[idx])


#
# replace /gpfs to the path /glade
#
def get_real_path(path):
    """Map a /gpfs path to its /glade equivalent and return its real path."""
    if path.startswith('/gpfs/u'):
        path = '/glade' + path[len('/gpfs'):]
    elif path.startswith('/gpfs/csfs1/'):
        path = '/glade/campaign' + path[len('/gpfs/csfs1'):]

    return op.realpath(path)


#
# call main() to start program
#
if __name__ == "__main__": main()
if count reach this limit -WIDTHS = [0, 0, 0] # WIDTHS for formated display -ALIGNS = [0, 1, 1] # alignment, 0 - left; 1 - right - -GDEXLS = { - 'd' : 0, # 1 to list directory information only - 'f' : 0, # 1 to list file information only - 'N' : 0, # 1 to list files unformatted - 'r' : 0, # 1 if recursive all - 'R' : 0, # > 0 to set recursive limit - 'D' : None, # specify delimiting symbols, default to ' ' -} - -LINFO = { - 'files' : [], - 'curdir' : None, - 'tpath' : None, - 'dhome' : None, - 'dsid' : None, - 'dcnt' : 0, - 'gcnt' : 0, - 'fcnt' : 0, - 'pcnt' : 0, - 'pgrecs' : [] -} - -# -# main function to run the application -# -def main(): - - PgDBI.view_dbinfo() - PgLOG.set_help_path(__file__) - PgLOG.PGLOG['LOGFILE'] = "gdexls.log" # set different log file - LINFO['curdir'] = get_real_path(os.getcwd()) - argv = sys.argv[1:] - PgLOG.pglog("gdexls {} ({})".format(' '.join(argv), LINFO['curdir'])) - option = defopt = 'l' - for arg in argv: - if re.match(r'-(h|-*help|\?)$', arg): PgLOG.show_usage("gdexls") - ms = re.match(r'-(\w)$', arg) - if ms: - option = ms.group(1) - if option not in GDEXLS: PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX) - if 'dfNr'.find(option) > -1: - GDEXLS[option] = 1 - option = defopt - continue - if not option: PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX) - if option == 'l': - LINFO['files'].append(get_real_path(arg)) - defopt = None - else: - if option == 'R': - GDEXLS[option] = int(arg) +from rda_python_common.pg_split import PgSplit + +class GdexLs(PgSplit): + + def __init__(self): + super().__init__() + # define some constants for gdexls actions + self.DIDX = 3 # description column index + self.CLMT = 500 # reformat list if count reach this limit + self.WIDTHS = [0, 0, 0] # WIDTHS for formated display + self.ALIGNS = [0, 1, 1] # alignment, 0 - left; 1 - right + self.GDEXLS = { + 'd' : 0, # 1 to list directory information only + 'f' : 0, # 1 to list file information only + 'N' : 0, # 1 to list files 
unformatted + 'r' : 0, # 1 if recursive all + 'R' : 0, # > 0 to set recursive limit + 'D' : None, # specify delimiting symbols, default to ' ' + } + self.LINFO = { + 'files' : [], + 'curdir' : None, + 'tpath' : None, + 'dhome' : None, + 'dsid' : None, + 'dcnt' : 0, + 'gcnt' : 0, + 'fcnt' : 0, + 'pcnt' : 0, + 'pgrecs' : [] + } + + # function to read parameters + def read_parameters(self): + self.set_help_path(__file__) + self.PGLOG['LOGFILE'] = "gdexls.log" # set different log file + self.LINFO['curdir'] = self.get_real_path(os.getcwd()) + argv = sys.argv[1:] + self.pglog("gdexls {} ({})".format(' '.join(argv), self.LINFO['curdir'])) + option = defopt = 'l' + for arg in argv: + if re.match(r'-(h|-*help|\?)$', arg): self.show_usage("gdexls") + ms = re.match(r'-(\w)$', arg) + if ms: + option = ms.group(1) + if option not in self.GDEXLS: self.pglog(arg + ": Unknown Option", self.LGEREX) + if 'dfNr'.find(option) > -1: + self.GDEXLS[option] = 1 + option = defopt + continue + if not option: self.pglog(arg + ": Value provided without option", self.LGEREX) + if option == 'l': + self.LINFO['files'].append(self.get_real_path(arg)) + defopt = None else: - GDEXLS[option] = arg - option = defopt + if option == 'R': + self.GDEXLS[option] = int(arg) + else: + self.GDEXLS[option] = arg + option = defopt - if not LINFO['files']: - LINFO['files'] = sorted(glob.glob('*')) # view all files in current directory - if not LINFO['files']: - sys.stderr.write(LINFO['curdir'] + ": Empty directory\n") - PgLOG.pgexit(1) - - if not (GDEXLS['d'] or GDEXLS['f']): - GDEXLS['d'] = GDEXLS['f'] = 1 # list both directories and files as default - if not GDEXLS['D']: GDEXLS['D'] = '|' if GDEXLS['N'] else " " # default delimiter for no format display - if not GDEXLS['R'] and GDEXLS['r']: GDEXLS['R'] = 1000 - - display_top_list(LINFO['files']) # display or cache file/directory list - if LINFO['pcnt'] > 0: display_format_list() # if some left over - - if (LINFO['dcnt'] + LINFO['gcnt'] + LINFO['fcnt']) > 1: 
- msg = '' - if LINFO['dcnt'] > 0: - s = 's' if LINFO['dcnt'] > 1 else '' - msg += "{} Dataset{}".format(LINFO['dcnt'], s) - if LINFO['gcnt'] > 0: - s = 's' if LINFO['gcnt'] > 1 else '' - if msg: msg += " & " - msg += "{} Group{}".format(LINFO['gcnt'], s) - if LINFO['fcnt'] > 0: - s = 's' if LINFO['fcnt'] > 1 else '' - if msg: msg += " & " - msg += "{} File{}".format(LINFO['fcnt'], s) - print("Total {} displayed".format(msg)) - elif (LINFO['dcnt'] + LINFO['gcnt'] + LINFO['fcnt']) == 0: - sys.stderr.write((LINFO['tpath'] if LINFO['tpath'] else LINFO['curdir']) + ": No GDEX data information found\n") - PgLOG.pgexit(1) + # functio to start actions + def start_actions(self): + self.view_dbinfo() + if not self.LINFO['files']: + self.LINFO['files'] = sorted(glob.glob('*')) # view all files in current directory + if not self.LINFO['files']: + sys.stderr.write(self.LINFO['curdir'] + ": Empty directory\n") + self.pgexit(1) - PgLOG.pgexit(0) - -# -# display the top level list -# -def display_top_list(files): - - for file in files: - - if not op.exists(file): - sys.stderr.write(file + ": NOT exists\n") - continue - - isdir = 1 if op.isdir(file) else 0 - display = 1 - if isdir and re.search(r'/$', file): - display = 0 # do not display the directory info if it is ended by '/' - file = re.sub(r'/$', '', file) - - if not re.match(r'^/', file): file = PgLOG.join_paths(LINFO['curdir'], file) - LINFO['tpath'] = (op.dirname(file) if display else file) + "/" - if display: display_line(file, isdir) - if isdir and (GDEXLS['R'] or not display or not LINFO['dsid']): - fs = sorted(glob.glob(file + "/*")) - display_list(fs, 1) - if LINFO['pcnt'] > CLMT: display_format_list() - -# -# recursively display directory/file info -# -def display_list(files, level): - - for file in files: - isdir = 1 if op.isdir(file) else 0 - display_line(file, isdir) - if isdir and level < GDEXLS['R']: - fs = sorted(glob.glob(file + "/*")) - display_list(fs, level+1) - if LINFO['pcnt'] > CLMT: 
display_format_list() - -# -# find dataset/group info; display or cache file -# -def display_line(file, isdir): + if not (self.GDEXLS['d'] or self.GDEXLS['f']): + self.GDEXLS['d'] = self.GDEXLS['f'] = 1 # list both directories and files as default + if not self.GDEXLS['D']: self.GDEXLS['D'] = '|' if self.GDEXLS['N'] else " " # default delimiter for no format display + if not self.GDEXLS['R'] and self.GDEXLS['r']: self.GDEXLS['R'] = 1000 - getwfile = 1 - if LINFO['dsid'] and LINFO['dhome']: - ms = re.match(r'^{}/(.*)$'.format(LINFO['dhome']), file) - if ms: - wfile = ms.group(1) - getwfile = 0 - if getwfile: - LINFO['dsid'] = PgUtil.find_dataset_id(file) - if LINFO['dsid'] == None: return # skip for missing dsid - - pgrec = PgDBI.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", "dsid = '{}'".format(LINFO['dsid']), PgLOG.LGEREX) - if not pgrec: return None - - LINFO['dhome'] = "{}/{}".format(PgLOG.PGLOG['DSDHOME'], LINFO['dsid']) - if LINFO['dhome'] == file: - file = re.sub(r'^{}'.format(LINFO['tpath']), '', file, 1) - if GDEXLS['d']: - title = pgrec['title'] if pgrec['title'] else '' - display_record(["D" + file, pgrec['ns'], str(pgrec['nc']), title]) - LINFO['dcnt'] += 1 - return - - ms = re.match(r'^{}/(.*)$'.format(LINFO['dhome']), file) - if ms: - wfile = ms.group(1) - else: - return - - if isdir: - if GDEXLS['d']: # check and display group info for directory - pgrec = PgDBI.pgget("dsgroup", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", - "dsid = '{}' AND webpath = '{}'".format(LINFO['dsid'], wfile), PgLOG.LGEREX) - if pgrec: - file = re.sub(r'^{}'.format(LINFO['tpath']), '', file, 1) - title = pgrec['title'] if pgrec['title'] else '' - display_record(["G" + file, pgrec['ns'], str(pgrec['nc']), title]) - LINFO['gcnt'] += 1 - - elif GDEXLS['f']: # check and display file info - pgrec = PgSplit.pgget_wfile(LINFO['dsid'], "data_size, data_format, note", - "wfile = '{}'".format(wfile), PgLOG.LGEREX) - if pgrec: - if 
pgrec['note']: - note = re.sub(r'\n', ' ', pgrec['note']) # remove '\n' in note + self.display_top_list(self.LINFO['files']) # display or cache file/directory list + if self.LINFO['pcnt'] > 0: self.display_format_list() # if some left over + if (self.LINFO['dcnt'] + self.LINFO['gcnt'] + self.LINFO['fcnt']) > 1: + msg = '' + if self.LINFO['dcnt'] > 0: + s = 's' if self.LINFO['dcnt'] > 1 else '' + msg += "{} Dataset{}".format(self.LINFO['dcnt'], s) + if self.LINFO['gcnt'] > 0: + s = 's' if self.LINFO['gcnt'] > 1 else '' + if msg: msg += " & " + msg += "{} Group{}".format(self.LINFO['gcnt'], s) + if self.LINFO['fcnt'] > 0: + s = 's' if self.LINFO['fcnt'] > 1 else '' + if msg: msg += " & " + msg += "{} File{}".format(self.LINFO['fcnt'], s) + print("Total {} displayed".format(msg)) + elif (self.LINFO['dcnt'] + self.LINFO['gcnt'] + self.LINFO['fcnt']) == 0: + sys.stderr.write((self.LINFO['tpath'] if self.LINFO['tpath'] else self.LINFO['curdir']) + ": No GDEX data information found\n") + self.pgexit(1) + + # display the top level list + def display_top_list(self, files): + for file in files: + if not op.exists(file): + sys.stderr.write(file + ": NOT exists\n") + continue + isdir = 1 if op.isdir(file) else 0 + display = 1 + if isdir and re.search(r'/$', file): + display = 0 # do not display the directory info if it is ended by '/' + file = re.sub(r'/$', '', file) + if not re.match(r'^/', file): file = self.join_paths(self.LINFO['curdir'], file) + self.LINFO['tpath'] = (op.dirname(file) if display else file) + "/" + if display: self.display_line(file, isdir) + if isdir and (self.GDEXLS['R'] or not display or not self.LINFO['dsid']): + fs = sorted(glob.glob(file + "/*")) + self.display_list(fs, 1) + if self.LINFO['pcnt'] > self.CLMT: self.display_format_list() + + # recursively display directory/file info + def display_list(self, files, level): + for file in files: + isdir = 1 if op.isdir(file) else 0 + self.display_line(file, isdir) + if isdir and level < self.GDEXLS['R']: 
+ fs = sorted(glob.glob(file + "/*")) + self.display_list(fs, level+1) + if self.LINFO['pcnt'] > self.CLMT: self.display_format_list() + + # find dataset/group info; display or cache file + def display_line(self, file, isdir): + getwfile = 1 + if self.LINFO['dsid'] and self.LINFO['dhome']: + ms = re.match(r'^{}/(.*)$'.format(self.LINFO['dhome']), file) + if ms: + wfile = ms.group(1) + getwfile = 0 + if getwfile: + self.LINFO['dsid'] = self.find_dataset_id(file) + if self.LINFO['dsid'] == None: return # skip for missing dsid + pgrec = self.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", "dsid = '{}'".format(self.LINFO['dsid']), self.LGEREX) + if not pgrec: return None + self.LINFO['dhome'] = "{}/{}".format(self.PGLOG['DSDHOME'], self.LINFO['dsid']) + if self.LINFO['dhome'] == file: + file = re.sub(r'^{}'.format(self.LINFO['tpath']), '', file, 1) + if self.GDEXLS['d']: + title = pgrec['title'] if pgrec['title'] else '' + self.display_record(["D" + file, pgrec['ns'], str(pgrec['nc']), title]) + self.LINFO['dcnt'] += 1 + return + ms = re.match(r'^{}/(.*)$'.format(self.LINFO['dhome']), file) + if ms: + wfile = ms.group(1) else: - note = '' - file = re.sub(r'^{}'.format(LINFO['tpath']), '', file, 1) - display_record(["F" + file, pgrec['data_size'], pgrec['data_format'], note]) - LINFO['fcnt'] += 1 - -# -# display one file info -# -def display_record(disp): - - disp[1] = get_float_string(disp[1]) - if GDEXLS['N']: - print(GDEXLS['D'].join(disp)) - else: - LINFO['pgrecs'].append(disp) - LINFO['pcnt'] += 1 - for i in range(DIDX): - dlen = len(disp[i]) - if dlen > WIDTHS[i]: WIDTHS[i] = dlen - -# -# display cached list with format -# -def display_format_list(): - - for j in range(LINFO['pcnt']): - disp = LINFO['pgrecs'][j] - for i in range(DIDX): - if ALIGNS[i] == 1: - disp[i] = "{:>{}}".format(disp[i], WIDTHS[i]) - else: - disp[i] = "{:{}}".format(disp[i], WIDTHS[i]) - print(GDEXLS['D'].join(disp)) - - LINFO['pcnt'] = 0 - -# -# change size to 
floating point value with unit -# -def get_float_string(val): - - units = ['B', 'K', 'M', 'G', 'T', 'P'] - - idx = 0 - while val > 1000 and idx < 5: - val /= 1000 - idx += 1 - - if idx > 0: - return "{:.2f}{}".format(val, units[idx]) - else: - return "{}{}".format(val, units[idx]) - -# -# replace /gpfs to the path /glade -# -def get_real_path(path): - - if re.match(r'^/gpfs/u', path): - path = re.sub(r'^/gpfs', '/glade', path, 1) - elif re.match(r'^/gpfs/csfs1/', path): - path = re.sub(r'^/gpfs/csfs1', '/glade/campaign', path, 1) - - return op.realpath(path) + return + if isdir: + if self.GDEXLS['d']: # check and display group info for directory + pgrec = self.pgget("dsgroup", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", + "dsid = '{}' AND webpath = '{}'".format(self.LINFO['dsid'], wfile), self.LGEREX) + if pgrec: + file = re.sub(r'^{}'.format(self.LINFO['tpath']), '', file, 1) + title = pgrec['title'] if pgrec['title'] else '' + self.display_record(["G" + file, pgrec['ns'], str(pgrec['nc']), title]) + self.LINFO['gcnt'] += 1 + elif self.GDEXLS['f']: # check and display file info + pgrec = self.pgget_wfile(self.LINFO['dsid'], "data_size, data_format, note", + "wfile = '{}'".format(wfile), self.LGEREX) + if pgrec: + note = re.sub(r'\n', ' ', pgrec['note']) if pgrec['note'] else '' + file = re.sub(r'^{}'.format(self.LINFO['tpath']), '', file, 1) + self.display_record(["F" + file, pgrec['data_size'], pgrec['data_format'], note]) + self.LINFO['fcnt'] += 1 + + # display one file info + def display_record(self, disp): + disp[1] = self.get_float_string(disp[1]) + if self.GDEXLS['N']: + print(self.GDEXLS['D'].join(disp)) + else: + self.LINFO['pgrecs'].append(disp) + self.LINFO['pcnt'] += 1 + for i in range(self.DIDX): + dlen = len(disp[i]) + if dlen > self.WIDTHS[i]: self.WIDTHS[i] = dlen + + # display cached list with format + def display_format_list(self): + for j in range(self.LINFO['pcnt']): + disp = self.LINFO['pgrecs'][j] + for i in range(self.DIDX): 
+ if self.ALIGNS[i] == 1: + disp[i] = "{:>{}}".format(disp[i], self.WIDTHS[i]) + else: + disp[i] = "{:{}}".format(disp[i], self.WIDTHS[i]) + print(self.GDEXLS['D'].join(disp)) + self.LINFO['pcnt'] = 0 + + # change size to floating point value with unit + @staticmethod + def get_float_string(val): + units = ['B', 'K', 'M', 'G', 'T', 'P'] + idx = 0 + while val > 1000 and idx < 5: + val /= 1000 + idx += 1 + if idx > 0: + return "{:.2f}{}".format(val, units[idx]) + else: + return "{}{}".format(val, units[idx]) + + # replace /gpfs to the path /glade + @staticmethod + def get_real_path(path): + if re.match(r'^/gpfs/u', path): + path = re.sub(r'^/gpfs', '/glade', path, 1) + elif re.match(r'^/gpfs/csfs1/', path): + path = re.sub(r'^/gpfs/csfs1', '/glade/campaign', path, 1) + return op.realpath(path) + +# main function to excecute this script +def main(): + object = GdexLs() + object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/gdexls_standalone.py b/src/rda_python_miscs/gdexls_standalone.py index 4ed290d..b46a370 100644 --- a/src/rda_python_miscs/gdexls_standalone.py +++ b/src/rda_python_miscs/gdexls_standalone.py @@ -1,9 +1,9 @@ -#!/glade/work/zji/conda-envs/pg-rda/bin/python +#!/glade/work/zji/conda-envs/pg-gdex/bin/python # -*- coding: utf-8 -*- # 2025-09-23, zji@ucar.edu, created for a standalone version of gdexls import re import sys -pgpath = '/glade/work/zji/conda-envs/pg-rda/lib/python3.10/site-packages' +pgpath = '/glade/work/zji/conda-envs/pg-gdex/lib/python3.12/site-packages' if pgpath not in sys.path: sys.path.insert(0, pgpath) from rda_python_miscs.gdexls import main diff --git a/src/rda_python_miscs/pg_wget.py b/src/rda_python_miscs/pg_wget.py new file mode 100644 index 0000000..c254ecb --- /dev/null +++ b/src/rda_python_miscs/pg_wget.py @@ -0,0 +1,186 @@ +#!/usr/bin/env python3 +# 
+################################################################################## +# +# Title : pgwget +# Author : Zaihua Ji, zji@ucar.edu +# Date : 12/02/2020 +# 2025-03-10 transferred to package rda_python_miscs from +# https://github.com/NCAR/rda-utility-programs.git +# Purpose : wrapper to wget to get a file with wildcard in name +# +# Github: https://github.com/NCAR/rda-python-miscs.git +# +################################################################################## + +import sys +import re +from rda_python_common import PgLOG +from rda_python_common import PgUtil +from rda_python_common import PgFile + +OPTIONS = { + 'OP' : "-np -nH -nd -m -e robots=off --no-check-certificate", + 'UL' : None, + 'RN' : None, + 'FN' : None, + 'FC' : 1, + 'SM' : 0, + 'MC' : 0, + 'CN' : 0, + 'CR' : 0, + 'EX' : None, + 'JC' : 'cat' +} + +# +# main function to excecute this script +# +def main(): + + option = None + JCS = ['cat', 'tar', 'first', 'last'] + options = '|'.join(OPTIONS) + argv = sys.argv[1:] + PgLOG.PGLOG['LOGFILE'] = "pgwget.log" + + for arg in argv: + if arg == "-b": + PgLOG.PGLOG['BCKGRND'] = 1 + option = None + continue + ms = re.match(r'^-({})$'.format(options), arg, re.I) + if ms: + option = ms.group(1).upper() + if re.match(r'^(CN|CR|SM)$', option): + OPTIONS[option] = 1 + option = None + continue + if re.match(r'^-.*$', arg): PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX) + if not option: PgLOG.pglog(arg + ": Value passed in without leading option", PgLOG.LGEREX) + + if option == 'JC' and arg not in JCS: + PgLOG.pglog(arg + ": Joining Command must be one of {}".format(JCS), PgLOG.LGEREX) + OPTIONS[option] = int(arg) if re.match(r'^(FC|MC)$', option) else arg + option = None + + if not (OPTIONS['UL'] and OPTIONS['RN']): + print("Usage: pgwget [-CN] [-CR] [-FC FileCount] [-JC JoinCommand] [-MC MinFileCount] [-FN FileName] -UL WebURL -RN RootFileName [-EX FileNameExtension]") + print(" Provide at least WebURL and RootFileName to wget file(s)") + 
print(" Option -CN - check new file if presents") + print(" Option -CR - clean the downloaded remote file(s) if presents") + print(" Option -FC - number of files to be valid download; defaults to 1") + print(" Option -JC - file joining command, it defaults to cat, could be tar, or last/first to choose the last/first one") + print(" Option -SM - Show wget dumping message; defaults to False") + print(" Option -MC - minimal number of files to be valid download; defaults to -FC") + print(" Option -FN - file name to be used if successful download; defaults to RootFileName.FileNameExtension") + print(" Option -OP - options used by wget, defaults to '-np -nH -nd -m -e robots=off'") + print(" Option -UL - (mandatory) WebURL with path") + print(" Option -RN - (mandatory) the root portion of the remote file name to be downloaded") + print(" Option -EX - file name extension to be used.") + sys.exit(0) + + PgLOG.cmdlog("pgwget " + ' '.join(argv)) + if not OPTIONS['MC']: OPTIONS['MC'] = OPTIONS['FC'] + if not OPTIONS['SM']: OPTIONS['OP'] += ' -q' + download_wildcard_files() + PgLOG.cmdlog() + + sys.exit(0) + +# +# download one or multiple remote files via wget; concat files to a single one if multiple +# +def download_wildcard_files(): + + deleted = 0 + if OPTIONS['FN']: + dfile = OPTIONS['FN'] + else: + dfile = OPTIONS['RN'] + if OPTIONS['EX']: dfile += "." + OPTIONS['EX'] + + dinfo = PgFile.check_local_file(dfile, 1) + if dinfo and not OPTIONS['CN']: + return PgLOG.pglog("{}: file dowloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), PgLOG.LOGWRN) + + build = 0 if dinfo else 1 + wfile = OPTIONS['RN'] + "*" + if OPTIONS['EX']: wfile += "." 
+ OPTIONS['EX'] + dlist = PgFile.local_glob(wfile, 1) + if dfile in dlist and dinfo: + del dlist[dfile] + deleted = 1 + dcnt = len(dlist) + + if OPTIONS['CN'] or dcnt < OPTIONS['FC']: + cmd = "wget {} {} -A '{}'".format(OPTIONS['OP'], OPTIONS['UL'], wfile) + PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 7) + nlist = PgFile.local_glob(wfile, 1) + if dfile in nlist and dinfo: + del nlist[dfile] + deleted = 1 + ncnt = len(nlist) + else: + nlist = dlist + ncnt = dcnt + + if ncnt == 0: + if deleted: + return PgLOG.pglog("{}: File dowloaded on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN) + else: + return PgLOG.pglog("{}: NO file to dowload on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN) + elif ncnt < OPTIONS['MC']: + return PgLOG.pglog("{}: NOT ready, only {} of {} files dowloaded".format(dfile, ncnt, OPTIONS['MC']), PgLOG.LOGWRN) + + rfiles = sorted(nlist) + size = skip = 0 + for i in range(ncnt): + rfile = rfiles[i] + rinfo = nlist[rfile] + size += rinfo['data_size'] + if dinfo and PgUtil.cmptime(dinfo['date_modified'], dinfo['time_modified'], rinfo['date_modified'], rinfo['time_modified']) >= 0: + PgLOG.pglog("{}: Not newer than {}".format(rfile, dfile), PgLOG.LOGWRN) + skip += 1 + elif rfile not in dlist: + build = 1 + elif PgFile.compare_file_info(dlist[rfile], rinfo) > 0: + PgLOG.pglog("{}: Newer file dowloaded from {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN) + build = 1 + else: + PgLOG.pglog("{}: No newer file found on ".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN) + + if skip == ncnt: return 0 + + if not (build or size == dinfo['data_size']): build = 1 + if not build: return PgLOG.pglog(dfile + ": Use existing file", PgLOG.LOGWRN) + + if OPTIONS['JC'] == 'cat': + for i in range(ncnt): + rfile = rfiles[i] + if i == 0: + if dfile != rfile: PgFile.local_copy_local(dfile, rfile, PgLOG.LOGWRN) + else: + PgLOG.pgsystem("cat {} >> {}".format(rfile, dfile), PgLOG.LOGWRN, 5) + if OPTIONS['CR'] and dfile != rfile: PgLOG.pgsystem("rm -f " + rfile, PgLOG.LOGWRN, 5) + elif 
OPTIONS['JC'] == 'tar': + topt = 'c' + for i in range(ncnt): + rfile = rfiles[i] + PgLOG.pgsystem("tar -{}vf {} {}".format(topt, dfile, rfile), PgLOG.LOGWRN, 5) + topt = 'u' + if OPTIONS['CR']: PgLOG.pgsystem("rm -f " + rfile, PgLOG.LOGWRN, 5) + else: + didx = 0 if OPTIONS['JC'] == 'first' else (ncnt - 1) + PgLOG.pgsystem("mv {} {}".format(rfiles[didx], dfile), PgLOG.LOGWRN, 5) + if OPTIONS['CR']: + for i in range(ncnt): + if i == didx: continue + PgLOG.pgsystem("rm -f " + rfiles[i], PgLOG.LOGWRN, 5) + + return 1 + +# +# call main() to start program +# +if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/pgwget.py b/src/rda_python_miscs/pgwget.py index c254ecb..3775c7c 100644 --- a/src/rda_python_miscs/pgwget.py +++ b/src/rda_python_miscs/pgwget.py @@ -1,186 +1,171 @@ #!/usr/bin/env python3 -# ################################################################################## -# # Title : pgwget # Author : Zaihua Ji, zji@ucar.edu # Date : 12/02/2020 -# 2025-03-10 transferred to package rda_python_miscs from -# https://github.com/NCAR/rda-utility-programs.git +# 2025-03-10 transferred to package rda_python_miscs from +# https://github.com/NCAR/rda-utility-programs.git +# 2026-01-05 convert to class PgWget # Purpose : wrapper to wget to get a file with wildcard in name -# # Github: https://github.com/NCAR/rda-python-miscs.git -# ################################################################################## - import sys import re -from rda_python_common import PgLOG -from rda_python_common import PgUtil -from rda_python_common import PgFile - -OPTIONS = { - 'OP' : "-np -nH -nd -m -e robots=off --no-check-certificate", - 'UL' : None, - 'RN' : None, - 'FN' : None, - 'FC' : 1, - 'SM' : 0, - 'MC' : 0, - 'CN' : 0, - 'CR' : 0, - 'EX' : None, - 'JC' : 'cat' -} - -# -# main function to excecute this script -# -def main(): - - option = None - JCS = ['cat', 'tar', 'first', 'last'] - options = '|'.join(OPTIONS) - argv = sys.argv[1:] - PgLOG.PGLOG['LOGFILE'] = 
"pgwget.log" - - for arg in argv: - if arg == "-b": - PgLOG.PGLOG['BCKGRND'] = 1 - option = None - continue - ms = re.match(r'^-({})$'.format(options), arg, re.I) - if ms: - option = ms.group(1).upper() - if re.match(r'^(CN|CR|SM)$', option): - OPTIONS[option] = 1 - option = None - continue - if re.match(r'^-.*$', arg): PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX) - if not option: PgLOG.pglog(arg + ": Value passed in without leading option", PgLOG.LGEREX) - - if option == 'JC' and arg not in JCS: - PgLOG.pglog(arg + ": Joining Command must be one of {}".format(JCS), PgLOG.LGEREX) - OPTIONS[option] = int(arg) if re.match(r'^(FC|MC)$', option) else arg +from rda_python_common.pg_file import PgFile + +class PgWget(PgFile): + + def __init__(self): + super().__init__() + self.OPTIONS = { + 'OP' : "-np -nH -nd -m -e robots=off --no-check-certificate", + 'UL' : None, + 'RN' : None, + 'FN' : None, + 'FC' : 1, + 'SM' : 0, + 'MC' : 0, + 'CN' : 0, + 'CR' : 0, + 'EX' : None, + 'JC' : 'cat' + } + + # function to read parameters + def read_parameters(self): option = None - - if not (OPTIONS['UL'] and OPTIONS['RN']): - print("Usage: pgwget [-CN] [-CR] [-FC FileCount] [-JC JoinCommand] [-MC MinFileCount] [-FN FileName] -UL WebURL -RN RootFileName [-EX FileNameExtension]") - print(" Provide at least WebURL and RootFileName to wget file(s)") - print(" Option -CN - check new file if presents") - print(" Option -CR - clean the downloaded remote file(s) if presents") - print(" Option -FC - number of files to be valid download; defaults to 1") - print(" Option -JC - file joining command, it defaults to cat, could be tar, or last/first to choose the last/first one") - print(" Option -SM - Show wget dumping message; defaults to False") - print(" Option -MC - minimal number of files to be valid download; defaults to -FC") - print(" Option -FN - file name to be used if successful download; defaults to RootFileName.FileNameExtension") - print(" Option -OP - options used by wget, 
defaults to '-np -nH -nd -m -e robots=off'") - print(" Option -UL - (mandatory) WebURL with path") - print(" Option -RN - (mandatory) the root portion of the remote file name to be downloaded") - print(" Option -EX - file name extension to be used.") - sys.exit(0) - - PgLOG.cmdlog("pgwget " + ' '.join(argv)) - if not OPTIONS['MC']: OPTIONS['MC'] = OPTIONS['FC'] - if not OPTIONS['SM']: OPTIONS['OP'] += ' -q' - download_wildcard_files() - PgLOG.cmdlog() - - sys.exit(0) - -# -# download one or multiple remote files via wget; concat files to a single one if multiple -# -def download_wildcard_files(): - - deleted = 0 - if OPTIONS['FN']: - dfile = OPTIONS['FN'] - else: - dfile = OPTIONS['RN'] - if OPTIONS['EX']: dfile += "." + OPTIONS['EX'] - - dinfo = PgFile.check_local_file(dfile, 1) - if dinfo and not OPTIONS['CN']: - return PgLOG.pglog("{}: file dowloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), PgLOG.LOGWRN) - - build = 0 if dinfo else 1 - wfile = OPTIONS['RN'] + "*" - if OPTIONS['EX']: wfile += "." 
+ OPTIONS['EX'] - dlist = PgFile.local_glob(wfile, 1) - if dfile in dlist and dinfo: - del dlist[dfile] - deleted = 1 - dcnt = len(dlist) - - if OPTIONS['CN'] or dcnt < OPTIONS['FC']: - cmd = "wget {} {} -A '{}'".format(OPTIONS['OP'], OPTIONS['UL'], wfile) - PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 7) - nlist = PgFile.local_glob(wfile, 1) - if dfile in nlist and dinfo: - del nlist[dfile] - deleted = 1 - ncnt = len(nlist) - else: - nlist = dlist - ncnt = dcnt - - if ncnt == 0: - if deleted: - return PgLOG.pglog("{}: File dowloaded on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN) + JCS = ['cat', 'tar', 'first', 'last'] + options = '|'.join(self.OPTIONS) + argv = sys.argv[1:] + self.PGLOG['LOGFILE'] = "pgwget.log" + for arg in argv: + if arg == "-b": + self.PGLOG['BCKGRND'] = 1 + option = None + continue + ms = re.match(r'^-({})$'.format(options), arg, re.I) + if ms: + option = ms.group(1).upper() + if re.match(r'^(CN|CR|SM)$', option): + self.OPTIONS[option] = 1 + option = None + continue + if re.match(r'^-.*$', arg): self.pglog(arg + ": Unknown Option", self.LGEREX) + if not option: self.pglog(arg + ": Value passed in without leading option", self.LGEREX) + if option == 'JC' and arg not in JCS: + self.pglog(arg + ": Joining Command must be one of {}".format(JCS), self.LGEREX) + self.OPTIONS[option] = int(arg) if re.match(r'^(FC|MC)$', option) else arg + option = None + if not (self.OPTIONS['UL'] and self.OPTIONS['RN']): + print("Usage: pgwget [-CN] [-CR] [-FC FileCount] [-JC JoinCommand] [-MC MinFileCount] [-FN FileName] -UL WebURL -RN RootFileName [-EX FileNameExtension]") + print(" Provide at least WebURL and RootFileName to wget file(s)") + print(" Option -CN - check new file if presents") + print(" Option -CR - clean the downloaded remote file(s) if presents") + print(" Option -FC - number of files to be valid download; defaults to 1") + print(" Option -JC - file joining command, it defaults to cat, could be tar, or last/first to choose the last/first one") + print(" 
Option -SM - Show wget dumping message; defaults to False") + print(" Option -MC - minimal number of files to be valid download; defaults to -FC") + print(" Option -FN - file name to be used if successful download; defaults to RootFileName.FileNameExtension") + print(" Option -OP - options used by wget, defaults to '-np -nH -nd -m -e robots=off'") + print(" Option -UL - (mandatory) WebURL with path") + print(" Option -RN - (mandatory) the root portion of the remote file name to be downloaded") + print(" Option -EX - file name extension to be used.") + sys.exit(0) + self.cmdlog("pgwget " + ' '.join(argv)) + if not self.OPTIONS['MC']: self.OPTIONS['MC'] = self.OPTIONS['FC'] + if not self.OPTIONS['SM']: self.OPTIONS['OP'] += ' -q' + + # function to start actions + def start_actions(self): + self.download_wildcard_files() + self.cmdlog() + + # download one or multiple remote files via wget; concat files to a single one if multiple + def download_wildcard_files(self): + deleted = 0 + if self.OPTIONS['FN']: + dfile = self.OPTIONS['FN'] else: - return PgLOG.pglog("{}: NO file to dowload on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN) - elif ncnt < OPTIONS['MC']: - return PgLOG.pglog("{}: NOT ready, only {} of {} files dowloaded".format(dfile, ncnt, OPTIONS['MC']), PgLOG.LOGWRN) - - rfiles = sorted(nlist) - size = skip = 0 - for i in range(ncnt): - rfile = rfiles[i] - rinfo = nlist[rfile] - size += rinfo['data_size'] - if dinfo and PgUtil.cmptime(dinfo['date_modified'], dinfo['time_modified'], rinfo['date_modified'], rinfo['time_modified']) >= 0: - PgLOG.pglog("{}: Not newer than {}".format(rfile, dfile), PgLOG.LOGWRN) - skip += 1 - elif rfile not in dlist: - build = 1 - elif PgFile.compare_file_info(dlist[rfile], rinfo) > 0: - PgLOG.pglog("{}: Newer file dowloaded from {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN) - build = 1 + dfile = self.OPTIONS['RN'] + if self.OPTIONS['EX']: dfile += "." 
+ self.OPTIONS['EX'] + dinfo = self.check_local_file(dfile, 1) + if dinfo and not self.OPTIONS['CN']: + return self.pglog("{}: file dowloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), self.LOGWRN) + build = 0 if dinfo else 1 + wfile = self.OPTIONS['RN'] + "*" + if self.OPTIONS['EX']: wfile += "." + self.OPTIONS['EX'] + dlist = self.local_glob(wfile, 1) + if dfile in dlist and dinfo: + del dlist[dfile] + deleted = 1 + dcnt = len(dlist) + if self.OPTIONS['CN'] or dcnt < self.OPTIONS['FC']: + cmd = "wget {} {} -A '{}'".format(self.OPTIONS['OP'], self.OPTIONS['UL'], wfile) + self.pgsystem(cmd, self.LOGWRN, 7) + nlist = self.local_glob(wfile, 1) + if dfile in nlist and dinfo: + del nlist[dfile] + deleted = 1 + ncnt = len(nlist) else: - PgLOG.pglog("{}: No newer file found on ".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN) - - if skip == ncnt: return 0 - - if not (build or size == dinfo['data_size']): build = 1 - if not build: return PgLOG.pglog(dfile + ": Use existing file", PgLOG.LOGWRN) - - if OPTIONS['JC'] == 'cat': - for i in range(ncnt): - rfile = rfiles[i] - if i == 0: - if dfile != rfile: PgFile.local_copy_local(dfile, rfile, PgLOG.LOGWRN) + nlist = dlist + ncnt = dcnt + if ncnt == 0: + if deleted: + return self.pglog("{}: File dowloaded on {}".format(dfile, self.OPTIONS['UL']), self.LOGWRN) else: - PgLOG.pgsystem("cat {} >> {}".format(rfile, dfile), PgLOG.LOGWRN, 5) - if OPTIONS['CR'] and dfile != rfile: PgLOG.pgsystem("rm -f " + rfile, PgLOG.LOGWRN, 5) - elif OPTIONS['JC'] == 'tar': - topt = 'c' + return self.pglog("{}: NO file to dowload on {}".format(dfile, self.OPTIONS['UL']), self.LOGWRN) + elif ncnt < self.OPTIONS['MC']: + return self.pglog("{}: NOT ready, only {} of {} files dowloaded".format(dfile, ncnt, self.OPTIONS['MC']), self.LOGWRN) + rfiles = sorted(nlist) + size = skip = 0 for i in range(ncnt): rfile = rfiles[i] - PgLOG.pgsystem("tar -{}vf {} {}".format(topt, dfile, rfile), PgLOG.LOGWRN, 5) - topt = 'u' - if 
OPTIONS['CR']: PgLOG.pgsystem("rm -f " + rfile, PgLOG.LOGWRN, 5) - else: - didx = 0 if OPTIONS['JC'] == 'first' else (ncnt - 1) - PgLOG.pgsystem("mv {} {}".format(rfiles[didx], dfile), PgLOG.LOGWRN, 5) - if OPTIONS['CR']: + rinfo = nlist[rfile] + size += rinfo['data_size'] + if dinfo and self.cmptime(dinfo['date_modified'], dinfo['time_modified'], rinfo['date_modified'], rinfo['time_modified']) >= 0: + self.pglog("{}: Not newer than {}".format(rfile, dfile), self.LOGWRN) + skip += 1 + elif rfile not in dlist: + build = 1 + elif self.compare_file_info(dlist[rfile], rinfo) > 0: + self.pglog("{}: Newer file dowloaded from {}".format(rfile, self.OPTIONS['UL']), self.LOGWRN) + build = 1 + else: + self.pglog("{}: No newer file found on ".format(rfile, self.OPTIONS['UL']), self.LOGWRN) + if skip == ncnt: return 0 + if not (build or size == dinfo['data_size']): build = 1 + if not build: return self.pglog(dfile + ": Use existing file", self.LOGWRN) + if self.OPTIONS['JC'] == 'cat': for i in range(ncnt): - if i == didx: continue - PgLOG.pgsystem("rm -f " + rfiles[i], PgLOG.LOGWRN, 5) + rfile = rfiles[i] + if i == 0: + if dfile != rfile: self.local_copy_local(dfile, rfile, self.LOGWRN) + else: + self.pgsystem("cat {} >> {}".format(rfile, dfile), self.LOGWRN, 5) + if self.OPTIONS['CR'] and dfile != rfile: self.pgsystem("rm -f " + rfile, self.LOGWRN, 5) + elif self.OPTIONS['JC'] == 'tar': + topt = 'c' + for i in range(ncnt): + rfile = rfiles[i] + self.pgsystem("tar -{}vf {} {}".format(topt, dfile, rfile), self.LOGWRN, 5) + topt = 'u' + if self.OPTIONS['CR']: self.pgsystem("rm -f " + rfile, self.LOGWRN, 5) + else: + didx = 0 if self.OPTIONS['JC'] == 'first' else (ncnt - 1) + self.pgsystem("mv {} {}".format(rfiles[didx], dfile), self.LOGWRN, 5) + if self.OPTIONS['CR']: + for i in range(ncnt): + if i == didx: continue + self.pgsystem("rm -f " + rfiles[i], self.LOGWRN, 5) + return 1 - return 1 +# main function to excecute this script +def main(): + object = PgWget() + 
object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/rda_cp.py b/src/rda_python_miscs/rda_cp.py new file mode 100644 index 0000000..7ae5b4f --- /dev/null +++ b/src/rda_python_miscs/rda_cp.py @@ -0,0 +1,212 @@ +#!/usr/bin/env python3 +# +################################################################################## +# +# Title: rdacp +# Author: Zaihua Ji, zji@ucar.edu +# Date: 10/24/2020 +# 2025-03-10 transferred to package rda_python_miscs from +# https://github.com/NCAR/rda-utility-programs.git +# Purpose: copy files locally and remotely by 'rdadata' +# +# Github: https://github.com/NCAR/rda-python-miscs.git +# +################################################################################## +# +import re +import os +import sys +from os import path as op +from rda_python_common import PgLOG +from rda_python_common import PgUtil +from rda_python_common import PgDBI +from rda_python_common import PgFile + +RDACP = { + 'fh' : None, # from host name, default to localhost + 'th' : None, # to host name, defaul to localhost + 'fb' : None, # from bucket name for a from file in Object Store + 'tb' : None, # to bucket name for a to file in Object Store + 'fp' : None, # from Globus endpoint + 'tp' : None, # to Globus endpoint + 'f' : [], # from file names + 't' : None, # to file name + 'r' : 0, # 1 if recursive all + 'R' : 0, # > 0 to set recursive limit + 'F' : 0o664, # to file mode, default to 664 + 'D' : 0o775, # to directory mode, default to 775 +} + +CINFO = { + 'tcnt' : 0, + 'htcnt' : 0, + 'cpflag' : 0, # 1 file only, 2 directory only, 3 both + 'cpstr' : ['', 'Files', 'Directories', 'Files/Directories'], + 'fpath' : None, + 'tpath' : None, + 'fhost' : '', + 'thost' : '', + 'curdir' : os.getcwd() +} + +# +# main function to run the application +# +def main(): + + dohelp = 0 + argv = sys.argv[1:] + PgDBI.dssdb_dbname() + 
PgLOG.set_suid(PgLOG.PGLOG['EUID']) + PgLOG.set_help_path(__file__) + PgLOG.PGLOG['LOGFILE'] = "rdacp.log" # set different log file + PgLOG.cmdlog("rdacp {} ({})".format(' '.join(argv), CINFO['curdir'])) + defopt = option = 'f' + for arg in argv: + if re.match(r'-(h|-help)$', arg, re.I): + dohelp = 1 + continue + ms = re.match(r'-(\w+)$', arg) + if ms: + option = ms.group(1) + if option not in RDACP: PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX) + if option == 'r': + RDACP['r'] = 1 + option = None + continue + if not option: PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX) + if option == "f": + RDACP['f'].append(arg) + defopt = None + else: + if option == 'R': + RDACP[option] = int(arg) + elif 'FD'.find(option) > -1: + RDACP[option] = PgLOG.base2int(arg, 8) + else: + RDACP[option] = arg + if option == 'th': + CINFO['thost'] = arg + '-' + elif option == 'fh': + CINFO['fhost'] = arg + '-' + option = defopt + + if dohelp or not RDACP['f']: PgLOG.show_usage("rdacp") + PgDBI.validate_decs_group('rdacp', PgLOG.PGLOG['CURUID'], 1) + if not RDACP['R'] and RDACP['r']: RDACP['R'] = 1000 + if not RDACP['t']: + CINFO['tpath'] = RDACP['t'] = "." 
+ else: + ms = re.match(r'^(.+)/$', RDACP['t']) + if ms: + CINFO['tpath'] = ms.group(1) + else: + tinfo = PgFile.check_gdex_file(RDACP['t'], RDACP['th'], 0, PgLOG.LGWNEX) + if tinfo and tinfo['isfile'] == 0: CINFO['tpath'] = RDACP['t'] + PgLOG.PGLOG['FILEMODE'] = RDACP['F'] + PgLOG.PGLOG['EXECMODE'] = RDACP['D'] + + fcnt = len(RDACP['f']) + if not CINFO['tpath'] and fcnt > 1: + PgLOG.pglog("{}{}: Cannot copy multiple files to a single file".format(CINFO['thost'], RDACP['t']), PgLOG.LGEREX) + if RDACP['th'] and RDACP['fh'] and RDACP['th'] == RDACP['fh'] and RDACP['fh'] != 'HPSS': + PgLOG.pglog(RDACP['fh'] + ": Cannot copy file onto the same host", PgLOG.LGEREX) + if RDACP['fb']: + PgLOG.PGLOG['OBJCTBKT'] = RDACP['fb'] + elif RDACP['tb']: + PgLOG.PGLOG['OBJCTBKT'] = RDACP['tb'] + if RDACP['fp']: + PgLOG.PGLOG['BACKUPEP'] = RDACP['fp'] + elif RDACP['tp']: + PgLOG.PGLOG['BACKUPEP'] = RDACP['tp'] + + copy_top_list(RDACP['f']) + + hinfo = '' + if RDACP['fh']: hinfo += " From " + RDACP['fh'] + if RDACP['th']: hinfo += " To " + RDACP['th'] + + if CINFO['tcnt'] > 1: + PgLOG.pglog("Total {} {} copiled{}".format(CINFO['tcnt'], CINFO['cpstr'][CINFO['cpflag']], hinfo), PgLOG.LOGWRN) + elif CINFO['tcnt'] == 0 and not RDACP['fh']: + PgLOG.pglog("{}: No File copied{}".format((CINFO['fpath'] if CINFO['fpath'] else CINFO['curdir']), hinfo), PgLOG.LOGWRN) + + PgLOG.cmdlog() + PgLOG.pgexit(0) + +# +# display the top level list +# +def copy_top_list(files): + + for file in files: + if RDACP['th'] and not PgUtil.pgcmp(RDACP['th'], PgLOG.PGLOG['BACKUPNM'], 1): + info = PgFile.check_globus_file(file, 'gdex-glade', 0, PgLOG.LGWNEX) + else: + info = PgFile.check_gdex_file(file, RDACP['fh'], 0, PgLOG.LGWNEX) + if not info: + PgLOG.pglog("{}{}: {}".format(CINFO['fhost'], file, PgLOG.PGLOG['MISSFILE']), PgLOG.LOGERR) + continue + + dosub = 0 + if info['isfile'] == 0: + CINFO['cpflag'] |= 2 + if not CINFO['tpath']: + PgLOG.pglog("{}{}: Cannot copy directory to a single 
file".format(CINFO['fhost'], file), PgLOG.LGEREX) + + if re.search(r'/$', file): + dosub = 1 # copy the file under this directory if it is ended by '/' + file = re.sub(r'/$', '', file) + else: + CINFO['cpflag'] |= 1 + + if not re.match(r'^/', file): file = PgLOG.join_paths(CINFO['curdir'], file) + CINFO['fpath'] = (file if dosub else op.dirname(file)) + "/" + if info['isfile']: + CINFO['tcnt'] += copy_file(file, info['isfile']) + elif dosub or RDACP['R']: + flist = PgFile.gdex_glob(file, RDACP['fh'], 0, PgLOG.LGWNEX) + if flist: copy_list(flist, 1, file) + else: + PgLOG.pglog("{}{}: Add option -r to copy directory".format(CINFO['fhost'], file), PgLOG.LGEREX) + +# +# recursively copy directory/file +# +def copy_list(tlist, level, cdir): + + fcnt = 0 + + for file in tlist: + if tlist[file]['isfile']: + fcnt += copy_file(file, tlist[file]['isfile']) + CINFO['cpflag'] |= (1 if tlist[file]['isfile'] else 2) + elif level < RDACP['R']: + flist = PgFile.gdex_glob(file, RDACP['fh'], 0, PgLOG.LGWNEX) + if flist: copy_list(flist, level+1, file) + + if fcnt > 1: # display sub count if two or more files are copied + PgLOG.pglog("{}{}: {} {} copied from directory".format(CINFO['fhost'], cdir, fcnt, CINFO['cpstr'][CINFO['cpflag']]), PgLOG.LOGWRN) + CINFO['tcnt'] += fcnt + +# +# copy one file each time +# +def copy_file(fromfile, isfile): + + if CINFO['tpath']: + fname = re.sub(r'^{}'.format(CINFO['fpath']), '', fromfile) + if isfile: + tofile = PgLOG.join_paths(CINFO['tpath'], fname) + else: + tofile = CINFO['tpath'] + '/' + else: + tofile = RDACP['t'] + + return (1 if PgFile.copy_gdex_file(tofile, fromfile, RDACP['th'], RDACP['fh'], PgLOG.LGWNEX) else 0) + +# +# call main() to start program +# +if __name__ == "__main__": main() + diff --git a/src/rda_python_miscs/rda_kill.py b/src/rda_python_miscs/rda_kill.py new file mode 100644 index 0000000..2f86fdf --- /dev/null +++ b/src/rda_python_miscs/rda_kill.py @@ -0,0 +1,267 @@ +#!/usr/bin/env python3 +# 
##################################################################################
#
#     Title: rdakill
#    Author: Zaihua Ji, zji@ucar.edu
#      Date: 10/24/2020
#             2025-03-10 transferred to package rda_python_miscs from
#             https://github.com/NCAR/rda-utility-programs.git
#   Purpose: kill a local or batch process and its child processes for a given
#             running process ID by 'rdadata'
#
#    Github: https://github.com/NCAR/rda-python-miscs.git
#
##################################################################################
#
import re
import sys
import time
from rda_python_common import PgLOG
from rda_python_common import PgSIG
from rda_python_common import PgUtil
from rda_python_common import PgFile
from rda_python_common import PgDBI

# command line option values, keyed by the single-letter option name
RDAKILL = {
   'a' : None,   # application name
   'h' : None,   # hostname
   'p' : 0,      # process id to be killed
   'P' : 0,      # parent pid
   'r' : 0,      # 1 - reserved for exclusive, working with -s PEND only
   'u' : None,   # login user name
   's' : None,   # batch status to kill
   'q' : None    # batch partition/queue for SLURM/PBS, rda for default
}

#
# main function to run the application
#
def main():
   """Parse the command line into RDAKILL and dispatch to the matching kill routine.

   Options taking a value are -a, -h, -p, -P, -q, -s and -u; -r is a flag.
   A bare integer is accepted as the process id when -p has not been given.
   With -h naming the SLURM or PBS host the batch routines are used,
   otherwise local processes are killed.
   """
   optcnt = 0
   option = None
   argv = sys.argv[1:]
   PgDBI.dssdb_dbname()
   PgLOG.set_suid(PgLOG.PGLOG['EUID'])
   PgLOG.set_help_path(__file__)
   PgLOG.PGLOG['LOGFILE'] = "rdakill.log"   # set different log file
   PgLOG.cmdlog("rdakill {}".format(' '.join(argv)))

   for arg in argv:
      # only letters that have a slot in RDAKILL are accepted here; the
      # former 't' had none and raised KeyError on its value
      ms = re.match(r'-([ahpPqsu])$', arg)
      if ms:
         option = ms.group(1)
      elif re.match(r'-r$', arg):
         RDAKILL['r'] = 1
      elif re.match(r'-\w+$', arg):
         PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
      elif option:
         if RDAKILL[option]: PgLOG.pglog("{}: value passed to Option -{} already".format(arg, option), PgLOG.LGEREX)
         if option in 'pP':
            RDAKILL[option] = int(arg)
         elif option == 'h':
            RDAKILL[option] = PgLOG.get_short_host(arg)
         else:
            RDAKILL[option] = arg
         option = None
         optcnt += 1
      else:
         ms = re.match(r'^(\d+)$', arg)
         # a bare number is the pid, but only while no pid is set yet
         # (the test was inverted, so a bare pid was never accepted)
         if ms and not RDAKILL['p']:
            RDAKILL['p'] = int(ms.group(1))   # pid allow value only without leading option
            optcnt += 1
         else:
            PgLOG.pglog(arg + ": pass in value without Option", PgLOG.LGEREX)

   if not optcnt: PgLOG.show_usage("rdakill")
   killloc = 1
   if RDAKILL['h']:
      PgFile.local_host_action(RDAKILL['h'], "kill processes", PgLOG.PGLOG['HOSTNAME'], PgLOG.LGEREX)
      # pgcmp() == 0 is treated as "same name" here
      if not PgUtil.pgcmp(RDAKILL['h'], PgLOG.PGLOG['SLMNAME'], 1):
         if not (RDAKILL['p'] or RDAKILL['s']):
            PgLOG.pglog("Provide Batch ID or Job Status to kill SLURM jobs", PgLOG.LGEREX)
         if RDAKILL['p']:
            rdakill_slurm_batch(RDAKILL['p'])
         else:
            rdakill_slurm_status(RDAKILL['s'], RDAKILL['q'], RDAKILL['u'])
         killloc = 0
      elif not PgUtil.pgcmp(RDAKILL['h'], PgLOG.PGLOG['PBSNAME'], 1):
         if not (RDAKILL['p'] or RDAKILL['s']):
            PgLOG.pglog("Provide Batch ID or Job Status to kill PBS jobs", PgLOG.LGEREX)
         if RDAKILL['p']:
            rdakill_pbs_batch(RDAKILL['p'])
         else:
            rdakill_pbs_status(RDAKILL['s'], RDAKILL['q'], RDAKILL['u'])
         killloc = 0
   if killloc:
      if not (RDAKILL['p'] or RDAKILL['P'] or RDAKILL['a']):
         PgLOG.pglog("Specify process ID, parent PID or App Name to kill", PgLOG.LGEREX)
      rdakill_processes(RDAKILL['p'], RDAKILL['P'], RDAKILL['a'], RDAKILL['u'])

   PgLOG.cmdlog()
   PgLOG.pgexit(0)

#
# kill processes for given condition
#
def rdakill_processes(pid, ppid, aname = None, uname = None, level = 0):
   """Kill local processes selected by pid, parent pid, user and/or app name.

   The process table is read with `ps`; each matching process has its
   children killed first (recursive call with the process as parent),
   then the process itself, then any dscheck record is released.

   pid   -- process id to match, 0 for any
   ppid  -- parent process id to match, 0 for any
   aname -- substring that must appear in the ps command column
   uname -- login name to match ('all', case-insensitive, matches any)
   level -- recursion depth; the "nothing found" message is logged at level 0 only
   """
   kcnt = 0
   if pid:
      cmd = "ps -p {} -f".format(pid)
   elif ppid:
      cmd = "ps --ppid {} -f".format(ppid)
   elif uname:
      cmd = "ps -u {} -f".format(uname)
   else:
      cmd = "ps -ef"

   buf = PgLOG.pgsystem(cmd, PgLOG.LGWNEX, 20)
   if buf:
      for line in buf.split('\n'):
         # columns of `ps -f`: UID PID PPID <rest of line>
         ms = re.match(r'\s*(\w+)\s+(\d+)\s+(\d+)\s+(.*)$', line)
         if ms:
            uid = ms.group(1)
            cid = int(ms.group(2))
            pcid = int(ms.group(3))
            cname = ms.group(4)
            if pid and pid != cid: continue
            if ppid and ppid != pcid: continue
            if uname and not re.match(r'all$', uname, re.I) and uname != uid: continue
            if aname and cname.find(aname) < 0: continue
            kcnt += 1
            rdakill_processes(0, cid, None, None, level+1)   # children first
            kill_local_child(cid, uid, re.sub(r'  +', ' ', line))
            record_dscheck_interrupt(cid, PgLOG.PGLOG['HOSTNAME'])

   if not (kcnt or level):
      buf = "No process idendified to kill "
      if RDAKILL['h']:
         buf += "on " + RDAKILL['h']
      else:
         buf += "locally"
      if PgLOG.PGLOG['CURBID']: buf += "; add Option '-h SLURM' if SLURM batch ID provided"
      PgLOG.pglog(buf, PgLOG.LOGWRN)

#
# kill a local child process
#
def kill_local_child(pid, uid, line):
   """Send `kill -9` to one local process and log the outcome.

   pid  -- process id to kill
   uid  -- owner of the process, passed to PgLOG.get_local_command()
   line -- the ps line describing the process, used in the log messages
   """
   if PgSIG.check_process(pid):
      cmd = PgLOG.get_local_command("kill -9 {}".format(pid), uid)
      if PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 260):   # 4+256
         return PgLOG.pglog("Kill: " + line, PgLOG.LOGWRN)
      elif PgSIG.check_process(pid):
         return PgLOG.pglog("Error Kill: {}\n{}".format(line, PgLOG.PGLOG['SYSERR']), PgLOG.LOGWRN)

   # reached when the process was already gone, or vanished after a failed kill
   if not PgSIG.check_process(pid): PgLOG.pglog("Quit: " + line, PgLOG.LOGWRN)

#
# kill a slurm batch job
#
def rdakill_slurm_batch(bid):
   """Cancel one SLURM job with `scancel`; return the pgsystem() result (0 when not killed)."""
   ret = 0
   stat = PgSIG.check_slurm_status(bid, PgLOG.LOGWRN)
   if stat:
      cmd = PgLOG.get_local_command("scancel {}".format(bid), stat['USER'])
      ret = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 6)
      if ret: record_dscheck_interrupt(bid, PgLOG.PGLOG['SLMNAME'])
   else:
      PgLOG.pglog("{}: cannot find SLURM batch ID".format(bid), PgLOG.LOGERR)

   if not ret and PgLOG.PGLOG['SYSERR']: PgLOG.pglog(PgLOG.PGLOG['SYSERR'], PgLOG.LGEREX)

   return ret

#
# kill SLURM batch jobs for given status
#
def rdakill_slurm_status(stat, part, uname):
   """Cancel every SLURM job in partition `part` whose state equals `stat`.

   stat  -- job state to match against the STATE column of sacct
   part  -- partition name, 'rda' when empty
   uname -- restrict to this user; all users when empty
   """
   if not part: part = 'rda'
   bcmd = "sacct -o jobid,user,state -r {} -".format(part)
   bcmd += ("u " + uname if uname else 'a')

   lines = PgSIG.get_slurm_multiple(bcmd)
   bcnt = len(lines['JOBID']) if lines else 0
   pcnt = kcnt = 0
   for i in range(bcnt):
      if lines['STATE'][i] == stat:
         pcnt += 1
         kcnt += rdakill_slurm_batch(lines['JOBID'][i])

   if pcnt > 0:
      s = 's' if pcnt > 1 else ''
      line = "{} of {} SLURM '{}' job{} Killed".format(kcnt, pcnt, stat, s)
   else:
      line = "No SLURM '{}' job found to kill".format(stat)

   line += " in Partition '{}'".format(part)
   if uname: line += " for " + uname
   PgLOG.pglog(line, PgLOG.LOGWRN)

#
# kill a pbs batch job
#
def rdakill_pbs_batch(bid):
   """Delete one PBS job with qdel (qdelcasper on host type 'ch'); return the pgsystem() result."""
   ret = 0
   stat = PgSIG.get_pbs_info(bid, 0, PgLOG.LOGWRN)
   if stat:
      dcmd = 'qdel'
      if PgLOG.PGLOG['HOSTTYPE'] == 'ch': dcmd += 'casper'
      cmd = PgLOG.get_local_command("{} {}".format(dcmd, bid), stat['UserName'])
      ret = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 7)
      if ret: record_dscheck_interrupt(bid, PgLOG.PGLOG['PBSNAME'])
   else:
      PgLOG.pglog("{}: cannot find PBS batch ID".format(bid), PgLOG.LOGERR)

   if not ret and PgLOG.PGLOG['SYSERR']: PgLOG.pglog(PgLOG.PGLOG['SYSERR'], PgLOG.LGEREX)

   return ret

#
# kill PBS batch jobs for given status
#
def rdakill_pbs_status(stat, queue, uname):
   """Delete every PBS job in `queue` whose state equals `stat`.

   stat  -- job state to match against the State column
   queue -- queue name, 'rda' when empty
   uname -- restrict to this user; all users when empty
   """
   if not queue: queue = 'rda'
   qopts = ''
   if uname:
      qopts = "-u " + uname
   if qopts: qopts += ' '
   qopts += queue
   lines = PgSIG.get_pbs_info(qopts, 1)
   # an empty result means no jobs, same handling as the SLURM variant
   bcnt = len(lines['JobID']) if lines else 0
   pcnt = kcnt = 0
   for i in range(bcnt):
      if stat != lines['State'][i]: continue
      pcnt += 1
      kcnt += rdakill_pbs_batch(lines['JobID'][i])

   if pcnt > 0:
      s = 's' if pcnt > 1 else ''
      line = "{} of {} PBS '{}' job{} Killed".format(kcnt, pcnt, stat, s)
   else:
      line = "No PBS '{}' job found to kill".format(stat)

   line += " in Queue '{}'".format(queue)
   if uname: line += " for " + uname
   PgLOG.pglog(line, PgLOG.LOGWRN)

#
# record a dscheck
#
def record_dscheck_interrupt(pid, host):
   """Mark the dscheck record of (pid, host), if any, as interrupted and clear its pid."""
   pgrec = PgDBI.pgget("dscheck", "cindex", "pid = {} AND hostname = '{}'".format(pid, host), PgLOG.LOGERR)
   if pgrec:
      record = {'chktime' : int(time.time()), 'status' : 'I', 'pid' : 0}   # release lock
      PgDBI.pgupdt("dscheck", record, "cindex = {}".format(pgrec['cindex']), PgLOG.LGEREX)

#
# call main() to start program
#
if __name__ == "__main__": main()
# [patch] diff --git
# [patch] a/src/rda_python_miscs/rda_mod.py b/src/rda_python_miscs/rda_mod.py
# [patch] new file mode 100644
# [patch] index 0000000..54e6cf0
# [patch] --- /dev/null
# [patch] +++ b/src/rda_python_miscs/rda_mod.py
# [patch] @@ -0,0 +1,172 @@
#!/usr/bin/env python3
#
##################################################################################
#
#     Title: rdamod
#    Author: Zaihua Ji, zji@ucar.edu
#      Date: 10/24/2020
#             2025-03-10 transferred to package rda_python_miscs from
#             https://github.com/NCAR/rda-utility-programs.git
#   Purpose: change file/directory modes in given one or multiple local directories
#             owned by 'rdadata'
#
#    Github: https://github.com/NCAR/rda-python-miscs.git
#
##################################################################################
#
import re
import os
import sys
from os import path as op
from rda_python_common import PgLOG
from rda_python_common import PgUtil
from rda_python_common import PgFile
from rda_python_common import PgDBI

# command line option values, keyed by the single-letter option name
RDAMOD = {
   'd' : 0,       # 1 to change directory mode
   'f' : 0,       # 1 to change file mode
   'h' : 0,       # 1 to show help message
   'r' : 0,       # 1 if recursive all
   'R' : 0,       # > 0 to set recursive limit
   'F' : 0o664,   # to change file mode, default to 664
   'D' : 0o775,   # to change directory mode, default to 775
}

# run-time state shared by the functions below
MINFO = {
   'files' : [],            # file/directory names given on the command line
   'curdir' : os.getcwd(),
   'tpath' : None,          # path prefix stripped from names in log messages
   'dcnt' : 0,              # number of directories whose mode was changed
   'fcnt' : 0               # number of files whose mode was changed
}

#
# main function to run the application
#
def main():
   """Parse the command line, change the modes and log a summary.

   -d, -f, -h and -r are flags; -R, -F and -D take a value; any argument
   given before an option is a file or directory name.
   """
   PgDBI.dssdb_dbname()
   PgLOG.set_suid(PgLOG.PGLOG['EUID'])
   PgLOG.set_help_path(__file__)
   PgLOG.PGLOG['LOGFILE'] = "rdamod.log"   # set different log file
   argv = sys.argv[1:]
   PgLOG.cmdlog("rdamod {} ({})".format(' '.join(argv), MINFO['curdir']))
   option = defopt = 'l'   # 'l' is the implicit "file list" option
   for arg in argv:
      ms = re.match(r'-(\w)$', arg)
      if ms:
         option = ms.group(1)
         if option not in RDAMOD: PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
         if option in 'dfhr':
            RDAMOD[option] = 1
            option = defopt
         continue
      if not option: PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX)
      if option == 'l':
         MINFO['files'].append(arg)
         defopt = None   # once files are given, later values need an explicit option
      else:
         if option == 'R':
            RDAMOD[option] = int(arg)
         elif option in 'FD':
            RDAMOD[option] = PgLOG.base2int(arg, 8)
         else:
            RDAMOD[option] = arg
         option = defopt

   if RDAMOD['h'] or not MINFO['files']: PgLOG.show_usage("rdamod")
   if not (RDAMOD['d'] or RDAMOD['f']):
      RDAMOD['d'] = RDAMOD['f'] = 1   # both directories and files as default
   if not RDAMOD['R'] and RDAMOD['r']: RDAMOD['R'] = 1000
   PgDBI.validate_decs_group('rdamod', PgLOG.PGLOG['CURUID'], 1)

   change_top_list(MINFO['files'])

   if (MINFO['dcnt'] + MINFO['fcnt']) > 1:
      msg = ''
      if MINFO['dcnt'] > 0:
         # plural only for more than one directory (was plural for any count)
         s = ('ies' if MINFO['dcnt'] > 1 else 'y')
         msg = "{} Director{}".format(MINFO['dcnt'], s)
      if MINFO['fcnt'] > 0:
         s = ('s' if MINFO['fcnt'] > 1 else '')
         if msg: msg += " & "
         msg += "{} File{}".format(MINFO['fcnt'], s)
      PgLOG.pglog("Total {} changed Mode".format(msg), PgLOG.LOGWRN)
   elif (MINFO['dcnt'] + MINFO['fcnt']) == 0:
      PgLOG.pglog((MINFO['tpath'] if MINFO['tpath'] else MINFO['curdir']) + ": No Mode changed", PgLOG.LOGWRN)

   PgLOG.cmdlog()
   PgLOG.pgexit(0)

#
# change mode for the top level list
#
def change_top_list(files):
   """Change the mode of each named file/directory and descend where requested.

   A directory name ending with '/' is not changed itself; only its content is.
   """
   for file in files:
      info = PgFile.check_local_file(file, 6, PgLOG.LOGWRN)
      if not info:
         PgLOG.pglog(file + ": NOT exists", PgLOG.LOGERR)
         continue

      change = 1
      if not info['isfile'] and re.search(r'/$', file):
         change = 0   # do not change the directory mode if it is ended by '/'
         file = re.sub(r'/$', '', file, count=1)

      if not re.match(r'^/', file): file = PgLOG.join_paths(MINFO['curdir'], file)
      MINFO['tpath'] = (op.dirname(file) if change else file) + "/"
      if change: change_mode(file, info)
      if not info['isfile'] and (RDAMOD['R'] > 0 or not change):
         fs = PgFile.local_glob(file, 6, PgLOG.LOGWRN)
         if fs: change_list(fs, 1, file)   # nothing to do for an empty directory

#
# recursively change directory/file mode
#
def change_list(files, level, cdir):
   """Change modes for the entries of one directory, recursing up to RDAMOD['R'] levels.

   files -- mapping of path name to its info dict, as returned by PgFile.local_glob()
   level -- current depth, 1 for the content of a top-level directory
   cdir  -- the directory being processed, used in the log message
   """
   fcnt = 0

   for file in files:
      info = files[file]
      fcnt += change_mode(file, info)
      if not info['isfile'] and level < RDAMOD['R']:
         fs = PgFile.local_glob(file, 6, PgLOG.LOGWRN)
         if fs: change_list(fs, level+1, file)

   if fcnt > 1:   # display sub count if two or more files are changed mode
      PgLOG.pglog("{}: {} Files changed Mode".format(cdir, fcnt), PgLOG.LOGWRN)

#
# change mode of a single directory/file
#
def change_mode(file, info):
   """Set the configured mode on one file or directory owned by 'rdadata'.

   Returns 1 when a file's mode was changed, 0 for a changed directory or
   when nothing was done; MINFO['fcnt'] / MINFO['dcnt'] are updated.
   """
   # strip the top path literally; it used to be interpolated into a regex,
   # which misbehaves for paths holding characters such as '+' or '('
   tpath = MINFO['tpath'] or ''
   fname = file[len(tpath):] if file.startswith(tpath) else file
   if info['isfile']:
      if not RDAMOD['f']: return 0   # files are governed by -f (was testing -d)
      fname = "F" + fname
      mode = RDAMOD['F']
   else:
      if not RDAMOD['d']: return 0
      fname = "D" + fname
      mode = RDAMOD['D']

   if info['logname'] != "rdadata":
      return PgLOG.pglog("{}: owner {} not rdadata".format(fname, info['logname']), PgLOG.LOGERR)
   if info['mode'] == mode: return 0   # no need change mode

   if PgFile.set_local_mode(file, info['isfile'], mode, info['mode'], info['logname'], PgLOG.LOGWRN):
      if info['isfile']:
         MINFO['fcnt'] += 1
         return 1
      else:
         MINFO['dcnt'] += 1
   return 0

#
# call main() to start program
#
if __name__ == "__main__": main()
# [patch] diff --git a/src/rda_python_miscs/rda_own.py b/src/rda_python_miscs/rda_own.py
# [patch] new file mode 100644
# [patch] index 0000000..7040dbd
# [patch] --- /dev/null
# [patch] +++ b/src/rda_python_miscs/rda_own.py
# [patch] @@ -0,0 +1,169 @@
#!/usr/bin/env python3
#
##################################################################################
#
#     Title: rdaown
#    Author: Zaihua Ji, zji@ucar.edu
#      Date: 10/24/2020
#             2025-03-10 transferred to package rda_python_miscs from
#             https://github.com/NCAR/rda-utility-programs.git
#   Purpose: change file/directory ownership to 'rdadata' in given one or multiple
#             local directories that are owned by decs specialists. it needs
#             super user privilege to execute.
#
#    Github: https://github.com/NCAR/rda-python-miscs.git
#
##################################################################################
#
import re
import os
import sys
import glob
from os import path as op
from rda_python_common import PgLOG
from rda_python_common import PgUtil
from rda_python_common import PgFile
from rda_python_common import PgDBI

# command line option values, keyed by the single-letter option name
RDAOWN = {
   'd' : 0,       # 1 to change directory owner
   'f' : 0,       # 1 to change file owner
   'h' : 0,       # 1 to show help message
   'r' : 0,       # 1 if recursive all
   'R' : 0,       # > 0 to set recursive limit
   'F' : 0o664,   # to change file mode, default to 664
   'D' : 0o775,   # to change directory mode, default to 775
}

# run-time state shared by the functions below
OINFO = {
   'files' : [],            # file/directory names given on the command line
   'curdir' : os.getcwd(),
   'tpath' : None,          # path prefix stripped from names in log messages
   'dcnt' : 0,              # number of directories whose owner was changed
   'fcnt' : 0               # number of files whose owner was changed
}

#
# main function to run the application
#
def main():
   """Parse the command line, change owners to 'rdadata' and log a summary.

   -d, -f, -h and -r are flags; -R takes a value; every other value is
   collected as a file or directory name. Must be executed as 'root'.
   """
   argv = sys.argv[1:]
   PgDBI.dssdb_scname()
   PgLOG.set_help_path(__file__)
   PgLOG.PGLOG['LOGFILE'] = "rdaown.log"   # set different log file
   PgLOG.cmdlog("rdaown {} ({})".format(' '.join(argv), OINFO['curdir']))
   option = defopt = 'l'   # 'l' is the implicit "file list" option
   for arg in argv:
      ms = re.match(r'-(\w+)$', arg)
      if ms:
         option = ms.group(1)
         if option not in RDAOWN: PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
         if option in 'dfhr':
            RDAOWN[option] = 1
            option = defopt
         continue
      if not option: PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX)
      if option == 'R':
         RDAOWN['R'] = int(arg)
         option = defopt
      else:
         OINFO['files'].append(arg)
         defopt = None

   if RDAOWN['h'] or not OINFO['files']: PgLOG.show_usage("rdaown")
   if PgLOG.PGLOG['CURUID'] != "root":
      PgLOG.pglog(PgLOG.PGLOG['CURUID'] + ": you must execute 'rdaown' as 'root'!", PgLOG.LGEREX)
   if not (RDAOWN['d'] or RDAOWN['f']):
      RDAOWN['d'] = RDAOWN['f'] = 1   # list both directories and files as default
   if not RDAOWN['R'] and RDAOWN['r']: RDAOWN['R'] = 1000

   change_top_list(OINFO['files'])

   if (OINFO['dcnt'] + OINFO['fcnt']) > 1:
      msg = ""
      if OINFO['dcnt'] > 0:
         s = ("ies" if OINFO['dcnt'] > 1 else "y")
         msg = "{} Director{}".format(OINFO['dcnt'], s)
      if OINFO['fcnt'] > 0:
         s = ('s' if OINFO['fcnt'] > 1 else '')
         if msg: msg += " & "
         msg += "{} File{}".format(OINFO['fcnt'], s)
      PgLOG.pglog("Total {} changed owner".format(msg), PgLOG.LOGWRN)
   elif (OINFO['dcnt'] + OINFO['fcnt']) == 0:
      PgLOG.pglog((OINFO['tpath'] if OINFO['tpath'] else OINFO['curdir']) + ": No Owner changed", PgLOG.LOGWRN)

   PgLOG.cmdlog()
   PgLOG.pgexit(0)

#
# change owner for the top level list
#
def change_top_list(files):
   """Change the owner of each named file/directory and descend where requested.

   A directory name ending with '/' is not changed itself; only its content is.
   """
   for file in files:
      info = PgFile.check_local_file(file, 2, PgLOG.LOGWRN)
      if not info:
         PgLOG.pglog(file + ": NOT exists", PgLOG.LOGERR)
         continue
      change = 1
      if not info['isfile'] and re.search(r'/$', file):
         change = 0   # do not change the directory owner if it is ended by '/'
         file = re.sub(r'/$', '', file, count=1)

      if not re.match(r'^/', file): file = PgLOG.join_paths(OINFO['curdir'], file)
      OINFO['tpath'] = (op.dirname(file) if change else file) + "/"
      if change: change_owner(file, info)
      if not info['isfile'] and (RDAOWN['R'] or not change):
         fs = glob.glob(file + "/*")
         change_list(fs, 1, file)

#
# recursively change directory/file owner
#
def change_list(files, level, cdir):
   """Change owners for the entries of one directory, recursing up to RDAOWN['R'] levels.

   files -- list of path names in the directory
   level -- current depth, 1 for the content of a top-level directory
   cdir  -- the directory being processed, used in the log message
   """
   fcnt = 0
   for file in files:
      info = PgFile.check_local_file(file, 2, PgLOG.LOGWRN)
      if not info: continue   # should not happen
      fcnt += change_owner(file, info)
      if not info['isfile'] and level < RDAOWN['R']:
         fs = glob.glob(file + "/*")
         change_list(fs, level+1, file)

   if fcnt > 1:   # display sub count if two or more files are changed owner
      PgLOG.pglog("{}: {} Files changed owner in the directory".format(cdir, fcnt), PgLOG.LOGWRN)

#
# change owner for a single directory/file
#
def change_owner(file, info):
   """Change the owner of one file or directory to 'rdadata'.

   Only entries owned by a user found in table dssgrp are changed.
   Returns 1 when a file's owner was changed, 0 for a changed directory or
   when nothing was done; OINFO['fcnt'] / OINFO['dcnt'] are updated.
   """
   # strip the top path literally; it used to be interpolated into a regex,
   # which misbehaves for paths holding characters such as '+' or '('
   tpath = OINFO['tpath'] or ''
   fname = file[len(tpath):] if file.startswith(tpath) else file
   if info['isfile']:
      if not RDAOWN['f']: return 0
      fname = "F" + fname
   else:
      if not RDAOWN['d']: return 0
      fname = "D" + fname

   if info['logname'] == "rdadata": return 0
   # pgget() is the database query helper of PgDBI (it was called on PgLOG)
   if not PgDBI.pgget("dssgrp", "", "logname = '{}'".format(info['logname']), PgLOG.LGEREX):
      return PgLOG.pglog("{}: owner {} not a DECS Specialist!".format(fname, info['logname']), PgLOG.LOGERR)

   # NOTE(review): the file name is placed unquoted into a command run as root;
   # names with spaces or shell metacharacters will break or alter the command.
   if PgLOG.pgsystem("su root -c 'chown rdadata {}'".format(file), PgLOG.LOGWRN, 4):
      PgLOG.pglog("{}: {} => rdadata".format(fname, info['logname']), PgLOG.LOGWRN)
      if info['isfile']:
         OINFO['fcnt'] += 1
         return 1
      else:
         OINFO['dcnt'] += 1
         return 0

   return PgLOG.pglog("{}: Error change owner {} to rdadata".format(fname, info['logname']), PgLOG.LOGERR)

#
# call main() to start program
#
if __name__ == "__main__": main()
# [patch] diff --git a/src/rda_python_miscs/rda_ps.py b/src/rda_python_miscs/rda_ps.py
# [patch] new file mode 100644
# [patch] index 0000000..bc0920b
# [patch] --- /dev/null
# [patch] +++ b/src/rda_python_miscs/rda_ps.py
# [patch] @@ -0,0 +1,194 @@
#!/usr/bin/env python3
#
##################################################################################
#
#     Title: rdaps
#    Author: Zaihua Ji, zji@ucar.edu
#      Date: 10/24/2020
#             2025-03-10 transferred to package rda_python_miscs from
#             https://github.com/NCAR/rda-utility-programs.git
#   Purpose: run ps against running process ID locally or remotely
#
#    Github: https://github.com/NCAR/rda-python-miscs.git
#
##################################################################################
#
import re
import os
import sys
from rda_python_common import PgLOG
from rda_python_common import PgSIG
from rda_python_common import PgUtil
from rda_python_common import PgFile
from rda_python_common import PgDBI

# command line option values, keyed by the single-letter option name
RDAPS = {
   'a' : None,   # application name
   'h' : None,   # remote hostname
   'p' : 0,      # process id to be checked
   'P' : 0,      # parent process id to be checked
   'u' : None,   # login user name
}

#
# main function to run the application
#
def main():
   """Parse the command line into RDAPS and show the matching process snapshot.

   Options taking a value are -a, -h, -p, -P and -u. A bare integer is
   accepted as the process id when -p has not been given. With -h naming
   the SLURM or PBS host the batch queues are listed, otherwise local
   processes are.
   """
   optcnt = 0
   option = None   # must exist before the loop: the first argument may be a bare pid
   argv = sys.argv[1:]
   PgDBI.dssdb_dbname()
   PgLOG.set_suid(PgLOG.PGLOG['EUID'])
   PgLOG.set_help_path(__file__)
   PgLOG.PGLOG['LOGFILE'] = "rdaps.log"   # set different log file
   PgLOG.cmdlog("rdaps {}".format(' '.join(argv)))

   for arg in argv:
      # only letters that have a slot in RDAPS are accepted here; the
      # former 't' had none and raised KeyError on its value
      ms = re.match(r'-([ahpPu])$', arg)
      if ms:
         option = ms.group(1)
      elif re.match(r'-\w+$', arg):
         PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
      elif option:
         if RDAPS[option]: PgLOG.pglog("{}: value passed to Option -{} already".format(arg, option), PgLOG.LGEREX)
         if option in 'pP':
            RDAPS[option] = int(arg)
         elif option == 'h':
            RDAPS[option] = PgLOG.get_short_host(arg)
         else:
            RDAPS[option] = arg
         option = None
         optcnt += 1
      else:
         ms = re.match(r'^(\d+)$', arg)
         if ms and not RDAPS['p']:
            RDAPS['p'] = int(ms.group(1))   # pid allow value only without leading option
            optcnt += 1
         else:
            PgLOG.pglog(arg + ": Value passed in without Option", PgLOG.LGEREX)

   if not optcnt: PgLOG.show_usage("rdaps")
   chkloc = 1
   if RDAPS['h']:
      PgFile.local_host_action(RDAPS['h'], "check processes", PgLOG.PGLOG['HOSTNAME'], PgLOG.LGEREX)
      # pgcmp() == 0 is treated as "same name" here
      if not PgUtil.pgcmp(RDAPS['h'], PgLOG.PGLOG['SLMNAME'], 1):
         slurm_snapshot()
         chkloc = 0
      elif not PgUtil.pgcmp(RDAPS['h'], PgLOG.PGLOG['PBSNAME'], 1):
         pbs_snapshot()
         chkloc = 0
   if chkloc: process_snapshot()

   PgLOG.cmdlog()
   PgLOG.pgexit(0)

#
# get a snapshot of a process status
#
def process_snapshot():
   """Log the `ps` lines of local processes matching the RDAPS filters."""
   if RDAPS['p']:
      cmd = "ps -p {} -f".format(RDAPS['p'])
   elif RDAPS['P']:
      cmd = "ps --ppid {} -f".format(RDAPS['P'])
   elif RDAPS['u']:
      cmd = "ps -u {} -f".format(RDAPS['u'])
   else:
      cmd = "ps -ef"

   buf = PgLOG.pgsystem(cmd, PgLOG.LGWNEX, 20)
   if not buf: return   # nothing returned by ps, nothing to show

   for line in buf.split('\n'):
      # columns of `ps -f`: UID PID PPID <rest of line>
      ms = re.match(r'\s*(\w+)\s+(\d+)\s+(\d+)\s+(.*)$', line)
      if ms:
         uid = ms.group(1)
         pid = int(ms.group(2))
         ppid = int(ms.group(3))
         aname = ms.group(4)
         if RDAPS['u'] and RDAPS['u'] != uid: continue
         if RDAPS['p'] and RDAPS['p'] != pid: continue
         if RDAPS['P'] and RDAPS['P'] != ppid: continue
         if RDAPS['a'] and aname.find(RDAPS['a']) < 0: continue
         PgLOG.pglog(re.sub(r'  +', ' ', line), PgLOG.LOGWRN)

#
# get a snapshot of a SLURM batch process status
#
def slurm_snapshot():
   """Log the `squeue -l` lines of SLURM jobs matching the RDAPS filters.

   Without a job id the listing is limited to partition 'rda'.
   """
   qopts = ''
   if RDAPS['u']: qopts += " -u " + RDAPS['u']
   if RDAPS['p']:
      qopts += " -j {}".format(RDAPS['p'])
   else:
      qopts += " -p rda"   # append, so that a -u filter given above is kept
   cmd = "squeue -l" + qopts

   buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 272)
   if not buf:
      if PgLOG.PGLOG['SYSERR'] and PgLOG.PGLOG['SYSERR'].find('Invalid job id specified') < 0:
         PgLOG.pglog(PgLOG.PGLOG['SYSERR'], PgLOG.LGEREX)
      return

   lines = re.split(r'\n', buf)
   lcnt = len(lines)
   if lcnt < 3: return
   dochk = 1   # 1 until the JOBID header line has been passed
   for line in lines:
      if not line: continue
      if dochk:
         if re.match(r'^\s*JOBID\s', line): dochk = 0
      else:
         vals = re.split(r'\s+', PgLOG.pgtrim(line))
         if len(vals) < 4: continue   # not a job line
         if RDAPS['a'] and vals[2] and RDAPS['a'] != vals[2]: continue
         # move user name to front
         vals.insert(0, vals.pop(3))
         PgLOG.pglog(' '.join(vals), PgLOG.LOGWRN)

#
# get a snapshot of a PBS batch process status
#
def pbs_snapshot():
   """Log one line per PBS job matching the RDAPS filters, user name first.

   Without a user or job id the listing is limited to queue 'rda'.
   """
   qopts = ''
   if RDAPS['u']:
      qopts = "-u {}".format(RDAPS['u'])
   if RDAPS['p']:
      if qopts: qopts += ' '
      qopts += str(RDAPS['p'])
   if not qopts: qopts = 'rda'

   stat = PgSIG.get_pbs_info(qopts, 1, PgLOG.LOGWRN)
   if not stat:
      if PgLOG.PGLOG['SYSERR']: PgLOG.pglog(PgLOG.PGLOG['SYSERR'], PgLOG.LGEREX)
      return

   lcnt = len(stat['JobID'])

   ckeys = list(stat.keys())
   # moving 'UserName' to the first
   if 'UserName' in ckeys:
      ckeys.remove('UserName')
      ckeys.insert(0, 'UserName')

   jnames = stat.get('JobName')
   for i in range(lcnt):
      # compare against this row's job name; the whole column (a list)
      # never equals a string, so -a used to suppress every row
      if RDAPS['a'] and jnames and RDAPS['a'] != jnames[i]: continue
      vals = []
      for k in ckeys:
         vals.append(stat[k][i])
      PgLOG.pglog(' '.join(vals), PgLOG.LOGWRN)

#
# call main() to start program
#
if __name__ == "__main__": main()
# [patch] diff --git
# [patch] a/src/rda_python_miscs/rda_sub.py b/src/rda_python_miscs/rda_sub.py
# [patch] new file mode 100644
# [patch] index 0000000..419e150
# [patch] --- /dev/null
# [patch] +++ b/src/rda_python_miscs/rda_sub.py
# [patch] @@ -0,0 +1,117 @@
#!/usr/bin/env python3
#
##################################################################################
#
#     Title: rdasub
#    Author: Zaihua Ji, zji@ucar.edu
#      Date: 03/51/2021
#             2025-03-10 transferred to package rda_python_miscs from
#             https://github.com/NCAR/rda-utility-programs.git
#   Purpose: python script to submit a nohup background execution
#
#    Github: https://github.com/NCAR/rda-python-miscs.git
#
##################################################################################

import os
import sys
import re
import time
from rda_python_common import PgLOG
from rda_python_common import PgFile
from rda_python_common import PgUtil

#
# main function to execute this script
#
def main():
   """Parse the command line and start the given command under nohup in the background.

   Recognized options are -b (flag) and -cmd, -cwd, -env (each taking one
   value). Everything after the value of -cmd is passed to the command.
   """
   aname = 'rdasub'
   PgLOG.set_help_path(__file__)
   coptions = {'cmd' : None, 'cwd' : None, 'env' : None}   # customized options
   copts = '|'.join(coptions)
   option = None
   argv = sys.argv[1:]
   if not argv: PgLOG.show_usage(aname)
   PgLOG.PGLOG['LOGFILE'] = aname + ".log"
   PgLOG.cmdlog("{} {}".format(aname, ' '.join(argv)))

   while argv:
      arg = argv.pop(0)
      if arg == "-b":
         PgLOG.PGLOG['BCKGRND'] = 1
         option = None
         continue
      ms = re.match(r'^-({})$'.format(copts), arg)
      if ms:
         option = ms.group(1)
         continue
      if not option: PgLOG.pglog("{}: Value passed in without leading option for {}".format(arg, aname), PgLOG.LGEREX)
      if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg):   # quote string with space but not quoted yet
         if arg.find("'") > -1:
            arg = '"{}"'.format(arg)
         else:
            arg = "'{}'".format(arg)

      coptions[option] = arg
      if option == "cmd": break   # the remaining arguments belong to the command
      option = None

   if not coptions['cmd']: PgLOG.pglog(aname + ": specify command via option -cmd to run", PgLOG.LGWNEX)
   args = PgLOG.argv_to_string(argv, 0)   # append command options
   msg = "{}-{}{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.PGLOG['CURUID'], PgLOG.current_datetime())
   if coptions['cwd']:
      # str.find() returns -1 (true) when absent and 0 (false) for a leading
      # '$', so the bare truth test skipped exactly the "$VAR/..." case
      if '$' in coptions['cwd']: coptions['cwd'] = PgLOG.replace_environments(coptions['cwd'], '', PgLOG.LGWNEX)
      msg += "-" + coptions['cwd']
      PgFile.change_local_directory(coptions['cwd'], PgLOG.LGEREX)
   else:
      coptions['cwd'] = PgLOG.PGLOG['CURDIR']
   cmd = PgLOG.valid_command(coptions['cmd'])
   if not cmd and not re.match(r'^/', coptions['cmd']): cmd = PgLOG.valid_command('./' + coptions['cmd'])
   if not cmd: PgLOG.pglog(coptions['cmd'] + ": Cannot find given command to run", PgLOG.LGWNEX)
   if args: cmd += " " + args

   msg += ": " + cmd
   PgLOG.pglog(msg, PgLOG.LOGWRN)
   os.system("nohup " + cmd + " > /dev/null 2>&1 &")
   display_process_info(coptions['cmd'], cmd)

   sys.exit(0)

#
# display the most recent matching process info
#
def display_process_info(cname, cmd):
   """Find the just-started background process with `ps` and log its process id.

   The process table is scanned at most twice, two seconds apart; among the
   lines with parent pid 1 that match `cname`, the one with the latest start
   time is reported.

   cname -- command name as given to -cmd
   cmd   -- full command line that was started
   """
   ctime = time.time()
   RTIME = PID = 0
   pscmd = "ps -u {},{} -f | grep {} | grep ' 1 ' | grep -v ' grep '".format(PgLOG.PGLOG['CURUID'], PgLOG.PGLOG['RDAUSER'], cname)
   # <pid> 1 ... <HH:MM> ... <cname...> <arguments>; raw string, and the
   # command name is matched literally rather than as a regex
   pattern = re.compile(r"\s+(\d+)\s+1\s+.*\s(\d+:\d+)\s.*{}\S*\s*(.*)$".format(re.escape(cname)))

   for i in range(2):
      buf = PgLOG.pgsystem(pscmd, PgLOG.LOGWRN, 20)
      if buf:
         for line in buf.split("\n"):
            ms = pattern.search(line)
            if ms:
               pid = ms.group(1)
               rtm = ms.group(2)
               arg = ms.group(3)
               if not arg or cmd.find(arg) > -1:
                  rtime = PgUtil.unixtime(rtm + ':00')
                  if rtime > ctime: rtime -= 24*60*60   # a time in the future is yesterday's
                  if rtime > RTIME:
                     PID = pid
                     RTIME = rtime
      if PID:
         # the settings dict is PgLOG.PGLOG (was misspelled PgLOG.PgLOG)
         return PgLOG.pglog("Job <{}> is submitted to background <{}>".format(PID, PgLOG.PGLOG['HOSTNAME']), PgLOG.LOGWRN)
      elif i == 0:
         time.sleep(2)   # give the process time to show up, then look once more
      else:
         return PgLOG.pglog("{}: No job information found, It may have finished".format(cmd), PgLOG.LOGWRN)

#
# call main() to start program
#
if __name__ == "__main__": main()
# [patch] diff --git a/src/rda_python_miscs/rda_zip.py b/src/rda_python_miscs/rda_zip.py
# [patch] new file mode 100644
#!/usr/bin/env python3
#
##################################################################################
#
#     Title: rdazip
#    Author: Zaihua Ji, zji@ucar.edu
#      Date: 10/24/2020
#            2025-03-17 transferred to package rda_python_miscs from
#            https://github.com/NCAR/rda-utility-programs.git
#   Purpose: compress/uncompress given file names
#
#    Github: https://github.com/NCAR/rda-python-miscs.git
#
##################################################################################

import re
import os
import sys
from rda_python_common import PgLOG
from rda_python_common import PgFile

#
# main function to run the application
#
def main():
   """Parse the rdazip command line and hand each named file to PgFile.

   -b sets PGLOG['BCKGRND']; -f sets the action flag to 1 and makes the next
   plain argument the compression format; every other plain argument must be
   an existing file.
   """

   cmdargs = sys.argv[1:]
   PgLOG.set_help_path(__file__)
   PgLOG.PGLOG['LOGFILE'] = "rdazip.log"   # set different log file
   PgLOG.cmdlog("rdazip {}".format(' '.join(cmdargs)))

   action = 0
   zipfmt = None
   pending = None      # name of the option still waiting for its value
   targets = []

   for arg in cmdargs:
      flag = re.match(r'-(\w+)$', arg)
      if flag:
         pending = flag.group(1)
         if pending == "b":
            PgLOG.PGLOG['BCKGRND'] = 1
            pending = None
         elif pending == "f":
            action = 1
         else:
            PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
         continue

      if pending:
         # a value for the pending option: only one format may be given
         if zipfmt: PgLOG.pglog("{}: compression format '{}' provided already".format(arg, zipfmt), PgLOG.LGEREX)
         zipfmt = arg
         if not targets: pending = None
         continue

      if not os.path.isfile(arg): PgLOG.pglog(arg + ": file not exists", PgLOG.LGEREX)
      targets.append(arg)

   if not targets: PgLOG.show_usage("rdazip")

   for target in targets:
      PgFile.compress_local_file(target, zipfmt, action, PgLOG.LGWNEX)

   PgLOG.cmdlog()
   sys.exit(0)

#
# call main() to start program
#
if __name__ == "__main__": main()
##################################################################################
#     Title: rdacp
#    Author: Zaihua Ji, zji@ucar.edu
#      Date: 10/24/2020
#            2025-03-10 transferred to package rda_python_miscs from
#            https://github.com/NCAR/rda-utility-programs.git
#   Purpose: copy files locally and remotely by 'rdadata'
#    Github: https://github.com/NCAR/rda-python-miscs.git
##################################################################################
import re
import os
import sys
from os import path as op
from rda_python_common.pg_file import PgFile

class RdaCp(PgFile):
   """Application object of 'rdacp': copy files locally and remotely.

   Command-line values are kept in self.RDACP (keyed by option name) and the
   running copy state in self.CINFO.
   """

   def __init__(self):
      super().__init__()
      self.RDACP = {
         'fh' : None,    # from host name, default to localhost
         'th' : None,    # to host name, default to localhost
         'fb' : None,    # from bucket name for a from file in Object Store
         'tb' : None,    # to bucket name for a to file in Object Store
         'fp' : None,    # from Globus endpoint
         'tp' : None,    # to Globus endpoint
         'f' : [],       # from file names
         't' : None,     # to file name
         'r' : 0,        # 1 if recursive all
         'R' : 0,        # > 0 to set recursive limit
         'F' : 0o664,    # to file mode, default to 664
         'D' : 0o775,    # to directory mode, default to 775
      }
      self.CINFO = {
         'tcnt' : 0,
         'htcnt' : 0,
         'cpflag' : 0,   # 1 file only, 2 directory only, 3 both
         'cpstr' : ['', 'Files', 'Directories', 'Files/Directories'],
         'fpath' : None,
         'tpath' : None,
         'fhost' : '',
         'thost' : '',
         'curdir' : os.getcwd()
      }

   # function to read parameters
   def read_parameters(self):
      """Fill self.RDACP / self.CINFO from sys.argv; show usage if no from-file."""
      dohelp = 0
      argv = sys.argv[1:]
      self.set_suid(self.PGLOG['EUID'])
      self.set_help_path(__file__)
      self.PGLOG['LOGFILE'] = "rdacp.log"   # set different log file
      self.cmdlog("rdacp {} ({})".format(' '.join(argv), self.CINFO['curdir']))
      defopt = option = 'f'    # plain values are from-file names until another option is seen
      for arg in argv:
         if re.match(r'-(h|-help)$', arg, re.I):
            dohelp = 1
            continue
         ms = re.match(r'-(\w+)$', arg)
         if ms:
            option = ms.group(1)
            if option not in self.RDACP: self.pglog(arg + ": Unknown Option", self.LGEREX)
            if option == 'r':
               self.RDACP['r'] = 1
               option = None
            continue
         if not option: self.pglog(arg + ": Value provided without option", self.LGEREX)
         if option == "f":
            self.RDACP['f'].append(arg)
            defopt = None    # once from-files are given, later values need an explicit option
         else:
            if option == 'R':
               self.RDACP[option] = int(arg)
            elif option in ('F', 'D'):
               self.RDACP[option] = self.base2int(arg, 8)
            else:
               self.RDACP[option] = arg
               if option == 'th':
                  self.CINFO['thost'] = arg + '-'
               elif option == 'fh':
                  self.CINFO['fhost'] = arg + '-'
            option = defopt
      if dohelp or not self.RDACP['f']: self.show_usage("rdacp")

   # function to start actions
   def start_actions(self):
      """Validate the parsed options, copy the from-file list and log a summary."""
      self.dssdb_dbname()
      self.validate_decs_group('rdacp', self.PGLOG['CURUID'], 1)
      if not self.RDACP['R'] and self.RDACP['r']: self.RDACP['R'] = 1000
      if not self.RDACP['t']:
         self.CINFO['tpath'] = self.RDACP['t'] = "."
      else:
         ms = re.match(r'^(.+)/$', self.RDACP['t'])
         if ms:
            self.CINFO['tpath'] = ms.group(1)    # a trailing '/' marks the target as a directory
         else:
            tinfo = self.check_gdex_file(self.RDACP['t'], self.RDACP['th'], 0, self.LGWNEX)
            if tinfo and tinfo['isfile'] == 0: self.CINFO['tpath'] = self.RDACP['t']
      self.PGLOG['FILEMODE'] = self.RDACP['F']
      self.PGLOG['EXECMODE'] = self.RDACP['D']
      fcnt = len(self.RDACP['f'])
      if not self.CINFO['tpath'] and fcnt > 1:
         self.pglog("{}{}: Cannot copy multiple files to a single file".format(self.CINFO['thost'], self.RDACP['t']), self.LGEREX)
      if self.RDACP['th'] and self.RDACP['fh'] and self.RDACP['th'] == self.RDACP['fh'] and self.RDACP['fh'] != 'HPSS':
         self.pglog(self.RDACP['fh'] + ": Cannot copy file onto the same host", self.LGEREX)
      if self.RDACP['fb']:
         self.PGLOG['OBJCTBKT'] = self.RDACP['fb']
      elif self.RDACP['tb']:
         self.PGLOG['OBJCTBKT'] = self.RDACP['tb']
      if self.RDACP['fp']:
         self.PGLOG['BACKUPEP'] = self.RDACP['fp']
      elif self.RDACP['tp']:
         self.PGLOG['BACKUPEP'] = self.RDACP['tp']
      self.copy_top_list(self.RDACP['f'])
      hinfo = ''
      if self.RDACP['fh']: hinfo += " From " + self.RDACP['fh']
      if self.RDACP['th']: hinfo += " To " + self.RDACP['th']
      if self.CINFO['tcnt'] > 1:
         # message typo 'copiled' corrected to 'copied'
         self.pglog("Total {} {} copied{}".format(self.CINFO['tcnt'], self.CINFO['cpstr'][self.CINFO['cpflag']], hinfo), self.LOGWRN)
      elif self.CINFO['tcnt'] == 0 and not self.RDACP['fh']:
         self.pglog("{}: No File copied{}".format((self.CINFO['fpath'] if self.CINFO['fpath'] else self.CINFO['curdir']), hinfo), self.LOGWRN)
      self.cmdlog()

   # display the top level list
   def copy_top_list(self, files):
      """Copy every entry of the from-file list given on the command line."""
      for file in files:
         if self.RDACP['th'] and not self.pgcmp(self.RDACP['th'], self.PGLOG['BACKUPNM'], 1):
            info = self.check_globus_file(file, 'gdex-glade', 0, self.LGWNEX)
         else:
            info = self.check_gdex_file(file, self.RDACP['fh'], 0, self.LGWNEX)
         if not info:
            self.pglog("{}{}: {}".format(self.CINFO['fhost'], file, self.PGLOG['MISSFILE']), self.LOGERR)
            continue
         dosub = 0
         if info['isfile'] == 0:
            self.CINFO['cpflag'] |= 2
            if not self.CINFO['tpath']:
               self.pglog("{}{}: Cannot copy directory to a single file".format(self.CINFO['fhost'], file), self.LGEREX)
            if re.search(r'/$', file):
               dosub = 1   # copy the file under this directory if it is ended by '/'
               file = re.sub(r'/$', '', file)
         else:
            self.CINFO['cpflag'] |= 1
         if not re.match(r'^/', file): file = self.join_paths(self.CINFO['curdir'], file)
         self.CINFO['fpath'] = (file if dosub else op.dirname(file)) + "/"
         if info['isfile']:
            self.CINFO['tcnt'] += self.copy_file(file, info['isfile'])
         elif dosub or self.RDACP['R']:
            flist = self.gdex_glob(file, self.RDACP['fh'], 0, self.LGWNEX)
            if flist: self.copy_list(flist, 1, file)
         else:
            self.pglog("{}{}: Add option -r to copy directory".format(self.CINFO['fhost'], file), self.LGEREX)

   # recursively copy directory/file
   def copy_list(self, tlist, level, cdir):
      """Copy the files in tlist and descend into directories up to RDACP['R'] levels.

      tlist maps a file name to an info dict with an 'isfile' entry; level is
      the current depth (1 for the first directory) and cdir the directory
      the list was read from.
      """
      fcnt = 0
      for file, info in tlist.items():
         if info['isfile']:
            fcnt += self.copy_file(file, info['isfile'])
            self.CINFO['cpflag'] |= 1    # only files reach this branch
         elif level < self.RDACP['R']:
            flist = self.gdex_glob(file, self.RDACP['fh'], 0, self.LGWNEX)
            if flist: self.copy_list(flist, level+1, file)
      if fcnt > 1:   # display sub count if two or more files are copied
         self.pglog("{}{}: {} {} copied from directory".format(self.CINFO['fhost'], cdir, fcnt, self.CINFO['cpstr'][self.CINFO['cpflag']]), self.LOGWRN)
      self.CINFO['tcnt'] += fcnt

   # copy one file each time
   def copy_file(self, fromfile, isfile):
      """Copy one file; return 1 if copy_gdex_file() reports success, else 0."""
      if self.CINFO['tpath']:
         # strip the source path as a literal prefix; building a regex from a
         # path mis-handles names containing '.', '+', '(' or '['
         fpath = self.CINFO['fpath']
         if fpath and fromfile.startswith(fpath):
            fname = fromfile[len(fpath):]
         else:
            fname = fromfile
         if isfile:
            tofile = self.join_paths(self.CINFO['tpath'], fname)
         else:
            tofile = self.CINFO['tpath'] + '/'
      else:
         tofile = self.RDACP['t']
      return (1 if self.copy_gdex_file(tofile, fromfile, self.RDACP['th'], self.RDACP['fh'], self.LGWNEX) else 0)

# main function to execute this script
def main():
   app = RdaCp()    # named 'app' so the builtin 'object' is not shadowed
   app.read_parameters()
   app.start_actions()
   app.pgexit(0)

# call main() to start program
if __name__ == "__main__": main()
##################################################################################
#     Title: rdakill
#    Author: Zaihua Ji, zji@ucar.edu
#      Date: 10/24/2020
#            https://github.com/NCAR/rda-utility-programs.git
#   Purpose: kill a local or batch process and its child processes for a given
#            running process ID by 'rdadata'
#    Github: https://github.com/NCAR/rda-python-miscs.git
##################################################################################
import re
import sys
import time
from rda_python_common.pg_file import PgFile

class RdaKill(PgFile):
   """Application object of 'rdakill': kill local processes or batch jobs.

   Command-line values are kept in self.RDAKILL, keyed by option letter.
   """

   def __init__(self):
      super().__init__()
      self.RDAKILL = {
         'a' : None,   # application name
         'h' : None,   # hostname
         'p' : 0,      # process id to be killed
         'P' : 0,      # parent pid
         'r' : 0,      # 1 - reserved for exclusive, working with -s PEND only
         'u' : None,   # login user name
         's' : None,   # batch status to kill
         'q' : None    # batch partition/queue for SLURM/PBS, rda for default
      }

   # function to read parameters
   def read_parameters(self):
      """Fill self.RDAKILL from sys.argv; show usage if nothing was provided."""
      optcnt = 0
      option = None
      argv = sys.argv[1:]
      self.dssdb_dbname()
      self.set_suid(self.PGLOG['EUID'])
      self.set_help_path(__file__)
      self.PGLOG['LOGFILE'] = "rdakill.log"   # set different log file
      self.cmdlog("rdakill {}".format(' '.join(argv)))
      for arg in argv:
         # only letters that are keys of self.RDAKILL take a value; '-t' used to
         # be accepted here and then failed with KeyError on the lookup below
         ms = re.match(r'-([ahpPqsu])$', arg)
         if ms:
            option = ms.group(1)
         elif re.match(r'-r$', arg):
            self.RDAKILL['r'] = 1
         elif re.match(r'-\w+$', arg):
            self.pglog(arg + ": Unknown Option", self.LGEREX)
         elif option:
            if self.RDAKILL[option]: self.pglog("{}: value passed to Option -{} already".format(arg, option), self.LGEREX)
            if option in ('p', 'P'):
               self.RDAKILL[option] = int(arg)
            elif option == 'h':
               self.RDAKILL[option] = self.get_short_host(arg)
            else:
               self.RDAKILL[option] = arg
            option = None
            optcnt += 1
         else:
            ms = re.match(r'^(\d+)$', arg)
            # a bare number is taken as the pid, but only while no pid is set yet
            # (the test was inverted and rejected every bare pid)
            if ms and not self.RDAKILL['p']:
               self.RDAKILL['p'] = int(ms.group(1))   # pid allow value only without leading option
               optcnt += 1
            else:
               self.pglog(arg + ": pass in value without Option", self.LGEREX)
      if not optcnt: self.show_usage("rdakill")

   # function to start actions
   def start_actions(self):
      """Dispatch to the SLURM, PBS or local kill routine based on option -h."""
      killloc = 1
      if self.RDAKILL['h']:
         self.local_host_action(self.RDAKILL['h'], "kill processes", self.PGLOG['HOSTNAME'], self.LGEREX)
         if not self.pgcmp(self.RDAKILL['h'], self.PGLOG['SLMNAME'], 1):
            if not (self.RDAKILL['p'] or self.RDAKILL['s']):
               self.pglog("Provide Batch ID or Job Status to kill SLURM jobs", self.LGEREX)
            if self.RDAKILL['p']:
               # method name typo 'dakill_slurm_batch' corrected
               self.rdakill_slurm_batch(self.RDAKILL['p'])
            else:
               self.rdakill_slurm_status(self.RDAKILL['s'], self.RDAKILL['q'], self.RDAKILL['u'])
            killloc = 0
         elif not self.pgcmp(self.RDAKILL['h'], self.PGLOG['PBSNAME'], 1):
            if not (self.RDAKILL['p'] or self.RDAKILL['s']):
               self.pglog("Provide Batch ID or Job Status to kill PBS jobs", self.LGEREX)
            if self.RDAKILL['p']:
               self.rdakill_pbs_batch(self.RDAKILL['p'])
            else:
               self.rdakill_pbs_status(self.RDAKILL['s'], self.RDAKILL['q'], self.RDAKILL['u'])
            killloc = 0
      if killloc:
         if not (self.RDAKILL['p'] or self.RDAKILL['P'] or self.RDAKILL['a']):
            self.pglog("Specify process ID, parent PID or App Name to kill", self.LGEREX)
         self.rdakill_processes(self.RDAKILL['p'], self.RDAKILL['P'], self.RDAKILL['a'], self.RDAKILL['u'])
      self.cmdlog()

   # kill processes for given condition
   def rdakill_processes(self, pid, ppid, aname = None, uname = None, level = 0):
      """Kill the local processes selected by pid, ppid, aname and uname.

      The children of each match are killed first through a recursive call
      with level + 1; the 'nothing found' message is logged at level 0 only.
      """
      kcnt = 0
      if pid:
         cmd = "ps -p {} -f".format(pid)
      elif ppid:
         cmd = "ps --ppid {} -f".format(ppid)
      elif uname:
         cmd = "ps -u {} -f".format(uname)
      else:
         cmd = "ps -ef"
      buf = self.pgsystem(cmd, self.LGWNEX, 20)
      if buf:
         for line in re.split('\n', buf):
            ms = re.match(r'\s*(\w+)\s+(\d+)\s+(\d+)\s+(.*)$', line)
            if not ms: continue
            uid = ms.group(1)
            cid = int(ms.group(2))
            pcid = int(ms.group(3))
            cname = ms.group(4)
            if pid and pid != cid: continue
            if ppid and ppid != pcid: continue
            if uname and not re.match(r'all$', uname, re.I) and uname != uid: continue
            if aname and cname.find(aname) < 0: continue
            kcnt += 1
            self.rdakill_processes(0, cid, None, None, level+1)
            self.kill_local_child(cid, uid, re.sub(r' +', ' ', line))
            self.record_dscheck_interrupt(cid, self.PGLOG['HOSTNAME'])
      if not (kcnt or level):
         buf = "No process identified to kill "   # message typo 'idendified' corrected
         if self.RDAKILL['h']:
            buf += "on " + self.RDAKILL['h']
         else:
            buf += "locally"
            # NOTE(review): nesting of this hint was reconstructed from a flattened
            # diff - confirm it belongs to the 'locally' branch
            if self.PGLOG['CURBID']: buf += "; add Option '-h SLURM' if SLURM batch ID provided"
         self.pglog(buf, self.LOGWRN)

   # a local child process
   def kill_local_child(self, pid, uid, line):
      """Send 'kill -9' to one local process and log the outcome for its ps line."""
      if self.check_process(pid):
         cmd = self.get_local_command("kill -9 {}".format(pid), uid)
         if self.pgsystem(cmd, self.LOGWRN, 260):   # 4+256
            return self.pglog("Kill: " + line, self.LOGWRN)
         elif self.check_process(pid):
            return self.pglog("Error Kill: {}\n{}".format(line, self.PGLOG['SYSERR']), self.LOGWRN)
      if not self.check_process(pid): self.pglog("Quit: " + line, self.LOGWRN)

   # kill a slurm batch job
   def rdakill_slurm_batch(self, bid):
      """Cancel SLURM job bid with scancel; return the pgsystem() result (0 if not found)."""
      ret = 0
      stat = self.check_slurm_status(bid, self.LOGWRN)
      if stat:
         cmd = self.get_local_command("scancel {}".format(bid), stat['USER'])
         ret = self.pgsystem(cmd, self.LOGWRN, 6)
         if ret: self.record_dscheck_interrupt(bid, self.PGLOG['SLMNAME'])
      else:
         self.pglog("{}: cannot find SLURM batch ID".format(bid), self.LOGERR)
      if not ret and self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.LGEREX)
      return ret

   # kill SLURM batch jobs for given status
   def rdakill_slurm_status(self, stat, part, uname):
      """Cancel every SLURM job in partition part (default 'rda') whose state is stat."""
      if not part: part = 'rda'
      bcmd = "sacct -o jobid,user,state -r {} -".format(part)
      bcmd += ("u " + uname if uname else 'a')
      lines = self.get_slurm_multiple(bcmd)
      bcnt = len(lines['JOBID']) if lines else 0
      pcnt = kcnt = 0
      for i in range(bcnt):
         if lines['STATE'][i] == stat:
            pcnt += 1
            kcnt += self.rdakill_slurm_batch(lines['JOBID'][i])
      if pcnt > 0:
         s = 's' if pcnt > 1 else ''
         line = "{} of {} SLURM '{}' job{} Killed".format(kcnt, pcnt, stat, s)
      else:
         line = "No SLURM '{}' job found to kill".format(stat)
      line += " in Partition '{}'".format(part)
      if uname: line += " for " + uname
      self.pglog(line, self.LOGWRN)

   # kill a pbs batch job
   def rdakill_pbs_batch(self, bid):
      """Delete PBS job bid with qdel; return the pgsystem() result (0 if not found)."""
      ret = 0
      stat = self.get_pbs_info(bid, 0, self.LOGWRN)
      if stat:
         dcmd = 'qdel'
         if self.PGLOG['HOSTTYPE'] == 'ch': dcmd += 'casper'
         cmd = self.get_local_command("{} {}".format(dcmd, bid), stat['UserName'])
         ret = self.pgsystem(cmd, self.LOGWRN, 7)
         if ret: self.record_dscheck_interrupt(bid, self.PGLOG['PBSNAME'])
      else:
         self.pglog("{}: cannot find PBS batch ID".format(bid), self.LOGERR)
      if not ret and self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.LGEREX)
      return ret

   # kill PBS batch jobs for given status
   def rdakill_pbs_status(self, stat, queue, uname):
      """Delete every PBS job in queue (default 'rda') whose state is stat."""
      if not queue: queue = 'rda'
      qopts = ''
      if uname:
         qopts = "-u " + uname
      if qopts: qopts += ' '
      qopts += queue
      lines = self.get_pbs_info(qopts, 1)
      # guard an empty result the same way rdakill_slurm_status() does
      bcnt = len(lines['JobID']) if lines else 0
      pcnt = kcnt = 0
      for i in range(bcnt):
         if stat != lines['State'][i]: continue
         pcnt += 1
         kcnt += self.rdakill_pbs_batch(lines['JobID'][i])
      if pcnt > 0:
         s = 's' if pcnt > 1 else ''
         line = "{} of {} PBS '{}' job{} Killed".format(kcnt, pcnt, stat, s)
      else:
         line = "No PBS '{}' job found to kill".format(stat)
      line += " in Queue '{}'".format(queue)
      if uname: line += " for " + uname
      self.pglog(line, self.LOGWRN)

   # record a dscheck
   def record_dscheck_interrupt(self, pid, host):
      """Mark the dscheck record of (pid, host), if one exists, as interrupted."""
      pgrec = self.pgget("dscheck", "cindex", "pid = {} AND hostname = '{}'".format(pid, host), self.LOGERR)
      if pgrec:
         record = {'chktime' : int(time.time()), 'status' : 'I', 'pid' : 0}   # release lock
         self.pgupdt("dscheck", record, "cindex = {}".format(pgrec['cindex']), self.LGEREX)

# main function to execute this script
def main():
   app = RdaKill()    # named 'app' so the builtin 'object' is not shadowed
   app.read_parameters()
   app.start_actions()
   app.pgexit(0)

# call main() to start program
if __name__ == "__main__": main()
################################################################################## -# import re import os import sys from os import path as op -from rda_python_common import PgLOG -from rda_python_common import PgUtil -from rda_python_common import PgFile -from rda_python_common import PgDBI - -RDAMOD = { - 'd' : 0, # 1 to change directory mode - 'f' : 0, # 1 to change file mode - 'h' : 0, # 1 to show help message - 'r' : 0, # 1 if recursive all - 'R' : 0, # > 0 to set recursive limit - 'F' : 0o664, # to chnage file mode, default to 664 - 'D' : 0o775, # to chnge directory mode, default to 775 -} - -MINFO = { - 'files' : [], - 'curdir' : os.getcwd(), - 'tpath' : None, - 'dcnt' : 0, - 'fcnt' : 0 -} - -# -# main function to run the application -# -def main(): - - PgDBI.dssdb_dbname() - PgLOG.set_suid(PgLOG.PGLOG['EUID']) - PgLOG.set_help_path(__file__) - PgLOG.PGLOG['LOGFILE'] = "rdamod.log" # set different log file - argv = sys.argv[1:] - PgLOG.cmdlog("rdamod {} ({})".format(' '.join(argv), MINFO['curdir'])) - option = defopt = 'l' - for arg in argv: - ms = re.match(r'-(\w)$', arg) - if ms: - option = ms.group(1) - if option not in RDAMOD: PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX) - if 'dfhr'.find(option) > -1: - RDAMOD[option] = 1 +from rda_python_common.pg_file import PgFile + +class RdaMod(PgFile): + + def __init__(self): + super().__init__() + self.RDAMOD = { + 'd' : 0, # 1 to change directory mode + 'f' : 0, # 1 to change file mode + 'h' : 0, # 1 to show help message + 'r' : 0, # 1 if recursive all + 'R' : 0, # > 0 to set recursive limit + 'F' : 0o664, # to chnage file mode, default to 664 + 'D' : 0o775, # to chnge directory mode, default to 775 + } + self.MINFO = { + 'files' : [], + 'curdir' : os.getcwd(), + 'tpath' : None, + 'dcnt' : 0, + 'fcnt' : 0 + } + + # function to read parameters + def read_parameters(self): + self.set_suid(self.PGLOG['EUID']) + self.set_help_path(__file__) + self.PGLOG['LOGFILE'] = "rdamod.log" # set different log file + argv 
= sys.argv[1:] + self.cmdlog("rdamod {} ({})".format(' '.join(argv), self.MINFO['curdir'])) + option = defopt = 'l' + for arg in argv: + ms = re.match(r'-(\w)$', arg) + if ms: + option = ms.group(1) + if option not in self.RDAMOD: self.pglog(arg + ": Unknown Option", self.LGEREX) + if 'dfhr'.find(option) > -1: + self.RDAMOD[option] = 1 + option = defopt + continue + if not option: self.pglog(arg + ": Value provided without option", self.LGEREX) + if option == 'l': + self.MINFO['files'].append(arg) + defopt = None + else: + if option == 'R': + self.RDAMOD[option] = int(arg) + elif 'FD'.find(option) > -1: + self.RDAMOD[option] = self.base2int(arg, 8) + else: + self.RDAMOD[option] = arg option = defopt - continue - if not option: PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX) - if option == 'l': - MINFO['files'].append(arg) - defopt = None + if self.RDAMOD['h'] or not self.MINFO['files']: self.show_usage("rdamod") + + # function to start actions + def start_actions(self): + self.dssdb_dbname() + if not (self.RDAMOD['d'] or self.RDAMOD['f']): + self.RDAMOD['d'] = self.RDAMOD['f'] = 1 # both directories and files as default + if not self.RDAMOD['R'] and self.RDAMOD['r']: self.RDAMOD['R'] = 1000 + self.validate_decs_group('rdamod', self.PGLOG['CURUID'], 1) + self.change_top_list(self.MINFO['files']) + if (self.MINFO['dcnt'] + self.MINFO['fcnt']) > 1: + msg = '' + if self.MINFO['dcnt'] > 0: + s = ('ies' if self.MINFO['dcnt'] else 'y') + msg = "{} Director{}".format(self.MINFO['dcnt'], s) + if self.MINFO['fcnt'] > 0: + s = ('s' if self.MINFO['fcnt'] > 1 else '') + if msg: msg += " & " + msg += "{} File{}".format(self.MINFO['fcnt'], s) + self.pglog("Total {} changed Mode".format(msg), self.LOGWRN) + elif (self.MINFO['dcnt'] + self.MINFO['fcnt']) == 0: + self.pglog((self.MINFO['tpath'] if self.MINFO['tpath'] else self.MINFO['curdir']) + ": No Mode changed", self.LOGWRN) + self.cmdlog() + + # change mode for the top level list + def change_top_list(self, 
files): + for file in files: + info = self.check_local_file(file, 6, self.LOGWRN) + if not info: + self.pglog(file + ": NOT exists", self.LOGERR) + continue + change = 1 + if not info['isfile'] and re.search(r'/$', file): + change = 0 # do not change the directory mode if it is ended by '/' + file = re.sub(r'/$', '', file, 1) + if not re.match(r'^/', file): file = self.join_paths(self.MINFO['curdir'], file) + self.MINFO['tpath'] = (op.dirname(file) if change else file) + "/" + if change: self.change_mode(file, info) + if not info['isfile'] and (self.RDAMOD['R'] > 0 or not change): + fs = self.local_glob(file, 6, self.LOGWRN) + self.change_list(fs, 1, file) + + # recursively change directory/file mode + def change_list(self, files, level, cdir): + fcnt = 0 + for file in files: + info = files[file] + fcnt += self.change_mode(file, info) + if not info['isfile'] and level < self.RDAMOD['R']: + fs = self.local_glob(file, 6, self.LOGWRN) + self.change_list(fs, level+1, file) + if fcnt > 1: # display sub count if two more files are changed mode + self.pglog("{}: {} Files changed Mode".format(cdir, fcnt), self.LOGWRN) + + # change mode of a single directory/file + def change_mode(self, file, info): + fname = re.sub(r'^{}'.format(self.MINFO['tpath']), '', file, 1) + if info['isfile']: + if not self.RDAMOD['d']: return 0 + fname = "F" + fname + mode = self.RDAMOD['F'] else: - if option == 'R': - RDAMOD[option] = int(arg) - elif 'FD'.find(option) > -1: - RDAMOD[option] = PgLOG.base2int(arg, 8) + if not self.RDAMOD['d']: return 0 + fname = "D" + fname + mode = self.RDAMOD['D'] + if info['logname'] != "rdadata": + return self.pglog("{}: owner {} not rdadata".format(fname, info['logname']), self.LOGERR) + if info['mode'] == mode: return 0 # no need change mode + if self.set_local_mode(file, info['isfile'], mode, info['mode'], info['logname'], self.LOGWRN): + if info['isfile']: + self.MINFO['fcnt'] += 1 + return 1 else: - RDAMOD[option] = arg - option = defopt - - if RDAMOD['h'] 
or not MINFO['files']: PgLOG.show_usage("rdamod") - if not (RDAMOD['d'] or RDAMOD['f']): - RDAMOD['d'] = RDAMOD['f'] = 1 # both directories and files as default - if not RDAMOD['R'] and RDAMOD['r']: RDAMOD['R'] = 1000 - PgDBI.validate_decs_group('rdamod', PgLOG.PGLOG['CURUID'], 1) - - change_top_list(MINFO['files']) - - if (MINFO['dcnt'] + MINFO['fcnt']) > 1: - msg = '' - if MINFO['dcnt'] > 0: - s = ('ies' if MINFO['dcnt'] else 'y') - msg = "{} Director{}".format(MINFO['dcnt'], s) - if MINFO['fcnt'] > 0: - s = ('s' if MINFO['fcnt'] > 1 else '') - if msg: msg += " & " - msg += "{} File{}".format(MINFO['fcnt'], s) - PgLOG.pglog("Total {} changed Mode".format(msg), PgLOG.LOGWRN) - elif (MINFO['dcnt'] + MINFO['fcnt']) == 0: - PgLOG.pglog((MINFO['tpath'] if MINFO['tpath'] else MINFO['curdir']) + ": No Mode changed", PgLOG.LOGWRN) - - PgLOG.cmdlog() - PgLOG.pgexit(0) - -# -# change mode for the top level list -# -def change_top_list(files): - - for file in files: - info = PgFile.check_local_file(file, 6, PgLOG.LOGWRN) - if not info: - PgLOG.pglog(file + ": NOT exists", PgLOG.LOGERR) - continue + self.MINFO['dcnt'] += 1 + return 0 - change = 1 - if not info['isfile'] and re.search(r'/$', file): - change = 0 # do not change the directory mode if it is ended by '/' - file = re.sub(r'/$', '', file, 1) - - if not re.match(r'^/', file): file = PgLOG.join_paths(MINFO['curdir'], file) - MINFO['tpath'] = (op.dirname(file) if change else file) + "/" - if change: change_mode(file, info) - if not info['isfile'] and (RDAMOD['R'] > 0 or not change): - fs = PgFile.local_glob(file, 6, PgLOG.LOGWRN) - change_list(fs, 1, file) - -# -# recursively change directory/file mode -# -def change_list(files, level, cdir): - - fcnt = 0 - - for file in files: - info = files[file] - fcnt += change_mode(file, info) - if not info['isfile'] and level < RDAMOD['R']: - fs = PgFile.local_glob(file, 6, PgLOG.LOGWRN) - change_list(fs, level+1, file) - - if fcnt > 1: # display sub count if two more files are 
changed mode - PgLOG.pglog("{}: {} Files changed Mode".format(cdir, fcnt), PgLOG.LOGWRN) - -# -# change mode of a single directory/file -# -def change_mode(file, info): - - fname = re.sub(r'^{}'.format(MINFO['tpath']), '', file, 1) - if info['isfile']: - if not RDAMOD['d']: return 0 - fname = "F" + fname - mode = RDAMOD['F'] - else: - if not RDAMOD['d']: return 0 - fname = "D" + fname - mode = RDAMOD['D'] - - if info['logname'] != "rdadata": - return PgLOG.pglog("{}: owner {} not rdadata".format(fname, info['logname']), PgLOG.LOGERR) - if info['mode'] == mode: return 0 # no need change mode - - if PgFile.set_local_mode(file, info['isfile'], mode, info['mode'], info['logname'], PgLOG.LOGWRN): - if info['isfile']: - MINFO['fcnt'] += 1 - return 1 - else: - MINFO['dcnt'] += 1 - return 0 +# main function to excecute this script +def main(): + object = RdaMod() + object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/rdaown.py b/src/rda_python_miscs/rdaown.py index 7040dbd..4f8aed8 100644 --- a/src/rda_python_miscs/rdaown.py +++ b/src/rda_python_miscs/rdaown.py @@ -1,7 +1,5 @@ #!/usr/bin/env python3 -# ################################################################################## -# # Title: rdaown # Author: Zaihua Ji, zji@ucar.edu # Date: 10/24/2020 @@ -10,160 +8,143 @@ # Purpose: change file/directory ownership to 'rdadata' in given one or mutilple # local directories that are owned by decs specialists. it needs # super user privilege to execute. 
-# # Github: https://github.com/NCAR/rda-python-miscs.git -# ################################################################################## -# import re import os import sys import glob from os import path as op -from rda_python_common import PgLOG -from rda_python_common import PgUtil -from rda_python_common import PgFile -from rda_python_common import PgDBI - -RDAOWN = { - 'd' : 0, # 1 to change directory owner - 'f' : 0, # 1 to change file owner - 'h' : 0, # 1 to show help message - 'r' : 0, # 1 if recursive all - 'R' : 0, # > 0 to set recursive limit - 'F' : 0o664, # to change file mode, default to 664 - 'D' : 0o775, # to change directory mode, default to 775 -} - -OINFO = { - 'files' : [], - 'curdir' : os.getcwd(), - 'tpath' : None, - 'dcnt' : 0, - 'fcnt' : 0 -} - -# -# main function to run the application -# -def main(): - - argv = sys.argv[1:] - PgDBI.dssdb_scname() - PgLOG.set_help_path(__file__) - PgLOG.PGLOG['LOGFILE'] = "rdaown.log" # set different log file - PgLOG.cmdlog("rdaown {} ({})".format(' '.join(argv), OINFO['curdir'])) - option = defopt = 'l' - for arg in argv: - ms = re.match(r'-(\w+)$', arg) - if ms: - option = ms.group(1) - if option not in RDAOWN: PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX) - if 'dfhr'.find(option) > -1: - RDAOWN[option] = 1 +from rda_python_common.pg_file import PgFile + +class RdaOwn(PgFile): + + def __init__(self): + super().__init__() + self.RDAOWN = { + 'd' : 0, # 1 to change directory owner + 'f' : 0, # 1 to change file owner + 'h' : 0, # 1 to show help message + 'r' : 0, # 1 if recursive all + 'R' : 0, # > 0 to set recursive limit + 'F' : 0o664, # to change file mode, default to 664 + 'D' : 0o775, # to change directory mode, default to 775 + } + self.OINFO = { + 'files' : [], + 'curdir' : os.getcwd(), + 'tpath' : None, + 'dcnt' : 0, + 'fcnt' : 0 + } + + # function to read paramters + def read_parameters(self): + argv = sys.argv[1:] + self.set_help_path(__file__) + self.PGLOG['LOGFILE'] = "rdaown.log" # 
set different log file + self.cmdlog("rdaown {} ({})".format(' '.join(argv), self.OINFO['curdir'])) + option = defopt = 'l' + for arg in argv: + ms = re.match(r'-(\w+)$', arg) + if ms: + option = ms.group(1) + if option not in self.RDAOWN: self.pglog(arg + ": Unknown Option", self.LGEREX) + if 'dfhr'.find(option) > -1: + self.RDAOWN[option] = 1 + option = defopt + continue + if not option: self.pglog(arg + ": Value provided without option", self.LGEREX) + if option == 'R': + self.RDAOWN['R'] = int(arg) option = defopt - continue - if not option: PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX) - if option == 'R': - RDAOWN['R'] = int(arg) - option = defopt - else: - OINFO['files'].append(arg) - defopt = None - - if RDAOWN['h'] or not OINFO['files']: PgLOG.show_usage("rdaown") - if PgLOG.PGLOG['CURUID'] != "root": - PgLOG.pglog(PgLOG.PGLOG['CURUID'] + ": you must execute 'rdaown' as 'root'!", PgLOG.LGEREX) - if not (RDAOWN['d'] or RDAOWN['f']): - RDAOWN['d'] = RDAOWN['f'] = 1 # list both directories and files as default - if not RDAOWN['R'] and RDAOWN['r']: RDAOWN['R'] = 1000 - - change_top_list(OINFO['files']) - - if (OINFO['dcnt'] + OINFO['fcnt']) > 1: - msg = "" - if OINFO['dcnt'] > 0: - s = ("ies" if OINFO['dcnt'] > 1 else "y") - msg = "{} Director{}".format(OINFO['dcnt'], s) - if OINFO['fcnt'] > 0: - s = ('s' if OINFO['fcnt'] > 1 else '') - if msg: msg += " & " - msg += "{} File{}".format(OINFO['fcnt'], s) - PgLOG.pglog("Total {} changed owner".format(msg), PgLOG.LOGWRN) - elif (OINFO['dcnt'] + OINFO['fcnt']) == 0: - PgLOG.pglog((OINFO['tpath'] if OINFO['tpath'] else OINFO['curdir']) + ": No Owner changed", PgLOG.LOGWRN) - - PgLOG.cmdlog() - PgLOG.pgexit(0) - -# -# change owner for the top level list -# -def change_top_list(files): + else: + self.OINFO['files'].append(arg) + defopt = None + if self.RDAOWN['h'] or not self.OINFO['files']: self.show_usage("rdaown") + if self.PGLOG['CURUID'] != "root": + self.pglog(self.PGLOG['CURUID'] + ": you 
must execute 'rdaown' as 'root'!", self.LGEREX) + if not (self.RDAOWN['d'] or self.RDAOWN['f']): + self.RDAOWN['d'] = self.RDAOWN['f'] = 1 # list both directories and files as default + if not self.RDAOWN['R'] and self.RDAOWN['r']: self.RDAOWN['R'] = 1000 - for file in files: - info = PgFile.check_local_file(file, 2, PgLOG.LOGWRN) - if not info: - PgLOG.pglog(file + ": NOT exists", PgLOG.LOGERR) - continue - change = 1 - if not info['isfile'] and re.search(r'/$', file): - change = 0 # do not change the directory owner if it is ended by '/' - file = re.sub(r'/$', '', file, 1) - - if not re.match(r'^/', file): file = PgLOG.join_paths(OINFO['curdir'], file) - OINFO['tpath'] = (op.dirname(file) if change else file) + "/" - if change: change_owner(file, info) - if not info['isfile'] and (RDAOWN['R'] or not change): - fs = glob.glob(file + "/*") - change_list(fs, 1, file) - -# -# recursively change directory/file owner -# -def change_list(files, level, cdir): - - fcnt = 0 - for file in files: - info = PgFile.check_local_file(file, 2, PgLOG.LOGWRN) - if not info: continue # should not happen - fcnt += change_owner(file, info) - if not info['isfile'] and level < RDAOWN['R']: - fs = glob.glob(file + "/*") - change_list(fs, level+1, file) - - if fcnt > 1: # display sub count if two more files are changed mode - PgLOG.pglog("{}: {} Files changed owner in the directory".format(cdir, fcnt), PgLOG.LOGWRN) - -# -# change owner for a single directory/file -# -def change_owner(file, info): - - fname = re.sub(r'^{}'.format(OINFO['tpath']), '', file, 1) - if info['isfile']: - if not RDAOWN['f']: return 0 - fname = "F" + fname - else: - if not RDAOWN['d']: return 0 - fname = "D" + fname - - if info['logname'] == "rdadata": return 0 - if not PgLOG.pgget("dssgrp", "", "logname = '{}'".format(info['logname']), PgLOG.LGEREX): - return PgLOG.pglog("{}: owner {} not a DECS Specialist!".format(fname, info['logname']), PgLOG.LOGERR) + # function to start actions + def start_actions(self): + 
self.dssdb_scname() + self.change_top_list(self.OINFO['files']) + if (self.OINFO['dcnt'] + self.OINFO['fcnt']) > 1: + msg = "" + if self.OINFO['dcnt'] > 0: + s = ("ies" if self.OINFO['dcnt'] > 1 else "y") + msg = "{} Director{}".format(self.OINFO['dcnt'], s) + if self.OINFO['fcnt'] > 0: + s = ('s' if self.OINFO['fcnt'] > 1 else '') + if msg: msg += " & " + msg += "{} File{}".format(self.OINFO['fcnt'], s) + self.pglog("Total {} changed owner".format(msg), self.LOGWRN) + elif (self.OINFO['dcnt'] + self.OINFO['fcnt']) == 0: + self.pglog((self.OINFO['tpath'] if self.OINFO['tpath'] else self.OINFO['curdir']) + ": No Owner changed", self.LOGWRN) + self.cmdlog() + + # change owner for the top level list + def change_top_list(self, files): + for file in files: + info = self.check_local_file(file, 2, self.LOGWRN) + if not info: + self.pglog(file + ": NOT exists", self.LOGERR) + continue + change = 1 + if not info['isfile'] and re.search(r'/$', file): + change = 0 # do not change the directory owner if it is ended by '/' + file = re.sub(r'/$', '', file, 1) + if not re.match(r'^/', file): file = self.join_paths(self.OINFO['curdir'], file) + self.OINFO['tpath'] = (op.dirname(file) if change else file) + "/" + if change: self.change_owner(file, info) + if not info['isfile'] and (self.RDAOWN['R'] or not change): + fs = glob.glob(file + "/*") + self.change_list(fs, 1, file) + + # recursively change directory/file owner + def change_list(self, files, level, cdir): + fcnt = 0 + for file in files: + info = self.check_local_file(file, 2, self.LOGWRN) + if not info: continue # should not happen + fcnt += self.change_owner(file, info) + if not info['isfile'] and level < self.RDAOWN['R']: + fs = glob.glob(file + "/*") + self.change_list(fs, level+1, file) + if fcnt > 1: # display sub count if two more files are changed mode + self.pglog("{}: {} Files changed owner in the directory".format(cdir, fcnt), self.LOGWRN) - if PgLOG.pgsystem("su root -c 'chown rdadata {}'".format(file), 
PgLOG.LOGWRN, 4): - PgLOG.pglog("{}: {} => rdadata".format(fname, info['logname']), PgLOG.LOGWRN) + # change owner for a single directory/file + def change_owner(self, file, info): + fname = re.sub(r'^{}'.format(self.OINFO['tpath']), '', file, 1) if info['isfile']: - OINFO['fcnt'] += 1 - return 1 + if not self.RDAOWN['f']: return 0 + fname = "F" + fname else: - OINFO['dcnt'] += 1 - return 0 - - return PgLOG.pglog("{}: Error change owner {} to rdadata".format(fname, info['logname']), PgLOG.LOGERR) + if not self.RDAOWN['d']: return 0 + fname = "D" + fname + if info['logname'] == "rdadata": return 0 + if not self.pgget("dssgrp", "", "logname = '{}'".format(info['logname']), self.LGEREX): + return self.pglog("{}: owner {} not a DECS Specialist!".format(fname, info['logname']), self.LOGERR) + if self.pgsystem("su root -c 'chown rdadata {}'".format(file), self.LOGWRN, 4): + self.pglog("{}: {} => rdadata".format(fname, info['logname']), self.LOGWRN) + if info['isfile']: + self.OINFO['fcnt'] += 1 + return 1 + else: + self.OINFO['dcnt'] += 1 + return 0 + return self.pglog("{}: Error change owner {} to rdadata".format(fname, info['logname']), self.LOGERR) + +# main function to excecute this script +def main(): + object = RdaOwn() + object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/rdaps.py b/src/rda_python_miscs/rdaps.py index bc0920b..b1f606f 100644 --- a/src/rda_python_miscs/rdaps.py +++ b/src/rda_python_miscs/rdaps.py @@ -1,194 +1,173 @@ #!/usr/bin/env python3 -# ################################################################################## -# # Title: rdaps # Author: Zaihua Ji, zji@ucar.edu # Date: 10/24/2020 # 2025-03-10 transferred to package rda_python_miscs from # https://github.com/NCAR/rda-utility-programs.git # Purpose: run ps against running process ID locally or remotely -# # Github: https://github.com/NCAR/rda-python-miscs.git -# 
################################################################################## -# import re import os import sys -from rda_python_common import PgLOG -from rda_python_common import PgSIG -from rda_python_common import PgUtil -from rda_python_common import PgFile -from rda_python_common import PgDBI - -RDAPS = { - 'a' : None, # application name - 'h' : None, # remote hostname - 'p' : 0, # process id to be checked - 'P' : 0, # parent process id to be checked - 'u' : None, # login user name -} - -# -# main function to run the application -# -def main(): - - optcnt = 0 - argv = sys.argv[1:] - PgDBI.dssdb_dbname() - PgLOG.set_suid(PgLOG.PGLOG['EUID']) - PgLOG.set_help_path(__file__) - PgLOG.PGLOG['LOGFILE'] = "rdaps.log" # set different log file - PgLOG.cmdlog("rdaps {}".format(' '.join(argv))) - - for arg in argv: - ms = re.match(r'-([ahpPtu])$', arg) - if ms: - option = ms.group(1) - elif re.match(r'-\w+$', arg): - PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX) - elif option: - if RDAPS[option]: PgLOG.pglog("{}: value passed to Option -{} already".format(arg, option), PgLOG.LGEREX) - if 'pPt'.find(option) > -1: - RDAPS[option] = int(arg) - elif option == 'h': - RDAPS[option] = PgLOG.get_short_host(arg) - else: - RDAPS[option] = arg - option = None - optcnt += 1 - else: - ms = re.match(r'^(\d+)$', arg) - if ms and not RDAPS['p']: - RDAPS['p'] = int(ms.group(1)) # pid allow value only without leading option +from rda_python_common.pg_file import PgFile + +class RdaPs(PgFile): + + def __init__(self): + super().__init__() + self.RDAPS = { + 'a' : None, # application name + 'h' : None, # remote hostname + 'p' : 0, # process id to be checked + 'P' : 0, # parent process id to be checked + 'u' : None, # login user name + } + + # function to read parameters + def read_parameters(self): + optcnt = 0 + argv = sys.argv[1:] + self.set_suid(self.PGLOG['EUID']) + self.set_help_path(__file__) + self.PGLOG['LOGFILE'] = "rdaps.log" # set different log file + self.cmdlog("rdaps 
{}".format(' '.join(argv))) + for arg in argv: + ms = re.match(r'-([ahpPtu])$', arg) + if ms: + option = ms.group(1) + elif re.match(r'-\w+$', arg): + self.pglog(arg + ": Unknown Option", self.LGEREX) + elif option: + if self.RDAPS[option]: self.pglog("{}: value passed to Option -{} already".format(arg, option), self.LGEREX) + if 'pPt'.find(option) > -1: + self.RDAPS[option] = int(arg) + elif option == 'h': + self.RDAPS[option] = self.get_short_host(arg) + else: + self.RDAPS[option] = arg + option = None optcnt += 1 else: - PgLOG.pglog(arg + ": Value passed in without Option", PgLOG.LGEREX) + ms = re.match(r'^(\d+)$', arg) + if ms and not self.RDAPS['p']: + self.RDAPS['p'] = int(ms.group(1)) # pid allow value only without leading option + optcnt += 1 + else: + self.pglog(arg + ": Value passed in without Option", self.LGEREX) + if not optcnt: self.show_usage("rdaps") - if not optcnt: PgLOG.show_usage("rdaps") - chkloc = 1 - if RDAPS['h']: - PgFile.local_host_action(RDAPS['h'], "check processes", PgLOG.PGLOG['HOSTNAME'], PgLOG.LGEREX) - if not PgUtil.pgcmp(RDAPS['h'], PgLOG.PGLOG['SLMNAME'], 1): - slurm_snapshot() - chkloc = 0 - elif not PgUtil.pgcmp(RDAPS['h'], PgLOG.PGLOG['PBSNAME'], 1): - pbs_snapshot() - chkloc = 0 - if chkloc: process_snapshot() + # function to start actions + def start_actions(self): + self.dssdb_dbname() + chkloc = 1 + if self.RDAPS['h']: + self.local_host_action(self.RDAPS['h'], "check processes", self.PGLOG['HOSTNAME'], self.LGEREX) + if not self.pgcmp(self.RDAPS['h'], self.PGLOG['SLMNAME'], 1): + self.slurm_snapshot() + chkloc = 0 + elif not self.pgcmp(self.RDAPS['h'], self.PGLOG['PBSNAME'], 1): + self.pbs_snapshot() + chkloc = 0 + if chkloc: self.process_snapshot() + self.cmdlog() - PgLOG.cmdlog() - PgLOG.pgexit(0) - -# -# get a snapshot of a process status -# -def process_snapshot(): - - if RDAPS['p']: - cmd = "ps -p {} -f".format(RDAPS['p']) - elif RDAPS['P']: - cmd = "ps --ppid {} -f".format(RDAPS['P']) - elif RDAPS['u']: - cmd = "ps -u 
{} -f".format(RDAPS['u']) - else: - cmd = "ps -ef" - - buf = PgLOG.pgsystem(cmd, PgLOG.LGWNEX, 20) - - for line in re.split('\n', buf): - ms = re.match(r'\s*(\w+)\s+(\d+)\s+(\d+)\s+(.*)$', line) - if ms: - uid = ms.group(1) - pid = int(ms.group(2)) - ppid = int(ms.group(3)) - aname = ms.group(4) - if RDAPS['u'] and RDAPS['u'] != uid: continue - if RDAPS['p'] and RDAPS['p'] != pid: continue - if RDAPS['P'] and RDAPS['P'] != ppid: continue - if RDAPS['a'] and aname.find(RDAPS['a']) < 0: continue - PgLOG.pglog(re.sub(r' +', ' ', line), PgLOG.LOGWRN) - -# -# get a snapshot of a SLURM batch process status -# -def slurm_snapshot(): - - qopts = '' - if RDAPS['u']: qopts += " -u " + RDAPS['u'] - if RDAPS['p']: - qopts += " -j {}".format(RDAPS['p']) - else: - qopts = " -p rda" - cmd = "squeue -l" + qopts - - buf = PgLOG.pgsystem(cmd, PgLOG.LOGWRN, 272) - if not buf: - if PgLOG.PGLOG['SYSERR'] and PgLOG.PGLOG['SYSERR'].find('Invalid job id specified') < 0: - PgLOG.pglog(PgLOG.PGLOG['SYSERR'], PgLOG.LGEREX) - return - - lines = re.split(r'\n', buf) - lcnt = len(lines) - if lcnt < 3: return - dochk = 1 - for line in lines: - if not line: continue - if dochk: - if re.match(r'^\s*JOBID\s', line): dochk = 0 + # get a snapshot of a process status + def process_snapshot(self): + if self.RDAPS['p']: + cmd = "ps -p {} -f".format(self.RDAPS['p']) + elif self.RDAPS['P']: + cmd = "ps --ppid {} -f".format(self.RDAPS['P']) + elif self.RDAPS['u']: + cmd = "ps -u {} -f".format(self.RDAPS['u']) else: - vals = re.split(r'\s+', PgLOG.pgtrim(line)) - if RDAPS['a'] and vals[2] and RDAPS['a'] != vals[2]: continue - # move user name to front - val = vals[3] - vals[3] = vals[2] - vals[2] = vals[1] - vals[1] = vals[0] - vals[0] = val - PgLOG.pglog(' '.join(vals), PgLOG.LOGWRN) - -# -# get a snapshot of a PBS batch process status -# -def pbs_snapshot(): - - qopts = '' - if RDAPS['u']: - qopts = "-u {}".format(RDAPS['u']) - if RDAPS['p']: - if qopts: qopts += ' ' - qopts += str(RDAPS['p']) - if not 
qopts: qopts = 'rda' - - stat = PgSIG.get_pbs_info(qopts, 1, PgLOG.LOGWRN) - if not stat: - if PgLOG.PGLOG['SYSERR']: PgLOG.pglog(PgLOG.PGLOG['SYSERR'], PgLOG.LGEREX) - return - - lcnt = len(stat['JobID']) - - ckeys = list(stat.keys()) - kcnt = len(ckeys) - # moving 'UserName' to the first - for i in range(kcnt): - if i > 0 and ckeys[i] == 'UserName': - j = i - while j > 0: - ckeys[j] = ckeys[j-1] - j -= 1 - ckeys[0] = 'UserName' - break - - for i in range(lcnt): - if RDAPS['a'] and stat['JobName'] and RDAPS['a'] != stat['JobName']: continue - vals = [] - for k in ckeys: - vals.append(stat[k][i]) - PgLOG.pglog(' '.join(vals), PgLOG.LOGWRN) + cmd = "ps -ef" + buf = self.pgsystem(cmd, self.LGWNEX, 20) + for line in re.split('\n', buf): + ms = re.match(r'\s*(\w+)\s+(\d+)\s+(\d+)\s+(.*)$', line) + if ms: + uid = ms.group(1) + pid = int(ms.group(2)) + ppid = int(ms.group(3)) + aname = ms.group(4) + if self.RDAPS['u'] and self.RDAPS['u'] != uid: continue + if self.RDAPS['p'] and self.RDAPS['p'] != pid: continue + if self.RDAPS['P'] and self.RDAPS['P'] != ppid: continue + if self.RDAPS['a'] and aname.find(self.RDAPS['a']) < 0: continue + self.pglog(re.sub(r' +', ' ', line), self.LOGWRN) + + # get a snapshot of a SLURM batch process status + def slurm_snapshot(self): + qopts = '' + if self.RDAPS['u']: qopts += " -u " + self.RDAPS['u'] + if self.RDAPS['p']: + qopts += " -j {}".format(self.RDAPS['p']) + else: + qopts = " -p rda" + cmd = "squeue -l" + qopts + buf = self.pgsystem(cmd, self.LOGWRN, 272) + if not buf: + if self.PGLOG['SYSERR'] and self.PGLOG['SYSERR'].find('Invalid job id specified') < 0: + self.pglog(self.PGLOG['SYSERR'], self.LGEREX) + return + lines = re.split(r'\n', buf) + lcnt = len(lines) + if lcnt < 3: return + dochk = 1 + for line in lines: + if not line: continue + if dochk: + if re.match(r'^\s*JOBID\s', line): dochk = 0 + else: + vals = re.split(r'\s+', self.pgtrim(line)) + if self.RDAPS['a'] and vals[2] and self.RDAPS['a'] != vals[2]: continue + # 
move user name to front + val = vals[3] + vals[3] = vals[2] + vals[2] = vals[1] + vals[1] = vals[0] + vals[0] = val + self.pglog(' '.join(vals), self.LOGWRN) + + # get a snapshot of a PBS batch process status + def pbs_snapshot(self): + qopts = '' + if self.RDAPS['u']: + qopts = "-u {}".format(self.RDAPS['u']) + if self.RDAPS['p']: + if qopts: qopts += ' ' + qopts += str(self.RDAPS['p']) + if not qopts: qopts = 'rda' + stat = self.get_pbs_info(qopts, 1, self.LOGWRN) + if not stat: + if self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.LGEREX) + return + lcnt = len(stat['JobID']) + ckeys = list(stat.keys()) + kcnt = len(ckeys) + # moving 'UserName' to the first + for i in range(kcnt): + if i > 0 and ckeys[i] == 'UserName': + j = i + while j > 0: + ckeys[j] = ckeys[j-1] + j -= 1 + ckeys[0] = 'UserName' + break + for i in range(lcnt): + if self.RDAPS['a'] and stat['JobName'] and self.RDAPS['a'] != stat['JobName']: continue + vals = [] + for k in ckeys: + vals.append(stat[k][i]) + self.pglog(' '.join(vals), self.LOGWRN) + +# main function to excecute this script +def main(): + object = RdaPs() + object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/rdasub.py b/src/rda_python_miscs/rdasub.py index 419e150..37cb676 100644 --- a/src/rda_python_miscs/rdasub.py +++ b/src/rda_python_miscs/rdasub.py @@ -1,94 +1,84 @@ #!/usr/bin/env python3 -# ################################################################################## -# # Title: rdasub # Author: Zaihua Ji, zji@ucar.edu # Date: 03/51/2021 # 2025-03-10 transferred to package rda_python_miscs from # https://github.com/NCAR/rda-utility-programs.git # Purpose: python script to submit a nohup bachground execution -# # Github: https://github.com/NCAR/rda-python-miscs.git -# ################################################################################## import os import sys import re import time 
-from rda_python_common import PgLOG -from rda_python_common import PgFile -from rda_python_common import PgUtil +from rda_python_common.pg_file import PgFile -# -# main function to excecute this script -# -def main(): +class RdaSub(PgFile): + + def __init__(self): + super().__init__() + self.coptions = {'cmd' : None, 'cwd' : None, 'env' : None} # customized options + self.args = None +# function to read parameters +def read_parameters(self): aname = 'rdasub' - PgLOG.set_help_path(__file__) - coptions = {'cmd' : None, 'cwd' : None, 'env' : None} # customized options - copts = '|'.join(coptions) + self.set_help_path(__file__) + copts = '|'.join(self.coptions) option = None argv = sys.argv[1:] - if not argv: PgLOG.show_usage(aname) - PgLOG.PGLOG['LOGFILE'] = aname + ".log" - PgLOG.cmdlog("{} {}".format(aname, ' '.join(argv))) - + if not argv: self.show_usage(aname) + self.PGLOG['LOGFILE'] = aname + ".log" + self.cmdlog("{} {}".format(aname, ' '.join(argv))) while argv: arg = argv.pop(0) if arg == "-b": - PgLOG.PGLOG['BCKGRND'] = 1 + self.PGLOG['BCKGRND'] = 1 option = None continue ms = re.match(r'^-({})$'.format(copts), arg) if ms: option = ms.group(1) continue - if not option: PgLOG.pglog("{}: Value passed in without leading option for {}".format(arg, aname), PgLOG.LGEREX) + if not option: self.pglog("{}: Value passed in without leading option for {}".format(arg, aname), self.LGEREX) if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg): # quote string with space but not quoted yet if arg.find("'") > -1: arg = '"{}"'.format(arg) else: arg = "'{}'".format(arg) - - coptions[option] = arg + self.coptions[option] = arg if option == "cmd": break option = None + if not self.coptions['cmd']: self.pglog(aname + ": specify command via option -cmd to run", self.LGWNEX) + self.args = self.argv_to_string(argv, 0) # append command options - if not coptions['cmd']: PgLOG.pglog(aname + ": specify command via option -cmd to run", PgLOG.LGWNEX) - args = 
PgLOG.argv_to_string(argv, 0) # append command options - msg = "{}-{}{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.PGLOG['CURUID'], PgLOG.current_datetime()) - if coptions['cwd']: - if coptions['cwd'].find('$'): coptions['cwd'] = PgLOG.replace_environments(coptions['cwd'], '', PgLOG.LGWNEX) - msg += "-" + coptions['cwd'] - PgFile.change_local_directory(coptions['cwd'], PgLOG.LGEREX) +# function to start actions +def start_actions(self): + msg = "{}-{}{}".format(self.PGLOG['HOSTNAME'], self.PGLOG['CURUID'], self.current_datetime()) + if self.coptions['cwd']: + if self.coptions['cwd'].find('$'): self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX) + msg += "-" + self.coptions['cwd'] + self.change_local_directory(self.coptions['cwd'], self.LGEREX) else: - coptions['cwd'] = PgLOG.PGLOG['CURDIR'] - cmd = PgLOG.valid_command(coptions['cmd']) - if not cmd and not re.match(r'^/', coptions['cmd']): cmd = PgLOG.valid_command('./' + coptions['cmd']) - if not cmd: PgLOG.pglog(coptions['cmd'] + ": Cannot find given command to run", PgLOG.LGWNEX) - if args: cmd += " " + args - + self.coptions['cwd'] = self.PGLOG['CURDIR'] + cmd = self.valid_command(self.coptions['cmd']) + if not cmd and not re.match(r'^/', self.coptions['cmd']): cmd = self.valid_command('./' + self.coptions['cmd']) + if not cmd: self.pglog(self.coptions['cmd'] + ": Cannot find given command to run", self.LGWNEX) + if self.args: cmd += " " + self.args msg += ": " + cmd - PgLOG.pglog(msg, PgLOG.LOGWRN) + self.pglog(msg, self.LOGWRN) os.system("nohup " + cmd + " > /dev/null 2>&1 &") - display_process_info(coptions['cmd'], cmd) + self.display_process_info(self.coptions['cmd'], cmd) - sys.exit(0) - -# # display the the most recent matching process info -# -def display_process_info(cname, cmd): - +def display_process_info(self, cname, cmd): ctime = time.time() RTIME = PID = 0 - pscmd = "ps -u {},{} -f | grep {} | grep ' 1 ' | grep -v ' grep '".format(PgLOG.PGLOG['CURUID'], 
PgLOG.PGLOG['RDAUSER'], cname) - + pscmd = "ps -u {},{} -f | grep {} | grep ' 1 ' | grep -v ' grep '".format(self.PGLOG['CURUID'], self.PGLOG['RDAUSER'], cname) for i in range(2): - buf = PgLOG.pgsystem(pscmd, PgLOG.LOGWRN, 20) + buf = self.pgsystem(pscmd, self.LOGWRN, 20) if buf: lines = buf.split("\n") for line in lines: @@ -99,19 +89,24 @@ def display_process_info(cname, cmd): rtm = ms.group(2) arg = ms.group(3) if not arg or cmd.find(arg) > -1: - rtime = PgUtil.unixtime(rtm + ':00') + rtime = self.unixtime(rtm + ':00') if rtime > ctime: rtime -= 24*60*60 if rtime > RTIME: PID = pid RTIME = rtime if PID: - return PgLOG.pglog("Job <{}> is submitted to background <{}>".format(PID, PgLOG.PgLOG['HOSTNAME']), PgLOG.LOGWRN) + return self.pglog("Job <{}> is submitted to background <{}>".format(PID, self.PgLOG['HOSTNAME']), self.LOGWRN) elif i == 0: time.sleep(2) else: - return PgLOG.pglog("{}: No job information found, It may have finished".format(cmd), PgLOG.LOGWRN) + return self.pglog("{}: No job information found, It may have finished".format(cmd), self.LOGWRN) + +# main function to excecute this script +def main(): + object = RdaSub() + object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/rdazip.py b/src/rda_python_miscs/rdazip.py index 8dd370b..cb7791c 100644 --- a/src/rda_python_miscs/rdazip.py +++ b/src/rda_python_miscs/rdazip.py @@ -1,64 +1,66 @@ #!/usr/bin/env python3 -# ################################################################################## -# # Title: rdazip # Author: Zaihua Ji, zji@ucar.edu # Date: 10/24/2020 # 2025-03-17 transferred to package rda_python_miscs from # https://github.com/NCAR/rda-utility-programs.git # Purpose: compress/uncompress given file names -# # Github: https://github.com/NCAR/rda-python-miscs.git -# ################################################################################## import re import os 
import sys -from rda_python_common import PgLOG -from rda_python_common import PgFile +from rda_python_common.pg_file import PgFile -# -# main function to run the application -# -def main(): +class RdaZip(PgFile): + + def __init__(self): + super().__init__() + self.action = 0 + self.format = None + self.files = [] - act = 0 - argv = sys.argv[1:] - PgLOG.set_help_path(__file__) - PgLOG.PGLOG['LOGFILE'] = "rdazip.log" # set different log file - PgLOG.cmdlog("rdazip {}".format(' '.join(argv))) - files = [] - fmt = option = None - for arg in argv: - ms = re.match(r'-(\w+)$', arg) - if ms: - option = ms.group(1) - if option == "b": - PgLOG.PGLOG['BCKGRND'] = 1 - option = None - elif option == "f": - act = 1 + # function to read parameters + def read_parameters(self): + argv = sys.argv[1:] + self.set_help_path(__file__) + self.PGLOG['LOGFILE'] = "rdazip.log" # set different log file + self.cmdlog("rdazip {}".format(' '.join(argv))) + option = None + for arg in argv: + ms = re.match(r'-(\w+)$', arg) + if ms: + option = ms.group(1) + if option == "b": + self.PGLOG['BCKGRND'] = 1 + option = None + elif option == "f": + self.action = 1 + else: + self.pglog(arg + ": Unknown Option", self.LGEREX) + elif option: + if self.format: self.pglog("{}: compression format '{}' provided already".format(arg, self.format), self.LGEREX) + self.format = arg + if not self.files: option = None else: - PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX) - elif option: - if fmt: PgLOG.pglog("{}: compression format '{}' provided already".format(arg, fmt), PgLOG.LGEREX) - fmt = arg - if not files: option = None - else: - if not os.path.isfile(arg): PgLOG.pglog(arg + ": file not exists", PgLOG.LGEREX) - files.append(arg) - - if not files: PgLOG.show_usage("rdazip") - - for file in files: - PgFile.compress_local_file(file, fmt, act, PgLOG.LGWNEX) + if not os.path.isfile(arg): self.pglog(arg + ": file not exists", self.LGEREX) + self.files.append(arg) + if not self.files: self.show_usage("rdazip") - 
PgLOG.cmdlog() - sys.exit(0) + # function to start actions + def start_actions(self): + for file in self.files: + self.compress_local_file(file, self.format, self.action, self.LGWNEX) + self.cmdlog() + +# main function to excecute this script +def main(): + object = RdaZip() + object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/tcsh_qsub.py b/src/rda_python_miscs/tcsh_qsub.py new file mode 100644 index 0000000..eb9af66 --- /dev/null +++ b/src/rda_python_miscs/tcsh_qsub.py @@ -0,0 +1,213 @@ +#!/usr/bin/env python3 +# +################################################################################## +# +# Title: tcshqsub +# Author: Zaihua Ji, zji@ucar.edu +# Date: 11/19/2020 +# 2025-03-07 transferred to package rda_python_miscs from +# https://github.com/NCAR/rda-utility-programs.git +# Purpose: python script to submit a batch job on PBS node via tcsh script +# +# Github: https://github.com/NCAR/rda-python-miscs.git +# +################################################################################## + +import os +import sys +import re +from os import path as op +from rda_python_common import PgLOG + +DEFMODS = { + 'default' : "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2", +} + +DEFLIBS = { + 'default' : "conda activate /glade/work/gdexdata/conda-envs/pg-casper", +} + +SWAPMODS = { +} + +RESOURCES = { # resource list for option -l + 'walltime' : '6:00:00', # if this is changed, change defpbstime in PgCheck.py too + 'select' : '1:ncpus=1:mem=1gb' +} + +SOPTIONS = { # single-dash option values + 'o' : None, # will set to default if not provided + 'e' : None, + 'A' : "P43713000", + 'q' : "gdex@casper-pbs", +# 'm' : 'a', + 'm' : 'n', +} + +# +# main function to excecute this script +# +def main(): + + aname = 'tcshqsub' + pname = 'gdexqsub' + PgLOG.set_help_path(__file__) + gdexsub = PgLOG.BCHCMDS['PBS'] + coptions = {'cmd' : None, 'cwd' : 
None, 'env' : None, 'mod' : None, 'res' : 'default'} # customized options + copts = '|'.join(coptions) + option = None + dcount = 0 + argv = sys.argv[1:] + if not argv: PgLOG.show_usage(aname) + PgLOG.PGLOG['LOGFILE'] = pname + ".log" + PgLOG.cmdlog("{} {}".format(aname, ' '.join(argv))) + if not PgLOG.valid_command(gdexsub): PgLOG.pglog("{}: miss {} command to submit batch job".format(gdexsub, PgLOG.PGLOG['PBSNAME']), PgLOG.LGWNEX) + + while argv: + arg = argv.pop(0) + ms = re.match(r'^-(\w)$', arg) + if ms: + option = ms.group(1) + if option == "b": + PgLOG.PGLOG['BCKGRND'] = 1 + option = None + else: + SOPTIONS[option] = '' + continue + ms = re.match(r'^-({})$'.format(copts), arg) + if ms: + option = ms.group(1) + if option == "env": option = 'v' + continue + + if not option: PgLOG.pglog("{}: Value passed in without leading option for {}".format(arg, gdexsub), PgLOG.LGEREX) + if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg): # quote string with space but not quoted yet + if arg.find("'") > -1: + arg = '"{}"'.format(arg) + else: + arg = "'{}'".format(arg) + + if option in coptions: + coptions[option] = arg + if option == "cmd": break + else: + SOPTIONS[option] = arg + option = None + + if not coptions['cmd']: PgLOG.pglog(aname + ": specify command via option -cmd to run", PgLOG.LGWNEX) + args = PgLOG.argv_to_string(argv, 0) # append command options + if not SOPTIONS['o']: SOPTIONS['o'] = "{}/{}/".format(PgLOG.PGLOG['LOGPATH'], pname) + if not SOPTIONS['e']: SOPTIONS['e'] = "{}/{}/".format(PgLOG.PGLOG['LOGPATH'], pname) + if 'N' not in SOPTIONS: SOPTIONS['N'] = op.basename(coptions['cmd']) + msg = "{}-{}{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.PGLOG['CURUID'], PgLOG.current_datetime()) + + if coptions['cwd']: + if coptions['cwd'].find('$'): coptions['cwd'] = PgLOG.replace_environments(coptions['cwd'], '', PgLOG.LGWNEX) + msg += "-" + coptions['cwd'] + os.chdir(coptions['cwd']) + + cmd = PgLOG.valid_command(coptions['cmd']) + if not cmd and not 
re.match(r'^/', coptions['cmd']): cmd = PgLOG.valid_command('./' + coptions['cmd']) + if not cmd: PgLOG.pglog(coptions['cmd'] + ": Cannot find given command to run", PgLOG.LGWNEX) + if args: cmd += " " + args + + sbuf = build_tcsh_script(cmd, coptions, gdexsub) + PgLOG.pglog(sbuf, PgLOG.MSGLOG) + PgLOG.PGLOG['ERR2STD'] = ['bind mouting'] + PgLOG.pgsystem(gdexsub, PgLOG.LOGWRN, 6, sbuf) + PgLOG.PGLOG['ERR2STD'] = [] + + sys.exit(0) + +# +# build tcsh script to submit a PBS batch job +# +def build_tcsh_script(cmd, coptions, gdexsub): + + buf = "#!/bin/tcsh\n\n" # sbatch starting tcsh script + + if 'l' in SOPTIONS: add_resources() + # add options to tcsh script for qsub + for option in SOPTIONS: + buf += "#PBS -" + option + if SOPTIONS[option]: buf += " {}".format(SOPTIONS[option]) + buf += "\n" + for option in RESOURCES: + buf += "#PBS -l" + if RESOURCES[option]: buf += " {}={}".format(option, RESOURCES[option]) + buf += "\n" + + # always include the login user's tcsh resource file + homedir = "{}/{}".format(PgLOG.PGLOG['USRHOME'], PgLOG.PGLOG['CURUID']) + buf += "setenv HOME {}\n".format(homedir) + buf += "source /etc/profile.d/z00_modules.csh\n" + buf += "source /glade/u/apps/opt/conda/etc/profile.d/conda.csh\n" + buf += "source {}/.tcshrc\n".format(homedir) + buf += "pwd; hostname; date\n" + buf += add_modules(coptions['res'], coptions['mod']) + buf += set_vm_libs(coptions['res']) + buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd) + + return buf + +# +# check and add resource options +# +def add_resources(): + + for res in re.split(',', SOPTIONS['l']): + ms = re.match(r'^([^=]+)=(.+)$', res) + if ms: + RESOURCES[ms.group(1)] = ms.group(2) + else: + PgLOG.pglog(res + ": use '=' to separate resource name & value", PgLOG.LGEREX) + del SOPTIONS['l'] + +# +# add module loads for modules provided +# +def add_modules(res, mods): + + mbuf = "\n" + defmods = DEFMODS[res] if res in DEFMODS else DEFMODS['default'] + + dmods = re.split(',', defmods) + for dmod in dmods: + ms 
= re.match(r'^(.+)/', dmod) + smod = ms.group(1) if ms else dmod + if smod in SWAPMODS: mbuf += "module unload {}\n".format(SWAPMODS[smod]) + mbuf += "module load {}\n".format(dmod) + + if mods: + amods = re.split(',', mods) + for amod in amods: + if re.match(r'^/', amod): + mbuf += "module use {}\n".format(amod) + else: + ms = re.match(r'^(.+)/', amod) + smod = ms.group(1) if ms else amod + if smod in dmods: continue + if smod in SWAPMODS: mbuf += "module unload {}\n".format(SWAPMODS[smod]) + mbuf += "module load {}\n".format(amod) + + return mbuf + +# +# set virtual machine libraries +# +def set_vm_libs(res): + + deflibs = DEFLIBS[res] if res in DEFLIBS else DEFLIBS['default'] + if not deflibs: return '' + + dlibs = re.split(',', deflibs) + libbuf = "\n" + for dlib in dlibs: + libbuf += dlib + "\n" + + return libbuf + +# +# call main() to start program +# +if __name__ == "__main__": main() diff --git a/src/rda_python_miscs/tcshqsub.py b/src/rda_python_miscs/tcshqsub.py index eb9af66..5b9475c 100644 --- a/src/rda_python_miscs/tcshqsub.py +++ b/src/rda_python_miscs/tcshqsub.py @@ -1,213 +1,184 @@ #!/usr/bin/env python3 -# ################################################################################## -# # Title: tcshqsub # Author: Zaihua Ji, zji@ucar.edu # Date: 11/19/2020 # 2025-03-07 transferred to package rda_python_miscs from # https://github.com/NCAR/rda-utility-programs.git +# 2025-12-29 convert to class TcshQsub # Purpose: python script to submit a batch job on PBS node via tcsh script -# # Github: https://github.com/NCAR/rda-python-miscs.git -# ################################################################################## - import os import sys import re from os import path as op -from rda_python_common import PgLOG - -DEFMODS = { - 'default' : "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2", -} - -DEFLIBS = { - 'default' : "conda activate /glade/work/gdexdata/conda-envs/pg-casper", -} - -SWAPMODS = { -} - -RESOURCES = { # resource list for 
option -l - 'walltime' : '6:00:00', # if this is changed, change defpbstime in PgCheck.py too - 'select' : '1:ncpus=1:mem=1gb' -} - -SOPTIONS = { # single-dash option values - 'o' : None, # will set to default if not provided - 'e' : None, - 'A' : "P43713000", - 'q' : "gdex@casper-pbs", -# 'm' : 'a', - 'm' : 'n', -} - -# -# main function to excecute this script -# -def main(): - - aname = 'tcshqsub' - pname = 'gdexqsub' - PgLOG.set_help_path(__file__) - gdexsub = PgLOG.BCHCMDS['PBS'] - coptions = {'cmd' : None, 'cwd' : None, 'env' : None, 'mod' : None, 'res' : 'default'} # customized options - copts = '|'.join(coptions) - option = None - dcount = 0 - argv = sys.argv[1:] - if not argv: PgLOG.show_usage(aname) - PgLOG.PGLOG['LOGFILE'] = pname + ".log" - PgLOG.cmdlog("{} {}".format(aname, ' '.join(argv))) - if not PgLOG.valid_command(gdexsub): PgLOG.pglog("{}: miss {} command to submit batch job".format(gdexsub, PgLOG.PGLOG['PBSNAME']), PgLOG.LGWNEX) - - while argv: - arg = argv.pop(0) - ms = re.match(r'^-(\w)$', arg) - if ms: - option = ms.group(1) - if option == "b": - PgLOG.PGLOG['BCKGRND'] = 1 - option = None - else: - SOPTIONS[option] = '' - continue - ms = re.match(r'^-({})$'.format(copts), arg) - if ms: - option = ms.group(1) - if option == "env": option = 'v' - continue - - if not option: PgLOG.pglog("{}: Value passed in without leading option for {}".format(arg, gdexsub), PgLOG.LGEREX) - if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg): # quote string with space but not quoted yet - if arg.find("'") > -1: - arg = '"{}"'.format(arg) - else: - arg = "'{}'".format(arg) - - if option in coptions: - coptions[option] = arg - if option == "cmd": break - else: - SOPTIONS[option] = arg +from rda_python_common.pg_log import PgLOG + +class TcshQsub(PgLOG): + + def __init__(self): + super().__init__() + self.DEFMODS = { + 'default' : "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2", + } + self.DEFLIBS = { + 'default' : "conda activate 
/glade/work/gdexdata/conda-envs/pg-gdex", + } + self.SWAPMODS = { + } + self.RESOURCES = { # resource list for option -l + 'walltime' : '6:00:00', # if this is changed, change defpbstime in PgCheck.py too + 'select' : '1:ncpus=1:mem=1gb' + } + self.SOPTIONS = { # single-dash option values + 'o' : None, # will set to default if not provided + 'e' : None, + 'A' : "P43713000", + 'q' : "gdex@casper-pbs", + # 'm' : 'a', + 'm' : 'n', + } + self.gdexsub = self.BCHCMDS['PBS'] + self.coptions = {'cmd' : None, 'cwd' : None, 'env' : None, 'mod' : None, 'res' : 'default'} # customized options + self.args = None + + # function to read parameters + def read_parameters(self): + aname = 'tcshqsub' + pname = 'gdexqsub' + self.set_help_path(__file__) + copts = '|'.join(self.coptions) option = None - - if not coptions['cmd']: PgLOG.pglog(aname + ": specify command via option -cmd to run", PgLOG.LGWNEX) - args = PgLOG.argv_to_string(argv, 0) # append command options - if not SOPTIONS['o']: SOPTIONS['o'] = "{}/{}/".format(PgLOG.PGLOG['LOGPATH'], pname) - if not SOPTIONS['e']: SOPTIONS['e'] = "{}/{}/".format(PgLOG.PGLOG['LOGPATH'], pname) - if 'N' not in SOPTIONS: SOPTIONS['N'] = op.basename(coptions['cmd']) - msg = "{}-{}{}".format(PgLOG.PGLOG['HOSTNAME'], PgLOG.PGLOG['CURUID'], PgLOG.current_datetime()) - - if coptions['cwd']: - if coptions['cwd'].find('$'): coptions['cwd'] = PgLOG.replace_environments(coptions['cwd'], '', PgLOG.LGWNEX) - msg += "-" + coptions['cwd'] - os.chdir(coptions['cwd']) - - cmd = PgLOG.valid_command(coptions['cmd']) - if not cmd and not re.match(r'^/', coptions['cmd']): cmd = PgLOG.valid_command('./' + coptions['cmd']) - if not cmd: PgLOG.pglog(coptions['cmd'] + ": Cannot find given command to run", PgLOG.LGWNEX) - if args: cmd += " " + args - - sbuf = build_tcsh_script(cmd, coptions, gdexsub) - PgLOG.pglog(sbuf, PgLOG.MSGLOG) - PgLOG.PGLOG['ERR2STD'] = ['bind mouting'] - PgLOG.pgsystem(gdexsub, PgLOG.LOGWRN, 6, sbuf) - PgLOG.PGLOG['ERR2STD'] = [] - - 
sys.exit(0) - -# -# build tcsh script to submit a PBS batch job -# -def build_tcsh_script(cmd, coptions, gdexsub): - - buf = "#!/bin/tcsh\n\n" # sbatch starting tcsh script - - if 'l' in SOPTIONS: add_resources() - # add options to tcsh script for qsub - for option in SOPTIONS: - buf += "#PBS -" + option - if SOPTIONS[option]: buf += " {}".format(SOPTIONS[option]) - buf += "\n" - for option in RESOURCES: - buf += "#PBS -l" - if RESOURCES[option]: buf += " {}={}".format(option, RESOURCES[option]) - buf += "\n" - - # always include the login user's tcsh resource file - homedir = "{}/{}".format(PgLOG.PGLOG['USRHOME'], PgLOG.PGLOG['CURUID']) - buf += "setenv HOME {}\n".format(homedir) - buf += "source /etc/profile.d/z00_modules.csh\n" - buf += "source /glade/u/apps/opt/conda/etc/profile.d/conda.csh\n" - buf += "source {}/.tcshrc\n".format(homedir) - buf += "pwd; hostname; date\n" - buf += add_modules(coptions['res'], coptions['mod']) - buf += set_vm_libs(coptions['res']) - buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd) + dcount = 0 + argv = sys.argv[1:] + if not argv: self.show_usage(aname) + self.PGLOG['LOGFILE'] = pname + ".log" + self.cmdlog("{} {}".format(aname, ' '.join(argv))) + if not self.valid_command(self.gdexsub): self.pglog("{}: miss {} command to submit batch job".format(self.gdexsub, self.PGLOG['PBSNAME']), self.LGWNEX) + while argv: + arg = argv.pop(0) + ms = re.match(r'^-(\w)$', arg) + if ms: + option = ms.group(1) + if option == "b": + self.PGLOG['BCKGRND'] = 1 + option = None + else: + self.SOPTIONS[option] = '' + continue + ms = re.match(r'^-({})$'.format(copts), arg) + if ms: + option = ms.group(1) + if option == "env": option = 'v' + continue + if not option: self.pglog("{}: Value passed in without leading option for {}".format(arg, self.gdexsub), self.LGEREX) + if arg.find(' ') > -1 and not re.match(r'^[\'\"].*[\'\"]$', arg): # quote string with space but not quoted yet + if arg.find("'") > -1: + arg = '"{}"'.format(arg) + else: + arg = 
"'{}'".format(arg) + if option in self.coptions: + self.coptions[option] = arg + if option == "cmd": break + else: + self.SOPTIONS[option] = arg + option = None + self.args = self.argv_to_string(argv, 0) # append command options + if not self.coptions['cmd']: self.pglog(aname + ": specify command via option -cmd to run", self.LGWNEX) + if not self.SOPTIONS['o']: self.SOPTIONS['o'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname) + if not self.SOPTIONS['e']: self.SOPTIONS['e'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname) + if 'N' not in self.SOPTIONS: self.SOPTIONS['N'] = op.basename(self.coptions['cmd']) + if self.coptions['cwd']: + if '$' in self.coptions['cwd']: self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX) + os.chdir(self.coptions['cwd']) - return buf - -# -# check and add resource options -# -def add_resources(): - - for res in re.split(',', SOPTIONS['l']): - ms = re.match(r'^([^=]+)=(.+)$', res) - if ms: - RESOURCES[ms.group(1)] = ms.group(2) - else: - PgLOG.pglog(res + ": use '=' to separate resource name & value", PgLOG.LGEREX) - del SOPTIONS['l'] - -# -# add module loads for modules provided -# -def add_modules(res, mods): - - mbuf = "\n" - defmods = DEFMODS[res] if res in DEFMODS else DEFMODS['default'] - - dmods = re.split(',', defmods) - for dmod in dmods: - ms = re.match(r'^(.+)/', dmod) - smod = ms.group(1) if ms else dmod - if smod in SWAPMODS: mbuf += "module unload {}\n".format(SWAPMODS[smod]) - mbuf += "module load {}\n".format(dmod) - - if mods: - amods = re.split(',', mods) - for amod in amods: - if re.match(r'^/', amod): - mbuf += "module use {}\n".format(amod) + # fnction to start actions + def start_actions(self): + cmd = self.valid_command(self.coptions['cmd']) + if not cmd and not re.match(r'^/', self.coptions['cmd']): cmd = self.valid_command('./' + self.coptions['cmd']) + if not cmd: self.pglog(self.coptions['cmd'] + ": Cannot find given command to run", self.LGWNEX) + if self.args: cmd += " " + 
self.args + sbuf = self.build_tcsh_script(cmd) + self.pglog(sbuf, self.MSGLOG) + self.PGLOG['ERR2STD'] = ['bind mouting'] + self.pgsystem(self.gdexsub, self.LOGWRN, 6, sbuf) + self.PGLOG['ERR2STD'] = [] + + # build tcsh script to submit a PBS batch job + def build_tcsh_script(self, cmd): + buf = "#!/bin/tcsh\n\n" # sbatch starting tcsh script + if 'l' in self.SOPTIONS: self.add_resources() + # add options to tcsh script for qsub + for option in self.SOPTIONS: + buf += "#PBS -" + option + if self.SOPTIONS[option]: buf += " {}".format(self.SOPTIONS[option]) + buf += "\n" + for option in self.RESOURCES: + buf += "#PBS -l" + if self.RESOURCES[option]: buf += " {}={}".format(option, self.RESOURCES[option]) + buf += "\n" + # always include the login user's tcsh resource file + homedir = "{}/{}".format(self.PGLOG['USRHOME'], self.PGLOG['CURUID']) + buf += "setenv HOME {}\n".format(homedir) + buf += "source /etc/profile.d/z00_modules.csh\n" + buf += "source /glade/u/apps/opt/conda/etc/profile.d/conda.csh\n" + buf += "source {}/.tcshrc\n".format(homedir) + buf += "pwd; hostname; date\n" + buf += self.add_modules(self.coptions['res'], self.coptions['mod']) + buf += self.set_vm_libs(self.coptions['res']) + buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd) + return buf + + # check and add resource options + def add_resources(self): + for res in re.split(',', self.SOPTIONS['l']): + ms = re.match(r'^([^=]+)=(.+)$', res) + if ms: + self.RESOURCES[ms.group(1)] = ms.group(2) else: - ms = re.match(r'^(.+)/', amod) - smod = ms.group(1) if ms else amod - if smod in dmods: continue - if smod in SWAPMODS: mbuf += "module unload {}\n".format(SWAPMODS[smod]) - mbuf += "module load {}\n".format(amod) - - return mbuf - -# -# set virtual machine libraries -# -def set_vm_libs(res): - - deflibs = DEFLIBS[res] if res in DEFLIBS else DEFLIBS['default'] - if not deflibs: return '' + self.pglog(res + ": use '=' to separate resource name & value", self.LGEREX) + del self.SOPTIONS['l'] + + # add 
module loads for modules provided + def add_modules(self, res, mods): + mbuf = "\n" + defmods = self.DEFMODS[res] if res in self.DEFMODS else self.DEFMODS['default'] + dmods = re.split(',', defmods) + for dmod in dmods: + ms = re.match(r'^(.+)/', dmod) + smod = ms.group(1) if ms else dmod + if smod in self.SWAPMODS: mbuf += "module unload {}\n".format(self.SWAPMODS[smod]) + mbuf += "module load {}\n".format(dmod) + if mods: + amods = re.split(',', mods) + for amod in amods: + if re.match(r'^/', amod): + mbuf += "module use {}\n".format(amod) + else: + ms = re.match(r'^(.+)/', amod) + smod = ms.group(1) if ms else amod + if smod in dmods: continue + if smod in self.SWAPMODS: mbuf += "module unload {}\n".format(self.SWAPMODS[smod]) + mbuf += "module load {}\n".format(amod) + return mbuf - dlibs = re.split(',', deflibs) - libbuf = "\n" - for dlib in dlibs: - libbuf += dlib + "\n" + # set virtual machine libraries + def set_vm_libs(self, res): + deflibs = self.DEFLIBS[res] if res in self.DEFLIBS else self.DEFLIBS['default'] + if not deflibs: return '' + dlibs = re.split(',', deflibs) + libbuf = "\n" + for dlib in dlibs: + libbuf += dlib + "\n" + return libbuf - return libbuf +# main function to excecute this script +def main(): + object = TcshQsub() + object.read_parameters() + object.start_actions() + object.pgexit(0) -# # call main() to start program -# if __name__ == "__main__": main() diff --git a/tests/test_miscs.py b/tests/test_miscs.py index f16f94a..cace781 100644 --- a/tests/test_miscs.py +++ b/tests/test_miscs.py @@ -3,4 +3,14 @@ import pytest def test_miscs(): - pass + import rda_python_miscs.bashqsub + import rda_python_miscs.tcshqsub + import rda_python_miscs.gdexls + import rda_python_miscs.pgwget + import rda_python_miscs.rdacp + import rda_python_miscs.rdakill + import rda_python_miscs.rdamod + import rda_python_miscs.rdaown + import rda_python_miscs.rdaps + import rda_python_miscs.rdasub + import rda_python_miscs.rdazip