diff --git a/pyproject.toml b/pyproject.toml index 44cbf34..ae96edd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,13 +36,20 @@ pythonpath = [ "bashqsub" = "rda_python_miscs.bashqsub:main" "tcshqsub" = "rda_python_miscs.tcshqsub:main" "rdasub" = "rda_python_miscs.rdasub:main" +"gdexsub" = "rda_python_miscs.rdasub:main" "pgwget" = "rda_python_miscs.pgwget:main" "rdals" = "rda_python_miscs.rdals:main" "gdexls" = "rda_python_miscs.gdexls:main" "rdaps" = "rda_python_miscs.rdaps:main" +"gdexps" = "rda_python_miscs.rdaps:main" "rdazip" = "rda_python_miscs.rdazip:main" +"gdexzip" = "rda_python_miscs.rdazip:main" "rdaown" = "rda_python_miscs.rdaown:main" +"gdexown" = "rda_python_miscs.rdaown:main" "rdacp.py" = "rda_python_miscs.rdacp:main" +"gdexcp.py" = "rda_python_miscs.rdacp:main" "rdakill.py" = "rda_python_miscs.rdakill:main" +"gdexkill.py" = "rda_python_miscs.rdakill:main" "rdamod.py" = "rda_python_miscs.rdamod:main" +"gdexmod.py" = "rda_python_miscs.rdamod:main" "pgrst" = "rda_python_miscs.pg_rst:main" diff --git a/src/rda_python_miscs/bashqsub.py b/src/rda_python_miscs/bashqsub.py index 12836b0..be1c9a6 100644 --- a/src/rda_python_miscs/bashqsub.py +++ b/src/rda_python_miscs/bashqsub.py @@ -7,7 +7,7 @@ # https://github.com/NCAR/rda-utility-programs.git # 2025-12-29 convert to class BashQsub # Purpose: python script to submit a batch job on PBS node via bash script -# Github: https://github.com/NCAR/rda-pythn-miscs.git +# Github: https://github.com/NCAR/rda-python-miscs.git ################################################################################## import os import sys @@ -16,8 +16,14 @@ from rda_python_common.pg_log import PgLOG class BashQsub(PgLOG): + """Submit a PBS batch job via a dynamically generated bash script using qsub. + + Builds a bash script with PBS directives, module loads, and conda environment + activation, then submits it through the PBS qsub command. 
+ """ def __init__(self): + """Initialize BashQsub with default PBS resource settings and options.""" super().__init__() self.DEFMODS = { 'default': "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2" @@ -42,8 +48,15 @@ def __init__(self): self.gdexsub = self.BCHCMDS['PBS'] self.args = None - # function to readparameters + # function to read parameters def read_parameters(self): + """Parse command-line arguments and populate PBS options and customized options. + + Handles single-dash qsub options (e.g. -q, -A, -l) and long custom options + (-cmd, -cwd, -env, -mod, -res). Validates that the qsub command is available + and that a -cmd value is provided. Sets default log paths and job name if not + specified, and changes the working directory if -cwd is given. + """ aname = 'bashqsub' pname = 'gdexqsub' self.set_help_path(__file__) @@ -88,11 +101,12 @@ def read_parameters(self): if not self.SOPTIONS['e']: self.SOPTIONS['e'] = "{}/{}/".format(self.PGLOG['LOGPATH'], pname) if 'N' not in self.SOPTIONS: self.SOPTIONS['N'] = op.basename(self.coptions['cmd']) if self.coptions['cwd']: - if 's' in self.coptions['cwd']: self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX) + if '$' in self.coptions['cwd']: self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX) os.chdir(self.coptions['cwd']) # function to start actions def start_actions(self): + """Resolve the command path, build the bash script, and submit it via qsub.""" cmd = self.valid_command(self.coptions['cmd']) if not cmd and not re.match(r'^/', self.coptions['cmd']): cmd = self.valid_command('./' + self.coptions['cmd']) if not cmd: self.pglog(self.coptions['cmd'] + ": Cannot find given command to run", self.LGWNEX) @@ -105,6 +119,17 @@ def start_actions(self): # build bash script to submit a PBS batch job def build_bash_script(self, cmd): + """Build and return a bash script string with PBS directives for the given command. 
+ + Sets HOME, sources system and conda profile scripts and the user's .bashrc, + loads modules, activates the conda environment, then runs the command. + + Args: + cmd (str): The fully-resolved command (with arguments) to execute in the job. + + Returns: + str: The complete bash batch script content. + """ buf = "#!/usr/bin/bash\n\n" # qsub starting bash script if 'l' in self.SOPTIONS: self.add_resources() # add options to bash script for qsub @@ -128,8 +153,13 @@ def build_bash_script(self, cmd): buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd) return buf - # check and add resource options + # check and add resource options def add_resources(self): + """Parse -l option value into the RESOURCES dict and remove the raw -l entry. + + Expects comma-separated name=value pairs (e.g. 'walltime=2:00:00,select=1:ncpus=4'). + Logs an error if a token does not contain '='. + """ for res in re.split(',', self.SOPTIONS['l']): ms = re.match(r'^([^=]+)=(.+)$', res) if ms: @@ -140,6 +170,20 @@ def add_resources(self): # add module loads for modules provided def add_modules(self, res, mods): + """Build and return module load/unload commands for the bash script. + + Loads the default module set for the given reservation (or the 'default' set). + Additional modules in ``mods`` are appended; path-style entries (starting with + '/') use 'module use' instead of 'module load'. Modules already in the default + set are skipped. SWAPMODS entries trigger an unload before the new load. + + Args: + res (str): Reservation name used to look up DEFMODS; falls back to 'default'. + mods (str): Comma-separated list of extra modules (or None). + + Returns: + str: Shell commands to load/unload modules. 
+ """ mbuf = "\n" defmods = self.DEFMODS[res] if res in self.DEFMODS else self.DEFMODS['default'] dmods = re.split(',', defmods) @@ -163,6 +207,16 @@ def add_modules(self, res, mods): # set virtual machine libraries def set_vm_libs(self, res): + """Build and return conda/VM library activation commands for the bash script. + + Looks up DEFLIBS for the given reservation (falls back to 'default'). + + Args: + res (str): Reservation name used to look up DEFLIBS; falls back to 'default'. + + Returns: + str: Shell commands to activate virtual environment libraries, or '' if none. + """ deflibs = self.DEFLIBS[res] if res in self.DEFLIBS else self.DEFLIBS['default'] if not deflibs: return '' dlibs = re.split(',', deflibs) @@ -171,8 +225,9 @@ def set_vm_libs(self, res): libbuf += dlib + "\n" return libbuf -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate BashQsub, parse arguments, run, and exit.""" object = BashQsub() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/bashqsub.usg b/src/rda_python_miscs/bashqsub.usg index bffd895..cc810b7 100644 --- a/src/rda_python_miscs/bashqsub.usg +++ b/src/rda_python_miscs/bashqsub.usg @@ -10,9 +10,9 @@ -o LOGPATH/gdexqsub/ -e LOGPATH/gdexqsub/ -A P43713000 - -m a - -q gdex - -l walltime=6:00:00,select=1:node=1:mem=1gb + -m n + -q gdex@casper-pbs + -l walltime=6:00:00,select=1:ncpus=1:mem=1gb - Option -cwd, set the working directory for the Command to be executed. 
If it is not specified, it defaults to the current directory where qsub @@ -33,18 +33,18 @@ A bash script example: #!/usr/bin/bash -#PBS -o /gpfs/u/home/gdexdata/dssdb/log/gdexqsub/ -#PBS -e /gpfs/u/home/gdexdata/dssdb/log/gdexqsub/ +#PBS -o /glade/u/home/gdexdata/dssdb/log/gdexqsub/ +#PBS -e /glade/u/home/gdexdata/dssdb/log/gdexqsub/ #PBS -A P43713000 #PBS -q gdex@casper-pbs #PBS -m n #PBS -N dsrqst #PBS -l walltime=1:00:00 #PBS -l select=1:ncpus=1:mem=1gb -export HOME=/gpfs/u/home/zji +export HOME=/glade/u/home/zji source /etc/profile.d/z00_modules.sh source /glade/u/apps/opt/conda/etc/profile.d/conda.sh -source /gpfs/u/home/zji/.bashrc +source /glade/u/home/zji/.bashrc pwd; hostname; date module load ncarenv diff --git a/src/rda_python_miscs/gdexls.py b/src/rda_python_miscs/gdexls.py index f6cd640..b484564 100644 --- a/src/rda_python_miscs/gdexls.py +++ b/src/rda_python_miscs/gdexls.py @@ -18,8 +18,16 @@ from rda_python_common.pg_split import PgSplit class GdexLs(PgSplit): + """List local files/directories and display matching metadata from GDEXDB. + + For each path, queries the GDEX database for dataset, group, or file records + and prints them in aligned columns: type-prefixed name, size, file count (or + format), and description. A leading letter on each output line indicates the + item type: 'D' for a dataset root, 'G' for a sub-group, 'F' for a data file. + """ def __init__(self): + """Initialize display constants, CLI option flags, and listing state.""" super().__init__() # define some constants for gdexls actions self.DIDX = 3 # description column index @@ -49,6 +57,13 @@ def __init__(self): # function to read parameters def read_parameters(self): + """Parse command-line arguments into GDEXLS option flags and the file/directory list. + + Recognises boolean flags -d, -f, -N, -r and value options -R, -D. + Positional arguments are resolved to real paths and appended to LINFO['files']. + Exits with usage if -h/--help/? 
is given; errors on unknown options or + values without a preceding option. + """ self.set_help_path(__file__) self.PGLOG['LOGFILE'] = "gdexls.log" # set different log file self.LINFO['curdir'] = self.get_real_path(os.getcwd()) @@ -61,7 +76,7 @@ def read_parameters(self): if ms: option = ms.group(1) if option not in self.GDEXLS: self.pglog(arg + ": Unknown Option", self.LGEREX) - if 'dfNr'.find(option) > -1: + if option in 'dfNr': self.GDEXLS[option] = 1 option = defopt continue @@ -76,8 +91,15 @@ def read_parameters(self): self.GDEXLS[option] = arg option = defopt - # functio to start actions - def start_actions(self): + # function to start actions + def start_actions(self): + """Fetch DB connection info, resolve the default file list, and drive display. + + If no paths were given, lists all entries in the current directory. + Defaults both -d and -f flags when neither is explicitly set. + Prints a summary count of datasets, groups, and files at the end, + or exits with an error if nothing matched in the database. + """ self.view_dbinfo() if not self.LINFO['files']: self.LINFO['files'] = sorted(glob.glob('*')) # view all files in current directory @@ -112,6 +134,15 @@ def start_actions(self): # display the top level list def display_top_list(self, files): + """Process and display each top-level path, expanding directories as needed. + + A path ending with '/' suppresses display of the directory entry itself and + always recurses into it. Paths not starting with '/' are joined to curdir. + Flushes the cached formatted list when it exceeds CLMT entries. + + Args: + files (list[str]): Top-level paths provided on the command line (or cwd glob). + """ for file in files: if not op.exists(file): sys.stderr.write(file + ": NOT exists\n") @@ -131,6 +162,14 @@ def display_top_list(self, files): # recursively display directory/file info def display_list(self, files, level): + """Recursively display metadata for each path up to the configured depth limit. 
+ + Flushes the formatted cache when it exceeds CLMT entries to keep memory bounded. + + Args: + files (list[str]): Glob-expanded paths at the current recursion level. + level (int): Current recursion depth (1-based); stops when >= GDEXLS['R']. + """ for file in files: isdir = 1 if op.isdir(file) else 0 self.display_line(file, isdir) @@ -141,6 +180,17 @@ def display_list(self, files, level): # find dataset/group info; display or cache file def display_line(self, file, isdir): + """Look up GDEX metadata for a path and pass a formatted record to display_record. + + Resolves the dataset ID and home path on first call, then reuses cached values + for subsequent paths under the same dataset. Skips paths with no matching + dataset ID. Dispatches to the dataset, group, or file branch based on whether + the path is the dataset root, a subdirectory, or a regular file. + + Args: + file (str): Absolute path to the file or directory. + isdir (int): 1 if the path is a directory, 0 otherwise. + """ getwfile = 1 if self.LINFO['dsid'] and self.LINFO['dhome']: ms = re.match(r'^{}/(.*)$'.format(self.LINFO['dhome']), file) @@ -151,7 +201,7 @@ def display_line(self, file, isdir): self.LINFO['dsid'] = self.find_dataset_id(file) if self.LINFO['dsid'] is None: return # skip for missing dsid pgrec = self.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns", "dsid = '{}'".format(self.LINFO['dsid']), self.LGEREX) - if not pgrec: return None + if not pgrec: return self.LINFO['dhome'] = "{}/{}".format(self.PGLOG['DSDHOME'], self.LINFO['dsid']) if self.LINFO['dhome'] == file: file = re.sub(r'^{}'.format(self.LINFO['tpath']), '', file, 1) @@ -183,8 +233,17 @@ def display_line(self, file, isdir): self.display_record(["F" + file, pgrec['data_size'], pgrec['data_format'], note]) self.LINFO['fcnt'] += 1 - # display one file info + # display one record def display_record(self, disp): + """Format the size field and either print immediately or cache for aligned output. 
+ + In unformatted mode (-N) the columns are joined by the delimiter and printed + directly. Otherwise the record is appended to pgrecs and the per-column + maximum widths are updated for later aligned rendering by display_format_list. + + Args: + disp (list[str]): Four-element list: [name, size, count/format, description]. + """ disp[1] = self.get_float_string(disp[1]) if self.GDEXLS['N']: print(self.GDEXLS['D'].join(disp)) @@ -197,6 +256,12 @@ def display_record(self, disp): # display cached list with format def display_format_list(self): + """Flush the cached record list with column-aligned formatting and reset the cache. + + Applies left or right alignment to each of the first DIDX columns based on + ALIGNS, padding to the maximum observed width, then joins with the delimiter. + Resets pcnt to 0 after printing (pgrecs entries are left but ignored). + """ for j in range(self.LINFO['pcnt']): disp = self.LINFO['pgrecs'][j] for i in range(self.DIDX): @@ -207,9 +272,20 @@ def display_format_list(self): print(self.GDEXLS['D'].join(disp)) self.LINFO['pcnt'] = 0 - # change size to floating point value with unit + # convert size to floating point value with unit @staticmethod def get_float_string(val): + """Convert a numeric byte count to a human-readable string with a unit suffix. + + Divides by 1000 repeatedly until the value is <= 1000 or the largest unit + (Petabytes) is reached. Values >= 1K are formatted to two decimal places. + + Args: + val (int|float): Size in bytes. + + Returns: + str: Formatted string such as '1.50M' or '512B'. + """ units = ['B', 'K', 'M', 'G', 'T', 'P'] idx = 0 while val > 1000 and idx < 5: @@ -220,17 +296,32 @@ def get_float_string(val): else: return "{}{}".format(val, units[idx]) - # replace /gpfs to the path /glade + # normalize /gpfs paths to /glade equivalents and resolve symlinks @staticmethod def get_real_path(path): + """Translate legacy /gpfs mount-point prefixes to their /glade equivalents. 
+ + Handles two mappings: + - /gpfs/u/... → /glade/u/... + - /gpfs/csfs1/... → /glade/campaign/... + + Then calls os.path.realpath to resolve any symlinks. + + Args: + path (str): Filesystem path, possibly using a /gpfs prefix. + + Returns: + str: Canonicalized absolute path, with any recognized /gpfs prefix mapped into the /glade hierarchy. + """ if re.match(r'^/gpfs/u', path): path = re.sub(r'^/gpfs', '/glade', path, 1) elif re.match(r'^/gpfs/csfs1/', path): path = re.sub(r'^/gpfs/csfs1', '/glade/campaign', path, 1) return op.realpath(path) -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate GdexLs, parse arguments, run, and exit.""" object = GdexLs() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/gdexls.usg b/src/rda_python_miscs/gdexls.usg index 1fd9538..3026efc 100644 --- a/src/rda_python_miscs/gdexls.usg +++ b/src/rda_python_miscs/gdexls.usg @@ -1,60 +1,74 @@ - List directory and file information of the current or specified directories - with metadata information if matched. Four columns are listed, they are Directory - Name, Data Volume, File Count, and Brief Description if the listed item is a - directory, and they are File Name, Data Size, Data Format, and Brief Description - if the listed item is a file. - - A leading letter is displayed on each line to indicate what type item is listed; - including 'D' for a whole dataset, 'G' for a group or subgroup in a dataset, - and 'F' for a data file. - - The output of directory/file list is formatted as default with double spaces - as delimiter and each column lined up vertically at least for the files under each - directory. Provide Option -N to display list without format. A delimiter symbol '|' - is defaulted if Option -N is present. - + List local files and directories with matching metadata from the GDEX database. 
+ Each output line has four columns: + + For a dataset root (D) or group (G): + Name | Total Data Volume | File Count | Description + + For a data file (F): + Name | File Size | Data Format | Note + + A leading letter on each line indicates the item type: + D - dataset root directory + G - group or sub-group directory within a dataset + F - individual data file + + Output is column-aligned by default using double spaces as the delimiter. + Use -N to disable formatting; the delimiter then defaults to '|'. + Nothing is displayed if no matching GDEX metadata is found for the given paths. + Usage: gdexls [-d] [-f] [-N] [-h] [-r] [-D DelimitSymbols] [-R RecursiveLevel] [Directory/File List] - - Option -d, list directory information only. Directory information - is included as default. Add this option to exclude file information; + - Option -d, list dataset/group (directory) information only. + Both directories and files are listed by default; this option + suppresses file output; - - Option -f, list file information only. File information - is included as default. Add this option to exclude directory information; + - Option -f, list file information only. + Both directories and files are listed by default; this option + suppresses directory output; - - Option -N, list files unformatted; + - Option -N, display output without column alignment; - Option -h, display this help document; - - Option -r, list directories and files recursively; + - Option -r, list directories and files recursively (no depth limit); + + - Option -R RecursiveLevel, list recursively up to the specified depth. + -R 1 lists only the immediate contents of each given directory; + + - Option -D DelimitSymbols, specify the column delimiter string. + Defaults to " " (two spaces) for formatted output and '|' for + unformatted (-N) output. Quote the string if it contains shell + metacharacters, e.g. -D '<:>'; + + - Directory/File List is optional. 
Without it, all entries in the + current directory are listed. Shell wildcards are supported. + + This utility can be run from any directory. It searches the GDEX database + using the resolved absolute path of each argument, so both absolute and + relative paths are accepted. + + Examples for dataset d277006: + + 1. Change into the dataset home directory and run gdexls: - - Option -R, list directories and files recursively up to the level - provided with this Option; + cd /PathTo/d277006 + gdexls - - Option -D, specify delimiting symbols for dividing the columns. - It defaults to " " for formatted output and '|' for unformatted output. - Make sure quote the symbols if any character in the symbols has Unix - meaning, for example -D '<:>'; + Add -r to recurse into sub-directories, or cd into a sub-directory + first to list only its contents. - - Directory/file List is optional; without specification, all directories - and files in the current directory are listed. Unix command line - wildcards are supported. + 2. Pass an absolute path directly: - This utility program can be executed anywhere. Nothing is displayed if neither - directory nor file information pre-gathered in database. + gdexls /PathTo/d277006/ # list contents of the dataset directory + gdexls /PathTo/d277006/* # same effect via shell glob expansion - For examples, to check directories and files of d277006, you can + Without a trailing '/' or wildcard, the dataset entry itself is listed + unless -r or -R is given: - 1. Change into the dataset home data directory as 'cd /PathTo/d277006' and - execute 'gdexls'; add recursive option '-r' to check directories and files - further into the sub-directories, or change directory into a sub-directory - to check files inside of it. + gdexls /PathTo/d277006 # shows the D-line for the dataset root - 2. 
Pass an absolute path to gdexls as 'gdexls /PathTo/d277006/' or as - 'gdexls /PathTo/d277006/*'; without the ending by '/' or an appended - wildcard symbol '*' information of the dataset itself is check unless - the recursive option '-r' or '-R RecursiveLevel' is present + 3. Use a relative path from a neighbouring directory: - 3. If the current directory is in another dataset home data directory, - such as /PathTo/d277006, you can pass a relative path to gdexls - as 'gdexls ../d277006/' or as 'gdexls ../d277006/*' + gdexls ../d277006/ + gdexls ../d277006/* diff --git a/src/rda_python_miscs/pg_wget.py b/src/rda_python_miscs/pg_wget.py index c254ecb..5d0e66e 100644 --- a/src/rda_python_miscs/pg_wget.py +++ b/src/rda_python_miscs/pg_wget.py @@ -34,9 +34,10 @@ } # -# main function to excecute this script +# main function to execute this script # def main(): + """Parse command-line options, validate inputs, and run the wildcard download.""" option = None JCS = ['cat', 'tar', 'first', 'last'] @@ -89,9 +90,21 @@ def main(): sys.exit(0) # -# download one or multiple remote files via wget; concat files to a single one if multiple +# download one or multiple remote files via wget; join files to a single one if multiple # def download_wildcard_files(): + """Download remote files matching the wildcard pattern and combine into one output file. + + Skips the download if the local output file already exists and -CN is not set. + Runs wget only when -CN is set or fewer than FC files are already present locally. + Compares timestamps and file metadata to decide whether a rebuild is needed. + Combines downloaded parts using the strategy selected by -JC (cat/tar/first/last). + Removes intermediate part-files when -CR is set. + + Returns: + int: 1 if the output file was built or rebuilt, 0 if all parts were already + up-to-date; on an early exit with a warning, the value returned by PgLOG.pglog() is passed through. 
+ """ deleted = 0 if OPTIONS['FN']: @@ -102,7 +115,7 @@ def download_wildcard_files(): dinfo = PgFile.check_local_file(dfile, 1) if dinfo and not OPTIONS['CN']: - return PgLOG.pglog("{}: file dowloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), PgLOG.LOGWRN) + return PgLOG.pglog("{}: file downloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), PgLOG.LOGWRN) build = 0 if dinfo else 1 wfile = OPTIONS['RN'] + "*" @@ -127,11 +140,11 @@ def download_wildcard_files(): if ncnt == 0: if deleted: - return PgLOG.pglog("{}: File dowloaded on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN) + return PgLOG.pglog("{}: File downloaded on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN) else: - return PgLOG.pglog("{}: NO file to dowload on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN) + return PgLOG.pglog("{}: NO file to download on {}".format(dfile, OPTIONS['UL']), PgLOG.LOGWRN) elif ncnt < OPTIONS['MC']: - return PgLOG.pglog("{}: NOT ready, only {} of {} files dowloaded".format(dfile, ncnt, OPTIONS['MC']), PgLOG.LOGWRN) + return PgLOG.pglog("{}: NOT ready, only {} of {} files downloaded".format(dfile, ncnt, OPTIONS['MC']), PgLOG.LOGWRN) rfiles = sorted(nlist) size = skip = 0 @@ -145,10 +158,10 @@ def download_wildcard_files(): elif rfile not in dlist: build = 1 elif PgFile.compare_file_info(dlist[rfile], rinfo) > 0: - PgLOG.pglog("{}: Newer file dowloaded from {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN) + PgLOG.pglog("{}: Newer file downloaded from {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN) build = 1 else: - PgLOG.pglog("{}: No newer file found on ".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN) + PgLOG.pglog("{}: No newer file found on {}".format(rfile, OPTIONS['UL']), PgLOG.LOGWRN) if skip == ncnt: return 0 diff --git a/src/rda_python_miscs/pgwget.py b/src/rda_python_miscs/pgwget.py index 3775c7c..76a6831 100644 --- a/src/rda_python_miscs/pgwget.py +++ b/src/rda_python_miscs/pgwget.py @@ -14,8 +14,15 
@@ from rda_python_common.pg_file import PgFile class PgWget(PgFile): + """Wrapper around wget to download one or more files identified by a root name pattern. + + Supports wildcard-based remote file matching, optional freshness checks, and + multiple strategies for combining multiple downloaded parts into a single output + file (concatenation, tar archive, or selecting the first/last file). + """ def __init__(self): + """Initialize PgWget with default wget options and download control flags.""" super().__init__() self.OPTIONS = { 'OP' : "-np -nH -nd -m -e robots=off --no-check-certificate", @@ -33,10 +40,19 @@ def __init__(self): # function to read parameters def read_parameters(self): + """Parse command-line arguments into OPTIONS and validate required inputs. + + Recognises boolean flags -CN, -CR, -SM and value options -OP, -UL, -RN, + -FN, -FC, -MC, -EX, -JC (case-insensitive). -FC and -MC are cast to int. + -JC must be one of: cat, tar, first, last. Prints usage and exits if + -UL or -RN is missing. Defaults MC to FC and appends -q to OP when -SM + is not set. 
+ """ + self.set_help_path(__file__) option = None JCS = ['cat', 'tar', 'first', 'last'] options = '|'.join(self.OPTIONS) - argv = sys.argv[1:] + argv = sys.argv[1:] self.PGLOG['LOGFILE'] = "pgwget.log" for arg in argv: if arg == "-b": @@ -57,31 +73,31 @@ def read_parameters(self): self.OPTIONS[option] = int(arg) if re.match(r'^(FC|MC)$', option) else arg option = None if not (self.OPTIONS['UL'] and self.OPTIONS['RN']): - print("Usage: pgwget [-CN] [-CR] [-FC FileCount] [-JC JoinCommand] [-MC MinFileCount] [-FN FileName] -UL WebURL -RN RootFileName [-EX FileNameExtension]") - print(" Provide at least WebURL and RootFileName to wget file(s)") - print(" Option -CN - check new file if presents") - print(" Option -CR - clean the downloaded remote file(s) if presents") - print(" Option -FC - number of files to be valid download; defaults to 1") - print(" Option -JC - file joining command, it defaults to cat, could be tar, or last/first to choose the last/first one") - print(" Option -SM - Show wget dumping message; defaults to False") - print(" Option -MC - minimal number of files to be valid download; defaults to -FC") - print(" Option -FN - file name to be used if successful download; defaults to RootFileName.FileNameExtension") - print(" Option -OP - options used by wget, defaults to '-np -nH -nd -m -e robots=off'") - print(" Option -UL - (mandatory) WebURL with path") - print(" Option -RN - (mandatory) the root portion of the remote file name to be downloaded") - print(" Option -EX - file name extension to be used.") - sys.exit(0) + self.show_usage("pgwget") self.cmdlog("pgwget " + ' '.join(argv)) if not self.OPTIONS['MC']: self.OPTIONS['MC'] = self.OPTIONS['FC'] if not self.OPTIONS['SM']: self.OPTIONS['OP'] += ' -q' # function to start actions def start_actions(self): - self.download_wildcard_files() + """Run the wildcard download and close the command log.""" + self.download_wildcard_files() self.cmdlog() - # download one or multiple remote files via wget; concat 
files to a single one if multiple + # download one or multiple remote files via wget; join files to a single one if multiple def download_wildcard_files(self): + """Download remote files matching the wildcard pattern and combine into one output file. + + Skips the download if the local output file already exists and -CN is not set. + Runs wget only when -CN is set or fewer than FC files are already present locally. + Compares timestamps and file metadata to decide whether a rebuild is needed. + Combines downloaded parts using the strategy selected by -JC (cat/tar/first/last). + Removes intermediate part-files when -CR is set. + + Returns: + int: 1 if the output file was built or rebuilt, 0 if all parts were already + up-to-date; on an early exit with a warning, the value returned by self.pglog() is passed through. + """ deleted = 0 if self.OPTIONS['FN']: dfile = self.OPTIONS['FN'] @@ -90,7 +106,7 @@ def download_wildcard_files(self): if self.OPTIONS['EX']: dfile += "." + self.OPTIONS['EX'] dinfo = self.check_local_file(dfile, 1) if dinfo and not self.OPTIONS['CN']: - return self.pglog("{}: file dowloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), self.LOGWRN) + return self.pglog("{}: file downloaded already ({} {})".format(dfile, dinfo['date_modified'], dinfo['time_modified']), self.LOGWRN) build = 0 if dinfo else 1 wfile = self.OPTIONS['RN'] + "*" if self.OPTIONS['EX']: wfile += "." 
+ self.OPTIONS['EX'] @@ -112,11 +128,11 @@ def download_wildcard_files(self): ncnt = dcnt if ncnt == 0: if deleted: - return self.pglog("{}: File dowloaded on {}".format(dfile, self.OPTIONS['UL']), self.LOGWRN) + return self.pglog("{}: File downloaded on {}".format(dfile, self.OPTIONS['UL']), self.LOGWRN) else: - return self.pglog("{}: NO file to dowload on {}".format(dfile, self.OPTIONS['UL']), self.LOGWRN) + return self.pglog("{}: NO file to download on {}".format(dfile, self.OPTIONS['UL']), self.LOGWRN) elif ncnt < self.OPTIONS['MC']: - return self.pglog("{}: NOT ready, only {} of {} files dowloaded".format(dfile, ncnt, self.OPTIONS['MC']), self.LOGWRN) + return self.pglog("{}: NOT ready, only {} of {} files downloaded".format(dfile, ncnt, self.OPTIONS['MC']), self.LOGWRN) rfiles = sorted(nlist) size = skip = 0 for i in range(ncnt): @@ -129,10 +145,10 @@ def download_wildcard_files(self): elif rfile not in dlist: build = 1 elif self.compare_file_info(dlist[rfile], rinfo) > 0: - self.pglog("{}: Newer file dowloaded from {}".format(rfile, self.OPTIONS['UL']), self.LOGWRN) + self.pglog("{}: Newer file downloaded from {}".format(rfile, self.OPTIONS['UL']), self.LOGWRN) build = 1 else: - self.pglog("{}: No newer file found on ".format(rfile, self.OPTIONS['UL']), self.LOGWRN) + self.pglog("{}: No newer file found on {}".format(rfile, self.OPTIONS['UL']), self.LOGWRN) if skip == ncnt: return 0 if not (build or size == dinfo['data_size']): build = 1 if not build: return self.pglog(dfile + ": Use existing file", self.LOGWRN) @@ -160,8 +176,9 @@ def download_wildcard_files(self): self.pgsystem("rm -f " + rfiles[i], self.LOGWRN, 5) return 1 -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate PgWget, parse arguments, run, and exit.""" object = PgWget() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/pgwget.usg b/src/rda_python_miscs/pgwget.usg new file mode 100644 index 
0000000..abd8e97 --- /dev/null +++ b/src/rda_python_miscs/pgwget.usg @@ -0,0 +1,75 @@ + + Wrapper around wget to download one or more remote files identified by a root + name pattern (wildcard), then combine the downloaded parts into a single local + output file. The download is skipped if the local output file already exists + and is up-to-date, unless Option -CN is given. + + Usage: pgwget -UL WebURL -RN RootFileName [-EX Extension] [-FN FileName] \ + [-FC FileCount] [-MC MinFileCount] [-JC JoinCommand] \ + [-CN] [-CR] [-SM] [-OP WgetOptions] [-b] + + - Option -UL, (required) base URL including the directory path where + the remote files reside, e.g. https://server.example.org/data/path/; + + - Option -RN, (required) root portion of the remote file name. A wildcard + '*' is appended automatically when searching for matching files, + e.g. -RN mydata_20240101 matches mydata_20240101.part1, etc.; + + - Option -EX, file name extension appended to -RN when building the + wildcard pattern, e.g. -RN mydata -EX nc matches mydata*.nc; + + - Option -FN, local output file name written after a successful download. + Defaults to RootFileName if -EX is not given, or RootFileName.Extension + if -EX is provided; + + - Option -FC, expected total number of matching remote files for the + download to be considered complete; defaults to 1; + + - Option -MC, minimum number of matching files required before combining. + Defaults to the -FC value when not specified; + + - Option -JC, strategy for combining multiple downloaded part-files into + the single output file. Must be one of: + cat - concatenate files in sorted order (default) + tar - build a tar archive (create with first file, update with rest) + first - use only the first file in sorted order; discard the rest + last - use only the last file in sorted order; discard the rest + + - Option -CN, check for a newer remote file even when the local output + file already exists. 
Forces a fresh wget and rebuilds the output if + any downloaded part is newer than the existing local file; + + - Option -CR, remove the intermediate downloaded part-files after they + have been combined into the output file; + + - Option -SM, show wget progress and diagnostic output. By default wget + runs in quiet mode (-q) and no download messages are printed; + + - Option -OP, override the default wget options string. The default is: + -np -nH -nd -m -e robots=off --no-check-certificate + + - Option -b, run the download in the background; + + Examples: + + 1. Download a single file whose name is exactly known: + + pgwget -UL https://server.example.org/data/ -RN myfile.nc + + This runs wget looking for 'myfile.nc*' on the server and saves the + result as myfile.nc locally. + + 2. Download a file that may arrive as numbered parts (.part1, .part2, ...), + concatenate them, and remove the parts when done: + + pgwget -UL https://server.example.org/data/ -RN mydata_20240101 \ + -FC 3 -JC cat -CR + + 3. Download split files with a known extension, using a custom output name: + + pgwget -UL https://server.example.org/data/ -RN obs_2024 -EX nc \ + -FN observations_2024.nc -FC 2 -CR + + 4. Check whether a newer version of an already-downloaded file is available: + + pgwget -UL https://server.example.org/data/ -RN myfile.nc -CN diff --git a/src/rda_python_miscs/rdacp.py b/src/rda_python_miscs/rdacp.py index d17159f..e227e80 100644 --- a/src/rda_python_miscs/rdacp.py +++ b/src/rda_python_miscs/rdacp.py @@ -15,12 +15,19 @@ from rda_python_common.pg_file import PgFile class RdaCp(PgFile): + """Copy files and directories locally or between remote hosts via 'rdadata'. + + Supports local-to-local, local-to-remote, remote-to-local, and Object Store / + Globus transfers. Target files are owned by 'rdadata' and created with + configurable permission modes. Recursive copying is controlled by -r / -R. 
+ """ def __init__(self): + """Initialize RdaCp with default copy options and runtime state.""" super().__init__() self.RDACP = { 'fh': None, # from host name, default to localhost - 'th': None, # to host name, defaul to localhost + 'th': None, # to host name, default to localhost 'fb': None, # from bucket name for a from file in Object Store 'tb': None, # to bucket name for a to file in Object Store 'fp': None, # from Globus endpoint @@ -46,6 +53,13 @@ def __init__(self): # function to read parameters def read_parameters(self): + """Parse command-line arguments into RDACP options and validate inputs. + + The default option is -f (source paths); positional arguments before any + explicit option flag are treated as source paths. -r is a boolean flag; + -R and -F/-D take integer values. Displays usage and exits if -h is given + or no source files are specified. + """ dohelp = 0 argv = sys.argv[1:] self.set_suid(self.PGLOG['EUID']) @@ -72,7 +86,7 @@ def read_parameters(self): else: if option == 'R': self.RDACP[option] = int(arg) - elif 'FD'.find(option) > -1: + elif option in 'FD': self.RDACP[option] = self.base2int(arg, 8) else: self.RDACP[option] = arg @@ -85,6 +99,12 @@ def read_parameters(self): # function to start actions def start_actions(self): + """Validate copy targets, configure host/bucket/endpoint context, and dispatch copies. + + Resolves the target path, sets file and directory permission modes, checks + for invalid same-host copies, activates Object Store bucket or Globus endpoint + when specified, then calls copy_top_list. Logs a summary count on completion. 
+ """ self.dssdb_dbname() self.validate_decs_group('rdacp', self.PGLOG['CURUID'], 1) if not self.RDACP['R'] and self.RDACP['r']: self.RDACP['R'] = 1000 @@ -117,13 +137,23 @@ def start_actions(self): if self.RDACP['fh']: hinfo += " From " + self.RDACP['fh'] if self.RDACP['th']: hinfo += " To " + self.RDACP['th'] if self.CINFO['tcnt'] > 1: - self.pglog("Total {} {} copiled{}".format(self.CINFO['tcnt'], self.CINFO['cpstr'][self.CINFO['cpflag']], hinfo), self.LOGWRN) + self.pglog("Total {} {} copied{}".format(self.CINFO['tcnt'], self.CINFO['cpstr'][self.CINFO['cpflag']], hinfo), self.LOGWRN) elif self.CINFO['tcnt'] == 0 and not self.RDACP['fh']: self.pglog("{}: No File copied{}".format((self.CINFO['fpath'] if self.CINFO['fpath'] else self.CINFO['curdir']), hinfo), self.LOGWRN) self.cmdlog() - # display the top level list + # copy the top level list def copy_top_list(self, files): + """Iterate the top-level source paths and initiate copies or recursive traversal. + + For each source path, checks existence via the appropriate method (Globus or + GDEX). A directory path ending with '/' copies its contents rather than the + directory entry itself. Directories without -r/-R cause an error unless the + trailing '/' form is used. + + Args: + files (list[str]): Source paths from the -f option. + """ for file in files: if self.RDACP['th'] and not self.pgcmp(self.RDACP['th'], self.PGLOG['BACKUPNM'], 1): info = self.check_globus_file(file, 'gdex-glade', 0, self.LGWNEX) @@ -154,6 +184,16 @@ def copy_top_list(self, files): # recursively copy directory/file def copy_list(self, tlist, level, cdir): + """Recursively copy a directory listing up to the configured depth limit. + + Logs a sub-count message when two or more files are copied from a single + directory. Accumulates the total copy count in CINFO['tcnt']. + + Args: + tlist (dict): Mapping of path → file-info dict from gdex_glob. + level (int): Current recursion depth (1-based); stops when >= RDACP['R']. 
+ cdir (str): Path of the current directory being processed (for log messages). + """ fcnt = 0 for file in tlist: if tlist[file]['isfile']: @@ -166,8 +206,20 @@ def copy_list(self, tlist, level, cdir): self.pglog("{}{}: {} {} copied from directory".format(self.CINFO['fhost'], cdir, fcnt, self.CINFO['cpstr'][self.CINFO['cpflag']]), self.LOGWRN) self.CINFO['tcnt'] += fcnt - # copy one file each time + # copy one file def copy_file(self, fromfile, isfile): + """Resolve the destination path for one source file and perform the copy. + + When a target directory is set (tpath), strips the source base path prefix + and joins the remainder to tpath. Otherwise copies directly to the -t value. + + Args: + fromfile (str): Absolute source file path. + isfile (int): Non-zero when the source is a regular file (vs. a symlink type). + + Returns: + int: 1 if the file was copied successfully, 0 otherwise. + """ if self.CINFO['tpath']: fname = re.sub(r'^{}'.format(self.CINFO['fpath']), '', fromfile) if isfile: @@ -178,8 +230,9 @@ def copy_file(self, fromfile, isfile): tofile = self.RDACP['t'] return (1 if self.copy_gdex_file(tofile, fromfile, self.RDACP['th'], self.RDACP['fh'], self.LGWNEX) else 0) -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate RdaCp, parse arguments, run, and exit.""" object = RdaCp() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/rdacp.usg b/src/rda_python_miscs/rdacp.usg index 3988eed..6241d2a 100644 --- a/src/rda_python_miscs/rdacp.usg +++ b/src/rda_python_miscs/rdacp.usg @@ -1,62 +1,77 @@ - Copy directories and files in the current or specified directories to a target - directory. The owner of the target directories and files is 'rdadata' with default - or specified modes. 
- - Usage: rdacp [-D DiretoryMode] [-f] FromDirecctories/Files [-F FileMode] \ - [-hf HtarFileName] [-fh FromHostName] [-h] [-r] [-R RecursiveLevel] \ - [-t ToDirectory/FileName] [-th ToHostName] [-fl FileCountLimit] - - - Option -D, changes mode of target directories. It defaults to "775"; - - - Option -f, mandatory option for directories and files to be copied from. - Unix command line wildcards are supported. Use './' or '*' for all - directories and files in the current directory to be copied. - The directories and files must be readable by user 'rdadata'; - otherwise 'rdacp' will try to change file mode to make them readable. - The file names are htar member files if a htar file name is provided - by Option -hf; - - - Option -fh, host name for directories and files to be copied from. - It defaults to local host unless htar file name is provded via Option - -hf, then it defaults to hpss; - - - Option -F, changes mode of target files. It defaults to "664"; + Copy files and directories to a target location. Source and target may each + reside on the local host, a remote host, an Object Store bucket, or a Globus + endpoint. Target files are owned by 'rdadata' and created with configurable + permission modes. + + Usage: rdacp [-f] FromDirectories/Files [-t ToDirectory/FileName] \ + [-fh FromHostName] [-th ToHostName] \ + [-fb FromBucket] [-tb ToBucket] \ + [-fp FromGlobusEndpoint] [-tp ToGlobusEndpoint] \ + [-F FileMode] [-D DirectoryMode] [-r] [-R RecursiveLevel] [-h] + + - Option -f, source directories and/or files to copy. This is the + default option, so paths may be given without the -f flag. + Shell wildcards are supported. Use './' or '*' to copy everything + in the current directory. Source paths must be readable by user + 'rdadata'; rdacp will attempt to fix the mode if they are not; + + - Option -t, target directory or file name. Defaults to '.' (current + directory). Multiple source files cannot be copied to a single + target file name. 
A trailing '/' on the target path treats it as + a directory; + + - Option -fh, host name where the source files reside. + Defaults to the local host; + + - Option -th, host name where the target files should be written. + Defaults to the local host; + + - Option -fb, Object Store bucket name for the source files; + + - Option -tb, Object Store bucket name for the target files; + + - Option -fp, Globus endpoint for the source files; + + - Option -tp, Globus endpoint for the target files; + + - Option -F, permission mode for target files in octal notation. + Defaults to 664; + + - Option -D, permission mode for target directories in octal notation. + Defaults to 775; + + - Option -r, copy directories and files recursively (no depth limit); + + - Option -R RecursiveLevel, copy recursively up to the specified depth. + -R 1 copies only the immediate contents of each source directory; - Option -h, display this help document; - - Option -hf, provides a HTAR file name to retrieve its one or multiple - member files to local for option -fh HPSS, or to htar givens local files - onto HPSS for option -th HPSS; - - - Option -r, copies directories and files recursively; - - - Option -R, copies directories and files recursively up to the level provided - with this Option; - - - Option -t, provides a target Directory/File name. Its value defaults to '.'. - Mutilple source files can not be copied if a single target file name is - specified. - - - Option -th, host name for target directories and files. - It defaults to local host; - - - Option -fl, file count limit in each htar file, default 0 means all files in - a single htar file; if > 0, it means to archive multiple htar files, with - each holding up to the given file count. The max value is 5000000; - - This utility program can be executed anywhere. This help document is displayed if - either the source Directory/File Names are missed. 
- - For examples, to copy every directory/file under the current directory to under - ds277.6 on host castle, you can - - rdacp -r -t /PathTo/ds277.6/ -th castle -f * - - To copy every directory/file under a given local directory to under ds277.6 - on host castle, you can - - rdacp -r -t /PathTo/ds277.6/ -th castle -f /PathTo/DirectoryName/ - - where if the ending '/' is missed, the DirectoryName itself is copied too. - \ No newline at end of file + This utility can be run from any directory. Usage is displayed if no source + files are provided. A trailing '/' on a source directory path copies the + contents of that directory rather than the directory entry itself. + + Examples: + + 1. Copy all files and subdirectories under the current directory to a + remote host: + + rdacp -r -f * -t /PathTo/d277006/ -th castle + + 2. Copy the contents of a specific local directory to a remote location + (trailing '/' on source omits the directory entry itself): + + rdacp -r -f /PathTo/DirectoryName/ -t /PathTo/d277006/ -th castle + + Without the trailing '/', DirectoryName itself is also copied: + + rdacp -r -f /PathTo/DirectoryName -t /PathTo/d277006/ -th castle + + 3. Copy a single file to an Object Store bucket: + + rdacp -f /PathTo/myfile.nc -tb my-bucket -t myfile.nc + + 4. Copy files from a remote host to the local current directory: + + rdacp -fh castle -f /PathTo/d277006/myfile.nc diff --git a/src/rda_python_miscs/rdakill.py b/src/rda_python_miscs/rdakill.py index d6497a1..b8da4a4 100644 --- a/src/rda_python_miscs/rdakill.py +++ b/src/rda_python_miscs/rdakill.py @@ -15,8 +15,16 @@ from rda_python_common.pg_file import PgFile class RdaKill(PgFile): + """Kill local processes or PBS batch jobs by process ID, parent PID, or status. + + For local processes, sends SIGKILL (-9) to the matched process and all its + children recursively. For PBS batch jobs, uses qdel to cancel jobs by job ID + or by filtering all jobs in a queue by their current status. 
Also records an + interrupt flag in the dscheck table when a tracked process is killed. + """ def __init__(self): + """Initialize RdaKill with default kill options.""" super().__init__() self.RDAKILL = { 'a': None, # application name @@ -31,6 +39,13 @@ def __init__(self): # function to read parameters def read_parameters(self): + """Parse command-line arguments into RDAKILL options. + + Accepts -a, -h, -p, -P, -q, -s, -u flags; -r is a boolean toggle. + -p and -P are cast to int. A bare integer argument without a leading + flag is accepted as a process ID when -p has not been set yet. + Displays usage and exits if no options or arguments are provided. + """ optcnt = 0 option = None argv = sys.argv[1:] @@ -40,7 +55,7 @@ def read_parameters(self): self.PGLOG['LOGFILE'] = "rdakill.log" # set different log file self.cmdlog("rdakill {}".format(' '.join(argv))) for arg in argv: - ms = re.match(r'-([ahpPqstu])$', arg) + ms = re.match(r'-([ahpPqsu])$', arg) if ms: option = ms.group(1) elif re.match(r'-r$', arg): @@ -49,7 +64,7 @@ def read_parameters(self): self.pglog(arg + ": Unknown Option", self.LGEREX) elif option: if self.RDAKILL[option]: self.pglog("{}: value passed to Option -{} already".format(arg, option), self.LGEREX) - if 'pPt'.find(option) > -1: + if option in 'pP': self.RDAKILL[option] = int(arg) elif option == 'h': self.RDAKILL[option] = self.get_short_host(arg) @@ -59,8 +74,8 @@ def read_parameters(self): optcnt += 1 else: ms = re.match(r'^(\d+)$', arg) - if ms and self.RDAKILL['p']: - self.RDAKILL['p'] = int(ms.group(1)) # pid allow value only without leading option + if ms and not self.RDAKILL['p']: + self.RDAKILL['p'] = int(ms.group(1)) # bare integer accepted as PID when -p not yet set optcnt += 1 else: self.pglog(arg + ": pass in value without Option", self.LGEREX) @@ -68,6 +83,12 @@ def read_parameters(self): # function to start actions def start_actions(self): + """Dispatch to PBS or local kill path based on the -h option. 
+ + When -h matches the PBS node name, requires either a job ID (-p) or a + batch status (-s) and delegates accordingly. Otherwise kills local + processes matching -p, -P, or -a, requiring at least one to be set. + """ killloc = 1 if self.RDAKILL['h']: self.local_host_action(self.RDAKILL['h'], "kill processes", self.PGLOG['HOSTNAME'], self.LGEREX) @@ -85,8 +106,22 @@ def start_actions(self): self.rdakill_processes(self.RDAKILL['p'], self.RDAKILL['P'], self.RDAKILL['a'], self.RDAKILL['u']) self.cmdlog() - # kill processes for given condition + # kill local processes matching the given filters def rdakill_processes(self, pid, ppid, aname = None, uname = None, level = 0): + """Recursively kill local processes matching pid, ppid, app name, or user. + + Runs 'ps' with the most specific filter available, then walks each matching + line, recursing into child processes before killing the parent. Logs a + warning if no matching process is found at the top level. Also records a + dscheck interrupt for each killed PID. + + Args: + pid (int): Process ID to kill; 0 means no PID filter. + ppid (int): Parent PID filter; 0 means no parent filter. + aname (str|None): Application name substring filter. + uname (str|None): Owner username filter; None means all users. + level (int): Recursion depth (0 = top-level call). + """ kcnt = 0 if pid: cmd = "ps -p {} -f".format(pid) @@ -114,15 +149,26 @@ def rdakill_processes(self, pid, ppid, aname = None, uname = None, level = 0): self.kill_local_child(cid, uid, re.sub(r' +', ' ', line)) self.record_dscheck_interrupt(cid, self.PGLOG['HOSTNAME']) if not (kcnt or level): - buf = "No process idendified to kill " + buf = "No process identified to kill " if self.RDAKILL['h']: buf += "on " + self.RDAKILL['h'] else: buf += "locally" self.pglog(buf, self.LOGWRN) - # a local child process + # kill a local child process def kill_local_child(self, pid, uid, line): + """Send SIGKILL to a single local process and log the outcome. 
+ + Skips the kill if the process is no longer running. Logs 'Kill' on + success, 'Error Kill' if the process persists after the kill command, + or 'Quit' if the process had already exited before the kill was sent. + + Args: + pid (int): PID of the process to kill. + uid (str): Owner username, used to build the kill command via suid. + line (str): Formatted ps output line for logging context. + """ if self.check_process(pid): cmd = self.get_local_command("kill -9 {}".format(pid), uid) if self.pgsystem(cmd, self.LOGWRN, 260): # 4+256 @@ -131,8 +177,20 @@ def kill_local_child(self, pid, uid, line): return self.pglog("Error Kill: {}\n{}".format(line, self.PGLOG['SYSERR']), self.LOGWRN) if not self.check_process(pid): self.pglog("Quit: " + line, self.LOGWRN) - # kill a pbs batch job + # kill a PBS batch job by job ID def rdakill_pbs_batch(self, bid): + """Cancel a single PBS batch job by job ID using qdel. + + Looks up job info to get the owner, then runs qdel (or qdelcasper on + Casper hosts) as that user. Records a dscheck interrupt on success. + Logs an error if the job ID is not found or if qdel fails. + + Args: + bid (int): PBS batch job ID to cancel. + + Returns: + int: 1 if the job was successfully cancelled, 0 otherwise. + """ ret = 0 stat = self.get_pbs_info(bid, 0, self.LOGWRN) if stat: @@ -146,9 +204,20 @@ def rdakill_pbs_batch(self, bid): if not ret and self.PGLOG['SYSERR']: self.pglog(self.PGLOG['SYSERR'], self.LGEREX) return ret - # kill PBS batch jobs for given status + # kill PBS batch jobs matching a given status def rdakill_pbs_status(self, stat, queue, uname): - if not queue: queue = 'rda' + """Cancel all PBS batch jobs in a queue that match the given status. + + Queries qstat for the specified queue (defaulting to 'gdex') and optional + user filter, then calls rdakill_pbs_batch for each job whose State field + matches stat. Logs a summary of how many jobs were found and killed. + + Args: + stat (str): PBS job state to match (e.g. 'PEND', 'RUN').
+ queue (str|None): PBS queue name; defaults to 'gdex' if None. + uname (str|None): Limit to jobs owned by this user; None means all users. + """ + if not queue: queue = 'gdex' qopts = '' if uname: qopts = "-u " + uname @@ -170,15 +239,25 @@ def rdakill_pbs_status(self, stat, queue, uname): if uname: line += " for " + uname self.pglog(line, self.LOGWRN) - # record a dscheck + # record a dscheck interrupt for a killed process def record_dscheck_interrupt(self, pid, host): + """Mark a dscheck record as interrupted when its process has been killed. + + Looks up the dscheck entry by PID and hostname. If found, sets its status + to 'I' (interrupted), clears the PID lock, and updates the check timestamp. + + Args: + pid (int): PID (or PBS job ID) of the killed process. + host (str): Hostname where the process was running. + """ pgrec = self.pgget("dscheck", "cindex", "pid = {} AND hostname = '{}'".format(pid, host), self.LOGERR) if pgrec: record = {'chktime': int(time.time()), 'status': 'I', 'pid': 0} # release lock self.pgupdt("dscheck", record, "cindex = {}".format(pgrec['cindex']), self.LGEREX) -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate RdaKill, parse arguments, run, and exit.""" object = RdaKill() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/rdakill.usg b/src/rda_python_miscs/rdakill.usg index 2a1430c..207d09a 100644 --- a/src/rda_python_miscs/rdakill.usg +++ b/src/rda_python_miscs/rdakill.usg @@ -1,36 +1,66 @@ - Kill one of multiple processes and theirs children for given local process ID or - other process information; kill one or multiple PBS batch jobs for give batch - Job ID or Status. For killing PBS batch jobs, you must login to casper - login nodes. - + Kill one or more local processes and their children for a given process ID or + other filter criteria; or cancel one or more PBS batch jobs by job ID or by + job status. 
For PBS operations the -h option must specify the PBS node name. + Usage: rdakill [-h HostName] [-p ProcessID] [-P ParentProcessID] \ - [-s BatchStatus] [-u ProcessOwner] [-a ApplicationName] - - - Option -a, application name of the process; - - - Option -h, hostname the process is on. Omit it for local process, - but it is mandatory if the process id is a PBS bactch id. - - - Option -p, the process id or batch job id to be stopped. - - - Option -P, the parent process id; - - - Option -q, the PBS queue name. It defaults to 'rda'; - - - Option -s, the Batch Job Status; this is mantatory if batch id is not provided; - - - Option -u, use login name for the process owner. It defaults to 'all' for - all user login names. - - This help document is displayed if rdakill is executed without option. - - For examples, to kill a process of a dsrqst process with pid 13199 locally, - - rdakill -p 13199 -a dsrqst - - The child processes under the dsrqst process, if any, are also terminated. To kill - a PBS bactch process with a bid = 334323, - - rdakill -h PBS -p 334323 - \ No newline at end of file + [-s BatchStatus] [-q BatchQueue] [-r] \ + [-u ProcessOwner] [-a ApplicationName] + + - Option -a, filter by application (command) name. For local processes, + any process whose command field contains this string is matched. + Can be combined with -p, -P, or -u; + + - Option -h, hostname where the process is running. Omit for local + processes. Set to the PBS node name (e.g. casper-pbs) to cancel + PBS batch jobs; + + - Option -p, local process ID (PID) or PBS batch job ID to kill. + A bare integer argument without a leading flag is also accepted + as a process ID; + + - Option -P, kill the local process with this parent process ID (PPID), + along with all of its children; + + - Option -q, PBS queue name to search when killing by status (-s). 
+ Defaults to 'gdex'; + + - Option -r, reserved flag for exclusive kill; currently works only with + -s PEND to target pending-only batch jobs; + + - Option -s, PBS batch job status to match when killing by status rather + than by job ID. Mandatory when -p is not given for PBS operations. + Common values: RUN, PEND; + + - Option -u, limit kills to processes owned by this login name. + Without this option all users are included; + + For local processes, all child processes are also terminated recursively. + Usage is displayed if rdakill is run without any options or arguments. + + Examples: + + 1. Kill a local process by PID: + + rdakill -p 13199 + rdakill 13199 + + 2. Kill a local process by PID and verify it matches a specific application: + + rdakill -p 13199 -a dsrqst + + 3. Kill all local dsrqst processes (and their children) owned by a user: + + rdakill -u zji -a dsrqst + + 4. Kill a PBS batch job by job ID: + + rdakill -h casper-pbs -p 334323 + + 5. Kill all running PBS batch jobs for a user in the default queue: + + rdakill -h casper-pbs -s RUN -u zji + + 6. Kill all pending PBS batch jobs in a specific queue: + + rdakill -h casper-pbs -s PEND -q gdex diff --git a/src/rda_python_miscs/rdamod.py b/src/rda_python_miscs/rdamod.py index ea8ae20..257a815 100644 --- a/src/rda_python_miscs/rdamod.py +++ b/src/rda_python_miscs/rdamod.py @@ -5,7 +5,7 @@ # Date: 10/24/2020 # 2025-03-10 transferred to package rda_python_miscs from # https://github.com/NCAR/rda-utility-programs.git -# Purpose: change file/directory modes in given one or mutilple local directories +# Purpose: change file/directory modes in given one or multiple local directories # owned by 'rdadata' # Github: https://github.com/NCAR/rda-python-miscs.git ################################################################################## @@ -16,8 +16,16 @@ from rda_python_common.pg_file import PgFile class RdaMod(PgFile): + """Change file and directory permission modes for paths owned by 'rdadata'.
+ + Only items owned by 'rdadata' are changed; items with a different owner are + logged as errors. Items already at the target mode are silently skipped. + A leading letter ('D' or 'F') is logged with each changed path to indicate + its type. + """ def __init__(self): + """Initialize RdaMod with default mode-change options and runtime state.""" super().__init__() self.RDAMOD = { 'd': 0, # 1 to change directory mode @@ -25,8 +33,8 @@ def __init__(self): 'h': 0, # 1 to show help message 'r': 0, # 1 if recursive all 'R': 0, # > 0 to set recursive limit - 'F': 0o664, # to chnage file mode, default to 664 - 'D': 0o775, # to chnge directory mode, default to 775 + 'F': 0o664, # target file mode, default to 664 + 'D': 0o775, # target directory mode, default to 775 } self.MINFO = { 'files': [], @@ -38,6 +46,13 @@ def __init__(self): # function to read parameters def read_parameters(self): + """Parse command-line arguments into RDAMOD option flags and the file/directory list. + + Recognises boolean flags -d, -f, -h, -r and value options -R, -F, -D. + -R is cast to int; -F and -D are parsed as octal integers. Positional + arguments are collected into MINFO['files']. Exits with usage if -h is + given or no files are specified. 
+ """ self.set_suid(self.PGLOG['EUID']) self.set_help_path(__file__) self.PGLOG['LOGFILE'] = "rdamod.log" # set different log file @@ -49,7 +64,7 @@ def read_parameters(self): if ms: option = ms.group(1) if option not in self.RDAMOD: self.pglog(arg + ": Unknown Option", self.LGEREX) - if 'dfhr'.find(option) > -1: + if option in 'dfhr': self.RDAMOD[option] = 1 option = defopt continue @@ -60,7 +75,7 @@ def read_parameters(self): else: if option == 'R': self.RDAMOD[option] = int(arg) - elif 'FD'.find(option) > -1: + elif option in 'FD': self.RDAMOD[option] = self.base2int(arg, 8) else: self.RDAMOD[option] = arg @@ -69,6 +84,7 @@ def read_parameters(self): # function to start actions def start_actions(self): + """Validate DECS group membership, process the path list, and log a summary count.""" self.dssdb_dbname() if not (self.RDAMOD['d'] or self.RDAMOD['f']): self.RDAMOD['d'] = self.RDAMOD['f'] = 1 # both directories and files as default @@ -78,7 +94,7 @@ def start_actions(self): if (self.MINFO['dcnt'] + self.MINFO['fcnt']) > 1: msg = '' if self.MINFO['dcnt'] > 0: - s = ('ies' if self.MINFO['dcnt'] else 'y') + s = ('ies' if self.MINFO['dcnt'] > 1 else 'y') msg = "{} Director{}".format(self.MINFO['dcnt'], s) if self.MINFO['fcnt'] > 0: s = ('s' if self.MINFO['fcnt'] > 1 else '') @@ -91,6 +107,15 @@ def start_actions(self): # change mode for the top level list def change_top_list(self, files): + """Iterate top-level paths and change modes, expanding directories as needed. + + A directory path ending with '/' changes the mode of its contents rather + than the directory entry itself. Relative paths are resolved against curdir. + Recurses into directories when -R is set or when the trailing '/' form is used. + + Args: + files (list[str]): Source paths from the command line. 
+ """ for file in files: info = self.check_local_file(file, 6, self.LOGWRN) if not info: @@ -109,6 +134,16 @@ def change_top_list(self, files): # recursively change directory/file mode def change_list(self, files, level, cdir): + """Recursively change modes for a directory listing up to the depth limit. + + Logs a sub-count when two or more files have their mode changed in a + single directory. + + Args: + files (dict): Mapping of path → file-info dict from local_glob. + level (int): Current recursion depth (1-based); stops when >= RDAMOD['R']. + cdir (str): Path of the current directory (for log messages). + """ fcnt = 0 for file in files: info = files[file] @@ -116,14 +151,29 @@ def change_list(self, files, level, cdir): if not info['isfile'] and level < self.RDAMOD['R']: fs = self.local_glob(file, 6, self.LOGWRN) self.change_list(fs, level+1, file) - if fcnt > 1: # display sub count if two more files are changed mode + if fcnt > 1: # display sub count if two or more files changed mode self.pglog("{}: {} Files changed Mode".format(cdir, fcnt), self.LOGWRN) - # change mode of a single directory/file + # change mode of a single file or directory def change_mode(self, file, info): + """Change the permission mode of one file or directory. + + Skips the item if the -f/-d flag for its type is not set, if it is not + owned by 'rdadata', or if its current mode already matches the target. + Logs the old-to-new mode transition on success or an error on owner mismatch. + Updates MINFO['fcnt'] for files and MINFO['dcnt'] for directories on success. + + Args: + file (str): Absolute path to the file or directory. + info (dict): File metadata dict from local_glob/check_local_file + (includes 'isfile', 'logname', 'mode'). + + Returns: + int: 1 if a file mode was successfully changed, 0 otherwise. 
+ """ fname = re.sub(r'^{}'.format(self.MINFO['tpath']), '', file, 1) if info['isfile']: - if not self.RDAMOD['d']: return 0 + if not self.RDAMOD['f']: return 0 fname = "F" + fname mode = self.RDAMOD['F'] else: @@ -141,8 +191,9 @@ def change_mode(self, file, info): self.MINFO['dcnt'] += 1 return 0 -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate RdaMod, parse arguments, run, and exit.""" object = RdaMod() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/rdamod.usg b/src/rda_python_miscs/rdamod.usg index 8498345..81e9fe4 100644 --- a/src/rda_python_miscs/rdamod.usg +++ b/src/rda_python_miscs/rdamod.usg @@ -1,51 +1,71 @@ - Change modes for directories and files in the current or specified directories. - The owner of the directories and files must be 'rdadata' for their modes being - changed. For directories and files with modes changed successfully or with error, - a leading letter is displayed in font of the relative file names to indicate - file types; 'D' for a directory and 'F' for a data file. - - Usage: rdamod [-d] [-f] [-D DiretoryMode] [-F FileMode] [-h HostName] [-r] [-R RecursiveLevel] \ - [Directory/File List] - - - Option -d, change directory modes only. Changing Directory mode is included - as default. Add this option to exclude changing file mode; - - - Option -f, change file modes only. Changing File mode is included - as default. Add this option to exclude changing Directory mode; - - - Option -h, pass in the remote host name; hpss for change file mode on HPSS; - - - Option -r, change modes for directories and files recursively; - - - Option -R, change modes for directories and files recursively up to - the level provided with this Option; - - - Option -D, change directory mode to a value provided by this Option. - It defaults to "755"; - - - Option -F, change file mode to a value provided by this Option. 
- It defaults to "644"; - - - Directory/file List is mandatory; this help document is displayed - without it. Unix command line wildcards are supported. Use './' or '*' - for all directories and files in the current directory to be considered. - - This utility program can be executed anywhere. No Mode is changed if neither - directory nor file are owned by user 'rdadata'. - - For examples, to change modes for directories and files under ds277.6, you can - - 1. Change into the dataset home data directory as 'cd /PathTo/ds277.6' and - execute 'rdamod ./'; add recursive option '-r' to change modes for directories - and files further into the sub-directories, or change directory into - a sub-directory to change mode for files inside of it. - - 2. Pass an absolute path to rdamod as 'rdamod /PathTo/ds277.6/'; - without the ending by '/', mode of top directory itself is - changed only unless the recursive option '-r' or '-R RecursiveLevel' - is present. - - 3. If the current directory is in another dataset home data directory, - such as /PathTo/ds277.7, you can pass a relative path to rdamod - as 'rdamod ../ds277.6/' or as 'rdamod ../ds277.6/*' + Change permission modes for files and directories owned by 'rdadata'. Only + items owned by 'rdadata' are changed; items with a different owner are logged + as errors. Items already at the target mode are silently skipped. A leading + letter is displayed in front of each changed path to indicate its type: 'D' + for a directory and 'F' for a file. + + Usage: rdamod [-d] [-f] [-D DirectoryMode] [-F FileMode] [-h] [-r] \ + [-R RecursiveLevel] Directory/File List + + - Option -d, change directory modes only. Both directories and files are + processed by default; this option suppresses file mode changes; + + - Option -f, change file modes only. Both directories and files are + processed by default; this option suppresses directory mode changes; + + - Option -D DirectoryMode, target permission mode for directories in octal + notation. 
Defaults to 775; + + - Option -F FileMode, target permission mode for files in octal notation. + Defaults to 664; + + - Option -h, display this help document; + + - Option -r, change modes recursively (no depth limit); + + - Option -R RecursiveLevel, change modes recursively up to the specified + depth. -R 1 processes only the immediate contents of each given + directory; + + - Directory/File List is mandatory; usage is displayed without it. + Shell wildcards are supported. Use './' or '*' to process everything + in the current directory. + + This utility can be run from any directory. No mode is changed if all + matched items are already at the target mode or if none are owned by 'rdadata'. + + Examples: + + 1. Change into the dataset home directory and change modes of all + immediate contents: + + cd /PathTo/ds277.6 + rdamod ./ + + Add -r to recurse into sub-directories: + + rdamod -r ./ + + 2. Pass an absolute path with a trailing '/' to process the directory's + contents (the directory entry itself is not changed): + + rdamod /PathTo/ds277.6/ + + Without the trailing '/', only the directory entry itself is changed + unless -r or -R is given: + + rdamod /PathTo/ds277.6 + + 3. Use a relative path from a neighbouring directory: + + rdamod ../ds277.6/ + rdamod ../ds277.6/* + + 4. Change file modes only, with a non-default target mode: + + rdamod -f -F 660 -r /PathTo/ds277.6/ + + 5. 
Change directory modes only, recursively: + + rdamod -d -r /PathTo/ds277.6/ diff --git a/src/rda_python_miscs/rdaown.py b/src/rda_python_miscs/rdaown.py index 2db9bc2..0cbd4d6 100644 --- a/src/rda_python_miscs/rdaown.py +++ b/src/rda_python_miscs/rdaown.py @@ -5,7 +5,7 @@ # Date: 10/24/2020 # 2025-03-10 transferred to package rda_python_miscs from # https://github.com/NCAR/rda-utility-programs.git -# Purpose: change file/directory ownership to 'rdadata' in given one or mutilple +# Purpose: change file/directory ownership to 'rdadata' in given one or multiple # local directories that are owned by decs specialists. it needs # super user privilege to execute. # Github: https://github.com/NCAR/rda-python-miscs.git @@ -18,8 +18,15 @@ from rda_python_common.pg_file import PgFile class RdaOwn(PgFile): + """Change file and directory ownership to 'rdadata' for a given path list. + + Must be run as root. Only items currently owned by DECS specialists are + changed; items already owned by 'rdadata' are silently skipped. A leading + letter ('D' or 'F') is logged with each changed path to indicate its type. + """ def __init__(self): + """Initialize RdaOwn with default ownership-change options and runtime state.""" super().__init__() self.RDAOWN = { 'd': 0, # 1 to change directory owner @@ -38,8 +45,15 @@ def __init__(self): 'fcnt': 0 } - # function to read paramters + # function to read parameters def read_parameters(self): + """Parse command-line arguments into RDAOWN option flags and the file/directory list. + + Recognises boolean flags -d, -f, -h, -r and value option -R. Positional + arguments are collected into OINFO['files']. Exits with usage if -h is + given or no files are specified. Requires the effective user to be root. + Defaults both -d and -f when neither is explicitly set. 
+ """ argv = sys.argv[1:] self.set_help_path(__file__) self.PGLOG['LOGFILE'] = "rdaown.log" # set different log file @@ -50,7 +64,7 @@ def read_parameters(self): if ms: option = ms.group(1) if option not in self.RDAOWN: self.pglog(arg + ": Unknown Option", self.LGEREX) - if 'dfhr'.find(option) > -1: + if option in 'dfhr': self.RDAOWN[option] = 1 option = defopt continue @@ -70,6 +84,7 @@ def read_parameters(self): # function to start actions def start_actions(self): + """Connect to the database, process the path list, and log a summary count.""" self.dssdb_scname() self.change_top_list(self.OINFO['files']) if (self.OINFO['dcnt'] + self.OINFO['fcnt']) > 1: @@ -88,6 +103,15 @@ def start_actions(self): # change owner for the top level list def change_top_list(self, files): + """Iterate top-level paths and change ownership, expanding directories as needed. + + A directory path ending with '/' changes ownership of its contents rather + than the directory entry itself. Relative paths are resolved against curdir. + Recurses into directories when -R is set or when the trailing '/' form is used. + + Args: + files (list[str]): Source paths from the command line. + """ for file in files: info = self.check_local_file(file, 2, self.LOGWRN) if not info: @@ -106,6 +130,15 @@ def change_top_list(self, files): # recursively change directory/file owner def change_list(self, files, level, cdir): + """Recursively change ownership for a directory listing up to the depth limit. + + Logs a sub-count when two or more files are changed in a single directory. + + Args: + files (list[str]): Glob-expanded paths at the current recursion level. + level (int): Current recursion depth (1-based); stops when >= RDAOWN['R']. + cdir (str): Path of the current directory (for log messages). 
+ """ fcnt = 0 for file in files: info = self.check_local_file(file, 2, self.LOGWRN) @@ -114,11 +147,26 @@ def change_list(self, files, level, cdir): if not info['isfile'] and level < self.RDAOWN['R']: fs = glob.glob(file + "/*") self.change_list(fs, level+1, file) - if fcnt > 1: # display sub count if two more files are changed mode + if fcnt > 1: # display sub count if two or more files changed owner self.pglog("{}: {} Files changed owner in the directory".format(cdir, fcnt), self.LOGWRN) - # change owner for a single directory/file + # change owner for a single file or directory def change_owner(self, file, info): + """Change ownership of one file or directory to 'rdadata' using chown. + + Skips the item if it is already owned by 'rdadata' or if the current owner + is not a registered DECS specialist in the dssgrp table. Logs the result + as 'owner => rdadata' on success or an error message on failure. Updates + OINFO['fcnt'] for files and OINFO['dcnt'] for directories on success. + + Args: + file (str): Absolute path to the file or directory. + info (dict): File metadata dict from check_local_file (includes 'isfile', + 'logname'). + + Returns: + int: 1 if a file was successfully changed, 0 otherwise. 
+ """ fname = re.sub(r'^{}'.format(self.OINFO['tpath']), '', file, 1) if info['isfile']: if not self.RDAOWN['f']: return 0 @@ -139,8 +187,9 @@ def change_owner(self, file, info): return 0 return self.pglog("{}: Error change owner {} to rdadata".format(fname, info['logname']), self.LOGERR) -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate RdaOwn, parse arguments, run, and exit.""" object = RdaOwn() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/rdaown.usg b/src/rda_python_miscs/rdaown.usg index 0f02dfd..70c170c 100644 --- a/src/rda_python_miscs/rdaown.usg +++ b/src/rda_python_miscs/rdaown.usg @@ -1,46 +1,60 @@ - Change owner to 'rdadata' for directories and files in the current or specified - directories. You must execute this program as 'root' to be able to change owner. - The owner of the directories and files must be DSS specialists to be - changed. For directories and files with ownership changed successfully or with error, - a leading letter is displayed in font of the relative file names to indicate - file types; 'D' for a directory and 'F' for a data file. - - Usage: rdaown [-d] [-f] [-h] [-r] [-R RecursiveLevel] [Directory/File List] - - - Option -d, change directory owner only. Changing Directory owner is included - as default. Add this option to exclude changing file owner; - - - Option -f, change file owner only. Changing File owner is included - as default. Add this option to exclude changing Directory owner; - + Change ownership of files and directories to 'rdadata'. Must be run as root. + Only items currently owned by DECS specialists are changed; items already owned + by 'rdadata' or owned by non-specialists are skipped. A leading letter is + displayed in front of each changed path to indicate its type: 'D' for a + directory and 'F' for a file. 
+ + Usage: rdaown [-d] [-f] [-h] [-r] [-R RecursiveLevel] Directory/File List + + - Option -d, change directory ownership only. Both directories and files + are processed by default; this option suppresses file ownership changes; + + - Option -f, change file ownership only. Both directories and files are + processed by default; this option suppresses directory ownership changes; + - Option -h, display this help document; - - Option -r, change owner for directories and files recursively; - - - Option -R, change owner for directories and files recursively up to - the level provided with this Option; - - - Directory/file List is mandatory; this help document is displayed - without it. Unix command line wildcards are supported. Use './' or '*' - for all directories and files in the current directory to be considered. - - - This utility program can be executed anywhere. No Mode is changed if neither - directory nor file are owned by user 'rdadata'. - - For examples, to change owner for directories and files under ds277.6, you can - - 1. Change into the dataset home data directory as 'cd /PathTo/ds277.6' and - execute 'rdaown ./'; add recursive option '-r' to change owner for directories - and files further into the sub-directories, or change directory into - a sub-directory to change owner for files inside of it. - - 2. Pass an absolute path to rdaown as 'rdaown /PathTo/ds277.6/'; - without the ending by '/', owner of top directory itself is - changed only unless the recursive option '-r' or '-R RecursiveLevel' - is present. - - 3. If the current directory is in another dataset home data directory, - such as /PathTo/ds277.7, you can pass a relative path to rdaown - as 'rdaown ../ds277.6/' or as 'rdaown ../ds277.6/*' + - Option -r, change ownership recursively (no depth limit); + + - Option -R RecursiveLevel, change ownership recursively up to the specified + depth. 
-R 1 processes only the immediate contents of each given directory; + + - Directory/File List is mandatory; usage is displayed without it. + Shell wildcards are supported. Use './' or '*' to process everything + in the current directory. + + This utility can be run from any directory. No ownership is changed if all + matched items are already owned by 'rdadata' or if none of their current owners + are registered DECS specialists. + + Examples: + + 1. Change into the dataset home directory and change ownership of all + immediate contents: + + cd /PathTo/ds277.6 + rdaown ./ + + Add -r to recurse into sub-directories: + + rdaown -r ./ + + 2. Pass an absolute path with a trailing '/' to process the directory's + contents (the directory entry itself is not changed): + + rdaown /PathTo/ds277.6/ + + Without the trailing '/', only the directory entry itself is changed + unless -r or -R is given: + + rdaown /PathTo/ds277.6 + + 3. Use a relative path from a neighbouring directory: + + rdaown ../ds277.6/ + rdaown ../ds277.6/* + + 4. Change ownership of files only, recursively: + + rdaown -f -r /PathTo/ds277.6/ diff --git a/src/rda_python_miscs/rdaps.py b/src/rda_python_miscs/rdaps.py index 3261318..b688029 100644 --- a/src/rda_python_miscs/rdaps.py +++ b/src/rda_python_miscs/rdaps.py @@ -14,8 +14,15 @@ from rda_python_common.pg_file import PgFile class RdaPs(PgFile): + """Show process status for local or PBS batch processes on any accessible host. + + Wraps the system 'ps' command for local processes and 'qstat' for PBS batch + jobs. Results can be filtered by process ID, parent process ID, owner, or + application name. + """ def __init__(self): + """Initialize RdaPs with default process query options.""" super().__init__() self.RDAPS = { 'a' : None, # application name @@ -27,6 +34,12 @@ def __init__(self): # function to read parameters def read_parameters(self): + """Parse command-line arguments into RDAPS options. + + Accepts -a, -h, -p, -P, -u flags. 
-p and -P are cast to int; a bare + integer argument without a leading flag is treated as a process ID for -p. + Displays usage and exits if no options or arguments are provided. + """ optcnt = 0 argv = sys.argv[1:] self.set_suid(self.PGLOG['EUID']) @@ -34,14 +47,14 @@ def read_parameters(self): self.PGLOG['LOGFILE'] = "rdaps.log" # set different log file self.cmdlog("rdaps {}".format(' '.join(argv))) for arg in argv: - ms = re.match(r'-([ahpPtu])$', arg) + ms = re.match(r'-([ahpPu])$', arg) if ms: option = ms.group(1) elif re.match(r'-\w+$', arg): self.pglog(arg + ": Unknown Option", self.LGEREX) elif option: if self.RDAPS[option]: self.pglog("{}: value passed to Option -{} already".format(arg, option), self.LGEREX) - if 'pPt'.find(option) > -1: + if option in 'pP': self.RDAPS[option] = int(arg) elif option == 'h': self.RDAPS[option] = self.get_short_host(arg) @@ -60,6 +73,11 @@ def read_parameters(self): # function to start actions def start_actions(self): + """Determine whether to query a PBS node or the local host, then take a snapshot. + + If a remote host is given via -h and it matches the PBS node name, calls + pbs_snapshot(); otherwise calls process_snapshot() on the local host. + """ self.dssdb_dbname() chkloc = 1 if self.RDAPS['h']: @@ -70,8 +88,15 @@ def start_actions(self): if chkloc: self.process_snapshot() self.cmdlog() - # get a snapshot of a process status + # get a snapshot of local process status def process_snapshot(self): + """Run 'ps' on the local host and print matching process lines. + + Builds the ps command based on which filter options are set (-p, -P, -u), + falling back to 'ps -ef' when none are given. Each output line is then + re-filtered against -u, -p, -P, and -a before being logged. Consecutive + spaces in each matching line are collapsed to a single space. 
+ """ if self.RDAPS['p']: cmd = "ps -p {} -f".format(self.RDAPS['p']) elif self.RDAPS['P']: @@ -94,8 +119,14 @@ def process_snapshot(self): if self.RDAPS['a'] and aname.find(self.RDAPS['a']) < 0: continue self.pglog(re.sub(r' +', ' ', line), self.LOGWRN) - # get a snapshot of a PBS batch process status + # get a snapshot of PBS batch process status def pbs_snapshot(self): + """Query PBS job status via qstat and print matching job lines. + + Builds qstat options from -u and -p flags; defaults to querying the 'gdex' + queue when neither is set. Reorders the output columns so 'UserName' appears + first, then logs one line per job, filtering by -a (job name) when set. + """ qopts = '' if self.RDAPS['u']: qopts = "-u {}".format(self.RDAPS['u']) @@ -126,8 +157,9 @@ def pbs_snapshot(self): vals.append(stat[k][i]) self.pglog(' '.join(vals), self.LOGWRN) -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate RdaPs, parse arguments, run, and exit.""" object = RdaPs() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/rdaps.usg b/src/rda_python_miscs/rdaps.usg index c85a20c..d10314f 100644 --- a/src/rda_python_miscs/rdaps.usg +++ b/src/rda_python_miscs/rdaps.usg @@ -1,29 +1,55 @@ - List matching process information for local or batch processes if they are running. - + Show process status for local or PBS batch processes. Results can be filtered + by process ID, parent process ID, process owner, or application name. Without + a hostname, 'ps' is run on the local machine. When the hostname matches the + PBS node name, 'qstat' is used instead. + Usage: rdaps [-h HostName] [-p ProcessID] [-P ParentProcessID] \ - [-u ProcessOnwer] [-a ApplicationName] + [-u ProcessOwner] [-a ApplicationName] + + - Option -a, filter results by application (command) name. For local + processes, any line whose command field contains this string is shown. 
+ For PBS jobs, only jobs whose job name matches exactly are shown; + + - Option -h, hostname where the process is running. Omit for local + processes. Use the PBS node name (e.g. the value of PBSNAME) to + query PBS batch jobs via qstat instead of ps; + + - Option -p, process ID (PID) for a local process, or job ID for a PBS + batch job. A bare integer argument without a leading flag is also + accepted as a process ID; + + - Option -P, parent process ID (PPID) to filter local processes by their + parent; + + - Option -u, show only processes owned by this login name. Without this + option all users are included (equivalent to 'ps -ef'); + + Usage is displayed if no options or arguments are provided. + + Examples: + + 1. Check a local process by PID (with or without the -p flag): + + rdaps -p 13199 + rdaps 13199 + + 2. Filter by PID and application name on the local host: + + rdaps -p 13199 -a dsrqst + + 3. List all local processes owned by a specific user: + + rdaps -u zji - - Option -a, application name of the process; + 4. Query a PBS batch job by job ID: - - Option -h, hostname the process is on; omit it for local process, - but it is mandatory if the process id is a PBS bactch id. + rdaps -h casper-pbs -p 334323 - - Option -p, the local process or batch job id to be checked; + 5. List all PBS batch jobs for a user: - - Option -P, the parent process id on the local machine; - - - Option -u, use login name for the process owner. It defaults to 'all' for - all user login names. - - This utility program can be executed on selected machines. This help document - is displayed if no option is provided. - - For examples, to list process information of a dsrqst process with pid 13199 locally, + rdaps -h casper-pbs -u zji - rdaps -p 13199 -a dsrqst + 6. 
List all PBS batch jobs for a user filtered by job name: - To list a PBS bactch process with a bid = 334323, - - rdaps -h PBS -p 334323 - \ No newline at end of file + rdaps -h casper-pbs -u zji -a dsrqst diff --git a/src/rda_python_miscs/rdasub.py b/src/rda_python_miscs/rdasub.py index 7622900..a832be6 100644 --- a/src/rda_python_miscs/rdasub.py +++ b/src/rda_python_miscs/rdasub.py @@ -2,10 +2,10 @@ ################################################################################## # Title: rdasub # Author: Zaihua Ji, zji@ucar.edu -# Date: 03/51/2021 +# Date: 03/31/2021 # 2025-03-10 transferred to package rda_python_miscs from # https://github.com/NCAR/rda-utility-programs.git -# Purpose: python script to submit a nohup bachground execution +# Purpose: python script to submit a nohup background execution # Github: https://github.com/NCAR/rda-python-miscs.git ################################################################################## @@ -16,14 +16,31 @@ from rda_python_common.pg_file import PgFile class RdaSub(PgFile): + """Submit a command as a nohup background process on the local machine. + + Wraps the command in 'nohup ... > /dev/null 2>&1 &' and logs the resulting + PID once the process is detected in 'ps' output. Supports optional working + directory and environment variable setup before launch. + """ def __init__(self): + """Initialize RdaSub with empty customized options and argument string.""" super().__init__() - self.coptions = {'cmd': None, 'cwd': None, 'env': None} # customized options - self.args = None + self.coptions = {'cmd': None, 'cwd': None, 'env': None} # cmd: command to run, + # cwd: working directory, + # env: environment pairs + self.args = None # extra arguments to append after the command # function to read parameters def read_parameters(self): + """Parse command-line arguments into coptions and trailing command arguments. + + Recognises -cmd, -cwd, -env as long options and -b as a background flag. 
+ Parsing stops after -cmd's value is consumed; any remaining argv tokens + are collected as extra arguments for the command. Exits with usage if no + arguments are given; errors if -cmd is not provided. Arguments containing + spaces are automatically quoted. + """ aname = 'rdasub' self.set_help_path(__file__) copts = '|'.join(self.coptions) @@ -56,9 +73,16 @@ def read_parameters(self): # function to start actions def start_actions(self): + """Resolve the command path, change to the working directory if set, and launch. + + Expands environment variables in cwd when a '$' is present. Resolves the + command to an absolute path, appends extra arguments, logs a descriptive + message, runs the command under nohup, then calls display_process_info to + find and log the resulting PID. + """ msg = "{}-{}{}".format(self.PGLOG['HOSTNAME'], self.PGLOG['CURUID'], self.current_datetime()) if self.coptions['cwd']: - if self.coptions['cwd'].find('$'): self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX) + if '$' in self.coptions['cwd']: self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX) msg += "-" + self.coptions['cwd'] self.change_local_directory(self.coptions['cwd'], self.LGEREX) else: @@ -72,8 +96,20 @@ def start_actions(self): os.system("nohup " + cmd + " > /dev/null 2>&1 &") self.display_process_info(self.coptions['cmd'], cmd) - # display the the most recent matching process info + # display the most recent matching process info def display_process_info(self, cname, cmd): + """Poll 'ps' up to twice to find the newly launched process and log its PID. + + Searches for a process whose command matches cname and whose PPID is 1 + (detached via nohup). Picks the most recently started match by comparing + start times. Sleeps 2 seconds between attempts if the first poll finds + nothing. Logs the PID on success or a warning if no matching process is + found. 
+ + Args: + cname (str): Base command name used to filter ps output. + cmd (str): Full command string used to verify argument matches. + """ ctime = time.time() RTIME = PID = 0 pscmd = "ps -u {},{} -f | grep {} | grep ' 1 ' | grep -v ' grep '".format(self.PGLOG['CURUID'], self.PGLOG['RDAUSER'], cname) @@ -95,14 +131,15 @@ def display_process_info(self, cname, cmd): PID = pid RTIME = rtime if PID: - return self.pglog("Job <{}> is submitted to background <{}>".format(PID, self.PgLOG['HOSTNAME']), self.LOGWRN) + return self.pglog("Job <{}> is submitted to background <{}>".format(PID, self.PGLOG['HOSTNAME']), self.LOGWRN) elif i == 0: time.sleep(2) else: return self.pglog("{}: No job information found, It may have finished".format(cmd), self.LOGWRN) -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate RdaSub, parse arguments, run, and exit.""" object = RdaSub() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/rdasub.usg b/src/rda_python_miscs/rdasub.usg index 0467757..0567efc 100644 --- a/src/rda_python_miscs/rdasub.usg +++ b/src/rda_python_miscs/rdasub.usg @@ -1,12 +1,49 @@ - To submit a nohup background execution job on a Linux machine. - - Usage: rdasub [-env EnvironmentPairs] [-cwd WorkDir] -cmd Command [cmd-options] + Submit a command as a nohup background process on the local machine. The + command is launched with 'nohup ... > /dev/null 2>&1 &' so it continues + running after the shell exits and all output is discarded. The PID of the + submitted job is logged once the process is detected. - - Option -cwd, set the working directory a Command to be executed; - - - Option -cmd, mandatory option to lead a Command to be executed; + Usage: rdasub [-cwd WorkDir] [-env EnvironmentPairs] [-b] -cmd Command [cmd-options] - - Option -env, set environment name/value pairs separated by ','; + - Option -cmd, (mandatory) command to run in the background. 
Must be an + executable reachable via PATH or given as an absolute path. All + arguments after -cmd are passed directly to the command; - - cmd-options, specifies options that can be passed to the Command. + - Option -cwd, working directory for the command. The process changes + into this directory before launching. Environment variables in the + path (e.g. $HOME) are expanded automatically. Defaults to the + current directory if not specified; + + - Option -env, environment variable name/value pairs to set before the + command runs, separated by ',', e.g. -env KEY1=val1,KEY2=val2; + + - Option -b, submit rdasub itself to the background first, then launch + the command; + + - cmd-options, any options and arguments to pass to the command. Place + them after the -cmd value on the command line. + + Usage is displayed if no arguments are provided. + + Examples: + + 1. Run a script in the background from the current directory: + + rdasub -cmd /path/to/myscript.sh + + 2. Run a command with arguments in the background: + + rdasub -cmd dsrqst d277006 SP -RI 750748 -b -d + + 3. Run a command in a specific working directory: + + rdasub -cwd /glade/work/zji/jobs -cmd ./process.sh -input data.nc + + 4. Run a command with environment variables set: + + rdasub -env LANG=en_US,TZ=UTC -cmd /path/to/myscript.sh + + 5. Run a Python script in the background: + + rdasub -cwd /glade/work/zji -cmd python myscript.py --config cfg.yaml diff --git a/src/rda_python_miscs/rdazip.py b/src/rda_python_miscs/rdazip.py index cb7791c..6d04d39 100644 --- a/src/rda_python_miscs/rdazip.py +++ b/src/rda_python_miscs/rdazip.py @@ -15,15 +15,29 @@ from rda_python_common.pg_file import PgFile class RdaZip(PgFile): + """Compress or uncompress files using a supported format (gz, Z, bz2, zip). + + When a target format is specified via -f, files are compressed to that format. + Without -f, each file is uncompressed based on its current extension. 
+ Conversion between formats is also supported by combining both in one call. + """ def __init__(self): + """Initialize RdaZip with default action (uncompress), no format, and empty file list.""" super().__init__() - self.action = 0 - self.format = None - self.files = [] + self.action = 0 # 0 - uncompress, 1 - compress to self.format + self.format = None # target compression format (gz, Z, bz2, zip) + self.files = [] # list of files to process # function to read parameters def read_parameters(self): + """Parse command-line arguments into action, format, and file list. + + -f sets compress mode (action=1) and reads the target format from the + next argument. -b enables background execution. All other non-option + arguments are treated as files to process; each must exist on disk. + Displays usage and exits if no files are given. + """ argv = sys.argv[1:] self.set_help_path(__file__) self.PGLOG['LOGFILE'] = "rdazip.log" # set different log file @@ -51,12 +65,14 @@ def read_parameters(self): # function to start actions def start_actions(self): + """Compress or uncompress each file in the list, then close the command log.""" for file in self.files: self.compress_local_file(file, self.format, self.action, self.LGWNEX) self.cmdlog() -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate RdaZip, parse arguments, run, and exit.""" object = RdaZip() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/rdazip.usg b/src/rda_python_miscs/rdazip.usg index 6dc5fad..0a8bf0f 100644 --- a/src/rda_python_miscs/rdazip.usg +++ b/src/rda_python_miscs/rdazip.usg @@ -1,20 +1,56 @@ - Compress/Uncompress given files. - - Usage: rdazip [-f CompressFormat] FileList - - - Option -f, compression format, Z, gz, zip or bz2. 
Specify this format - to compress, otherwise to umcompress - - For examples, to compress a file using gzip, testfile.txt => testfile.txt.gz - - rdazip -f gz testfile.txt - - and to uncompress a gzipped file, testfile.txt.gz => get testfile.txt - - rdazip testfile.txt.gz - - and to change compression from Z to gz, testfile.txt.Z => testfile.txt.gz - - rdazip -f gz testfile.txt.Z - \ No newline at end of file + Compress or uncompress files using a supported format. When a target format + is given via -f, files are compressed to that format. Without -f, each file + is uncompressed based on its current extension. Conversion from one format + to another is also supported in a single call. + + Supported formats: gz, Z, bz2, zip + + Usage: rdazip [-f CompressFormat] [-b] FileList + + - Option -f, target compression format. Providing this option selects + compress mode; omitting it selects uncompress mode. Supported values: + gz - gzip compression (.gz extension) + Z - compress utility (.Z extension) + bz2 - bzip2 compression (.bz2 extension) + zip - zip archive (.zip extension) + + - Option -b, run the operation in the background; + + - FileList, one or more files to process. Each file must exist. + In compress mode the original file is replaced by the compressed + version. In uncompress mode the compressed file is replaced by + the uncompressed version. + + Usage is displayed if no files are provided. + + Examples: + + 1. Compress a file with gzip: + + rdazip -f gz testfile.txt + # result: testfile.txt => testfile.txt.gz + + 2. Uncompress a gzip file (format detected from extension): + + rdazip testfile.txt.gz + # result: testfile.txt.gz => testfile.txt + + 3. Compress a file with bzip2: + + rdazip -f bz2 testfile.txt + # result: testfile.txt => testfile.txt.bz2 + + 4. Uncompress a bzip2 file: + + rdazip testfile.txt.bz2 + # result: testfile.txt.bz2 => testfile.txt + + 5. 
Convert from compress (.Z) format to gzip (.gz): + + rdazip -f gz testfile.txt.Z + # result: testfile.txt.Z => testfile.txt.gz + + 6. Compress multiple files with gzip in the background: + + rdazip -b -f gz file1.txt file2.txt file3.txt diff --git a/src/rda_python_miscs/tcshqsub.py b/src/rda_python_miscs/tcshqsub.py index a078b93..405a24a 100644 --- a/src/rda_python_miscs/tcshqsub.py +++ b/src/rda_python_miscs/tcshqsub.py @@ -16,8 +16,14 @@ from rda_python_common.pg_log import PgLOG class TcshQsub(PgLOG): + """Submit a PBS batch job via a dynamically generated tcsh script using qsub. + + Builds a tcsh script with PBS directives, module loads, and conda environment + activation, then submits it through the PBS qsub command. + """ def __init__(self): + """Initialize TcshQsub with default PBS resource settings and options.""" super().__init__() self.DEFMODS = { 'default': "ncarenv,netcdf,ncl,nco,cdo,conda,grib-util,wgrib2" @@ -44,6 +50,13 @@ def __init__(self): # function to read parameters def read_parameters(self): + """Parse command-line arguments and populate PBS options and customized options. + + Handles single-dash qsub options (e.g. -q, -A, -l) and long custom options + (-cmd, -cwd, -env, -mod, -res). Validates that the qsub command is available + and that a -cmd value is provided. Sets default log paths and job name if not + specified, and changes the working directory if -cwd is given. 
+ """ aname = 'tcshqsub' pname = 'gdexqsub' self.set_help_path(__file__) @@ -91,8 +104,9 @@ def read_parameters(self): if '$' in self.coptions['cwd']: self.coptions['cwd'] = self.replace_environments(self.coptions['cwd'], '', self.LGWNEX) os.chdir(self.coptions['cwd']) - # fnction to start actions + # function to start actions def start_actions(self): + """Resolve the command path, build the tcsh script, and submit it via qsub.""" cmd = self.valid_command(self.coptions['cmd']) if not cmd and not re.match(r'^/', self.coptions['cmd']): cmd = self.valid_command('./' + self.coptions['cmd']) if not cmd: self.pglog(self.coptions['cmd'] + ": Cannot find given command to run", self.LGWNEX) @@ -105,6 +119,17 @@ def start_actions(self): # build tcsh script to submit a PBS batch job def build_tcsh_script(self, cmd): + """Build and return a tcsh script string with PBS directives for the given command. + + Sets HOME, sources system and conda profile scripts and the user's .tcshrc, + loads modules, activates the conda environment, then runs the command. + + Args: + cmd (str): The fully-resolved command (with arguments) to execute in the job. + + Returns: + str: The complete tcsh batch script content. + """ buf = "#!/bin/tcsh\n\n" # sbatch starting tcsh script if 'l' in self.SOPTIONS: self.add_resources() # add options to tcsh script for qsub @@ -128,8 +153,13 @@ def build_tcsh_script(self, cmd): buf += "\necho {}\n{}\n\ndate\n".format(cmd, cmd) return buf - # check and add resource options + # check and add resource options def add_resources(self): + """Parse -l option value into the RESOURCES dict and remove the raw -l entry. + + Expects comma-separated name=value pairs (e.g. 'walltime=2:00:00,select=1:ncpus=4'). + Logs an error if a token does not contain '='. 
+ """ for res in re.split(',', self.SOPTIONS['l']): ms = re.match(r'^([^=]+)=(.+)$', res) if ms: @@ -140,6 +170,20 @@ def add_resources(self): # add module loads for modules provided def add_modules(self, res, mods): + """Build and return module load/unload commands for the tcsh script. + + Loads the default module set for the given reservation (or the 'default' set). + Additional modules in ``mods`` are appended; path-style entries (starting with + '/') use 'module use' instead of 'module load'. Modules already in the default + set are skipped. SWAPMODS entries trigger an unload before the new load. + + Args: + res (str): Reservation name used to look up DEFMODS; falls back to 'default'. + mods (str): Comma-separated list of extra modules (or None). + + Returns: + str: Shell commands to load/unload modules. + """ mbuf = "\n" defmods = self.DEFMODS[res] if res in self.DEFMODS else self.DEFMODS['default'] dmods = re.split(',', defmods) @@ -163,6 +207,16 @@ def add_modules(self, res, mods): # set virtual machine libraries def set_vm_libs(self, res): + """Build and return conda/VM library activation commands for the tcsh script. + + Looks up DEFLIBS for the given reservation (falls back to 'default'). + + Args: + res (str): Reservation name used to look up DEFLIBS; falls back to 'default'. + + Returns: + str: Shell commands to activate virtual environment libraries, or '' if none. 
+ """ deflibs = self.DEFLIBS[res] if res in self.DEFLIBS else self.DEFLIBS['default'] if not deflibs: return '' dlibs = re.split(',', deflibs) @@ -171,8 +225,9 @@ def set_vm_libs(self, res): libbuf += dlib + "\n" return libbuf -# main function to excecute this script +# main function to execute this script def main(): + """Entry point: instantiate TcshQsub, parse arguments, run, and exit.""" object = TcshQsub() object.read_parameters() object.start_actions() diff --git a/src/rda_python_miscs/tcshqsub.usg b/src/rda_python_miscs/tcshqsub.usg index e675ae8..4c04988 100644 --- a/src/rda_python_miscs/tcshqsub.usg +++ b/src/rda_python_miscs/tcshqsub.usg @@ -10,9 +10,9 @@ -o LOGPATH/gdexqsub/ -e LOGPATH/gdexqsub/ -A P43713000 - -m a - -q gdex - -l walltime=6:00:00,select=1:node=1:mem=1gb + -m n + -q gdex@casper-pbs + -l walltime=6:00:00,select=1:ncpus=1:mem=1gb - Option -cwd, set the working directory for the Command to be executed. If it is not specified, it defaults to the current directory where qsub @@ -30,21 +30,21 @@ - cmd-options, specifies options that can be passed to the Command. -A tash script example: +A tcsh script example: #!/bin/tcsh -#PBS -o /gpfs/u/home/gdexdata/dssdb/log/gdexqsub/ -#PBS -e /gpfs/u/home/gdexdata/dssdb/log/gdexqsub/ +#PBS -o /glade/u/home/gdexdata/dssdb/log/gdexqsub/ +#PBS -e /glade/u/home/gdexdata/dssdb/log/gdexqsub/ #PBS -A P43713000 #PBS -q gdex@casper-pbs -#PBS -m a +#PBS -m n #PBS -N dsrqst #PBS -l walltime=12:00:00 #PBS -l select=1:mem=20gb -setenv HOME /gpfs/u/home/davestep +setenv HOME /glade/u/home/davestep source /etc/profile.d/z00_modules.csh source /glade/u/apps/opt/conda/etc/profile.d/conda.csh -source /gpfs/u/home/davestep/.tcshrc +source /glade/u/home/davestep/.tcshrc pwd; hostname; date module load ncarenv