diff --git a/MANIFEST.in b/MANIFEST.in index 963cfff..816f50a 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,10 +1 @@ -include src/rda_python_miscs/bashqsub.usg -include src/rda_python_miscs/tcshqsub.usg -include src/rda_python_miscs/rdasub.usg -include src/rda_python_miscs/rdacp.usg -include src/rda_python_miscs/rdakill.usg -include src/rda_python_miscs/rdals.usg -include src/rda_python_miscs/rdamod.usg -include src/rda_python_miscs/rdaown.usg -include src/rda_python_miscs/rdaps.usg -include src/rda_python_miscs/rdazip.usg +include src/rda_python_miscs/*.usg diff --git a/pyproject.toml b/pyproject.toml index ca374ac..51a1d0f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ build-backend = "setuptools.build_meta" [project] name = "rda_python_miscs" -version = "1.0.1" +version = "1.0.2" authors = [ { name="Zaihua Ji", email="zji@ucar.edu" }, ] @@ -44,6 +44,7 @@ where = ["src"] "rdasub" = "rda_python_miscs.rdasub:main" "pgwget" = "rda_python_miscs.pgwget:main" "rdals" = "rda_python_miscs.rdals:main" +"gdexls" = "rda_python_miscs.gdexls:main" "rdaps" = "rda_python_miscs.rdaps:main" "rdaown" = "rda_python_miscs.rdaown:main" "rdacp.py" = "rda_python_miscs.rdacp:main" diff --git a/src/rda_python_miscs/gdexls.py b/src/rda_python_miscs/gdexls.py new file mode 100644 index 0000000..8b766a8 --- /dev/null +++ b/src/rda_python_miscs/gdexls.py @@ -0,0 +1,279 @@ +#!/usr/bin/env python3 +# +################################################################################## +# +# Title: gdexls +# Author: Zaihua Ji, zji@ucar.edu +# Date: 10/20/2020 +# 2025-03-10 transferred to package rda_python_miscs from +# https://github.com/NCAR/rda-utility-programs.git +# 2025-09-21 copied from rdals to gdexls +# Purpose: list files/directories in a local directory and show additional +# information recorded in RDADB if any +# +# Github: https://github.com/NCAR/rda-python-miscs.git +# +################################################################################## +# 
import re
import os
import sys
import glob
from os import path as op
from rda_python_common import PgLOG
from rda_python_common import PgDBI
from rda_python_common import PgUtil
from rda_python_common import PgSplit

# constants controlling how gdexls formats its listing
DIDX = 3             # index of the description column; columns before it are padded
CLMT = 500           # flush the cached rows once more than this many are pending
WIDTHS = [0, 0, 0]   # widest value seen so far in each padded column
ALIGNS = [0, 1, 1]   # alignment of each padded column, 0 - left; 1 - right

# command line options, keyed by their single-letter names
RDALS = {
    'd': 0,      # 1 to list directory information only
    'f': 0,      # 1 to list file information only
    'N': 0,      # 1 to list files unformatted
    'r': 0,      # 1 if recursive all
    'R': 0,      # > 0 to set recursive limit
    'D': None,   # delimiting symbols; main() fills in the default
}

# run-time state shared by all functions in this module
LINFO = {
    'files': [],     # paths given on the command line (or '*' of the current directory)
    'curdir': None,  # current working directory after get_real_path()
    'tpath': None,   # leading path removed from names before they are displayed
    'dhome': None,   # home directory of the dataset found most recently
    'dsid': None,    # id of the dataset found most recently
    'dcnt': 0,       # number of dataset rows displayed
    'gcnt': 0,       # number of group rows displayed
    'fcnt': 0,       # number of file rows displayed
    'pcnt': 0,       # number of rows cached in 'pgrecs' and not yet printed
    'pgrecs': [],    # rows waiting to be printed by display_format_list()
}


def main():
    """Run gdexls: parse the command line, list the matches, print a summary.

    Exits through PgLOG.pgexit() with 1 when the current directory is empty
    or when nothing was displayed, and with 0 otherwise.
    """
    PgDBI.view_dbinfo()
    PgLOG.set_help_path(__file__)
    PgLOG.PGLOG['LOGFILE'] = "gdexls.log"   # set different log file
    LINFO['curdir'] = get_real_path(os.getcwd())
    argv = sys.argv[1:]
    PgLOG.pglog("gdexls {} ({})".format(' '.join(argv), LINFO['curdir']))
    _parse_arguments(argv)

    if not LINFO['files']:
        LINFO['files'] = sorted(glob.glob('*'))   # view all files in current directory
        if not LINFO['files']:
            sys.stderr.write(LINFO['curdir'] + ": Empty directory\n")
            PgLOG.pgexit(1)

    if not (RDALS['d'] or RDALS['f']):
        RDALS['d'] = RDALS['f'] = 1   # list both directories and files as default
    if not RDALS['D']:
        # '|' for unformatted output, otherwise the double spaces the usage text documents
        RDALS['D'] = '|' if RDALS['N'] else "  "
    if RDALS['r'] and not RDALS['R']:
        RDALS['R'] = 1000   # -r without -R: use a limit deep enough to act as unlimited

    display_top_list(LINFO['files'])   # display or cache file/directory list
    if LINFO['pcnt'] > 0:
        display_format_list()          # print whatever is still cached

    total = LINFO['dcnt'] + LINFO['gcnt'] + LINFO['fcnt']
    if total > 1:
        print("Total {} displayed".format(_count_summary()))
    elif total == 0:
        where = LINFO['tpath'] if LINFO['tpath'] else LINFO['curdir']
        sys.stderr.write(where + ": No RDA data information found\n")
        PgLOG.pgexit(1)

    PgLOG.pgexit(0)


def _parse_arguments(argv):
    """Fill RDALS and LINFO['files'] from the command line arguments.

    Errors are reported via PgLOG.pglog(..., PgLOG.LGEREX).
    NOTE(review): LGEREX presumably logs the message and exits - confirm in PgLOG.
    """
    option = defopt = 'l'   # 'l' means "the next plain argument is a path to list"
    for arg in argv:
        if re.match(r'-(h|-*help|\?)$', arg):
            PgLOG.show_usage("gdexls")
        ms = re.match(r'-(\w)$', arg)
        if ms:
            option = ms.group(1)
            if option not in RDALS:
                PgLOG.pglog(arg + ": Unknown Option", PgLOG.LGEREX)
            if option in 'dfNr':   # flag options take no value
                RDALS[option] = 1
                option = defopt
            continue
        if not option:
            PgLOG.pglog(arg + ": Value provided without option", PgLOG.LGEREX)
        if option == 'l':
            LINFO['files'].append(get_real_path(arg))
            defopt = None   # once paths start, a further bare value is an error
        elif option == 'R':
            try:
                RDALS[option] = int(arg)
            except ValueError:
                # report a bad level like the other usage errors instead of a traceback
                PgLOG.pglog(arg + ": Invalid recursive level for Option -R", PgLOG.LGEREX)
            option = defopt
        else:
            RDALS[option] = arg
            option = defopt


def _count_summary():
    """Return the counts of displayed rows, e.g. '1 Dataset & 3 Files'."""
    parts = []
    for key, label in (('dcnt', 'Dataset'), ('gcnt', 'Group'), ('fcnt', 'File')):
        count = LINFO[key]
        if count > 0:
            parts.append("{} {}{}".format(count, label, 's' if count > 1 else ''))
    return " & ".join(parts)


def _list_directory(path):
    """Return the sorted entries directly under path.

    The directory name is escaped so that glob metacharacters in it
    ('[', '*', '?') are taken literally.
    """
    return sorted(glob.glob(glob.escape(path) + "/*"))


def _strip_prefix(path, prefix):
    """Return path without the leading prefix; unchanged if it does not start with it."""
    if prefix and path.startswith(prefix):
        return path[len(prefix):]
    return path


def _web_path(file):
    """Return file relative to LINFO['dhome'], or None if it is not below it."""
    prefix = LINFO['dhome'] + "/"
    if file.startswith(prefix):
        return file[len(prefix):]
    return None


def _sql_escape(value):
    """Double single quotes so value can sit inside a quoted SQL string.

    NOTE(review): assumes the database uses standard SQL string literals
    (the PostgreSQL default) - confirm against PgDBI.
    """
    return str(value).replace("'", "''")


def display_top_list(files):
    """Display the paths given on the command line.

    A directory given with a trailing '/' is not displayed itself; only its
    content is. LINFO['tpath'] is set per path to the prefix that is removed
    from displayed names.
    """
    for file in files:
        if not op.exists(file):
            sys.stderr.write(file + ": NOT exists\n")
            continue

        isdir = 1 if op.isdir(file) else 0
        display = 1
        if isdir and file.endswith('/'):
            display = 0        # do not display the directory info if it is ended by '/'
            file = file[:-1]

        if not file.startswith('/'):
            file = PgLOG.join_paths(LINFO['curdir'], file)
        LINFO['tpath'] = (op.dirname(file) if display else file) + "/"
        if display:
            display_line(file, isdir)
        # descend when recursion is requested, when the directory itself was not
        # displayed, or when no dataset was found for it
        if isdir and (RDALS['R'] or not display or not LINFO['dsid']):
            display_list(_list_directory(file), 1)
            if LINFO['pcnt'] > CLMT:
                display_format_list()


def display_list(files, level):
    """Display files and descend into directories while level < RDALS['R']."""
    for file in files:
        isdir = 1 if op.isdir(file) else 0
        display_line(file, isdir)
        if isdir and level < RDALS['R']:
            display_list(_list_directory(file), level + 1)
            if LINFO['pcnt'] > CLMT:
                display_format_list()


def display_line(file, isdir):
    """Look up the dataset/group/file record for one path and display it.

    The dataset found last is kept in LINFO['dsid'] / LINFO['dhome'] and is
    reused while paths stay below that dataset home; otherwise the dataset
    is looked up again. Nothing is displayed when no record is found.
    """
    wfile = None
    if LINFO['dsid'] and LINFO['dhome']:
        wfile = _web_path(file)

    if wfile is None:
        # outside the cached dataset: forget its home so that a failed
        # lookup below cannot leave a dataset id paired with a stale home
        LINFO['dhome'] = None
        LINFO['dsid'] = PgUtil.find_dataset_id(file, logact=PgLOG.LOGWRN)
        if LINFO['dsid'] is None:
            return   # skip for missing dsid

        pgrec = PgDBI.pgget("dataset", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns",
                            "dsid = '{}'".format(_sql_escape(LINFO['dsid'])), PgLOG.LGEREX)
        if not pgrec:
            return

        LINFO['dhome'] = "{}/{}".format(PgLOG.PGLOG['DSDHOME'], LINFO['dsid'])
        if LINFO['dhome'] == file:
            # the path is the dataset home itself
            if RDALS['d']:
                name = _strip_prefix(file, LINFO['tpath'])
                title = pgrec['title'] if pgrec['title'] else ''
                display_record(["D" + name, pgrec['ns'], str(pgrec['nc']), title])
                LINFO['dcnt'] += 1
            return

        wfile = _web_path(file)
        if wfile is None:
            return

    if isdir:
        if RDALS['d']:   # check and display group info for directory
            pgrec = PgDBI.pgget("dsgroup", "title, (dwebcnt + nwebcnt) nc, (dweb_size + nweb_size) ns",
                                "dsid = '{}' AND webpath = '{}'".format(_sql_escape(LINFO['dsid']),
                                                                         _sql_escape(wfile)),
                                PgLOG.LGEREX)
            if pgrec:
                name = _strip_prefix(file, LINFO['tpath'])
                title = pgrec['title'] if pgrec['title'] else ''
                display_record(["G" + name, pgrec['ns'], str(pgrec['nc']), title])
                LINFO['gcnt'] += 1

    elif RDALS['f']:     # check and display file info
        pgrec = PgSplit.pgget_wfile(LINFO['dsid'], "data_size, data_format, note",
                                    "wfile = '{}'".format(_sql_escape(wfile)), PgLOG.LGEREX)
        if pgrec:
            # keep each record on one output line
            note = pgrec['note'].replace('\n', ' ') if pgrec['note'] else ''
            name = _strip_prefix(file, LINFO['tpath'])
            display_record(["F" + name, pgrec['data_size'], pgrec['data_format'], note])
            LINFO['fcnt'] += 1


def display_record(disp):
    """Print one row (with Option -N) or cache it for formatted output.

    disp is [name, size, count-or-format, description] and is modified in
    place: the size is replaced by its text form and every field becomes a
    string, with None turned into ''.
    """
    disp[1] = get_float_string(disp[1])
    # values read from the database may be None or not strings
    for i, value in enumerate(disp):
        disp[i] = '' if value is None else str(value)

    if RDALS['N']:
        print(RDALS['D'].join(disp))
        return

    LINFO['pgrecs'].append(disp)
    LINFO['pcnt'] += 1
    for i in range(DIDX):
        WIDTHS[i] = max(WIDTHS[i], len(disp[i]))


def display_format_list():
    """Print all cached rows with padded columns and empty the cache."""
    for disp in LINFO['pgrecs']:
        for i in range(DIDX):
            if ALIGNS[i] == 1:
                disp[i] = disp[i].rjust(WIDTHS[i])
            else:
                disp[i] = disp[i].ljust(WIDTHS[i])
        print(RDALS['D'].join(disp))

    # drop the printed rows; leaving them cached would make the next flush
    # print these rows again instead of the newly cached ones
    del LINFO['pgrecs'][:]
    LINFO['pcnt'] = 0


def get_float_string(val):
    """Return a size as text with a unit, e.g. 512 -> '512B', 1500 -> '1.50K'.

    Units step by a factor of 1000 from 'B' up to 'P'. None is shown as '0B'.
    """
    units = ['B', 'K', 'M', 'G', 'T', 'P']
    if val is None:
        val = 0

    idx = 0
    while val >= 1000 and idx < len(units) - 1:
        val /= 1000
        idx += 1

    if idx > 0:
        return "{:.2f}{}".format(val, units[idx])
    return "{}{}".format(val, units[idx])


def get_real_path(path):
    """Map a /gpfs path to its /glade equivalent; other paths are returned as is.

    Paths starting with '/gpfs/u' get '/gpfs' replaced by '/glade'; paths
    starting with '/gpfs/csfs1/' get '/gpfs/csfs1' replaced by '/glade/campaign'.
    """
    if path.startswith('/gpfs/u'):
        return '/glade' + path[len('/gpfs'):]
    if path.startswith('/gpfs/csfs1/'):
        return '/glade/campaign' + path[len('/gpfs/csfs1'):]
    return path


#
# call main() to start program
#
if __name__ == "__main__":
    main()
+ + A leading letter is displayed on each line to indicate what type of item is listed, + including 'D' for a whole dataset, 'G' for a group or subgroup in a dataset, + and 'F' for a data file. + + The output of directory/file list is formatted by default with double spaces + as the delimiter and each column lined up vertically at least for the files under each + directory. Provide Option -N to display the list without formatting. The delimiter symbol + defaults to '|' if Option -N is present. + + Usage: gdexls [-d] [-f] [-N] [-h] [-r] [-D DelimitSymbols] [-R RecursiveLevel] [Directory/File List] + + - Option -d, list directory information only. Directory information + is included by default. Add this option to exclude file information; + + - Option -f, list file information only. File information + is included by default. Add this option to exclude directory information; + + - Option -N, list files unformatted; + + - Option -h, display this help document; + + - Option -r, list directories and files recursively; + + - Option -R, list directories and files recursively up to the level + provided with this Option; + + - Option -D, specify delimiting symbols for dividing the columns. + It defaults to " " for formatted output and '|' for unformatted output. + Make sure to quote the symbols if any character in the symbols has a special Unix + meaning, for example -D '<:>'; + + - Directory/file List is optional; without specification, all directories + and files in the current directory are listed. Unix command line + wildcards are supported. + + This utility program can be executed anywhere. Nothing is displayed if neither + directory nor file information has been pre-gathered in the database. + + For example, to check directories and files of ds277.6, you can + + 1. 
Change into the dataset home data directory as 'cd /PathTo/ds277.6' and + execute 'gdexls'; add the recursive option '-r' to check directories and files + further into the sub-directories, or change directory into a sub-directory + to check files inside of it. + + 2. Pass an absolute path to gdexls as 'gdexls /PathTo/ds277.6/' or as + 'gdexls /PathTo/ds277.6/*'; without the ending '/' or an appended + wildcard symbol '*', information of the dataset itself is checked unless + the recursive option '-r' or '-R RecursiveLevel' is present. + + 3. If the current directory is in another dataset home data directory, + such as /PathTo/ds277.7, you can pass a relative path to gdexls + as 'gdexls ../ds277.6/' or as 'gdexls ../ds277.6/*'.