From ed38fdeaf8e6d465b52054f8f119400b902cc0c7 Mon Sep 17 00:00:00 2001 From: Aaron Lichtman Date: Fri, 12 Jan 2024 19:15:22 -0800 Subject: [PATCH 1/4] Format with black --- gitdir/gitdir.py | 99 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 68 insertions(+), 31 deletions(-) diff --git a/gitdir/gitdir.py b/gitdir/gitdir.py index a33b9b7..6aa4535 100644 --- a/gitdir/gitdir.py +++ b/gitdir/gitdir.py @@ -13,10 +13,14 @@ # this ANSI code lets us erase the current line ERASE_LINE = "\x1b[2K" -COLOR_NAME_TO_CODE = {"default": "", "red": Fore.RED, "green": Style.BRIGHT + Fore.GREEN} +COLOR_NAME_TO_CODE = { + "default": "", + "red": Fore.RED, + "green": Style.BRIGHT + Fore.GREEN, +} -def print_text(text, color="default", in_place=False, **kwargs): # type: (str, str, bool, any) -> None +def print_text(text, color="default", in_place=False, **kwargs) -> None: """ print text to console, a wrapper to built-in print @@ -34,27 +38,37 @@ def create_url(url): """ From the given url, produce a URL that is compatible with Github's REST API. Can handle blob or tree paths. """ - repo_only_url = re.compile(r"https:\/\/github\.com\/[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}\/[a-zA-Z0-9]+$") + repo_only_url = re.compile( + r"https:\/\/github\.com\/[a-z\d](?:[a-z\d]|-(?=[a-z\d])){0,38}\/[a-zA-Z0-9]+$" + ) re_branch = re.compile("/(tree|blob)/(.+?)/") # Check if the given url is a url to a GitHub repo. If it is, tell the # user to use 'git clone' to download it - if re.match(repo_only_url,url): - print_text("✘ The given url is a complete repository. Use 'git clone' to download the repository", - "red", in_place=True) + if re.match(repo_only_url, url): + print_text( + "✘ The given url is a complete repository. Use 'git clone' to download the repository", + "red", + in_place=True, + ) sys.exit() # extract the branch name from the given url (e.g master) branch = re_branch.search(url) - download_dirs = url[branch.end():] - api_url = (url[:branch.start()].replace("github.com", "api.github.com/repos", 1) + - "/contents/" + download_dirs + "?ref=" + branch.group(2)) + download_dirs = url[branch.end() :] + api_url = ( + url[: branch.start()].replace("github.com", "api.github.com/repos", 1) + + "/contents/" + + download_dirs + + "?ref=" + + branch.group(2) + ) return api_url, download_dirs def download(repo_url, flatten=False, output_dir="./"): - """ Downloads the files and directories in repo_url. If flatten is specified, the contents of any and all - sub-directories will be pulled upwards into the root folder. """ + """Downloads the files and directories in repo_url. If flatten is specified, the contents of any and all + sub-directories will be pulled upwards into the root folder.""" # generate the url which returns the JSON data api_url, download_dirs = create_url(repo_url) @@ -70,7 +84,7 @@ def download(repo_url, flatten=False, output_dir="./"): try: opener = urllib.request.build_opener() - opener.addheaders = [('User-agent', 'Mozilla/5.0')] + opener.addheaders = [("User-agent", "Mozilla/5.0")] urllib.request.install_opener(opener) response = urllib.request.urlretrieve(api_url) except KeyboardInterrupt: @@ -98,17 +112,23 @@ def download(repo_url, flatten=False, output_dir="./"): try: # download the file opener = urllib.request.build_opener() - opener.addheaders = [('User-agent', 'Mozilla/5.0')] + opener.addheaders = [("User-agent", "Mozilla/5.0")] urllib.request.install_opener(opener) - urllib.request.urlretrieve(data["download_url"], os.path.join(dir_out, data["name"])) + urllib.request.urlretrieve( + data["download_url"], os.path.join(dir_out, data["name"]) + ) # bring the cursor to the beginning, erase the current line, and dont make a new line - print_text("Downloaded: " + Fore.WHITE + "{}".format(data["name"]), "green", in_place=True) + print_text( + "Downloaded: " + Fore.WHITE + "{}".format(data["name"]), + "green", + in_place=True, + ) return total_files except KeyboardInterrupt: # when CTRL+C is pressed during the execution of this script, # bring the cursor to the beginning, erase the current line, and dont make a new line - print_text("✘ Got interrupted", 'red', in_place=False) + print_text("✘ Got interrupted", "red", in_place=False) sys.exit() for file in data: @@ -122,7 +142,7 @@ def download(repo_url, flatten=False, output_dir="./"): path = file_path dirname = os.path.dirname(path) - if dirname != '': + if dirname != "": os.makedirs(os.path.dirname(path), exist_ok=True) else: pass @@ -130,19 +150,24 @@ def download(repo_url, flatten=False, output_dir="./"): if file_url is not None: try: opener = urllib.request.build_opener() - opener.addheaders = [('User-agent', 'Mozilla/5.0')] + opener.addheaders = [("User-agent", "Mozilla/5.0")] urllib.request.install_opener(opener) # download the file urllib.request.urlretrieve(file_url, path) # bring the cursor to the beginning, erase the current line, and dont make a new line - print_text("Downloaded: " + Fore.WHITE + "{}".format(file_name), "green", in_place=False, end="\n", - flush=True) + print_text( + "Downloaded: " + Fore.WHITE + "{}".format(file_name), + "green", + in_place=False, + end="\n", + flush=True, + ) except KeyboardInterrupt: # when CTRL+C is pressed during the execution of this script, # bring the cursor to the beginning, erase the current line, and dont make a new line - print_text("✘ Got interrupted", 'red', in_place=False) + print_text("✘ Got interrupted", "red", in_place=False) sys.exit() else: download(file["html_url"], flatten, download_dirs) @@ -151,19 +176,31 @@ def download(repo_url, flatten=False, output_dir="./"): def main(): - if sys.platform != 'win32': + if sys.platform != "win32": # disbale CTRL+Z signal.signal(signal.SIGTSTP, signal.SIG_IGN) - parser = argparse.ArgumentParser(description="Download directories/folders from GitHub") - parser.add_argument('urls', nargs="+", - help="List of Github directories to download.") - parser.add_argument('--output_dir', "-d", dest="output_dir", default="./", - help="All directories will be downloaded to the specified directory.") - - parser.add_argument('--flatten', '-f', action="store_true", - help='Flatten directory structures. Do not create extra directory and download found files to' - ' output directory. (default to current directory if not specified)') + parser = argparse.ArgumentParser( + description="Download directories/folders from GitHub" + ) + parser.add_argument( + "urls", nargs="+", help="List of Github directories to download." + ) + parser.add_argument( + "--output_dir", + "-d", + dest="output_dir", + default="./", + help="All directories will be downloaded to the specified directory.", + ) + + parser.add_argument( + "--flatten", + "-f", + action="store_true", + help="Flatten directory structures. Do not create extra directory and download found files to" + " output directory. (default to current directory if not specified)", + ) args = parser.parse_args() From c67c4629160eb45b36237f10b48f2410018c21ba Mon Sep 17 00:00:00 2001 From: Aaron Lichtman Date: Sat, 13 Jan 2024 01:27:36 -0800 Subject: [PATCH 2/4] Gracefully handle URL without branch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current behavior: ``` gitdir https://github.com/folke/dot/ Traceback (most recent call last): File "/home/alichtman/.local/bin/gitdir", line 8, in sys.exit(main()) │ └ File "/home/alichtman/.local/lib/python3.12/site-packages/gitdir/gitdir.py", line 172, in main total_files = download(url, flatten, args.output_dir) │ │ │ └ Namespace(urls=['https://github.com/folke/dot/'], output_dir='./', flatten=False) │ │ └ False │ └ 'https://github.com/folke/dot/' └ File "/home/alichtman/.local/lib/python3.12/site-packages/gitdir/gitdir.py", line 60, in download api_url, download_dirs = create_url(repo_url) │ └ 'https://github.com/folke/dot/' └ File "/home/alichtman/.local/lib/python3.12/site-packages/gitdir/gitdir.py", line 49, in create_url download_dirs = url[branch.end():] │ └ None └ 'https://github.com/folke/dot/' AttributeError: 'NoneType' object has no attribute 'end' ``` New behavior: ``` python3 gitdir.py https://github.com/folke/dot/ ✘ Could not find branch name in the given url ``` --- gitdir/gitdir.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gitdir/gitdir.py b/gitdir/gitdir.py index 6aa4535..b566470 100644 --- a/gitdir/gitdir.py +++ b/gitdir/gitdir.py @@ -55,6 +55,11 @@ def create_url(url): # extract the branch name from the given url (e.g master) branch = re_branch.search(url) + if branch is None: + print_text( + "✘ Could not find branch name in the given url", "red", in_place=True + ) + sys.exit() download_dirs = url[branch.end() :] api_url = ( url[: branch.start()].replace("github.com", "api.github.com/repos", 1) From 4db2a9d3ab9e6c6bb282ad6e71888e928749994d Mon Sep 17 00:00:00 2001 From: Aaron Lichtman Date: Sat, 13 Jan 2024 01:46:49 -0800 Subject: [PATCH 3/4] Prompt before overwriting files And a bunch of refactoring --- gitdir/gitdir.py | 157 ++++++++++++++++++++++++++++++++++------------- 1 file changed, 115 insertions(+), 42 deletions(-) diff --git a/gitdir/gitdir.py b/gitdir/gitdir.py index b566470..18b5f0a 100644 --- a/gitdir/gitdir.py +++ b/gitdir/gitdir.py @@ -7,6 +7,7 @@ import json import sys from colorama import Fore, Style, init +from pathlib import Path init() @@ -17,9 +18,18 @@ "default": "", "red": Fore.RED, "green": Style.BRIGHT + Fore.GREEN, + "yellow": Style.BRIGHT + Fore.YELLOW, } +class FileToDownload: + def __init__(self, name, url, path, dest_path): + self.name = name + self.url = url + self.path = path + self.dest_path: Path = dest_path + + def print_text(text, color="default", in_place=False, **kwargs) -> None: """ print text to console, a wrapper to built-in print @@ -34,6 +44,32 @@ def print_text(text, color="default", in_place=False, **kwargs) -> None: print(COLOR_NAME_TO_CODE[color] + text + Style.RESET_ALL, **kwargs) +def prompt_yes_no(question: str, default: bool = True) -> bool: + """ + Prompt user for a yes/no question. + + :param question: question to ask + :param default: default answer if user just presses enter + :return: True if user answers yes, False if user answers no + """ + if default: + yes_no = "Y/n" + else: + yes_no = "y/N" + + while True: + print_text("{} [{}] ".format(question, yes_no), end="") + choice = input().lower() + if choice in {"y", "yes"}: + return True + elif choice in {"n", "no"}: + return False + elif choice == "": + return default + else: + print_text("Please respond with 'yes' or 'no' (or 'y' or 'n').") + + def create_url(url): """ From the given url, produce a URL that is compatible with Github's REST API. Can handle blob or tree paths. @@ -71,7 +107,42 @@ def create_url(url): return api_url, download_dirs -def download(repo_url, flatten=False, output_dir="./"): +def download_file(file_to_download: FileToDownload, force: bool) -> None: + if os.path.exists(file_to_download.dest_path) and not force: + if prompt_yes_no( + "✘ File {} already exists. Overwrite?".format(file_to_download.dest_path), + default=False, + ): + urllib.request.urlretrieve(file_to_download.url, file_to_download.dest_path) + # bring the cursor to the beginning, erase the current line, and dont make a new line + print_text( + "Downloading (overwriting): " + + Fore.WHITE + + "{} to {}".format( + file_to_download.name, file_to_download.dest_path.resolve() + ), + "green", + in_place=True, + ) + else: + print_text( + "Skipped: " + Fore.WHITE + "{}".format(file_to_download.name), + "yellow", + in_place=True, + ) + else: + urllib.request.urlretrieve(file_to_download.url, file_to_download.dest_path) + # bring the cursor to the beginning, erase the current line, and dont make a new line + print_text( + "Downloaded: " + + Fore.WHITE + + "{} to {}".format(file_to_download.name, file_to_download.dest_path), + "green", + in_place=True, + ) + + +def download(repo_url, flatten=False, force=False, output_dir="./"): """Downloads the files and directories in repo_url. If flatten is specified, the contents of any and all sub-directories will be pulled upwards into the root folder.""" @@ -87,10 +158,9 @@ def download(repo_url, flatten=False, output_dir="./"): else: dir_out = output_dir + dir_out = Path(dir_out) + try: - opener = urllib.request.build_opener() - opener.addheaders = [("User-agent", "Mozilla/5.0")] - urllib.request.install_opener(opener) response = urllib.request.urlretrieve(api_url) except KeyboardInterrupt: # when CTRL+C is pressed during the execution of this script, @@ -99,8 +169,7 @@ def download(repo_url, flatten=False, output_dir="./"): sys.exit() if not flatten: - # make a directory with the name which is taken from - # the actual repo + # make a directory with the name which is taken from the actual repo os.makedirs(dir_out, exist_ok=True) # total files count @@ -114,20 +183,17 @@ def download(repo_url, flatten=False, output_dir="./"): # If the data is a file, download it as one. if isinstance(data, dict) and data["type"] == "file": + print("Single file download") try: # download the file - opener = urllib.request.build_opener() - opener.addheaders = [("User-agent", "Mozilla/5.0")] - urllib.request.install_opener(opener) - urllib.request.urlretrieve( - data["download_url"], os.path.join(dir_out, data["name"]) - ) - # bring the cursor to the beginning, erase the current line, and dont make a new line - print_text( - "Downloaded: " + Fore.WHITE + "{}".format(data["name"]), - "green", - in_place=True, + dest_path = dir_out / Path(data["name"]) + file_to_download = FileToDownload( + name=data["name"], + url=data["download_url"], + dest_path=dest_path, + path=data["path"], ) + download_file(file_to_download, force) return total_files except KeyboardInterrupt: @@ -137,49 +203,48 @@ def download(repo_url, flatten=False, output_dir="./"): sys.exit() for file in data: - file_url = file["download_url"] - file_name = file["name"] file_path = file["path"] if flatten: - path = os.path.basename(file_path) + path = Path(os.path.basename(file_path)) else: - path = file_path - dirname = os.path.dirname(path) + path = Path(file_path) + + file_to_download = FileToDownload( + name=file["name"], + url=file["download_url"], + dest_path=path, + path=file["path"], + ) - if dirname != "": - os.makedirs(os.path.dirname(path), exist_ok=True) + if path.parent != "": + os.makedirs(path.parent, exist_ok=True) else: pass - if file_url is not None: + if file_to_download.url is not None: try: - opener = urllib.request.build_opener() - opener.addheaders = [("User-agent", "Mozilla/5.0")] - urllib.request.install_opener(opener) - # download the file - urllib.request.urlretrieve(file_url, path) - - # bring the cursor to the beginning, erase the current line, and dont make a new line - print_text( - "Downloaded: " + Fore.WHITE + "{}".format(file_name), - "green", - in_place=False, - end="\n", - flush=True, - ) - + download_file(file_to_download, force) except KeyboardInterrupt: # when CTRL+C is pressed during the execution of this script, # bring the cursor to the beginning, erase the current line, and dont make a new line print_text("✘ Got interrupted", "red", in_place=False) sys.exit() else: - download(file["html_url"], flatten, download_dirs) + download(file["html_url"], flatten, force, download_dirs) return total_files +def set_up_url_opener(): + """ + Set up the URL opener to mimic a browser. + """ + opener = urllib.request.build_opener() + opener.addheaders = [("User-agent", "Mozilla/5.0")] + urllib.request.install_opener(opener) + + def main(): if sys.platform != "win32": # disbale CTRL+Z @@ -207,11 +272,19 @@ def main(): " output directory. (default to current directory if not specified)", ) + parser.add_argument( + "--force", + action="store_true", + help="Force overwriting existing files.", + ) + args = parser.parse_args() + set_up_url_opener() + flatten = args.flatten for url in args.urls: - total_files = download(url, flatten, args.output_dir) + download(url, flatten, args.force, args.output_dir) print_text("✔ Download complete", "green", in_place=True) From bf7deab71cd19c1368beddc4b924b006d0ba357b Mon Sep 17 00:00:00 2001 From: Aaron Lichtman Date: Sat, 13 Jan 2024 01:49:18 -0800 Subject: [PATCH 4/4] Version bump to v1.2.8 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 686a151..0b579a3 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name='gitdir', - version='1.2.7', + version='1.2.8', author='Siddharth Dushantha', author_email='siddharth.dushantha@gmail.com', description='Download a single directory/folder from a GitHub repo',