From 001eb56147303dadfbf660fe559a09158c83b41d Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 1 Jun 2021 23:22:01 +0000 Subject: [PATCH 01/15] Bump urllib3 from 1.24.1 to 1.26.5 Bumps [urllib3](https://github.com/urllib3/urllib3) from 1.24.1 to 1.26.5. - [Release notes](https://github.com/urllib3/urllib3/releases) - [Changelog](https://github.com/urllib3/urllib3/blob/main/CHANGES.rst) - [Commits](https://github.com/urllib3/urllib3/compare/1.24.1...1.26.5) --- updated-dependencies: - dependency-name: urllib3 dependency-type: direct:production ... Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 6156c01..7476e03 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,5 +10,5 @@ multidict==4.5.2 requests==2.21.0 tqdm==4.30.0 typing-extensions==3.7.2 -urllib3==1.24.1 +urllib3==1.26.5 yarl==1.3.0 From 61501a493eba597199e06fe97df9eac36e81df14 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Fri, 26 Feb 2021 02:25:19 +0000 Subject: [PATCH 02/15] Bump aiohttp from 3.5.4 to 3.7.4 Bumps [aiohttp](https://github.com/aio-libs/aiohttp) from 3.5.4 to 3.7.4. 
- [Release notes](https://github.com/aio-libs/aiohttp/releases) - [Changelog](https://github.com/aio-libs/aiohttp/blob/master/CHANGES.rst) - [Commits](https://github.com/aio-libs/aiohttp/compare/v3.5.4...v3.7.4) Signed-off-by: dependabot[bot] --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7476e03..a1c50b9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ aiofiles==0.4.0 -aiohttp==3.5.4 +aiohttp==3.7.4 async-timeout==3.0.1 attrs==18.2.0 certifi==2018.11.29 From 12c65b1aa13894f119771065075bdbed2be701ed Mon Sep 17 00:00:00 2001 From: Pablo Delgado Date: Tue, 9 Feb 2021 17:44:24 +0100 Subject: [PATCH 03/15] Update main.py Corrected the issue #34 (books can't be downloaded if the filename contains symbol ) for errors in book names with ? --- main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/main.py b/main.py index dd6f88d..e711e9f 100755 --- a/main.py +++ b/main.py @@ -206,7 +206,7 @@ def main(argv): file_types = get_book_file_types(user, book['productId']) for file_type in file_types: if file_type in book_file_types: # check if the file type entered is available by the current book - book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_').replace('/','') + book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_').replace('/','').replace('?','') if separate: filename = f'{root_directory}/{book_name}/{book_name}.{file_type}' move_current_files(root_directory, book_name) From 1c9a1be806ccb5fec0ab74b26848ba4d83d22c9c Mon Sep 17 00:00:00 2001 From: Letouane <32341208+Letouane@users.noreply.github.com> Date: Fri, 18 Sep 2020 14:46:46 -1000 Subject: [PATCH 04/15] Update README.md Some video book need to be downloaded with the "video" option. 
--- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 24ff5a7..991902b 100644 --- a/README.md +++ b/README.md @@ -32,7 +32,7 @@ After the execution, you can see the content in the `book` directory. - *-e*, *--email* = Your login email - *-p*, *--password* = Your login password - *-d*, *--directory* = Directory to download into. Default is "media/" in the current directory -- *-b*, *--books* = Assets to download. Options are: *pdf,mobi,epub,code* +- *-b*, *--books* = Assets to download. Options are: *pdf,mobi,epub,code,video* - *-s*, *--separate* = Create a separate directory for each book - *-v*, *--verbose* = Show more detailed information - *-q*, *--quiet* = Don't show information or progress bars @@ -43,5 +43,6 @@ After the execution, you can see the content in the `book` directory. - *mobi*: MOBI format - *epub*: EPUB format - *code*: Accompanying source code, saved as .zip files +- *video*: Some courses are in video format I'm working on Python 3.6.0 From 3f7af3588daa7f47351f2d20db934134d93a2e35 Mon Sep 17 00:00:00 2001 From: Levi Rizki Saputra Date: Tue, 2 Apr 2019 19:17:01 +0700 Subject: [PATCH 05/15] support download using ids and fix some mistake --- .gitignore | 9 ++++++- README.md | 1 + config.py | 6 +++++ main.py | 72 ++++++++++++++++++++++++++++++++++++++++++------------ 4 files changed, 71 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index 32b7418..e4c8bb7 100644 --- a/.gitignore +++ b/.gitignore @@ -103,4 +103,11 @@ venv.bak/ # mypy .mypy_cache/ -*.env +# VSCode +.vscode + +# SonarLint +.sonarlint + +# Downloaded Media +media/ diff --git a/README.md b/README.md index 991902b..e249b09 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ After the execution, you can see the content in the `book` directory. 
- *-s*, *--separate* = Create a separate directory for each book - *-v*, *--verbose* = Show more detailed information - *-q*, *--quiet* = Don't show information or progress bars +- *-i*, *--ids* = Products to download by id (If it is not specified, it will download all products that you have purchased) **Book File Types** diff --git a/config.py b/config.py index e0861fd..093358e 100644 --- a/config.py +++ b/config.py @@ -9,12 +9,18 @@ # this is base url where i do the requests BASE_URL = "https://services.packtpub.com/" +# this is base url for static content +BASE_STATIC_URL = "https://static.packt-cdn.com/" + # URL to request jwt token, params by post are user and pass, return jwt token AUTH_ENDPOINT = "auth-v1/users/tokens" # URL to get all your books, two params that i change are offset and limit, method GET PRODUCTS_ENDPOINT = "entitlements-v1/users/me/products?sort=createdAt:DESC&offset={offset}&limit={limit}" +# URL to get book information from id +PRODUCT_FROM_ID_ENDPOINT = "products/{id}/summary" + # URL to get types , param is book id, method GET URL_BOOK_TYPES_ENDPOINT = "products-v1/products/{book_id}/types" diff --git a/main.py b/main.py index e711e9f..3869ba6 100755 --- a/main.py +++ b/main.py @@ -9,7 +9,7 @@ import getopt import requests from tqdm import tqdm, trange -from config import BASE_URL, PRODUCTS_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT +from config import BASE_URL, BASE_STATIC_URL, PRODUCTS_ENDPOINT, PRODUCT_FROM_ID_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT from user import User @@ -18,12 +18,24 @@ def book_request(user, offset=0, limit=10, verbose=False): data = [] url = BASE_URL + PRODUCTS_ENDPOINT.format(offset=offset, limit=limit) if verbose: - print(url) + tqdm.write(url) r = requests.get(url, headers=user.get_header()) data += r.json().get('data', []) return url, r, data +def book_from_id_request(id, verbose=False): + url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id = id) + if verbose: + tqdm.write(url) 
+ + r = requests.get(url) + rjson = r.json() + data = {'productId': id, 'productName': rjson.get('title')} + + return url, r, data + + def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): ''' Request all your books, return json with info of all your books @@ -50,6 +62,27 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): data += book_request(user, offset, limit, is_verbose)[2] return data +def get_books_from_ids(ids, is_verbose=False, is_quiet=False): + ''' + Get all boooks from id + Params + ... + ids : list + ''' + + data = [] + + print("Getting list of books...") + + if not is_quiet: + id_iter = tqdm(ids, unit="Pages") + else: + id_iter = ids + + for id in id_iter: + data.append(book_from_id_request(id, is_verbose)[2]) + + return data def get_url_book(user, book_id, format='pdf'): ''' @@ -66,9 +99,9 @@ def get_url_book(user, book_id, format='pdf'): user.refresh_header() # refresh token get_url_book(user, book_id, format) # call recursive - print('ERROR (please copy and paste in the issue)') - print(r.json()) - print(r.status_code) + tqdm.write('ERROR (please copy and paste in the issue)') + tqdm.write(r.json()) + tqdm.write(r.status_code) return '' @@ -87,9 +120,9 @@ def get_book_file_types(user, book_id): user.refresh_header() # refresh token get_book_file_types(user, book_id, format) # call recursive - print('ERROR (please copy and paste in the issue)') - print(r.json()) - print(r.status_code) + tqdm.write('ERROR (please copy and paste in the issue)') + tqdm.write(r.json()) + tqdm.write(r.status_code) return [] @@ -98,7 +131,7 @@ def download_book(filename, url): ''' Download your book ''' - print('Starting to download ' + filename) + tqdm.write('Starting to download ' + filename) with open(filename, 'wb') as f: r = requests.get(url, stream=True) @@ -112,7 +145,7 @@ def download_book(filename, url): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() - print('Finished ' + filename) + 
tqdm.write('Finished ' + filename) def make_zip(filename): @@ -129,8 +162,8 @@ def move_current_files(root, book): except OSError: os.rename(f, f'{sub_dir}/{book}' + '_1' + f[f.index('.'):]) except ValueError as e: - print(e) - print('Skipping') + tqdm.write(e) + tqdm.write('Skipping') def does_dir_exist(directory): @@ -151,12 +184,13 @@ def main(argv): separate = None verbose = None quiet = None - errorMessage = 'Usage: main.py -e -p [-d -b -s -v -q]' + download_ids = None + errorMessage = 'Usage: main.py -e -p [-d -b -i -s -v -q]' # get the command line arguments/options try: opts, args = getopt.getopt( - argv, 'e:p:d:b:svq', ['email=', 'pass=', 'directory=', 'books=', 'separate', 'verbose', 'quiet']) + argv, 'e:p:d:b:i:svq', ['email=', 'pass=', 'directory=', 'books=', 'ids=', 'separate', 'verbose', 'quiet']) except getopt.GetoptError: print(errorMessage) sys.exit(2) @@ -178,6 +212,9 @@ def main(argv): verbose = True elif opt in ('-q', '--quiet'): quiet = True + elif opt in ('-i', '--ids'): + download_ids = arg.split(',') + if verbose and quiet: print("Verbose and quiet cannot be used together.") @@ -195,8 +232,11 @@ def main(argv): user = User(email, password) # get all your books - books = get_books(user, is_verbose=verbose, is_quiet=quiet) - print('Downloading books...') + if (download_ids): + books = get_books_from_ids(download_ids, is_verbose=verbose, is_quiet=quiet) + else: + books = get_books(user, is_verbose=verbose, is_quiet=quiet) + tqdm.write('Downloading books...') if not quiet: books_iter = tqdm(books, unit='Book') else: From 3b092b91bb0db9c1374211cb2cbf841be2ff5dd2 Mon Sep 17 00:00:00 2001 From: Levi Rizki Saputra Date: Wed, 3 Apr 2019 10:15:28 +0700 Subject: [PATCH 06/15] improving code quolity --- main.py | 180 ++++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 136 insertions(+), 44 deletions(-) diff --git a/main.py b/main.py index 3869ba6..c6c94cd 100755 --- a/main.py +++ b/main.py @@ -12,8 +12,10 @@ from config import 
BASE_URL, BASE_STATIC_URL, PRODUCTS_ENDPOINT, PRODUCT_FROM_ID_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT from user import User +error_message = 'Usage: main.py -e -p [-d -b -i -s -v -q]' -#TODO: I should do a function that his only purpose is to request and return data + +# TODO: I should do a function that his only purpose is to request and return data def book_request(user, offset=0, limit=10, verbose=False): data = [] url = BASE_URL + PRODUCTS_ENDPOINT.format(offset=offset, limit=limit) @@ -24,15 +26,16 @@ def book_request(user, offset=0, limit=10, verbose=False): return url, r, data + def book_from_id_request(id, verbose=False): - url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id = id) + url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id=id) if verbose: tqdm.write(url) r = requests.get(url) rjson = r.json() data = {'productId': id, 'productName': rjson.get('title')} - + return url, r, data @@ -47,12 +50,12 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): how many book wanna get by request ''' # TODO: given x time jwt expired and should refresh the header, user.refresh_header() - + url, r, data = book_request(user, offset, limit) - + print(f'You have {str(r.json()["count"])} books') print("Getting list of books...") - + if not is_quiet: pages_list = trange(r.json()['count'] // limit, unit='Pages') else: @@ -62,6 +65,7 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): data += book_request(user, offset, limit, is_verbose)[2] return data + def get_books_from_ids(ids, is_verbose=False, is_quiet=False): ''' Get all boooks from id @@ -73,32 +77,33 @@ def get_books_from_ids(ids, is_verbose=False, is_quiet=False): data = [] print("Getting list of books...") - + if not is_quiet: - id_iter = tqdm(ids, unit="Pages") + id_iter = tqdm(ids, unit="Pages") else: - id_iter = ids + id_iter = ids for id in id_iter: data.append(book_from_id_request(id, is_verbose)[2]) - + return data + def 
get_url_book(user, book_id, format='pdf'): ''' Return url of the book to download ''' - + url = BASE_URL + URL_BOOK_ENDPOINT.format(book_id=book_id, format=format) r = requests.get(url, headers=user.get_header()) - if r.status_code == 200: # success + if r.status_code == 200: # success return r.json().get('data', '') - elif r.status_code == 401: # jwt expired - user.refresh_header() # refresh token - get_url_book(user, book_id, format) # call recursive - + elif r.status_code == 401: # jwt expired + user.refresh_header() # refresh token + get_url_book(user, book_id, format) # call recursive + tqdm.write('ERROR (please copy and paste in the issue)') tqdm.write(r.json()) tqdm.write(r.status_code) @@ -113,13 +118,13 @@ def get_book_file_types(user, book_id): url = BASE_URL + URL_BOOK_TYPES_ENDPOINT.format(book_id=book_id) r = requests.get(url, headers=user.get_header()) - if (r.status_code == 200): # success + if (r.status_code == 200): # success return r.json()['data'][0].get('fileTypes', []) - - elif (r.status_code == 401): # jwt expired - user.refresh_header() # refresh token - get_book_file_types(user, book_id, format) # call recursive - + + elif (r.status_code == 401): # jwt expired + user.refresh_header() # refresh token + get_book_file_types(user, book_id, format) # call recursive + tqdm.write('ERROR (please copy and paste in the issue)') tqdm.write(r.json()) tqdm.write(r.status_code) @@ -127,9 +132,9 @@ def get_book_file_types(user, book_id): # TODO: i'd like that this functions be async and download faster -def download_book(filename, url): +def download_file(filename, url): ''' - Download your book + Download file ''' tqdm.write('Starting to download ' + filename) @@ -141,13 +146,27 @@ def download_book(filename, url): else: total = int(total) # TODO: read more about tqdm - for chunk in tqdm(r.iter_content(chunk_size=1024), total=math.ceil(total//1024), unit='KB', unit_scale=True): + for chunk in tqdm( + r.iter_content(chunk_size=1024), + 
total=math.ceil(total//1024), + unit='KB', + unit_scale=True + ): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() tqdm.write('Finished ' + filename) +def get_book_name(book, file_type): + book_name = book['productName'].replace( + ' ', '_').replace('.', '_').replace(':', '_') + if file_type == 'video' or file_type == 'code': + return book_name, book_name + '.' + file_type + '.zip' + else: + return book_name, book_name + '.' + file_type + + def make_zip(filename): if filename[-4:] == 'code': os.replace(filename, filename[:-4] + 'zip') @@ -166,34 +185,91 @@ def move_current_files(root, book): tqdm.write('Skipping') +def download_book_by_type(user, book, file_type, separate, root_directory, verbose=False): + book_name, book_filename = get_book_name(book, file_type) + if separate: + filename = f'{root_directory}/{book_name}/{book_filename}' + move_current_files(root_directory, book_name) + else: + filename = f'{root_directory}/{book_filename}' + # get url of the book to download + url = get_url_book(user, book['productId'], file_type) + if not os.path.exists(filename): + download_file(filename, url) + else: + if verbose: + tqdm.write(f'{filename} already exists, skipping.') + + +def downlaod_all_books(user, books, book_file_types, separate, root_directory, verbose=False, quiet=False): + tqdm.write('Downloading books...') + if not quiet: + books_iter = tqdm(books, unit='Book') + else: + books_iter = books + for book in books_iter: + # get the different file type of current book + file_types = get_book_file_types(user, book['productId']) + for file_type in file_types: + if file_type in book_file_types: # check if the file type entered is available by the current book + download_book_by_type( + user, book, file_type, separate, root_directory, verbose) + + def does_dir_exist(directory): + # Check if directory not exists if not os.path.exists(directory): try: + # try making dir if not exists os.makedirs(directory) except Exception as e: print(e) 
sys.exit(2) -def main(argv): - # thanks to https://github.com/ozzieperez/packtpub-library-downloader/blob/master/downloader.py +def get_opts_args(argv): + try: + return getopt.getopt( + argv, + 'e:p:d:b:i:svq', + [ + 'email=', + 'pass=', + 'directory=', + 'books=', + 'ids=', + 'separate', + 'verbose', + 'quiet' + ] + ) + except getopt.GetoptError: + print(error_message) + sys.exit(2) + +def check_arg(email, password, verbose, quiet): + # Is this true? + if verbose and quiet: + print("Verbose and quiet cannot be used together.") + sys.exit(2) + + # do we have the minimum required info? + if not email or not password: + print(error_message) + sys.exit(2) + +def parse_args(argv): email = None password = None - root_directory = 'media' + root_directory = 'media' book_file_types = ['pdf', 'mobi', 'epub', 'code'] separate = None verbose = None quiet = None download_ids = None - errorMessage = 'Usage: main.py -e -p [-d -b -i -s -v -q]' - # get the command line arguments/options - try: - opts, args = getopt.getopt( - argv, 'e:p:d:b:i:svq', ['email=', 'pass=', 'directory=', 'books=', 'ids=', 'separate', 'verbose', 'quiet']) - except getopt.GetoptError: - print(errorMessage) - sys.exit(2) + # get all options from argument + opts, args = get_opts_args(argv) # hold the values of the command line options for opt, arg in opts: @@ -215,15 +291,27 @@ def main(argv): elif opt in ('-i', '--ids'): download_ids = arg.split(',') + check_arg(email, password, verbose, quiet) - if verbose and quiet: - print("Verbose and quiet cannot be used together.") - sys.exit(2) + return email, \ + password, \ + root_directory, \ + book_file_types, \ + separate, verbose, \ + quiet, \ + download_ids - # do we have the minimum required info? 
- if not email or not password: - print(errorMessage) - sys.exit(2) + +def main(argv): + # thanks to https://github.com/ozzieperez/packtpub-library-downloader/blob/master/downloader.py + email, \ + password, \ + root_directory, \ + book_file_types, \ + separate, \ + verbose, \ + quiet, \ + download_ids = parse_args(argv) # check if not exists dir and create does_dir_exist(root_directory) @@ -233,7 +321,8 @@ def main(argv): # get all your books if (download_ids): - books = get_books_from_ids(download_ids, is_verbose=verbose, is_quiet=quiet) + books = get_books_from_ids( + download_ids, is_verbose=verbose, is_quiet=quiet) else: books = get_books(user, is_verbose=verbose, is_quiet=quiet) tqdm.write('Downloading books...') @@ -261,6 +350,9 @@ def main(argv): if verbose: tqdm.write(f'{filename} already exists, skipping.') + # downloading all books + # downlaod_all_books(user, books, book_file_types, separate, root_directory, verbose, quiet) + if __name__ == '__main__': main(sys.argv[1:]) From 1f7350f153146460335ca7c55770676ae4b4d719 Mon Sep 17 00:00:00 2001 From: Levi Rizki Saputra Date: Wed, 3 Apr 2019 11:18:25 +0700 Subject: [PATCH 07/15] fix downloading file size progress --- main.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/main.py b/main.py index c6c94cd..0b7b1ae 100755 --- a/main.py +++ b/main.py @@ -145,16 +145,18 @@ def download_file(filename, url): f.write(response.content) else: total = int(total) - # TODO: read more about tqdm - for chunk in tqdm( - r.iter_content(chunk_size=1024), - total=math.ceil(total//1024), + progress = tqdm( + total=math.ceil(total), unit='KB', - unit_scale=True - ): + unit_scale=True, + mininterval=1 + ) + for chunk in r.iter_content(chunk_size=1024): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() + progress.update(1024) + progress.close() tqdm.write('Finished ' + filename) From 3ac716ef5eaded812365ea99582f1d841eb4f6eb Mon Sep 17 00:00:00 2001 From: greenflute Date: Sun, 12 
Sep 2021 18:14:34 +0200 Subject: [PATCH 08/15] add environment variable to start in two different ways, fix file existence check. --- .gitignore | 6 +++++- README.md | 2 +- data.env-sample | 3 ++- entrypoint.sh | 7 ++++++- main.py | 52 +++++++++++++++++++++++++++++-------------------- 5 files changed, 45 insertions(+), 25 deletions(-) diff --git a/.gitignore b/.gitignore index e4c8bb7..8428ee2 100644 --- a/.gitignore +++ b/.gitignore @@ -82,7 +82,7 @@ celerybeat-schedule *.sage.py # Environments -.env +*.env .venv env/ venv/ @@ -109,5 +109,9 @@ venv.bak/ # SonarLint .sonarlint +# IntelliJ +.idea/ + # Downloaded Media media/ +book/ diff --git a/README.md b/README.md index e249b09..abd313b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ Since PacktPub restructured their website [packtpub-library-downloader](https:// ## Usage: pip install -r requirements.txt - python main.py -e -p [-d -b -s -v -q] + python main.py -e -p [-d -b -s -v -q] [-i ] ##### Example: Download books in PDF format python main.py -e hello@world.com -p p@ssw0rd -d ~/Desktop/packt -b pdf,epub,mobi,code diff --git a/data.env-sample b/data.env-sample index c50c559..6f95966 100644 --- a/data.env-sample +++ b/data.env-sample @@ -1,2 +1,3 @@ EMAIL=email@example.com -PASSWORD=example$password \ No newline at end of file +PASSWORD=example$password +IDS= \ No newline at end of file diff --git a/entrypoint.sh b/entrypoint.sh index 51bf5cb..eee87af 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,2 +1,7 @@ pip install -r /app/requirements.txt -python /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub,code \ No newline at end of file + +if [[ -z "${IDS}" ]]; then + python /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub,code,video -s -v +else + python /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub,code,video -s -v -i $IDS +fi \ No newline at end of file diff --git a/main.py b/main.py index 0b7b1ae..080b49c 100755 --- a/main.py +++ b/main.py @@ 
-104,10 +104,10 @@ def get_url_book(user, book_id, format='pdf'): user.refresh_header() # refresh token get_url_book(user, book_id, format) # call recursive - tqdm.write('ERROR (please copy and paste in the issue)') - tqdm.write(r.json()) - tqdm.write(r.status_code) - return '' + tqdm.write('ERROR (please copy and paste in the issue): ' + str(r.status_code)) + for key,value in r.json().items(): + tqdm.write(' ' + key + ': ' + str(value)) + raise PermissionError('Could not download book: ' + book_id + ' in format: ' + format) def get_book_file_types(user, book_id): @@ -125,9 +125,9 @@ def get_book_file_types(user, book_id): user.refresh_header() # refresh token get_book_file_types(user, book_id, format) # call recursive - tqdm.write('ERROR (please copy and paste in the issue)') - tqdm.write(r.json()) - tqdm.write(r.status_code) + tqdm.write('ERROR (please copy and paste in the issue): ' + str(r.status_code)) + for key, value in r.json().items(): + tqdm.write(' ' + key + ': ' + str(value)) return [] @@ -194,16 +194,22 @@ def download_book_by_type(user, book, file_type, separate, root_directory, verbo move_current_files(root_directory, book_name) else: filename = f'{root_directory}/{book_filename}' + + if os.path.exists(filename): + if verbose: + tqdm.write(f'{filename} already exists, skipping.') + return + + try: # get url of the book to download url = get_url_book(user, book['productId'], file_type) - if not os.path.exists(filename): - download_file(filename, url) - else: - if verbose: - tqdm.write(f'{filename} already exists, skipping.') + download_file(filename, url) + except PermissionError as e: + tqdm.write(repr(e)) + tqdm.write('Skipping') -def downlaod_all_books(user, books, book_file_types, separate, root_directory, verbose=False, quiet=False): +def download_all_books(user, books, book_file_types, separate, root_directory, verbose=False, quiet=False): tqdm.write('Downloading books...') if not quiet: books_iter = tqdm(books, unit='Book') @@ -343,17 +349,21 
@@ def main(argv): move_current_files(root_directory, book_name) else: filename = f'{root_directory}/{book_name}.{file_type}' - # get url of the book to download - url = get_url_book(user, book['productId'], file_type) - if not os.path.exists(filename) and not os.path.exists(filename.replace('.code', '.zip')): - download_book(filename, url) - make_zip(filename) - else: + + # implied check for pdf, epub, mobi, video + if os.path.exists(filename.replace('.code', '.zip')): if verbose: tqdm.write(f'{filename} already exists, skipping.') + continue - # downloading all books - # downlaod_all_books(user, books, book_file_types, separate, root_directory, verbose, quiet) + try: + # get url of the book to download + url = get_url_book(user, book['productId'], file_type) + download_file(filename, url) + make_zip(filename) + except PermissionError as e: + tqdm.write(repr(e)) + tqdm.write('Skipping') if __name__ == '__main__': From ec5f4805c329873808b3d6785a61a908715dac1b Mon Sep 17 00:00:00 2001 From: greenflute Date: Sun, 12 Sep 2021 19:56:00 +0200 Subject: [PATCH 09/15] fixed when both code and video are available. 
--- main.py | 62 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 32 insertions(+), 30 deletions(-) diff --git a/main.py b/main.py index 080b49c..7058709 100755 --- a/main.py +++ b/main.py @@ -27,20 +27,20 @@ def book_request(user, offset=0, limit=10, verbose=False): return url, r, data -def book_from_id_request(id, verbose=False): - url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id=id) +def book_from_id_request(book_id, verbose=False): + url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id=book_id) if verbose: tqdm.write(url) r = requests.get(url) rjson = r.json() - data = {'productId': id, 'productName': rjson.get('title')} + data = {'productId': book_id, 'productName': rjson.get('title')} return url, r, data def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): - ''' + """ Request all your books, return json with info of all your books Params ... @@ -48,7 +48,7 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): offset : int limit : int how many book wanna get by request - ''' + """ # TODO: given x time jwt expired and should refresh the header, user.refresh_header() url, r, data = book_request(user, offset, limit) @@ -67,12 +67,12 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): def get_books_from_ids(ids, is_verbose=False, is_quiet=False): - ''' - Get all boooks from id + """ + Get all books from id Params ... 
ids : list - ''' + """ data = [] @@ -83,18 +83,18 @@ def get_books_from_ids(ids, is_verbose=False, is_quiet=False): else: id_iter = ids - for id in id_iter: - data.append(book_from_id_request(id, is_verbose)[2]) + for book_id in id_iter: + data.append(book_from_id_request(book_id, is_verbose)[2]) return data -def get_url_book(user, book_id, format='pdf'): - ''' +def get_url_book(user, book_id, file_format='pdf'): + """ Return url of the book to download - ''' + """ - url = BASE_URL + URL_BOOK_ENDPOINT.format(book_id=book_id, format=format) + url = BASE_URL + URL_BOOK_ENDPOINT.format(book_id=book_id, format=file_format) r = requests.get(url, headers=user.get_header()) if r.status_code == 200: # success @@ -102,28 +102,28 @@ def get_url_book(user, book_id, format='pdf'): elif r.status_code == 401: # jwt expired user.refresh_header() # refresh token - get_url_book(user, book_id, format) # call recursive + get_url_book(user, book_id, file_format) # call recursive tqdm.write('ERROR (please copy and paste in the issue): ' + str(r.status_code)) for key,value in r.json().items(): tqdm.write(' ' + key + ': ' + str(value)) - raise PermissionError('Could not download book: ' + book_id + ' in format: ' + format) + raise PermissionError('Could not download book: ' + book_id + ' in format: ' + file_format) def get_book_file_types(user, book_id): - ''' + """ Return a list with file types of a book - ''' + """ url = BASE_URL + URL_BOOK_TYPES_ENDPOINT.format(book_id=book_id) r = requests.get(url, headers=user.get_header()) - if (r.status_code == 200): # success + if r.status_code == 200: # success return r.json()['data'][0].get('fileTypes', []) - elif (r.status_code == 401): # jwt expired + elif r.status_code == 401: # jwt expired user.refresh_header() # refresh token - get_book_file_types(user, book_id, format) # call recursive + return get_book_file_types(user, book_id) # call recursive tqdm.write('ERROR (please copy and paste in the issue): ' + str(r.status_code)) for key, value 
in r.json().items(): @@ -133,9 +133,9 @@ def get_book_file_types(user, book_id): # TODO: i'd like that this functions be async and download faster def download_file(filename, url): - ''' + """ Download file - ''' + """ tqdm.write('Starting to download ' + filename) with open(filename, 'wb') as f: @@ -161,8 +161,7 @@ def download_file(filename, url): def get_book_name(book, file_type): - book_name = book['productName'].replace( - ' ', '_').replace('.', '_').replace(':', '_') + book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_').replace('/','').replace('?','') if file_type == 'video' or file_type == 'code': return book_name, book_name + '.' + file_type + '.zip' else: @@ -171,7 +170,9 @@ def get_book_name(book, file_type): def make_zip(filename): if filename[-4:] == 'code': - os.replace(filename, filename[:-4] + 'zip') + os.replace(filename, filename[:-4] + 'code.zip') + elif filename[-5:] == 'video': + os.replace(filename, filename[:-5] + 'video.zip') def move_current_files(root, book): @@ -183,7 +184,7 @@ def move_current_files(root, book): except OSError: os.rename(f, f'{sub_dir}/{book}' + '_1' + f[f.index('.'):]) except ValueError as e: - tqdm.write(e) + tqdm.write(repr(e)) tqdm.write('Skipping') @@ -328,7 +329,7 @@ def main(argv): user = User(email, password) # get all your books - if (download_ids): + if download_ids: books = get_books_from_ids( download_ids, is_verbose=verbose, is_quiet=quiet) else: @@ -341,6 +342,7 @@ def main(argv): for book in books_iter: # get the different file type of current book file_types = get_book_file_types(user, book['productId']) + tqdm.write('Requested formats: ' + ','.join(book_file_types) + ' but only available: ' + ','.join(file_types)) for file_type in file_types: if file_type in book_file_types: # check if the file type entered is available by the current book book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_').replace('/','').replace('?','') @@ -350,8 
+352,8 @@ def main(argv): else: filename = f'{root_directory}/{book_name}.{file_type}' - # implied check for pdf, epub, mobi, video - if os.path.exists(filename.replace('.code', '.zip')): + # implied check for pdf, epub, mobi, also avoid name collision when both code and video are available. + if os.path.exists(filename.replace('.code', '.code.zip').replace('.video', '.video.zip')): if verbose: tqdm.write(f'{filename} already exists, skipping.') continue From ef02de9c14a9ca2601dc805c0039396c54b36b65 Mon Sep 17 00:00:00 2001 From: Alessandro Varesi Date: Tue, 28 Sep 2021 15:24:04 +0200 Subject: [PATCH 10/15] Added creation of README.md (option -R | --readme) --- main.py | 63 ++++++++++++++++++++++++++++++++++++++------------------- user.py | 29 +++++++++++++++----------- 2 files changed, 59 insertions(+), 33 deletions(-) diff --git a/main.py b/main.py index dd6f88d..ee88aea 100755 --- a/main.py +++ b/main.py @@ -24,6 +24,7 @@ def book_request(user, offset=0, limit=10, verbose=False): return url, r, data + def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): ''' Request all your books, return json with info of all your books @@ -35,12 +36,12 @@ def get_books(user, offset=0, limit=10, is_verbose=False, is_quiet=False): how many book wanna get by request ''' # TODO: given x time jwt expired and should refresh the header, user.refresh_header() - + url, r, data = book_request(user, offset, limit) - + print(f'You have {str(r.json()["count"])} books') print("Getting list of books...") - + if not is_quiet: pages_list = trange(r.json()['count'] // limit, unit='Pages') else: @@ -55,17 +56,17 @@ def get_url_book(user, book_id, format='pdf'): ''' Return url of the book to download ''' - + url = BASE_URL + URL_BOOK_ENDPOINT.format(book_id=book_id, format=format) r = requests.get(url, headers=user.get_header()) - if r.status_code == 200: # success + if r.status_code == 200: # success return r.json().get('data', '') - elif r.status_code == 401: # jwt expired 
- user.refresh_header() # refresh token - get_url_book(user, book_id, format) # call recursive - + elif r.status_code == 401: # jwt expired + user.refresh_header() # refresh token + get_url_book(user, book_id, format) # call recursive + print('ERROR (please copy and paste in the issue)') print(r.json()) print(r.status_code) @@ -80,13 +81,13 @@ def get_book_file_types(user, book_id): url = BASE_URL + URL_BOOK_TYPES_ENDPOINT.format(book_id=book_id) r = requests.get(url, headers=user.get_header()) - if (r.status_code == 200): # success + if (r.status_code == 200): # success return r.json()['data'][0].get('fileTypes', []) - - elif (r.status_code == 401): # jwt expired - user.refresh_header() # refresh token - get_book_file_types(user, book_id, format) # call recursive - + + elif (r.status_code == 401): # jwt expired + user.refresh_header() # refresh token + get_book_file_types(user, book_id, format) # call recursive + print('ERROR (please copy and paste in the issue)') print(r.json()) print(r.status_code) @@ -108,7 +109,10 @@ def download_book(filename, url): else: total = int(total) # TODO: read more about tqdm - for chunk in tqdm(r.iter_content(chunk_size=1024), total=math.ceil(total//1024), unit='KB', unit_scale=True): + for chunk in tqdm(r.iter_content(chunk_size=1024), + total=math.ceil(total // 1024), + unit='KB', + unit_scale=True): if chunk: # filter out keep-alive new chunks f.write(chunk) f.flush() @@ -142,11 +146,20 @@ def does_dir_exist(directory): sys.exit(2) +def create_readme(path, book): + filename = os.path.join(path, 'README.md') + with open(filename, 'w', encoding='utf-8') as file: + file.write('# ' + str(book['productName']) + '\n\n') + file.write('productId: ' + str(book['productId']) + '\n') + file.write('Release date: ' + str(book['releaseDate']) + '\n') + file.close() + + def main(argv): # thanks to https://github.com/ozzieperez/packtpub-library-downloader/blob/master/downloader.py email = None password = None - root_directory = 'media' + 
root_directory = 'media' book_file_types = ['pdf', 'mobi', 'epub', 'code'] separate = None verbose = None @@ -155,8 +168,10 @@ def main(argv): # get the command line arguments/options try: - opts, args = getopt.getopt( - argv, 'e:p:d:b:svq', ['email=', 'pass=', 'directory=', 'books=', 'separate', 'verbose', 'quiet']) + opts, args = getopt.getopt(argv, 'e:p:d:b:svqR', [ + 'email=', 'pass=', 'directory=', 'books=', 'separate', 'verbose', + 'quiet', 'readme' + ]) except getopt.GetoptError: print(errorMessage) sys.exit(2) @@ -178,6 +193,8 @@ def main(argv): verbose = True elif opt in ('-q', '--quiet'): quiet = True + elif opt in ('-R', '--readme'): + readme = True if verbose and quiet: print("Verbose and quiet cannot be used together.") @@ -206,15 +223,19 @@ def main(argv): file_types = get_book_file_types(user, book['productId']) for file_type in file_types: if file_type in book_file_types: # check if the file type entered is available by the current book - book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_').replace('/','') + book_name = book['productName'].replace(' ', '_').replace( + '.', '_').replace(':', '_').replace('/', '') if separate: filename = f'{root_directory}/{book_name}/{book_name}.{file_type}' move_current_files(root_directory, book_name) + if readme: + create_readme(f'{root_directory}/{book_name}', book) else: filename = f'{root_directory}/{book_name}.{file_type}' # get url of the book to download url = get_url_book(user, book['productId'], file_type) - if not os.path.exists(filename) and not os.path.exists(filename.replace('.code', '.zip')): + if not os.path.exists(filename) and not os.path.exists( + filename.replace('.code', '.zip')): download_book(filename, url) make_zip(filename) else: diff --git a/user.py b/user.py index 689cd70..43e353a 100644 --- a/user.py +++ b/user.py @@ -5,6 +5,7 @@ import requests from config import BASE_URL, AUTH_ENDPOINT + class User: """ User object that contain his header @@ -13,30 
+14,35 @@ class User: password = "" # need to fill Authoritazion with current token provide by api header = { - "User-Agent": "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 " + + "User-Agent": + "Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 " + "(KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36", - "Authorization":"" - } - + "Authorization": "" + } + def __init__(self, username, password): self.username = username self.password = password self.header["Authorization"] = self.get_token() - + def get_token(self): """ Request auth endpoint and return user token """ - url = BASE_URL+AUTH_ENDPOINT - # use json paramenter because for any reason they send user and pass in plain text :'( - r = requests.post(url, json={'username':self.username, 'password':self.password}) + url = BASE_URL + AUTH_ENDPOINT + # use json paramenter because for any reason they send user and pass in plain text :'( + r = requests.post(url, + json={ + 'username': self.username, + 'password': self.password + }) if r.status_code == 200: print("You are in!") return 'Bearer ' + r.json()['data']['access'] - - # except should happend when user and pass are incorrect + + # except should happend when user and pass are incorrect print("Error login, check user and password") - print("Error {}".format(e)) + #print("Error {}".format(e)) sys.exit(2) def get_header(self): @@ -49,4 +55,3 @@ def refresh_header(self): self.header["Authorization"] = self.get_token() return self.header - From 09c3ca787c35985cceb242e82d3c8cdbecca21c1 Mon Sep 17 00:00:00 2001 From: Alessandro Varesi Date: Tue, 28 Sep 2021 16:46:03 +0200 Subject: [PATCH 11/15] Updated README.md --- README.md | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index abd313b..b038190 100644 --- a/README.md +++ b/README.md @@ -5,15 +5,17 @@ Script to download all your PacktPub books inspired by https://github.com/ozziep Since PacktPub restructured their 
website [packtpub-library-downloader](https://github.com/ozzieperez/packtpub-library-downloader) became obsolete because the downloader used webscraping. So I figured out that now PacktPub uses a REST API. Then I found which endpoint to use for downloading books and made a simple script. Feel free to fork and PR to improve. Packtpub's API isn't documented :'( ## Usage: + pip install -r requirements.txt - python main.py -e -p [-d -b -s -v -q] [-i ] + python main.py -e -p [-d -b -s -v -q] [-i ] ##### Example: Download books in PDF format - python main.py -e hello@world.com -p p@ssw0rd -d ~/Desktop/packt -b pdf,epub,mobi,code + + python main.py -e hello@world.com -p p@ssw0rd -d ~/Desktop/packt -b pdf,epub,mobi,code ## Docker integration -You must put your data in the `.env` file. +You must put your data in the `.env` file. ``` mv data.env-sample data.env @@ -27,23 +29,24 @@ docker-compose up After the execution, you can see the content in the `book` directory. - ## Commandline Options -- *-e*, *--email* = Your login email -- *-p*, *--password* = Your login password -- *-d*, *--directory* = Directory to download into. Default is "media/" in the current directory -- *-b*, *--books* = Assets to download. Options are: *pdf,mobi,epub,code,video* -- *-s*, *--separate* = Create a separate directory for each book -- *-v*, *--verbose* = Show more detailed information -- *-q*, *--quiet* = Don't show information or progress bars -- *-i*, *--ids* = Products to download by id (If it is not specified, it will download all products that you have purchased) + +- _-e_, _--email_ = Your login email +- _-p_, _--password_ = Your login password +- _-d_, _--directory_ = Directory to download into. Default is "media/" in the current directory +- _-b_, _--books_ = Assets to download. 
Options are: _pdf,mobi,epub,code,video_ +- _-s_, _--separate_ = Create a separate directory for each book +- _-v_, _--verbose_ = Show more detailed information +- _-q_, _--quiet_ = Don't show information or progress bars +- _-i_, _--ids_ = Products to download by id (If it is not specified, it will download all products that you have purchased) +- _-R_, _--readme_ = Create a README.md file with info of the book (_--separate_ option required) **Book File Types** -- *pdf*: PDF format -- *mobi*: MOBI format -- *epub*: EPUB format -- *code*: Accompanying source code, saved as .zip files -- *video*: Some courses are in video format +- _pdf_: PDF format +- _mobi_: MOBI format +- _epub_: EPUB format +- _code_: Accompanying source code, saved as .zip files +- _video_: Some courses are in video format -I'm working on Python 3.6.0 +I'm working on Python 3.6.0 From 58aa910aabb51f26068e4ec1a038e10b0cbff908 Mon Sep 17 00:00:00 2001 From: alv67 Date: Thu, 30 Sep 2021 00:25:28 +0200 Subject: [PATCH 12/15] DONE - Option -R : README.md for each book --- config.py | 5 ++- main.py | 86 +++++++++++++++++++++++++++++++++++++++--------- requirements.txt | 2 +- 3 files changed, 75 insertions(+), 18 deletions(-) diff --git a/config.py b/config.py index 093358e..9b5faec 100644 --- a/config.py +++ b/config.py @@ -18,9 +18,12 @@ # URL to get all your books, two params that i change are offset and limit, method GET PRODUCTS_ENDPOINT = "entitlements-v1/users/me/products?sort=createdAt:DESC&offset={offset}&limit={limit}" -# URL to get book information from id +# URL(BASE_STATIC) to get book information from id PRODUCT_FROM_ID_ENDPOINT = "products/{id}/summary" +# URL(BASE_STATIC) to get author information from id +AUTHOR_FROM_ID_ENDPOINT = "authors/{id}" + # URL to get types , param is book id, method GET URL_BOOK_TYPES_ENDPOINT = "products-v1/products/{book_id}/types" diff --git a/main.py b/main.py index 089985f..2cb9a22 100755 --- a/main.py +++ b/main.py @@ -9,10 +9,10 @@ import getopt import 
requests from tqdm import tqdm, trange -from config import BASE_URL, BASE_STATIC_URL, PRODUCTS_ENDPOINT, PRODUCT_FROM_ID_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT +from config import BASE_URL, BASE_STATIC_URL, PRODUCTS_ENDPOINT, PRODUCT_FROM_ID_ENDPOINT, URL_BOOK_TYPES_ENDPOINT, URL_BOOK_ENDPOINT, AUTHOR_FROM_ID_ENDPOINT from user import User -error_message = 'Usage: main.py -e -p [-d -b -i -s -v -q]' +error_message = 'Usage: main.py -e -p [-d -b -i -sR -v -q]' # TODO: I should do a function that his only purpose is to request and return data @@ -235,14 +235,63 @@ def does_dir_exist(directory): print(e) sys.exit(2) +def get_author_info(author_id): + url = BASE_STATIC_URL + AUTHOR_FROM_ID_ENDPOINT.format(id=author_id) + r = requests.get(url) + rjson = r.json() + return rjson + + +def get_book_info(book_id): + url = BASE_STATIC_URL + PRODUCT_FROM_ID_ENDPOINT.format(id=book_id) + + r = requests.get(url) + rjson = r.json() + authors = [] + + + try: + for author in rjson.get('authors'): + authors.append(get_author_info(author).get('author')) + data = { + 'title': rjson.get('title'), + 'authors': authors, + 'isbn13': rjson.get('isbn13'), + 'description': rjson.get('oneLiner'), + 'pages': rjson.get('pages'), + 'releaseDate': rjson.get('publicationDate')[:10], + 'category': rjson.get('category'), + 'homepage': f"https://subscription.packtpub.com{rjson.get('readUrl')}" + } + except: + pass + + return data + +# TODO: Get link to Github repository where present (see book 9781789957754) def create_readme(path, book): filename = os.path.join(path, 'README.md') - with open(filename, 'w', encoding='utf-8') as file: - file.write('# ' + str(book['productName']) + '\n\n') - file.write('productId: ' + str(book['productId']) + '\n') - file.write('Release date: ' + str(book['releaseDate']) + '\n') - file.close() + try: + data = get_book_info(book['productId']) + + with open(filename, 'w', encoding='utf-8') as file: + file.write(f"# {str(data['title'])}\n") + file.write('\n') 
+ file.write(f"- By {', '.join(data['authors'])}\n") + file.write(f"- Publication date: {data['releaseDate']}\n") + file.write(f"- ISBN: {data['isbn13']}\n") + file.write(f"- Pages: {data['pages']}\n") + file.write('\n') + file.write(data['description'] + '\n') + file.write('\n') + file.write(f"* [Book Home Page]({data['homepage']})\n") + for k, v in book['files'].items(): + file.write(f"* [{k.upper()}]({v})\n") + file.write('\n') + file.write('') + except Exception as e: + pass def get_opts_args(argv): @@ -358,17 +407,19 @@ def main(argv): # get the different file type of current book file_types = get_book_file_types(user, book['productId']) tqdm.write('Requested formats: ' + ','.join(book_file_types) + ' but only available: ' + ','.join(file_types)) + book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_').replace('/','').replace('?','') + book['files'] = {} + if separate: + filepath = f'{root_directory}/{book_name}' + move_current_files(root_directory, book_name) + #if readme: + # create_readme(f'{filepath}', book) + else: + filepath = f'{root_directory}' for file_type in file_types: if file_type in book_file_types: # check if the file type entered is available by the current book - book_name = book['productName'].replace(' ', '_').replace('.', '_').replace(':', '_').replace('/','').replace('?','') - if separate: - filename = f'{root_directory}/{book_name}/{book_name}.{file_type}' - move_current_files(root_directory, book_name) - if readme: - create_readme(f'{root_directory}/{book_name}', book) - else: - filename = f'{root_directory}/{book_name}.{file_type}' - + filename = f'{filepath}/{book_name}.{file_type}' + book['files'][file_type] = f'{book_name}.{file_type}' # implied check for pdf, epub, mobi, also avoid name collision when both code and video are available. 
if os.path.exists(filename.replace('.code', '.code.zip').replace('.video', '.video.zip')): if verbose: @@ -383,6 +434,9 @@ def main(argv): except PermissionError as e: tqdm.write(repr(e)) tqdm.write('Skipping') + if separate and readme: + print (book) + create_readme(f'{filepath}', book) if __name__ == '__main__': diff --git a/requirements.txt b/requirements.txt index a1c50b9..ae08abc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,7 +7,7 @@ chardet==3.0.4 idna==2.8 idna-ssl==1.1.0 multidict==4.5.2 -requests==2.21.0 +requests==2.26.0 tqdm==4.30.0 typing-extensions==3.7.2 urllib3==1.26.5 From 01ee98f72ae2d9f406b1eaeb9255440a3682642d Mon Sep 17 00:00:00 2001 From: alv67 Date: Thu, 30 Sep 2021 00:51:23 +0200 Subject: [PATCH 13/15] Purged from debug prints --- main.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/main.py b/main.py index 2cb9a22..74040b1 100755 --- a/main.py +++ b/main.py @@ -412,8 +412,6 @@ def main(argv): if separate: filepath = f'{root_directory}/{book_name}' move_current_files(root_directory, book_name) - #if readme: - # create_readme(f'{filepath}', book) else: filepath = f'{root_directory}' for file_type in file_types: @@ -435,7 +433,6 @@ def main(argv): tqdm.write(repr(e)) tqdm.write('Skipping') if separate and readme: - print (book) create_readme(f'{filepath}', book) From 906e7284aae38abcc797f26f2701bf4c142035ec Mon Sep 17 00:00:00 2001 From: Alessandro Varesi Date: Thu, 28 Oct 2021 14:42:39 +0200 Subject: [PATCH 14/15] Correct login error due to new access procedure --- user.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/user.py b/user.py index 43e353a..2502752 100644 --- a/user.py +++ b/user.py @@ -39,6 +39,15 @@ def get_token(self): if r.status_code == 200: print("You are in!") return 'Bearer ' + r.json()['data']['access'] + + r = requests.post(url, + json={ + 'username': self.username, + 'password': self.password + }) + if r.status_code == 200: + print("You are in!") + return 'Bearer ' +
r.json()['data']['access'] # except should happend when user and pass are incorrect print("Error login, check user and password") From 0d5e88c935f906e5060926acfe540fca6f28230c Mon Sep 17 00:00:00 2001 From: Alessandro Varesi Date: Tue, 19 Jul 2022 10:38:26 +0200 Subject: [PATCH 15/15] Entrypoint with -R option and only books --- entrypoint.sh | 5 +++-- main.py | 62 +++++++++++++++++++++++++-------------------------- 2 files changed, 34 insertions(+), 33 deletions(-) diff --git a/entrypoint.sh b/entrypoint.sh index eee87af..c06147e 100644 --- a/entrypoint.sh +++ b/entrypoint.sh @@ -1,7 +1,8 @@ pip install -r /app/requirements.txt if [[ -z "${IDS}" ]]; then - python /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub,code,video -s -v +# python /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub,code,video -s -v + python /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub -s -v -R else - python /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub,code,video -s -v -i $IDS + python /app/main.py -e $EMAIL -p $PASSWORD -d /app/book -b pdf,mobi,epub,code,video -s -v -R -i $IDS fi \ No newline at end of file diff --git a/main.py b/main.py index 74040b1..8b679de 100755 --- a/main.py +++ b/main.py @@ -252,20 +252,20 @@ def get_book_info(book_id): try: - for author in rjson.get('authors'): - authors.append(get_author_info(author).get('author')) - data = { - 'title': rjson.get('title'), - 'authors': authors, - 'isbn13': rjson.get('isbn13'), - 'description': rjson.get('oneLiner'), - 'pages': rjson.get('pages'), - 'releaseDate': rjson.get('publicationDate')[:10], - 'category': rjson.get('category'), - 'homepage': f"https://subscription.packtpub.com{rjson.get('readUrl')}" - } + for author in rjson.get('authors'): + authors.append(get_author_info(author).get('author')) + data = { + 'title': rjson.get('title'), + 'authors': authors, + 'isbn13': rjson.get('isbn13'), + 'description': rjson.get('oneLiner'), + 'pages': 
rjson.get('pages'), + 'releaseDate': rjson.get('publicationDate')[:10], + 'category': rjson.get('category'), + 'homepage': f"https://subscription.packtpub.com{rjson.get('readUrl')}" + } except: - pass + pass return data @@ -273,25 +273,25 @@ def get_book_info(book_id): def create_readme(path, book): filename = os.path.join(path, 'README.md') try: - data = get_book_info(book['productId']) - - with open(filename, 'w', encoding='utf-8') as file: - file.write(f"# {str(data['title'])}\n") - file.write('\n') - file.write(f"- By {', '.join(data['authors'])}\n") - file.write(f"- Publication date: {data['releaseDate']}\n") - file.write(f"- ISBN: {data['isbn13']}\n") - file.write(f"- Pages: {data['pages']}\n") - file.write('\n') - file.write(data['description'] + '\n') - file.write('\n') - file.write(f"* [Book Home Page]({data['homepage']})\n") - for k, v in book['files'].items(): - file.write(f"* [{k.upper()}]({v})\n") - file.write('\n') - file.write('') + data = get_book_info(book['productId']) + + with open(filename, 'w', encoding='utf-8') as file: + file.write(f"# {str(data['title'])}\n") + file.write('\n') + file.write(f"- By {', '.join(data['authors'])}\n") + file.write(f"- Publication date: {data['releaseDate']}\n") + file.write(f"- ISBN: {data['isbn13']}\n") + file.write(f"- Pages: {data['pages']}\n") + file.write('\n') + file.write(data['description'] + '\n') + file.write('\n') + file.write(f"* [Book Home Page]({data['homepage']})\n") + for k, v in book['files'].items(): + file.write(f"* [{k.upper()}]({v})\n") + file.write('\n') + file.write('') except Exception as e: - pass + pass def get_opts_args(argv):