From 2dc09aa7d09c6feeb44c6bb746c482304ac2f8da Mon Sep 17 00:00:00 2001
From: TrellixVulnTeam
Date: Sat, 26 Nov 2022 14:07:53 +0000
Subject: [PATCH] Adding tarfile member sanitization to extractall()

Check the destination of every archive member before calling
tarfile.extractall(), so that a member name such as "../../x" cannot be
written outside the extraction directory.

Containment is tested with os.path.commonpath(), which compares whole
path components. A character-wise prefix test would wrongly accept a
sibling directory such as "<dest>_evil" for the destination "<dest>".
---
Notes for reviewers:
- Only member names are checked; link targets stored in the archive are
  not inspected.
- os.path.commonpath() raises ValueError for paths on different drives,
  so such a member aborts extraction with ValueError rather than with the
  "Attempted Path Traversal" Exception.
- The bundle_utils.py and path_utils.py hunks do not add "import os";
  they presumably rely on an existing module-level import - confirm.
- Run "git apply --check" before applying: the post-image blob ids on the
  "index" lines predate the commonpath change, and context whitespace
  should be verified against the tree.

 catalyst/data/bundles/base.py            | 24 +++++++++++++++++++++++-
 catalyst/exchange/utils/bundle_utils.py  | 21 ++++++++++++++++++++-
 catalyst/marketplace/utils/path_utils.py | 21 ++++++++++++++++++++-
 tests/test_examples.py                   | 24 +++++++++++++++++++++++-
 4 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/catalyst/data/bundles/base.py b/catalyst/data/bundles/base.py
index c37000039..0d56b7e25 100644
--- a/catalyst/data/bundles/base.py
+++ b/catalyst/data/bundles/base.py
@@ -230,7 +230,29 @@ def _download_and_untar(self, show_progress, output_dir):
         # File transfer has completed, untar the bundle to the appropriate
         # data directory.
         with tarfile.open('r', fileobj=data) as tar:
-            tar.extractall(output_dir)
+            # Vet member names before unpacking to block path traversal.
+            import os
+
+            def is_within_directory(directory, target):
+                """Return True if target is lexically inside directory."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole path components, not characters.
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar to path; reject members that escape it."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, output_dir)
 
     def _fetch_metadata_frame(self,
                               api_key,
diff --git a/catalyst/exchange/utils/bundle_utils.py b/catalyst/exchange/utils/bundle_utils.py
index f4fc06254..b50bca760 100644
--- a/catalyst/exchange/utils/bundle_utils.py
+++ b/catalyst/exchange/utils/bundle_utils.py
@@ -46,7 +46,26 @@ def get_bcolz_chunk(exchange_name, symbol, data_frequency, period):
 
         bytes = download_without_progress(url)
         with tarfile.open('r', fileobj=bytes) as tar:
-            tar.extractall(path)
+            def is_within_directory(directory, target):
+                """Return True if target is lexically inside directory."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole path components, not characters.
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar to path; reject members that escape it."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, path)
 
     return path
 
diff --git a/catalyst/marketplace/utils/path_utils.py b/catalyst/marketplace/utils/path_utils.py
index 9e7d9c2e8..3bc4aa342 100644
--- a/catalyst/marketplace/utils/path_utils.py
+++ b/catalyst/marketplace/utils/path_utils.py
@@ -102,7 +102,26 @@ def extract_bundle(tar_filename):
     """
     target_path = tar_filename.replace('.tar.gz', '')
     with tarfile.open(tar_filename, 'r') as tar:
-        tar.extractall(target_path)
+        def is_within_directory(directory, target):
+            """Return True if target is lexically inside directory."""
+            abs_directory = os.path.abspath(directory)
+            abs_target = os.path.abspath(target)
+            # Compare whole path components, not characters.
+            prefix = os.path.commonpath([abs_directory, abs_target])
+
+            return prefix == abs_directory
+
+        def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+            """Extract tar to path; reject members that escape it."""
+            for member in tar.getmembers():
+                member_path = os.path.join(path, member.name)
+                if not is_within_directory(path, member_path):
+                    raise Exception("Attempted Path Traversal in Tar File")
+
+            tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+        safe_extract(tar, target_path)
 
     return target_path
 
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 85abb5517..318fe88c4 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -45,7 +45,29 @@ def init_class_fixtures(cls):
         cls.add_class_callback(partial(unregister, 'test'))
 
         with tarfile.open(test_resource_path('example_data.tar.gz')) as tar:
-            tar.extractall(cls.tmpdir.path)
+            # Vet member names before unpacking to block path traversal.
+            import os
+
+            def is_within_directory(directory, target):
+                """Return True if target is lexically inside directory."""
+                abs_directory = os.path.abspath(directory)
+                abs_target = os.path.abspath(target)
+                # Compare whole path components, not characters.
+                prefix = os.path.commonpath([abs_directory, abs_target])
+
+                return prefix == abs_directory
+
+            def safe_extract(tar, path=".", members=None, *, numeric_owner=False):
+                """Extract tar to path; reject members that escape it."""
+                for member in tar.getmembers():
+                    member_path = os.path.join(path, member.name)
+                    if not is_within_directory(path, member_path):
+                        raise Exception("Attempted Path Traversal in Tar File")
+
+                tar.extractall(path, members, numeric_owner=numeric_owner)
+
+
+            safe_extract(tar, cls.tmpdir.path)
 
         cls.expected_perf = dataframe_cache(
             cls.tmpdir.getpath(