Skip to content

Commit bc9822f

Browse files
committed
feat: Support downloading dbt projects from zip files
1 parent ed740ee commit bc9822f

File tree

3 files changed

+763
-550
lines changed

3 files changed

+763
-550
lines changed

airflow_dbt_python/hooks/s3.py

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from pathlib import Path
44
from typing import Optional
5+
from zipfile import ZipFile
56

67
from airflow.hooks.S3_hook import S3Hook
78

@@ -43,47 +44,77 @@ def get_dbt_profiles(
4344
else:
4445
local_profiles_file = Path(profiles_dir) / "profiles.yml"
4546

46-
self.log.info("Saving profiles file to: %s", local_profiles_file)
47-
with open(local_profiles_file, "wb+") as f:
48-
s3_object.download_fileobj(f)
47+
self.download_one_s3_object(local_profiles_file, s3_object)
4948
return local_profiles_file
5049

50+
def download_one_s3_object(self, target: Path, s3_object):
    """Download a single S3 object to a local file.

    Arguments:
        target: Local path the object's contents are written to. The file is
            created, or truncated if it already exists.
        s3_object: The S3 object to download. It must provide a
            download_fileobj method that accepts a writable binary file.
    """
    # This helper is used for profiles files, project files and zip archives,
    # so the log message must not claim every download is a profiles file.
    self.log.info("Saving S3 object to: %s", target)

    with open(target, "wb+") as f:
        s3_object.download_fileobj(f)
56+
5157
def get_dbt_project(
    self, s3_project_url: str, project_dir: Optional[str] = None
) -> Path:
    """Fetch all dbt project files from S3.

    Fetches the dbt project files from the directory given by s3_project_url
    and pulls them to project_dir. However, if the URL points to a zip file,
    we assume it contains all the project files, and only download and unzip
    that instead.

    Arguments:
        s3_project_url: An S3 URL to a directory containing the dbt project
            files or a zip file containing all project files.
        project_dir: An optional directory to download/unzip the S3 project
            files into. If not provided, one will be created using the S3 URL.

    Returns:
        A Path to the local directory containing the dbt project files.
    """
    self.log.info("Downloading dbt project files from: %s", s3_project_url)
    bucket_name, key_prefix = self.parse_s3_url(s3_project_url)

    if project_dir is None:
        local_project_dir = Path(bucket_name) / key_prefix
    else:
        local_project_dir = Path(project_dir)

    if key_prefix.endswith(".zip"):
        # The archive is written inside local_project_dir, so the directory
        # must exist before the download opens the target file for writing.
        local_project_dir.mkdir(parents=True, exist_ok=True)

        s3_object = self.get_key(key=key_prefix, bucket_name=bucket_name)
        target = local_project_dir / "dbt_project.zip"

        try:
            self.download_one_s3_object(target, s3_object)

            with ZipFile(target, "r") as zf:
                zf.extractall(local_project_dir)
        finally:
            # The archive is only a transport container: remove it even when
            # the download or extraction fails so no partial file is left
            # inside the project directory.
            if target.exists():
                target.unlink()

    else:
        if not key_prefix.endswith("/"):
            key_prefix += "/"
        s3_object_keys = self.list_keys(
            bucket_name=bucket_name, prefix=f"{key_prefix}"
        )

        self.download_many_s3_keys(
            bucket_name, s3_object_keys, local_project_dir, key_prefix
        )

    return local_project_dir
106+
107+
def download_many_s3_keys(
    self, bucket_name: str, s3_keys: list[str], target_dir: Path, prefix: str
):
    """Download multiple S3 keys into a local directory.

    Each key is written below target_dir at its path relative to prefix, and
    intermediate directories are created as needed.

    Arguments:
        bucket_name: Name of the S3 bucket that holds the keys.
        s3_keys: The S3 keys to download. Every key must be located under
            prefix.
        target_dir: Local directory the files are downloaded into.
        prefix: The S3 key prefix that is stripped from each key to build
            its local relative path.
    """
    for s3_object_key in s3_keys:
        if s3_object_key.endswith("/"):
            # A key with a trailing slash is a "folder" placeholder with no
            # file name. Its relative path can resolve to an existing
            # directory (or to target_dir itself), which cannot be opened as
            # a file, so there is nothing to download.
            self.log.debug("Skipping directory placeholder key: %s", s3_object_key)
            continue

        s3_object = self.get_key(key=s3_object_key, bucket_name=bucket_name)
        path_file = Path(s3_object_key).relative_to(prefix)
        local_project_file = target_dir / path_file
        local_project_file.parent.mkdir(parents=True, exist_ok=True)

        self.log.info("Saving %s to: %s", s3_object_key, local_project_file)

        self.download_one_s3_object(local_project_file, s3_object)

0 commit comments

Comments
 (0)