Skip to content

Commit 2c4fc2a

Browse files
committed
feat: Support for basic auth in git remotes
1 parent 421b074 commit 2c4fc2a

File tree

3 files changed

+265
-24
lines changed

3 files changed

+265
-24
lines changed

airflow_dbt_python/hooks/git.py

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -187,17 +187,23 @@ def get_git_client_path(self, url: URL) -> Tuple[GitClients, str]:
187187
path = f"{url.netloc.split(':')[1]}/{str(url.path)}"
188188

189189
elif url.scheme in ("http", "https"):
190-
parsedurl = url._parsed
191-
netloc = parsedurl.hostname
190+
base_url = url.hostname
192191

193-
if parsedurl.port:
194-
netloc = "{}:{}".format(netloc, parsedurl.port)
195-
if parsedurl.username:
196-
netloc = "{}@{}".format(parsedurl.username, netloc)
192+
if url.port:
193+
base_url = f"{base_url}:{url.port}"
197194

198-
url._replace(netloc=netloc)
195+
auth_params = {}
196+
if url.authentication.username and url.authentication.password:
197+
auth_params = {
198+
"username": url.authentication.username,
199+
"password": url.authentication.password,
200+
}
201+
base_url = f"{url.scheme}://{base_url}"
202+
elif url.authentication.username:
203+
base_url = f"{url.scheme}://{url.authentication.username}@{base_url}"
204+
205+
client = HttpGitClient(base_url, **auth_params)
199206

200-
client = HttpGitClient(url.netloc)
201207
path = str(url.path)
202208

203209
else:

airflow_dbt_python/hooks/remote.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -62,11 +62,11 @@ def download_dbt_project(self, source: URLLike, destination: URLLike) -> Path:
6262
Returns:
6363
The destination Path.
6464
"""
65-
self.log.info("Downloading dbt project from %s to %s", source, destination)
66-
6765
source_url = URL(source)
6866
destination_url = URL(destination)
6967

68+
self.log.info("Downloading dbt project from %s to %s", source, destination)
69+
7070
if source_url.is_archive():
7171
destination_url = destination_url / source_url.name
7272

@@ -91,11 +91,11 @@ def download_dbt_profiles(self, source: URLLike, destination: URLLike) -> Path:
9191
Returns:
9292
The destination Path.
9393
"""
94-
self.log.info("Downloading dbt profiles from %s to %s", source, destination)
95-
9694
source_url = URL(source)
9795
destination_url = URL(destination)
9896

97+
self.log.info("Downloading dbt profiles from %s to %s", source, destination)
98+
9999
if source_url.name != "profiles.yml":
100100
source_url = source_url / "profiles.yml"
101101

@@ -122,11 +122,11 @@ def upload_dbt_project(
122122
delete_before: Flag to indicate whether to clear any existing files before
123123
uploading the dbt project.
124124
"""
125-
self.log.info("Uploading dbt project from %s to %s", source, destination)
126-
127125
source_url = URL(source)
128126
destination_url = URL(destination)
129127

128+
self.log.info("Uploading dbt project from %s to %s", source, destination)
129+
130130
if destination_url.is_archive() and source_url.is_dir():
131131
zip_url = source_url / destination_url.name
132132
source_url.archive(zip_url)

tests/hooks/test_git_hook.py

Lines changed: 245 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Unit test module for DbtGitRemoteHook."""
22
import multiprocessing
3+
import os
34
import shutil
5+
from urllib.parse import quote
46

57
import pytest
68
from dulwich.repo import Repo
@@ -9,27 +11,176 @@
911
from airflow_dbt_python.hooks.git import DbtGitRemoteHook
1012
from airflow_dbt_python.utils.url import URL
1113

12-
JAFFLE_SHOP_REPO = "dbt-labs/jaffle_shop"
13-
PLATFORM = "github.com"
14+
JAFFLE_SHOP = os.getenv("GIT_TEST_REPO", "tomasfarias/jaffle_shop")
15+
JAFFLE_SHOP_PRIVATE = os.getenv(
16+
"GIT_PRIVATE_TEST_REPO", "tomasfarias/jaffle_shop_private"
17+
)
18+
GITHUB = "github.com"
19+
GITLAB = "gitlab.com"
20+
21+
22+
@pytest.mark.parametrize(
23+
"repo_url",
24+
(
25+
f"https://{GITHUB}/{JAFFLE_SHOP}",
26+
f"http://{GITHUB}/{JAFFLE_SHOP}",
27+
),
28+
)
29+
def test_download_dbt_project_from_http_public_github_repo(
30+
tmp_path, repo_url, assert_dir_contents
31+
):
32+
"""Test downloading dbt project from GitHub public fork of dbt-labs' jaffle-shop.
33+
34+
In this test we use an HTTP/HTTPS connection to access GitHub. No credentials are
35+
required as the test repo is public.
36+
"""
37+
remote = DbtGitRemoteHook()
38+
source = URL(repo_url)
39+
local_repo_path = remote.download_dbt_project(source, tmp_path)
40+
41+
expected = [
42+
URL(local_repo_path / "dbt_project.yml"),
43+
URL(local_repo_path / "models" / "customers.sql"),
44+
URL(local_repo_path / "models" / "orders.sql"),
45+
URL(local_repo_path / "seeds" / "raw_customers.csv"),
46+
URL(local_repo_path / "seeds" / "raw_orders.csv"),
47+
]
48+
49+
assert local_repo_path.exists()
50+
51+
assert_dir_contents(local_repo_path, expected, exact=False)
1452

1553

1654
@pytest.mark.xfail(
1755
strict=False,
18-
reason=(
19-
"Attempting to clone from GitHub may fail for missing keys, or other reasons."
56+
reason="Attempting to clone from GitHub may fail for missing keys.",
57+
)
58+
@pytest.mark.parametrize(
59+
"repo_url",
60+
(
61+
f"ssh://{GITHUB}:{JAFFLE_SHOP}",
62+
f"git+ssh://{GITHUB}:{JAFFLE_SHOP}",
2063
),
2164
)
65+
def test_download_dbt_project_from_ssh_public_github_repo(
66+
tmp_path, repo_url, assert_dir_contents
67+
):
68+
"""Test downloading dbt project from GitHub public fork of dbt-labs' jaffle-shop.
69+
70+
In this test we use an SSH connection to access GitHub. Currently, this requires an
71+
SSH key to be set up on the host, so the tests are flaky by design. Future tests will
72+
rely on Airflow connections and test SSH keys instead.
73+
"""
74+
remote = DbtGitRemoteHook()
75+
source = URL(repo_url)
76+
local_repo_path = remote.download_dbt_project(source, tmp_path)
77+
78+
expected = [
79+
URL(local_repo_path / "dbt_project.yml"),
80+
URL(local_repo_path / "models" / "customers.sql"),
81+
URL(local_repo_path / "models" / "orders.sql"),
82+
URL(local_repo_path / "seeds" / "raw_customers.csv"),
83+
URL(local_repo_path / "seeds" / "raw_orders.csv"),
84+
]
85+
86+
assert local_repo_path.exists()
87+
88+
assert_dir_contents(local_repo_path, expected, exact=False)
89+
90+
91+
@pytest.mark.parametrize(
92+
"repo_url",
93+
(
94+
f"https://{GITLAB}/{JAFFLE_SHOP}",
95+
f"http://{GITLAB}/{JAFFLE_SHOP}",
96+
),
97+
)
98+
def test_download_dbt_project_from_http_public_gitlab_repo(
99+
tmp_path, repo_url, assert_dir_contents
100+
):
101+
"""Test downloading dbt project from GitLab public fork of dbt-labs' jaffle-shop.
102+
103+
In this test we use an HTTP/HTTPS connection to access GitLab. No credentials are
104+
required as the test repo is public.
105+
"""
106+
remote = DbtGitRemoteHook()
107+
source = URL(repo_url)
108+
local_repo_path = remote.download_dbt_project(source, tmp_path)
109+
110+
expected = [
111+
URL(local_repo_path / "dbt_project.yml"),
112+
URL(local_repo_path / "models" / "customers.sql"),
113+
URL(local_repo_path / "models" / "orders.sql"),
114+
URL(local_repo_path / "seeds" / "raw_customers.csv"),
115+
URL(local_repo_path / "seeds" / "raw_orders.csv"),
116+
]
117+
118+
assert local_repo_path.exists()
119+
120+
assert_dir_contents(local_repo_path, expected, exact=False)
121+
122+
123+
@pytest.mark.skipif(
124+
"GITHUB_READ_TOKEN" not in os.environ,
125+
reason="Missing Github read token in environment.",
126+
)
127+
@pytest.mark.parametrize(
128+
"repo_url",
129+
(f"https://{{username}}:{{token}}@{GITHUB}/{JAFFLE_SHOP_PRIVATE}",),
130+
)
131+
def test_download_dbt_project_from_https_private_github_repo_using_token(
132+
tmp_path, repo_url, assert_dir_contents
133+
):
134+
"""Test downloading dbt project from GitHub private fork of dbt-labs' jaffle-shop.
135+
136+
In this test we use an HTTPS connection to access GitHub. As the repo is
137+
private, we need to authenticate. In this test, we are authenticating with a GitHub
138+
Personal Access Token. Said token will be fetched from the GITHUB_READ_TOKEN env
139+
variable. If missing, this test is skipped. The username is read from GITHUB_USERNAME.
140+
141+
The user the token represents should have access to the test GitHub repo. We
142+
have no way to check this though. Modify the JAFFLE_SHOP_PRIVATE variable with your
143+
own private fork.
144+
"""
145+
username, token = os.environ["GITHUB_USERNAME"], os.environ["GITHUB_READ_TOKEN"]
146+
147+
remote = DbtGitRemoteHook()
148+
source = URL(repo_url.format(username=username, token=token))
149+
local_repo_path = remote.download_dbt_project(source, tmp_path)
150+
151+
expected = [
152+
URL(local_repo_path / "dbt_project.yml"),
153+
URL(local_repo_path / "models" / "customers.sql"),
154+
URL(local_repo_path / "models" / "orders.sql"),
155+
URL(local_repo_path / "seeds" / "raw_customers.csv"),
156+
URL(local_repo_path / "seeds" / "raw_orders.csv"),
157+
]
158+
159+
assert local_repo_path.exists()
160+
161+
assert_dir_contents(local_repo_path, expected, exact=False)
162+
163+
164+
@pytest.mark.xfail(
165+
strict=False,
166+
reason="Attempting to clone from GitLab may fail for missing keys.",
167+
)
22168
@pytest.mark.parametrize(
23169
"repo_url",
24170
(
25-
f"ssh://{PLATFORM}:{JAFFLE_SHOP_REPO}",
26-
f"git+ssh://{PLATFORM}:{JAFFLE_SHOP_REPO}",
27-
f"https://{PLATFORM}/{JAFFLE_SHOP_REPO}",
28-
f"http://{PLATFORM}/{JAFFLE_SHOP_REPO}",
171+
f"ssh://{GITLAB}:{JAFFLE_SHOP}",
172+
f"git+ssh://{GITLAB}:{JAFFLE_SHOP}",
29173
),
30174
)
31-
def test_download_dbt_project(tmp_path, repo_url, assert_dir_contents):
32-
"""Test downloading dbt project from dbt-lab's very own jaffle-shop."""
175+
def test_download_dbt_project_from_ssh_public_gitlab_repo(
176+
tmp_path, repo_url, assert_dir_contents
177+
):
178+
"""Test downloading dbt project from GitLab public fork of dbt-labs' jaffle-shop.
179+
180+
In this test we use an SSH connection to access GitLab. Currently, this requires an
181+
SSH key to be set up on the host, so the tests are flaky by design. Future tests will
182+
rely on Airflow connections and test SSH keys instead.
183+
"""
33184
remote = DbtGitRemoteHook()
34185
source = URL(repo_url)
35186
local_repo_path = remote.download_dbt_project(source, tmp_path)
@@ -47,6 +198,90 @@ def test_download_dbt_project(tmp_path, repo_url, assert_dir_contents):
47198
assert_dir_contents(local_repo_path, expected, exact=False)
48199

49200

201+
@pytest.mark.skipif(
202+
"GITLAB_READ_TOKEN" not in os.environ,
203+
reason="Missing GitLab read token in environment.",
204+
)
205+
@pytest.mark.parametrize(
206+
"repo_url",
207+
(f"https://oauth2:{{token}}@{GITLAB}/{JAFFLE_SHOP_PRIVATE}",),
208+
)
209+
def test_download_dbt_project_from_https_private_gitlab_repo_using_token(
210+
tmp_path, repo_url, assert_dir_contents
211+
):
212+
"""Test downloading dbt project from GitLab private fork of dbt-labs' jaffle-shop.
213+
214+
In this test we use an HTTPS connection to access GitLab. As the repo is
215+
private, we need to authenticate. In this test, we are authenticating with a GitLab
216+
Personal Access Token. Said token will be fetched from the GITLAB_READ_TOKEN env
217+
variable. If missing, this test is skipped.
218+
219+
The user the token represents should have access to the test GitLab repo. We
220+
have no way to check this though. Modify the JAFFLE_SHOP_PRIVATE variable with your
221+
own private fork.
222+
"""
223+
token = os.environ["GITLAB_READ_TOKEN"]
224+
225+
remote = DbtGitRemoteHook()
226+
source = URL(repo_url.format(token=token))
227+
local_repo_path = remote.download_dbt_project(source, tmp_path)
228+
229+
expected = [
230+
URL(local_repo_path / "dbt_project.yml"),
231+
URL(local_repo_path / "models" / "customers.sql"),
232+
URL(local_repo_path / "models" / "orders.sql"),
233+
URL(local_repo_path / "seeds" / "raw_customers.csv"),
234+
URL(local_repo_path / "seeds" / "raw_orders.csv"),
235+
]
236+
237+
assert local_repo_path.exists()
238+
239+
assert_dir_contents(local_repo_path, expected, exact=False)
240+
241+
242+
@pytest.mark.skipif(
243+
any(
244+
env_var not in os.environ for env_var in ("GITLAB_USERNAME", "GITLAB_PASSWORD")
245+
),
246+
reason="Missing GitLab credentials in environment.",
247+
)
248+
@pytest.mark.parametrize(
249+
"repo_url",
250+
(f"https://{{username}}:{{password}}@{GITLAB}/{JAFFLE_SHOP_PRIVATE}",),
251+
)
252+
def test_download_dbt_project_from_https_private_gitlab_repo_using_credentials(
253+
tmp_path, repo_url, assert_dir_contents
254+
):
255+
"""Test downloading dbt project from GitLab private fork of dbt-labs' jaffle-shop.
256+
257+
In this test we use an HTTPS connection to access GitLab. As the repo is
258+
private, we need to authenticate. In this test, we are authenticating with GitLab
259+
credentials (username and password). Said credentials will be fetched from the
260+
GITLAB_USERNAME and GITLAB_PASSWORD env variables. If missing, this test is skipped.
261+
262+
The user the credentials represent should have access to the test GitLab repo. We
263+
have no way to check this though. Modify the JAFFLE_SHOP_PRIVATE variable with your
264+
own private fork.
265+
"""
266+
username, password = os.environ["GITLAB_USERNAME"], os.environ["GITLAB_PASSWORD"]
267+
268+
remote = DbtGitRemoteHook()
269+
source = URL(repo_url.format(username=username, password=password))
270+
local_repo_path = remote.download_dbt_project(source, tmp_path)
271+
272+
expected = [
273+
URL(local_repo_path / "dbt_project.yml"),
274+
URL(local_repo_path / "models" / "customers.sql"),
275+
URL(local_repo_path / "models" / "orders.sql"),
276+
URL(local_repo_path / "seeds" / "raw_customers.csv"),
277+
URL(local_repo_path / "seeds" / "raw_orders.csv"),
278+
]
279+
280+
assert local_repo_path.exists()
281+
282+
assert_dir_contents(local_repo_path, expected, exact=False)
283+
284+
50285
@pytest.fixture
51286
def repo_name():
52287
"""A testing local git repo name."""

0 commit comments

Comments
 (0)