11"""Unit test module for DbtGitRemoteHook."""
22import multiprocessing
3+ import os
34import shutil
5+ from urllib .parse import quote
46
57import pytest
68from dulwich .repo import Repo
911from airflow_dbt_python .hooks .git import DbtGitRemoteHook
1012from airflow_dbt_python .utils .url import URL
1113
12- JAFFLE_SHOP_REPO = "dbt-labs/jaffle_shop"
13- PLATFORM = "github.com"
# Repository ("owner/name") cloned by the public-repo tests. Set GIT_TEST_REPO to
# point the tests at a different fork.
JAFFLE_SHOP = os.environ.get("GIT_TEST_REPO", "tomasfarias/jaffle_shop")

# Repository ("owner/name") cloned by the tests that authenticate. Set
# GIT_PRIVATE_TEST_REPO to point the tests at your own private fork.
JAFFLE_SHOP_PRIVATE = os.environ.get(
    "GIT_PRIVATE_TEST_REPO", "tomasfarias/jaffle_shop_private"
)

# Hostnames of the git platforms the tests clone from.
GITHUB = "github.com"
GITLAB = "gitlab.com"
20+
21+
@pytest.mark.parametrize(
    "repo_url",
    (
        f"https://{GITHUB}/{JAFFLE_SHOP}",
        f"http://{GITHUB}/{JAFFLE_SHOP}",
    ),
)
def test_download_dbt_project_from_http_public_github_repo(
    tmp_path, repo_url, assert_dir_contents
):
    """Test downloading a dbt project from a public GitHub fork of jaffle_shop.

    The repository is accessed over HTTP and HTTPS (one parametrized case each).
    No credentials are passed, as the test repo is expected to be public.

    The download target is pytest's ``tmp_path``. The test passes when the
    returned local path exists and ``assert_dir_contents`` accepts the expected
    project files (called with ``exact=False``).
    """
    remote = DbtGitRemoteHook()
    source = URL(repo_url)
    local_repo_path = remote.download_dbt_project(source, tmp_path)

    models_dir = local_repo_path / "models"
    seeds_dir = local_repo_path / "seeds"
    expected = [
        URL(local_repo_path / "dbt_project.yml"),
        URL(models_dir / "customers.sql"),
        URL(models_dir / "orders.sql"),
        URL(seeds_dir / "raw_customers.csv"),
        URL(seeds_dir / "raw_orders.csv"),
    ]

    assert local_repo_path.exists()

    assert_dir_contents(local_repo_path, expected, exact=False)
1452
1553
@pytest.mark.xfail(
    strict=False,
    reason="Attempting to clone from GitHub may fail for missing keys.",
)
@pytest.mark.parametrize(
    "repo_url",
    (
        f"ssh://{GITHUB}:{JAFFLE_SHOP}",
        f"git+ssh://{GITHUB}:{JAFFLE_SHOP}",
    ),
)
def test_download_dbt_project_from_ssh_public_github_repo(
    tmp_path, repo_url, assert_dir_contents
):
    """Test downloading a dbt project from a public GitHub fork of jaffle_shop.

    The repository is accessed over SSH, using both the ``ssh://`` and
    ``git+ssh://`` schemes. Currently this requires an SSH key to be set up on
    the host, so the test is flaky by design and marked as a non-strict xfail.
    Future tests will rely on Airflow connections and test SSH keys instead.

    The download target is pytest's ``tmp_path``. The test passes when the
    returned local path exists and ``assert_dir_contents`` accepts the expected
    project files (called with ``exact=False``).
    """
    remote = DbtGitRemoteHook()
    source = URL(repo_url)
    local_repo_path = remote.download_dbt_project(source, tmp_path)

    models_dir = local_repo_path / "models"
    seeds_dir = local_repo_path / "seeds"
    expected = [
        URL(local_repo_path / "dbt_project.yml"),
        URL(models_dir / "customers.sql"),
        URL(models_dir / "orders.sql"),
        URL(seeds_dir / "raw_customers.csv"),
        URL(seeds_dir / "raw_orders.csv"),
    ]

    assert local_repo_path.exists()

    assert_dir_contents(local_repo_path, expected, exact=False)
89+
90+
@pytest.mark.parametrize(
    "repo_url",
    (
        f"https://{GITLAB}/{JAFFLE_SHOP}",
        f"http://{GITLAB}/{JAFFLE_SHOP}",
    ),
)
def test_download_dbt_project_from_http_public_gitlab_repo(
    tmp_path, repo_url, assert_dir_contents
):
    """Test downloading a dbt project from a public GitLab fork of jaffle_shop.

    The repository is accessed over HTTP and HTTPS (one parametrized case each).
    No credentials are passed, as the test repo is expected to be public.

    The download target is pytest's ``tmp_path``. The test passes when the
    returned local path exists and ``assert_dir_contents`` accepts the expected
    project files (called with ``exact=False``).
    """
    remote = DbtGitRemoteHook()
    source = URL(repo_url)
    local_repo_path = remote.download_dbt_project(source, tmp_path)

    models_dir = local_repo_path / "models"
    seeds_dir = local_repo_path / "seeds"
    expected = [
        URL(local_repo_path / "dbt_project.yml"),
        URL(models_dir / "customers.sql"),
        URL(models_dir / "orders.sql"),
        URL(seeds_dir / "raw_customers.csv"),
        URL(seeds_dir / "raw_orders.csv"),
    ]

    assert local_repo_path.exists()

    assert_dir_contents(local_repo_path, expected, exact=False)
121+
122+
@pytest.mark.skipif(
    # Both variables are read in the test body, so skip unless both are present;
    # otherwise a missing GITHUB_USERNAME surfaces as a KeyError, not a skip.
    any(
        env_var not in os.environ
        for env_var in ("GITHUB_USERNAME", "GITHUB_READ_TOKEN")
    ),
    reason="Missing GitHub username or read token in environment.",
)
@pytest.mark.parametrize(
    "repo_url",
    # Doubled braces survive the f-string as "{username}" / "{token}" placeholders
    # that are filled in with str.format inside the test.
    (f"https://{{username}}:{{token}}@{GITHUB}/{JAFFLE_SHOP_PRIVATE}",),
)
def test_download_dbt_project_from_https_private_github_repo_using_token(
    tmp_path, repo_url, assert_dir_contents
):
    """Test downloading a dbt project from a private GitHub fork of jaffle_shop.

    The repository is accessed over HTTPS. As the repo is private, we need to
    authenticate: here we use a GitHub Personal Access Token embedded in the URL
    together with the username. The username is read from the GITHUB_USERNAME
    env variable and the token from GITHUB_READ_TOKEN. If either is missing,
    this test is skipped.

    The user the token represents should have access to the test GitHub repo. We
    have no way to check this though. Modify the JAFFLE_SHOP_PRIVATE variable
    with your own private fork.
    """
    username = os.environ["GITHUB_USERNAME"]
    token = os.environ["GITHUB_READ_TOKEN"]

    remote = DbtGitRemoteHook()
    source = URL(repo_url.format(username=username, token=token))
    local_repo_path = remote.download_dbt_project(source, tmp_path)

    models_dir = local_repo_path / "models"
    seeds_dir = local_repo_path / "seeds"
    expected = [
        URL(local_repo_path / "dbt_project.yml"),
        URL(models_dir / "customers.sql"),
        URL(models_dir / "orders.sql"),
        URL(seeds_dir / "raw_customers.csv"),
        URL(seeds_dir / "raw_orders.csv"),
    ]

    assert local_repo_path.exists()

    assert_dir_contents(local_repo_path, expected, exact=False)
162+
163+
164+ @pytest .mark .xfail (
165+ strict = False ,
166+ reason = "Attempting to clone from GitLab may fail for missing keys." ,
167+ )
22168@pytest .mark .parametrize (
23169 "repo_url" ,
24170 (
25- f"ssh://{ PLATFORM } :{ JAFFLE_SHOP_REPO } " ,
26- f"git+ssh://{ PLATFORM } :{ JAFFLE_SHOP_REPO } " ,
27- f"https://{ PLATFORM } /{ JAFFLE_SHOP_REPO } " ,
28- f"http://{ PLATFORM } /{ JAFFLE_SHOP_REPO } " ,
171+ f"ssh://{ GITLAB } :{ JAFFLE_SHOP } " ,
172+ f"git+ssh://{ GITLAB } :{ JAFFLE_SHOP } " ,
29173 ),
30174)
31- def test_download_dbt_project (tmp_path , repo_url , assert_dir_contents ):
32- """Test downloading dbt project from dbt-lab's very own jaffle-shop."""
175+ def test_download_dbt_project_from_ssh_public_gitlab_repo (
176+ tmp_path , repo_url , assert_dir_contents
177+ ):
178+ """Test downloading dbt project from GitLab public fork of dbt-lab's jaffle-shop.
179+
180+ In this test we use an SSH connection to access GitLab. Currently, this requires an
181+ SSH key to be setup in the host, so the tests are flaky by design. Future tests will
182+ rely on Airflow connections and test SSH keys instead.
183+ """
33184 remote = DbtGitRemoteHook ()
34185 source = URL (repo_url )
35186 local_repo_path = remote .download_dbt_project (source , tmp_path )
@@ -47,6 +198,90 @@ def test_download_dbt_project(tmp_path, repo_url, assert_dir_contents):
47198 assert_dir_contents (local_repo_path , expected , exact = False )
48199
49200
@pytest.mark.skipif(
    "GITLAB_READ_TOKEN" not in os.environ,
    reason="Missing GitLab read token in environment.",
)
@pytest.mark.parametrize(
    "repo_url",
    # Doubled braces survive the f-string as a "{token}" placeholder that is
    # filled in with str.format inside the test.
    (f"https://oauth2:{{token}}@{GITLAB}/{JAFFLE_SHOP_PRIVATE}",),
)
def test_download_dbt_project_from_https_private_gitlab_repo_using_token(
    tmp_path, repo_url, assert_dir_contents
):
    """Test downloading a dbt project from a private GitLab fork of jaffle_shop.

    The repository is accessed over HTTPS. As the repo is private, we need to
    authenticate: here we use a GitLab Personal Access Token embedded in the URL
    with the fixed ``oauth2`` username. The token is read from the
    GITLAB_READ_TOKEN env variable. If missing, this test is skipped.

    The user the token represents should have access to the test GitLab repo. We
    have no way to check this though. Modify the JAFFLE_SHOP_PRIVATE variable
    with your own private fork.
    """
    token = os.environ["GITLAB_READ_TOKEN"]

    remote = DbtGitRemoteHook()
    source = URL(repo_url.format(token=token))
    local_repo_path = remote.download_dbt_project(source, tmp_path)

    models_dir = local_repo_path / "models"
    seeds_dir = local_repo_path / "seeds"
    expected = [
        URL(local_repo_path / "dbt_project.yml"),
        URL(models_dir / "customers.sql"),
        URL(models_dir / "orders.sql"),
        URL(seeds_dir / "raw_customers.csv"),
        URL(seeds_dir / "raw_orders.csv"),
    ]

    assert local_repo_path.exists()

    assert_dir_contents(local_repo_path, expected, exact=False)
240+
241+
@pytest.mark.skipif(
    any(
        env_var not in os.environ for env_var in ("GITLAB_USERNAME", "GITLAB_PASSWORD")
    ),
    reason="Missing GitLab credentials in environment.",
)
@pytest.mark.parametrize(
    "repo_url",
    # Doubled braces survive the f-string as "{username}" / "{password}"
    # placeholders that are filled in with str.format inside the test.
    (f"https://{{username}}:{{password}}@{GITLAB}/{JAFFLE_SHOP_PRIVATE}",),
)
def test_download_dbt_project_from_https_private_gitlab_repo_using_credentials(
    tmp_path, repo_url, assert_dir_contents
):
    """Test downloading a dbt project from a private GitLab fork of jaffle_shop.

    The repository is accessed over HTTPS. As the repo is private, we need to
    authenticate: here we use GitLab credentials (username and password) embedded
    in the URL. They are read from the GITLAB_USERNAME and GITLAB_PASSWORD env
    variables. If either is missing, this test is skipped.

    The user the credentials represent should have access to the test GitLab
    repo. We have no way to check this though. Modify the JAFFLE_SHOP_PRIVATE
    variable with your own private fork.
    """
    username = os.environ["GITLAB_USERNAME"]
    password = os.environ["GITLAB_PASSWORD"]

    remote = DbtGitRemoteHook()
    source = URL(repo_url.format(username=username, password=password))
    local_repo_path = remote.download_dbt_project(source, tmp_path)

    models_dir = local_repo_path / "models"
    seeds_dir = local_repo_path / "seeds"
    expected = [
        URL(local_repo_path / "dbt_project.yml"),
        URL(models_dir / "customers.sql"),
        URL(models_dir / "orders.sql"),
        URL(seeds_dir / "raw_customers.csv"),
        URL(seeds_dir / "raw_orders.csv"),
    ]

    assert local_repo_path.exists()

    assert_dir_contents(local_repo_path, expected, exact=False)
283+
284+
50285@pytest .fixture
51286def repo_name ():
52287 """A testing local git repo name."""
0 commit comments