From ff94b4249f1da844b425f85f9d5d48e43e2948b8 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Mon, 16 Mar 2026 15:01:37 -0400 Subject: [PATCH 1/2] test: add test case for null base revision in GitHub --- test/test_util_vcs.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/test/test_util_vcs.py b/test/test_util_vcs.py index e68821bce..8929e6d07 100644 --- a/test/test_util_vcs.py +++ b/test/test_util_vcs.py @@ -594,3 +594,23 @@ def test_get_changed_files_shallow_clone(git_repo, tmp_path, default_git_branch) modified = shallow_repo.get_changed_files("M", "all", feature_commit, main_commit) assert "file_to_modify.txt" in modified + + +def test_get_changed_files_with_null_base_revision(repo): + if repo.tool == "git": + pytest.xfail() + + second_file = os.path.join(repo.path, "second_file") + with open(second_file, "w") as f: + f.write("second file content") + + repo.run("add", second_file) + repo.run("commit", "-m", "Add second file") + + head_rev = repo.head_rev + + changed_files = repo.get_changed_files( + "AMD", "all", rev=head_rev, base=Repository.NULL_REVISION + ) + + assert isinstance(changed_files, list) From b17c17fe45444b91a0f351e61735d83269f5e8e0 Mon Sep 17 00:00:00 2001 From: Andrew Halberstadt Date: Mon, 16 Mar 2026 15:13:56 -0400 Subject: [PATCH 2/2] fix: 'vcs.get_changed_files' with null base revision This case can happen when you push a new branch to GitHub. 
Issue: #864 --- src/taskgraph/util/vcs.py | 5 +++++ test/test_util_vcs.py | 40 ++++++++++++++++++++++++++++++++++++--- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/taskgraph/util/vcs.py b/src/taskgraph/util/vcs.py index efed2c5bb..602cb81c5 100644 --- a/src/taskgraph/util/vcs.py +++ b/src/taskgraph/util/vcs.py @@ -505,6 +505,11 @@ def get_changed_files(self, diff_filter=None, mode=None, rev=None, base=None): cmd.append("--cached") elif mode == "all": cmd.append("HEAD") + elif base == self.NULL_REVISION: + # When base is NULL_REVISION (e.g. new branches on GitHub), diff + # from the empty tree to show all files present at rev. Use Git's + # well-known empty tree hash. + cmd = ["diff", "4b825dc642cb6eb9a060e54bf8d69288fbee4904", rev] elif self.is_shallow: # In shallow clones, `git log` won't have the history necessary to # determine the files changed. Using `git diff` finds the diff --git a/test/test_util_vcs.py b/test/test_util_vcs.py index 8929e6d07..da710307e 100644 --- a/test/test_util_vcs.py +++ b/test/test_util_vcs.py @@ -597,9 +597,6 @@ def test_get_changed_files_shallow_clone(git_repo, tmp_path, default_git_branch) def test_get_changed_files_with_null_base_revision(repo): - if repo.tool == "git": - pytest.xfail() - second_file = os.path.join(repo.path, "second_file") with open(second_file, "w") as f: f.write("second file content") @@ -614,3 +611,40 @@ def test_get_changed_files_with_null_base_revision(repo): ) assert isinstance(changed_files, list) + # When base is NULL_REVISION, we should get all files from the beginning + # of history up to head_rev. This includes both the initial "first_file" + # and the newly added "second_file". 
+ assert "first_file" in changed_files + assert "second_file" in changed_files + + +def test_get_changed_files_with_null_base_revision_shallow_clone( + git_repo, tmp_path, default_git_branch +): + tmp_repo = Path(git_repo) + + (tmp_repo / "file1.txt").write_text("content 1") + (tmp_repo / "file2.txt").write_text("content 2") + subprocess.check_call(["git", "add", "."], cwd=tmp_repo) + subprocess.check_call(["git", "commit", "-m", "Add files"], cwd=tmp_repo) + + commit_hash = subprocess.check_output( + ["git", "rev-parse", "HEAD"], cwd=tmp_repo, text=True + ).strip() + + shallow_path = tmp_path / "shallow_null_test" + subprocess.check_call( + ["git", "clone", "--depth=1", f"file://{tmp_repo}", str(shallow_path)], + cwd=tmp_path, + ) + + shallow_repo = get_repository(str(shallow_path)) + assert shallow_repo.is_shallow + + changed_files = shallow_repo.get_changed_files( + "AMD", "all", rev=commit_hash, base=Repository.NULL_REVISION + ) + + assert "first_file" in changed_files + assert "file1.txt" in changed_files + assert "file2.txt" in changed_files