diff --git a/src/taskgraph/util/vcs.py b/src/taskgraph/util/vcs.py index efed2c5bb..602cb81c5 100644 --- a/src/taskgraph/util/vcs.py +++ b/src/taskgraph/util/vcs.py @@ -505,6 +505,11 @@ def get_changed_files(self, diff_filter=None, mode=None, rev=None, base=None): cmd.append("--cached") elif mode == "all": cmd.append("HEAD") + elif base == self.NULL_REVISION: + # When base is NULL_REVISION (e.g. new branches on GitHub), diff + # from the empty tree to show all files present at rev. Use Git's + # well-known empty tree hash. + cmd = ["diff", "4b825dc642cb6eb9a060e54bf8d69288fbee4904", rev] elif self.is_shallow: # In shallow clones, `git log` won't have the history necessary to # determine the files changed. Using `git diff` finds the diff --git a/test/test_util_vcs.py b/test/test_util_vcs.py index e68821bce..da710307e 100644 --- a/test/test_util_vcs.py +++ b/test/test_util_vcs.py @@ -594,3 +594,57 @@ def test_get_changed_files_shallow_clone(git_repo, tmp_path, default_git_branch) modified = shallow_repo.get_changed_files("M", "all", feature_commit, main_commit) assert "file_to_modify.txt" in modified + + +def test_get_changed_files_with_null_base_revision(repo): + second_file = os.path.join(repo.path, "second_file") + with open(second_file, "w") as f: + f.write("second file content") + + repo.run("add", second_file) + repo.run("commit", "-m", "Add second file") + + head_rev = repo.head_rev + + changed_files = repo.get_changed_files( + "AMD", "all", rev=head_rev, base=Repository.NULL_REVISION + ) + + assert isinstance(changed_files, list) + # When base is NULL_REVISION, we should get all files from the beginning + # of history up to head_rev. This includes both the initial "first_file" + # and the newly added "second_file". 
+ assert "first_file" in changed_files + assert "second_file" in changed_files + + +def test_get_changed_files_with_null_base_revision_shallow_clone( + git_repo, tmp_path, default_git_branch +): + tmp_repo = Path(git_repo) + + (tmp_repo / "file1.txt").write_text("content 1") + (tmp_repo / "file2.txt").write_text("content 2") + subprocess.check_call(["git", "add", "."], cwd=tmp_repo) + subprocess.check_call(["git", "commit", "-m", "Add files"], cwd=tmp_repo) + + commit_hash = subprocess.check_output( + ["git", "rev-parse", "HEAD"], cwd=tmp_repo, text=True + ).strip() + + shallow_path = tmp_path / "shallow_null_test" + subprocess.check_call( + ["git", "clone", "--depth=1", f"file://{tmp_repo}", str(shallow_path)], + cwd=tmp_path, + ) + + shallow_repo = get_repository(str(shallow_path)) + assert shallow_repo.is_shallow + + changed_files = shallow_repo.get_changed_files( + "AMD", "all", rev=commit_hash, base=Repository.NULL_REVISION + ) + + assert "first_file" in changed_files + assert "file1.txt" in changed_files + assert "file2.txt" in changed_files