From 15372b11bb14bb4fc6be86746b5de051eb613839 Mon Sep 17 00:00:00 2001 From: Minh Vu <38443830+fallintoplace@users.noreply.github.com> Date: Thu, 14 May 2026 12:19:05 +0200 Subject: [PATCH] fix: harden local directory copy against symlink swaps --- src/agents/sandbox/entries/artifacts.py | 278 ++++++++++++++++++------ tests/sandbox/test_entries.py | 200 +++++++++++++++++ 2 files changed, 417 insertions(+), 61 deletions(-) diff --git a/src/agents/sandbox/entries/artifacts.py b/src/agents/sandbox/entries/artifacts.py index 7412126260..daffc4fbdb 100644 --- a/src/agents/sandbox/entries/artifacts.py +++ b/src/agents/sandbox/entries/artifacts.py @@ -397,7 +397,10 @@ def _list_local_dir_files_from_dir_fd( try: child_fd = os.open(entry.name, dir_flags, dir_fd=dir_fd) child_stat = os.fstat(child_fd) - if not stat.S_ISDIR(child_stat.st_mode): + if not stat.S_ISDIR(child_stat.st_mode) or not os.path.samestat( + entry_stat, + child_stat, + ): raise LocalDirReadError( src=src_root, context={ @@ -499,48 +502,25 @@ def _open_local_dir_file_for_copy( dir_fds.append(current_fd) for part in rel_child.parts[:-1]: current_rel = current_rel / part if current_rel.parts else Path(part) - try: - next_fd = os.open(part, dir_flags, dir_fd=current_fd) - except OSError as e: - raise self._local_dir_open_error( - src_root=src_root, - parent_fd=current_fd, - entry_name=part, - rel_child=current_rel, - expect_dir=True, - error=e, - ) from e - next_stat = os.fstat(next_fd) - if not stat.S_ISDIR(next_stat.st_mode): - raise LocalDirReadError( - src=src_root, - context={ - "reason": "path_changed_during_copy", - "child": rel_child.as_posix(), - }, - ) - dir_fds.append(next_fd) - current_fd = next_fd - - try: - leaf_fd = os.open(rel_child.name, file_flags, dir_fd=current_fd) - except OSError as e: - raise self._local_dir_open_error( + next_fd = self._open_local_dir_entry_checked( src_root=src_root, parent_fd=current_fd, - entry_name=rel_child.name, - rel_child=rel_child, - expect_dir=False, - error=e, - ) from e - leaf_stat = os.fstat(leaf_fd) - if not stat.S_ISREG(leaf_stat.st_mode): - os.close(leaf_fd) - raise LocalDirReadError( - src=src_root, - context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + entry_name=part, + rel_child=current_rel, + expect_dir=True, + flags=dir_flags, ) - return leaf_fd + dir_fds.append(next_fd) + current_fd = next_fd + + return self._open_local_dir_entry_checked( + src_root=src_root, + parent_fd=current_fd, + entry_name=rel_child.name, + rel_child=rel_child, + expect_dir=False, + flags=file_flags, + ) except FileNotFoundError: raise LocalDirReadError( src=src_root, @@ -583,30 +563,23 @@ def _open_local_dir_src_root_fd( parts = self.src.parts try: - current_fd = os.open(current_path, dir_flags) + current_fd = self._open_local_dir_path_checked( + src_root=src_root, + path=current_path, + rel_child=Path(self._local_dir_source_child_label(base_dir, current_path)), + flags=dir_flags, + ) dir_fds.append(current_fd) for part in parts: current_rel = current_rel / part if current_rel.parts else Path(part) - try: - next_fd = os.open(part, dir_flags, dir_fd=current_fd) - except OSError as e: - raise self._local_dir_open_error( - src_root=src_root, - parent_fd=current_fd, - entry_name=part, - rel_child=current_rel, - expect_dir=True, - error=e, - ) from e - next_stat = os.fstat(next_fd) - if not stat.S_ISDIR(next_stat.st_mode): - raise LocalDirReadError( - src=src_root, - context={ - "reason": "path_changed_during_copy", - "child": current_rel.as_posix(), - }, - ) + next_fd = self._open_local_dir_entry_checked( + src_root=src_root, + parent_fd=current_fd, + entry_name=part, + rel_child=current_rel, + expect_dir=True, + flags=dir_flags, + ) dir_fds.append(next_fd) current_fd = next_fd return dir_fds.pop() @@ -620,6 +593,189 @@ def _open_local_dir_src_root_fd( for dir_fd in reversed(dir_fds): os.close(dir_fd) + def _open_local_dir_path_checked( + self, + *, + src_root: Path, + path: Path, + rel_child: Path, + flags: int, + ) -> int: + path_stat = self._stat_local_dir_path( + src_root=src_root, + path=path, + rel_child=rel_child, + ) + try: + path_fd = os.open(path, flags) + except OSError as e: + raise self._local_dir_path_open_error( + src_root=src_root, + path=path, + rel_child=rel_child, + error=e, + ) from e + + try: + opened_stat = os.fstat(path_fd) + if not stat.S_ISDIR(opened_stat.st_mode) or not os.path.samestat( + path_stat, + opened_stat, + ): + raise LocalDirReadError( + src=src_root, + context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + ) + return path_fd + except Exception: + os.close(path_fd) + raise + + def _stat_local_dir_path( + self, + *, + src_root: Path, + path: Path, + rel_child: Path, + ) -> os.stat_result: + try: + path_stat = path.stat(follow_symlinks=False) + except FileNotFoundError: + raise LocalDirReadError( + src=src_root, + context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + ) from None + except OSError as e: + raise LocalDirReadError(src=src_root, cause=e) from e + + if stat.S_ISLNK(path_stat.st_mode): + raise LocalDirReadError( + src=src_root, + context={"reason": "symlink_not_supported", "child": rel_child.as_posix()}, + ) + if not stat.S_ISDIR(path_stat.st_mode): + raise LocalDirReadError( + src=src_root, + context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + ) + return path_stat + + def _open_local_dir_entry_checked( + self, + *, + src_root: Path, + parent_fd: int, + entry_name: str, + rel_child: Path, + expect_dir: bool, + flags: int, + ) -> int: + entry_stat = self._stat_local_dir_entry( + src_root=src_root, + parent_fd=parent_fd, + entry_name=entry_name, + rel_child=rel_child, + expect_dir=expect_dir, + ) + try: + entry_fd = os.open(entry_name, flags, dir_fd=parent_fd) + except OSError as e: + raise self._local_dir_open_error( + src_root=src_root, + parent_fd=parent_fd, + entry_name=entry_name, + rel_child=rel_child, + expect_dir=expect_dir, + error=e, + ) from e + + try: + opened_stat = os.fstat(entry_fd) + if not self._local_dir_entry_stat_matches( + opened_stat, + expect_dir=expect_dir, + ) or not os.path.samestat(entry_stat, opened_stat): + raise LocalDirReadError( + src=src_root, + context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + ) + return entry_fd + except Exception: + os.close(entry_fd) + raise + + def _stat_local_dir_entry( + self, + *, + src_root: Path, + parent_fd: int, + entry_name: str, + rel_child: Path, + expect_dir: bool, + ) -> os.stat_result: + try: + entry_stat = os.stat(entry_name, dir_fd=parent_fd, follow_symlinks=False) + except FileNotFoundError: + raise LocalDirReadError( + src=src_root, + context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + ) from None + except OSError as e: + raise LocalDirReadError(src=src_root, cause=e) from e + + if stat.S_ISLNK(entry_stat.st_mode): + raise LocalDirReadError( + src=src_root, + context={"reason": "symlink_not_supported", "child": rel_child.as_posix()}, + ) + if not self._local_dir_entry_stat_matches(entry_stat, expect_dir=expect_dir): + raise LocalDirReadError( + src=src_root, + context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + ) + return entry_stat + + @staticmethod + def _local_dir_entry_stat_matches(entry_stat: os.stat_result, *, expect_dir: bool) -> bool: + if expect_dir: + return stat.S_ISDIR(entry_stat.st_mode) + return stat.S_ISREG(entry_stat.st_mode) + + def _local_dir_path_open_error( + self, + *, + src_root: Path, + path: Path, + rel_child: Path, + error: OSError, + ) -> LocalDirReadError: + try: + path_stat = path.stat(follow_symlinks=False) + except FileNotFoundError: + return LocalDirReadError( + src=src_root, + context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + ) + except OSError: + path_stat = None + + if path_stat is not None and stat.S_ISLNK(path_stat.st_mode): + return LocalDirReadError( + src=src_root, + context={"reason": "symlink_not_supported", "child": rel_child.as_posix()}, + ) + if path_stat is not None and not stat.S_ISDIR(path_stat.st_mode): + return LocalDirReadError( + src=src_root, + context={"reason": "path_changed_during_copy", "child": rel_child.as_posix()}, + ) + if error.errno == errno.ELOOP: + return LocalDirReadError( + src=src_root, + context={"reason": "symlink_not_supported", "child": rel_child.as_posix()}, + ) + return LocalDirReadError(src=src_root, cause=error) + def _local_dir_open_error( self, *, diff --git a/tests/sandbox/test_entries.py b/tests/sandbox/test_entries.py index d00d008137..027021f26c 100644 --- a/tests/sandbox/test_entries.py +++ b/tests/sandbox/test_entries.py @@ -496,6 +496,59 @@ def swap_then_open( assert session.writes == {} +@pytest.mark.asyncio +async def test_local_dir_copy_rejects_swapped_file_if_no_follow_is_bypassed( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + if not artifacts_module._OPEN_SUPPORTS_DIR_FD or not artifacts_module._HAS_O_DIRECTORY: + pytest.skip("safe dir_fd open pinning is unavailable on this platform") + + src_root = tmp_path / "src" + src_root.mkdir() + src_file = src_root / "safe.txt" + src_file.write_text("safe", encoding="utf-8") + secret = tmp_path / "secret.txt" + secret.write_text("secret", encoding="utf-8") + session = _RecordingSession() + local_dir = LocalDir(src=Path("src")) + original_open = os.open + no_follow = getattr(os, "O_NOFOLLOW", 0) + swapped = False + + def swap_then_open( + path: str | Path, + flags: int, + mode: int = 0o777, + *, + dir_fd: int | None = None, + ) -> int: + nonlocal swapped + if (path == "safe.txt" or Path(path) == src_file) and not swapped: + src_file.unlink() + _symlink_or_skip(src_file, secret) + flags &= ~no_follow + swapped = True + if dir_fd is None: + return original_open(path, flags, mode) + return original_open(path, flags, mode, dir_fd=dir_fd) + + monkeypatch.setattr("agents.sandbox.entries.artifacts.os.open", swap_then_open) + + with pytest.raises(LocalDirReadError) as excinfo: + await local_dir._copy_local_dir_file( + base_dir=tmp_path, + session=session, + src_root=src_root, + src=src_file, + dest_root=Path("/workspace/copied"), + ) + + assert excinfo.value.context["reason"] == "path_changed_during_copy" + assert excinfo.value.context["child"] == "safe.txt" + assert session.writes == {} + + @pytest.mark.asyncio async def test_local_dir_copy_pins_parent_directories_during_open( monkeypatch: pytest.MonkeyPatch, @@ -548,6 +601,57 @@ def swap_parent_then_open( assert session.writes[Path("/workspace/copied/nested/safe.txt")] == b"safe" +@pytest.mark.asyncio +async def test_local_dir_apply_rejects_nested_dir_swap_during_listing_if_no_follow_is_bypassed( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + if not artifacts_module._OPEN_SUPPORTS_DIR_FD or not artifacts_module._HAS_O_DIRECTORY: + pytest.skip("safe dir_fd open pinning is unavailable on this platform") + + src_root = tmp_path / "src" + src_root.mkdir() + nested_dir = src_root / "nested" + nested_dir.mkdir() + (nested_dir / "safe.txt").write_text("safe", encoding="utf-8") + secret_dir = tmp_path / "secret-dir" + secret_dir.mkdir() + (secret_dir / "secret.txt").write_text("secret", encoding="utf-8") + session = _RecordingSession() + local_dir = LocalDir(src=Path("src")) + original_open = os.open + no_follow = getattr(os, "O_NOFOLLOW", 0) + swapped = False + + def swap_nested_then_open( + path: str | Path, + flags: int, + mode: int = 0o777, + *, + dir_fd: int | None = None, + ) -> int: + nonlocal swapped + if path == "nested" and not swapped: + nested_dir.rename(src_root / "nested-original") + _symlink_or_skip(src_root / "nested", secret_dir, target_is_directory=True) + swapped = True + if path == "nested" and swapped: + flags &= ~no_follow + if dir_fd is None: + return original_open(path, flags, mode) + return original_open(path, flags, mode, dir_fd=dir_fd) + + monkeypatch.setattr("agents.sandbox.entries.artifacts.os.open", swap_nested_then_open) + + with pytest.raises(LocalDirReadError) as excinfo: + await local_dir.apply(session, Path("/workspace/copied"), tmp_path) + + assert excinfo.value.context["reason"] == "path_changed_during_copy" + assert excinfo.value.context["child"] == "nested" + assert Path("/workspace/copied/nested/secret.txt") not in session.writes + assert session.writes == {} + + @pytest.mark.asyncio async def test_local_dir_copy_fallback_rejects_swapped_parent_directory( monkeypatch: pytest.MonkeyPatch, @@ -647,6 +751,102 @@ def swap_root_then_open( assert session.writes == {} +@pytest.mark.asyncio +async def test_local_dir_apply_rejects_source_root_swap_if_no_follow_is_bypassed( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + if not artifacts_module._OPEN_SUPPORTS_DIR_FD or not artifacts_module._HAS_O_DIRECTORY: + pytest.skip("safe dir_fd open pinning is unavailable on this platform") + + src_root = tmp_path / "src" + src_root.mkdir() + (src_root / "safe.txt").write_text("safe", encoding="utf-8") + secret_dir = tmp_path / "secret-dir" + secret_dir.mkdir() + (secret_dir / "secret.txt").write_text("secret", encoding="utf-8") + session = _RecordingSession() + local_dir = LocalDir(src=Path("src")) + original_open = os.open + no_follow = getattr(os, "O_NOFOLLOW", 0) + swapped = False + + def swap_root_then_open( + path: str | Path, + flags: int, + mode: int = 0o777, + *, + dir_fd: int | None = None, + ) -> int: + nonlocal swapped + if path == "src" and not swapped: + src_root.rename(tmp_path / "src-original") + _symlink_or_skip(tmp_path / "src", secret_dir, target_is_directory=True) + flags &= ~no_follow + swapped = True + if dir_fd is None: + return original_open(path, flags, mode) + return original_open(path, flags, mode, dir_fd=dir_fd) + + monkeypatch.setattr("agents.sandbox.entries.artifacts.os.open", swap_root_then_open) + + with pytest.raises(LocalDirReadError) as excinfo: + await local_dir.apply(session, Path("/workspace/copied"), tmp_path) + + assert excinfo.value.context["reason"] == "path_changed_during_copy" + assert excinfo.value.context["child"] == "src" + assert Path("/workspace/copied/secret.txt") not in session.writes + assert session.writes == {} + + +@pytest.mark.asyncio +async def test_local_dir_apply_rejects_current_dir_source_swap_if_no_follow_is_bypassed( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + if not artifacts_module._OPEN_SUPPORTS_DIR_FD or not artifacts_module._HAS_O_DIRECTORY: + pytest.skip("safe dir_fd open pinning is unavailable on this platform") + + base_dir = tmp_path / "base" + base_dir.mkdir() + (base_dir / "safe.txt").write_text("safe", encoding="utf-8") + secret_dir = tmp_path / "secret-dir" + secret_dir.mkdir() + (secret_dir / "secret.txt").write_text("secret", encoding="utf-8") + session = _RecordingSession() + local_dir = LocalDir(src=Path(".")) + original_open = os.open + no_follow = getattr(os, "O_NOFOLLOW", 0) + swapped = False + + def swap_base_then_open( + path: str | Path, + flags: int, + mode: int = 0o777, + *, + dir_fd: int | None = None, + ) -> int: + nonlocal swapped + if Path(path) == base_dir and not swapped: + base_dir.rename(tmp_path / "base-original") + _symlink_or_skip(base_dir, secret_dir, target_is_directory=True) + flags &= ~no_follow + swapped = True + if dir_fd is None: + return original_open(path, flags, mode) + return original_open(path, flags, mode, dir_fd=dir_fd) + + monkeypatch.setattr("agents.sandbox.entries.artifacts.os.open", swap_base_then_open) + + with pytest.raises(LocalDirReadError) as excinfo: + await local_dir.apply(session, Path("/workspace/copied"), base_dir) + + assert excinfo.value.context["reason"] == "path_changed_during_copy" + assert excinfo.value.context["child"] == "." + assert Path("/workspace/copied/secret.txt") not in session.writes + assert session.writes == {} + + @pytest.mark.asyncio async def test_local_dir_apply_fallback_rejects_source_root_swapped_to_symlink_after_validation( monkeypatch: pytest.MonkeyPatch,