From b9dd8f0e13ffb7483cdb68a01683c083f6a27f01 Mon Sep 17 00:00:00 2001 From: "umi-appcoder[bot]" Date: Wed, 1 Jul 2026 18:32:20 +0000 Subject: [PATCH 1/5] fix(sandbox): length-frame exec-stdin writes so file I/O works over a TLS DOCKER_HOST The Docker sandbox materializes files with `docker exec` + `tar -x`/`cat` reading the payload from stdin, then signals end-of-input with `raw_sock.shutdown(SHUT_WR)` (wrapped in `except Exception: pass`). Over a TLS DOCKER_HOST that half-close never delivers a stdin EOF to the container, so the in-container `tar`/`cat` blocks forever and `session.write()` / `apply_manifest()` hang. This reproduces against Docker-in-Docker sidecars and remote daemons reached over TLS (both common setups). Fix without falling back to `put_archive()` (deliberately avoided for volume-driver-mounted containers, see `write`): frame the payload by length and pipe the real command through `head -c `, so the reader stops after exactly bytes regardless of whether the stdin half-close is delivered. Works identically over unix sockets, TLS TCP, and DinD. Adds regression tests asserting the exec command is length-framed and the exact payload is streamed (seekable and non-seekable streams). Co-Authored-By: Claude Opus 4.8 (1M context) --- src/agents/sandbox/sandboxes/docker.py | 43 ++++++++- tests/sandbox/test_docker.py | 127 +++++++++++++++++++++++++ 2 files changed, 169 insertions(+), 1 deletion(-) diff --git a/src/agents/sandbox/sandboxes/docker.py b/src/agents/sandbox/sandboxes/docker.py index ae160fc978..42d536b06a 100644 --- a/src/agents/sandbox/sandboxes/docker.py +++ b/src/agents/sandbox/sandboxes/docker.py @@ -85,6 +85,28 @@ logger = logging.getLogger(__name__) + +def _measure_stream(stream: io.IOBase) -> tuple[int, io.IOBase]: + """Return the remaining byte length of ``stream`` plus a stream to read it. + + Seekable streams are measured in place (and rewound); non-seekable streams + are drained into an in-memory buffer. Used to length-frame exec-stdin writes + so the in-container reader does not depend on a stdin half-close (which is + unreliable over a TLS ``DOCKER_HOST``). + """ + try: + start = stream.tell() + stream.seek(0, io.SEEK_END) + end = stream.tell() + stream.seek(start) + return end - start, stream + except (AttributeError, OSError, ValueError): + data = stream.read() + if isinstance(data, str): + data = data.encode("utf-8") + return len(data), io.BytesIO(data) + + _PREPARE_USER_PTY_PID_SCRIPT = ( 'pid_path="$1"\n' 'pid_user="$2"\n' @@ -562,13 +584,32 @@ async def _stream_into_exec( error_path: Path, user: str | User | None = None, ) -> None: + # Frame the payload by length so the in-container reader terminates on a + # byte count rather than a stdin half-close. Docker's exec-attach stream + # does not carry a reliable stdin EOF over a TLS DOCKER_HOST: the + # ``shutdown(SHUT_WR)`` below is silently swallowed, so ``tar -x`` / ``cat`` + # would block forever waiting for input that never ends (observed against + # Docker-in-Docker sidecars and remote daemons reached over TLS). Piping + # the real command through ``head -c `` makes it stop after exactly + # ```` bytes, independent of transport, and keeps the deliberate + # avoidance of ``put_archive()`` (see ``write``) intact. + payload_length, stream = _measure_stream(stream) + framed_cmd = [ + "sh", + "-c", + 'n=$1; shift; head -c "$n" | "$@"', + "sh", + str(payload_length), + *cmd, + ] + def _write() -> int | None: container_client = self._container.client assert container_client is not None api = container_client.api resp = api.exec_create( self._container.id, - cmd, + framed_cmd, stdin=True, stdout=True, stderr=True, diff --git a/tests/sandbox/test_docker.py b/tests/sandbox/test_docker.py index 52701274eb..8ce8a91a9d 100644 --- a/tests/sandbox/test_docker.py +++ b/tests/sandbox/test_docker.py @@ -704,6 +704,133 @@ def test_docker_start_exec_socket_closes_underlying_http_response() -> None: assert api.response.close_calls == 1 +class _RecordingStreamSocket: + """Exec socket that records stdin bytes and returns EOF immediately, as a + real daemon does once the (length-bounded) in-container command exits.""" + + def __init__(self) -> None: + self.sent = bytearray() + self.shutdown_calls: list[int] = [] + self.closed = False + + @property + def _sock(self) -> _RecordingStreamSocket: + return self + + def sendall(self, data: bytes) -> None: + self.sent.extend(data) + + def shutdown(self, how: int) -> None: + self.shutdown_calls.append(how) + + def recv(self, _n: int) -> bytes: + return b"" + + def close(self) -> None: + self.closed = True + + +class _RecordingStreamAPI: + def __init__(self) -> None: + self.exec_create_calls: list[dict[str, object]] = [] + self.sock = _RecordingStreamSocket() + + def exec_create(self, container_id: str, cmd: list[str], **kwargs: object) -> dict[str, str]: + self.exec_create_calls.append({"container_id": container_id, "cmd": cmd, **kwargs}) + return {"Id": "exec-stream"} + + def exec_start(self, exec_id: str, *, socket: bool = False, tty: bool = False) -> object: + return self.sock + + def exec_inspect(self, exec_id: str) -> dict[str, int]: + return {"ExitCode": 0} + + +def _make_streaming_session(api: _RecordingStreamAPI) -> DockerSandboxSession: + class _Container: + def __init__(self) -> None: + self.client = cast("object", type("_C", (), {"api": api})()) + self.id = "container" + + session = cast(DockerSandboxSession, object.__new__(DockerSandboxSession)) + session._container = cast("object", _Container()) # type: ignore[attr-defined] + session._coerce_exec_user = lambda user=None: "" # type: ignore[attr-defined] + return session + + +@pytest.mark.asyncio +async def test_stream_into_exec_length_frames_stdin_payload() -> None: + """The in-container command is wrapped in ``head -c `` so it terminates on + a byte count rather than a stdin half-close (which is unreliable over a TLS + DOCKER_HOST — see the DinD hang this guards against). Regression test.""" + api = _RecordingStreamAPI() + session = _make_streaming_session(api) + payload = b"hello-\x00\xff-world" * 500 # includes NULs / non-utf8 bytes + + await session._stream_into_exec( + cmd=["tar", "-x", "-C", "/workspace"], + stream=io.BytesIO(payload), + error_path=Path("/workspace"), + ) + + assert len(api.exec_create_calls) == 1 + framed = cast("list[str]", api.exec_create_calls[0]["cmd"]) + assert framed == [ + "sh", + "-c", + 'n=$1; shift; head -c "$n" | "$@"', + "sh", + str(len(payload)), + "tar", + "-x", + "-C", + "/workspace", + ] + # Exactly the payload is streamed, and the count matches the head -c bound — + # so completion never depends on the stdin half-close working. + assert bytes(api.sock.sent) == payload + assert framed[4] == str(len(api.sock.sent)) + + +@pytest.mark.asyncio +async def test_stream_into_exec_frames_non_seekable_stream() -> None: + """A non-seekable stream is buffered so the byte count is still correct.""" + + class _NonSeekable(io.RawIOBase): + def __init__(self, data: bytes) -> None: + self._data = data + self._read = False + + def readable(self) -> bool: + return True + + def seekable(self) -> bool: + return False + + def seek(self, *_a: object, **_k: object) -> int: + raise OSError("not seekable") + + def read(self, _size: int = -1) -> bytes: + if self._read: + return b"" + self._read = True + return self._data + + api = _RecordingStreamAPI() + session = _make_streaming_session(api) + payload = b"x" * 1234 + + await session._stream_into_exec( + cmd=["sh", "-lc", 'cat > "$1"', "sh", "/workspace/f"], + stream=cast(io.IOBase, _NonSeekable(payload)), + error_path=Path("/workspace/f"), + ) + + framed = cast("list[str]", api.exec_create_calls[0]["cmd"]) + assert framed[4] == str(len(payload)) + assert bytes(api.sock.sent) == payload + + @pytest.mark.asyncio async def test_docker_persist_workspace_prunes_ephemeral_entries_from_staged_copy( tmp_path: Path, From a7d491ceede6ff2b4f5f954a765006b0c88abf6f Mon Sep 17 00:00:00 2001 From: "umi-appcoder[bot]" Date: Wed, 1 Jul 2026 19:09:41 +0000 Subject: [PATCH 2/5] sandbox: measure/spool exec-stdin payload off the event loop, bound memory MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback: `_measure_stream` previously drained a non-seekable stream into a BytesIO on the event-loop thread, which both blocked the loop and was unbounded (a large HTTP-body/pipe upload could OOM the process). Now all measuring/spooling runs inside the executor thread (`_write`), and a non-seekable stream is copied into a `SpooledTemporaryFile` — kept in memory up to 16 MiB, then spilled to disk — so the length can be framed without buffering the whole payload in RAM. Seekable streams (the common case: in-memory tar, sized file payloads) are still measured in place with zero copy. The spool is closed in a finally. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/agents/sandbox/sandboxes/docker.py | 154 ++++++++++++++----------- 1 file changed, 89 insertions(+), 65 deletions(-) diff --git a/src/agents/sandbox/sandboxes/docker.py b/src/agents/sandbox/sandboxes/docker.py index 42d536b06a..5abcd2bdc5 100644 --- a/src/agents/sandbox/sandboxes/docker.py +++ b/src/agents/sandbox/sandboxes/docker.py @@ -86,25 +86,42 @@ logger = logging.getLogger(__name__) -def _measure_stream(stream: io.IOBase) -> tuple[int, io.IOBase]: - """Return the remaining byte length of ``stream`` plus a stream to read it. +# Non-seekable payloads are spooled to measure their length; keep small ones in +# RAM and spill larger ones to a temp file so a big upload can't OOM the process. +_STREAM_SPOOL_MAX_SIZE = 16 * 1024 * 1024 - Seekable streams are measured in place (and rewound); non-seekable streams - are drained into an in-memory buffer. Used to length-frame exec-stdin writes - so the in-container reader does not depend on a stdin half-close (which is - unreliable over a TLS ``DOCKER_HOST``). + +def _measure_stream(stream: io.IOBase) -> tuple[int, io.IOBase, io.IOBase | None]: + """Return ``(length, readable_stream, spool_to_close)`` for a length-framed write. + + Seekable streams are measured in place (and rewound); ``spool_to_close`` is + ``None``. Non-seekable streams (e.g. an HTTP response body or pipe) are copied + into a ``SpooledTemporaryFile`` — kept in memory up to + ``_STREAM_SPOOL_MAX_SIZE``, spilled to disk beyond it — so the byte length can + be determined without buffering the whole payload in RAM; the spool is returned + so the caller can close it. + + Callers run this on the executor thread, never the event loop. """ try: start = stream.tell() stream.seek(0, io.SEEK_END) end = stream.tell() stream.seek(start) - return end - start, stream + return end - start, stream, None except (AttributeError, OSError, ValueError): - data = stream.read() - if isinstance(data, str): - data = data.encode("utf-8") - return len(data), io.BytesIO(data) + spool: Any = tempfile.SpooledTemporaryFile(max_size=_STREAM_SPOOL_MAX_SIZE) + length = 0 + while True: + chunk = stream.read(1024 * 1024) + if not chunk: + break + if isinstance(chunk, str): + chunk = chunk.encode("utf-8") + length += len(chunk) + spool.write(chunk) + spool.seek(0) + return length, spool, spool _PREPARE_USER_PTY_PID_SCRIPT = ( @@ -593,67 +610,74 @@ async def _stream_into_exec( # the real command through ``head -c `` makes it stop after exactly # ```` bytes, independent of transport, and keeps the deliberate # avoidance of ``put_archive()`` (see ``write``) intact. - payload_length, stream = _measure_stream(stream) - framed_cmd = [ - "sh", - "-c", - 'n=$1; shift; head -c "$n" | "$@"', - "sh", - str(payload_length), - *cmd, - ] - def _write() -> int | None: container_client = self._container.client assert container_client is not None api = container_client.api - resp = api.exec_create( - self._container.id, - framed_cmd, - stdin=True, - stdout=True, - stderr=True, - workdir=None, - user=self._coerce_exec_user(user) or "", - ) - exec_socket = self._start_exec_socket(api=api, exec_id=cast(str, resp["Id"])) - sock = exec_socket.sock - raw_sock = exec_socket.raw_sock - try: - while True: - chunk = stream.read(1024 * 1024) - if not chunk: - break - if isinstance(chunk, str): - chunk = chunk.encode("utf-8") - elif not isinstance(chunk, bytes): - chunk = bytes(chunk) - if hasattr(raw_sock, "sendall"): - raw_sock.sendall(chunk) - else: - cast(Any, sock).write(chunk) - - try: - if hasattr(raw_sock, "shutdown"): - raw_sock.shutdown(socket.SHUT_WR) - else: - cast(Any, sock).flush() - except Exception: - pass + # Measure/spool on this executor thread (never the event loop). A + # non-seekable stream is spooled to a SpooledTemporaryFile (bounded + # memory, then disk) rather than read whole into RAM. + payload_length, read_stream, spool = _measure_stream(stream) + try: + framed_cmd = [ + "sh", + "-c", + 'n=$1; shift; head -c "$n" | "$@"', + "sh", + str(payload_length), + *cmd, + ] + resp = api.exec_create( + self._container.id, + framed_cmd, + stdin=True, + stdout=True, + stderr=True, + workdir=None, + user=self._coerce_exec_user(user) or "", + ) + exec_socket = self._start_exec_socket(api=api, exec_id=cast(str, resp["Id"])) + sock = exec_socket.sock + raw_sock = exec_socket.raw_sock try: - if hasattr(raw_sock, "recv"): - while raw_sock.recv(1024 * 1024): - pass - else: - while cast(Any, sock).read(1024 * 1024): - pass - except Exception: - pass + while True: + chunk = read_stream.read(1024 * 1024) + if not chunk: + break + if isinstance(chunk, str): + chunk = chunk.encode("utf-8") + elif not isinstance(chunk, bytes): + chunk = bytes(chunk) + if hasattr(raw_sock, "sendall"): + raw_sock.sendall(chunk) + else: + cast(Any, sock).write(chunk) + + try: + if hasattr(raw_sock, "shutdown"): + raw_sock.shutdown(socket.SHUT_WR) + else: + cast(Any, sock).flush() + except Exception: + pass + + try: + if hasattr(raw_sock, "recv"): + while raw_sock.recv(1024 * 1024): + pass + else: + while cast(Any, sock).read(1024 * 1024): + pass + except Exception: + pass + finally: + exec_socket.close() + + return cast(int | None, api.exec_inspect(resp["Id"]).get("ExitCode")) finally: - exec_socket.close() - - return cast(int | None, api.exec_inspect(resp["Id"]).get("ExitCode")) + if spool is not None: + spool.close() loop = asyncio.get_running_loop() try: From dfe259dc47895e1e11682deea45dcbfa06d48d36 Mon Sep 17 00:00:00 2001 From: "umi-appcoder[bot]" Date: Wed, 1 Jul 2026 19:22:20 +0000 Subject: [PATCH 3/5] sandbox: fail the exec when the length-framing producer (head) fails MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback. `head -c "$n" | "$@"` reports only the consumer's exit status, so on an image without `head` the `head: not found` error was discarded while `cat`/`tar` saw an empty pipe, exited 0, and the write "succeeded" after creating an empty file — silent data loss. Capture the producer's status via a temp file and exit non-zero (98) when it fails, so a missing/broken `head` surfaces as a WorkspaceArchiveWriteError instead. Uses a portable POSIX pattern (no `pipefail`, which dash lacks). Verified end-to-end against a real container: normal writes still exit 0 with correct contents; with `head` removed the exec now exits 98 instead of silently writing an empty file. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/agents/sandbox/sandboxes/docker.py | 23 ++++++++++++++++++++++- tests/sandbox/test_docker.py | 7 ++++++- 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/agents/sandbox/sandboxes/docker.py b/src/agents/sandbox/sandboxes/docker.py index 5abcd2bdc5..51f609bc6e 100644 --- a/src/agents/sandbox/sandboxes/docker.py +++ b/src/agents/sandbox/sandboxes/docker.py @@ -124,6 +124,27 @@ def _measure_stream(stream: io.IOBase) -> tuple[int, io.IOBase, io.IOBase | None return length, spool, spool +# POSIX sh that pipes exactly ```` bytes into the real command (``"$@"``). +# ``head -c`` bounds the read so completion never depends on a stdin half-close +# (unreliable over a TLS DOCKER_HOST). A plain ``head -c "$n" | "$@"`` pipeline +# reports only the *consumer's* exit status, so a missing/failed ``head`` — e.g. a +# custom image without coreutils/busybox ``head`` — would be masked: the consumer +# (``cat``/``tar``) sees an empty pipe, exits 0, and the write "succeeds" while +# creating an empty file. Capture the producer's status via a temp file and fail +# the exec (exit 98) if it is non-zero, so such writes surface as errors instead +# of silent data loss. ``pipefail`` is avoided because it is not POSIX (dash). +_LENGTH_FRAMED_STDIN_SCRIPT = ( + "n=$1; shift; " + 'status_file="${TMPDIR:-/tmp}/.agents-sandbox-framed.$$"; ' + '{ head -c "$n"; echo "$?" >"$status_file"; } | "$@"; ' + "consumer_status=$?; " + 'producer_status="$(cat "$status_file" 2>/dev/null)"; ' + 'rm -f "$status_file"; ' + '[ "$producer_status" = 0 ] || exit 98; ' + 'exit "$consumer_status"' +) + + _PREPARE_USER_PTY_PID_SCRIPT = ( 'pid_path="$1"\n' 'pid_user="$2"\n' @@ -623,7 +644,7 @@ def _write() -> int | None: framed_cmd = [ "sh", "-c", - 'n=$1; shift; head -c "$n" | "$@"', + _LENGTH_FRAMED_STDIN_SCRIPT, "sh", str(payload_length), *cmd, diff --git a/tests/sandbox/test_docker.py b/tests/sandbox/test_docker.py index 8ce8a91a9d..8d335f402b 100644 --- a/tests/sandbox/test_docker.py +++ b/tests/sandbox/test_docker.py @@ -778,7 +778,7 @@ async def test_stream_into_exec_length_frames_stdin_payload() -> None: assert framed == [ "sh", "-c", - 'n=$1; shift; head -c "$n" | "$@"', + docker_sandbox._LENGTH_FRAMED_STDIN_SCRIPT, "sh", str(len(payload)), "tar", @@ -786,6 +786,10 @@ async def test_stream_into_exec_length_frames_stdin_payload() -> None: "-C", "/workspace", ] + # The framing script bounds the read by byte count (`head -c`) and makes a + # failed/missing producer fatal instead of silently writing an empty file. + assert 'head -c "$n"' in framed[2] + assert "exit 98" in framed[2] and "producer_status" in framed[2] # Exactly the payload is streamed, and the count matches the head -c bound — # so completion never depends on the stdin half-close working. assert bytes(api.sock.sent) == payload @@ -827,6 +831,7 @@ def read(self, _size: int = -1) -> bytes: ) framed = cast("list[str]", api.exec_create_calls[0]["cmd"]) + assert framed[:4] == ["sh", "-c", docker_sandbox._LENGTH_FRAMED_STDIN_SCRIPT, "sh"] assert framed[4] == str(len(payload)) assert bytes(api.sock.sent) == payload From a7f0b5a76ed9cac8a01a8f484d324aff2d077625 Mon Sep 17 00:00:00 2001 From: "umi-appcoder[bot]" Date: Wed, 1 Jul 2026 19:30:16 +0000 Subject: [PATCH 4/5] sandbox: send exactly the framed byte count, fail on a short stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback. The send loop read the payload to EOF while the exec was framed with the length measured earlier by _measure_stream. If a seekable input changed in between (e.g. an open file appended or truncated during write()), that desynced: extra bytes piled up behind a `head -c` that had already stopped, and a short read left `head` blocked on a TLS stdin that never EOFs — re-introducing the hang this change removes. Send at most `payload_length` bytes (the framed count) and raise WorkspaceArchiveWriteError if the stream ends early, instead of truncating silently or blocking. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/agents/sandbox/sandboxes/docker.py | 26 ++++++++++++-- tests/sandbox/test_docker.py | 49 ++++++++++++++++++++++++++ 2 files changed, 72 insertions(+), 3 deletions(-) diff --git a/src/agents/sandbox/sandboxes/docker.py b/src/agents/sandbox/sandboxes/docker.py index 51f609bc6e..88f364e94b 100644 --- a/src/agents/sandbox/sandboxes/docker.py +++ b/src/agents/sandbox/sandboxes/docker.py @@ -662,18 +662,38 @@ def _write() -> int | None: sock = exec_socket.sock raw_sock = exec_socket.raw_sock try: - while True: - chunk = read_stream.read(1024 * 1024) + # Send exactly ``payload_length`` bytes — the count the exec + # was framed with (``head -c "$n"``). Reading to EOF instead + # would desync if the stream changed after _measure_stream: + # extra bytes would pile up behind a ``head`` that already + # stopped, and a short read would leave ``head`` blocked on a + # TLS stdin that never EOFs (the original hang). If the stream + # ends early we fail loudly rather than truncate silently. + remaining = payload_length + while remaining > 0: + chunk = read_stream.read(min(1024 * 1024, remaining)) if not chunk: - break + raise WorkspaceArchiveWriteError( + path=error_path, + context={ + "reason": "stream_shorter_than_measured", + "expected": str(payload_length), + "sent": str(payload_length - remaining), + }, + ) if isinstance(chunk, str): chunk = chunk.encode("utf-8") elif not isinstance(chunk, bytes): chunk = bytes(chunk) + if len(chunk) > remaining: + # Only reachable for multibyte text streams (never the + # byte streams these writes use); cap to the framed count. + chunk = chunk[:remaining] if hasattr(raw_sock, "sendall"): raw_sock.sendall(chunk) else: cast(Any, sock).write(chunk) + remaining -= len(chunk) try: if hasattr(raw_sock, "shutdown"): diff --git a/tests/sandbox/test_docker.py b/tests/sandbox/test_docker.py index 8d335f402b..d06470fbb5 100644 --- a/tests/sandbox/test_docker.py +++ b/tests/sandbox/test_docker.py @@ -836,6 +836,55 @@ def read(self, _size: int = -1) -> bytes: assert bytes(api.sock.sent) == payload +@pytest.mark.asyncio +async def test_stream_into_exec_fails_when_stream_ends_before_measured_length() -> None: + """If the stream yields fewer bytes than measured (e.g. truncated after + _measure_stream), fail loudly and send at most the framed count — never + short-feed `head -c` and re-introduce the TLS stdin hang.""" + + class _ShrinkingStream(io.RawIOBase): + """Reports length 100 via seek/tell but only yields 10 bytes.""" + + def __init__(self) -> None: + self._pos = 0 + self._served = False + + def seekable(self) -> bool: + return True + + def readable(self) -> bool: + return True + + def tell(self) -> int: + return self._pos + + def seek(self, offset: int, whence: int = io.SEEK_SET) -> int: + self._pos = 100 if whence == io.SEEK_END else offset + return self._pos + + def read(self, _size: int = -1) -> bytes: + if self._served: + return b"" + self._served = True + return b"x" * 10 + + api = _RecordingStreamAPI() + session = _make_streaming_session(api) + + with pytest.raises(docker_sandbox.WorkspaceArchiveWriteError): + await session._stream_into_exec( + cmd=["sh", "-lc", 'cat > "$1"', "sh", "/workspace/f"], + stream=cast(io.IOBase, _ShrinkingStream()), + error_path=Path("/workspace/f"), + ) + + # It framed for 100 bytes but sent at most what the stream produced (10) — + # never more than the measured count. + framed = cast("list[str]", api.exec_create_calls[0]["cmd"]) + assert framed[4] == "100" + assert len(api.sock.sent) == 10 + + @pytest.mark.asyncio async def test_docker_persist_workspace_prunes_ephemeral_entries_from_staged_copy( tmp_path: Path, From becfc83c8423ae835bdcd97df6d6e14747e1567d Mon Sep 17 00:00:00 2001 From: "umi-appcoder[bot]" Date: Wed, 1 Jul 2026 20:17:57 +0000 Subject: [PATCH 5/5] sandbox: clamp negative measured length before framing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback. A seekable stream positioned past its end (e.g. BytesIO(b"abc").seek(10), or a file left beyond EOF) makes `end - start` negative. That negative count becomes `head -c -N`, which means "all but the last N bytes" and reads to EOF — re-hanging over a TLS stdin — while the send loop (`remaining > 0` false) sends nothing. Clamp the measured length to 0: a position past end has no readable bytes, so a 0-byte write (`head -c 0`, which exits immediately) is the correct and safe result. Co-Authored-By: Claude Opus 4.8 (1M context) --- src/agents/sandbox/sandboxes/docker.py | 5 ++++- tests/sandbox/test_docker.py | 20 ++++++++++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/agents/sandbox/sandboxes/docker.py b/src/agents/sandbox/sandboxes/docker.py index 88f364e94b..2f59b51206 100644 --- a/src/agents/sandbox/sandboxes/docker.py +++ b/src/agents/sandbox/sandboxes/docker.py @@ -108,7 +108,10 @@ def _measure_stream(stream: io.IOBase) -> tuple[int, io.IOBase, io.IOBase | None stream.seek(0, io.SEEK_END) end = stream.tell() stream.seek(start) - return end - start, stream, None + # Clamp to 0: a stream positioned past its end has no readable bytes, and + # a negative count would become `head -c -N` ("all but the last N bytes"), + # which reads to EOF and re-hangs over a TLS stdin. + return max(0, end - start), stream, None except (AttributeError, OSError, ValueError): spool: Any = tempfile.SpooledTemporaryFile(max_size=_STREAM_SPOOL_MAX_SIZE) length = 0 diff --git a/tests/sandbox/test_docker.py b/tests/sandbox/test_docker.py index d06470fbb5..c51b6cfd04 100644 --- a/tests/sandbox/test_docker.py +++ b/tests/sandbox/test_docker.py @@ -885,6 +885,26 @@ def read(self, _size: int = -1) -> bytes: assert len(api.sock.sent) == 10 +@pytest.mark.asyncio +async def test_stream_into_exec_clamps_length_when_position_past_end() -> None: + """A stream positioned past its end measures to a negative delta; clamp to 0 + so it never becomes `head -c -N` (which reads to EOF and re-hangs over TLS).""" + api = _RecordingStreamAPI() + session = _make_streaming_session(api) + stream = io.BytesIO(b"abc") + stream.seek(10) # past EOF -> end - start would be negative + + await session._stream_into_exec( + cmd=["tar", "-x", "-C", "/workspace"], + stream=stream, + error_path=Path("/workspace"), + ) + + framed = cast("list[str]", api.exec_create_calls[0]["cmd"]) + assert framed[4] == "0" # not "-7" + assert api.sock.sent == bytearray() # nothing sent; no unbounded read + + @pytest.mark.asyncio async def test_docker_persist_workspace_prunes_ephemeral_entries_from_staged_copy( tmp_path: Path,