diff --git a/jigsawstack/async_request.py b/jigsawstack/async_request.py index d86e6b2..d57bc3b 100644 --- a/jigsawstack/async_request.py +++ b/jigsawstack/async_request.py @@ -35,9 +35,13 @@ def __init__( self.base_url = config.get("base_url") self.api_key = config.get("api_key") self.data = data - self.headers = config.get("headers", None) or {"Content-Type": "application/json"} + # Defensive copy: we must not mutate the caller's config dict. Without + # this, __get_headers() could permanently strip keys (e.g. Content-Type) + # from the shared config that all service-class methods reuse. + raw_headers = config.get("headers", None) or {"Content-Type": "application/json"} + self.headers: Dict[str, str] = dict(raw_headers) self.stream = stream - self.files = files # Store files for multipart requests + self.files = files def __convert_params( self, params: Union[Dict[Any, Any], List[Dict[Any, Any]]] @@ -175,19 +179,22 @@ def __get_headers(self) -> Dict[str, str]: "x-api-key": f"{self.api_key}", } - # only add Content-Type if not using multipart (files) + # Only add Content-Type if not using multipart (files) if not self.files and not self.data: h["Content-Type"] = "application/json" - _headers = h.copy() + # Work on a local copy so we never mutate self.headers (which itself is + # already a copy of the original config dict — see __init__). + caller_headers = dict(self.headers) - # don't override Content-Type if using multipart - if self.files and "Content-Type" in self.headers: - self.headers.pop("Content-Type") + # Strip Content-Type for multipart requests so that aiohttp can set + # the correct multipart/form-data boundary itself. + if self.files: + caller_headers.pop("Content-Type", None) - _headers.update(self.headers) + h.update(caller_headers) - return _headers + return h async def perform_streaming(self) -> AsyncGenerator[Union[T, str], None]: """ @@ -253,8 +260,6 @@ async def make_request( _form_data.add_field("file", BytesIO(files["file"]), filename="upload") if params and isinstance(params, dict): _form_data.add_field("body", json.dumps(params), content_type="application/json") - - headers.pop("Content-Type", None) elif data: # raw data request _data = data else: # pure JSON request diff --git a/jigsawstack/request.py b/jigsawstack/request.py index 84b25d9..254b6d6 100644 --- a/jigsawstack/request.py +++ b/jigsawstack/request.py @@ -35,7 +35,11 @@ def __init__( self.base_url = config.get("base_url") self.api_key = config.get("api_key") self.data = data - self.headers = config.get("headers", None) or {"Content-Type": "application/json"} + # Defensive copy: we must not mutate the caller's config dict. Without + # this, __get_headers() could permanently strip keys (e.g. Content-Type) + # from the shared config that all service-class methods reuse. + raw_headers = config.get("headers", None) or {"Content-Type": "application/json"} + self.headers: Dict[str, str] = dict(raw_headers) self.stream = stream self.files = files @@ -160,15 +164,18 @@ def __get_headers(self) -> Dict[Any, Any]: if not self.files and not self.data: h["Content-Type"] = "application/json" - _headers = h.copy() + # Work on a local copy so we never mutate self.headers (which itself is + # already a copy of the original config dict — see __init__). + caller_headers = dict(self.headers) - # Don't override Content-Type if using multipart - if self.files and "Content-Type" in self.headers: - self.headers.pop("Content-Type") + # Strip Content-Type for multipart requests so that the requests + # library can set the correct multipart/form-data boundary itself. + if self.files: + caller_headers.pop("Content-Type", None) - _headers.update(self.headers) + h.update(caller_headers) - return _headers + return h def perform_streaming(self) -> Generator[Union[T, str], None, None]: """Is the main function that makes the HTTP request @@ -261,7 +268,6 @@ def make_request(self, url: str) -> requests.Response: _files = files if params and isinstance(params, dict): _data = {"body": json.dumps(params)} - headers.pop("Content-Type", None) # let requests set it for multipart elif data: # raw data request _data = data else: # pure JSON request diff --git a/tests/test_headers_mutation.py b/tests/test_headers_mutation.py new file mode 100644 index 0000000..a53195e --- /dev/null +++ b/tests/test_headers_mutation.py @@ -0,0 +1,153 @@ +""" +tests/test_headers_mutation.py + +Regression tests for the __get_headers mutation bug. + +Both Request and AsyncRequest were holding a direct reference to the headers +dict inside the caller's config TypedDict and calling .pop("Content-Type") on +it during multipart file-upload requests. That permanently modified the shared +config, so every subsequent request reusing the same config would go out without +Content-Type. + +These tests are self-contained and require no API key or network access. +""" + +from typing import Optional + +from jigsawstack.async_request import AsyncRequest, AsyncRequestConfig +from jigsawstack.request import Request, RequestConfig + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + +def _sync_config(extra_headers: Optional[dict] = None) -> RequestConfig: + headers = {"Content-Type": "application/json"} + if extra_headers: + headers.update(extra_headers) + return RequestConfig(base_url="http://test", api_key="key", headers=headers) + + +def _async_config(extra_headers: Optional[dict] = None) -> AsyncRequestConfig: + headers = {"Content-Type": "application/json"} + if extra_headers: + headers.update(extra_headers) + return AsyncRequestConfig(base_url="http://test", api_key="key", headers=headers) + + +# --------------------------------------------------------------------------- +# Sync Request +# --------------------------------------------------------------------------- + +class TestSyncRequestHeadersMutation: + def test_multipart_does_not_mutate_config_headers(self): + """A multipart request must not remove Content-Type from the config dict.""" + config = _sync_config() + r = Request(config=config, path="/test", params={}, verb="post", files={"file": b"data"}) + r._Request__get_headers() + assert "Content-Type" in config["headers"], ( + "__get_headers() stripped Content-Type from the caller's config dict" + ) + + def test_repeated_multipart_calls_do_not_degrade(self): + """Calling __get_headers() multiple times must always produce the same result.""" + config = _sync_config() + r = Request(config=config, path="/test", params={}, verb="post", files={"file": b"data"}) + first = r._Request__get_headers() + second = r._Request__get_headers() + assert first == second, "__get_headers() returned different results on second call" + + def test_multipart_output_omits_content_type(self): + """Outgoing headers for a multipart request must not contain Content-Type + so that the requests library can insert the correct multipart boundary.""" + config = _sync_config() + r = Request(config=config, path="/test", params={}, verb="post", files={"file": b"data"}) + out = r._Request__get_headers() + assert "Content-Type" not in out, ( + "Content-Type should be absent from multipart outgoing headers" + ) + + def test_json_request_includes_content_type(self): + """Plain JSON requests must still set Content-Type: application/json.""" + config = _sync_config() + r = Request(config=config, path="/test", params={"k": "v"}, verb="post") + out = r._Request__get_headers() + assert out.get("Content-Type") == "application/json" + + def test_custom_headers_propagated(self): + """Any extra caller-supplied headers must survive into the outgoing dict.""" + config = _sync_config(extra_headers={"x-custom": "value"}) + r = Request(config=config, path="/test", params={}, verb="post") + out = r._Request__get_headers() + assert out.get("x-custom") == "value" + + def test_second_request_on_same_config_unaffected(self): + """A second Request built from the same config after a multipart call + must still produce correct headers — the config was not poisoned.""" + config = _sync_config() + # First request: multipart — used to trigger the bug + r1 = Request(config=config, path="/test", params={}, verb="post", files={"file": b"x"}) + r1._Request__get_headers() + # Second request: plain JSON — previously would be missing Content-Type + r2 = Request(config=config, path="/test", params={"k": "v"}, verb="post") + out = r2._Request__get_headers() + assert out.get("Content-Type") == "application/json", ( + "Config was mutated by the first multipart request; second request lost Content-Type" + ) + + +# --------------------------------------------------------------------------- +# Async Request +# --------------------------------------------------------------------------- + +class TestAsyncRequestHeadersMutation: + def test_multipart_does_not_mutate_config_headers(self): + """A multipart request must not remove Content-Type from the config dict.""" + config = _async_config() + r = AsyncRequest(config=config, path="/test", params={}, verb="post", files={"file": b"data"}) + r._AsyncRequest__get_headers() + assert "Content-Type" in config["headers"], ( + "__get_headers() stripped Content-Type from the caller's async config dict" + ) + + def test_repeated_multipart_calls_do_not_degrade(self): + """Calling __get_headers() multiple times must always produce the same result.""" + config = _async_config() + r = AsyncRequest(config=config, path="/test", params={}, verb="post", files={"file": b"data"}) + first = r._AsyncRequest__get_headers() + second = r._AsyncRequest__get_headers() + assert first == second, "__get_headers() returned different results on second call" + + def test_multipart_output_omits_content_type(self): + """Outgoing headers for a multipart request must not contain Content-Type.""" + config = _async_config() + r = AsyncRequest(config=config, path="/test", params={}, verb="post", files={"file": b"data"}) + out = r._AsyncRequest__get_headers() + assert "Content-Type" not in out + + def test_json_request_includes_content_type(self): + """Plain JSON requests must still set Content-Type: application/json.""" + config = _async_config() + r = AsyncRequest(config=config, path="/test", params={"k": "v"}, verb="post") + out = r._AsyncRequest__get_headers() + assert out.get("Content-Type") == "application/json" + + def test_custom_headers_propagated(self): + """Any extra caller-supplied headers must survive into the outgoing dict.""" + config = _async_config(extra_headers={"x-custom": "value"}) + r = AsyncRequest(config=config, path="/test", params={}, verb="post") + out = r._AsyncRequest__get_headers() + assert out.get("x-custom") == "value" + + def test_second_request_on_same_config_unaffected(self): + """A second AsyncRequest built from the same config after a multipart call + must still produce correct headers.""" + config = _async_config() + r1 = AsyncRequest(config=config, path="/test", params={}, verb="post", files={"file": b"x"}) + r1._AsyncRequest__get_headers() + r2 = AsyncRequest(config=config, path="/test", params={"k": "v"}, verb="post") + out = r2._AsyncRequest__get_headers() + assert out.get("Content-Type") == "application/json", ( + "Config was mutated by the first multipart request; second request lost Content-Type" + )