From 897dfe23378a4296b3a91f04297cd14b18382364 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Thu, 30 Apr 2026 08:36:34 +0900 Subject: [PATCH 1/6] Add tests for JSON with charset Signed-off-by: Anuraag Agrawal --- test/test_client.py | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/test/test_client.py b/test/test_client.py index 3c4d681b..987784a6 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -113,3 +113,38 @@ async def make_hat(self, request, ctx): assert list(resp.headers().allitems()) == response_headers assert list(resp.trailers().allitems()) == response_trailers + + +def test_json_charset_content_type() -> None: + class HeadersHaberdasherSync(HaberdasherSync): + def make_hat(self, request, ctx): + return Hat(size=2) + + transport = WSGITransport(HaberdasherWSGIApplication(HeadersHaberdasherSync())) + + client = SyncClient(transport=transport) + + res = client.post( + "http://localhost/connectrpc.example.Haberdasher/MakeHat", + content=b"{}", + headers={"Content-Type": "application/json; charset=utf-8"}, + ) + assert res.json() == {"size": 2} + + +@pytest.mark.asyncio +async def test_json_charset_content_type_async() -> None: + class HeadersHaberdasher(Haberdasher): + async def make_hat(self, request, ctx): + return Hat(size=2) + + transport = ASGITransport(HaberdasherASGIApplication(HeadersHaberdasher())) + + client = Client(transport=transport) + + res = await client.post( + "http://localhost/connectrpc.example.Haberdasher/MakeHat", + content=b"{}", + headers={"Content-Type": "application/json; charset=utf-8"}, + ) + assert res.json() == {"size": 2} From 05c52d5b914ecc7bbfd6239dce4558f895de5520 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Thu, 30 Apr 2026 09:54:50 +0900 Subject: [PATCH 2/6] Ignore parameters in content type matching Signed-off-by: Anuraag Agrawal --- src/connectrpc/_codec.py | 6 +-- src/connectrpc/_protocol_connect.py | 30 +++++++------- test/test_client.py | 35 ----------------- test/test_http.py | 61 +++++++++++++++++++++++++++++ 4 files changed, 77 insertions(+), 55 deletions(-) create mode 100644 test/test_http.py diff --git a/src/connectrpc/_codec.py b/src/connectrpc/_codec.py index 06190f40..89beaef9 100644 --- a/src/connectrpc/_codec.py +++ b/src/connectrpc/_codec.py @@ -68,11 +68,7 @@ def decode(self, data: bytes | bytearray, message: V) -> V: _proto_binary_codec = ProtoBinaryCodec() _proto_json_codec = ProtoJSONCodec() -_default_codecs = [ - _proto_binary_codec, - _proto_json_codec, - ProtoJSONCodec(name=CODEC_NAME_JSON_CHARSET_UTF8), -] +_default_codecs = [_proto_binary_codec, _proto_json_codec] def get_default_codecs() -> list[Codec]: diff --git a/src/connectrpc/_protocol_connect.py b/src/connectrpc/_protocol_connect.py index d61fd6f0..3d8eb195 100644 --- a/src/connectrpc/_protocol_connect.py +++ b/src/connectrpc/_protocol_connect.py @@ -5,7 +5,7 @@ from http import HTTPStatus from typing import TYPE_CHECKING, Any, TypeVar -from ._codec import CODEC_NAME_JSON, CODEC_NAME_JSON_CHARSET_UTF8, Codec +from ._codec import CODEC_NAME_JSON, Codec from ._compression import IdentityCompression, negotiate_compression from ._envelope import EnvelopeReader, EnvelopeWriter from ._protocol import ( @@ -36,6 +36,9 @@ CONNECT_PROTOCOL_VERSION = "1" CONNECT_HEADER_TIMEOUT = "connect-timeout-ms" CONNECT_UNARY_CONTENT_TYPE_PREFIX = "application/" +CONNECT_UNARY_CONTENT_TYPE_JSON = ( + f"{CONNECT_UNARY_CONTENT_TYPE_PREFIX}{CODEC_NAME_JSON}" +) CONNECT_STREAMING_CONTENT_TYPE_PREFIX = "application/connect+" CONNECT_UNARY_HEADER_COMPRESSION = "content-encoding" CONNECT_UNARY_HEADER_ACCEPT_COMPRESSION = "accept-encoding" @@ -46,7 +49,16 @@ _DEFAULT_CONNECT_USER_AGENT = f"connectrpc/{__version__}" +def _normalize_content_type(content_type: str) -> str: + # content-type can have parameters, most commonly charset. Our support codecs, + # binary and JSON are always either non-text or utf-8 and the parameters are not + # important for matching to a codec. A custom codec could conceivably need to + # match on parameters, but we will reconsider that if it is ever asked for. + return content_type.partition(";")[0].strip() + + def codec_name_from_content_type(content_type: str, *, stream: bool) -> str: + content_type = _normalize_content_type(content_type) prefix = ( CONNECT_STREAMING_CONTENT_TYPE_PREFIX if stream @@ -226,12 +238,10 @@ def create_request_context( def validate_response( self, request_codec_name: str, status_code: int, response_content_type: str ) -> None: + response_content_type = _normalize_content_type(response_content_type) if status_code != HTTPStatus.OK: # Error responses must be JSON-encoded - if response_content_type in ( - f"{CONNECT_UNARY_CONTENT_TYPE_PREFIX}{CODEC_NAME_JSON}", - f"{CONNECT_UNARY_CONTENT_TYPE_PREFIX}{CODEC_NAME_JSON_CHARSET_UTF8}", - ): + if response_content_type == CONNECT_UNARY_CONTENT_TYPE_JSON: return raise ConnectWireError.from_http_status(status_code).to_exception() @@ -247,16 +257,6 @@ def validate_response( if response_codec_name == request_codec_name: return - if ( - response_codec_name == CODEC_NAME_JSON - and request_codec_name == CODEC_NAME_JSON_CHARSET_UTF8 - ) or ( - response_codec_name == CODEC_NAME_JSON_CHARSET_UTF8 - and request_codec_name == CODEC_NAME_JSON - ): - # Both are JSON - return - raise ConnectError( Code.INTERNAL, f"invalid content-type: '{response_content_type}'; expecting '{CONNECT_UNARY_CONTENT_TYPE_PREFIX}{request_codec_name}'", diff --git a/test/test_client.py b/test/test_client.py index 987784a6..3c4d681b 100644 --- a/test/test_client.py +++ b/test/test_client.py @@ -113,38 +113,3 @@ async def make_hat(self, request, ctx): assert list(resp.headers().allitems()) == response_headers assert list(resp.trailers().allitems()) == response_trailers - - -def test_json_charset_content_type() -> None: - class HeadersHaberdasherSync(HaberdasherSync): - def make_hat(self, request, ctx): - return Hat(size=2) - - transport = WSGITransport(HaberdasherWSGIApplication(HeadersHaberdasherSync())) - - client = SyncClient(transport=transport) - - res = client.post( - "http://localhost/connectrpc.example.Haberdasher/MakeHat", - content=b"{}", - headers={"Content-Type": "application/json; charset=utf-8"}, - ) - assert res.json() == {"size": 2} - - -@pytest.mark.asyncio -async def test_json_charset_content_type_async() -> None: - class HeadersHaberdasher(Haberdasher): - async def make_hat(self, request, ctx): - return Hat(size=2) - - transport = ASGITransport(HaberdasherASGIApplication(HeadersHaberdasher())) - - client = Client(transport=transport) - - res = await client.post( - "http://localhost/connectrpc.example.Haberdasher/MakeHat", - content=b"{}", - headers={"Content-Type": "application/json; charset=utf-8"}, - ) - assert res.json() == {"size": 2} diff --git a/test/test_http.py b/test/test_http.py new file mode 100644 index 00000000..b25be2c5 --- /dev/null +++ b/test/test_http.py @@ -0,0 +1,61 @@ +from __future__ import annotations + +import pytest +from pyqwest import Client, SyncClient +from pyqwest.testing import ASGITransport, WSGITransport + +from .haberdasher_connect import ( + Haberdasher, + HaberdasherASGIApplication, + HaberdasherSync, + HaberdasherWSGIApplication, +) +from .haberdasher_pb2 import Hat + +_charset_content_type_cases = [ + "application/json", + "application/json; charset=utf-8", + "application/json; charset=UTF-8", + "application/json;charset=utf-8", + "application/json; charset=utf-8", + "application/json; charset=utf-8; version=1", +] + + +@pytest.mark.parametrize("header", _charset_content_type_cases) +def test_json_charset_content_type(header: str) -> None: + class HeadersHaberdasherSync(HaberdasherSync): + def make_hat(self, request, ctx): + return Hat(size=2) + + transport = WSGITransport(HaberdasherWSGIApplication(HeadersHaberdasherSync())) + + client = SyncClient(transport=transport) + + res = client.post( + "http://localhost/connectrpc.example.Haberdasher/MakeHat", + content=b"{}", + headers={"content-type": header}, + ) + assert res.status == 200 + assert res.json() == {"size": 2} + + +@pytest.mark.asyncio +@pytest.mark.parametrize("header", _charset_content_type_cases) +async def test_json_charset_content_type_async(header: str) -> None: + class HeadersHaberdasher(Haberdasher): + async def make_hat(self, request, ctx): + return Hat(size=2) + + transport = ASGITransport(HaberdasherASGIApplication(HeadersHaberdasher())) + + client = Client(transport=transport) + + res = await client.post( + "http://localhost/connectrpc.example.Haberdasher/MakeHat", + content=b"{}", + headers={"content-type": header}, + ) + assert res.status == 200 + assert res.json() == {"size": 2} From a8df5534f2b2b3ea1f3e2d9df8d2769eb3aba7b7 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Thu, 30 Apr 2026 10:17:21 +0900 Subject: [PATCH 3/6] Cleanup lowercase Signed-off-by: Anuraag Agrawal --- src/connectrpc/_protocol_connect.py | 2 +- src/connectrpc/_server_async.py | 2 +- src/connectrpc/_server_sync.py | 22 +++++----------------- test/test_http.py | 1 + 4 files changed, 8 insertions(+), 19 deletions(-) diff --git a/src/connectrpc/_protocol_connect.py b/src/connectrpc/_protocol_connect.py index 3d8eb195..1b8a04a4 100644 --- a/src/connectrpc/_protocol_connect.py +++ b/src/connectrpc/_protocol_connect.py @@ -54,7 +54,7 @@ def _normalize_content_type(content_type: str) -> str: # binary and JSON are always either non-text or utf-8 and the parameters are not # important for matching to a codec. A custom codec could conceivably need to # match on parameters, but we will reconsider that if it is ever asked for. - return content_type.partition(";")[0].strip() + return content_type.partition(";")[0].strip().lower() def codec_name_from_content_type(content_type: str, *, stream: bool) -> str: diff --git a/src/connectrpc/_server_async.py b/src/connectrpc/_server_async.py index 46162c9d..b2b87c95 100644 --- a/src/connectrpc/_server_async.py +++ b/src/connectrpc/_server_async.py @@ -213,7 +213,7 @@ async def __call__( codec_name = protocol.codec_name_from_content_type( headers.get("content-type", ""), stream=not is_unary ) - codec = self._codecs.get(codec_name.lower()) + codec = self._codecs.get(codec_name) if not codec: raise HTTPException( HTTPStatus.UNSUPPORTED_MEDIA_TYPE, diff --git a/src/connectrpc/_server_sync.py b/src/connectrpc/_server_sync.py index d3650e2b..6655edd9 100644 --- a/src/connectrpc/_server_sync.py +++ b/src/connectrpc/_server_sync.py @@ -70,13 +70,12 @@ ) -def _normalize_wsgi_headers(environ: WSGIEnvironment) -> dict: - """Extract and normalize HTTP headers from WSGI environment.""" - headers = {} +def _process_headers(environ: WSGIEnvironment) -> Headers: + headers = Headers() if "CONTENT_TYPE" in environ: - headers["content-type"] = environ["CONTENT_TYPE"].lower() + headers["content-type"] = environ["CONTENT_TYPE"] if "CONTENT_LENGTH" in environ: - headers["content-length"] = environ["CONTENT_LENGTH"].lower() + headers["content-length"] = environ["CONTENT_LENGTH"] for key, value in environ.items(): if key.startswith("HTTP_"): @@ -85,17 +84,6 @@ def _normalize_wsgi_headers(environ: WSGIEnvironment) -> dict: return headers -def _process_headers(headers: dict) -> Headers: - result = Headers() - for key, value in headers.items(): - if isinstance(value, list | tuple): - for v in value: - result.add(key, v) - else: - result.add(key, str(value)) - return result - - def prepare_response_headers( base_headers: dict[str, list[str]], selected_encoding: str ) -> dict[str, list[str]]: @@ -220,7 +208,7 @@ def __call__( http_method = environ["REQUEST_METHOD"] http_scheme = environ.get("wsgi.url_scheme", "http") - headers = _process_headers(_normalize_wsgi_headers(environ)) + headers = _process_headers(environ) if ra := environ.get("REMOTE_ADDR"): port = environ.get("REMOTE_PORT", "0") client_address = f"{ra}:{port}" diff --git a/test/test_http.py b/test/test_http.py index b25be2c5..7566f14f 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -19,6 +19,7 @@ "application/json;charset=utf-8", "application/json; charset=utf-8", "application/json; charset=utf-8; version=1", + "application/JSON", ] From e2e30a9787b2ffa2f2ab967d6ca63b9a271563a7 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Thu, 30 Apr 2026 10:19:43 +0900 Subject: [PATCH 4/6] Cleanup Signed-off-by: Anuraag Agrawal --- src/connectrpc/_codec.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/connectrpc/_codec.py b/src/connectrpc/_codec.py index 89beaef9..864cdb8e 100644 --- a/src/connectrpc/_codec.py +++ b/src/connectrpc/_codec.py @@ -8,9 +8,6 @@ CODEC_NAME_PROTO = "proto" CODEC_NAME_JSON = "json" -# Follow connect-go's hacky approach to handling charset parameter -# https://github.com/connectrpc/connect-go/blob/fe4915717d32438c40a24a50e3895271d4c24751/codec.go#L31 -CODEC_NAME_JSON_CHARSET_UTF8 = "json; charset=utf-8" T_contra = TypeVar("T_contra", contravariant=True) From 8783aa12fe1d516885af76b1849278efafe92432 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Thu, 30 Apr 2026 10:23:08 +0900 Subject: [PATCH 5/6] Fix comment Signed-off-by: Anuraag Agrawal --- src/connectrpc/_protocol_connect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/connectrpc/_protocol_connect.py b/src/connectrpc/_protocol_connect.py index 1b8a04a4..4997dafa 100644 --- a/src/connectrpc/_protocol_connect.py +++ b/src/connectrpc/_protocol_connect.py @@ -50,7 +50,7 @@ def _normalize_content_type(content_type: str) -> str: - # content-type can have parameters, most commonly charset. Our support codecs, + # content-type can have parameters, most commonly charset. Our supported codecs, # binary and JSON are always either non-text or utf-8 and the parameters are not # important for matching to a codec. A custom codec could conceivably need to # match on parameters, but we will reconsider that if it is ever asked for. From a59b8d48ac58c430c96d967bdaf20fd7174ed632 Mon Sep 17 00:00:00 2001 From: Anuraag Agrawal Date: Fri, 1 May 2026 09:57:34 +0900 Subject: [PATCH 6/6] Streaming tests Signed-off-by: Anuraag Agrawal --- test/test_http.py | 83 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 2 deletions(-) diff --git a/test/test_http.py b/test/test_http.py index 7566f14f..e543cdce 100644 --- a/test/test_http.py +++ b/test/test_http.py @@ -1,16 +1,25 @@ from __future__ import annotations +from typing import TYPE_CHECKING + import pytest -from pyqwest import Client, SyncClient +from pyqwest import Client, Request, SyncClient, SyncRequest, SyncTransport, Transport from pyqwest.testing import ASGITransport, WSGITransport +from connectrpc.codec import proto_json_codec + from .haberdasher_connect import ( Haberdasher, HaberdasherASGIApplication, + HaberdasherClient, + HaberdasherClientSync, HaberdasherSync, HaberdasherWSGIApplication, ) -from .haberdasher_pb2 import Hat +from .haberdasher_pb2 import Hat, Size + +if TYPE_CHECKING: + from pyqwest._pyqwest import Response, SyncResponse _charset_content_type_cases = [ "application/json", @@ -60,3 +69,73 @@ async def make_hat(self, request, ctx): ) assert res.status == 200 assert res.json() == {"size": 2} + + +_streaming_charset_content_type_cases = [ + "application/connect+" + h.split("/")[1] for h in _charset_content_type_cases +] + + +@pytest.mark.parametrize("header", _streaming_charset_content_type_cases) +def test_json_charset_content_type_stream(header: str) -> None: + class HeadersHaberdasherSync(HaberdasherSync): + def make_similar_hats(self, request, ctx): + yield Hat(size=2) + yield Hat(size=3) + + # Difficult to parse an HTTP streaming response so override the header + # with a real client's transport instead. + class HeaderTransport(SyncTransport): + def __init__(self, delegate: SyncTransport): + self._delegate = delegate + + def execute_sync(self, request: SyncRequest) -> SyncResponse: + request.headers["content-type"] = header + return self._delegate.execute_sync(request) + + transport = HeaderTransport( + WSGITransport(HaberdasherWSGIApplication(HeadersHaberdasherSync())) + ) + + client = HaberdasherClientSync( + address="http://localhost", + codec=proto_json_codec(), + http_client=SyncClient(transport=transport), + ) + + hats = list(client.make_similar_hats(Size(inches=2))) + assert hats == [Hat(size=2), Hat(size=3)] + + +@pytest.mark.asyncio +@pytest.mark.parametrize("header", _streaming_charset_content_type_cases) +async def test_json_charset_content_type_stream_async(header: str) -> None: + class HeadersHaberdasher(Haberdasher): + async def make_similar_hats(self, request, ctx): + yield Hat(size=2) + yield Hat(size=3) + + # Difficult to parse an HTTP streaming response so override the header + # with a real client's transport instead. + class HeaderTransport(Transport): + def __init__(self, delegate: Transport): + self._delegate = delegate + + async def execute(self, request: Request) -> Response: + request.headers["content-type"] = header + return await self._delegate.execute(request) + + transport = HeaderTransport( + ASGITransport(HaberdasherASGIApplication(HeadersHaberdasher())) + ) + + client = HaberdasherClient( + address="http://localhost", + codec=proto_json_codec(), + http_client=Client(transport=transport), + ) + + hats = [] + async for hat in client.make_similar_hats(Size(inches=2)): + hats.append(hat) + assert hats == [Hat(size=2), Hat(size=3)]