From 45aca331678cc4e63fb1a8cd8874031b18f0211c Mon Sep 17 00:00:00 2001 From: Anatolii Yatsuk Date: Fri, 12 Jun 2026 18:15:17 +0300 Subject: [PATCH 1/2] fix: JsonItemsParser should yield floats, not Decimals ijson.items() parses non-integer JSON numbers as decimal.Decimal by default, which the CDK cannot serialize downstream (orjson raises 'Decimal is not JSON serializable'). This broke any JsonItemsDecoder stream with decimal fields (e.g. Amazon Brand Analytics clickShare/conversionShare, Sales & Traffic, Vendor reports). Pass use_float=True so non-integer numbers are parsed as float, matching the json.loads/orjson behavior of the other JSON parsers. Adds a regression test. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../decoders/composite_raw_decoder.py | 4 +++- .../decoders/test_composite_decoder.py | 16 ++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py b/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py index 99b2de3d2..0561369a7 100644 --- a/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py +++ b/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py @@ -149,7 +149,9 @@ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE: # ijson auto-selects the best available backend (yajl2_c when present) # and reads from `data` lazily — it does not call `.read()` on the # whole stream up front. - yield from ijson.items(data, f"{self.items_path}.item") + # use_float=True yields floats for non-integer numbers instead of Decimal, matching + # json.loads/orjson behavior so downstream JSON serialization doesn't choke on Decimal. + yield from ijson.items(data, f"{self.items_path}.item", use_float=True) @dataclass diff --git a/unit_tests/sources/declarative/decoders/test_composite_decoder.py b/unit_tests/sources/declarative/decoders/test_composite_decoder.py index ec1a621b1..3c5573106 100644 --- a/unit_tests/sources/declarative/decoders/test_composite_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_composite_decoder.py @@ -451,6 +451,22 @@ def test_json_items_parser_composes_with_gzip(requests_mock) -> None: assert list(decoder.decode(response)) == payload["dataByAsin"] +def test_json_items_parser_yields_floats_not_decimals(requests_mock) -> None: + """Non-integer numbers must be parsed as float (not Decimal) so downstream JSON + serialization (orjson) does not fail on Decimal values.""" + import orjson + + payload = {"data": [{"ratio": 0.5, "rank": 3, "amount": 0.0000}]} + requests_mock.register_uri("GET", "https://airbyte.io/", content=json.dumps(payload).encode("utf-8")) + response = requests.get("https://airbyte.io/", stream=True) + + records = list(CompositeRawDecoder(parser=JsonItemsParser(items_path="data")).decode(response)) + assert isinstance(records[0]["ratio"], float) + assert isinstance(records[0]["rank"], int) + # Must be serializable by orjson (which rejects Decimal). + orjson.dumps(records[0]) + + def test_json_items_parser_requires_items_path() -> None: parser = JsonItemsParser() with pytest.raises(ValueError, match="items_path"): From fba766bb20ff9dcd7c5a4da0c0634385332f89d7 Mon Sep 17 00:00:00 2001 From: octavia-squidington-iii Date: Fri, 12 Jun 2026 15:59:04 +0000 Subject: [PATCH 2/2] Auto-fix lint and format issues --- .../sources/declarative/decoders/test_composite_decoder.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unit_tests/sources/declarative/decoders/test_composite_decoder.py b/unit_tests/sources/declarative/decoders/test_composite_decoder.py index 3c5573106..8af9a4b4c 100644 --- a/unit_tests/sources/declarative/decoders/test_composite_decoder.py +++ b/unit_tests/sources/declarative/decoders/test_composite_decoder.py @@ -457,7 +457,9 @@ def test_json_items_parser_yields_floats_not_decimals(requests_mock) -> None: import orjson payload = {"data": [{"ratio": 0.5, "rank": 3, "amount": 0.0000}]} - requests_mock.register_uri("GET", "https://airbyte.io/", content=json.dumps(payload).encode("utf-8")) + requests_mock.register_uri( + "GET", "https://airbyte.io/", content=json.dumps(payload).encode("utf-8") + ) response = requests.get("https://airbyte.io/", stream=True) records = list(CompositeRawDecoder(parser=JsonItemsParser(items_path="data")).decode(response))