diff --git a/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py b/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py
index 99b2de3d2..0561369a7 100644
--- a/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py
+++ b/airbyte_cdk/sources/declarative/decoders/composite_raw_decoder.py
@@ -149,7 +149,9 @@ def parse(self, data: BufferedIOBase) -> PARSER_OUTPUT_TYPE:
         # ijson auto-selects the best available backend (yajl2_c when present)
         # and reads from `data` lazily — it does not call `.read()` on the
         # whole stream up front.
-        yield from ijson.items(data, f"{self.items_path}.item")
+        # use_float=True yields floats for non-integer numbers instead of Decimal, matching
+        # json.loads/orjson behavior so downstream JSON serialization doesn't choke on Decimal.
+        yield from ijson.items(data, f"{self.items_path}.item", use_float=True)
 
 
 @dataclass
diff --git a/unit_tests/sources/declarative/decoders/test_composite_decoder.py b/unit_tests/sources/declarative/decoders/test_composite_decoder.py
index ec1a621b1..8af9a4b4c 100644
--- a/unit_tests/sources/declarative/decoders/test_composite_decoder.py
+++ b/unit_tests/sources/declarative/decoders/test_composite_decoder.py
@@ -451,6 +451,24 @@ def test_json_items_parser_composes_with_gzip(requests_mock) -> None:
     assert list(decoder.decode(response)) == payload["dataByAsin"]
 
 
+def test_json_items_parser_yields_floats_not_decimals(requests_mock) -> None:
+    """Non-integer numbers must be parsed as float (not Decimal) so downstream JSON
+    serialization (orjson) does not fail on Decimal values."""
+    import orjson
+
+    payload = {"data": [{"ratio": 0.5, "rank": 3, "amount": 0.0000}]}
+    requests_mock.register_uri(
+        "GET", "https://airbyte.io/", content=json.dumps(payload).encode("utf-8")
+    )
+    response = requests.get("https://airbyte.io/", stream=True)
+
+    records = list(CompositeRawDecoder(parser=JsonItemsParser(items_path="data")).decode(response))
+    assert isinstance(records[0]["ratio"], float)
+    assert isinstance(records[0]["rank"], int)
+    # Must be serializable by orjson (which rejects Decimal).
+    orjson.dumps(records[0])
+
+
 def test_json_items_parser_requires_items_path() -> None:
     parser = JsonItemsParser()
     with pytest.raises(ValueError, match="items_path"):