diff --git a/python/packages/gemini/agent_framework_gemini/_chat_client.py b/python/packages/gemini/agent_framework_gemini/_chat_client.py
index f1aa36a9e7c..29f0feaedef 100644
--- a/python/packages/gemini/agent_framework_gemini/_chat_client.py
+++ b/python/packages/gemini/agent_framework_gemini/_chat_client.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import base64
 import json
 import logging
 import sys
@@ -678,10 +679,68 @@ def _convert_message_contents(
                         parts.append(raw_part.model_copy(update={"function_call": function_call}, deep=True))
                     else:
                         parts.append(types.Part(function_call=function_call))
+                case "data" | "uri":
+                    part = self._convert_data_or_uri_content(content)
+                    if part is not None:
+                        parts.append(part)
                 case _:
                     logger.debug("Skipping unsupported content type for Gemini: %s", content.type)
         return parts
 
+    def _convert_data_or_uri_content(self, content: Content) -> types.Part | None:
+        """Convert a ``data`` or ``uri`` Content to a Gemini Part.
+
+        Data URIs (``type="data"``) become ``inline_data`` Parts with the decoded bytes.
+        External URIs (``type="uri"``) become ``file_data`` Parts referencing the resource.
+
+        Args:
+            content: The framework Content object, expected to be of type ``data`` or ``uri``.
+
+        Returns:
+            A Gemini Part carrying the multimodal content, or None if the content cannot be
+            converted (e.g. missing URI, non-base64 data URI, or undecodable data).
+        """
+        uri = content.uri
+        if not uri:
+            logger.warning("Skipping %s content for Gemini: missing uri", content.type)
+            return None
+
+        if uri.startswith("data:"):
+            # RFC 2397: the header is everything before the first comma, and a base64 payload is
+            # flagged by a trailing ";base64" token in it. Splitting on the first comma keeps a
+            # literal ";base64," inside a plain-text payload from being mistaken for that flag.
+            header, separator, encoded = uri.partition(",")
+            if not separator or not header.endswith(";base64"):
+                logger.warning("Skipping data content for Gemini: data URI is not base64-encoded")
+                return None
+            # Keep only the bare type/subtype; parameters such as charset are dropped.
+            declared_type = header.removeprefix("data:").split(";")[0]
+            mime_type = content.media_type or declared_type or None
+            if not mime_type:
+                logger.warning("Skipping data content for Gemini: missing media_type")
+                return None
+            try:
+                raw_bytes = base64.b64decode(encoded)
+            except ValueError:
+                # binascii.Error (bad padding/length) is a ValueError subclass, as is the error
+                # raised for non-ASCII input; anything else would be a genuine bug, so let it surface.
+                logger.warning("Skipping data content for Gemini: failed to decode base64 data")
+                return None
+            return types.Part.from_bytes(data=raw_bytes, mime_type=mime_type)
+
+        try:
+            return types.Part.from_uri(file_uri=uri, mime_type=content.media_type)
+        except ValueError:
+            # from_uri raises when no media_type is given and one cannot be inferred from the URI
+            # (e.g. presigned URLs or API endpoints without an extension). Pass the URI through
+            # without a mime type rather than dropping the content or raising.
+            # Log only the part before the query/fragment: presigned URLs carry credentials there.
+            logger.warning(
+                "Could not determine media_type for URI content; sending to Gemini without one: %s",
+                uri.split("?", 1)[0].split("#", 1)[0],
+            )
+            return types.Part(file_data=types.FileData(file_uri=uri, mime_type=None))
+
     def _convert_function_result(
         self,
         content: Content,
diff --git a/python/packages/gemini/tests/test_gemini_client.py b/python/packages/gemini/tests/test_gemini_client.py
index c85d1bbec3b..3151e378a48 100644
--- a/python/packages/gemini/tests/test_gemini_client.py
+++ b/python/packages/gemini/tests/test_gemini_client.py
@@ -711,6 +711,174 @@ def test_function_call_part_preserves_thought_signature_from_raw_part() -> None:
     assert parts[0].function_call.args == {"location": "Paris"}
 
 
+# multimodal (data/uri) parts
+
+
+def test_data_content_converted_to_inline_data_part() -> None:
+    """Content.from_data is converted to a Gemini inline_data Part so images reach the model."""
+    import base64
+
+    client, _ = _make_gemini_client()
+    png = base64.b64decode(
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
+    )
+    content = Content.from_data(data=png, media_type="image/png")
+    assert content.type == "data"
+
+    parts = client._convert_message_contents([content], {})
+
+    assert len(parts) == 1
+    assert parts[0].inline_data is not None
+    assert parts[0].inline_data.mime_type == "image/png"
+    assert parts[0].inline_data.data == png
+
+
+def test_data_uri_content_converted_to_inline_data_part() -> None:
+    """A data URI created via Content.from_uri becomes an inline_data Part with decoded bytes."""
+    import base64
+
+    client, _ = _make_gemini_client()
+    png = base64.b64decode(
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
+    )
+    content = Content.from_uri(uri=f"data:image/png;base64,{base64.b64encode(png).decode()}")
+    assert content.type == "data"
+
+    parts = client._convert_message_contents([content], {})
+
+    assert len(parts) == 1
+    assert parts[0].inline_data is not None
+    assert parts[0].inline_data.mime_type == "image/png"
+    assert parts[0].inline_data.data == png
+
+
+def test_external_uri_content_converted_to_file_data_part() -> None:
+    """Content.from_uri with an external URL becomes a Gemini file_data Part."""
+    client, _ = _make_gemini_client()
+    content = Content.from_uri(uri="https://example.com/image.png", media_type="image/png")
+    assert content.type == "uri"
+
+    parts = client._convert_message_contents([content], {})
+
+    assert len(parts) == 1
+    assert parts[0].file_data is not None
+    assert parts[0].file_data.file_uri == "https://example.com/image.png"
+    assert parts[0].file_data.mime_type == "image/png"
+
+
+def test_text_and_image_content_both_reach_the_model() -> None:
+    """A multimodal message keeps both the text and the image parts."""
+    import base64
+
+    client, _ = _make_gemini_client()
+    png = base64.b64decode(
+        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
+    )
+    text = Content.from_text("What is in this image?")
+    image = Content.from_data(data=png, media_type="image/png")
+
+    parts = client._convert_message_contents([text, image], {})
+
+    assert len(parts) == 2
+    assert parts[0].text == "What is in this image?"
+    assert parts[1].inline_data is not None
+
+
+def test_non_base64_data_uri_is_skipped(caplog: pytest.LogCaptureFixture) -> None:
+    """A data URI that is not base64-encoded is skipped with a warning rather than crashing."""
+    client, _ = _make_gemini_client()
+    content = Content.from_text("placeholder")
+    content.type = "data"  # type: ignore[assignment]
+    content.uri = "data:text/plain,hello"
+
+    with caplog.at_level(logging.WARNING):
+        parts = client._convert_message_contents([content], {})
+
+    assert parts == []
+    assert any("base64" in r.message for r in caplog.records)
+
+
+def test_base64_marker_in_plain_data_uri_payload_is_skipped(caplog: pytest.LogCaptureFixture) -> None:
+    """A literal ``;base64,`` inside a non-base64 payload does not make the data URI look base64-encoded."""
+    client, _ = _make_gemini_client()
+    content = Content.from_text("placeholder")
+    content.type = "data"  # type: ignore[assignment]
+    content.uri = "data:text/plain,note;base64,aGVsbG8="
+    content.media_type = None
+
+    with caplog.at_level(logging.WARNING):
+        parts = client._convert_message_contents([content], {})
+
+    assert parts == []
+    assert any("base64" in r.message for r in caplog.records)
+
+
+def test_undecodable_base64_data_uri_is_skipped(caplog: pytest.LogCaptureFixture) -> None:
+    """A data URI whose payload is not valid base64 is skipped with a warning rather than crashing."""
+    client, _ = _make_gemini_client()
+    content = Content.from_text("placeholder")
+    content.type = "data"  # type: ignore[assignment]
+    content.uri = "data:image/png;base64,abc"
+    content.media_type = None
+
+    with caplog.at_level(logging.WARNING):
+        parts = client._convert_message_contents([content], {})
+
+    assert parts == []
+    assert any("decode" in r.message for r in caplog.records)
+
+
+def test_data_uri_media_type_parameters_are_stripped() -> None:
+    """Parameters in a data URI media type (e.g. charset) are dropped before reaching Gemini."""
+    import base64
+
+    client, _ = _make_gemini_client()
+    encoded = base64.b64encode(b"hello").decode()
+    content = Content.from_text("placeholder")
+    content.type = "data"  # type: ignore[assignment]
+    content.uri = f"data:text/plain;charset=utf-8;base64,{encoded}"
+    content.media_type = None
+
+    parts = client._convert_message_contents([content], {})
+
+    assert len(parts) == 1
+    assert parts[0].inline_data is not None
+    assert parts[0].inline_data.mime_type == "text/plain"
+
+
+def test_external_uri_without_inferable_media_type_is_passed_through(caplog: pytest.LogCaptureFixture) -> None:
+    """A URI with no media_type and no guessable extension is sent as file_data without crashing."""
+    client, _ = _make_gemini_client()
+    content = Content.from_uri(uri="https://api.example.com/files/123")
+    assert content.type == "uri"
+    assert content.media_type is None
+
+    with caplog.at_level(logging.WARNING):
+        parts = client._convert_message_contents([content], {})
+
+    assert len(parts) == 1
+    assert parts[0].file_data is not None
+    assert parts[0].file_data.file_uri == "https://api.example.com/files/123"
+    assert parts[0].file_data.mime_type is None
+    assert any("media_type" in r.message for r in caplog.records)
+
+
+def test_external_uri_warning_omits_query_string(caplog: pytest.LogCaptureFixture) -> None:
+    """The pass-through warning logs the URI without its query, which may hold presigned credentials."""
+    client, _ = _make_gemini_client()
+    uri = "https://storage.example.com/files/123?X-Signature=secret"
+    content = Content.from_uri(uri=uri)
+
+    with caplog.at_level(logging.WARNING):
+        parts = client._convert_message_contents([content], {})
+
+    assert len(parts) == 1
+    assert parts[0].file_data is not None
+    assert parts[0].file_data.file_uri == uri
+    assert any("media_type" in r.message for r in caplog.records)
+    assert all("secret" not in r.message for r in caplog.records)
+
+
 # code execution parts
 
 