Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 48 additions & 0 deletions python/packages/gemini/agent_framework_gemini/_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from __future__ import annotations

import base64
import json
import logging
import sys
Expand Down Expand Up @@ -678,10 +679,57 @@ def _convert_message_contents(
parts.append(raw_part.model_copy(update={"function_call": function_call}, deep=True))
else:
parts.append(types.Part(function_call=function_call))
case "data" | "uri":
part = self._convert_data_or_uri_content(content)
if part is not None:
parts.append(part)
case _:
logger.debug("Skipping unsupported content type for Gemini: %s", content.type)
return parts

def _convert_data_or_uri_content(self, content: Content) -> types.Part | None:
    """Convert a ``data`` or ``uri`` Content to a Gemini Part.

    URIs starting with ``data:`` become ``inline_data`` Parts carrying the decoded bytes.
    Any other URI becomes a ``file_data`` Part referencing the resource.

    Args:
        content: The framework Content object, expected to be of type ``data`` or ``uri``.

    Returns:
        A Gemini Part carrying the multimodal content, or None if the content cannot be
        converted (missing URI, non-base64 data URI, no resolvable media type for a data
        URI, or undecodable base64 payload). Every skipped case is logged as a warning.
    """
    uri = content.uri
    if not uri:
        logger.warning("Skipping %s content for Gemini: missing uri", content.type)
        return None

    if uri.startswith("data:"):
        # RFC 2397: the header ends at the FIRST comma and the ";base64" marker must sit
        # directly before it. Splitting there (instead of searching for ";base64," anywhere)
        # keeps a plain-text payload that happens to contain ";base64," from being mis-parsed.
        header, separator, encoded = uri.partition(",")
        if not separator or not header.endswith(";base64"):
            logger.warning("Skipping data content for Gemini: data URI is not base64-encoded")
            return None
        # An explicit media_type wins; otherwise take the header's type and drop parameters
        # such as ";charset=utf-8" (and the trailing ";base64" marker).
        mime_type = content.media_type or header[len("data:") :].split(";")[0] or None
        if not mime_type:
            logger.warning("Skipping data content for Gemini: missing media_type")
            return None
        try:
            raw_bytes = base64.b64decode(encoded)
        except ValueError:
            # binascii.Error (bad padding / invalid data) is a ValueError subclass, and
            # non-ASCII input raises ValueError directly, so this covers decode failures
            # without masking unrelated errors.
            logger.warning("Skipping data content for Gemini: failed to decode base64 data")
            return None
        return types.Part.from_bytes(data=raw_bytes, mime_type=mime_type)

    try:
        return types.Part.from_uri(file_uri=uri, mime_type=content.media_type)
    except ValueError:
        # from_uri raises when no media_type is given and one cannot be inferred from the URI
        # (e.g. presigned URLs or API endpoints without an extension). Pass the URI through
        # without a mime type rather than dropping the content or raising.
        # Log the URI without its query string / fragment: presigned URLs carry their
        # credentials there and must not end up in log output.
        loggable_uri = uri.partition("?")[0].partition("#")[0]
        logger.warning(
            "Could not determine media_type for URI content; sending to Gemini without one: %s",
            loggable_uri,
        )
        return types.Part(file_data=types.FileData(file_uri=uri, mime_type=None))

def _convert_function_result(
self,
content: Content,
Expand Down
122 changes: 122 additions & 0 deletions python/packages/gemini/tests/test_gemini_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -711,6 +711,128 @@ def test_function_call_part_preserves_thought_signature_from_raw_part() -> None:
assert parts[0].function_call.args == {"location": "Paris"}


# multimodal (data/uri) parts


def test_data_content_converted_to_inline_data_part() -> None:
    """Bytes wrapped by Content.from_data must come out of the converter as an inline_data Part."""
    import base64

    # A tiny PNG kept inline as base64 so the test needs no fixture file.
    image_bytes = base64.b64decode(
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    )
    client, _ = _make_gemini_client()
    image_content = Content.from_data(data=image_bytes, media_type="image/png")
    assert image_content.type == "data"

    converted = client._convert_message_contents([image_content], {})

    assert len(converted) == 1
    inline = converted[0].inline_data
    assert inline is not None
    assert inline.mime_type == "image/png"
    assert inline.data == image_bytes


def test_data_uri_content_converted_to_inline_data_part() -> None:
    """A base64 data URI passed through Content.from_uri is decoded into an inline_data Part."""
    import base64

    encoded_png = "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    image_bytes = base64.b64decode(encoded_png)
    # Re-encode the decoded bytes so the URI payload is exactly what b64encode produces.
    data_uri = f"data:image/png;base64,{base64.b64encode(image_bytes).decode()}"

    client, _ = _make_gemini_client()
    uri_content = Content.from_uri(uri=data_uri)
    assert uri_content.type == "data"

    converted = client._convert_message_contents([uri_content], {})

    assert len(converted) == 1
    inline = converted[0].inline_data
    assert inline is not None
    assert inline.mime_type == "image/png"
    assert inline.data == image_bytes


def test_external_uri_content_converted_to_file_data_part() -> None:
    """An external URL with an explicit media type is forwarded as a Gemini file_data Part."""
    image_url = "https://example.com/image.png"
    client, _ = _make_gemini_client()
    uri_content = Content.from_uri(uri=image_url, media_type="image/png")
    assert uri_content.type == "uri"

    converted = client._convert_message_contents([uri_content], {})

    assert len(converted) == 1
    file_ref = converted[0].file_data
    assert file_ref is not None
    assert file_ref.file_uri == image_url
    assert file_ref.mime_type == "image/png"


def test_text_and_image_content_both_reach_the_model() -> None:
    """A multimodal message keeps both the text and the image parts, in their original order."""
    import base64

    client, _ = _make_gemini_client()
    png = base64.b64decode(
        "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNk+M9QDwADhgGAWjR9awAAAABJRU5ErkJggg=="
    )
    text = Content.from_text("What is in this image?")
    image = Content.from_data(data=png, media_type="image/png")

    parts = client._convert_message_contents([text, image], {})

    assert len(parts) == 2
    assert parts[0].text == "What is in this image?"
    # Pin the image to the second slot and check its payload, rather than only asserting
    # that "some" part has inline data: a swapped order or corrupted bytes must fail here.
    assert parts[1].inline_data is not None
    assert parts[1].inline_data.mime_type == "image/png"
    assert parts[1].inline_data.data == png


def test_non_base64_data_uri_is_skipped(caplog: pytest.LogCaptureFixture) -> None:
    """A data URI without base64 encoding yields no Part and logs a warning instead of raising."""
    client, _ = _make_gemini_client()
    # Build the Content by hand: start from a text Content and overwrite its type and uri
    # to get a "data" Content whose URI is a plain-text (non-base64) data URI.
    plain_data = Content.from_text("placeholder")
    plain_data.type = "data"  # type: ignore[assignment]
    plain_data.uri = "data:text/plain,hello"

    with caplog.at_level(logging.WARNING):
        converted = client._convert_message_contents([plain_data], {})

    assert converted == []
    logged = [record.message for record in caplog.records]
    assert any("base64" in text for text in logged)


def test_data_uri_media_type_parameters_are_stripped() -> None:
    """Only the bare media type of a data URI reaches Gemini; parameters like charset are dropped."""
    import base64

    payload = base64.b64encode(b"hello").decode()
    client, _ = _make_gemini_client()
    # Hand-built "data" Content with no explicit media_type, so the converter has to
    # derive the type from the URI header itself.
    text_data = Content.from_text("placeholder")
    text_data.type = "data"  # type: ignore[assignment]
    text_data.uri = f"data:text/plain;charset=utf-8;base64,{payload}"
    text_data.media_type = None

    converted = client._convert_message_contents([text_data], {})

    assert len(converted) == 1
    inline = converted[0].inline_data
    assert inline is not None
    assert inline.mime_type == "text/plain"


def test_external_uri_without_inferable_media_type_is_passed_through(caplog: pytest.LogCaptureFixture) -> None:
    """An extension-less URI with no media_type still becomes a file_data Part, with a warning logged."""
    endpoint = "https://api.example.com/files/123"
    client, _ = _make_gemini_client()
    uri_content = Content.from_uri(uri=endpoint)
    assert uri_content.type == "uri"
    assert uri_content.media_type is None

    with caplog.at_level(logging.WARNING):
        converted = client._convert_message_contents([uri_content], {})

    assert len(converted) == 1
    file_ref = converted[0].file_data
    assert file_ref is not None
    assert file_ref.file_uri == endpoint
    assert file_ref.mime_type is None
    logged = [record.message for record in caplog.records]
    assert any("media_type" in text for text in logged)


# code execution parts


Expand Down
Loading