import re

SPAN_ORIGIN = "auto.ai.pydantic_ai"

# Matches data URLs with base64-encoded content, e.g.
# "data:image/png;base64,iVBORw0K...". Callers use a match to decide that the
# URL carries an inline blob and must be redacted, so a false negative means
# the raw payload is recorded. The pattern is therefore deliberately tolerant
# of every valid spelling of a base64 data URL:
#   * the media type is optional (RFC 2397 allows "data:;base64,...");
#   * the scheme, media type and the "base64" token match case-insensitively;
#   * the payload may contain ASCII whitespace (line-wrapped base64) and
#     percent-escapes (e.g. "%3D" for "=" padding).
# Both the standard (+ /) and URL-safe (- _) base64 alphabets are accepted.
# The two payload alternatives start with disjoint characters, so matching
# stays linear in the length of the (potentially very large) URL.
DATA_URL_BASE64_REGEX = re.compile(
    r"^data:"
    # Optional "type/subtype" media type.
    r"(?:[a-z0-9][a-z0-9.+\-]*/[a-z0-9][a-z0-9.+\-]*)?"
    # Zero or more ";name=value" parameters (e.g. ";charset=utf-8").
    r"(?:;[a-z0-9\-]+=[^;,]*)*"
    r";base64,"
    # Non-empty payload followed by up to two literal "=" padding characters.
    r"(?:[a-z0-9+/\-_\s]|%[0-9a-f]{2})+={0,2}$",
    re.IGNORECASE | re.ASCII,
)
BinaryContent and isinstance(item, BinaryContent): - content.append( - { - "type": "blob", - "modality": get_modality_from_mime_type( - item.media_type - ), - "mime_type": item.media_type, - "content": BLOB_DATA_SUBSTITUTE, - } - ) + content.append(_serialize_binary_content_item(item)) else: content.append(safe_serialize(item)) else: content.append({"type": "text", "text": str(part.content)}) - # Add message if we have content or tool calls if content or tool_calls: message: "Dict[str, Any]" = {"role": role} diff --git a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py index b4f8307170..ee08ca7036 100644 --- a/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py +++ b/sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py @@ -1,7 +1,5 @@ import sentry_sdk -from sentry_sdk._types import BLOB_DATA_SUBSTITUTE from sentry_sdk.ai.utils import ( - get_modality_from_mime_type, get_start_span_function, normalize_message_roles, set_data_normalized, @@ -16,7 +14,11 @@ _set_model_data, _should_send_prompts, ) -from .utils import _set_usage_data +from .utils import ( + _serialize_binary_content_item, + _serialize_image_url_item, + _set_usage_data, +) from typing import TYPE_CHECKING @@ -24,9 +26,10 @@ from typing import Any try: - from pydantic_ai.messages import BinaryContent # type: ignore + from pydantic_ai.messages import BinaryContent, ImageUrl # type: ignore except ImportError: BinaryContent = None + ImageUrl = None def invoke_agent_span( @@ -105,17 +108,10 @@ def invoke_agent_span( for item in user_prompt: if isinstance(item, str): content.append({"text": item, "type": "text"}) + elif ImageUrl and isinstance(item, ImageUrl): + content.append(_serialize_image_url_item(item)) elif BinaryContent and isinstance(item, BinaryContent): - content.append( - { - "type": "blob", - "modality": get_modality_from_mime_type( - item.media_type - ), - "mime_type": item.media_type, - "content": 
def _serialize_image_url_item(item: "Any") -> "Dict[str, Any]":
    """Build the span-data representation of an ImageUrl content item.

    The item's ``url`` attribute is converted to a string and tested against
    ``DATA_URL_BASE64_REGEX``. When it matches (a data URL carrying base64
    content) the URL is replaced by ``BLOB_DATA_SUBSTITUTE``; otherwise the
    URL string is kept unchanged.

    Returns a dict with the keys ``"type"`` (always ``"image"``) and
    ``"content"``.
    """
    url = str(item.url)
    is_inline_base64 = DATA_URL_BASE64_REGEX.match(url) is not None

    return {
        "type": "image",
        "content": BLOB_DATA_SUBSTITUTE if is_inline_base64 else url,
    }
+ """ + url = str(item.url) + data_url_match = DATA_URL_BASE64_REGEX.match(url) + + if data_url_match: + return { + "type": "image", + "content": BLOB_DATA_SUBSTITUTE, + } + + return { + "type": "image", + "content": url, + } + + +def _serialize_binary_content_item(item: "Any") -> "Dict[str, Any]": + """Serialize a BinaryContent item for span data, redacting the blob data.""" + return { + "type": "blob", + "modality": get_modality_from_mime_type(item.media_type), + "mime_type": item.media_type, + "content": BLOB_DATA_SUBSTITUTE, + } + + def _set_usage_data( span: "sentry_sdk.tracing.Span", usage: "Union[RequestUsage, RunUsage]" ) -> None: diff --git a/tests/integrations/pydantic_ai/test_pydantic_ai.py b/tests/integrations/pydantic_ai/test_pydantic_ai.py index 992067b0b1..52f724a4dc 100644 --- a/tests/integrations/pydantic_ai/test_pydantic_ai.py +++ b/tests/integrations/pydantic_ai/test_pydantic_ai.py @@ -12,9 +12,8 @@ from sentry_sdk.integrations.pydantic_ai import PydanticAIIntegration from sentry_sdk.integrations.pydantic_ai.spans.ai_client import _set_input_messages from sentry_sdk.integrations.pydantic_ai.spans.utils import _set_usage_data - from pydantic_ai import Agent -from pydantic_ai.messages import BinaryContent, UserPromptPart +from pydantic_ai.messages import BinaryContent, ImageUrl, UserPromptPart from pydantic_ai.usage import RequestUsage from pydantic_ai.exceptions import ModelRetry, UnexpectedModelBehavior @@ -2797,6 +2796,150 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events): assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20 +@pytest.mark.parametrize( + "url,image_url_kwargs,expected_content", + [ + pytest.param( + "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs", + {}, + BLOB_DATA_SUBSTITUTE, + id="base64_data_url", + ), + pytest.param( + "https://example.com/image.png", + {}, + "https://example.com/image.png", + id="http_url_no_redaction", + ), + pytest.param( + 
"https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs", + {"media_type": "image/png"}, + "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs", + id="http_url_with_base64_query_param", + ), + pytest.param( + "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciLz4=", + {}, + BLOB_DATA_SUBSTITUTE, + id="complex_mime_type", + ), + pytest.param( + "data:image/png;name=file.png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs", + {}, + BLOB_DATA_SUBSTITUTE, + id="optional_parameters", + ), + pytest.param( + "data:text/plain;charset=utf-8;name=hello.txt;base64,SGVsbG8sIFdvcmxkIQ==", + {}, + BLOB_DATA_SUBSTITUTE, + id="multiple_optional_parameters", + ), + ], +) +def test_image_url_base64_content_in_span( + sentry_init, capture_events, url, image_url_kwargs, expected_content +): + from sentry_sdk.integrations.pydantic_ai.spans.ai_client import ai_client_span + + sentry_init( + integrations=[PydanticAIIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + with sentry_sdk.start_transaction(op="test", name="test"): + image_url = ImageUrl(url=url, **image_url_kwargs) + user_part = UserPromptPart(content=["Look at this image:", image_url]) + mock_msg = MagicMock() + mock_msg.parts = [user_part] + mock_msg.instructions = None + + span = ai_client_span([mock_msg], None, None, None) + span.finish() + + (event,) = events + chat_spans = [s for s in event["spans"] if s["op"] == "gen_ai.chat"] + assert len(chat_spans) >= 1 + messages_data = _get_messages_from_span(chat_spans[0]["data"]) + + found_image = False + for msg in messages_data: + if "content" not in msg: + continue + for content_item in msg["content"]: + if content_item.get("type") == "image": + found_image = True + assert content_item["content"] == expected_content + + assert found_image, "Image content item should be found in messages data" + + +@pytest.mark.asyncio +@pytest.mark.parametrize( + "url, 
@pytest.mark.asyncio
@pytest.mark.parametrize(
    "url, image_url_kwargs, expected_content",
    [
        pytest.param(
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {},
            BLOB_DATA_SUBSTITUTE,
            id="base64_data_url_redacted",
        ),
        pytest.param(
            "https://example.com/image.png",
            {},
            "https://example.com/image.png",
            id="http_url_no_redaction",
        ),
        pytest.param(
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {},
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            id="http_url_with_base64_query_param",
        ),
        pytest.param(
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {"media_type": "image/png"},
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            id="http_url_with_base64_query_param_and_media_type",
        ),
    ],
)
async def test_invoke_agent_image_url(
    sentry_init, capture_events, url, image_url_kwargs, expected_content
):
    """Run an agent with an ImageUrl prompt and check the recorded chat spans.

    Every ``"image"`` content entry found in any ``gen_ai.chat`` span must
    carry ``expected_content``, and at least one such entry must exist.
    """
    sentry_init(
        integrations=[PydanticAIIntegration()],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    agent = Agent("test", name="test_image_url_agent")

    events = capture_events()
    prompt = [ImageUrl(url=url, **image_url_kwargs), "Describe this image"]
    await agent.run(prompt)

    (transaction,) = events

    # Flatten image entries across all chat spans and their messages.
    image_items = [
        entry
        for span in transaction["spans"]
        if span["op"] == "gen_ai.chat"
        for message in _get_messages_from_span(span["data"])
        if "content" in message
        for entry in message["content"]
        if entry.get("type") == "image"
    ]

    for entry in image_items:
        assert entry["content"] == expected_content

    assert image_items, "Image content item should be found in messages data"