Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions sentry_sdk/integrations/pydantic_ai/consts.py
Original file line number Diff line number Diff line change
@@ -1 +1,8 @@
import re

SPAN_ORIGIN = "auto.ai.pydantic_ai"

# Matches data URLs with base64-encoded content, e.g. "data:image/png;base64,iVBORw0K..."
#
# The pattern follows the RFC 2397 layout:
#   "data:" [mediatype] *(";" attribute "=" value) ";base64," payload
# RFC 2397 makes the media type optional (e.g. "data:;base64,SGVsbG8="). Such
# URLs still embed a blob, so the media type group is optional here; otherwise
# they would not be recognized as base64 data URLs.
# The payload accepts both the standard and the URL-safe base64 alphabets,
# followed by at most two "=" padding characters.
DATA_URL_BASE64_REGEX = re.compile(
    r"^data:"
    r"(?:[a-zA-Z0-9][a-zA-Z0-9.+\-]*/[a-zA-Z0-9][a-zA-Z0-9.+\-]*)?"  # type/subtype
    r"(?:;[a-zA-Z0-9\-]+=[^;,]*)*"  # optional ";attribute=value" parameters
    r";base64,"
    r"(?:[A-Za-z0-9+/\-_]+={0,2})$"  # base64 payload with optional padding
)
24 changes: 10 additions & 14 deletions sentry_sdk/integrations/pydantic_ai/spans/ai_client.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import json

import sentry_sdk
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
from sentry_sdk.ai.utils import (
normalize_message_roles,
set_data_normalized,
truncate_and_annotate_messages,
get_modality_from_mime_type,
)
from sentry_sdk.consts import OP, SPANDATA
from sentry_sdk.utils import safe_serialize
Expand All @@ -21,7 +19,11 @@
get_current_agent,
get_is_streaming,
)
from .utils import _set_usage_data
from .utils import (
_serialize_binary_content_item,
_serialize_image_url_item,
_set_usage_data,
)

from typing import TYPE_CHECKING

Expand All @@ -40,6 +42,7 @@
TextPart,
ThinkingPart,
BinaryContent,
ImageUrl,
)
except ImportError:
# Fallback if these classes are not available
Expand All @@ -50,6 +53,7 @@
TextPart = None
ThinkingPart = None
BinaryContent = None
ImageUrl = None


def _transform_system_instructions(
Expand Down Expand Up @@ -158,22 +162,14 @@ def _set_input_messages(span: "sentry_sdk.tracing.Span", messages: "Any") -> Non
for item in part.content:
if isinstance(item, str):
content.append({"type": "text", "text": item})
elif ImageUrl and isinstance(item, ImageUrl):
content.append(_serialize_image_url_item(item))
elif BinaryContent and isinstance(item, BinaryContent):
content.append(
{
"type": "blob",
"modality": get_modality_from_mime_type(
item.media_type
),
"mime_type": item.media_type,
"content": BLOB_DATA_SUBSTITUTE,
}
)
content.append(_serialize_binary_content_item(item))
else:
content.append(safe_serialize(item))
else:
content.append({"type": "text", "text": str(part.content)})

# Add message if we have content or tool calls
if content or tool_calls:
message: "Dict[str, Any]" = {"role": role}
Expand Down
24 changes: 10 additions & 14 deletions sentry_sdk/integrations/pydantic_ai/spans/invoke_agent.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import sentry_sdk
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
from sentry_sdk.ai.utils import (
get_modality_from_mime_type,
get_start_span_function,
normalize_message_roles,
set_data_normalized,
Expand All @@ -16,17 +14,22 @@
_set_model_data,
_should_send_prompts,
)
from .utils import _set_usage_data
from .utils import (
_serialize_binary_content_item,
_serialize_image_url_item,
_set_usage_data,
)

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Any

try:
from pydantic_ai.messages import BinaryContent # type: ignore
from pydantic_ai.messages import BinaryContent, ImageUrl # type: ignore
except ImportError:
BinaryContent = None
ImageUrl = None


def invoke_agent_span(
Expand Down Expand Up @@ -105,17 +108,10 @@ def invoke_agent_span(
for item in user_prompt:
if isinstance(item, str):
content.append({"text": item, "type": "text"})
elif ImageUrl and isinstance(item, ImageUrl):
content.append(_serialize_image_url_item(item))
elif BinaryContent and isinstance(item, BinaryContent):
content.append(
{
"type": "blob",
"modality": get_modality_from_mime_type(
item.media_type
),
"mime_type": item.media_type,
"content": BLOB_DATA_SUBSTITUTE,
}
)
content.append(_serialize_binary_content_item(item))
if content:
messages.append(
{
Expand Down
37 changes: 36 additions & 1 deletion sentry_sdk/integrations/pydantic_ai/spans/utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,50 @@
"""Utility functions for PydanticAI span instrumentation."""

import sentry_sdk
from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
from sentry_sdk.ai.utils import get_modality_from_mime_type
from sentry_sdk.consts import SPANDATA

from ..consts import DATA_URL_BASE64_REGEX

from typing import TYPE_CHECKING

if TYPE_CHECKING:
from typing import Union, Dict, Any, List
from typing import Union, Dict, Any
from pydantic_ai.usage import RequestUsage, RunUsage # type: ignore


def _serialize_image_url_item(item: "Any") -> "Dict[str, Any]":
    """Build the span-data representation of an ImageUrl content item.

    The result always has the shape ``{"type": "image", "content": ...}``.
    When the stringified URL matches ``DATA_URL_BASE64_REGEX`` (a base64 data
    URL), the content is replaced with ``BLOB_DATA_SUBSTITUTE`` so the encoded
    payload is not recorded. Any other URL is recorded verbatim as a string.
    """
    raw_url = str(item.url)
    is_inline_blob = DATA_URL_BASE64_REGEX.match(raw_url) is not None

    return {
        "type": "image",
        "content": BLOB_DATA_SUBSTITUTE if is_inline_blob else raw_url,
    }


def _serialize_binary_content_item(item: "Any") -> "Dict[str, Any]":
    """Build the span-data representation of a BinaryContent item.

    The item's media type is reported both as-is (``mime_type``) and mapped
    through ``get_modality_from_mime_type`` (``modality``). The binary payload
    itself is never included; ``BLOB_DATA_SUBSTITUTE`` is recorded in its place.
    """
    mime_type = item.media_type
    serialized = {
        "type": "blob",
        "modality": get_modality_from_mime_type(mime_type),
        "mime_type": mime_type,
        "content": BLOB_DATA_SUBSTITUTE,
    }
    return serialized


def _set_usage_data(
span: "sentry_sdk.tracing.Span", usage: "Union[RequestUsage, RunUsage]"
) -> None:
Expand Down
147 changes: 145 additions & 2 deletions tests/integrations/pydantic_ai/test_pydantic_ai.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,8 @@
from sentry_sdk.integrations.pydantic_ai import PydanticAIIntegration
from sentry_sdk.integrations.pydantic_ai.spans.ai_client import _set_input_messages
from sentry_sdk.integrations.pydantic_ai.spans.utils import _set_usage_data

from pydantic_ai import Agent
from pydantic_ai.messages import BinaryContent, UserPromptPart
from pydantic_ai.messages import BinaryContent, ImageUrl, UserPromptPart
from pydantic_ai.usage import RequestUsage
from pydantic_ai.exceptions import ModelRetry, UnexpectedModelBehavior

Expand Down Expand Up @@ -2797,6 +2796,150 @@ async def test_set_usage_data_with_cache_tokens(sentry_init, capture_events):
assert span_data["data"][SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHE_WRITE] == 20


@pytest.mark.parametrize(
    "url,image_url_kwargs,expected_content",
    [
        pytest.param(
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {},
            BLOB_DATA_SUBSTITUTE,
            id="base64_data_url",
        ),
        pytest.param(
            "https://example.com/image.png",
            {},
            "https://example.com/image.png",
            id="http_url_no_redaction",
        ),
        pytest.param(
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {"media_type": "image/png"},
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            id="http_url_with_base64_query_param",
        ),
        pytest.param(
            "data:image/svg+xml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciLz4=",
            {},
            BLOB_DATA_SUBSTITUTE,
            id="complex_mime_type",
        ),
        pytest.param(
            "data:image/png;name=file.png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {},
            BLOB_DATA_SUBSTITUTE,
            id="optional_parameters",
        ),
        pytest.param(
            "data:text/plain;charset=utf-8;name=hello.txt;base64,SGVsbG8sIFdvcmxkIQ==",
            {},
            BLOB_DATA_SUBSTITUTE,
            id="multiple_optional_parameters",
        ),
    ],
)
def test_image_url_base64_content_in_span(
    sentry_init, capture_events, url, image_url_kwargs, expected_content
):
    """
    Test that ImageUrl items in a user prompt part are recorded on the
    gen_ai.chat span with the expected content: base64 data URLs are replaced
    by the blob substitute, while regular HTTP URLs are kept as-is.
    """
    from sentry_sdk.integrations.pydantic_ai.spans.ai_client import ai_client_span

    sentry_init(
        integrations=[PydanticAIIntegration()],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    events = capture_events()

    with sentry_sdk.start_transaction(op="test", name="test"):
        # Minimal stand-in for a model message carrying one user prompt part.
        mock_msg = MagicMock()
        mock_msg.parts = [
            UserPromptPart(
                content=[
                    "Look at this image:",
                    ImageUrl(url=url, **image_url_kwargs),
                ]
            )
        ]
        mock_msg.instructions = None

        span = ai_client_span([mock_msg], None, None, None)
        span.finish()

    (event,) = events
    chat_spans = [s for s in event["spans"] if s["op"] == "gen_ai.chat"]
    assert len(chat_spans) >= 1
    messages_data = _get_messages_from_span(chat_spans[0]["data"])

    # Gather every image item first, then check each one.
    image_items = []
    for msg in messages_data:
        for content_item in msg.get("content", ()):
            if content_item.get("type") == "image":
                image_items.append(content_item)

    for image_item in image_items:
        assert image_item["content"] == expected_content

    assert image_items, "Image content item should be found in messages data"


@pytest.mark.asyncio
@pytest.mark.parametrize(
    "url, image_url_kwargs, expected_content",
    [
        pytest.param(
            "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {},
            BLOB_DATA_SUBSTITUTE,
            id="base64_data_url_redacted",
        ),
        pytest.param(
            "https://example.com/image.png",
            {},
            "https://example.com/image.png",
            id="http_url_no_redaction",
        ),
        pytest.param(
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {},
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            id="http_url_with_base64_query_param",
        ),
        pytest.param(
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            {"media_type": "image/png"},
            "https://example.com/api?data=iVBORw0KGgoAAAANSUhEUgAAAAoAAAAKCAYAAACNMs",
            id="http_url_with_base64_query_param_and_media_type",
        ),
    ],
)
async def test_invoke_agent_image_url(
    sentry_init, capture_events, url, image_url_kwargs, expected_content
):
    """
    Test that an ImageUrl passed to agent.run() is recorded on the gen_ai.chat
    spans with the expected content: base64 data URLs are replaced by the blob
    substitute, while regular HTTP URLs are kept as-is.
    """
    sentry_init(
        integrations=[PydanticAIIntegration()],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    agent = Agent("test", name="test_image_url_agent")

    events = capture_events()
    await agent.run([ImageUrl(url=url, **image_url_kwargs), "Describe this image"])

    (transaction,) = events

    # Gather the content of every image item across all chat spans.
    image_contents = []
    for chat_span in transaction["spans"]:
        if chat_span["op"] != "gen_ai.chat":
            continue
        for msg in _get_messages_from_span(chat_span["data"]):
            for content_item in msg.get("content", ()):
                if content_item.get("type") == "image":
                    image_contents.append(content_item["content"])

    for recorded_content in image_contents:
        assert recorded_content == expected_content

    assert image_contents, "Image content item should be found in messages data"


@pytest.mark.asyncio
async def test_tool_description_in_execute_tool_span(sentry_init, capture_events):
"""
Expand Down
Loading