-
Notifications
You must be signed in to change notification settings - Fork 590
DO NOT MERGE - test(pydantic-ai): test iterate-pr flow #5640
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1,9 @@ | ||
| import re | ||
|
|
||
| SPAN_ORIGIN = "auto.ai.pydantic_ai" | ||
|
|
||
# Matches data URLs with base64-encoded content, e.g. "data:image/png;base64,iVBORw0K..."
# Group 1: MIME type (e.g. "image/png"), Group 2: base64 data
#
# The MIME type follows the RFC 6838 "restricted-name" grammar, so types that
# contain digits or punctuation ("video/mp4", "image/svg+xml", "image/x-icon",
# "font/woff2", "application/vnd.ms-excel") match as well. Optional media-type
# parameters (e.g. ";charset=utf-8") are accepted before ";base64," and are not
# captured, so the group numbering stays the same. A URL that fails to match is
# treated by callers as a regular URL and is NOT redacted, so this pattern must
# not be narrower than what real data URLs look like.
DATA_URL_BASE64_REGEX = re.compile(
    r"^data:"
    r"([a-zA-Z0-9][a-zA-Z0-9!#$&^_.+\-]*/[a-zA-Z0-9][a-zA-Z0-9!#$&^_.+\-]*)"
    r"(?:;[^;,]+)*?"
    r";base64,"
    r"([A-Za-z0-9+/\-_]+={0,2})$"
)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -9,15 +9,14 @@ | |
|
|
||
| if TYPE_CHECKING: | ||
| from typing import Any, Optional | ||
| from pydantic_ai._tool_manager import ToolDefinition # type: ignore | ||
|
|
||
|
|
||
| def execute_tool_span( | ||
| tool_name: str, | ||
| tool_args: "Any", | ||
| agent: "Any", | ||
| tool_type: str = "function", | ||
| tool_definition: "Optional[ToolDefinition]" = None, | ||
| tool_description: "Optional[str]" = None, | ||
| ) -> "sentry_sdk.tracing.Span": | ||
|
Comment on lines
14
to
20
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Removal of GEN_AI_TOOL_DESCRIPTION will break existing test: The code removes the logic that sets […]. Verification: Verified by reading the test file at lines 2799-2836, which shows […]. Identified by Warden |
||
| """Create a span for tool execution. | ||
|
|
||
|
|
@@ -26,7 +25,7 @@ def execute_tool_span( | |
| tool_args: The arguments passed to the tool | ||
| agent: The agent executing the tool | ||
| tool_type: The type of tool ("function" for regular tools, "mcp" for MCP services) | ||
| tool_definition: The definition of the tool, if available | ||
| tool_description: Optional description of the tool | ||
| """ | ||
| span = sentry_sdk.start_span( | ||
| op=OP.GEN_AI_EXECUTE_TOOL, | ||
|
|
@@ -38,11 +37,8 @@ def execute_tool_span( | |
| span.set_data(SPANDATA.GEN_AI_TOOL_TYPE, tool_type) | ||
| span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool_name) | ||
|
|
||
| if tool_definition is not None and hasattr(tool_definition, "description"): | ||
| span.set_data( | ||
| SPANDATA.GEN_AI_TOOL_DESCRIPTION, | ||
| tool_definition.description, | ||
| ) | ||
| if tool_description is not None: | ||
| span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool_description) | ||
|
|
||
| _set_agent_data(span, agent) | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,56 @@ | ||
| """Utility functions for PydanticAI span instrumentation.""" | ||
|
|
||
| import sentry_sdk | ||
| from sentry_sdk._types import BLOB_DATA_SUBSTITUTE | ||
| from sentry_sdk.ai.utils import get_modality_from_mime_type | ||
| from sentry_sdk.consts import SPANDATA | ||
|
|
||
| from ..consts import DATA_URL_BASE64_REGEX | ||
|
|
||
| from typing import TYPE_CHECKING | ||
|
|
||
| if TYPE_CHECKING: | ||
| from typing import Union, Dict, Any, List | ||
| from typing import Union, Dict, Any, List, Optional | ||
| from pydantic_ai.usage import RequestUsage, RunUsage # type: ignore | ||
|
|
||
| try: | ||
| from pydantic_ai.messages import BinaryContent, ImageUrl # type: ignore | ||
| except ImportError: | ||
| BinaryContent = None | ||
| ImageUrl = None | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Unused runtime imports of `BinaryContent` and `ImageUrl`. Low Severity.
|
||
|
|
||
|
|
||
| def _serialize_image_url_item(item: "Any") -> "Dict[str, Any]": | ||
| """Serialize an ImageUrl content item for span data. | ||
|
|
||
| For data URLs containing base64-encoded images, the content is redacted. | ||
| For regular HTTP URLs, the URL string is preserved. | ||
| """ | ||
| data_url_matches = DATA_URL_BASE64_REGEX.match(item.url) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: The function […]. Suggested Fix: Convert […]. Prompt for AI Agent. Did we get this right? 👍 / 👎 to inform future reviews. |
||
|
|
||
| if data_url_matches: | ||
| mime_type = data_url_matches[1] or "image" | ||
| return { | ||
| "type": "image", | ||
| "mime_type": mime_type, | ||
| "content": BLOB_DATA_SUBSTITUTE, | ||
| } | ||
|
|
||
| return { | ||
| "type": "image", | ||
| "content": str(item.url), | ||
|
Comment on lines
+29
to
+41
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Regex fails to match valid base64 data URLs with complex MIME types: The […]. Verification: Verified by reading consts.py (lines 7-9), which defines the regex as […]. Also found at 2 additional locations.
Identified by Warden |
||
| } | ||
|
|
||
|
|
||
def _serialize_binary_content_item(item: "Any") -> "Dict[str, Any]":
    """Build the span-data representation of a BinaryContent item.

    The item's payload is never copied into the result: "content" is always
    BLOB_DATA_SUBSTITUTE. Only the media type and the modality derived from
    it are reported.
    """
    media_type = item.media_type

    serialized = {"type": "blob"}
    serialized["modality"] = get_modality_from_mime_type(media_type)
    serialized["mime_type"] = media_type
    serialized["content"] = BLOB_DATA_SUBSTITUTE
    return serialized
|
|
||
|
|
||
| def _set_usage_data( | ||
| span: "sentry_sdk.tracing.Span", usage: "Union[RequestUsage, RunUsage]" | ||
|
|
||


There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Regex fails to match common MIME types, leaking data
High Severity
`DATA_URL_BASE64_REGEX` uses `[a-zA-Z]+/[a-zA-Z]+` for the MIME type, which only matches pure alphabetic characters. Valid MIME types like `video/mp4`, `audio/mp3`, `image/svg+xml`, `image/x-icon`, and `font/woff2` contain digits, hyphens, or plus signs and won't match. When a valid base64 data URL fails to match, `_serialize_image_url_item` falls through to the non-redacted path and exposes the full base64-encoded content in span data. An existing `parse_data_uri` utility in `sentry_sdk/ai/utils.py` already handles data URI parsing robustly per RFC 2397 and is used elsewhere in the codebase. Additional Locations (1)
sentry_sdk/integrations/pydantic_ai/spans/utils.py#L28-L42