diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 06c9a23604..afa34f6831 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,4 +1,5 @@ import json +from copy import deepcopy from collections import deque from typing import TYPE_CHECKING from sys import getsizeof @@ -12,6 +13,8 @@ from sentry_sdk.utils import logger MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB +# Maximum characters when only a single message is left after bytes truncation +MAX_SINGLE_MESSAGE_CONTENT_CHARS = 10_000 class GEN_AI_ALLOWED_MESSAGE_ROLES: @@ -101,6 +104,23 @@ def get_start_span_function(): return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction +def _truncate_single_message_content_if_present(message, max_chars): + # type: (Dict[str, Any], int) -> Dict[str, Any] + """ + Truncate a message's content to at most `max_chars` characters and append an + ellipsis if truncation occurs. + """ + if not isinstance(message, dict) or "content" not in message: + return message + content = message["content"] + + if not isinstance(content, str) or len(content) <= max_chars: + return message + + message["content"] = content[:max_chars] + "..." + return message + + def _find_truncation_index(messages, max_bytes): # type: (List[Dict[str, Any]], int) -> int """ @@ -118,8 +138,22 @@ def _find_truncation_index(messages, max_bytes): return 0 -def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): - # type: (List[Dict[str, Any]], int) -> Tuple[List[Dict[str, Any]], int] +def truncate_messages_by_size( + messages, + max_bytes=MAX_GEN_AI_MESSAGE_BYTES, + max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS, +): + # type: (List[Dict[str, Any]], int, int) -> Tuple[List[Dict[str, Any]], int] + """ + Returns a truncated messages list, consisting of + - the last message, with its content truncated to `max_single_message_chars` characters, + if the last message's size exceeds `max_bytes` bytes; otherwise, + - the maximum number of messages, starting from the end of the `messages` list, whose total + serialized size does not exceed `max_bytes` bytes. + + In the single message case, the serialized message size may exceed `max_bytes`, because + truncation is based only on character count in that case. + """ serialized_json = json.dumps(messages, separators=(",", ":")) current_size = len(serialized_json.encode("utf-8")) @@ -127,7 +161,18 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): return messages, 0 truncation_index = _find_truncation_index(messages, max_bytes) - return messages[truncation_index:], truncation_index + if truncation_index < len(messages): + truncated_messages = messages[truncation_index:] + else: + truncation_index = len(messages) - 1 + truncated_messages = messages[-1:] + + if len(truncated_messages) == 1: + truncated_messages[0] = _truncate_single_message_content_if_present( + deepcopy(truncated_messages[0]), max_chars=max_single_message_chars + ) + + return truncated_messages, truncation_index def truncate_and_annotate_messages( diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 5ff136f810..8d3d4ba204 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -8,6 +8,7 @@ from sentry_sdk.ai.monitoring import ai_track from sentry_sdk.ai.utils import ( MAX_GEN_AI_MESSAGE_BYTES, + MAX_SINGLE_MESSAGE_CONTENT_CHARS, set_data_normalized, truncate_and_annotate_messages, truncate_messages_by_size, @@ -226,8 +227,7 @@ def test_truncation_removes_oldest_first(self, large_messages): ) assert len(result) < len(large_messages) - if result: - assert result[-1] == large_messages[-1] + assert result[-1] == large_messages[-1] assert truncation_index == len(large_messages) - len(result) def test_empty_messages_list(self): @@ -278,6 +278,33 @@ def test_progressive_truncation(self, large_messages): assert current_count >= 1 prev_count = current_count + def test_single_message_truncation(self): + large_content = "This is a very long message. " * 10_000 + + messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + ] + + result, truncation_index = truncate_messages_by_size( + messages, max_single_message_chars=MAX_SINGLE_MESSAGE_CONTENT_CHARS + ) + + assert len(result) == 1 + assert ( + len(result[0]["content"].rstrip("...")) <= MAX_SINGLE_MESSAGE_CONTENT_CHARS + ) + + # If the last message is too large, the system message is not present + system_msgs = [m for m in result if m.get("role") == "system"] + assert len(system_msgs) == 0 + + # Confirm the user message is truncated with '...' + user_msgs = [m for m in result if m.get("role") == "user"] + assert len(user_msgs) == 1 + assert user_msgs[0]["content"].endswith("...") + assert len(user_msgs[0]["content"]) < len(large_content) + class TestTruncateAndAnnotateMessages: def test_no_truncation_returns_list(self, sample_messages):