diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py
index acdf7177b4..7f75ede0d9 100644
--- a/sentry_sdk/consts.py
+++ b/sentry_sdk/consts.py
@@ -499,7 +499,7 @@ class SPANDATA:
     GEN_AI_RESPONSE_FINISH_REASONS = "gen_ai.response.finish_reasons"
     """
     The reason why the model stopped generating.
-    Example: "COMPLETE"
+    Example: ["COMPLETE"]
     """
 
     GEN_AI_RESPONSE_ID = "gen_ai.response.id"
diff --git a/sentry_sdk/integrations/anthropic.py b/sentry_sdk/integrations/anthropic.py
index 0aa812cab3..a5f1e3cfff 100644
--- a/sentry_sdk/integrations/anthropic.py
+++ b/sentry_sdk/integrations/anthropic.py
@@ -55,6 +55,7 @@ class _RecordedUsage:
     input_tokens: int = 0
     cache_write_input_tokens: "Optional[int]" = 0
     cache_read_input_tokens: "Optional[int]" = 0
+    finish_reason: "Optional[str]" = None
 
 
 class AnthropicIntegration(Integration):
@@ -186,6 +187,10 @@ def _collect_ai_data(
                 usage.cache_read_input_tokens = cache_read_input_tokens
             # TODO: Record event.usage.server_tool_use
 
+            stop_reason = getattr(event.delta, "stop_reason", None)
+            if stop_reason is not None:
+                usage.finish_reason = stop_reason
+
     return (
         model,
         usage,
@@ -348,10 +353,13 @@ def _set_output_data(
     cache_write_input_tokens: "int | None",
     content_blocks: "list[Any]",
     finish_span: bool = False,
+    finish_reason: "str | None" = None,
 ) -> None:
     """
     Set output data for the span based on the AI response."""
     span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model)
+    if finish_reason is not None:
+        span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, [finish_reason])
     if should_send_default_pii() and integration.include_prompts:
         output_messages: "dict[str, list[Any]]" = {
             "response": [],
@@ -443,6 +451,7 @@ def _sentry_patched_create_common(f: "Any", *args: "Any", **kwargs: "Any") -> "Any":
             cache_write_input_tokens=cache_write_input_tokens,
             content_blocks=content_blocks,
             finish_span=True,
+            finish_reason=getattr(result, "stop_reason", None),
         )
 
     # Streaming response
@@ -485,6 +494,7 @@ def new_iterator() -> "Iterator[MessageStreamEvent]":
                 cache_write_input_tokens=usage.cache_write_input_tokens,
                 content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
                 finish_span=True,
+                finish_reason=usage.finish_reason,
             )
 
         async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]":
@@ -523,6 +533,7 @@ async def new_iterator_async() -> "AsyncIterator[MessageStreamEvent]":
                 cache_write_input_tokens=usage.cache_write_input_tokens,
                 content_blocks=[{"text": "".join(content_blocks), "type": "text"}],
                 finish_span=True,
+                finish_reason=usage.finish_reason,
             )
 
         if str(type(result._iterator)) == "<class 'async_generator'>":
diff --git a/tests/integrations/anthropic/test_anthropic.py b/tests/integrations/anthropic/test_anthropic.py
index ea48f5d4db..6048f6a5ed 100644
--- a/tests/integrations/anthropic/test_anthropic.py
+++ b/tests/integrations/anthropic/test_anthropic.py
@@ -64,6 +64,7 @@ async def __call__(self, *args, **kwargs):
     content=[TextBlock(type="text", text="Hi, I'm Claude.")],
     type="message",
     usage=Usage(input_tokens=10, output_tokens=20),
+    stop_reason="end_turn",
 )
 
 
@@ -134,6 +135,7 @@ def test_nonstreaming_create_message(
     assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
 
 
 @pytest.mark.asyncio
@@ -204,6 +206,122 @@ async def test_nonstreaming_create_message_async(
     assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 20
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 30
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is False
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
+
+
+def test_streaming_create_message_with_finish_reason(sentry_init, capture_events):
+    client = Anthropic(api_key="z")
+    returned_stream = Stream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = [
+        MessageStartEvent(
+            message=EXAMPLE_MESSAGE,
+            type="message_start",
+        ),
+        ContentBlockStartEvent(
+            type="content_block_start",
+            index=0,
+            content_block=TextBlock(type="text", text=""),
+        ),
+        ContentBlockDeltaEvent(
+            delta=TextDelta(text="Hi!", type="text_delta"),
+            index=0,
+            type="content_block_delta",
+        ),
+        ContentBlockStopEvent(type="content_block_stop", index=0),
+        MessageDeltaEvent(
+            delta=Delta(stop_reason="end_turn"),
+            usage=MessageDeltaUsage(output_tokens=10),
+            type="message_delta",
+        ),
+    ]
+
+    sentry_init(
+        integrations=[AnthropicIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+    client.messages._post = mock.Mock(return_value=returned_stream)
+
+    messages = [
+        {
+            "role": "user",
+            "content": "Hello, Claude",
+        }
+    ]
+
+    with start_transaction(name="anthropic"):
+        message = client.messages.create(
+            max_tokens=1024, messages=messages, model="model", stream=True
+        )
+        for _ in message:
+            pass
+
+    assert len(events) == 1
+    (event,) = events
+    (span,) = event["spans"]
+
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
+
+
+@pytest.mark.asyncio
+async def test_streaming_create_message_with_finish_reason_async(
+    sentry_init, capture_events, async_iterator
+):
+    client = AsyncAnthropic(api_key="z")
+    returned_stream = AsyncStream(cast_to=None, response=None, client=client)
+    returned_stream._iterator = async_iterator(
+        [
+            MessageStartEvent(
+                message=EXAMPLE_MESSAGE,
+                type="message_start",
+            ),
+            ContentBlockStartEvent(
+                type="content_block_start",
+                index=0,
+                content_block=TextBlock(type="text", text=""),
+            ),
+            ContentBlockDeltaEvent(
+                delta=TextDelta(text="Hi!", type="text_delta"),
+                index=0,
+                type="content_block_delta",
+            ),
+            ContentBlockStopEvent(type="content_block_stop", index=0),
+            MessageDeltaEvent(
+                delta=Delta(stop_reason="end_turn"),
+                usage=MessageDeltaUsage(output_tokens=10),
+                type="message_delta",
+            ),
+        ]
+    )
+
+    sentry_init(
+        integrations=[AnthropicIntegration(include_prompts=True)],
+        traces_sample_rate=1.0,
+        send_default_pii=True,
+    )
+    events = capture_events()
+    client.messages._post = AsyncMock(return_value=returned_stream)
+
+    messages = [
+        {
+            "role": "user",
+            "content": "Hello, Claude",
+        }
+    ]
+
+    with start_transaction(name="anthropic"):
+        message = await client.messages.create(
+            max_tokens=1024, messages=messages, model="model", stream=True
+        )
+        async for _ in message:
+            pass
+
+    assert len(events) == 1
+    (event,) = events
+    (span,) = event["spans"]
+
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["end_turn"]
 
 
 @pytest.mark.parametrize(
@@ -545,6 +663,7 @@ def test_streaming_create_message_with_input_json_delta(
     assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407
     assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["tool_use"]
 
 
 @pytest.mark.asyncio
@@ -687,6 +806,7 @@ async def test_streaming_create_message_with_input_json_delta_async(
     assert span["data"][SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS] == 41
     assert span["data"][SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS] == 407
    assert span["data"][SPANDATA.GEN_AI_RESPONSE_STREAMING] is True
+    assert span["data"][SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS] == ["tool_use"]
 
 
 def test_exception_message_create(sentry_init, capture_events):