Commit 5d6ab26

Support logprobs output from Responses API (#3535)

tim-becker and DouweM authored
Co-authored-by: Douwe Maan <douwe@pydantic.dev>
1 parent 6bdd39b commit 5d6ab26

File tree

3 files changed: +253 -16 lines changed

pydantic_ai_slim/pydantic_ai/models/openai.py

Lines changed: 33 additions & 16 deletions
@@ -65,6 +65,7 @@
     ChatCompletionContentPartTextParam,
     chat_completion,
     chat_completion_chunk,
+    chat_completion_token_logprob,
 )
 from openai.types.chat.chat_completion_content_part_image_param import ImageURL
 from openai.types.chat.chat_completion_content_part_input_audio_param import InputAudio
@@ -169,7 +170,11 @@ class OpenAIChatModelSettings(ModelSettings, total=False):
     """

     openai_logprobs: bool
-    """Include log probabilities in the response."""
+    """Include log probabilities in the response.
+
+    For Chat models, these will be included in `ModelResponse.provider_details['logprobs']`.
+    For Responses models, these will be included in the response output parts `TextPart.provider_details['logprobs']`.
+    """

     openai_top_logprobs: int
     """Include log probabilities of the top n tokens in the response."""
@@ -1157,7 +1162,10 @@ def _process_response(  # noqa: C901
         elif isinstance(item, responses.ResponseOutputMessage):
             for content in item.content:
                 if isinstance(content, responses.ResponseOutputText):  # pragma: no branch
-                    items.append(TextPart(content.text, id=item.id))
+                    part_provider_details: dict[str, Any] | None = None
+                    if content.logprobs:
+                        part_provider_details = {'logprobs': _map_logprobs(content.logprobs)}
+                    items.append(TextPart(content.text, id=item.id, provider_details=part_provider_details))
         elif isinstance(item, responses.ResponseFunctionToolCall):
             items.append(
                 ToolCallPart(
@@ -1264,7 +1272,7 @@ async def _responses_create(
         model_request_parameters: ModelRequestParameters,
     ) -> AsyncStream[responses.ResponseStreamEvent]: ...

-    async def _responses_create(
+    async def _responses_create(  # noqa: C901
        self,
        messages: list[ModelRequest | ModelResponse],
        stream: bool,
@@ -1323,6 +1331,8 @@ async def _responses_create(
             include.append('code_interpreter_call.outputs')
         if model_settings.get('openai_include_web_search_sources'):
             include.append('web_search_call.action.sources')
+        if model_settings.get('openai_logprobs'):
+            include.append('message.output_text.logprobs')

         # When there are no input messages and we're not reusing a previous response,
         # the OpenAI API will reject a request without any input,
@@ -1354,6 +1364,7 @@ async def _responses_create(
            timeout=model_settings.get('timeout', NOT_GIVEN),
            service_tier=model_settings.get('openai_service_tier', OMIT),
            previous_response_id=previous_response_id or OMIT,
+           top_logprobs=model_settings.get('openai_top_logprobs', OMIT),
            reasoning=reasoning,
            user=model_settings.get('openai_user', OMIT),
            text=text or OMIT,
@@ -2283,6 +2294,24 @@ def timestamp(self) -> datetime:
         return self._timestamp


+# Convert logprobs to a serializable format
+def _map_logprobs(
+    logprobs: list[chat_completion_token_logprob.ChatCompletionTokenLogprob]
+    | list[responses.response_output_text.Logprob],
+) -> list[dict[str, Any]]:
+    return [
+        {
+            'token': lp.token,
+            'bytes': lp.bytes,
+            'logprob': lp.logprob,
+            'top_logprobs': [
+                {'token': tlp.token, 'bytes': tlp.bytes, 'logprob': tlp.logprob} for tlp in lp.top_logprobs
+            ],
+        }
+        for lp in logprobs
+    ]
+
+
 def _map_usage(
     response: chat.ChatCompletion | ChatCompletionChunk | responses.Response,
     provider: str,
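Each mapped entry is a plain dict, so it survives message serialization. For one token of the response recorded in the cassette below, the helper produces:

    {
        'token': ' Belo',
        'logprob': -4.3e-05,
        'bytes': [32, 66, 101, 108, 111],
        'top_logprobs': [],  # empty here because top_logprobs was not requested
    }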
@@ -2331,19 +2360,7 @@ def _map_provider_details(

     # Add logprobs to vendor_details if available
     if choice.logprobs is not None and choice.logprobs.content:
-        # Convert logprobs to a serializable format
-        provider_details['logprobs'] = [
-            {
-                'token': lp.token,
-                'bytes': lp.bytes,
-                'logprob': lp.logprob,
-                'top_logprobs': [
-                    {'token': tlp.token, 'bytes': tlp.bytes, 'logprob': tlp.logprob} for tlp in lp.top_logprobs
-                ],
-            }
-            for lp in choice.logprobs.content
-        ]
-
+        provider_details['logprobs'] = _map_logprobs(choice.logprobs.content)
     if raw_finish_reason := choice.finish_reason:
         provider_details['finish_reason'] = raw_finish_reason

tests/models/cassettes/test_openai/test_openai_instructions_with_responses_logprobs.yaml

Lines changed: 186 additions & 0 deletions
@@ -0,0 +1,186 @@
+interactions:
+- request:
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '202'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+    method: POST
+    parsed_body:
+      include:
+      - message.output_text.logprobs
+      input:
+      - content: What is the capital of Minas Gerais?
+        role: user
+      instructions: You are a helpful assistant.
+      model: gpt-4o-mini
+      stream: false
+    uri: https://api.openai.com/v1/responses
+  response:
+    headers:
+      alt-svc:
+      - h3=":443"; ma=86400
+      connection:
+      - keep-alive
+      content-length:
+      - '3981'
+      content-type:
+      - application/json
+      openai-organization:
+      - pydantic-28gund
+      openai-processing-ms:
+      - '2151'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      transfer-encoding:
+      - chunked
+    parsed_body:
+      background: false
+      billing:
+        payer: developer
+      created_at: 1764012314
+      error: null
+      id: resp_03c6f7a0e7df74a9006924b11a6120819395892ac2d8143b03
+      incomplete_details: null
+      instructions: You are a helpful assistant.
+      max_output_tokens: null
+      max_tool_calls: null
+      metadata: {}
+      model: gpt-4o-mini-2024-07-18
+      object: response
+      output:
+      - content:
+        - annotations: []
+          logprobs:
+          - bytes:
+            - 84
+            - 104
+            - 101
+            logprob: -0.0
+            token: The
+            top_logprobs: []
+          - bytes:
+            - 32
+            - 99
+            - 97
+            - 112
+            - 105
+            - 116
+            - 97
+            - 108
+            logprob: 0.0
+            token: ' capital'
+            top_logprobs: []
+          - bytes:
+            - 32
+            - 111
+            - 102
+            logprob: 0.0
+            token: ' of'
+            top_logprobs: []
+          - bytes:
+            - 32
+            - 77
+            - 105
+            - 110
+            - 97
+            - 115
+            logprob: -0.0
+            token: ' Minas'
+            top_logprobs: []
+          - bytes:
+            - 32
+            - 71
+            - 101
+            - 114
+            - 97
+            - 105
+            - 115
+            logprob: -0.0
+            token: ' Gerais'
+            top_logprobs: []
+          - bytes:
+            - 32
+            - 105
+            - 115
+            logprob: -5.2e-05
+            token: ' is'
+            top_logprobs: []
+          - bytes:
+            - 32
+            - 66
+            - 101
+            - 108
+            - 111
+            logprob: -4.3e-05
+            token: ' Belo'
+            top_logprobs: []
+          - bytes:
+            - 32
+            - 72
+            - 111
+            - 114
+            - 105
+            - 122
+            - 111
+            - 110
+            - 116
+            - 101
+            logprob: -2.0e-06
+            token: ' Horizonte'
+            top_logprobs: []
+          - bytes:
+            - 46
+            logprob: -0.0
+            token: .
+            top_logprobs: []
+          text: The capital of Minas Gerais is Belo Horizonte.
+          type: output_text
+        id: msg_03c6f7a0e7df74a9006924b11adb348193adc4091df13b8d7c
+        role: assistant
+        status: completed
+        type: message
+      parallel_tool_calls: true
+      previous_response_id: null
+      prompt_cache_key: null
+      prompt_cache_retention: null
+      reasoning:
+        effort: null
+        summary: null
+      safety_identifier: null
+      service_tier: default
+      status: completed
+      store: true
+      temperature: 1.0
+      text:
+        format:
+          type: text
+        verbosity: medium
+      tool_choice: auto
+      tools: []
+      top_logprobs: 0
+      top_p: 1.0
+      truncation: disabled
+      usage:
+        input_tokens: 25
+        input_tokens_details:
+          cached_tokens: 0
+        output_tokens: 10
+        output_tokens_details:
+          reasoning_tokens: 0
+        total_tokens: 35
+      user: null
+  status:
+    code: 200
+    message: OK
+version: 1
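The recorded values are natural-log probabilities, so each token's probability is exp(logprob); for the ' is' token above:

    import math

    # Natural-log probability recorded for the ' is' token in the cassette
    print(math.exp(-5.2e-05))  # ~0.999948: the model was all but certain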

tests/models/test_openai.py

Lines changed: 34 additions & 0 deletions
@@ -2261,6 +2261,40 @@ async def test_openai_instructions_with_logprobs(allow_model_requests: None):
     ]


+async def test_openai_instructions_with_responses_logprobs(allow_model_requests: None, openai_api_key: str):
+    m = OpenAIResponsesModel(
+        'gpt-4o-mini',
+        provider=OpenAIProvider(api_key=openai_api_key),
+    )
+    agent = Agent(m, instructions='You are a helpful assistant.')
+    result = await agent.run(
+        'What is the capital of Minas Gerais?',
+        model_settings=OpenAIResponsesModelSettings(openai_logprobs=True),
+    )
+    messages = result.all_messages()
+    response = cast(Any, messages[1])
+    text_part = response.parts[0]
+    assert hasattr(text_part, 'provider_details')
+    assert text_part.provider_details is not None
+    assert 'logprobs' in text_part.provider_details
+    assert text_part.provider_details['logprobs'] == [
+        {'token': 'The', 'logprob': -0.0, 'bytes': [84, 104, 101], 'top_logprobs': []},
+        {'token': ' capital', 'logprob': 0.0, 'bytes': [32, 99, 97, 112, 105, 116, 97, 108], 'top_logprobs': []},
+        {'token': ' of', 'logprob': 0.0, 'bytes': [32, 111, 102], 'top_logprobs': []},
+        {'token': ' Minas', 'logprob': -0.0, 'bytes': [32, 77, 105, 110, 97, 115], 'top_logprobs': []},
+        {'token': ' Gerais', 'logprob': -0.0, 'bytes': [32, 71, 101, 114, 97, 105, 115], 'top_logprobs': []},
+        {'token': ' is', 'logprob': -5.2e-05, 'bytes': [32, 105, 115], 'top_logprobs': []},
+        {'token': ' Belo', 'logprob': -4.3e-05, 'bytes': [32, 66, 101, 108, 111], 'top_logprobs': []},
+        {
+            'token': ' Horizonte',
+            'logprob': -2.0e-06,
+            'bytes': [32, 72, 111, 114, 105, 122, 111, 110, 116, 101],
+            'top_logprobs': [],
+        },
+        {'token': '.', 'logprob': -0.0, 'bytes': [46], 'top_logprobs': []},
+    ]
+
+
 async def test_openai_web_search_tool_model_not_supported(allow_model_requests: None, openai_api_key: str):
     m = OpenAIChatModel('gpt-4o', provider=OpenAIProvider(api_key=openai_api_key))
     agent = Agent(
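Since `openai_top_logprobs` is now forwarded as the Responses API's `top_logprobs` parameter, the same flow can also surface ranked alternatives per token. A sketch under that assumption (the value 3 is arbitrary and this is not exercised by the commit's tests):

    from pydantic_ai import Agent
    from pydantic_ai.models.openai import OpenAIResponsesModel, OpenAIResponsesModelSettings

    agent = Agent(OpenAIResponsesModel('gpt-4o-mini'), instructions='You are a helpful assistant.')
    result = agent.run_sync(
        'What is the capital of Minas Gerais?',
        model_settings=OpenAIResponsesModelSettings(openai_logprobs=True, openai_top_logprobs=3),
    )
    for lp in result.all_messages()[1].parts[0].provider_details['logprobs']:
        # Each entry carries the sampled token plus up to 3 ranked alternatives.
        print(lp['token'], [(alt['token'], alt['logprob']) for alt in lp['top_logprobs']])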
