From 9a4af37646764a282d9e900a90f89b82ce5e513f Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Wed, 4 Mar 2026 11:22:57 +0100
Subject: [PATCH 1/3] test(openai-agents): Remove test for unreachable state

---
 .../openai_agents/test_openai_agents.py       | 69 -------------------
 1 file changed, 69 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 491223e804..1c036bb669 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -2507,75 +2507,6 @@ def calculator(a: int, b: int) -> int:
     assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 3
 
 
-@pytest.mark.asyncio
-async def test_response_model_not_set_when_unavailable(
-    sentry_init, capture_events, test_agent
-):
-    """
-    Test that response model is not set if the API response doesn't have a model field.
-    The request model should still be set correctly.
-    """
-
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel._fetch_response"
-        ) as mock_fetch_response:
-            # Create a mock response without a model field
-            mock_response = MagicMock()
-            mock_response.model = None  # No model in response
-            mock_response.id = "resp_123"
-            mock_response.output = [
-                ResponseOutputMessage(
-                    id="msg_123",
-                    type="message",
-                    status="completed",
-                    content=[
-                        ResponseOutputText(
-                            text="Response without model field",
-                            type="output_text",
-                            annotations=[],
-                        )
-                    ],
-                    role="assistant",
-                )
-            ]
-            mock_response.usage = MagicMock()
-            mock_response.usage.input_tokens = 10
-            mock_response.usage.output_tokens = 20
-            mock_response.usage.total_tokens = 30
-            mock_response.usage.input_tokens_details = InputTokensDetails(
-                cached_tokens=0
-            )
-            mock_response.usage.output_tokens_details = OutputTokensDetails(
-                reasoning_tokens=0
-            )
-
-            mock_fetch_response.return_value = mock_response
-
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
-
-            events = capture_events()
-
-            result = await agents.Runner.run(
-                test_agent, "Test input", run_config=test_run_config
-            )
-
-            assert result is not None
-
-    (transaction,) = events
-    spans = transaction["spans"]
-    _, ai_client_span = spans
-
-    # Response model should NOT be set when API doesn't return it
-    assert "gen_ai.response.model" not in ai_client_span["data"]
-    # But request model should still be set
-    assert "gen_ai.request.model" in ai_client_span["data"]
-    assert ai_client_span["data"]["gen_ai.request.model"] == "gpt-4"
-
-
 @pytest.mark.asyncio
 async def test_invoke_agent_span_includes_response_model(
     sentry_init, capture_events, test_agent

From 3575c11f91312670d4c5cfb54a681ccc4242b32a Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 09:57:39 +0100
Subject: [PATCH 2/3] test(openai-agents): Replace mocks with httpx in
 non-error single-response tests

---
 .../openai_agents/test_openai_agents.py       | 281 +++++++++++-------
 1 file changed, 172 insertions(+), 109 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index 1c036bb669..e3f606421f 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -5,16 +5,17 @@
 import os
 import json
 import logging
+import httpx
 
 import sentry_sdk
 from sentry_sdk import start_span
-from sentry_sdk.consts import SPANDATA
+from sentry_sdk.consts import SPANDATA, OP
 from sentry_sdk.integrations.logging import LoggingIntegration
 from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
 from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize
 from sentry_sdk.utils import parse_version
 
-from openai import AsyncOpenAI
+from openai import AsyncOpenAI, InternalServerError
 from agents.models.openai_responses import OpenAIResponsesModel
 
 from unittest import mock
@@ -235,8 +236,9 @@ def mock_usage():
 
 
 @pytest.fixture
-def mock_model_response(mock_usage):
-    return ModelResponse(
+def mock_model_response():
+    return Response(
+        id="resp_123",
         output=[
             ResponseOutputMessage(
                 id="msg_123",
@@ -252,8 +254,23 @@ def mock_model_response(mock_usage):
                 role="assistant",
             )
         ],
-        usage=mock_usage,
-        response_id="resp_123",
+        parallel_tool_calls=False,
+        tool_choice="none",
+        tools=[],
+        created_at=10000000,
+        model="gpt-4",
+        object="response",
+        usage=ResponseUsage(
+            input_tokens=20,
+            input_tokens_details=InputTokensDetails(
+                cached_tokens=5,
+            ),
+            output_tokens=10,
+            output_tokens_details=OutputTokensDetails(
+                reasoning_tokens=8,
+            ),
+            total_tokens=30,
+        ),
     )
 
 
@@ -295,51 +312,60 @@ def inner(instructions):
 
 
 @pytest.fixture
-def test_agent_custom_model():
-    """Create a real Agent instance for testing."""
-    return Agent(
-        name="test_agent_custom_model",
-        instructions="You are a helpful test assistant.",
-        # the model could be agents.OpenAIChatCompletionsModel()
-        model="my-custom-model",
-        model_settings=ModelSettings(
-            max_tokens=100,
-            temperature=0.7,
-            top_p=1.0,
-            presence_penalty=0.0,
-            frequency_penalty=0.0,
-        ),
-    )
+def get_model_response():
+    def inner(response_content):
+        model_request = httpx.Request(
+            "POST",
+            "/responses",
+        )
+
+        response = httpx.Response(
+            200,
+            request=model_request,
+            content=json.dumps(response_content.model_dump()).encode("utf-8"),
+        )
+
+        return response
+
+    return inner
 
 
 @pytest.mark.asyncio
 async def test_agent_invocation_span_no_pii(
-    sentry_init, capture_events, test_agent, mock_model_response
+    sentry_init, capture_events, test_agent, mock_model_response, get_model_response
 ):
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=False,
-            )
+    response = get_model_response(mock_model_response)
 
-            events = capture_events()
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=False,
+        )
 
-            result = await agents.Runner.run(
-                test_agent, "Test input", run_config=test_run_config
-            )
+        events = capture_events()
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
+
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span, ai_client_span = spans
+    invoke_agent_span = next(
+        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    )
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     assert transaction["transaction"] == "test_agent workflow"
     assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
@@ -452,33 +478,38 @@ async def test_agent_invocation_span(
     instructions,
     input,
     request,
+    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent_with_instructions(instructions).clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    response = get_model_response(mock_model_response)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = await agents.Runner.run(
-                test_agent_with_instructions(instructions),
-                input,
-                run_config=test_run_config,
-            )
+        result = await agents.Runner.run(
+            agent,
+            input,
+            run_config=test_run_config,
+        )
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -603,35 +634,56 @@ async def test_agent_invocation_span(
 
 @pytest.mark.asyncio
 async def test_client_span_custom_model(
-    sentry_init, capture_events, test_agent_custom_model, mock_model_response
+    sentry_init,
+    capture_events,
+    mock_model_response,
+    get_model_response,
 ):
     """
     Test that the integration uses the correct model name if a custom model is used.
     """
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="my-custom-model", openai_client=client)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-            )
+    agent = Agent(
+        name="test_agent_custom_model",
+        instructions="You are a helpful test assistant.",
+        # the model could be agents.OpenAIChatCompletionsModel()
+        model=model,
+        model_settings=ModelSettings(
+            max_tokens=100,
+            temperature=0.7,
+            top_p=1.0,
+            presence_penalty=0.0,
+            frequency_penalty=0.0,
+        ),
+    )
 
-            events = capture_events()
+    response = get_model_response(mock_model_response)
 
-            result = await agents.Runner.run(
-                test_agent_custom_model, "Test input", run_config=test_run_config
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+        )
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        events = capture_events()
+
+        result = await agents.Runner.run(
+            agent, "Test input", run_config=test_run_config
+        )
+
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    _, ai_client_span = spans
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     assert ai_client_span["description"] == "chat my-custom-model"
     assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model"
@@ -642,35 +694,41 @@ def test_agent_invocation_span_sync_no_pii(
     capture_events,
     test_agent,
     mock_model_response,
+    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent.clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    response = get_model_response(mock_model_response)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=False,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=False,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = agents.Runner.run_sync(
-                test_agent, "Test input", run_config=test_run_config
-            )
+        result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config)
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span, ai_client_span = spans
+    invoke_agent_span = next(
+        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
+    )
+    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
 
     assert transaction["transaction"] == "test_agent workflow"
     assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
@@ -779,33 +837,38 @@ def test_agent_invocation_span_sync(
     instructions,
     input,
     request,
+    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
+    client = AsyncOpenAI(api_key="test-key")
+    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
+    agent = test_agent_with_instructions(instructions).clone(model=model)
 
-    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
-        with patch(
-            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
-        ) as mock_get_response:
-            mock_get_response.return_value = mock_model_response
+    response = get_model_response(mock_model_response)
 
-            sentry_init(
-                integrations=[OpenAIAgentsIntegration()],
-                traces_sample_rate=1.0,
-                send_default_pii=True,
-            )
+    with patch.object(
+        agent.model._client._client,
+        "send",
+        return_value=response,
+    ) as _:
+        sentry_init(
+            integrations=[OpenAIAgentsIntegration()],
+            traces_sample_rate=1.0,
+            send_default_pii=True,
+        )
 
-            events = capture_events()
+        events = capture_events()
 
-            result = agents.Runner.run_sync(
-                test_agent_with_instructions(instructions),
-                input,
-                run_config=test_run_config,
-            )
+        result = agents.Runner.run_sync(
+            agent,
+            input,
+            run_config=test_run_config,
+        )
 
-            assert result is not None
-            assert result.final_output == "Hello, how can I help you?"
+        assert result is not None
+        assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]

From 84d1450e814cc1dfba46d9e5a417c8116ec0c645 Mon Sep 17 00:00:00 2001
From: Alexander Alderman Webb <alexander.webb@sentry.io>
Date: Fri, 6 Mar 2026 10:26:20 +0100
Subject: [PATCH 3/3] test(openai-agents): Revert httpx mocks in
 single-response tests

---
 .../openai_agents/test_openai_agents.py       | 313 +++++++-----------
 1 file changed, 128 insertions(+), 185 deletions(-)

diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index e3f606421f..d8a831c227 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -5,17 +5,16 @@
 import os
 import json
 import logging
-import httpx
 
 import sentry_sdk
 from sentry_sdk import start_span
-from sentry_sdk.consts import SPANDATA, OP
+from sentry_sdk.consts import SPANDATA
 from sentry_sdk.integrations.logging import LoggingIntegration
 from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration
 from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize
-from sentry_sdk.utils import parse_version
+from sentry_sdk.utils import parse_version, package_version
 
-from openai import AsyncOpenAI, InternalServerError
+from openai import AsyncOpenAI
 from agents.models.openai_responses import OpenAIResponsesModel
 
 from unittest import mock
@@ -38,6 +37,8 @@
 from agents.exceptions import MaxTurnsExceeded, ModelBehaviorError
 from agents.version import __version__ as OPENAI_AGENTS_VERSION
 
+OPENAI_VERSION = package_version("openai")
+
 from openai.types.responses import (
     ResponseCreatedEvent,
     ResponseTextDeltaEvent,
@@ -236,9 +237,8 @@ def mock_usage():
 
 
 @pytest.fixture
-def mock_model_response():
-    return Response(
-        id="resp_123",
+def mock_model_response(mock_usage):
+    return ModelResponse(
         output=[
             ResponseOutputMessage(
                 id="msg_123",
@@ -254,23 +254,8 @@ def mock_model_response():
                 role="assistant",
             )
         ],
-        parallel_tool_calls=False,
-        tool_choice="none",
-        tools=[],
-        created_at=10000000,
-        model="gpt-4",
-        object="response",
-        usage=ResponseUsage(
-            input_tokens=20,
-            input_tokens_details=InputTokensDetails(
-                cached_tokens=5,
-            ),
-            output_tokens=10,
-            output_tokens_details=OutputTokensDetails(
-                reasoning_tokens=8,
-            ),
-            total_tokens=30,
-        ),
+        usage=mock_usage,
+        response_id="resp_123",
     )
 
 
@@ -312,60 +297,51 @@ def inner(instructions):
 
 
 @pytest.fixture
-def get_model_response():
-    def inner(response_content):
-        model_request = httpx.Request(
-            "POST",
-            "/responses",
-        )
-
-        response = httpx.Response(
-            200,
-            request=model_request,
-            content=json.dumps(response_content.model_dump()).encode("utf-8"),
-        )
-
-        return response
-
-    return inner
+def test_agent_custom_model():
+    """Create a real Agent instance for testing."""
+    return Agent(
+        name="test_agent_custom_model",
+        instructions="You are a helpful test assistant.",
+        # the model could be agents.OpenAIChatCompletionsModel()
+        model="my-custom-model",
+        model_settings=ModelSettings(
+            max_tokens=100,
+            temperature=0.7,
+            top_p=1.0,
+            presence_penalty=0.0,
+            frequency_penalty=0.0,
+        ),
+    )
 
 
 @pytest.mark.asyncio
 async def test_agent_invocation_span_no_pii(
-    sentry_init, capture_events, test_agent, mock_model_response, get_model_response
+    sentry_init, capture_events, test_agent, mock_model_response
 ):
-    client = AsyncOpenAI(api_key="test-key")
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
-    agent = test_agent.clone(model=model)
-
-    response = get_model_response(mock_model_response)
+    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+        with patch(
+            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
+        ) as mock_get_response:
+            mock_get_response.return_value = mock_model_response
 
-    with patch.object(
-        agent.model._client._client,
-        "send",
-        return_value=response,
-    ) as _:
-        sentry_init(
-            integrations=[OpenAIAgentsIntegration()],
-            traces_sample_rate=1.0,
-            send_default_pii=False,
-        )
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=False,
+            )
 
-        events = capture_events()
+            events = capture_events()
 
-        result = await agents.Runner.run(
-            agent, "Test input", run_config=test_run_config
-        )
+            result = await agents.Runner.run(
+                test_agent, "Test input", run_config=test_run_config
+            )
 
-        assert result is not None
-        assert result.final_output == "Hello, how can I help you?"
+            assert result is not None
+            assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span = next(
-        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
-    )
-    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
+    invoke_agent_span, ai_client_span = spans
 
     assert transaction["transaction"] == "test_agent workflow"
     assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
@@ -478,38 +454,33 @@ async def test_agent_invocation_span(
     instructions,
     input,
     request,
-    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
-    client = AsyncOpenAI(api_key="test-key")
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
-    agent = test_agent_with_instructions(instructions).clone(model=model)
 
-    response = get_model_response(mock_model_response)
+    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+        with patch(
+            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
+        ) as mock_get_response:
+            mock_get_response.return_value = mock_model_response
 
-    with patch.object(
-        agent.model._client._client,
-        "send",
-        return_value=response,
-    ) as _:
-        sentry_init(
-            integrations=[OpenAIAgentsIntegration()],
-            traces_sample_rate=1.0,
-            send_default_pii=True,
-        )
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=True,
+            )
 
-        events = capture_events()
+            events = capture_events()
 
-        result = await agents.Runner.run(
-            agent,
-            input,
-            run_config=test_run_config,
-        )
+            result = await agents.Runner.run(
+                test_agent_with_instructions(instructions),
+                input,
+                run_config=test_run_config,
+            )
 
-        assert result is not None
-        assert result.final_output == "Hello, how can I help you?"
+            assert result is not None
+            assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -634,56 +605,35 @@ async def test_agent_invocation_span(
 
 @pytest.mark.asyncio
 async def test_client_span_custom_model(
-    sentry_init,
-    capture_events,
-    mock_model_response,
-    get_model_response,
+    sentry_init, capture_events, test_agent_custom_model, mock_model_response
 ):
     """
     Test that the integration uses the correct model name if a custom model is used.
     """
 
-    client = AsyncOpenAI(api_key="test-key")
-    model = OpenAIResponsesModel(model="my-custom-model", openai_client=client)
-
-    agent = Agent(
-        name="test_agent_custom_model",
-        instructions="You are a helpful test assistant.",
-        # the model could be agents.OpenAIChatCompletionsModel()
-        model=model,
-        model_settings=ModelSettings(
-            max_tokens=100,
-            temperature=0.7,
-            top_p=1.0,
-            presence_penalty=0.0,
-            frequency_penalty=0.0,
-        ),
-    )
-
-    response = get_model_response(mock_model_response)
+    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+        with patch(
+            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
+        ) as mock_get_response:
+            mock_get_response.return_value = mock_model_response
 
-    with patch.object(
-        agent.model._client._client,
-        "send",
-        return_value=response,
-    ) as _:
-        sentry_init(
-            integrations=[OpenAIAgentsIntegration()],
-            traces_sample_rate=1.0,
-        )
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+            )
 
-        events = capture_events()
+            events = capture_events()
 
-        result = await agents.Runner.run(
-            agent, "Test input", run_config=test_run_config
-        )
+            result = await agents.Runner.run(
+                test_agent_custom_model, "Test input", run_config=test_run_config
+            )
 
-        assert result is not None
-        assert result.final_output == "Hello, how can I help you?"
+            assert result is not None
+            assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
+    _, ai_client_span = spans
 
     assert ai_client_span["description"] == "chat my-custom-model"
     assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model"
@@ -694,41 +644,35 @@ def test_agent_invocation_span_sync_no_pii(
     capture_events,
     test_agent,
     mock_model_response,
-    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
-    client = AsyncOpenAI(api_key="test-key")
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
-    agent = test_agent.clone(model=model)
 
-    response = get_model_response(mock_model_response)
+    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+        with patch(
+            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
+        ) as mock_get_response:
+            mock_get_response.return_value = mock_model_response
 
-    with patch.object(
-        agent.model._client._client,
-        "send",
-        return_value=response,
-    ) as _:
-        sentry_init(
-            integrations=[OpenAIAgentsIntegration()],
-            traces_sample_rate=1.0,
-            send_default_pii=False,
-        )
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=False,
+            )
 
-        events = capture_events()
+            events = capture_events()
 
-        result = agents.Runner.run_sync(agent, "Test input", run_config=test_run_config)
+            result = agents.Runner.run_sync(
+                test_agent, "Test input", run_config=test_run_config
+            )
 
-        assert result is not None
-        assert result.final_output == "Hello, how can I help you?"
+            assert result is not None
+            assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
-    invoke_agent_span = next(
-        span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT
-    )
-    ai_client_span = next(span for span in spans if span["op"] == OP.GEN_AI_CHAT)
+    invoke_agent_span, ai_client_span = spans
 
     assert transaction["transaction"] == "test_agent workflow"
     assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents"
@@ -837,38 +781,33 @@ def test_agent_invocation_span_sync(
     instructions,
     input,
     request,
-    get_model_response,
 ):
     """
     Test that the integration creates spans for agent invocations.
     """
-    client = AsyncOpenAI(api_key="test-key")
-    model = OpenAIResponsesModel(model="gpt-4", openai_client=client)
-    agent = test_agent_with_instructions(instructions).clone(model=model)
 
-    response = get_model_response(mock_model_response)
+    with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}):
+        with patch(
+            "agents.models.openai_responses.OpenAIResponsesModel.get_response"
+        ) as mock_get_response:
+            mock_get_response.return_value = mock_model_response
 
-    with patch.object(
-        agent.model._client._client,
-        "send",
-        return_value=response,
-    ) as _:
-        sentry_init(
-            integrations=[OpenAIAgentsIntegration()],
-            traces_sample_rate=1.0,
-            send_default_pii=True,
-        )
+            sentry_init(
+                integrations=[OpenAIAgentsIntegration()],
+                traces_sample_rate=1.0,
+                send_default_pii=True,
+            )
 
-        events = capture_events()
+            events = capture_events()
 
-        result = agents.Runner.run_sync(
-            agent,
-            input,
-            run_config=test_run_config,
-        )
+            result = agents.Runner.run_sync(
+                test_agent_with_instructions(instructions),
+                input,
+                run_config=test_run_config,
+            )
 
-        assert result is not None
-        assert result.final_output == "Hello, how can I help you?"
+            assert result is not None
+            assert result.final_output == "Hello, how can I help you?"
 
     (transaction,) = events
     spans = transaction["spans"]
@@ -1319,18 +1258,22 @@ def simple_test_tool(message: str) -> str:
     assert ai_client_span1["data"]["gen_ai.usage.output_tokens"] == 5
     assert ai_client_span1["data"]["gen_ai.usage.output_tokens.reasoning"] == 0
     assert ai_client_span1["data"]["gen_ai.usage.total_tokens"] == 15
-    assert ai_client_span1["data"]["gen_ai.response.tool_calls"] == safe_serialize(
-        [
-            {
-                "arguments": '{"message": "hello"}',
-                "call_id": "call_123",
-                "name": "simple_test_tool",
-                "type": "function_call",
-                "id": "call_123",
-                "status": None,
-            }
-        ]
-    )
+
+    tool_call = {
+        "arguments": '{"message": "hello"}',
+        "call_id": "call_123",
+        "name": "simple_test_tool",
+        "type": "function_call",
+        "id": "call_123",
+        "status": None,
+    }
+
+    if OPENAI_VERSION >= (2, 25, 0):
+        tool_call["namespace"] = None
+
+    assert json.loads(ai_client_span1["data"]["gen_ai.response.tool_calls"]) == [
+        tool_call
+    ]
 
     assert tool_span["description"] == "execute_tool simple_test_tool"
     assert tool_span["data"]["gen_ai.agent.name"] == "test_agent"