From 1278b1f704a2bc1e7043d7ce330b6982939e930d Mon Sep 17 00:00:00 2001 From: Kazuhiro Sera Date: Wed, 25 Mar 2026 22:00:04 +0900 Subject: [PATCH] fix: update default reasoning effort for newer models --- src/agents/models/default_models.py | 67 ++++++++++--- tests/models/test_default_models.py | 140 ++++++++++++++++++---------- 2 files changed, 147 insertions(+), 60 deletions(-) diff --git a/src/agents/models/default_models.py b/src/agents/models/default_models.py index 3a8a122e8b..d869945e8d 100644 --- a/src/agents/models/default_models.py +++ b/src/agents/models/default_models.py @@ -1,6 +1,7 @@ import copy import os -from typing import Optional +import re +from typing import Literal, Optional from openai.types.shared.reasoning import Reasoning @@ -8,9 +9,11 @@ OPENAI_DEFAULT_MODEL_ENV_VARIABLE_NAME = "OPENAI_DEFAULT_MODEL" -# discourage directly accessing this constant +GPT5DefaultReasoningEffort = Literal["none", "low", "medium"] + +# discourage directly accessing these constants # use the get_default_model and get_default_model_settings() functions instead -_GPT_5_DEFAULT_MODEL_SETTINGS: ModelSettings = ModelSettings( +_GPT_5_LOW_DEFAULT_MODEL_SETTINGS: ModelSettings = ModelSettings( # We chose "low" instead of "minimal" because some of the built-in tools # (e.g., file search, image generation, etc.) do not support "minimal" # If you want to use "minimal" reasoning effort, you can pass your own model settings @@ -21,20 +24,59 @@ reasoning=Reasoning(effort="none"), verbosity="low", ) +_GPT_5_MEDIUM_DEFAULT_MODEL_SETTINGS: ModelSettings = ModelSettings( + reasoning=Reasoning(effort="medium"), + verbosity="low", +) +_GPT_5_TEXT_ONLY_DEFAULT_MODEL_SETTINGS: ModelSettings = ModelSettings( + verbosity="low", +) -_GPT_5_NONE_EFFORT_MODELS = {"gpt-5.1", "gpt-5.2"} +_GPT_5_CHAT_MODEL_PATTERNS: tuple[re.Pattern[str], ...] = ( + re.compile(r"^gpt-5-chat-latest$"), + re.compile(r"^gpt-5\.1-chat-latest$"), + re.compile(r"^gpt-5\.2-chat-latest$"), + re.compile(r"^gpt-5\.3-chat-latest$"), +) + +_GPT_5_DEFAULT_MODEL_SETTINGS_BY_REASONING_EFFORT: dict[ + GPT5DefaultReasoningEffort, ModelSettings +] = { + "none": _GPT_5_NONE_DEFAULT_MODEL_SETTINGS, + "low": _GPT_5_LOW_DEFAULT_MODEL_SETTINGS, + "medium": _GPT_5_MEDIUM_DEFAULT_MODEL_SETTINGS, +} + +_GPT_5_DEFAULT_REASONING_EFFORT_PATTERNS: tuple[ + tuple[re.Pattern[str], GPT5DefaultReasoningEffort], + ..., +] = ( + (re.compile(r"^gpt-5(?:-\d{4}-\d{2}-\d{2})?$"), "low"), + (re.compile(r"^gpt-5\.1(?:-\d{4}-\d{2}-\d{2})?$"), "none"), + (re.compile(r"^gpt-5\.2(?:-\d{4}-\d{2}-\d{2})?$"), "none"), + (re.compile(r"^gpt-5\.2-pro(?:-\d{4}-\d{2}-\d{2})?$"), "medium"), + (re.compile(r"^gpt-5\.2-codex$"), "low"), + (re.compile(r"^gpt-5\.3-codex$"), "none"), + (re.compile(r"^gpt-5\.4(?:-\d{4}-\d{2}-\d{2})?$"), "none"), + (re.compile(r"^gpt-5\.4-pro(?:-\d{4}-\d{2}-\d{2})?$"), "medium"), + (re.compile(r"^gpt-5\.4-mini(?:-\d{4}-\d{2}-\d{2})?$"), "none"), + (re.compile(r"^gpt-5\.4-nano(?:-\d{4}-\d{2}-\d{2})?$"), "none"), +) -def _is_gpt_5_none_effort_model(model_name: str) -> bool: - return model_name in _GPT_5_NONE_EFFORT_MODELS +def _get_default_reasoning_effort(model_name: str) -> GPT5DefaultReasoningEffort | None: + for pattern, effort in _GPT_5_DEFAULT_REASONING_EFFORT_PATTERNS: + if pattern.fullmatch(model_name): + return effort + return None def gpt_5_reasoning_settings_required(model_name: str) -> bool: """ Returns True if the model name is a GPT-5 model and reasoning settings are required. """ - if model_name.startswith("gpt-5-chat"): - # gpt-5-chat-latest does not require reasoning settings + if any(pattern.fullmatch(model_name) for pattern in _GPT_5_CHAT_MODEL_PATTERNS): + # Chat-latest aliases do not accept reasoning.effort. return False # matches any of gpt-5 models return model_name.startswith("gpt-5") @@ -64,7 +106,10 @@ def get_default_model_settings(model: Optional[str] = None) -> ModelSettings: """ _model = model if model is not None else get_default_model() if gpt_5_reasoning_settings_required(_model): - if _is_gpt_5_none_effort_model(_model): - return copy.deepcopy(_GPT_5_NONE_DEFAULT_MODEL_SETTINGS) - return copy.deepcopy(_GPT_5_DEFAULT_MODEL_SETTINGS) + effort = _get_default_reasoning_effort(_model) + if effort is not None: + return copy.deepcopy(_GPT_5_DEFAULT_MODEL_SETTINGS_BY_REASONING_EFFORT[effort]) + # Keep the GPT-5 verbosity default, but omit reasoning.effort for + # variants whose supported values are not confirmed yet. + return copy.deepcopy(_GPT_5_TEXT_ONLY_DEFAULT_MODEL_SETTINGS) return ModelSettings() diff --git a/tests/models/test_default_models.py b/tests/models/test_default_models.py index d291aac1e3..d0904cd4e2 100644 --- a/tests/models/test_default_models.py +++ b/tests/models/test_default_models.py @@ -1,6 +1,9 @@ import os +from typing import Literal from unittest.mock import patch +from openai.types.shared.reasoning import Reasoning + from agents import Agent from agents.model_settings import ModelSettings from agents.models import ( @@ -11,6 +14,14 @@ ) +def _gpt_5_default_settings( + reasoning_effort: Literal["none", "low", "medium"] | None, +) -> ModelSettings: + if reasoning_effort is None: + return ModelSettings(verbosity="low") + return ModelSettings(reasoning=Reasoning(effort=reasoning_effort), verbosity="low") + + def test_default_model_is_gpt_4_1(): assert get_default_model() == "gpt-4.1" assert is_gpt_5_default() is False @@ -18,68 +29,99 @@ def test_default_model_is_gpt_4_1(): assert get_default_model_settings().reasoning is None -@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5"}) -def test_default_model_env_gpt_5(): - assert get_default_model() == "gpt-5" +@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5.4"}) +def test_is_gpt_5_default_with_real_model_name(): + assert get_default_model() == "gpt-5.4" assert is_gpt_5_default() is True - assert gpt_5_reasoning_settings_required(get_default_model()) is True - assert get_default_model_settings().reasoning.effort == "low" # type: ignore[union-attr] -@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5.1"}) -def test_default_model_env_gpt_5_1(): - assert get_default_model() == "gpt-5.1" - assert is_gpt_5_default() is True - assert gpt_5_reasoning_settings_required(get_default_model()) is True - assert get_default_model_settings().reasoning.effort == "none" # type: ignore[union-attr] +@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-4.1"}) +def test_is_gpt_5_default_returns_false_for_non_gpt_5_default_model(): + assert get_default_model() == "gpt-4.1" + assert is_gpt_5_default() is False -@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5.2"}) -def test_default_model_env_gpt_5_2(): - assert get_default_model() == "gpt-5.2" - assert is_gpt_5_default() is True - assert gpt_5_reasoning_settings_required(get_default_model()) is True - assert get_default_model_settings().reasoning.effort == "none" # type: ignore[union-attr] +def test_gpt_5_reasoning_settings_required_detects_gpt_5_models_while_ignoring_chat_latest(): + assert gpt_5_reasoning_settings_required("gpt-5") is True + assert gpt_5_reasoning_settings_required("gpt-5.1") is True + assert gpt_5_reasoning_settings_required("gpt-5.2") is True + assert gpt_5_reasoning_settings_required("gpt-5.2-codex") is True + assert gpt_5_reasoning_settings_required("gpt-5.2-pro") is True + assert gpt_5_reasoning_settings_required("gpt-5.4-pro") is True + assert gpt_5_reasoning_settings_required("gpt-5-mini") is True + assert gpt_5_reasoning_settings_required("gpt-5-nano") is True + assert gpt_5_reasoning_settings_required("gpt-5-chat-latest") is False + assert gpt_5_reasoning_settings_required("gpt-5.1-chat-latest") is False + assert gpt_5_reasoning_settings_required("gpt-5.2-chat-latest") is False + assert gpt_5_reasoning_settings_required("gpt-5.3-chat-latest") is False -@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5.2-codex"}) -def test_default_model_env_gpt_5_2_codex(): - assert get_default_model() == "gpt-5.2-codex" - assert is_gpt_5_default() is True - assert gpt_5_reasoning_settings_required(get_default_model()) is True - assert get_default_model_settings().reasoning.effort == "low" # type: ignore[union-attr] +def test_gpt_5_reasoning_settings_required_returns_false_for_non_gpt_5_models(): + assert gpt_5_reasoning_settings_required("gpt-4.1") is False -@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5-mini"}) -def test_default_model_env_gpt_5_mini(): - assert get_default_model() == "gpt-5-mini" - assert is_gpt_5_default() is True - assert gpt_5_reasoning_settings_required(get_default_model()) is True - assert get_default_model_settings().reasoning.effort == "low" # type: ignore[union-attr] +def test_get_default_model_settings_returns_none_reasoning_defaults_for_gpt_5_1_models(): + assert get_default_model_settings("gpt-5.1") == _gpt_5_default_settings("none") + assert get_default_model_settings("gpt-5.1-2025-11-13") == _gpt_5_default_settings("none") -@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5-nano"}) -def test_default_model_env_gpt_5_nano(): - assert get_default_model() == "gpt-5-nano" - assert is_gpt_5_default() is True - assert gpt_5_reasoning_settings_required(get_default_model()) is True - assert get_default_model_settings().reasoning.effort == "low" # type: ignore[union-attr] +def test_get_default_model_settings_returns_none_reasoning_defaults_for_gpt_5_2_models(): + assert get_default_model_settings("gpt-5.2") == _gpt_5_default_settings("none") + assert get_default_model_settings("gpt-5.2-2025-12-11") == _gpt_5_default_settings("none") -@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5-chat-latest"}) -def test_default_model_env_gpt_5_chat_latest(): - assert get_default_model() == "gpt-5-chat-latest" - assert is_gpt_5_default() is False - assert gpt_5_reasoning_settings_required(get_default_model()) is False - assert get_default_model_settings().reasoning is None +def test_get_default_model_settings_returns_none_reasoning_defaults_for_gpt_5_3_codex_models(): + assert get_default_model_settings("gpt-5.3-codex") == _gpt_5_default_settings("none") -@patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-4o"}) -def test_default_model_env_gpt_4o(): - assert get_default_model() == "gpt-4o" - assert is_gpt_5_default() is False - assert gpt_5_reasoning_settings_required(get_default_model()) is False - assert get_default_model_settings().reasoning is None +def test_get_default_model_settings_returns_none_reasoning_defaults_for_gpt_5_4_models(): + assert get_default_model_settings("gpt-5.4") == _gpt_5_default_settings("none") + + +def test_get_default_model_settings_returns_none_reasoning_defaults_for_gpt_5_4_snapshot_families(): + assert get_default_model_settings("gpt-5.4-2026-03-05") == _gpt_5_default_settings("none") + assert get_default_model_settings("gpt-5.4-mini-2026-03-17") == _gpt_5_default_settings("none") + assert get_default_model_settings("gpt-5.4-nano-2026-03-17") == _gpt_5_default_settings("none") + + +def test_get_default_model_settings_returns_none_reasoning_defaults_for_gpt_5_4_mini_and_nano(): + assert get_default_model_settings("gpt-5.4-mini") == _gpt_5_default_settings("none") + assert get_default_model_settings("gpt-5.4-nano") == _gpt_5_default_settings("none") + + +def test_get_default_model_settings_returns_low_reasoning_defaults_for_base_gpt_5(): + assert get_default_model_settings("gpt-5") == _gpt_5_default_settings("low") + assert get_default_model_settings("gpt-5-2025-08-07") == _gpt_5_default_settings("low") + + +def test_get_default_model_settings_returns_low_reasoning_defaults_for_gpt_5_2_codex(): + assert get_default_model_settings("gpt-5.2-codex") == _gpt_5_default_settings("low") + + +def test_get_default_model_settings_returns_medium_reasoning_defaults_for_gpt_5_pro_models(): + assert get_default_model_settings("gpt-5.2-pro") == _gpt_5_default_settings("medium") + assert get_default_model_settings("gpt-5.2-pro-2025-12-11") == _gpt_5_default_settings("medium") + assert get_default_model_settings("gpt-5.4-pro") == _gpt_5_default_settings("medium") + assert get_default_model_settings("gpt-5.4-pro-2026-03-05") == _gpt_5_default_settings("medium") + + +def test_get_default_model_settings_omits_reasoning_for_unconfirmed_gpt_5_variants(): + assert get_default_model_settings("gpt-5-mini") == _gpt_5_default_settings(None) + assert get_default_model_settings("gpt-5-mini-2025-08-07") == _gpt_5_default_settings(None) + assert get_default_model_settings("gpt-5-nano") == _gpt_5_default_settings(None) + assert get_default_model_settings("gpt-5-nano-2025-08-07") == _gpt_5_default_settings(None) + assert get_default_model_settings("gpt-5.1-codex") == _gpt_5_default_settings(None) + + +def test_get_default_model_settings_returns_empty_settings_for_gpt_5_chat_latest_aliases(): + assert get_default_model_settings("gpt-5-chat-latest") == ModelSettings() + assert get_default_model_settings("gpt-5.1-chat-latest") == ModelSettings() + assert get_default_model_settings("gpt-5.2-chat-latest") == ModelSettings() + assert get_default_model_settings("gpt-5.3-chat-latest") == ModelSettings() + + +def test_get_default_model_settings_returns_empty_settings_for_non_gpt_5_models(): + assert get_default_model_settings("gpt-4.1") == ModelSettings() @patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5"}) @@ -94,6 +136,6 @@ def test_agent_uses_gpt_5_default_model_settings(): @patch.dict(os.environ, {"OPENAI_DEFAULT_MODEL": "gpt-5"}) def test_agent_resets_model_settings_for_non_gpt_5_models(): """Agent should reset default GPT-5 settings when using a non-GPT-5 model.""" - agent = Agent(name="test", model="gpt-4o") - assert agent.model == "gpt-4o" + agent = Agent(name="test", model="gpt-4.1") + assert agent.model == "gpt-4.1" assert agent.model_settings == ModelSettings()