Skip to content

Commit a527525

Browse files
juanmicheline, xingyaoww, openhands-agent, enyst
authored
Claude Opus 4.5 as a reasoning model with custom effort param (#1250)
Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: enyst <engel.nyst@gmail.com>
1 parent 1e8692b commit a527525

File tree

8 files changed

+319
-36
lines changed

8 files changed

+319
-36
lines changed

.github/scripts/check_deprecations.py

Lines changed: 77 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ class DeprecationRecord:
6464
deprecated_in: str | None
6565
path: Path
6666
line: int
67-
kind: Literal["decorator", "warn_call"]
67+
kind: Literal["decorator", "warn_call", "cleanup_call"]
6868
package: str
6969

7070

@@ -246,28 +246,47 @@ def _gather_warn_calls(
246246
else:
247247
continue
248248

249-
if func_name != "warn_deprecated":
250-
continue
249+
if func_name == "warn_deprecated":
250+
identifier_node = node.args[0] if node.args else None
251+
if identifier_node is None:
252+
continue
253+
identifier = ast.unparse(identifier_node)
251254

252-
identifier_node = node.args[0] if node.args else None
253-
if identifier_node is None:
254-
continue
255-
identifier = ast.unparse(identifier_node)
256-
257-
removed_expr = _extract_kw(node, "removed_in")
258-
deprecated_expr = _extract_kw(node, "deprecated_in")
259-
260-
yield DeprecationRecord(
261-
identifier=identifier,
262-
removed_in=_parse_removed_value(removed_expr, path=path, line=node.lineno),
263-
deprecated_in=_parse_deprecated_value(
264-
deprecated_expr, path=path, line=node.lineno
265-
),
266-
path=path,
267-
line=node.lineno,
268-
kind="warn_call",
269-
package=package,
270-
)
255+
removed_expr = _extract_kw(node, "removed_in")
256+
deprecated_expr = _extract_kw(node, "deprecated_in")
257+
258+
yield DeprecationRecord(
259+
identifier=identifier,
260+
removed_in=_parse_removed_value(
261+
removed_expr, path=path, line=node.lineno
262+
),
263+
deprecated_in=_parse_deprecated_value(
264+
deprecated_expr, path=path, line=node.lineno
265+
),
266+
path=path,
267+
line=node.lineno,
268+
kind="warn_call",
269+
package=package,
270+
)
271+
elif func_name == "warn_cleanup":
272+
identifier_node = node.args[0] if node.args else None
273+
if identifier_node is None:
274+
continue
275+
identifier = ast.unparse(identifier_node)
276+
277+
cleanup_expr = _extract_kw(node, "cleanup_by")
278+
279+
yield DeprecationRecord(
280+
identifier=identifier,
281+
removed_in=_parse_removed_value(
282+
cleanup_expr, path=path, line=node.lineno
283+
),
284+
deprecated_in=None,
285+
path=path,
286+
line=node.lineno,
287+
kind="cleanup_call",
288+
package=package,
289+
)
271290

272291

273292
def _build_identifier(node: ast.AST) -> str:
@@ -329,6 +348,14 @@ def _should_fail(current_version: str, record: DeprecationRecord) -> bool:
329348
def _format_record(record: DeprecationRecord) -> str:
330349
location = record.path.relative_to(REPO_ROOT)
331350
removed = record.removed_in if record.removed_in is not None else "(none)"
351+
352+
if record.kind == "cleanup_call":
353+
return (
354+
f"- [{record.package}] {record.identifier} ({record.kind})\n"
355+
f" cleanup by: {removed}\n"
356+
f" defined at: {location}:{record.line}"
357+
)
358+
332359
deprecated = (
333360
record.deprecated_in if record.deprecated_in is not None else "(unknown)"
334361
)
@@ -371,14 +398,34 @@ def main(argv: Sequence[str] | None = None) -> int:
371398
package_summaries.append((package.name, current_version, len(records)))
372399

373400
if overdue:
374-
print("The following deprecated features have passed their removal deadline:\n")
375-
for record in overdue:
376-
print(_format_record(record))
377-
print()
378-
print(
379-
"Update or remove the listed features before publishing a version that "
380-
"meets or exceeds their removal deadline."
381-
)
401+
deprecated_items = [r for r in overdue if r.kind != "cleanup_call"]
402+
cleanup_items = [r for r in overdue if r.kind == "cleanup_call"]
403+
404+
if deprecated_items:
405+
print(
406+
"The following deprecated features have passed their removal "
407+
"deadline:\n"
408+
)
409+
for record in deprecated_items:
410+
print(_format_record(record))
411+
print()
412+
413+
if cleanup_items:
414+
print("The following workarounds have passed their cleanup deadline:\n")
415+
for record in cleanup_items:
416+
print(_format_record(record))
417+
print()
418+
419+
if deprecated_items:
420+
print(
421+
"Update or remove the listed features before publishing a version that "
422+
"meets or exceeds their removal deadline."
423+
)
424+
if cleanup_items:
425+
print(
426+
"Remove the listed workarounds before publishing a version that "
427+
"meets or exceeds their cleanup deadline."
428+
)
382429
return 1
383430

384431
for package_name, version, count in package_summaries:

openhands-sdk/openhands/sdk/llm/options/chat_options.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
from openhands.sdk.llm.options.common import apply_defaults_if_absent
66
from openhands.sdk.llm.utils.model_features import get_features
7+
from openhands.sdk.utils.deprecation import warn_cleanup
78

89

910
def select_chat_options(
@@ -34,12 +35,33 @@ def select_chat_options(
3435

3536
# Reasoning-model quirks
3637
if get_features(llm.model).supports_reasoning_effort:
37-
# Preferred: use reasoning_effort
38-
if llm.reasoning_effort is not None:
39-
out["reasoning_effort"] = llm.reasoning_effort
40-
# Anthropic/OpenAI reasoning models ignore temp/top_p
38+
# Claude models use different parameter format
39+
if "claude-opus-4-5" in llm.model.lower():
40+
warn_cleanup(
41+
"Claude Opus 4.5 effort parameter workaround",
42+
cleanup_by="1.4.0",
43+
details=(
44+
"LiteLLM does not yet redirect reasoning_effort to "
45+
"output_config.effort for Claude Opus 4.5. Remove this workaround "
46+
"once LiteLLM adds native support."
47+
),
48+
)
49+
# Claude uses output_config.effort instead of reasoning_effort
50+
if llm.reasoning_effort is not None:
51+
out["output_config"] = {"effort": llm.reasoning_effort}
52+
# Claude requires beta header for effort parameter
53+
if "extra_headers" not in out:
54+
out["extra_headers"] = {}
55+
out["extra_headers"]["anthropic-beta"] = "effort-2025-11-24"
56+
else:
57+
# OpenAI/other models use reasoning_effort parameter
58+
if llm.reasoning_effort is not None:
59+
out["reasoning_effort"] = llm.reasoning_effort
60+
61+
# All reasoning models ignore temp/top_p
4162
out.pop("temperature", None)
4263
out.pop("top_p", None)
64+
4365
# Gemini 2.5-pro default to low if not set
4466
if "gemini-2.5-pro" in llm.model:
4567
if llm.reasoning_effort in {None, "none"}:

openhands-sdk/openhands/sdk/llm/utils/model_features.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ class ModelFeatures:
4242
"gemini-2.5-pro",
4343
# OpenAI GPT-5 family (includes mini variants)
4444
"gpt-5",
45+
# Anthropic Opus 4.5
46+
"claude-opus-4-5",
4547
]
4648

4749
EXTENDED_THINKING_PATTERNS: list[str] = [
@@ -63,6 +65,7 @@ class ModelFeatures:
6365
"claude-opus-4",
6466
# Anthropic Haiku 4.5 variants (dash only; official IDs use hyphens)
6567
"claude-haiku-4-5",
68+
"claude-opus-4-5",
6669
]
6770

6871
SUPPORTS_STOP_WORDS_FALSE_PATTERNS: list[str] = [

openhands-sdk/openhands/sdk/llm/utils/verified_models.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
VERIFIED_ANTHROPIC_MODELS = [
1717
"claude-sonnet-4-5-20250929",
1818
"claude-haiku-4-5-20251001",
19+
"claude-opus-4-5-20251101",
1920
"claude-sonnet-4-20250514",
2021
"claude-opus-4-20250514",
2122
"claude-opus-4-1-20250805",
@@ -40,6 +41,7 @@
4041
"gpt-5-2025-08-07",
4142
"gpt-5-codex",
4243
"claude-haiku-4-5-20251001",
44+
"claude-opus-4-5-20251101",
4345
"kimi-k2-thinking",
4446
"gpt-5-mini-2025-08-07",
4547
"claude-opus-4-1-20250805",

openhands-sdk/openhands/sdk/utils/deprecation.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,55 @@ def warn_deprecated(
112112
warnings.warn(warning, stacklevel=stacklevel)
113113

114114

115+
def warn_cleanup(
116+
workaround: str,
117+
*,
118+
cleanup_by: str | date,
119+
current_version: str | None = None,
120+
details: str = "",
121+
stacklevel: int = 2,
122+
) -> None:
123+
"""Emit a warning for temporary workarounds that need cleanup by a deadline.
124+
125+
Use this helper for temporary code that addresses upstream issues, compatibility
126+
shims, or other workarounds that should be removed once external conditions
127+
change (e.g., when a library adds support for a feature, or when an API
128+
stabilizes). The deprecation check workflow will fail when the cleanup deadline
129+
is reached, ensuring the workaround is removed before the specified version or
130+
date.
131+
132+
Args:
133+
workaround: Description of the temporary workaround
134+
cleanup_by: Version string or date when this workaround must be removed
135+
current_version: Override the detected package version (for testing)
136+
details: Additional context about why cleanup is needed
137+
stacklevel: Stack level for warning emission
138+
"""
139+
current_version = current_version or _current_version()
140+
141+
should_cleanup = False
142+
if isinstance(cleanup_by, date):
143+
should_cleanup = date.today() >= cleanup_by
144+
else:
145+
try:
146+
current = pkg_version.parse(current_version)
147+
target = pkg_version.parse(str(cleanup_by))
148+
should_cleanup = current >= target
149+
except pkg_version.InvalidVersion:
150+
pass
151+
152+
if should_cleanup:
153+
message = (
154+
f"Cleanup required: {workaround}. "
155+
f"This workaround was scheduled for removal by {cleanup_by}."
156+
)
157+
if details:
158+
message += f" {details}"
159+
warnings.warn(message, UserWarning, stacklevel=stacklevel)
160+
161+
115162
__all__ = [
116163
"deprecated",
117164
"warn_deprecated",
165+
"warn_cleanup",
118166
]

tests/sdk/llm/test_chat_options.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
from dataclasses import dataclass
2+
from typing import Any
3+
4+
from openhands.sdk.llm.options.chat_options import select_chat_options
5+
6+
7+
@dataclass
8+
class DummyLLM:
9+
model: str
10+
top_k: int | None = None
11+
top_p: float | None = 1.0
12+
temperature: float | None = 0.0
13+
max_output_tokens: int = 1024
14+
extra_headers: dict[str, str] | None = None
15+
reasoning_effort: str | None = None
16+
extended_thinking_budget: int | None = None
17+
safety_settings: list[dict[str, Any]] | None = None
18+
litellm_extra_body: dict[str, Any] | None = None
19+
20+
21+
def test_opus_4_5_uses_effort_and_beta_header_and_strips_temp_top_p():
22+
llm = DummyLLM(
23+
model="claude-opus-4-5-20251101",
24+
top_p=0.9,
25+
temperature=0.7,
26+
reasoning_effort="medium",
27+
)
28+
out = select_chat_options(llm, user_kwargs={}, has_tools=True)
29+
30+
# Uses output_config.effort instead of reasoning_effort
31+
assert out.get("output_config") == {"effort": "medium"}
32+
assert "reasoning_effort" not in out
33+
34+
# Adds the required beta header for effort
35+
headers = out.get("extra_headers") or {}
36+
assert headers.get("anthropic-beta") == "effort-2025-11-24"
37+
38+
# Strips temperature/top_p for reasoning models
39+
assert "temperature" not in out
40+
assert "top_p" not in out
41+
42+
43+
def test_gpt5_uses_reasoning_effort_and_strips_temp_top_p():
44+
llm = DummyLLM(
45+
model="gpt-5-mini-2025-08-07",
46+
temperature=0.5,
47+
top_p=0.8,
48+
reasoning_effort="high",
49+
)
50+
out = select_chat_options(llm, user_kwargs={}, has_tools=True)
51+
52+
assert out.get("reasoning_effort") == "high"
53+
assert "output_config" not in out
54+
headers = out.get("extra_headers") or {}
55+
assert "anthropic-beta" not in headers
56+
assert "temperature" not in out
57+
assert "top_p" not in out
58+
59+
60+
def test_gemini_2_5_pro_defaults_reasoning_effort_low_when_none():
61+
llm = DummyLLM(model="gemini-2.5-pro-experimental", reasoning_effort=None)
62+
out = select_chat_options(llm, user_kwargs={}, has_tools=True)
63+
64+
assert out.get("reasoning_effort") == "low"
65+
66+
67+
def test_non_reasoning_model_preserves_temp_and_top_p():
68+
llm = DummyLLM(model="gpt-4o", temperature=0.6, top_p=0.7)
69+
out = select_chat_options(llm, user_kwargs={}, has_tools=True)
70+
71+
# Non-reasoning models should retain temperature/top_p defaults
72+
assert out.get("temperature") == 0.6
73+
assert out.get("top_p") == 0.7
74+
75+
76+
def test_azure_renames_max_completion_tokens_to_max_tokens():
77+
llm = DummyLLM(model="azure/gpt-4o")
78+
out = select_chat_options(llm, user_kwargs={}, has_tools=True)
79+
80+
assert "max_completion_tokens" not in out
81+
assert out.get("max_tokens") == llm.max_output_tokens
82+
83+
84+
def test_tools_removed_when_has_tools_false():
85+
llm = DummyLLM(model="gpt-4o")
86+
uk = {"tools": ["t1"], "tool_choice": "auto"}
87+
out = select_chat_options(llm, user_kwargs=uk, has_tools=False)
88+
89+
assert "tools" not in out
90+
assert "tool_choice" not in out
91+
92+
93+
def test_extra_body_is_forwarded():
94+
llm = DummyLLM(model="gpt-4o", litellm_extra_body={"x": 1})
95+
out = select_chat_options(llm, user_kwargs={}, has_tools=True)
96+
97+
assert out.get("extra_body") == {"x": 1}

tests/sdk/llm/test_model_features.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@ def test_model_matches(name, pattern, expected):
3030
("o1", True),
3131
("o3-mini", True),
3232
("o3", True),
33+
# Anthropic Opus 4.5 (dash variant only)
34+
("claude-opus-4-5", True),
3335
("gpt-4o", False),
3436
("claude-3-5-sonnet", False),
3537
("gemini-1.5-pro", False),
@@ -51,12 +53,12 @@ def test_reasoning_effort_support(model, expected_reasoning):
5153
# AWS Bedrock model ids (provider-prefixed)
5254
("bedrock/anthropic.claude-3-5-sonnet-20241022-v2:0", True),
5355
("bedrock/anthropic.claude-3-haiku-20240307-v1:0", True),
54-
# Anthropic Haiku 4.5 variants (dash only; official IDs use hyphens)
56+
# Anthropic 4.5 variants (dash only; official IDs use hyphens)
5557
("claude-haiku-4-5", True),
5658
("us.anthropic.claude-haiku-4-5-20251001", True),
5759
("bedrock/anthropic.claude-3-opus-20240229-v1:0", True),
58-
# Anthropic 4.5 variants (dash only; official IDs use hyphens)
5960
("claude-sonnet-4-5", True),
61+
("claude-opus-4-5", True),
6062
# User-facing model names (no provider prefix)
6163
("anthropic.claude-3-5-sonnet-20241022", True),
6264
("anthropic.claude-3-haiku-20240307", True),

0 commit comments

Comments
 (0)