From c843f68fdc8d5b68cdaf37121d2e0de5acf90698 Mon Sep 17 00:00:00 2001 From: "fangyaozheng@bytedance.com" Date: Mon, 1 Jun 2026 14:46:58 +0800 Subject: [PATCH 1/2] feat(runtime): add pluggable agent runtimes (cc / codex) Introduce a `runtime` selector on `Agent` (`adk` default | `cc` | `codex`) so the inner reasoning/tool loop can be delegated to an external agent harness while the ADK `Runner` keeps owning multi-tenancy, session, memory and tracing. - `veadk/agent.py`: add `runtime` field + a minimal `_run_async_impl` dispatch hook that defers to ADK for `adk` and to `veadk.runtime.get_runtime` otherwise. - `veadk/runtime/base_runtime.py`: `BaseRuntime` ABC + `build_system_append` (appends agent name/description/instruction to the harness system prompt). - `veadk/runtime/__init__.py`: `get_runtime` registry with friendly errors when optional deps are missing. - `cc` (Claude Code SDK): translate session<->events, isolate from `~/.claude` via `setting_sources=[]`, pin the configured model, and bridge OpenAI-compatible endpoints to Anthropic via an in-process litellm shim. - `codex` (OpenAI Codex SDK): isolated `CODEX_HOME` config, Responses<->chat bridge via an in-process litellm shim (Codex only speaks the Responses API). 
Co-Authored-By: Claude Opus 4.8 --- veadk/agent.py | 31 +++- veadk/runtime/__init__.py | 73 +++++++++ veadk/runtime/base_runtime.py | 94 +++++++++++ veadk/runtime/cc/__init__.py | 19 +++ veadk/runtime/cc/proxy.py | 270 +++++++++++++++++++++++++++++++ veadk/runtime/cc/runtime.py | 175 ++++++++++++++++++++ veadk/runtime/cc/translate.py | 128 +++++++++++++++ veadk/runtime/codex/__init__.py | 19 +++ veadk/runtime/codex/proxy.py | 231 ++++++++++++++++++++++++++ veadk/runtime/codex/runtime.py | 151 +++++++++++++++++ veadk/runtime/codex/translate.py | 97 +++++++++++ 11 files changed, 1287 insertions(+), 1 deletion(-) create mode 100644 veadk/runtime/__init__.py create mode 100644 veadk/runtime/base_runtime.py create mode 100644 veadk/runtime/cc/__init__.py create mode 100644 veadk/runtime/cc/proxy.py create mode 100644 veadk/runtime/cc/runtime.py create mode 100644 veadk/runtime/cc/translate.py create mode 100644 veadk/runtime/codex/__init__.py create mode 100644 veadk/runtime/codex/proxy.py create mode 100644 veadk/runtime/codex/runtime.py create mode 100644 veadk/runtime/codex/translate.py diff --git a/veadk/agent.py b/veadk/agent.py index dde661d1..54a0f2f0 100644 --- a/veadk/agent.py +++ b/veadk/agent.py @@ -15,7 +15,7 @@ from __future__ import annotations import os -from typing import Dict, Literal, Optional, Union +from typing import TYPE_CHECKING, AsyncGenerator, Dict, Literal, Optional, Union from google.adk.flows.llm_flows.base_llm_flow import BaseLlmFlow @@ -53,6 +53,10 @@ from veadk.utils.patches import patch_asyncio, patch_tracer from veadk.version import VERSION +if TYPE_CHECKING: + from google.adk.agents.invocation_context import InvocationContext + from google.adk.events.event import Event + patch_tracer() patch_asyncio() logger = get_logger(__name__) @@ -164,6 +168,11 @@ class Agent(LlmAgent): enable_skills_checklist: bool = False _skills_with_checklist: Dict[str, Any] = {} + runtime: Literal["adk", "cc", "codex"] = "adk" + """Agent runtime backend. 
``"adk"`` (default) uses Google ADK's built-in LLM + flow. ``"cc"`` delegates the inner agent loop to the Claude Code SDK; ``"codex"`` + to OpenAI Codex. Non-``adk`` runtimes are implemented under :mod:`veadk.runtime`.""" + enable_a2ui: bool = False """Enable A2UI (agent-driven UI). When True, a `SendA2uiToClientToolset` is appended so the agent can reply with declarative UI rendered by a client. @@ -649,6 +658,26 @@ def _llm_flow(self) -> BaseLlmFlow: return SupervisorAutoFlow(supervised_agent=self) return AutoFlow() + async def _run_async_impl( + self, ctx: "InvocationContext" + ) -> AsyncGenerator["Event", None]: + """Dispatch the agent loop to the configured runtime. + + For the default ``"adk"`` runtime this defers to ADK's built-in LLM flow. + Other runtimes are resolved from :mod:`veadk.runtime` and bridge an + external agent harness (e.g. the Claude Code SDK) back into the ADK event + stream, so the surrounding ``Runner`` is unaffected. + """ + if self.runtime == "adk": + async for event in super()._run_async_impl(ctx): + yield event + return + + from veadk.runtime import get_runtime + + async for event in get_runtime(self.runtime).run_async(self, ctx): + yield event + # async def run(self, **kwargs): # raise NotImplementedError( # "Run method in VeADK agent is deprecated since version 0.5.6. Please use runner.run_async instead. Ref: https://agentkit.gitbook.io/docs/runner/overview" diff --git a/veadk/runtime/__init__.py b/veadk/runtime/__init__.py new file mode 100644 index 00000000..d5c5a66f --- /dev/null +++ b/veadk/runtime/__init__.py @@ -0,0 +1,73 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Pluggable agent runtimes for VeADK. + +``Agent(runtime=...)`` selects which runtime drives the inner agent loop: + +- ``"adk"`` (default): Google ADK's built-in ``BaseLlmFlow`` (handled directly in + :class:`veadk.agent.Agent`, no runtime object). +- ``"cc"``: the Claude Code SDK as the agent harness. +- ``"codex"``: the OpenAI Codex SDK as the agent harness. +""" + +from __future__ import annotations + +from functools import lru_cache + +from veadk.runtime.base_runtime import BaseRuntime + + +@lru_cache(maxsize=None) +def get_runtime(name: str) -> BaseRuntime: + """Return the (cached) runtime instance for ``name``. + + Args: + name (str): Runtime identifier from ``Agent(runtime=...)``. ``"adk"`` is + handled inline by the agent and never reaches this function. + + Returns: + BaseRuntime: The runtime instance. + + Raises: + ImportError: If the runtime's optional dependencies are not installed. + ValueError: If ``name`` is unknown. + """ + if name == "cc": + try: + from veadk.runtime.cc import ClaudeCodeRuntime + except ModuleNotFoundError as e: + raise ImportError( + f"The 'cc' runtime requires extra dependencies (missing: {e.name}). " + "Install them with: pip install claude-agent-sdk fastapi uvicorn" + ) from e + + return ClaudeCodeRuntime() + + if name == "codex": + try: + from veadk.runtime.codex import CodexRuntime + except ModuleNotFoundError as e: + raise ImportError( + f"The 'codex' runtime requires extra dependencies (missing: {e.name}). 
" + "Install the Codex binary and the codex_app_server SDK, plus: " + "pip install fastapi uvicorn" + ) from e + + return CodexRuntime() + + raise ValueError(f"Unknown runtime: {name!r}") + + +__all__ = ["BaseRuntime", "get_runtime"] diff --git a/veadk/runtime/base_runtime.py b/veadk/runtime/base_runtime.py new file mode 100644 index 00000000..bf7869a7 --- /dev/null +++ b/veadk/runtime/base_runtime.py @@ -0,0 +1,94 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Base class for pluggable agent runtimes. + +A *runtime* replaces the inner reasoning + tool loop of an agent while VeADK's +``Runner`` keeps owning multi-tenancy, session, memory and tracing. The default +``adk`` runtime is Google ADK's own ``BaseLlmFlow``; alternative runtimes (e.g. +``cc`` backed by the Claude Code SDK) subclass :class:`BaseRuntime` and bridge an +external agent harness back into ADK's :class:`~google.adk.events.event.Event` +stream so the surrounding ``Runner`` is unaffected. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import TYPE_CHECKING, AsyncGenerator + +if TYPE_CHECKING: + from google.adk.agents.invocation_context import InvocationContext + from google.adk.events.event import Event + + from veadk.agent import Agent + + +def build_system_append(agent: "Agent") -> str: + """Build the text to append to a runtime's own system prompt. 
+ + Combines the agent's identity and instruction (name, description, + instruction) into one block. This is *appended* to the runtime's built-in + system prompt, never replacing it. A non-string ``instruction`` (an + ``InstructionProvider`` callable) is skipped, since it requires a context to + resolve. + + Args: + agent (veadk.agent.Agent): The agent being invoked. + + Returns: + str: The append block, or an empty string if there is nothing to add. + """ + parts: list[str] = [] + if agent.name: + parts.append(f"Your name is {agent.name}.") + if agent.description: + parts.append(agent.description) + if isinstance(agent.instruction, str) and agent.instruction.strip(): + parts.append(agent.instruction) + return "\n\n".join(parts) + + +class BaseRuntime(ABC): + """Abstract agent runtime. + + Implementations translate an incoming invocation into calls against an + external agent harness and yield the results back as ADK ``Event`` objects. + The contract mirrors ADK's ``BaseAgent._run_async_impl`` so that whatever a + runtime yields can be persisted by the existing VeADK ``Runner`` without any + special handling. + + Attributes: + name (str): Stable identifier of the runtime, matching the value passed + to ``Agent(runtime=...)`` (for example ``"cc"``). + """ + + name: str = "base" + + @abstractmethod + def run_async( + self, agent: "Agent", ctx: "InvocationContext" + ) -> AsyncGenerator["Event", None]: + """Run one invocation and stream ADK events. + + Args: + agent (veadk.agent.Agent): The agent being invoked. Model, endpoint + and instruction are read from it. + ctx (google.adk.agents.invocation_context.InvocationContext): The + invocation context, providing the new message + (``ctx.user_content``) and session history (``ctx.session.events``). + + Yields: + google.adk.events.event.Event: Events produced during the run. 
+ """ + raise NotImplementedError diff --git a/veadk/runtime/cc/__init__.py b/veadk/runtime/cc/__init__.py new file mode 100644 index 00000000..1036c23e --- /dev/null +++ b/veadk/runtime/cc/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Claude Code SDK runtime.""" + +from veadk.runtime.cc.runtime import ClaudeCodeRuntime + +__all__ = ["ClaudeCodeRuntime"] diff --git a/veadk/runtime/cc/proxy.py b/veadk/runtime/cc/proxy.py new file mode 100644 index 00000000..0ebd6dc2 --- /dev/null +++ b/veadk/runtime/cc/proxy.py @@ -0,0 +1,270 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Anthropic ``/v1/messages`` translation shim for OpenAI-compatible backends. + +The Claude Code SDK only speaks the Anthropic protocol over HTTP +(``ANTHROPIC_BASE_URL``). 
When the user's model endpoint is OpenAI-compatible +(VeADK's default, e.g. Volcengine Ark), this module stands up a tiny in-process +FastAPI server that accepts Anthropic ``/v1/messages`` requests and forwards them +through :func:`litellm.anthropic_messages` to the OpenAI-compatible backend. The +Claude Code SDK is then pointed at the local server's URL. + +:func:`detect_endpoint_kind` decides whether a translation shim is needed at all. +""" + +from __future__ import annotations + +import asyncio +import json +from typing import Any, AsyncIterator, Literal, cast + +import litellm +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, StreamingResponse +from litellm.exceptions import APIError + +from veadk.utils.logger import get_logger + +logger = get_logger(__name__) + +EndpointKind = Literal["anthropic", "openai"] + +# Parameters accepted by litellm.anthropic_messages; anything else in the inbound +# request body is dropped to avoid leaking unsupported fields into the backend. +_PASSTHROUGH_KEYS = ( + "max_tokens", + "messages", + "metadata", + "stop_sequences", + "stream", + "system", + "temperature", + "thinking", + "tool_choice", + "tools", + "top_k", + "top_p", +) + + +def detect_endpoint_kind(base_url: str | None, provider: str | None) -> EndpointKind: + """Detect whether a model endpoint speaks the Anthropic or OpenAI protocol. + + Provider is authoritative when present: any provider naming Anthropic/Claude + maps to ``"anthropic"``; any other explicit provider maps to ``"openai"``. + When the provider is empty, the endpoint host is probed as a fallback. + + Args: + base_url (str | None): The model API base URL. + provider (str | None): The configured model provider (e.g. ``"openai"``). + + Returns: + EndpointKind: ``"anthropic"`` or ``"openai"``. 
+ """ + p = (provider or "").lower() + if "anthropic" in p or "claude" in p: + return "anthropic" + if p: + return "openai" + + host = (base_url or "").lower() + if "anthropic" in host: + return "anthropic" + return "openai" + + +class AnthropicShim: + """In-process Anthropic ``/v1/messages`` server backed by an OpenAI endpoint. + + Translates inbound Anthropic requests via :func:`litellm.anthropic_messages` + and forwards them to ``api_base`` using ``api_key`` with + ``custom_llm_provider="openai"``. Supports both streaming (SSE) and + non-streaming responses. + + Attributes: + api_base (str): OpenAI-compatible backend base URL. + api_key (str): API key for the backend. + url (str | None): Local server URL once started. + """ + + def __init__(self, api_base: str, api_key: str) -> None: + self.api_base = api_base + self.api_key = api_key + self.url: str | None = None + self._server: uvicorn.Server | None = None + self._task: asyncio.Task[Any] | None = None + self._app = self._build_app() + + def _build_app(self) -> FastAPI: + app = FastAPI() + + @app.post("/v1/messages") + async def messages(request: Request) -> Any: + body = await request.json() + model = body["model"] + stream = bool(body.get("stream", False)) + + call_kwargs: dict[str, Any] = { + key: body[key] for key in _PASSTHROUGH_KEYS if key in body + } + call_kwargs.update( + model=f"openai/{model}", + api_base=self.api_base, + api_key=self.api_key, + custom_llm_provider="openai", + # Anthropic-only params (e.g. `thinking`) that the OpenAI-compatible + # backend doesn't support are dropped rather than erroring. + drop_params=True, + # Surface backend errors immediately instead of retrying inside + # litellm (the SDK has its own retry layer on top). 
+ num_retries=0, + ) + + result = await litellm.anthropic_messages(**call_kwargs) + + if stream: + # Pull the first chunk here (still inside the handler) so an + # immediate backend error like 401 propagates to the exception + # handler as a proper HTTP status instead of breaking a stream + # that already returned 200 (which makes the SDK retry). + stream_iter = cast(AsyncIterator[Any], result).__aiter__() + first = await anext(stream_iter, None) + return StreamingResponse( + _encode_sse(stream_iter, first), + media_type="text/event-stream", + ) + return JSONResponse(_to_dict(result)) + + @app.exception_handler(APIError) + async def _on_api_error(_request: Request, exc: APIError) -> JSONResponse: + # Surface backend errors (auth, not-found, ...) as Anthropic-format + # errors with the right status code, so the SDK fails immediately + # instead of retrying an opaque 500. + status = getattr(exc, "status_code", 500) or 500 + return JSONResponse( + status_code=status, + content={ + "type": "error", + "error": { + "type": _anthropic_error_type(status), + "message": getattr(exc, "message", str(exc)), + }, + }, + ) + + return app + + async def start(self) -> str: + """Start the server on an ephemeral local port and return its URL.""" + if self.url: + return self.url + + config = uvicorn.Config( + self._app, host="127.0.0.1", port=0, log_level="warning" + ) + server = uvicorn.Server(config) + # Do not hijack process signal handlers from a library context. 
+ server.install_signal_handlers = lambda: None # type: ignore[method-assign] + self._server = server + self._task = asyncio.create_task(server.serve()) + + while not server.started: + await asyncio.sleep(0.02) + + port = server.servers[0].sockets[0].getsockname()[1] + self.url = f"http://127.0.0.1:{port}" + logger.info(f"Anthropic shim started at {self.url} -> {self.api_base}") + return self.url + + async def stop(self) -> None: + """Stop the server and await its task.""" + if self._server is not None: + self._server.should_exit = True + if self._task is not None: + await self._task + self.url = None + + +def _anthropic_error_type(status: int) -> str: + """Map an HTTP status code to an Anthropic API error ``type`` string.""" + return { + 400: "invalid_request_error", + 401: "authentication_error", + 403: "permission_error", + 404: "not_found_error", + 429: "rate_limit_error", + 529: "overloaded_error", + }.get(status, "api_error") + + +def _to_dict(obj: Any) -> dict[str, Any]: + """Normalize a litellm Anthropic response object into a plain dict.""" + if isinstance(obj, dict): + return obj + if hasattr(obj, "model_dump"): + return obj.model_dump() + return dict(obj) + + +def _encode_chunk(chunk: Any) -> bytes: + """Encode one litellm stream chunk as Anthropic SSE bytes.""" + if isinstance(chunk, (bytes, bytearray)): + return bytes(chunk) + if isinstance(chunk, str): + return chunk.encode() + data = _to_dict(chunk) + event_type = data.get("type", "message") + return f"event: {event_type}\ndata: {json.dumps(data)}\n\n".encode() + + +async def _encode_sse( + chunks: AsyncIterator[Any], first: Any = None +) -> AsyncIterator[bytes]: + """Re-encode litellm stream chunks as Anthropic-style SSE bytes. + + ``first`` is the already-pulled leading chunk (or ``None``). A mid-stream + backend error is emitted as an Anthropic ``error`` SSE event so the client + sees a terminal error rather than a silently truncated stream. 
+ """ + if first is not None: + yield _encode_chunk(first) + try: + async for chunk in chunks: + yield _encode_chunk(chunk) + except APIError as exc: + status = getattr(exc, "status_code", 500) or 500 + err = { + "type": "error", + "error": { + "type": _anthropic_error_type(status), + "message": getattr(exc, "message", str(exc)), + }, + } + yield f"event: error\ndata: {json.dumps(err)}\n\n".encode() + + +# Reuse one shim per (api_base, api_key) for the lifetime of the process. +_SHIMS: dict[tuple[str, str], AnthropicShim] = {} + + +async def get_shim_url(api_base: str, api_key: str) -> str: + """Return a started shim URL for the given backend, creating it if needed.""" + key = (api_base, api_key) + shim = _SHIMS.get(key) + if shim is None: + shim = AnthropicShim(api_base=api_base, api_key=api_key) + _SHIMS[key] = shim + return await shim.start() diff --git a/veadk/runtime/cc/runtime.py b/veadk/runtime/cc/runtime.py new file mode 100644 index 00000000..d46d3407 --- /dev/null +++ b/veadk/runtime/cc/runtime.py @@ -0,0 +1,175 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Claude Code SDK runtime for VeADK. + +Drives an agent invocation through the Claude Code SDK (``claude-agent-sdk``) +instead of ADK's built-in LLM flow, while the surrounding ``Runner`` keeps owning +session, memory and tracing. 
+ +Key guarantees: + +- The model is always the one configured on the agent (or via ``ANTHROPIC_MODEL``); + if none resolves, the runtime fails fast. +- The SDK is fully isolated from the host's ``~/.claude`` settings via + ``setting_sources=[]``; all credentials/endpoint are injected through + ``ClaudeAgentOptions.env``. A wrong key therefore surfaces as an error rather + than silently falling back to the host's working credentials. +- OpenAI-compatible endpoints are reached through an in-process Anthropic shim + (see :mod:`veadk.runtime.cc.proxy`). +""" + +from __future__ import annotations + +import os +from typing import TYPE_CHECKING, AsyncGenerator + +from claude_agent_sdk import ClaudeAgentOptions, ResultMessage, query +from claude_agent_sdk.types import SystemPromptPreset + +from veadk.runtime.base_runtime import BaseRuntime, build_system_append +from veadk.runtime.cc.proxy import detect_endpoint_kind, get_shim_url +from veadk.runtime.cc.translate import build_prompt, sdk_message_to_events +from veadk.utils.logger import get_logger + +if TYPE_CHECKING: + from google.adk.agents.invocation_context import InvocationContext + from google.adk.events.event import Event + + from veadk.agent import Agent + +logger = get_logger(__name__) + +_LOCAL_SHIM_TOKEN = "veadk-local" + + +def _model_env(model: str) -> dict[str, str]: + """Pin every Claude Code model tier to ``model``. + + Claude Code resolves several model "tiers" (opus/sonnet/haiku, the small fast + model, etc.) from separate environment variables. Setting only + ``ANTHROPIC_MODEL`` lets the host's inherited ``ANTHROPIC_DEFAULT_*_MODEL`` + leak into sub-tasks, so we override the whole family to guarantee the agent + only ever calls the configured model. 
+ """ + return { + "ANTHROPIC_MODEL": model, + "ANTHROPIC_DEFAULT_OPUS_MODEL": model, + "ANTHROPIC_DEFAULT_SONNET_MODEL": model, + "ANTHROPIC_DEFAULT_HAIKU_MODEL": model, + "ANTHROPIC_SMALL_FAST_MODEL": model, + } + + +class ClaudeCodeRuntime(BaseRuntime): + """Run an agent invocation via the Claude Code SDK.""" + + name = "cc" + + async def run_async( + self, agent: "Agent", ctx: "InvocationContext" + ) -> AsyncGenerator["Event", None]: + model = self._resolve_model(agent) + api_base = agent.model_api_base or os.getenv("ANTHROPIC_BASE_URL") + api_key = ( + agent.model_api_key + or os.getenv("ANTHROPIC_AUTH_TOKEN") + or os.getenv("ANTHROPIC_API_KEY") + ) + + kind = detect_endpoint_kind(api_base, agent.model_provider) + env = await self._build_env(kind, model, api_base, api_key) + + # Append the agent identity/instruction to Claude Code's own system + # prompt (preset), rather than replacing it. + append_text = build_system_append(agent) + system_prompt: SystemPromptPreset = {"type": "preset", "preset": "claude_code"} + if append_text: + system_prompt["append"] = append_text + options = ClaudeAgentOptions( + model=model, + env=env, + setting_sources=[], # never inherit the host's ~/.claude settings + system_prompt=system_prompt, + allowed_tools=[], + permission_mode="default", + ) + + prompt = build_prompt(ctx) + logger.info(f"cc runtime: model={model}, endpoint_kind={kind}") + + # ResultMessage is the terminal message; capture any error and raise only + # after the SDK stream completes (raising mid-iteration leaves the SDK's + # async generator in a running state and breaks its cleanup). 
+ error: ResultMessage | None = None + async for message in query(prompt=prompt, options=options): + for event in sdk_message_to_events(message, agent.name, ctx.invocation_id): + yield event + if isinstance(message, ResultMessage) and message.is_error: + error = message + + if error is not None: + raise RuntimeError( + f"Claude Code runtime error (subtype={error.subtype}): {error.result}" + ) + + def _resolve_model(self, agent: "Agent") -> str: + name = agent.model_name + if isinstance(name, list): + name = name[0] if name else "" + name = name or os.getenv("ANTHROPIC_MODEL", "") + if not name: + raise ValueError( + "cc runtime requires a model: set Agent(model_name=...) " + "or the ANTHROPIC_MODEL environment variable." + ) + return name + + async def _build_env( + self, + kind: str, + model: str, + api_base: str | None, + api_key: str | None, + ) -> dict[str, str]: + if kind == "openai": + if not api_base or not api_key: + raise ValueError( + "cc runtime with an OpenAI-compatible endpoint requires both " + "model_api_base and model_api_key." + ) + base_url = await get_shim_url(api_base, api_key) + # Credentials are validated by the shim against the backend; the token + # the SDK sends to the local shim is irrelevant but must override any + # inherited one. + return { + "ANTHROPIC_BASE_URL": base_url, + "ANTHROPIC_AUTH_TOKEN": _LOCAL_SHIM_TOKEN, + "ANTHROPIC_API_KEY": _LOCAL_SHIM_TOKEN, + **_model_env(model), + } + + # Native Anthropic endpoint. + if not api_key: + raise ValueError( + "cc runtime with an Anthropic endpoint requires an API key." + ) + # Set both header variants to the configured key so whichever the + # endpoint reads, it is the configured one and never an inherited token. 
+ return { + "ANTHROPIC_BASE_URL": api_base or "https://api.anthropic.com", + "ANTHROPIC_API_KEY": api_key, + "ANTHROPIC_AUTH_TOKEN": api_key, + **_model_env(model), + } diff --git a/veadk/runtime/cc/translate.py b/veadk/runtime/cc/translate.py new file mode 100644 index 00000000..d86a8225 --- /dev/null +++ b/veadk/runtime/cc/translate.py @@ -0,0 +1,128 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Translation between ADK and the Claude Code SDK. + +Two directions: + +- :func:`build_prompt` flattens an ADK session (history + current message) into a + single prompt string for the SDK. ADK remains the single source of truth for + conversation state (stateless replay), which keeps multi-tenancy clean. +- :func:`sdk_message_to_events` maps SDK stream messages back into ADK + :class:`~google.adk.events.event.Event` objects so the surrounding ``Runner`` + can persist them unchanged. 
+""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +from claude_agent_sdk import ( + AssistantMessage, + TextBlock, + ThinkingBlock, + ToolUseBlock, +) +from google.adk.events.event import Event +from google.genai import types + +if TYPE_CHECKING: + from claude_agent_sdk import Message + from google.adk.agents.invocation_context import InvocationContext + +_USER_PREFIX = "User" +_ASSISTANT_PREFIX = "Assistant" + + +def build_prompt(ctx: "InvocationContext") -> str: + """Render the session into a single prompt string for the SDK. + + Walks ``ctx.session.events`` in order and renders each turn as a + ``User:``/``Assistant:`` line. The new user message is already the last event + appended by the ``Runner``, so it naturally terminates the transcript. + ``thought`` parts are skipped; only ``text`` parts contribute. + + Args: + ctx (google.adk.agents.invocation_context.InvocationContext): Invocation + context holding the session. + + Returns: + str: The flattened transcript. When the session has a single user turn + this is just that message. + """ + lines: list[str] = [] + for event in ctx.session.events: + if event.content is None or not event.content.parts: + continue + text = "".join( + part.text for part in event.content.parts if part.text and not part.thought + ).strip() + if not text: + continue + prefix = _USER_PREFIX if event.author == "user" else _ASSISTANT_PREFIX + lines.append(f"{prefix}: {text}") + + # Single user turn: pass the raw message instead of a labelled transcript. + if len(lines) == 1 and lines[0].startswith(f"{_USER_PREFIX}: "): + return lines[0][len(_USER_PREFIX) + 2 :] + + return "\n".join(lines) + + +def sdk_message_to_events( + message: "Message", author: str, invocation_id: str +) -> list[Event]: + """Convert one SDK stream message into ADK events. + + Only :class:`~claude_agent_sdk.AssistantMessage` carries renderable content + and is translated; other message types (user/system/result) produce no + events here. 
Final usage/session bookkeeping is handled by the caller. + + Args: + message (claude_agent_sdk.Message): A message yielded by the SDK stream. + author (str): Event author (the agent name). + invocation_id (str): The ADK invocation id to stamp on each event. + + Returns: + list[google.adk.events.event.Event]: Zero or more events for this message. + """ + if not isinstance(message, AssistantMessage): + return [] + + events: list[Event] = [] + for block in message.content: + part: types.Part | None = None + if isinstance(block, TextBlock): + part = types.Part(text=block.text) + elif isinstance(block, ThinkingBlock): + part = types.Part(text=block.thinking, thought=True) + elif isinstance(block, ToolUseBlock): + part = types.Part( + function_call=types.FunctionCall( + id=block.id, name=block.name, args=block.input + ) + ) + + if part is None: + continue + + events.append( + Event( + invocation_id=invocation_id, + author=author, + content=types.Content(role="model", parts=[part]), + ) + ) + + return events diff --git a/veadk/runtime/codex/__init__.py b/veadk/runtime/codex/__init__.py new file mode 100644 index 00000000..77bb00fa --- /dev/null +++ b/veadk/runtime/codex/__init__.py @@ -0,0 +1,19 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""OpenAI Codex SDK runtime.""" + +from veadk.runtime.codex.runtime import CodexRuntime + +__all__ = ["CodexRuntime"] diff --git a/veadk/runtime/codex/proxy.py b/veadk/runtime/codex/proxy.py new file mode 100644 index 00000000..546c9492 --- /dev/null +++ b/veadk/runtime/codex/proxy.py @@ -0,0 +1,231 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""OpenAI Responses ``/v1/responses`` translation shim for chat backends. + +OpenAI Codex only speaks the Responses API (its model providers require +``wire_api = "responses"``). When the user's model endpoint is a plain +OpenAI-compatible *chat-completions* endpoint (VeADK's default, e.g. Volcengine +Ark), this module stands up a tiny in-process FastAPI server that accepts +Responses requests and forwards them through :func:`litellm.aresponses` — whose +completion-transformation bridge converts Responses ⇄ chat-completions — to the +backend. Codex is then pointed at the local server. 
+""" + +from __future__ import annotations + +import asyncio +import json +from typing import Any, AsyncIterator, cast + +import litellm +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, StreamingResponse +from litellm.exceptions import APIError + +from veadk.utils.logger import get_logger + +logger = get_logger(__name__) + +# Parameters accepted by litellm.aresponses; everything else in the inbound +# request body is dropped to avoid forwarding unsupported fields. +_PASSTHROUGH_KEYS = ( + "input", + "include", + "instructions", + "max_output_tokens", + "metadata", + "parallel_tool_calls", + "previous_response_id", + "reasoning", + "store", + "stream", + "temperature", + "text", + "tool_choice", + "tools", + "top_p", + "truncation", + "user", +) + + +class ResponsesShim: + """In-process Responses ``/v1/responses`` server backed by a chat endpoint. + + Translates inbound Responses requests via :func:`litellm.aresponses` and + forwards them to ``api_base`` using ``api_key`` with + ``custom_llm_provider="openai"``. Supports streaming (SSE) and non-streaming. + + Attributes: + api_base (str): OpenAI-compatible (chat) backend base URL. + api_key (str): API key for the backend. + url (str | None): Local server URL once started. 
+ """ + + def __init__(self, api_base: str, api_key: str) -> None: + self.api_base = api_base + self.api_key = api_key + self.url: str | None = None + self._server: uvicorn.Server | None = None + self._task: asyncio.Task[Any] | None = None + self._app = self._build_app() + + def _build_app(self) -> FastAPI: + app = FastAPI() + + @app.post("/v1/responses") + async def responses(request: Request) -> Any: + body = await request.json() + model = body["model"] + stream = bool(body.get("stream", False)) + + call_kwargs: dict[str, Any] = { + key: body[key] for key in _PASSTHROUGH_KEYS if key in body + } + call_kwargs.update( + model=f"openai/{model}", + api_base=self.api_base, + api_key=self.api_key, + custom_llm_provider="openai", + drop_params=True, + num_retries=0, + ) + + result = await litellm.aresponses(**call_kwargs) + + if stream: + stream_iter = cast(AsyncIterator[Any], result).__aiter__() + first = await anext(stream_iter, None) + return StreamingResponse( + _encode_sse(stream_iter, first), + media_type="text/event-stream", + ) + return JSONResponse(_to_dict(result)) + + @app.exception_handler(APIError) + async def _on_api_error(_request: Request, exc: APIError) -> JSONResponse: + status = getattr(exc, "status_code", 500) or 500 + return JSONResponse( + status_code=status, + content={ + "error": { + "type": _error_type(status), + "message": getattr(exc, "message", str(exc)), + } + }, + ) + + return app + + async def start(self) -> str: + """Start the server on an ephemeral local port and return its URL.""" + if self.url: + return self.url + + config = uvicorn.Config( + self._app, host="127.0.0.1", port=0, log_level="warning" + ) + server = uvicorn.Server(config) + server.install_signal_handlers = lambda: None # type: ignore[method-assign] + self._server = server + self._task = asyncio.create_task(server.serve()) + + while not server.started: + await asyncio.sleep(0.02) + + port = server.servers[0].sockets[0].getsockname()[1] + self.url = 
f"http://127.0.0.1:{port}" + logger.info(f"Responses shim started at {self.url} -> {self.api_base}") + return self.url + + async def stop(self) -> None: + """Stop the server and await its task.""" + if self._server is not None: + self._server.should_exit = True + if self._task is not None: + await self._task + self.url = None + + +def _error_type(status: int) -> str: + """Map an HTTP status code to an error ``type`` string.""" + return { + 400: "invalid_request_error", + 401: "authentication_error", + 403: "permission_error", + 404: "not_found_error", + 429: "rate_limit_error", + }.get(status, "api_error") + + +def _to_dict(obj: Any) -> dict[str, Any]: + """Normalize a litellm Responses object into a plain dict.""" + if isinstance(obj, dict): + return obj + if hasattr(obj, "model_dump"): + return obj.model_dump() + return dict(obj) + + +def _encode_chunk(chunk: Any) -> bytes: + """Encode one litellm Responses stream chunk as SSE bytes.""" + if isinstance(chunk, (bytes, bytearray)): + return bytes(chunk) + if isinstance(chunk, str): + return chunk.encode() + data = _to_dict(chunk) + event_type = data.get("type", "message") + return f"event: {event_type}\ndata: {json.dumps(data)}\n\n".encode() + + +async def _encode_sse( + chunks: AsyncIterator[Any], first: Any = None +) -> AsyncIterator[bytes]: + """Re-encode litellm Responses stream chunks as SSE bytes. + + ``first`` is the already-pulled leading chunk (or ``None``). A mid-stream + backend error is emitted as an ``error`` SSE event so the client sees a + terminal error rather than a silently truncated stream. 
+ """ + if first is not None: + yield _encode_chunk(first) + try: + async for chunk in chunks: + yield _encode_chunk(chunk) + except APIError as exc: + status = getattr(exc, "status_code", 500) or 500 + err = { + "type": "error", + "error": { + "type": _error_type(status), + "message": getattr(exc, "message", str(exc)), + }, + } + yield f"event: error\ndata: {json.dumps(err)}\n\n".encode() + + +# Reuse one shim per (api_base, api_key) for the lifetime of the process. +_SHIMS: dict[tuple[str, str], ResponsesShim] = {} + + +async def get_shim_url(api_base: str, api_key: str) -> str: + """Return a started shim URL for the given backend, creating it if needed.""" + key = (api_base, api_key) + shim = _SHIMS.get(key) + if shim is None: + shim = ResponsesShim(api_base=api_base, api_key=api_key) + _SHIMS[key] = shim + return await shim.start() diff --git a/veadk/runtime/codex/runtime.py b/veadk/runtime/codex/runtime.py new file mode 100644 index 00000000..c6e06643 --- /dev/null +++ b/veadk/runtime/codex/runtime.py @@ -0,0 +1,151 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""OpenAI Codex runtime for VeADK. + +Drives an agent invocation through the Codex SDK (``codex_app_server``) instead +of ADK's built-in LLM flow, while the surrounding ``Runner`` keeps owning +session, memory and tracing. 
+ +Key guarantees (mirroring the ``cc`` runtime): + +- The model is always the one configured on the agent (or via ``OPENAI_MODEL`` / + settings); if none resolves, the runtime fails fast. +- Codex is isolated from the host's ``~/.codex`` via a dedicated ``CODEX_HOME`` with + a generated ``config.toml``; the backend credential is injected through the + provider's ``env_key`` env var. A wrong key fails loudly. +- Codex only speaks the Responses API, so requests are routed through an + in-process Responses→chat shim (see :mod:`veadk.runtime.codex.proxy`). + +Note: this requires the Codex binary on PATH and the ``codex_app_server`` SDK. +""" + +from __future__ import annotations + +import os +import tempfile +from typing import TYPE_CHECKING, AsyncGenerator + +from codex_app_server import AsyncCodex # type: ignore[import-not-found] + +from veadk.runtime.base_runtime import BaseRuntime, build_system_append +from veadk.runtime.codex.proxy import get_shim_url +from veadk.runtime.codex.translate import build_prompt, result_to_events +from veadk.utils.logger import get_logger + +if TYPE_CHECKING: + from google.adk.agents.invocation_context import InvocationContext + from google.adk.events.event import Event + + from veadk.agent import Agent + +logger = get_logger(__name__) + +_PROVIDER_ID = "veadk" +_KEY_ENV = "VEADK_CODEX_API_KEY" +_LOCAL_SHIM_TOKEN = "veadk-local" + +# Cache one isolated CODEX_HOME per (shim_url, model).
+_CODEX_HOMES: dict[tuple[str, str], str] = {} + + +class CodexRuntime(BaseRuntime): + """Run an agent invocation via the Codex SDK.""" + + name = "codex" + + async def run_async( + self, agent: "Agent", ctx: "InvocationContext" + ) -> AsyncGenerator["Event", None]: + model = self._resolve_model(agent) + api_base = agent.model_api_base or os.getenv("OPENAI_BASE_URL") + api_key = agent.model_api_key or os.getenv("OPENAI_API_KEY") + if not api_base or not api_key: + raise ValueError( + "codex runtime requires model_api_base and model_api_key " + "(the chat endpoint Codex is bridged onto)." + ) + + shim_url = await get_shim_url(api_base, api_key) + codex_home = _prepare_codex_home(shim_url, model) + + # Codex has no clean SDK channel to append to its base system prompt, so + # the agent identity/instruction is folded into a leading block of the + # input (a labelled preamble), not the transcript itself. + prompt = build_prompt(ctx) + append_text = build_system_append(agent) + if append_text: + prompt = ( + f"# System instructions\n\n{append_text}\n\n# Conversation\n\n{prompt}" + ) + logger.info(f"codex runtime: model={model}, shim={shim_url}") + + # Isolate from the host's ~/.codex and pin the backend credential. The + # Codex app-server subprocess reads these from the environment at spawn. 
+ previous = {k: os.environ.get(k) for k in ("CODEX_HOME", _KEY_ENV)} + os.environ["CODEX_HOME"] = codex_home + os.environ[_KEY_ENV] = _LOCAL_SHIM_TOKEN + try: + async with AsyncCodex() as codex: + thread = await codex.thread_start(model=model) + result = await thread.run(prompt) + finally: + for key, value in previous.items(): + if value is None: + os.environ.pop(key, None) + else: + os.environ[key] = value + + for event in result_to_events(result, agent.name, ctx.invocation_id): + yield event + + def _resolve_model(self, agent: "Agent") -> str: + name = agent.model_name + if isinstance(name, list): + name = name[0] if name else "" + name = name or os.getenv("OPENAI_MODEL", "") + if not name: + raise ValueError( + "codex runtime requires a model: set Agent(model_name=...) " + "or the OPENAI_MODEL environment variable." + ) + return name + + +def _prepare_codex_home(shim_url: str, model: str) -> str: + """Create (and cache) an isolated CODEX_HOME with a config.toml. + + The config points Codex at the local Responses shim using a dedicated + ``veadk`` provider, so the run never touches the host's ``~/.codex``. + """ + cache_key = (shim_url, model) + cached = _CODEX_HOMES.get(cache_key) + if cached is not None: + return cached + + home = tempfile.mkdtemp(prefix="veadk-codex-") + config = ( + f'model = "{model}"\n' + f'model_provider = "{_PROVIDER_ID}"\n\n' + f"[model_providers.{_PROVIDER_ID}]\n" + f'name = "{_PROVIDER_ID}"\n' + f'base_url = "{shim_url}/v1"\n' + f'env_key = "{_KEY_ENV}"\n' + f'wire_api = "responses"\n' + ) + with open(os.path.join(home, "config.toml"), "w", encoding="utf-8") as f: + f.write(config) + + _CODEX_HOMES[cache_key] = home + return home diff --git a/veadk/runtime/codex/translate.py b/veadk/runtime/codex/translate.py new file mode 100644 index 00000000..16902903 --- /dev/null +++ b/veadk/runtime/codex/translate.py @@ -0,0 +1,97 @@ +# Copyright (c) 2025 Beijing Volcano Engine Technology Co., Ltd. and/or its affiliates. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Translation between ADK and the Codex SDK. + +- :func:`build_prompt` flattens an ADK session into a single prompt string + (stateless replay; ADK stays the single source of truth). This mirrors the + ``cc`` runtime's helper but is duplicated here so the ``codex`` package does + not import ``claude_agent_sdk``. +- :func:`result_to_events` maps a Codex run result into ADK events. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING, Any + +from google.adk.events.event import Event +from google.genai import types + +if TYPE_CHECKING: + from google.adk.agents.invocation_context import InvocationContext + +_USER_PREFIX = "User" +_ASSISTANT_PREFIX = "Assistant" + + +def build_prompt(ctx: "InvocationContext") -> str: + """Render the session into a single prompt string for the Codex SDK. + + Walks ``ctx.session.events`` in order, rendering each turn as a + ``User:``/``Assistant:`` line; the new user message is the last event the + ``Runner`` appended, so it terminates the transcript. ``thought`` parts are + skipped. + + Args: + ctx (google.adk.agents.invocation_context.InvocationContext): Invocation + context holding the session. + + Returns: + str: The flattened transcript (just the message for a single user turn). 
+ """ + lines: list[str] = [] + for event in ctx.session.events: + if event.content is None or not event.content.parts: + continue + text = "".join( + part.text for part in event.content.parts if part.text and not part.thought + ).strip() + if not text: + continue + prefix = _USER_PREFIX if event.author == "user" else _ASSISTANT_PREFIX + lines.append(f"{prefix}: {text}") + + if len(lines) == 1 and lines[0].startswith(f"{_USER_PREFIX}: "): + return lines[0][len(_USER_PREFIX) + 2 :] + + return "\n".join(lines) + + +def result_to_events(result: Any, author: str, invocation_id: str) -> list[Event]: + """Convert a Codex run result into ADK events. + + The Codex SDK's run result exposes the assistant's final text as + ``final_response``. Richer per-item events (tool calls, reasoning) can be + added later. + + Args: + result (Any): The object returned by ``thread.run(...)``. + author (str): Event author (the agent name). + invocation_id (str): The ADK invocation id to stamp on the event. + + Returns: + list[google.adk.events.event.Event]: One text event, or empty if the + result carried no text. + """ + text = getattr(result, "final_response", None) + if not text: + return [] + + return [ + Event( + invocation_id=invocation_id, + author=author, + content=types.Content(role="model", parts=[types.Part(text=text)]), + ) + ] From 9f86c4992187ff65344118608fb1505f7fe22da2 Mon Sep 17 00:00:00 2001 From: "fangyaozheng@bytedance.com" Date: Mon, 1 Jun 2026 20:12:40 +0800 Subject: [PATCH 2/2] fix(runtime/cc): restrict cc runtime to Anthropic endpoints Disable routing Claude Code to a non-Anthropic model via the in-process Anthropic<->OpenAI shim, pending license/terms review. The cc runtime now accepts only Anthropic-compatible endpoints and raises a clear error for OpenAI-compatible ones. The shim implementation remains in cc/proxy.py so the path can be re-enabled if cleared. 
Co-Authored-By: Claude Opus 4.8 --- veadk/runtime/cc/runtime.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/veadk/runtime/cc/runtime.py b/veadk/runtime/cc/runtime.py index d46d3407..63057090 100644 --- a/veadk/runtime/cc/runtime.py +++ b/veadk/runtime/cc/runtime.py @@ -39,7 +39,7 @@ from claude_agent_sdk.types import SystemPromptPreset from veadk.runtime.base_runtime import BaseRuntime, build_system_append -from veadk.runtime.cc.proxy import detect_endpoint_kind, get_shim_url +from veadk.runtime.cc.proxy import detect_endpoint_kind from veadk.runtime.cc.translate import build_prompt, sdk_message_to_events from veadk.utils.logger import get_logger @@ -51,8 +51,6 @@ logger = get_logger(__name__) -_LOCAL_SHIM_TOKEN = "veadk-local" - def _model_env(model: str) -> dict[str, str]: """Pin every Claude Code model tier to ``model``. @@ -143,22 +141,17 @@ async def _build_env( api_base: str | None, api_key: str | None, ) -> dict[str, str]: - if kind == "openai": - if not api_base or not api_key: - raise ValueError( - "cc runtime with an OpenAI-compatible endpoint requires both " - "model_api_base and model_api_key." - ) - base_url = await get_shim_url(api_base, api_key) - # Credentials are validated by the shim against the backend; the token - # the SDK sends to the local shim is irrelevant but must override any - # inherited one. - return { - "ANTHROPIC_BASE_URL": base_url, - "ANTHROPIC_AUTH_TOKEN": _LOCAL_SHIM_TOKEN, - "ANTHROPIC_API_KEY": _LOCAL_SHIM_TOKEN, - **_model_env(model), - } + # Routing Claude Code to a non-Anthropic model (via the OpenAI<->Anthropic + # shim) is disabled pending license/terms review. The cc runtime currently + # supports only Anthropic-compatible endpoints. The shim itself still lives + # in `proxy.py` so this can be re-enabled once cleared. 
+ if kind != "anthropic": + raise ValueError( + "The 'cc' runtime currently supports only Anthropic-compatible " + "endpoints; routing Claude Code to a non-Anthropic model is " + "disabled. Set an Anthropic endpoint (model_provider='anthropic') " + "or use runtime='adk'." + ) # Native Anthropic endpoint. if not api_key: