Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/ucode/agents/claude.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,11 @@ def _resolve_web_search_model(state: dict) -> str | None:


WEB_SEARCH_MCP_NAME = "web_search"
_CLAUDE_MODEL_RE = re.compile(r"^databricks-claude-(opus|sonnet)-(\d+)-(\d+)(.*)$")
# Matches both the AI Gateway form (`databricks-claude-opus-4-8`) and the UC
# model-services form (`system.ai.claude-opus-4-8`). Both prefixes are
# optional in the pattern, so a bare `claude-opus-4-8` matches as well.
_CLAUDE_MODEL_RE = re.compile(
r"^(?:system\.ai\.)?(?:databricks-)?claude-(opus|sonnet)-(\d+)-(\d+)(.*)$"
)

# Env keys the MLflow Stop hook reads to route traces. Written into the
# settings `env` block alongside the hook itself.
Expand Down
11 changes: 10 additions & 1 deletion src/ucode/agents/codex.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,10 @@ def _openai_model_id(model: str | None) -> str | None:


def _codex_model_id(model: str | None) -> str | None:
    """Return the model id to send for ``model``.

    UC model-services ids (``system.ai.*``) and ids listed in
    ``CODEX_OPENAI_ID_INCOMPATIBLE_MODELS`` are returned unchanged; every
    other value is handed to ``_openai_model_id``.
    """
    # Model-services ids (`system.ai.gpt-5`) route by name through the gateway,
    # so they must be sent verbatim — not rewritten to an OpenAI id.
    is_model_services_id = model is not None and model.startswith("system.ai.")
    if is_model_services_id or model in CODEX_OPENAI_ID_INCOMPATIBLE_MODELS:
        return model
    return _openai_model_id(model)
Expand All @@ -263,7 +267,12 @@ def _codex_model_id(model: str | None) -> str | None:
def _parse_gpt(model: str | None) -> tuple[int, int | None, int | None, str] | None:
if not model:
return None
match = _GPT_RE.fullmatch(model.split("/")[-1])
# Strip the UC model-services prefix so `system.ai.gpt-5` parses for version
# selection; the original id is preserved by callers that need it verbatim.
tail = model.split("/")[-1]
if tail.startswith("system.ai."):
tail = tail[len("system.ai.") :]
match = _GPT_RE.fullmatch(tail)
if not match:
return None
major, minor, patch, suffix = match.groups()
Expand Down
43 changes: 32 additions & 11 deletions src/ucode/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
discover_claude_models,
discover_codex_models,
discover_gemini_models,
discover_model_services,
ensure_ai_gateway_v2,
ensure_databricks_auth,
find_profile_name_for_host,
Expand All @@ -41,6 +42,7 @@
install_databricks_cli,
normalize_workspace_url,
run_databricks_login,
use_model_services,
)
from ucode.mcp import (
MCP_CLIENTS,
Expand Down Expand Up @@ -160,7 +162,13 @@ def configure_shared_state(
don't error out. If ``None``, we resolve it from the host after login.
"""
workspace = normalize_workspace_url(workspace)
previous_workspace = load_state().get("workspace")
prior_state = load_state()
previous_workspace = prior_state.get("workspace")
# The flag is sticky: an explicit env var wins, otherwise fall back to what
# was persisted when the workspace was configured. Without this, every
# launch re-runs discovery and a missing env var would silently revert a
# model-services workspace to the databricks-* gateway names.
model_services = use_model_services(default=bool(prior_state.get("use_model_services")))
fetch_all = tools is None
if force_login:
run_databricks_login(workspace, profile)
Expand All @@ -184,19 +192,29 @@ def configure_shared_state(
claude_reason: str | None = None
gemini_reason: str | None = None
codex_reason: str | None = None
with spinner("Fetching available models..."):
claude_models = {}
gemini_models = []
codex_models = []
if model_services:
# Opt-in: one UC model-services call yields all families as
# `system.ai.<model-name>` ids, bucketed by name. The single reason is
# shared across the families that were requested.
with spinner("Fetching available models (model services)..."):
ms_claude, ms_codex, ms_gemini, ms_reason = discover_model_services(workspace, token)
if want_claude:
claude_models, claude_reason = discover_claude_models(workspace, token)
else:
claude_models = {}
claude_models, claude_reason = ms_claude, ms_reason
if want_gemini:
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
else:
gemini_models = []
gemini_models, gemini_reason = ms_gemini, ms_reason
if want_codex:
codex_models, codex_reason = discover_codex_models(workspace, token)
else:
codex_models = []
codex_models, codex_reason = ms_codex, ms_reason
else:
with spinner("Fetching available models..."):
if want_claude:
claude_models, claude_reason = discover_claude_models(workspace, token)
if want_gemini:
gemini_models, gemini_reason = discover_gemini_models(workspace, token)
if want_codex:
codex_models, codex_reason = discover_codex_models(workspace, token)
opencode_models: dict[str, list[str]] = {}
if claude_models:
opencode_models["anthropic"] = list(claude_models.values())
Expand All @@ -210,6 +228,9 @@ def configure_shared_state(
state["profile"] = profile
else:
state.pop("profile", None)
# Persist the resolved flag so subsequent launches stay on the same
# discovery path without the env var being re-exported.
state["use_model_services"] = model_services
state["base_urls"] = build_shared_base_urls(workspace)
if want_claude:
state["claude_models"] = claude_models
Expand Down
170 changes: 169 additions & 1 deletion src/ucode/databricks.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from typing import Literal, cast, overload
from urllib import error as urllib_error
from urllib import request as urllib_request
from urllib.parse import urlparse
from urllib.parse import urlencode, urlparse

from databricks.sql.exc import ServerOperationError

Expand Down Expand Up @@ -977,6 +977,174 @@ def build_auth_shell_command(workspace: str, profile: str | None = None) -> str:
)


def use_model_services(default: bool = False) -> bool:
    """Report whether the opt-in UC model-services discovery path is enabled.

    Reads ``UCODE_USE_MODEL_SERVICES``. Setting it to ``1``/``true``/``yes``/
    ``on`` (case-insensitive, surrounding whitespace ignored) enables discovery
    via the Unity Catalog model-services API, where models are addressed as
    ``system.ai.<model-name>`` instead of the per-family AI Gateway listings.
    Any other non-blank value disables it.

    ``default`` is returned when the variable is unset or blank. Callers pass
    the value persisted in state so a workspace configured with the flag keeps
    using model services on later launches without re-exporting the variable.
    """
    setting = os.environ.get("UCODE_USE_MODEL_SERVICES", "").strip()
    if not setting:
        # Unset and whitespace-only are treated the same: defer to the caller.
        return default
    return setting.lower() in ("1", "true", "yes", "on")


# A model-service's `name` is `model-services/system.ai.<model-name>`; the
# part after the prefix is exactly the model string agents send (no
# `databricks-` infix — that only appears on the inner destination name).
_MODEL_SERVICE_NAME_PREFIX = "model-services/"
# The metastore-scope listing returns services from EVERY schema (e.g.
# `main.user.foo`, `temp.*`, internal DLT schemas). We only want the
# Databricks-managed foundation models under `system.ai`.
_MODEL_SERVICE_REQUIRED_PREFIX = "system.ai."


def _model_service_id(service: dict) -> str | None:
"""Extract the `system.ai.<model-name>` id from one model-service entry.

Returns None for services in any other schema, so user/internal model
services don't leak into the family buckets."""
name = service.get("name")
if not isinstance(name, str):
return None
name = name.strip()
if name.startswith(_MODEL_SERVICE_NAME_PREFIX):
name = name[len(_MODEL_SERVICE_NAME_PREFIX) :]
if not name.startswith(_MODEL_SERVICE_REQUIRED_PREFIX):
return None
return name or None


# The model-services metastore listing is slow and flaky — large pages
# routinely 504 with `Timeout listing model services under metastore`. A small
# page is far more likely to come back, and each page gets a few retries before
# we give up.
_MODEL_SERVICES_PAGE_SIZE = 10
_MODEL_SERVICES_PAGE_RETRIES = 4


def _get_model_services_page(
    url: str, token: str, *, retries: int = _MODEL_SERVICES_PAGE_RETRIES
) -> tuple[dict | list | None, str | None]:
    """Fetch one model-services page, re-issuing the GET after each failure.

    Makes up to ``retries`` calls to ``_http_get_json`` (30 second timeout
    each) and stops at the first one that yields a payload. Every failed
    attempt is reported through ``_debug``.

    Returns ``(payload, None)`` on success. When every attempt fails, returns
    the last attempt's ``(payload, reason)`` pair — the same shape
    ``_http_get_json`` produces.
    """
    last_payload: dict | list | None = None
    last_reason: str | None = None
    attempt_no = 0
    while attempt_no < retries:
        attempt_no += 1
        last_payload, last_reason = _http_get_json(url, token, timeout=30)
        if last_payload is None:
            _debug("model-services page", f"attempt {attempt_no}/{retries} failed: {last_reason}")
            continue
        return last_payload, None
    # Retries exhausted (or none were allowed): hand back the final outcome.
    return last_payload, last_reason


def list_model_services(
    workspace: str,
    token: str,
    *,
    page_size: int = _MODEL_SERVICES_PAGE_SIZE,
    max_pages: int = 100,
) -> tuple[list[str], str | None]:
    """List all `system.ai.*` model ids via the UC model-services API.

    Pages through ``/api/2.1/unity-catalog/model-services`` (metastore scope)
    and returns the de-duplicated, sorted list of ``system.ai.<model-name>``
    ids. Uses a small page size with per-page retries because the endpoint is
    slow and frequently 504s.

    Args:
        workspace: Workspace URL; its hostname is used to build the request.
        token: Bearer token passed through to the page fetcher.
        page_size: Value sent as the ``page_size`` query parameter.
        max_pages: Upper bound on the number of pages requested.

    Returns:
        ``(ids, reason)``. ``reason`` is None whenever at least one id was
        collected — including when a later page failed, in which case the
        partial list is returned. When no ids were collected, ``reason``
        carries the fetch failure or a generic "no models" message.
    """
    hostname = workspace_hostname(workspace)
    ids: set[str] = set()
    page_token: str | None = None
    seen_tokens: set[str] = set()
    last_reason: str | None = None
    for _ in range(max_pages):
        params: dict[str, str] = {"page_size": str(page_size)}
        if page_token:
            params["page_token"] = page_token
        url = f"https://{hostname}/api/2.1/unity-catalog/model-services?{urlencode(params)}"
        payload, reason = _get_model_services_page(url, token)
        if payload is None:
            # Surface the failure only if we have nothing yet; a mid-pagination
            # blip still returns whatever we collected.
            last_reason = reason
            break
        data = payload if isinstance(payload, dict) else {}
        # The key may be absent, `null`, or some other non-list shape; only a
        # real list is iterated so a `null` value cannot raise TypeError.
        services = data.get("model_services")
        if not isinstance(services, list):
            services = []
        for service in services:
            if isinstance(service, dict):
                model_id = _model_service_id(service)
                if model_id:
                    ids.add(model_id)
        page_token = data.get("next_page_token") or None
        if not page_token:
            last_reason = None
            break
        if page_token in seen_tokens:
            # The server handed back a token we already followed; stop rather
            # than loop over the same pages until max_pages.
            break
        seen_tokens.add(page_token)

    if ids:
        return sorted(ids), None
    return [], last_reason or "model-services listing returned no models"


def discover_model_services(
    workspace: str, token: str
) -> tuple[dict[str, str], list[str], list[str], str | None]:
    """Discover models via UC model-services and bucket them by family name.

    Returns (claude_models, codex_models, gemini_models, reason):

    - ``claude_models`` maps ``opus``/``sonnet``/``haiku`` to the newest
      matching ``system.ai.claude-*`` id. "Newest" compares the ``-``-separated
      numeric segments of the id as integers, so ``claude-opus-4-10`` ranks
      above ``claude-opus-4-8``.
    - ``codex_models`` is the list of ids containing ``gpt-``, in the order
      returned by ``list_model_services``.
    - ``gemini_models`` is the list of ids containing ``gemini-``, ordered by
      ``model_version_sort_key`` (intended to be newest first).

    ``reason`` is None on success, else explains why nothing was found. Family
    bucketing is by name substring because the model-services API does not
    expose per-model API dialects.
    """

    def _version_key(model_id: str) -> tuple[tuple[int, ...], str]:
        # Plain string comparison puts "4-8" after "4-10"; comparing the
        # numeric segments as integers fixes that. The raw id breaks ties so
        # the choice stays deterministic.
        # NOTE(review): `model_version_sort_key` may already provide this
        # ordering for Claude ids — confirm and reuse it if so.
        numbers = tuple(int(part) for part in model_id.split("-") if part.isdecimal())
        return numbers, model_id

    ids, reason = list_model_services(workspace, token)
    if not ids:
        return {}, [], [], reason

    claude_models: dict[str, str] = {}
    for family in ("opus", "sonnet", "haiku"):
        candidates = [m for m in ids if f"claude-{family}-" in m]
        if candidates:
            claude_models[family] = max(candidates, key=_version_key)

    codex_models = [m for m in ids if "gpt-" in m]
    gemini_models = sorted([m for m in ids if "gemini-" in m], key=model_version_sort_key)

    if not (claude_models or codex_models or gemini_models):
        # Ids came back but none belong to a known family; include a short
        # sample so the caller can see what the listing actually contained.
        sample = ", ".join(ids[:5])
        return (
            {},
            [],
            [],
            (
                "model-services returned model ids but none matched "
                f"claude/gpt/gemini families (got: {sample})"
            ),
        )
    return claude_models, codex_models, gemini_models, None


def discover_claude_models(workspace: str, token: str) -> tuple[dict[str, str], str | None]:
"""Discover Claude families on this workspace's AI Gateway.

Expand Down
8 changes: 8 additions & 0 deletions tests/test_agent_claude.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ def test_does_not_duplicate_1m_suffix(self):
overlay, _ = claude.render_overlay(WS, "databricks-claude-opus-4-7[1m]")
assert overlay["env"]["ANTHROPIC_MODEL"] == "databricks-claude-opus-4-7[1m]"

def test_adds_1m_suffix_for_model_services_name(self):
    """An opus id in `system.ai.` form is rendered with the `[1m]` suffix."""
    rendered, _ignored = claude.render_overlay(WS, "system.ai.claude-opus-4-8")
    env_block = rendered["env"]
    assert env_block["ANTHROPIC_MODEL"] == "system.ai.claude-opus-4-8[1m]"

def test_no_1m_suffix_for_model_services_haiku(self):
    """A haiku id in `system.ai.` form is rendered without any suffix."""
    rendered, _ignored = claude.render_overlay(WS, "system.ai.claude-haiku-4-6")
    env_block = rendered["env"]
    assert env_block["ANTHROPIC_MODEL"] == "system.ai.claude-haiku-4-6"

def test_sets_anthropic_base_url(self):
overlay, _ = claude.render_overlay(WS, "s4")
assert overlay["env"]["ANTHROPIC_BASE_URL"] == f"{WS}/ai-gateway/anthropic"
Expand Down
11 changes: 11 additions & 0 deletions tests/test_agent_codex.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,17 @@ def test_openai_model_id_maps_databricks_naming(self):
def test_codex_model_id_preserves_openai_incompatible_models(self):
    """Models flagged as OpenAI-id incompatible are returned unchanged."""
    assert codex._codex_model_id("databricks-gpt-5-2-codex") == "databricks-gpt-5-2-codex"
    assert codex._codex_model_id("databricks-gpt-5-4-nano") == "databricks-gpt-5-4-nano"
    # Contrast case: an ordinary gateway id is still rewritten. This assertion
    # belongs to this test; the model-services tests were inserted above it
    # and left it stranded at the end of an unrelated test.
    assert codex._codex_model_id("databricks-gpt-5-5") == "gpt-5.5"

def test_codex_model_id_passes_model_services_id_verbatim(self):
    """`system.ai.*` ids come back exactly as given."""
    # UC model-services ids route by name, so they must not be rewritten
    # to the OpenAI id form.
    assert codex._codex_model_id("system.ai.gpt-5") == "system.ai.gpt-5"
    assert codex._codex_model_id("system.ai.gpt-5-2-codex") == "system.ai.gpt-5-2-codex"

def test_default_model_selects_model_services_gpt(self):
    """The default picks `system.ai.gpt-5-5` from a mixed model-services list."""
    models = ["system.ai.gpt-5", "system.ai.gpt-5-5", "system.ai.claude-opus-4-8"]

    assert codex.default_model({"codex_models": models}) == "system.ai.gpt-5-5"


Expand Down
Loading
Loading