From c31d0e85ec29f5f628b168da223380f550dce7dd Mon Sep 17 00:00:00 2001 From: Rohit Agrawal Date: Tue, 9 Jun 2026 20:08:40 -0400 Subject: [PATCH] Cap Gemini CLI below 0.45 and prefer newest model version Gemini CLI 0.45 introduced a "Gemini 3.5 Flash GA" router that rewrites any forced flash model id (e.g. databricks-gemini-3-5-flash) to Google's canonical gemini-3.5-flash, which the Databricks AI Gateway rejects as an invalid Unity Catalog endpoint name. 0.44.x passes the configured model through verbatim and works. - Cap the supported Gemini version below 0.45 and check it on every launch (a too-new build runs but misbehaves, so it's a correctness blocker). - Warn and offer a y/n downgrade to the latest working release rather than forcing it; honor --skip-upgrade by warning without prompting. - Steer update prompts toward the newest version below the broken ceiling instead of npm's `latest` tag, which points at the broken line. Adds published_versions() and latest_version_below() helpers. - Order discovered Gemini models newest-version-first so default_model() launches gemini-3.5-flash rather than gemini-2.5-flash. Codex and the generic discovery path keep their alphabetical default. - Re-enable the e2e Gemini launch test with a version-aware skip. 
# First MAJOR.MINOR.PATCH triple found anywhere in a version string.
_BASE_VERSION_RE = re.compile(r"(\d+)\.(\d+)\.(\d+)")
# A plain release: optional "v" prefix, three numeric parts, nothing after.
_STABLE_VERSION_RE = re.compile(r"v?\d+\.\d+\.\d+$")


def _base_version(value: str) -> tuple[int, int, int] | None:
    """Return the leading (major, minor, patch) of a version string.

    Any prerelease/build suffix (e.g. `-nightly.20260515.g928a311fb`) is
    ignored. Returns None when the string holds no `X.Y.Z` triple.
    """
    match = _BASE_VERSION_RE.search(value)
    if match is None:
        return None
    major, minor, patch = match.groups()
    return int(major), int(minor), int(patch)


def _is_stable(value: str) -> bool:
    """True for plain `X.Y.Z` releases (no prerelease/nightly/preview suffix)."""
    return _STABLE_VERSION_RE.fullmatch(value.strip()) is not None


def published_versions(package: str) -> list[str]:
    """Return every published version of an npm package.

    The list keeps the order in which `npm view <package> versions --json`
    reports it. An empty list is returned when npm is not installed, the
    command fails or times out, or its output is not a JSON string/list.
    """
    if not shutil.which("npm"):
        return []
    try:
        result = subprocess.run(
            ["npm", "view", package, "versions", "--json"],
            capture_output=True,
            text=True,
            timeout=15,
            check=False,
        )
    except (OSError, subprocess.TimeoutExpired):
        # FileNotFoundError is an OSError subclass, so it is covered here too.
        return []
    if result.returncode != 0 or not result.stdout.strip():
        return []
    try:
        versions = json.loads(result.stdout)
    except json.JSONDecodeError:
        return []
    if isinstance(versions, str):
        # npm prints a bare string when the package has a single version.
        versions = [versions]
    if not isinstance(versions, list):
        return []
    return [v for v in versions if isinstance(v, str)]


def latest_version_below(
    package: str,
    ceiling: tuple[int, int, int],
    *,
    versions: list[str] | None = None,
) -> str | None:
    """Return the newest published version strictly below `ceiling`.

    Versions are compared by their base (major, minor, patch). Among the
    entries sharing the highest eligible base, a stable release is preferred
    over a prerelease; if several remain, the one listed last wins.

    Args:
        package: npm package name, used to query the registry.
        ceiling: exclusive upper bound as (major, minor, patch).
        versions: optional pre-fetched version list. When given, the registry
            is not queried, so callers can reuse one lookup and the selection
            can be exercised without npm.

    Returns:
        The chosen version string, or None when nothing qualifies (including
        when npm is unavailable and no `versions` were supplied).
    """
    listing = published_versions(package) if versions is None else versions
    eligible: list[tuple[tuple[int, int, int], str]] = []
    for candidate in listing:
        if not isinstance(candidate, str):
            continue  # tolerate junk entries in a caller-supplied list
        base = _base_version(candidate)
        if base is not None and base < ceiling:
            eligible.append((base, candidate))
    if not eligible:
        return None
    top_base = max(base for base, _ in eligible)
    at_top = [candidate for base, candidate in eligible if base == top_base]
    stable = [candidate for candidate in at_top if _is_stable(candidate)]
    # Relies on the listing being oldest-first, so the last entry is newest.
    return (stable or at_top)[-1]
+ + Unlike a required *upgrade*, a too-new build may still launch (it just + misbehaves), so we never force the change — we warn and, when prompting is + enabled, let the client press `y` to downgrade. + """ + downgrade = _too_new_downgrade(tool) + if not downgrade: + return False + spec = TOOL_SPECS[tool] + installed, target = downgrade + print_warning( + f"{spec['display']} {installed} is newer than the latest version known to work " + f"with the Databricks AI Gateway ({target})." + ) + if prompt and prompt_yes_no(f"Downgrade {spec['display']} from {installed} to {target}?"): + _update_installed_tool_binary(tool, version=target) + return True + + def install_tool_binary( tool: str, *, @@ -128,7 +161,12 @@ def install_tool_binary( package = spec["package"] if shutil.which(binary): - if update_existing: + # A too-new build is a correctness blocker (the tool runs but misbehaves + # against the gateway), so check it on every launch — not just when + # auto-configuring — mirroring the minimum-version gate below. + too_new = _maybe_downgrade_too_new_tool(tool, prompt=prompt_optional_updates) + + if update_existing and not too_new: required_update = _required_update_message(tool) if required_update: # Required updates are forced regardless of prompt preference; diff --git a/src/ucode/agents/gemini.py b/src/ucode/agents/gemini.py index 0df9b41..d8499e4 100644 --- a/src/ucode/agents/gemini.py +++ b/src/ucode/agents/gemini.py @@ -3,12 +3,13 @@ from __future__ import annotations import os +import re import signal import subprocess import threading from pathlib import Path -from ucode.agent_updates import available_npm_package_update +from ucode.agent_updates import latest_version_below from ucode.config_io import ( APP_DIR, ToolSpec, @@ -51,8 +52,78 @@ ] +# Gemini CLI 0.45 introduced a "Gemini 3.5 Flash GA" router that rewrites any +# forced flash model id (e.g. 
# Gemini CLI 0.45 added a "Gemini 3.5 Flash GA" router that rewrites a forced
# flash model id (e.g. `databricks-gemini-3-5-flash`) to Google's canonical
# `gemini-3.5-flash`. The Databricks AI Gateway rejects that as an invalid
# Unity Catalog endpoint name, so the supported version is capped below 0.45
# until the regression is fixed upstream.
MAX_GEMINI_VERSION = (0, 45, 0)
MAX_GEMINI_VERSION_TEXT = "0.45.0"


def _parse_version(value: str) -> tuple[int, int, int] | None:
    """Extract the first `X.Y.Z` triple from `value`, or None if absent."""
    found = re.search(r"(\d+)\.(\d+)\.(\d+)", value)
    if found is None:
        return None
    return int(found.group(1)), int(found.group(2)), int(found.group(3))


def latest_working_version() -> str | None:
    """Newest published Gemini CLI release below the broken-version ceiling."""
    return latest_version_below(SPEC["package"], MAX_GEMINI_VERSION)


def is_update_available() -> tuple[str, str] | None:
    """Report an upgrade only when it leads to a known-working release.

    The installed build is compared with the newest release below
    `MAX_GEMINI_VERSION` rather than with npm's `latest` tag, so the prompt
    never steers onto the capped line.

    Returns:
        (installed `X.Y.Z`, target version) when the target's base version is
        strictly newer than the installed one; None when either version cannot
        be determined or no newer working release exists.
    """
    current = _parse_version(agent_version(SPEC["binary"]))
    if current is None:
        return None
    candidate = latest_working_version()
    candidate_base = None if candidate is None else _parse_version(candidate)
    if candidate_base is None or candidate_base <= current:
        return None
    return ".".join(str(part) for part in current), candidate
def model_version_sort_key(name: str) -> tuple:
    """Sort key that places newer model versions first.

    Endpoint names embed a dotted version as dash-separated digits, e.g.
    `databricks-gemini-3-5-flash` (3.5) or `databricks-gemini-3-flash` (3.0).
    Plain alphabetical order buries `3-5-flash` below `2-5-flash`; this key
    groups names by the text before the version, orders each group by version
    descending, then breaks ties on the text after the version.

    Args:
        name: endpoint name to build a key for.

    Returns:
        `(0, prefix, negated_version, suffix)` for versioned names, where the
        version is padded/truncated to three components, or
        `(1, name, (), "")` for names with no numeric segment so they sort
        after every versioned name, alphabetically.
    """

    def is_version_token(token: str) -> bool:
        # ASCII digits only: str.isdigit() alone also accepts characters such
        # as "²", which int() rejects with ValueError.
        return token.isascii() and token.isdigit()

    tokens = name.split("-")
    start = next((i for i, tok in enumerate(tokens) if is_version_token(tok)), None)
    if start is None:
        # The leading 1 keeps this group below every versioned name (0).
        return (1, name, (), "")
    end = start
    while end < len(tokens) and is_version_token(tokens[end]):
        end += 1
    version = tuple(int(tok) for tok in tokens[start:end])
    # Pad to a fixed width so (3,) compares as (3, 0, 0), i.e. 3.0 < 3.5.
    padded = (version + (0, 0, 0))[:3]
    prefix = "-".join(tokens[:start])
    suffix = "-".join(tokens[end:])
    # Negate each component so ascending sort yields descending versions.
    return (0, prefix, tuple(-part for part in padded), suffix)
+ return discover_endpoints_with_api_type( + workspace, token, "gemini/v1/generateContent", sort_key=model_version_sort_key + ) def discover_codex_models(workspace: str, token: str) -> tuple[list[str], str | None]: diff --git a/tests/test_agent_gemini.py b/tests/test_agent_gemini.py index b44172c..fb91abc 100644 --- a/tests/test_agent_gemini.py +++ b/tests/test_agent_gemini.py @@ -121,6 +121,52 @@ def test_returns_none_when_missing(self): assert gemini.default_model({}) is None +class TestGeminiVersionGating: + def test_too_new_version_flags_045(self, monkeypatch): + monkeypatch.setattr(gemini, "agent_version", lambda binary: "0.45.0-nightly.20260602") + assert gemini.too_new_version() == "0.45.0-nightly.20260602" + + def test_too_new_version_allows_044(self, monkeypatch): + monkeypatch.setattr(gemini, "agent_version", lambda binary: "0.44.1") + assert gemini.too_new_version() is None + + def test_too_new_version_none_when_unknown(self, monkeypatch): + monkeypatch.setattr(gemini, "agent_version", lambda binary: "unknown") + assert gemini.too_new_version() is None + + def test_too_new_downgrade_returns_target(self, monkeypatch): + monkeypatch.setattr(gemini, "agent_version", lambda binary: "0.45.0-nightly.20260602") + monkeypatch.setattr(gemini, "latest_version_below", lambda pkg, ceiling: "0.44.1") + assert gemini.too_new_downgrade() == ("0.45.0-nightly.20260602", "0.44.1") + + def test_too_new_downgrade_none_when_safe(self, monkeypatch): + monkeypatch.setattr(gemini, "agent_version", lambda binary: "0.44.1") + monkeypatch.setattr(gemini, "latest_version_below", lambda pkg, ceiling: "0.44.1") + assert gemini.too_new_downgrade() is None + + def test_too_new_downgrade_none_when_no_target(self, monkeypatch): + monkeypatch.setattr(gemini, "agent_version", lambda binary: "0.45.0") + monkeypatch.setattr(gemini, "latest_version_below", lambda pkg, ceiling: None) + assert gemini.too_new_downgrade() is None + + def test_update_only_offered_toward_working_version(self, 
monkeypatch): + # Installed 0.40.0, latest working 0.44.1 -> offer the upgrade. + monkeypatch.setattr(gemini, "agent_version", lambda binary: "0.40.0") + monkeypatch.setattr(gemini, "latest_version_below", lambda pkg, ceiling: "0.44.1") + assert gemini.is_update_available() == ("0.40.0", "0.44.1") + + def test_no_update_when_already_at_working_version(self, monkeypatch): + monkeypatch.setattr(gemini, "agent_version", lambda binary: "0.44.1") + monkeypatch.setattr(gemini, "latest_version_below", lambda pkg, ceiling: "0.44.1") + assert gemini.is_update_available() is None + + def test_no_update_offered_toward_broken_version(self, monkeypatch): + # Even when a newer 0.45 exists, the target stays below the ceiling. + monkeypatch.setattr(gemini, "agent_version", lambda binary: "0.44.1") + monkeypatch.setattr(gemini, "latest_version_below", lambda pkg, ceiling: "0.44.1") + assert gemini.is_update_available() is None + + class TestGeminiValidateCmd: def test_starts_with_binary(self): cmd = gemini.validate_cmd("gemini") diff --git a/tests/test_agent_updates.py b/tests/test_agent_updates.py index 3f88300..297eac1 100644 --- a/tests/test_agent_updates.py +++ b/tests/test_agent_updates.py @@ -2,9 +2,14 @@ from __future__ import annotations +import json import subprocess -from ucode.agent_updates import available_npm_package_update +from ucode.agent_updates import ( + available_npm_package_update, + latest_version_below, + published_versions, +) def test_returns_none_when_npm_missing(monkeypatch): @@ -49,3 +54,68 @@ def test_returns_none_for_malformed_output(monkeypatch): ) assert available_npm_package_update("opencode-ai") is None + + +_GEMINI_VERSIONS = [ + "0.43.0", + "0.44.0-nightly.20260515.g928a311fb", + "0.44.0", + "0.44.1", + "0.45.0-nightly.20260602.g665228e98", + "0.45.0-preview.0", +] + + +def _fake_published(monkeypatch, versions): + monkeypatch.setattr("ucode.agent_updates.shutil.which", lambda _: "/usr/bin/npm") + monkeypatch.setattr( + 
"ucode.agent_updates.subprocess.run", + lambda *args, **kwargs: subprocess.CompletedProcess( + args[0], 0, stdout=json.dumps(versions), stderr="" + ), + ) + + +class TestPublishedVersions: + def test_returns_empty_when_npm_missing(self, monkeypatch): + monkeypatch.setattr("ucode.agent_updates.shutil.which", lambda _: None) + assert published_versions("@google/gemini-cli") == [] + + def test_parses_version_list(self, monkeypatch): + _fake_published(monkeypatch, _GEMINI_VERSIONS) + assert published_versions("@google/gemini-cli") == _GEMINI_VERSIONS + + def test_wraps_single_string_response(self, monkeypatch): + _fake_published(monkeypatch, "0.44.1") + assert published_versions("@google/gemini-cli") == ["0.44.1"] + + +class TestLatestVersionBelow: + def test_picks_newest_stable_below_ceiling(self, monkeypatch): + _fake_published(monkeypatch, _GEMINI_VERSIONS) + # 0.44.1 is the newest base < 0.45.0, and it is stable. + assert latest_version_below("@google/gemini-cli", (0, 45, 0)) == "0.44.1" + + def test_excludes_versions_at_or_above_ceiling(self, monkeypatch): + _fake_published(monkeypatch, _GEMINI_VERSIONS) + result = latest_version_below("@google/gemini-cli", (0, 45, 0)) + assert result is not None + assert not result.startswith("0.45") + + def test_prefers_stable_over_prerelease_at_same_base(self, monkeypatch): + _fake_published( + monkeypatch, + ["0.44.0-nightly.20260515.g928a311fb", "0.44.0", "0.44.0-preview.0"], + ) + assert latest_version_below("@google/gemini-cli", (0, 45, 0)) == "0.44.0" + + def test_falls_back_to_prerelease_when_no_stable(self, monkeypatch): + _fake_published(monkeypatch, ["0.44.0-nightly.20260515.g928a311fb"]) + assert ( + latest_version_below("@google/gemini-cli", (0, 45, 0)) + == "0.44.0-nightly.20260515.g928a311fb" + ) + + def test_returns_none_when_nothing_qualifies(self, monkeypatch): + _fake_published(monkeypatch, ["0.45.0", "0.46.0"]) + assert latest_version_below("@google/gemini-cli", (0, 45, 0)) is None diff --git 
a/tests/test_agents_init.py b/tests/test_agents_init.py index a60255b..896eba0 100644 --- a/tests/test_agents_init.py +++ b/tests/test_agents_init.py @@ -339,6 +339,87 @@ def fake_run(args, **kwargs): ) assert calls and calls[0][:3] == ["npm", "install", "-g"] + def test_too_new_tool_warns_and_downgrades_on_confirm(self, monkeypatch, capsys): + """An installed build past its supported ceiling is offered as a + downgrade (to a pinned working version), not an upgrade.""" + calls: list[list[str]] = [] + prompt_calls: list[str] = [] + + def fake_which(binary: str) -> str | None: + return f"/usr/bin/{binary}" + + def fake_run(args, **kwargs): + calls.append(args) + return subprocess.CompletedProcess(args, 0) + + monkeypatch.setattr("ucode.agents.shutil.which", fake_which) + monkeypatch.setattr("ucode.agents.subprocess.run", fake_run) + monkeypatch.setattr("ucode.agents.gemini.too_new_downgrade", lambda: ("0.45.0", "0.44.1")) + # The optional-update path must never be reached for a too-new tool. 
+ monkeypatch.setattr( + "ucode.agents._confirm_update_installed_tool_binary", + lambda _: (_ for _ in ()).throw(AssertionError("should not reach optional update")), + ) + monkeypatch.setattr( + "ucode.agents.prompt_yes_no", lambda prompt: prompt_calls.append(prompt) or True + ) + + assert install_tool_binary("gemini", strict=False, update_existing=True) is True + assert prompt_calls == ["Downgrade Gemini CLI from 0.45.0 to 0.44.1?"] + assert calls == [["npm", "install", "-g", "@google/gemini-cli@0.44.1"]] + out = capsys.readouterr().out + assert "newer than the latest version known to work" in out + + def test_too_new_tool_warns_but_keeps_version_on_decline(self, monkeypatch, capsys): + calls: list[list[str]] = [] + + def fake_which(binary: str) -> str | None: + return f"/usr/bin/{binary}" + + def fake_run(args, **kwargs): + calls.append(args) + return subprocess.CompletedProcess(args, 0) + + monkeypatch.setattr("ucode.agents.shutil.which", fake_which) + monkeypatch.setattr("ucode.agents.subprocess.run", fake_run) + monkeypatch.setattr("ucode.agents.gemini.too_new_downgrade", lambda: ("0.45.0", "0.44.1")) + monkeypatch.setattr("ucode.agents.prompt_yes_no", lambda prompt: False) + + assert install_tool_binary("gemini", strict=False, update_existing=True) is True + assert calls == [] + assert "newer than the latest version known to work" in capsys.readouterr().out + + def test_too_new_tool_warns_without_prompt_when_updates_disabled(self, monkeypatch, capsys): + """With prompts suppressed we still warn, but never downgrade.""" + calls: list[list[str]] = [] + + def fake_which(binary: str) -> str | None: + return f"/usr/bin/{binary}" + + def fake_run(args, **kwargs): + calls.append(args) + return subprocess.CompletedProcess(args, 0) + + monkeypatch.setattr("ucode.agents.shutil.which", fake_which) + monkeypatch.setattr("ucode.agents.subprocess.run", fake_run) + monkeypatch.setattr("ucode.agents.gemini.too_new_downgrade", lambda: ("0.45.0", "0.44.1")) + 
monkeypatch.setattr( + "ucode.agents.prompt_yes_no", + lambda prompt: (_ for _ in ()).throw(AssertionError("should not prompt")), + ) + + assert ( + install_tool_binary( + "gemini", + strict=False, + update_existing=True, + prompt_optional_updates=False, + ) + is True + ) + assert calls == [] + assert "newer than the latest version known to work" in capsys.readouterr().out + def test_update_failure_keeps_existing_binary_available(self, monkeypatch): def fake_which(binary: str) -> str | None: return f"/usr/bin/{binary}" diff --git a/tests/test_databricks.py b/tests/test_databricks.py index fffdba5..3401508 100644 --- a/tests/test_databricks.py +++ b/tests/test_databricks.py @@ -131,6 +131,102 @@ def test_selects_opus_4_8_when_advertised(self, monkeypatch): assert models["opus"] == "databricks-claude-opus-4-8" +def _foundation_models_payload(names): + return { + "endpoints": [ + { + "name": name, + "config": { + "served_entities": [ + { + "foundation_model": { + "ai_gateway_v2_supported": True, + "api_types": ["gemini/v1/generateContent"], + } + } + ] + }, + } + for name in names + ] + } + + +class TestModelVersionSortKey: + def test_orders_newest_version_first(self): + names = [ + "databricks-gemini-2-5-flash", + "databricks-gemini-2-5-pro", + "databricks-gemini-3-1-flash-lite", + "databricks-gemini-3-1-pro", + "databricks-gemini-3-5-flash", + "databricks-gemini-3-flash", + "databricks-gemini-3-pro", + ] + ordered = sorted(names, key=db_mod.model_version_sort_key) + assert ordered[0] == "databricks-gemini-3-5-flash" + + def test_treats_bare_major_as_dot_zero(self): + # 3-flash is 3.0, so 3-5-flash (3.5) must sort ahead of it. 
+ names = ["databricks-gemini-3-flash", "databricks-gemini-3-5-flash"] + ordered = sorted(names, key=db_mod.model_version_sort_key) + assert ordered == [ + "databricks-gemini-3-5-flash", + "databricks-gemini-3-flash", + ] + + def test_unversioned_names_sort_last_alphabetically(self): + names = ["databricks-gemini-2-5-flash", "custom-endpoint", "another-endpoint"] + ordered = sorted(names, key=db_mod.model_version_sort_key) + assert ordered[0] == "databricks-gemini-2-5-flash" + assert ordered[1:] == ["another-endpoint", "custom-endpoint"] + + +class TestDiscoverGeminiModels: + def test_returns_newest_flash_first(self, monkeypatch): + payload = _foundation_models_payload( + [ + "databricks-gemini-2-5-flash", + "databricks-gemini-3-5-flash", + "databricks-gemini-3-flash", + ] + ) + monkeypatch.setattr(db_mod, "_http_get_json", lambda url, token: (payload, None)) + + models, reason = db_mod.discover_gemini_models(WS, "token") + + assert reason is None + assert models[0] == "databricks-gemini-3-5-flash" + + def test_codex_discovery_keeps_alphabetical_order(self, monkeypatch): + # Codex passes no sort_key, so ordering must stay the plain alphabetical + # default — guarding against the gemini change leaking across tools. 
+ payload = { + "endpoints": [ + { + "name": name, + "config": { + "served_entities": [ + { + "foundation_model": { + "ai_gateway_v2_supported": True, + "api_types": ["openai/v1/responses"], + } + } + ] + }, + } + for name in ["databricks-gpt-5-2-codex", "databricks-gpt-4-1"] + ] + } + monkeypatch.setattr(db_mod, "_http_get_json", lambda url, token: (payload, None)) + + models, reason = db_mod.discover_codex_models(WS, "token") + + assert reason is None + assert models == ["databricks-gpt-4-1", "databricks-gpt-5-2-codex"] + + class TestBuildAuthShellCommand: def test_contains_workspace(self): cmd = build_auth_shell_command(WS) diff --git a/tests/test_e2e.py b/tests/test_e2e.py index bd3812c..f0c64e0 100644 --- a/tests/test_e2e.py +++ b/tests/test_e2e.py @@ -478,14 +478,6 @@ def test_launch_claude_per_model( class TestGeminiLaunch: """Run gemini against every available gemini model.""" - @pytest.mark.skipif( - os.environ.get("GITHUB_ACTIONS") == "true", - reason=( - "Skipped in CI: the gemini CLI version installed on the runner rewrites " - "model ids like 'databricks-gemini-3-5-flash' to 'gemini-3.5-flash', which " - "Unity Catalog rejects as an invalid endpoint name. Tracked separately." - ), - ) def test_launch_gemini_per_model( self, tmp_path, monkeypatch, e2e_state, e2e_workspace, e2e_token ): @@ -493,6 +485,17 @@ def test_launch_gemini_per_model( from ucode.agents import gemini, validate_tool _require_binary("gemini") + # Gemini CLI >= 0.45 rewrites forced flash model ids (e.g. + # 'databricks-gemini-3-5-flash') to 'gemini-3.5-flash', which Unity + # Catalog rejects. ucode caps the supported version below 0.45 and + # offers a downgrade; skip here if the runner still has a too-new build + # rather than asserting against a version we deliberately don't support. 
+ too_new = gemini.too_new_version() + if too_new is not None: + pytest.skip( + f"Installed Gemini CLI {too_new} is past the supported ceiling " + f"({gemini.MAX_GEMINI_VERSION_TEXT}); run `ucode gemini` to downgrade." + ) gemini_models: list = e2e_state.get("gemini_models") or [] if not gemini_models: pytest.skip("No Gemini models available on this workspace")