From 12436c714cba52b0d43c5827ed62175d217954b0 Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Fri, 12 Jun 2026 19:32:34 +0400 Subject: [PATCH 1/2] =?UTF-8?q?feat(model):=20BYOK=20provider=20registry?= =?UTF-8?q?=20=E2=80=94=20one-command=20setup=20for=20Venice,=20OpenRouter?= =?UTF-8?q?=20&=20co.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Collapses provider knowledge (previously smeared across knownProviders, the setup dispatch switch, a default-model switch, detectCredentials, and buildModelEntries) into a single source-of-truth registry: one ProviderInfo row per provider carries id/env-var/Mode/BaseURL/Default/SignupURL/Free, and every layer reads from it. Adding a provider is now one row, no switch edits. New BYOK getting-started path (distinct from `obol buy inference`, which is x402 crypto-paid sellers): obol model setup venice --api-key $VENICE_API_KEY obol model setup openrouter --api-key $OPENROUTER_API_KEY --free - Mode=openai-compatible providers emit model_list entries shaped as openai/MODEL_ID + explicit api_base + key from env var (no wildcard). litellm-secrets is envFrom-mounted and record.go stores entries verbatim, so new providers persist across stack up with zero changes to those layers. - --model omitted → registry Default, else live GET /v1/models (TTY picker / non-TTY error naming real ids) so we never ship guessed, rotating model ids as a hardcoded default. - --free seeds OpenRouter's curated free-tier snapshot (mapped from hermes-agent's free list). - detectCredentials + detectProvider are now registry-driven (BYOK env keys auto-detected; aggregator models labelled by api_base, not 'openai'). Providers seeded: venice, openrouter, nvidia, gmi, novita, huggingface (plus existing anthropic/openai/ollama). All OpenAI-compatible, so they are pure data — no new wiring. 
--- CLAUDE.md | 7 + cmd/obol/model.go | 175 +++++++++++++++---------- cmd/obol/model_test.go | 46 +++++++ internal/model/model.go | 244 +++++++++++++++++++++++++++++++---- internal/model/model_test.go | 79 ++++++++++++ 5 files changed, 459 insertions(+), 92 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index b52da276..32fe9385 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -262,6 +262,13 @@ Caveats: **Auto-configuration**: `obol stack up` → `autoConfigureLLM()` detects host Ollama models, patches LiteLLM config. `obolup.sh` → `check_agent_model_api_key()` reads `~/.openclaw/openclaw.json`, resolves API key from `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` (Anthropic) or `OPENAI_API_KEY` (OpenAI), exports for downstream. +**BYOK cloud providers** (easiest getting-started path) — provider knowledge is a single registry in `internal/model/model.go` (`knownProviders` / `ProviderInfo` with `Mode`/`BaseURL`/`Default`/`Free`); adding a provider is one row, no per-provider switch. `obol model setup --api-key ` wires the agent's LiteLLM brain in one command. Built-in: `anthropic`, `openai`, `ollama` (native/local) + OpenAI-compatible aggregators `venice`, `openrouter`, `nvidia`, `gmi`, `novita`, `huggingface` (`Mode=openai-compatible` → `model_list` entry `openai/` + explicit `api_base` + key from the provider's env var; no wildcard). When `--model` is omitted, setup uses the registry `Default` or lists the live `GET /v1/models` (TTY picker / non-TTY error naming real ids). `obol model setup openrouter --free` seeds only the curated free-tier model snapshot. Distinct from `obol buy inference`, which is x402 crypto-paid sellers, NOT BYOK. Unlisted endpoints still use `obol model setup custom`. 
+ +```bash +obol model setup venice --api-key $VENICE_API_KEY # one command, agent ready +obol model setup openrouter --api-key $OPENROUTER_API_KEY --free +``` + **External OpenAI-compatible LLM** (vLLM / sglang / mlx-lm / remote GPU) — canonical user flow, no ConfigMap surgery: ```bash diff --git a/cmd/obol/model.go b/cmd/obol/model.go index 729aee07..4ef123e6 100644 --- a/cmd/obol/model.go +++ b/cmd/obol/model.go @@ -5,7 +5,6 @@ import ( "errors" "fmt" "math/big" - "os" "sort" "strconv" "strings" @@ -67,16 +66,20 @@ func modelSetupCommand(cfg *config.Config) *cli.Command { Flags: []cli.Flag{ &cli.StringFlag{ Name: "provider", - Usage: "Provider name: anthropic, openai, or ollama", + Usage: "Provider id (anthropic, openai, ollama, venice, openrouter, nvidia, gmi, novita, huggingface). Run with no flags to pick interactively.", }, &cli.StringFlag{ Name: "api-key", - Usage: "API key for the provider", + Usage: "API key for the provider (BYOK; also read from the provider's env var if set)", Sources: cli.EnvVars("LLM_API_KEY"), }, &cli.StringSliceFlag{ Name: "model", - Usage: "Model(s) to configure (e.g. claude-sonnet-4-5-20250929, gpt-4o)", + Usage: "Model(s) to configure (e.g. claude-sonnet-4-6, gpt-5.5, or an aggregator model id)", + }, + &cli.BoolFlag{ + Name: "free", + Usage: "Seed only the provider's curated free-tier models (OpenRouter)", }, }, Commands: []*cli.Command{ @@ -120,15 +123,17 @@ func modelSetupCommand(cfg *config.Config) *cli.Command { } } - // Provider-specific flow - switch provider { - case "ollama": + // Provider-specific flow — dispatch off the registry, not a + // hardcoded switch. Ollama is local; everything else is a + // key-based cloud/BYOK provider handled by one generic path. 
+ prof, ok := model.ProviderByID(provider) + if !ok { + return fmt.Errorf("unknown provider %q — run `obol model setup` (no flags) to pick from the list, or `obol model setup custom --endpoint … --model …` for an unlisted OpenAI-compatible endpoint", provider) + } + if prof.ID == model.ProviderOllama { return setupOllama(cfg, u, models) - case "anthropic", "openai": - return setupCloudProvider(cfg, u, provider, apiKey, models) - default: - return fmt.Errorf("unknown provider %q — use anthropic, openai, or ollama", provider) } + return setupCloudProvider(cfg, u, prof, apiKey, models, cmd.Bool("free")) }, } } @@ -187,13 +192,14 @@ func setupOllama(cfg *config.Config, u *ui.UI, models []string) error { return promoteAndSync(cfg, u, explicit) } -func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, models []string) error { +func setupCloudProvider(cfg *config.Config, u *ui.UI, prof model.ProviderInfo, apiKey string, models []string, free bool) error { if apiKey == "" { - var err error - - info := providerInfo(provider) + if prof.SignupURL != "" { + u.Dim(fmt.Sprintf("Get a %s API key: %s", prof.Name, prof.SignupURL)) + } - apiKey, err = u.SecretInput(fmt.Sprintf("%s API key (%s)", info.Name, info.EnvVar)) + var err error + apiKey, err = u.SecretInput(fmt.Sprintf("%s API key (%s)", prof.Name, prof.EnvVar)) if err != nil { return err } @@ -203,38 +209,34 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m } } - if len(models) == 0 { - // Per-provider defaults — kept in sync with what the providers - // document as their current chat-tuned flagship. Bumping these is a - // small follow-up PR when frontier models drop, and it isolates the - // "what's good today" maintenance to one place. 
- var defaultModel string - switch provider { - case "anthropic": - defaultModel = "claude-sonnet-4-6" - case "openai": - defaultModel = "gpt-5.5" + // --free: seed the provider's curated free-tier models (unless the + // operator already named explicit --model values). + if free { + if len(prof.Free) == 0 { + return fmt.Errorf("--free is not available for %s (no curated free models); pass --model instead", prof.Name) } + if len(models) == 0 { + models = append([]string(nil), prof.Free...) + u.Infof("Seeding %d curated free %s model(s)", len(models), prof.Name) + } + } - // Interactive: let the user override the default with a free-text - // entry. Non-interactive (no TTY): silently use the default — the - // caller can always pass --model to be explicit. - chosen := defaultModel - if defaultModel != "" && u.IsTTY() && !u.IsJSON() { - input, err := u.Input(fmt.Sprintf("Model for %s", provider), defaultModel) - if err != nil { - return err - } - if strings.TrimSpace(input) != "" { - chosen = strings.TrimSpace(input) - } + // Resolve a model when none was given: the registry Default, else (for + // BYOK aggregators with a rotating catalog) the live /v1/models list. + if len(models) == 0 { + chosen, err := resolveSetupModel(u, prof, apiKey) + if err != nil { + return err } if chosen != "" { models = []string{chosen} } } + if len(models) == 0 { + return fmt.Errorf("no model selected for %s — pass --model ", prof.Name) + } - if err := model.ConfigureLiteLLM(cfg, u, provider, apiKey, models); err != nil { + if err := model.ConfigureLiteLLM(cfg, u, prof.ID, apiKey, models); err != nil { u.Print("") u.Print(" Hint: Configuration stored in: litellm-config ConfigMap (llm namespace)") @@ -247,6 +249,58 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m return promoteAndSync(cfg, u, models) } +// resolveSetupModel picks a model when the operator passed none. A registry +// Default wins (overridable in a TTY). 
With no static default — BYOK +// aggregators whose catalog rotates — it lists the live /v1/models endpoint: +// a picker in a TTY, otherwise an error naming real ids so the operator can +// re-run with --model. Returns "" only when there is genuinely nothing to +// pick (the caller then errors). +func resolveSetupModel(u *ui.UI, prof model.ProviderInfo, apiKey string) (string, error) { + if prof.Default != "" { + if u.IsTTY() && !u.IsJSON() { + input, err := u.Input(fmt.Sprintf("Model for %s", prof.ID), prof.Default) + if err != nil { + return "", err + } + if strings.TrimSpace(input) != "" { + return strings.TrimSpace(input), nil + } + } + return prof.Default, nil + } + + if !prof.IsBYOK() { + return "", nil + } + + ids, err := model.FetchOpenAICompatibleModels(prof.BaseURL, apiKey) + if err != nil { + u.Dim(fmt.Sprintf("Couldn't list %s models (%v)", prof.Name, err)) + if u.IsTTY() && !u.IsJSON() { + return u.Input(fmt.Sprintf("Model id for %s", prof.Name), "") + } + return "", fmt.Errorf("could not resolve a model for %s: pass --model (keys/models at %s)", prof.Name, prof.SignupURL) + } + + if u.IsTTY() && !u.IsJSON() { + shown := ids + if len(shown) > 30 { + shown = shown[:30] + } + idx, err := u.Select(fmt.Sprintf("Select a %s model:", prof.Name), shown, 0) + if err != nil { + return "", err + } + return shown[idx], nil + } + + sample := ids + if len(sample) > 8 { + sample = sample[:8] + } + return "", fmt.Errorf("pass --model for %s; available include: %s", prof.Name, strings.Join(sample, ", ")) +} + // syncAgentModels re-renders the stack-managed Hermes default agent from the // current LiteLLM model inventory. 
func syncAgentModels(cfg *config.Config, u *ui.UI) error { @@ -852,17 +906,6 @@ func modelRemoveCommand(cfg *config.Config) *cli.Command { } } -func providerInfo(id string) model.ProviderInfo { - providers, _ := model.GetAvailableProviders(nil) - for _, p := range providers { - if p.ID == id { - return p - } - } - - return model.ProviderInfo{ID: id, Name: id} -} - // detectedCredential describes a credential found in the environment. type detectedCredential struct { key string // the actual API key value (empty for Ollama) @@ -875,22 +918,22 @@ type detectedCredential struct { func detectCredentials() map[string]detectedCredential { creds := make(map[string]detectedCredential) - // Anthropic: check ANTHROPIC_API_KEY, then CLAUDE_CODE_OAUTH_TOKEN - if key := os.Getenv("ANTHROPIC_API_KEY"); key != "" { - creds["anthropic"] = detectedCredential{key: key, source: "ANTHROPIC_API_KEY"} - } else if key := os.Getenv("CLAUDE_CODE_OAUTH_TOKEN"); key != "" { - creds["anthropic"] = detectedCredential{key: key, source: "CLAUDE_CODE_OAUTH_TOKEN"} - } - - // OpenAI: check OPENAI_API_KEY - if key := os.Getenv("OPENAI_API_KEY"); key != "" { - creds["openai"] = detectedCredential{key: key, source: "OPENAI_API_KEY"} - } + // Registry-driven: every provider's primary + alternate env vars are + // checked via model.ResolveAPIKey, so a new provider row auto-detects + // without editing this function. Ollama has no key — probe reachability. 
+ providers, _ := model.GetAvailableProviders(nil) + for _, p := range providers { + if p.ID == model.ProviderOllama { + if ollamaModels, err := model.ListOllamaModels(); err == nil && len(ollamaModels) > 0 { + creds[p.ID] = detectedCredential{ + source: fmt.Sprintf("%d model(s) available", len(ollamaModels)), + } + } + continue + } - // Ollama: check if reachable with models - if ollamaModels, err := model.ListOllamaModels(); err == nil && len(ollamaModels) > 0 { - creds["ollama"] = detectedCredential{ - source: fmt.Sprintf("%d model(s) available", len(ollamaModels)), + if key, envVar := model.ResolveAPIKey(p.ID); key != "" { + creds[p.ID] = detectedCredential{key: key, source: envVar} } } diff --git a/cmd/obol/model_test.go b/cmd/obol/model_test.go index 972ef90d..86f9b7f3 100644 --- a/cmd/obol/model_test.go +++ b/cmd/obol/model_test.go @@ -3,6 +3,9 @@ package main import ( "testing" + "github.com/ObolNetwork/obol-stack/internal/model" + "github.com/urfave/cli/v3" + "github.com/ObolNetwork/obol-stack/internal/config" ) @@ -69,3 +72,46 @@ func TestSetupPromoteList(t *testing.T) { } }) } + +// TestModelSetup_BYOKFlags pins the BYOK onboarding surface: the setup +// command exposes --provider/--api-key/--model/--free, and every BYOK +// aggregator in the registry dispatches through the generic cloud path +// (only Ollama is special-cased). 
+func TestModelSetup_BYOKFlags(t *testing.T) { + cfg := &config.Config{} + var setup *cli.Command + for _, sub := range modelCommand(cfg).Commands { + if sub.Name == "setup" { + setup = sub + } + } + if setup == nil { + t.Fatal("model setup command missing") + } + + want := map[string]bool{"provider": false, "api-key": false, "model": false, "free": false} + for _, f := range setup.Flags { + for _, n := range f.Names() { + if _, ok := want[n]; ok { + want[n] = true + } + } + } + for n, found := range want { + if !found { + t.Errorf("model setup missing --%s flag", n) + } + } + + // The registry must carry the BYOK providers this PR adds. + for _, id := range []string{"venice", "openrouter", "nvidia", "gmi", "novita", "huggingface"} { + p, ok := model.ProviderByID(id) + if !ok { + t.Errorf("provider %q missing from registry", id) + continue + } + if p.BaseURL == "" { + t.Errorf("provider %q has no BaseURL", id) + } + } +} diff --git a/internal/model/model.go b/internal/model/model.go index 4614c4c9..e463a74d 100644 --- a/internal/model/model.go +++ b/internal/model/model.go @@ -37,19 +37,171 @@ const ( ProviderOpenAI = "openai" ) -// Known provider definitions — no need to query the running pod. -var knownProviders = []ProviderInfo{ - {ID: ProviderAnthropic, Name: "Anthropic", EnvVar: "ANTHROPIC_API_KEY", AltEnvVars: []string{"CLAUDE_CODE_OAUTH_TOKEN"}}, - {ID: ProviderOpenAI, Name: "OpenAI", EnvVar: "OPENAI_API_KEY"}, - {ID: ProviderOllama, Name: "Ollama (local)", EnvVar: ""}, -} +// apiMode selects how a provider's LiteLLM model_list entries are shaped. +type apiMode string + +const ( + // modeAnthropic: native LiteLLM anthropic routing + prompt-cache markers + // + an anthropic/* wildcard. Key read from EnvVar. + modeAnthropic apiMode = "anthropic" + // modeOpenAI: native LiteLLM openai/ routing + an openai/* wildcard. + modeOpenAI apiMode = "openai" + // modeOllama: local ollama_chat/ entries pointed at the in-cluster Ollama. 
+ modeOllama apiMode = "ollama" + // modeOpenAICompatible: any OpenAI-compatible BYOK aggregator (OpenRouter, + // Venice, NVIDIA, …). Explicit entries only, Model="openai/" with an + // explicit api_base = BaseURL and key read from EnvVar. No wildcard: + // aggregator namespaces are huge and overlapping, so we register only the + // models the operator asked for. + modeOpenAICompatible apiMode = "openai-compatible" +) -// ProviderInfo describes an LLM provider. +// ProviderInfo describes an LLM provider. knownProviders is the single +// source of truth: adding a provider is one row here, and every layer (the +// setup CLI, default-model selection, LiteLLM entry shaping, status, and +// the persisted record) reads from this struct instead of a per-provider +// switch. type ProviderInfo struct { - ID string // provider id (e.g. "anthropic", "openai", "ollama") + ID string // provider id (e.g. "anthropic", "openai", "venice") Name string // display name EnvVar string // primary env var for API key (empty for Ollama) AltEnvVars []string // fallback env vars checked in order (e.g. CLAUDE_CODE_OAUTH_TOKEN) + Mode apiMode // how model_list entries are shaped + BaseURL string // OpenAI-compatible base_url (modeOpenAICompatible only) + Default string // default chat model when --model is omitted ("" = ask/require) + SignupURL string // where to obtain an API key (shown as a hint) + Free []string // curated zero-marginal-cost model ids (seeded by --free) +} + +// IsBYOK reports whether the provider is a BYOK OpenAI-compatible +// aggregator reached over the public internet (as opposed to a native +// provider or the local Ollama). +func (p ProviderInfo) IsBYOK() bool { return p.Mode == modeOpenAICompatible } + +// knownProviders is the registry of supported LLM providers. The first +// three are native/local; the rest are BYOK OpenAI-compatible aggregators — +// each is pure data, no bespoke wiring. 
base_url values are intentionally +// without a trailing /v1 where LiteLLM appends it; aggregator paths that +// already include /v1 keep it (LiteLLM only auto-appends for bare hosts). +var knownProviders = []ProviderInfo{ + { + ID: ProviderAnthropic, Name: "Anthropic", EnvVar: "ANTHROPIC_API_KEY", + AltEnvVars: []string{"CLAUDE_CODE_OAUTH_TOKEN"}, Mode: modeAnthropic, + Default: "claude-sonnet-4-6", SignupURL: "https://console.anthropic.com/settings/keys", + }, + { + ID: ProviderOpenAI, Name: "OpenAI", EnvVar: "OPENAI_API_KEY", Mode: modeOpenAI, + Default: "gpt-5.5", SignupURL: "https://platform.openai.com/api-keys", + }, + { + ID: ProviderOllama, Name: "Ollama (local)", EnvVar: "", Mode: modeOllama, + }, + // ── BYOK OpenAI-compatible aggregators (the easy getting-started path) ── + // model_list entries are pure data: Model="openai/", api_base=BaseURL, + // key from EnvVar. Default models that can't be statically pinned (the + // aggregator's catalog rotates) are left blank — setup then resolves a + // model from the live /v1/models list or --model. + { + ID: "venice", Name: "Venice", EnvVar: "VENICE_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://api.venice.ai/api/v1", SignupURL: "https://venice.ai/settings/api", + }, + { + ID: "openrouter", Name: "OpenRouter", EnvVar: "OPENROUTER_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://openrouter.ai/api/v1", Default: "openrouter/auto", + SignupURL: "https://openrouter.ai/keys", + // Curated zero-cost models (snapshot — OpenRouter's free roster + // rotates; pass --model for any other). Seeded by `--free`. 
+ Free: []string{ + "openrouter/elephant-alpha", + "openrouter/owl-alpha", + "poolside/laguna-m.1:free", + "tencent/hy3-preview:free", + "nvidia/nemotron-3-super-120b-a12b:free", + "nvidia/nemotron-3-ultra-550b-a55b:free", + "inclusionai/ring-2.6-1t:free", + }, + }, + { + ID: "nvidia", Name: "NVIDIA NIM", EnvVar: "NVIDIA_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://integrate.api.nvidia.com/v1", SignupURL: "https://build.nvidia.com", + }, + { + ID: "gmi", Name: "GMI Cloud", EnvVar: "GMI_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://api.gmi-serving.com/v1", SignupURL: "https://console.gmicloud.ai", + }, + { + ID: "novita", Name: "Novita", EnvVar: "NOVITA_API_KEY", Mode: modeOpenAICompatible, + BaseURL: "https://api.novita.ai/openai/v1", SignupURL: "https://novita.ai/settings/key-management", + }, + { + ID: "huggingface", Name: "Hugging Face Router", EnvVar: "HF_TOKEN", Mode: modeOpenAICompatible, + BaseURL: "https://router.huggingface.co/v1", SignupURL: "https://huggingface.co/settings/tokens", + }, +} + +// ProviderByID returns the registry entry for id and whether it was found. +func ProviderByID(id string) (ProviderInfo, bool) { + for _, p := range knownProviders { + if p.ID == id { + return p, true + } + } + return ProviderInfo{}, false +} + +// FetchOpenAICompatibleModels lists model ids from a provider's +// OpenAI-compatible GET /models endpoint. Used at setup time to +// resolve a real model id when an aggregator has no statically-pinnable +// default (its catalog rotates). Best-effort: a non-200, a network error, +// or an unparseable body returns an error the caller falls back from +// (prompt for / require --model). The just-entered apiKey authenticates +// the call from the host. 
+func FetchOpenAICompatibleModels(baseURL, apiKey string) ([]string, error) { + endpoint := strings.TrimRight(baseURL, "/") + "/models" + ctx, cancel := context.WithTimeout(context.Background(), 12*time.Second) + defer cancel() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil) + if err != nil { + return nil, err + } + if apiKey != "" { + req.Header.Set("Authorization", "Bearer "+apiKey) + } + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20)) + if err != nil { + return nil, err + } + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("models endpoint returned %d", resp.StatusCode) + } + + var parsed struct { + Data []struct { + ID string `json:"id"` + } `json:"data"` + } + if err := json.Unmarshal(body, &parsed); err != nil { + return nil, fmt.Errorf("parse models response: %w", err) + } + + ids := make([]string, 0, len(parsed.Data)) + for _, m := range parsed.Data { + if m.ID != "" { + ids = append(ids, m.ID) + } + } + if len(ids) == 0 { + return nil, errors.New("models endpoint returned no models") + } + return ids, nil } // ProviderStatus captures effective global LiteLLM provider state. @@ -1308,16 +1460,42 @@ var WellKnownModels = map[string][]string{ }, } -// buildModelEntries creates LiteLLM model_list entries for a provider. -// Cloud providers (anthropic, openai) get a wildcard entry plus explicit -// entries for the requested models. Ollama gets explicit entries only -// (wildcards are broken for ollama_chat/). +// buildModelEntries creates LiteLLM model_list entries for a provider, +// shaped by its registry Mode: +// - anthropic/openai: explicit entries (so the chosen model wins Rank's +// "first chat-capable" rule) followed by a /* wildcard. +// - ollama: explicit ollama_chat/ entries only (wildcards are broken). 
+// - openai-compatible: explicit openai/ entries with an explicit +// api_base = BaseURL and key from EnvVar — no wildcard. +// +// A provider not in the registry falls back to the generic openai/ +// shape keyed on _API_KEY (legacy `setup custom` behavior). func buildModelEntries(provider string, models []string) []ModelEntry { - var entries []ModelEntry + p, ok := ProviderByID(provider) + if !ok { + // Unknown provider: legacy generic shape (no api_base). + var entries []ModelEntry + for _, m := range models { + entries = append(entries, ModelEntry{ + ModelName: m, + LiteLLMParams: LiteLLMParams{ + Model: provider + "/" + m, + APIKey: fmt.Sprintf("os.environ/%s_API_KEY", strings.ToUpper(provider)), + }, + }) + } + return entries + } + + keyRef := "" + if p.EnvVar != "" { + keyRef = "os.environ/" + p.EnvVar + } - switch provider { - case ProviderOllama: - // Explicit entries — ollama_chat/* wildcards are broken in LiteLLM + var entries []ModelEntry + switch p.Mode { + case modeOllama: + // Explicit entries — ollama_chat/* wildcards are broken in LiteLLM. for _, m := range models { entries = append(entries, ModelEntry{ ModelName: m, @@ -1327,7 +1505,7 @@ func buildModelEntries(provider string, models []string) []ModelEntry { }, }) } - case ProviderAnthropic: + case modeAnthropic: cachePoints := anthropicCacheControlPoints() // Explicit entries first so the user-selected model is the primary // under model.Rank's "first chat-capable wins" rule. Hermes cannot @@ -1338,39 +1516,41 @@ func buildModelEntries(provider string, models []string) []ModelEntry { ModelName: m, LiteLLMParams: LiteLLMParams{ Model: m, - APIKey: "os.environ/ANTHROPIC_API_KEY", + APIKey: keyRef, CacheControlInjectionPoints: cachePoints, }, }) } - // Wildcard: routes any anthropic model without explicit registration. 
entries = append(entries, ModelEntry{ ModelName: "anthropic/*", LiteLLMParams: LiteLLMParams{ Model: "anthropic/*", - APIKey: "os.environ/ANTHROPIC_API_KEY", + APIKey: keyRef, CacheControlInjectionPoints: cachePoints, }, }) - case ProviderOpenAI: + case modeOpenAI: // Explicit-before-wildcard, same rationale as Anthropic above. for _, m := range models { entries = append(entries, ModelEntry{ ModelName: m, - LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: "os.environ/OPENAI_API_KEY"}, + LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: keyRef}, }) } entries = append(entries, ModelEntry{ ModelName: "openai/*", - LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"}, + LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: keyRef}, }) - default: + case modeOpenAICompatible: + // Explicit openai-shaped entries with an explicit api_base. No + // wildcard — the aggregator's catalog is huge and overlaps others. for _, m := range models { entries = append(entries, ModelEntry{ ModelName: m, LiteLLMParams: LiteLLMParams{ - Model: provider + "/" + m, - APIKey: fmt.Sprintf("os.environ/%s_API_KEY", strings.ToUpper(provider)), + Model: "openai/" + m, + APIBase: p.BaseURL, + APIKey: keyRef, }, }) } @@ -1464,6 +1644,18 @@ func detectProvider(entry ModelEntry) string { } model := entry.LiteLLMParams.Model + // BYOK aggregator entries are openai-shaped (openai/) but carry an + // explicit api_base — match it back to the registry so status groups + // them under their real provider (venice, openrouter, …) rather than + // "openai". Checked before the bare openai/ prefix below. 
+ if base := entry.LiteLLMParams.APIBase; base != "" && strings.HasPrefix(model, ProviderOpenAI+"/") { + for _, p := range knownProviders { + if p.Mode == modeOpenAICompatible && p.BaseURL == base { + return p.ID + } + } + } + // Wildcard entries if strings.HasPrefix(model, ProviderAnthropic+"/") { return ProviderAnthropic diff --git a/internal/model/model_test.go b/internal/model/model_test.go index 210ef676..59ec99fa 100644 --- a/internal/model/model_test.go +++ b/internal/model/model_test.go @@ -1166,3 +1166,82 @@ func modelNames(entries []ModelEntry) []string { } return out } + +func TestBuildModelEntries_OpenAICompatible(t *testing.T) { + entries := buildModelEntries("venice", []string{"venice-uncensored"}) + if len(entries) != 1 { + t.Fatalf("got %d entries, want 1 (aggregators get no wildcard)", len(entries)) + } + e := entries[0] + if e.ModelName != "venice-uncensored" { + t.Errorf("model_name = %q", e.ModelName) + } + if e.LiteLLMParams.Model != "openai/venice-uncensored" { + t.Errorf("model = %q, want openai/venice-uncensored", e.LiteLLMParams.Model) + } + if e.LiteLLMParams.APIBase != "https://api.venice.ai/api/v1" { + t.Errorf("api_base = %q, want venice base_url", e.LiteLLMParams.APIBase) + } + if e.LiteLLMParams.APIKey != "os.environ/VENICE_API_KEY" { + t.Errorf("api_key = %q, want os.environ/VENICE_API_KEY", e.LiteLLMParams.APIKey) + } +} + +func TestBuildModelEntries_UnknownProviderLegacyShape(t *testing.T) { + // Providers not in the registry keep the legacy generic shape (no api_base). 
+ entries := buildModelEntries("somevendor", []string{"m1"}) + if len(entries) != 1 || entries[0].LiteLLMParams.Model != "somevendor/m1" { + t.Fatalf("unexpected legacy entries: %+v", entries) + } + if entries[0].LiteLLMParams.APIBase != "" { + t.Errorf("legacy shape must not set api_base, got %q", entries[0].LiteLLMParams.APIBase) + } +} + +func TestProviderByID(t *testing.T) { + p, ok := ProviderByID("openrouter") + if !ok { + t.Fatal("openrouter must be in the registry") + } + if p.BaseURL == "" || p.EnvVar != "OPENROUTER_API_KEY" || len(p.Free) == 0 { + t.Errorf("openrouter row incomplete: %+v", p) + } + if _, ok := ProviderByID("nope"); ok { + t.Error("unknown provider must not be found") + } +} + +func TestDetectProvider_AggregatorByAPIBase(t *testing.T) { + venice := ModelEntry{ModelName: "x", LiteLLMParams: LiteLLMParams{ + Model: "openai/x", APIBase: "https://api.venice.ai/api/v1", + }} + if got := detectProvider(venice); got != "venice" { + t.Errorf("venice entry detected as %q, want venice", got) + } + // A native OpenAI entry (no api_base) must still read as openai. 
+ oai := ModelEntry{ModelName: "gpt-5.5", LiteLLMParams: LiteLLMParams{Model: "openai/gpt-5.5"}} + if got := detectProvider(oai); got != ProviderOpenAI { + t.Errorf("openai entry detected as %q, want openai", got) + } +} + +func TestProviderRegistry_Invariants(t *testing.T) { + seen := map[string]bool{} + for _, p := range knownProviders { + if seen[p.ID] { + t.Errorf("duplicate provider id %q", p.ID) + } + seen[p.ID] = true + if p.Mode == modeOpenAICompatible && (p.BaseURL == "" || p.EnvVar == "") { + t.Errorf("BYOK provider %q must set BaseURL and EnvVar", p.ID) + } + if len(p.Free) > 0 && p.Mode != modeOpenAICompatible { + t.Errorf("provider %q has Free models but is not openai-compatible", p.ID) + } + } + for _, id := range []string{ProviderAnthropic, ProviderOpenAI, ProviderOllama} { + if _, ok := ProviderByID(id); !ok { + t.Errorf("native provider %q missing from registry", id) + } + } +} From 60568bc5e998b7eabbf512efeb1dc1f15e8b0b9f Mon Sep 17 00:00:00 2001 From: bussyjd <145845+bussyjd@users.noreply.github.com> Date: Fri, 12 Jun 2026 19:42:46 +0400 Subject: [PATCH 2/2] =?UTF-8?q?feat(buy):=20obol=20buy=20inference=20=20=E2=80=94=20BYOK=20front=20door=20with=20open-URL=20onb?= =?UTF-8?q?oarding?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds the requested getting-started verb: `obol buy inference venice`, `obol buy inference openrouter --free`, etc. Dispatch keys on whether the positional arg matches a registry provider id — provider → BYOK onboarding, URL/none → the existing x402 crypto-paid seller flow (unchanged). Onboarding mirrors hermes-agent's api-key UX: resolve the key (--api-key → provider env var → prompt), open the provider's SignupURL in the browser (reuses openBrowser; skipped when a key is already in hand or non-TTY), then delegate to the shared setupCloudProvider engine (model resolution, --free seeding, LiteLLM patch, agent sync). No wallet, no x402 for this path. 
Also adds --api-key/--free to buy inference and corrects the CLAUDE.md framing (buy inference is now BOTH the BYOK door and the x402 door). --- CLAUDE.md | 11 +++-- cmd/obol/buy.go | 96 +++++++++++++++++++++++++++++++++++++------- cmd/obol/buy_test.go | 35 ++++++++++++++++ 3 files changed, 124 insertions(+), 18 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 32fe9385..1b279e77 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -262,11 +262,16 @@ Caveats: **Auto-configuration**: `obol stack up` → `autoConfigureLLM()` detects host Ollama models, patches LiteLLM config. `obolup.sh` → `check_agent_model_api_key()` reads `~/.openclaw/openclaw.json`, resolves API key from `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` (Anthropic) or `OPENAI_API_KEY` (OpenAI), exports for downstream. -**BYOK cloud providers** (easiest getting-started path) — provider knowledge is a single registry in `internal/model/model.go` (`knownProviders` / `ProviderInfo` with `Mode`/`BaseURL`/`Default`/`Free`); adding a provider is one row, no per-provider switch. `obol model setup --api-key ` wires the agent's LiteLLM brain in one command. Built-in: `anthropic`, `openai`, `ollama` (native/local) + OpenAI-compatible aggregators `venice`, `openrouter`, `nvidia`, `gmi`, `novita`, `huggingface` (`Mode=openai-compatible` → `model_list` entry `openai/` + explicit `api_base` + key from the provider's env var; no wildcard). When `--model` is omitted, setup uses the registry `Default` or lists the live `GET /v1/models` (TTY picker / non-TTY error naming real ids). `obol model setup openrouter --free` seeds only the curated free-tier model snapshot. Distinct from `obol buy inference`, which is x402 crypto-paid sellers, NOT BYOK. Unlisted endpoints still use `obol model setup custom`. 
+**BYOK cloud providers** (easiest getting-started path) — provider knowledge is a single registry in `internal/model/model.go` (`knownProviders` / `ProviderInfo` with `Mode`/`BaseURL`/`Default`/`SignupURL`/`Free`); adding a provider is one row, no per-provider switch. Built-in: `anthropic`, `openai`, `ollama` (native/local) + OpenAI-compatible aggregators `venice`, `openrouter`, `nvidia`, `gmi`, `novita`, `huggingface` (`Mode=openai-compatible` → `model_list` entry `openai/<model-id>` + explicit `api_base` + key from the provider's env var; no wildcard). When `--model` is omitted, setup uses the registry `Default` or lists the live `GET /v1/models` (TTY picker / non-TTY error naming real ids). `--free` seeds only the curated free-tier model snapshot (OpenRouter). + +Two front doors share one engine (`setupCloudProvider` in `cmd/obol/model.go`): +- `obol buy inference <provider>` — friendly onboarding: opens the provider's `SignupURL` in the browser (`openBrowser`, hermes-style), takes the key (`--api-key` → env var → prompt), wires LiteLLM + syncs agents. `obol buy inference` with a URL/no-arg is still the **x402 crypto-paid seller** path — dispatch keys on whether the positional arg matches a registry provider id. +- `obol model setup <provider> --api-key <key>` — the scriptable, no-browser equivalent. Unlisted endpoints still use `obol model setup custom`. 
```bash -obol model setup venice --api-key $VENICE_API_KEY # one command, agent ready -obol model setup openrouter --api-key $OPENROUTER_API_KEY --free +obol buy inference venice # opens venice key page, prompts, wires up +obol buy inference openrouter --free # seeds curated free models +obol model setup venice --api-key $VENICE_API_KEY # scriptable / CI ``` **External OpenAI-compatible LLM** (vLLM / sglang / mlx-lm / remote GPU) — canonical user flow, no ConfigMap surgery: diff --git a/cmd/obol/buy.go b/cmd/obol/buy.go index fb19d923..5356c4b9 100644 --- a/cmd/obol/buy.go +++ b/cmd/obol/buy.go @@ -66,28 +66,45 @@ func buyCommand(cfg *config.Config) *cli.Command { func buyInferenceCommand(cfg *config.Config) *cli.Command { return &cli.Command{ Name: "inference", - Usage: "Buy paid inference from an x402-gated seller via the obol-agent", - ArgsUsage: "[]", - Description: `Pre-authorizes an x402-gated inference seller through an obol-agent's wallet. + Usage: "Buy inference for your agents — a hosted BYOK provider (Venice, OpenRouter, …) or an x402-gated seller", + ArgsUsage: "[|]", + Description: `Two ways to give your agents inference: -Hand the command a seller URL — either a storefront base -("https://inference.v1337.org") or a specific offer -("https://inference.v1337.org/services/aeon") — and the CLI will walk -/api/services.json, pick the inference offer, and pre-sign authorizations -via the agent's remote signer. + 1. Hosted provider (BYOK) — hand the command a provider id and it opens + that provider's API-key page in your browser, takes the key, and wires + your agents' LiteLLM gateway to it: -With no URL, the public ` + x402verifier.DefaultBuySellerURL + ` storefront is used. + obol buy inference venice + obol buy inference openrouter --free -In a TTY, the CLI prompts for auto-refill, request count, and -confirmation. Pass --yes / -y for non-interactive runs (CI, scripts) — ---count is required in that mode. 
+ Built-in providers: venice, openrouter, nvidia, gmi, novita, + huggingface (plus anthropic, openai). The key is read from the + provider's env var when already set, so this stays non-interactive in CI. + + 2. x402-gated seller — hand it a seller URL (a storefront base like + "https://inference.v1337.org" or a specific offer ".../services/aeon") + and the CLI walks /api/services.json, picks the inference offer, and + pre-signs payment authorizations via the agent's remote signer. With no + argument, the public ` + x402verifier.DefaultBuySellerURL + ` storefront is used. + +In a TTY the seller flow prompts for auto-refill, request count, and +confirmation. Pass --yes / -y for non-interactive runs (--count required). Examples: - obol buy inference + obol buy inference venice + obol buy inference openrouter --free obol buy inference https://inference.v1337.org/services/aeon - obol buy inference https://seller.example/services/foo --yes --count 100 - obol buy inference https://seller.example/services/foo --auto-refill --refill-threshold 5 --refill-count 25`, + obol buy inference https://seller.example/services/foo --yes --count 100`, Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "api-key", + Usage: "API key for a hosted provider (BYOK). Also read from the provider's env var when set.", + Sources: cli.EnvVars("LLM_API_KEY"), + }, + &cli.BoolFlag{ + Name: "free", + Usage: "For a hosted provider that has them, seed only the curated free-tier models (OpenRouter)", + }, &cli.StringFlag{ Name: "seller", Usage: "Seller URL (alternative to positional). When neither is set the default storefront is used.", @@ -160,6 +177,43 @@ Examples: } } +// runBuyInferenceProvider is the BYOK front door: open the provider's +// API-key page (hermes-style openurl), take the key (--api-key → env → +// prompt), then wire the LiteLLM gateway via the shared model-setup +// engine. 
No wallet, no x402 — this is hosted inference with the user's +// own key, the easiest way to get an agent talking to a model. +func runBuyInferenceProvider(cfg *config.Config, cmd *cli.Command, prof model.ProviderInfo) error { + u := getUI(cmd) + u.Infof("Connecting %s for your agents (bring-your-own-key)", prof.Name) + + apiKey := strings.TrimSpace(cmd.String("api-key")) + if apiKey == "" { + if key, envVar := model.ResolveAPIKey(prof.ID); key != "" { + apiKey = key + u.Infof("Using %s API key from %s", prof.Name, envVar) + } + } + + // openurl: send the operator to the provider's key page before we + // prompt for the key (skipped when a key is already in hand or non-TTY). + if apiKey == "" && prof.SignupURL != "" && u.IsTTY() && !u.IsJSON() { + u.Infof("Opening %s to create an API key …", prof.SignupURL) + if err := openBrowser(prof.SignupURL); err != nil { + u.Dim(fmt.Sprintf("(couldn't open a browser — visit %s)", prof.SignupURL)) + } + } + + var models []string + if m := strings.TrimSpace(cmd.String("model")); m != "" { + models = []string{m} + } + + // Shared engine: prompts for the key if still empty, seeds --free, + // resolves a model (registry default or live /v1/models), patches + // LiteLLM, and promotes + syncs the agents to use it. + return setupCloudProvider(cfg, u, prof, apiKey, models, cmd.Bool("free")) +} + // runBuyInference is the orchestrator for the new flow. Kept separate from // the cli.Command literal so the steps stay scannable: resolve agent → // resolve seller URL → pick catalog entry → resolve token+count+budget → @@ -167,6 +221,18 @@ Examples: func runBuyInference(ctx context.Context, cfg *config.Config, cmd *cli.Command) error { u := getUI(cmd) + // Front door: if the argument names a hosted provider in the registry + // (venice, openrouter, …) rather than a seller URL, run BYOK onboarding + // — open the provider's key page and wire the LiteLLM gateway. Ollama is + // local and free, so it's not a "buy" target. 
+ arg := strings.TrimSpace(cmd.String("seller")) + if arg == "" { + arg = strings.TrimSpace(cmd.Args().First()) + } + if prof, ok := model.ProviderByID(arg); ok && prof.ID != model.ProviderOllama { + return runBuyInferenceProvider(cfg, cmd, prof) + } + u.Info("Purchasing remote inference for running Obol Agents") target, err := resolveBuyAgent(cfg, cmd) diff --git a/cmd/obol/buy_test.go b/cmd/obol/buy_test.go index 5b8bce16..a59f117e 100644 --- a/cmd/obol/buy_test.go +++ b/cmd/obol/buy_test.go @@ -8,6 +8,8 @@ import ( "github.com/ObolNetwork/obol-stack/internal/agentruntime" "github.com/ObolNetwork/obol-stack/internal/buy" + "github.com/ObolNetwork/obol-stack/internal/config" + "github.com/ObolNetwork/obol-stack/internal/model" "github.com/ObolNetwork/obol-stack/internal/schemas" ) @@ -600,3 +602,36 @@ func TestLooksLikeURL(t *testing.T) { } } } + +// TestBuyInference_BYOKFrontDoor pins the BYOK onboarding surface on +// `obol buy inference`: the command exposes --api-key/--free/--model, and +// every registry provider that isn't local Ollama is recognized as a +// hosted-provider argument (the dispatch the Action keys on). +func TestBuyInference_BYOKFrontDoor(t *testing.T) { + cmd := buyInferenceCommand(&config.Config{}) + + want := map[string]bool{"api-key": false, "free": false, "model": false, "seller": false} + for _, f := range cmd.Flags { + for _, n := range f.Names() { + if _, ok := want[n]; ok { + want[n] = true + } + } + } + for n, found := range want { + if !found { + t.Errorf("buy inference missing --%s flag", n) + } + } + + // Hosted providers route to BYOK onboarding; ollama does not (local). 
+ for _, id := range []string{"venice", "openrouter", "nvidia", "gmi", "novita", "huggingface"} { + p, ok := model.ProviderByID(id) + if !ok || p.ID == model.ProviderOllama { + t.Errorf("provider %q should be a BYOK buy-inference target", id) + } + } + if p, ok := model.ProviderByID("ollama"); !ok || p.ID != model.ProviderOllama { + t.Errorf("ollama must remain a local (non-buy) provider") + } +}