From 12436c714cba52b0d43c5827ed62175d217954b0 Mon Sep 17 00:00:00 2001
From: bussyjd <145845+bussyjd@users.noreply.github.com>
Date: Fri, 12 Jun 2026 19:32:34 +0400
Subject: [PATCH 1/2] =?UTF-8?q?feat(model):=20BYOK=20provider=20registry?=
 =?UTF-8?q?=20=E2=80=94=20one-command=20setup=20for=20Venice,=20OpenRouter?=
 =?UTF-8?q?=20&=20co.?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Collapses provider knowledge (previously smeared across knownProviders, the
setup dispatch switch, a default-model switch, detectCredentials, and
buildModelEntries) into a single source-of-truth registry: one ProviderInfo
row per provider carries id/env-var/Mode/BaseURL/Default/SignupURL/Free, and
every layer reads from it. Adding a provider is now one row, no switch edits.

New BYOK getting-started path (distinct from `obol buy inference`, which is
x402 crypto-paid sellers):

    obol model setup venice     --api-key $VENICE_API_KEY
    obol model setup openrouter --api-key $OPENROUTER_API_KEY --free

- Mode=openai-compatible providers emit model_list entries openai/<id> +
  explicit api_base + key from env var (no wildcard). litellm-secrets is
  envFrom-mounted and record.go stores entries verbatim, so new providers
  persist across stack up with zero changes to those layers.
- --model omitted → registry Default, else live GET <base>/v1/models
  (TTY picker / non-TTY error naming real ids) so we never ship guessed,
  rotating model ids as a hardcoded default.
- --free seeds OpenRouter's curated free-tier snapshot (mapped from
  hermes-agent's free list).
- detectCredentials + detectProvider are now registry-driven (BYOK env keys
  auto-detected; aggregator models labelled by api_base, not 'openai').

Providers seeded: venice, openrouter, nvidia, gmi, novita, huggingface
(plus existing anthropic/openai/ollama). All OpenAI-compatible, so they are
pure data — no new wiring.
---
 CLAUDE.md                    |   7 +
 cmd/obol/model.go            | 175 +++++++++++++++----------
 cmd/obol/model_test.go       |  46 +++++++
 internal/model/model.go      | 244 +++++++++++++++++++++++++++++++----
 internal/model/model_test.go |  79 ++++++++++++
 5 files changed, 459 insertions(+), 92 deletions(-)
diff --git a/CLAUDE.md b/CLAUDE.md
index b52da276..32fe9385 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -262,6 +262,13 @@ Caveats:
 
 **Auto-configuration**: `obol stack up` → `autoConfigureLLM()` detects host Ollama models, patches LiteLLM config. `obolup.sh` → `check_agent_model_api_key()` reads `~/.openclaw/openclaw.json`, resolves API key from `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` (Anthropic) or `OPENAI_API_KEY` (OpenAI), exports for downstream.
 
+**BYOK cloud providers** (easiest getting-started path) — provider knowledge is a single registry in `internal/model/model.go` (`knownProviders` / `ProviderInfo` with `Mode`/`BaseURL`/`Default`/`Free`); adding a provider is one row, no per-provider switch. `obol model setup <provider> --api-key <key>` wires the agent's LiteLLM brain in one command. Built-in: `anthropic`, `openai`, `ollama` (native/local) + OpenAI-compatible aggregators `venice`, `openrouter`, `nvidia`, `gmi`, `novita`, `huggingface` (`Mode=openai-compatible` → `model_list` entry `openai/<id>` + explicit `api_base` + key from the provider's env var; no wildcard). When `--model` is omitted, setup uses the registry `Default` or lists the live `GET <base>/v1/models` (TTY picker / non-TTY error naming real ids). `obol model setup openrouter --free` seeds only the curated free-tier model snapshot. Distinct from `obol buy inference`, which is x402 crypto-paid sellers, NOT BYOK. Unlisted endpoints still use `obol model setup custom`.
+
+```bash
+obol model setup venice     --api-key $VENICE_API_KEY        # one command, agent ready
+obol model setup openrouter --api-key $OPENROUTER_API_KEY --free
+```
+
 **External OpenAI-compatible LLM** (vLLM / sglang / mlx-lm / remote GPU) — canonical user flow, no ConfigMap surgery:
 
 ```bash
diff --git a/cmd/obol/model.go b/cmd/obol/model.go
index 729aee07..4ef123e6 100644
--- a/cmd/obol/model.go
+++ b/cmd/obol/model.go
@@ -5,7 +5,6 @@ import (
 	"errors"
 	"fmt"
 	"math/big"
-	"os"
 	"sort"
 	"strconv"
 	"strings"
@@ -67,16 +66,20 @@ func modelSetupCommand(cfg *config.Config) *cli.Command {
 		Flags: []cli.Flag{
 			&cli.StringFlag{
 				Name:  "provider",
-				Usage: "Provider name: anthropic, openai, or ollama",
+				Usage: "Provider id (anthropic, openai, ollama, venice, openrouter, nvidia, gmi, novita, huggingface). Run with no flags to pick interactively.",
 			},
 			&cli.StringFlag{
 				Name:    "api-key",
-				Usage:   "API key for the provider",
+				Usage:   "API key for the provider (BYOK; also read from the provider's env var if set)",
 				Sources: cli.EnvVars("LLM_API_KEY"),
 			},
 			&cli.StringSliceFlag{
 				Name:  "model",
-				Usage: "Model(s) to configure (e.g. claude-sonnet-4-5-20250929, gpt-4o)",
+				Usage: "Model(s) to configure (e.g. claude-sonnet-4-6, gpt-5.5, or an aggregator model id)",
+			},
+			&cli.BoolFlag{
+				Name:  "free",
+				Usage: "Seed only the provider's curated free-tier models (OpenRouter)",
 			},
 		},
 		Commands: []*cli.Command{
@@ -120,15 +123,17 @@ func modelSetupCommand(cfg *config.Config) *cli.Command {
 				}
 			}
 
-			// Provider-specific flow
-			switch provider {
-			case "ollama":
+			// Provider-specific flow — dispatch off the registry, not a
+			// hardcoded switch. Ollama is local; everything else is a
+			// key-based cloud/BYOK provider handled by one generic path.
+			prof, ok := model.ProviderByID(provider)
+			if !ok {
+				return fmt.Errorf("unknown provider %q — run `obol model setup` (no flags) to pick from the list, or `obol model setup custom --endpoint … --model …` for an unlisted OpenAI-compatible endpoint", provider)
+			}
+			if prof.ID == model.ProviderOllama {
 				return setupOllama(cfg, u, models)
-			case "anthropic", "openai":
-				return setupCloudProvider(cfg, u, provider, apiKey, models)
-			default:
-				return fmt.Errorf("unknown provider %q — use anthropic, openai, or ollama", provider)
 			}
+			return setupCloudProvider(cfg, u, prof, apiKey, models, cmd.Bool("free"))
 		},
 	}
 }
@@ -187,13 +192,14 @@ func setupOllama(cfg *config.Config, u *ui.UI, models []string) error {
 	return promoteAndSync(cfg, u, explicit)
 }
 
-func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, models []string) error {
+func setupCloudProvider(cfg *config.Config, u *ui.UI, prof model.ProviderInfo, apiKey string, models []string, free bool) error {
 	if apiKey == "" {
-		var err error
-
-		info := providerInfo(provider)
+		if prof.SignupURL != "" {
+			u.Dim(fmt.Sprintf("Get a %s API key: %s", prof.Name, prof.SignupURL))
+		}
 
-		apiKey, err = u.SecretInput(fmt.Sprintf("%s API key (%s)", info.Name, info.EnvVar))
+		var err error
+		apiKey, err = u.SecretInput(fmt.Sprintf("%s API key (%s)", prof.Name, prof.EnvVar))
 		if err != nil {
 			return err
 		}
@@ -203,38 +209,34 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
 		}
 	}
 
-	if len(models) == 0 {
-		// Per-provider defaults — kept in sync with what the providers
-		// document as their current chat-tuned flagship. Bumping these is a
-		// small follow-up PR when frontier models drop, and it isolates the
-		// "what's good today" maintenance to one place.
-		var defaultModel string
-		switch provider {
-		case "anthropic":
-			defaultModel = "claude-sonnet-4-6"
-		case "openai":
-			defaultModel = "gpt-5.5"
+	// --free: seed the provider's curated free-tier models (unless the
+	// operator already named explicit --model values).
+	if free {
+		if len(prof.Free) == 0 {
+			return fmt.Errorf("--free is not available for %s (no curated free models); pass --model instead", prof.Name)
 		}
+		if len(models) == 0 {
+			models = append([]string(nil), prof.Free...)
+			u.Infof("Seeding %d curated free %s model(s)", len(models), prof.Name)
+		}
+	}
 
-		// Interactive: let the user override the default with a free-text
-		// entry. Non-interactive (no TTY): silently use the default — the
-		// caller can always pass --model to be explicit.
-		chosen := defaultModel
-		if defaultModel != "" && u.IsTTY() && !u.IsJSON() {
-			input, err := u.Input(fmt.Sprintf("Model for %s", provider), defaultModel)
-			if err != nil {
-				return err
-			}
-			if strings.TrimSpace(input) != "" {
-				chosen = strings.TrimSpace(input)
-			}
+	// Resolve a model when none was given: the registry Default, else (for
+	// BYOK aggregators with a rotating catalog) the live /v1/models list.
+	if len(models) == 0 {
+		chosen, err := resolveSetupModel(u, prof, apiKey)
+		if err != nil {
+			return err
 		}
 		if chosen != "" {
 			models = []string{chosen}
 		}
 	}
+	if len(models) == 0 {
+		return fmt.Errorf("no model selected for %s — pass --model <id>", prof.Name)
+	}
 
-	if err := model.ConfigureLiteLLM(cfg, u, provider, apiKey, models); err != nil {
+	if err := model.ConfigureLiteLLM(cfg, u, prof.ID, apiKey, models); err != nil {
 		u.Print("")
 		u.Print("  Hint: Configuration stored in: litellm-config ConfigMap (llm namespace)")
 
@@ -247,6 +249,58 @@ func setupCloudProvider(cfg *config.Config, u *ui.UI, provider, apiKey string, m
 	return promoteAndSync(cfg, u, models)
 }
 
+// resolveSetupModel picks a model when the operator passed none. A registry
+// Default wins (overridable in a TTY). With no static default — BYOK
+// aggregators whose catalog rotates — it lists the live /v1/models endpoint:
+// a picker in a TTY, otherwise an error naming real ids so the operator can
+// re-run with --model. Returns "" with a nil error only when there is
+// genuinely nothing to pick (the caller then errors).
+func resolveSetupModel(u *ui.UI, prof model.ProviderInfo, apiKey string) (string, error) {
+	if prof.Default != "" {
+		if u.IsTTY() && !u.IsJSON() {
+			input, err := u.Input(fmt.Sprintf("Model for %s", prof.ID), prof.Default)
+			if err != nil {
+				return "", err
+			}
+			if strings.TrimSpace(input) != "" {
+				return strings.TrimSpace(input), nil
+			}
+		}
+		return prof.Default, nil
+	}
+
+	if !prof.IsBYOK() {
+		return "", nil
+	}
+
+	ids, err := model.FetchOpenAICompatibleModels(prof.BaseURL, apiKey)
+	if err != nil {
+		u.Dim(fmt.Sprintf("Couldn't list %s models (%v)", prof.Name, err))
+		if u.IsTTY() && !u.IsJSON() {
+			return u.Input(fmt.Sprintf("Model id for %s", prof.Name), "")
+		}
+		return "", fmt.Errorf("could not resolve a model for %s: pass --model <id> (keys/models at %s)", prof.Name, prof.SignupURL)
+	}
+
+	if u.IsTTY() && !u.IsJSON() {
+		shown := ids
+		if len(shown) > 30 {
+			shown = shown[:30]
+		}
+		idx, err := u.Select(fmt.Sprintf("Select a %s model:", prof.Name), shown, 0)
+		if err != nil {
+			return "", err
+		}
+		return shown[idx], nil
+	}
+
+	sample := ids
+	if len(sample) > 8 {
+		sample = sample[:8]
+	}
+	return "", fmt.Errorf("pass --model <id> for %s; available include: %s", prof.Name, strings.Join(sample, ", "))
+}
+
 // syncAgentModels re-renders the stack-managed Hermes default agent from the
 // current LiteLLM model inventory.
 func syncAgentModels(cfg *config.Config, u *ui.UI) error {
@@ -852,17 +906,6 @@ func modelRemoveCommand(cfg *config.Config) *cli.Command {
 	}
 }
 
-func providerInfo(id string) model.ProviderInfo {
-	providers, _ := model.GetAvailableProviders(nil)
-	for _, p := range providers {
-		if p.ID == id {
-			return p
-		}
-	}
-
-	return model.ProviderInfo{ID: id, Name: id}
-}
-
 // detectedCredential describes a credential found in the environment.
 type detectedCredential struct {
 	key    string // the actual API key value (empty for Ollama)
@@ -875,22 +918,22 @@ type detectedCredential struct {
 func detectCredentials() map[string]detectedCredential {
 	creds := make(map[string]detectedCredential)
 
-	// Anthropic: check ANTHROPIC_API_KEY, then CLAUDE_CODE_OAUTH_TOKEN
-	if key := os.Getenv("ANTHROPIC_API_KEY"); key != "" {
-		creds["anthropic"] = detectedCredential{key: key, source: "ANTHROPIC_API_KEY"}
-	} else if key := os.Getenv("CLAUDE_CODE_OAUTH_TOKEN"); key != "" {
-		creds["anthropic"] = detectedCredential{key: key, source: "CLAUDE_CODE_OAUTH_TOKEN"}
-	}
-
-	// OpenAI: check OPENAI_API_KEY
-	if key := os.Getenv("OPENAI_API_KEY"); key != "" {
-		creds["openai"] = detectedCredential{key: key, source: "OPENAI_API_KEY"}
-	}
+	// Registry-driven: every provider's primary + alternate env vars are
+	// checked via model.ResolveAPIKey, so a new provider row auto-detects
+	// without editing this function. Ollama has no key — probe reachability.
+	providers, _ := model.GetAvailableProviders(nil)
+	for _, p := range providers {
+		if p.ID == model.ProviderOllama {
+			if ollamaModels, err := model.ListOllamaModels(); err == nil && len(ollamaModels) > 0 {
+				creds[p.ID] = detectedCredential{
+					source: fmt.Sprintf("%d model(s) available", len(ollamaModels)),
+				}
+			}
+			continue
+		}
 
-	// Ollama: check if reachable with models
-	if ollamaModels, err := model.ListOllamaModels(); err == nil && len(ollamaModels) > 0 {
-		creds["ollama"] = detectedCredential{
-			source: fmt.Sprintf("%d model(s) available", len(ollamaModels)),
+		if key, envVar := model.ResolveAPIKey(p.ID); key != "" {
+			creds[p.ID] = detectedCredential{key: key, source: envVar}
 		}
 	}
 
diff --git a/cmd/obol/model_test.go b/cmd/obol/model_test.go
index 972ef90d..86f9b7f3 100644
--- a/cmd/obol/model_test.go
+++ b/cmd/obol/model_test.go
@@ -3,6 +3,9 @@ package main
 import (
 	"testing"
 
+	"github.com/ObolNetwork/obol-stack/internal/model"
+	"github.com/urfave/cli/v3"
+
 	"github.com/ObolNetwork/obol-stack/internal/config"
 )
 
@@ -69,3 +72,46 @@ func TestSetupPromoteList(t *testing.T) {
 		}
 	})
 }
+
+// TestModelSetup_BYOKFlags pins the BYOK onboarding surface: the setup
+// command exposes --provider/--api-key/--model/--free, and every BYOK
+// aggregator this PR adds is present in the registry with a non-empty
+// BaseURL. Dispatch itself (only Ollama is special-cased) is not exercised.
+func TestModelSetup_BYOKFlags(t *testing.T) {
+	cfg := &config.Config{}
+	var setup *cli.Command
+	for _, sub := range modelCommand(cfg).Commands {
+		if sub.Name == "setup" {
+			setup = sub
+		}
+	}
+	if setup == nil {
+		t.Fatal("model setup command missing")
+	}
+
+	want := map[string]bool{"provider": false, "api-key": false, "model": false, "free": false}
+	for _, f := range setup.Flags {
+		for _, n := range f.Names() {
+			if _, ok := want[n]; ok {
+				want[n] = true
+			}
+		}
+	}
+	for n, found := range want {
+		if !found {
+			t.Errorf("model setup missing --%s flag", n)
+		}
+	}
+
+	// The registry must carry the BYOK providers this PR adds.
+	for _, id := range []string{"venice", "openrouter", "nvidia", "gmi", "novita", "huggingface"} {
+		p, ok := model.ProviderByID(id)
+		if !ok {
+			t.Errorf("provider %q missing from registry", id)
+			continue
+		}
+		if p.BaseURL == "" {
+			t.Errorf("provider %q has no BaseURL", id)
+		}
+	}
+}
diff --git a/internal/model/model.go b/internal/model/model.go
index 4614c4c9..e463a74d 100644
--- a/internal/model/model.go
+++ b/internal/model/model.go
@@ -37,19 +37,171 @@ const (
 	ProviderOpenAI    = "openai"
 )
 
-// Known provider definitions — no need to query the running pod.
-var knownProviders = []ProviderInfo{
-	{ID: ProviderAnthropic, Name: "Anthropic", EnvVar: "ANTHROPIC_API_KEY", AltEnvVars: []string{"CLAUDE_CODE_OAUTH_TOKEN"}},
-	{ID: ProviderOpenAI, Name: "OpenAI", EnvVar: "OPENAI_API_KEY"},
-	{ID: ProviderOllama, Name: "Ollama (local)", EnvVar: ""},
-}
+// apiMode selects how a provider's LiteLLM model_list entries are shaped.
+type apiMode string
+
+const (
+	// modeAnthropic: native LiteLLM anthropic routing + prompt-cache markers
+	// + an anthropic/* wildcard. Key read from EnvVar.
+	modeAnthropic apiMode = "anthropic"
+	// modeOpenAI: native LiteLLM openai/ routing + an openai/* wildcard.
+	modeOpenAI apiMode = "openai"
+	// modeOllama: local ollama_chat/ entries pointed at the in-cluster Ollama.
+	modeOllama apiMode = "ollama"
+	// modeOpenAICompatible: any OpenAI-compatible BYOK aggregator (OpenRouter,
+	// Venice, NVIDIA, …). Explicit entries only, Model="openai/<id>" with an
+	// explicit api_base = BaseURL and key read from EnvVar. No wildcard:
+	// aggregator namespaces are huge and overlapping, so we register only the
+	// models the operator asked for.
+	modeOpenAICompatible apiMode = "openai-compatible"
+)
 
-// ProviderInfo describes an LLM provider.
+// ProviderInfo describes an LLM provider. knownProviders is the single
+// source of truth: adding a provider is one row here, and every layer (the
+// setup CLI, default-model selection, LiteLLM entry shaping, status, and
+// the persisted record) reads from this struct instead of a per-provider
+// switch.
 type ProviderInfo struct {
-	ID         string   // provider id (e.g. "anthropic", "openai", "ollama")
+	ID         string   // provider id (e.g. "anthropic", "openai", "venice")
 	Name       string   // display name
 	EnvVar     string   // primary env var for API key (empty for Ollama)
 	AltEnvVars []string // fallback env vars checked in order (e.g. CLAUDE_CODE_OAUTH_TOKEN)
+	Mode       apiMode  // how model_list entries are shaped
+	BaseURL    string   // OpenAI-compatible base_url (modeOpenAICompatible only)
+	Default    string   // default chat model when --model is omitted ("" = ask/require)
+	SignupURL  string   // where to obtain an API key (shown as a hint)
+	Free       []string // curated zero-marginal-cost model ids (seeded by --free)
+}
+
+// IsBYOK reports whether the provider is a BYOK OpenAI-compatible
+// aggregator reached over the public internet (as opposed to a native
+// provider or the local Ollama).
+func (p ProviderInfo) IsBYOK() bool { return p.Mode == modeOpenAICompatible }
+
+// knownProviders is the registry of supported LLM providers. The first
+// three are native/local; the rest are BYOK OpenAI-compatible aggregators —
+// each is pure data, no bespoke wiring. A BaseURL omits the trailing /v1
+// only where LiteLLM appends it; aggregator paths that already include /v1
+// (every row below) keep it (LiteLLM only auto-appends for bare hosts).
+var knownProviders = []ProviderInfo{
+	{
+		ID: ProviderAnthropic, Name: "Anthropic", EnvVar: "ANTHROPIC_API_KEY",
+		AltEnvVars: []string{"CLAUDE_CODE_OAUTH_TOKEN"}, Mode: modeAnthropic,
+		Default: "claude-sonnet-4-6", SignupURL: "https://console.anthropic.com/settings/keys",
+	},
+	{
+		ID: ProviderOpenAI, Name: "OpenAI", EnvVar: "OPENAI_API_KEY", Mode: modeOpenAI,
+		Default: "gpt-5.5", SignupURL: "https://platform.openai.com/api-keys",
+	},
+	{
+		ID: ProviderOllama, Name: "Ollama (local)", EnvVar: "", Mode: modeOllama,
+	},
+	// ── BYOK OpenAI-compatible aggregators (the easy getting-started path) ──
+	// model_list entries are pure data: Model="openai/<id>", api_base=BaseURL,
+	// key from EnvVar. Default models that can't be statically pinned (the
+	// aggregator's catalog rotates) are left blank — setup then resolves a
+	// model from the live /v1/models list or --model.
+	{
+		ID: "venice", Name: "Venice", EnvVar: "VENICE_API_KEY", Mode: modeOpenAICompatible,
+		BaseURL: "https://api.venice.ai/api/v1", SignupURL: "https://venice.ai/settings/api",
+	},
+	{
+		ID: "openrouter", Name: "OpenRouter", EnvVar: "OPENROUTER_API_KEY", Mode: modeOpenAICompatible,
+		BaseURL: "https://openrouter.ai/api/v1", Default: "openrouter/auto",
+		SignupURL: "https://openrouter.ai/keys",
+		// Curated zero-cost models (snapshot — OpenRouter's free roster
+		// rotates; pass --model for any other). Seeded by `--free`.
+		Free: []string{
+			"openrouter/elephant-alpha",
+			"openrouter/owl-alpha",
+			"poolside/laguna-m.1:free",
+			"tencent/hy3-preview:free",
+			"nvidia/nemotron-3-super-120b-a12b:free",
+			"nvidia/nemotron-3-ultra-550b-a55b:free",
+			"inclusionai/ring-2.6-1t:free",
+		},
+	},
+	{
+		ID: "nvidia", Name: "NVIDIA NIM", EnvVar: "NVIDIA_API_KEY", Mode: modeOpenAICompatible,
+		BaseURL: "https://integrate.api.nvidia.com/v1", SignupURL: "https://build.nvidia.com",
+	},
+	{
+		ID: "gmi", Name: "GMI Cloud", EnvVar: "GMI_API_KEY", Mode: modeOpenAICompatible,
+		BaseURL: "https://api.gmi-serving.com/v1", SignupURL: "https://console.gmicloud.ai",
+	},
+	{
+		ID: "novita", Name: "Novita", EnvVar: "NOVITA_API_KEY", Mode: modeOpenAICompatible,
+		BaseURL: "https://api.novita.ai/openai/v1", SignupURL: "https://novita.ai/settings/key-management",
+	},
+	{
+		ID: "huggingface", Name: "Hugging Face Router", EnvVar: "HF_TOKEN", Mode: modeOpenAICompatible,
+		BaseURL: "https://router.huggingface.co/v1", SignupURL: "https://huggingface.co/settings/tokens",
+	},
+}
+
+// ProviderByID returns the registry entry for id and whether it was found.
+func ProviderByID(id string) (ProviderInfo, bool) {
+	for _, p := range knownProviders {
+		if p.ID == id {
+			return p, true
+		}
+	}
+	return ProviderInfo{}, false
+}
+
+// FetchOpenAICompatibleModels lists model ids from a provider's
+// OpenAI-compatible GET <baseURL>/models endpoint. Used at setup time to
+// resolve a real model id when an aggregator has no statically-pinnable
+// default (its catalog rotates). Best-effort: a network error, a non-200
+// status, an unparseable body, or an empty model list returns an error,
+// and the caller falls back to prompting for (or requiring) --model. The
+// just-entered apiKey, when non-empty, authenticates the call from the host.
+func FetchOpenAICompatibleModels(baseURL, apiKey string) ([]string, error) {
+	endpoint := strings.TrimRight(baseURL, "/") + "/models"
+	ctx, cancel := context.WithTimeout(context.Background(), 12*time.Second)
+	defer cancel()
+
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	if err != nil {
+		return nil, err
+	}
+	if apiKey != "" {
+		req.Header.Set("Authorization", "Bearer "+apiKey)
+	}
+
+	resp, err := http.DefaultClient.Do(req)
+	if err != nil {
+		return nil, err
+	}
+	defer resp.Body.Close()
+
+	body, err := io.ReadAll(io.LimitReader(resp.Body, 1<<20))
+	if err != nil {
+		return nil, err
+	}
+	if resp.StatusCode != http.StatusOK {
+		return nil, fmt.Errorf("models endpoint returned %d", resp.StatusCode)
+	}
+
+	var parsed struct {
+		Data []struct {
+			ID string `json:"id"`
+		} `json:"data"`
+	}
+	if err := json.Unmarshal(body, &parsed); err != nil {
+		return nil, fmt.Errorf("parse models response: %w", err)
+	}
+
+	ids := make([]string, 0, len(parsed.Data))
+	for _, m := range parsed.Data {
+		if m.ID != "" {
+			ids = append(ids, m.ID)
+		}
+	}
+	if len(ids) == 0 {
+		return nil, errors.New("models endpoint returned no models")
+	}
+	return ids, nil
 }
 
 // ProviderStatus captures effective global LiteLLM provider state.
@@ -1308,16 +1460,42 @@ var WellKnownModels = map[string][]string{
 	},
 }
 
-// buildModelEntries creates LiteLLM model_list entries for a provider.
-// Cloud providers (anthropic, openai) get a wildcard entry plus explicit
-// entries for the requested models. Ollama gets explicit entries only
-// (wildcards are broken for ollama_chat/).
+// buildModelEntries creates LiteLLM model_list entries for a provider,
+// shaped by its registry Mode:
+//   - anthropic/openai: explicit entries (so the chosen model wins Rank's
+//     "first chat-capable" rule) followed by a <provider>/* wildcard.
+//   - ollama: explicit ollama_chat/ entries only (wildcards are broken).
+//   - openai-compatible: explicit openai/<id> entries with an explicit
+//     api_base = BaseURL and key from EnvVar — no wildcard.
+//
+// A provider not in the registry falls back to the generic <provider>/<id>
+// shape keyed on <PROVIDER>_API_KEY (legacy `setup custom` behavior).
 func buildModelEntries(provider string, models []string) []ModelEntry {
-	var entries []ModelEntry
+	p, ok := ProviderByID(provider)
+	if !ok {
+		// Unknown provider: legacy generic shape (no api_base).
+		var entries []ModelEntry
+		for _, m := range models {
+			entries = append(entries, ModelEntry{
+				ModelName: m,
+				LiteLLMParams: LiteLLMParams{
+					Model:  provider + "/" + m,
+					APIKey: fmt.Sprintf("os.environ/%s_API_KEY", strings.ToUpper(provider)),
+				},
+			})
+		}
+		return entries
+	}
+
+	keyRef := ""
+	if p.EnvVar != "" {
+		keyRef = "os.environ/" + p.EnvVar
+	}
 
-	switch provider {
-	case ProviderOllama:
-		// Explicit entries — ollama_chat/* wildcards are broken in LiteLLM
+	var entries []ModelEntry
+	switch p.Mode {
+	case modeOllama:
+		// Explicit entries — ollama_chat/* wildcards are broken in LiteLLM.
 		for _, m := range models {
 			entries = append(entries, ModelEntry{
 				ModelName: m,
@@ -1327,7 +1505,7 @@ func buildModelEntries(provider string, models []string) []ModelEntry {
 				},
 			})
 		}
-	case ProviderAnthropic:
+	case modeAnthropic:
 		cachePoints := anthropicCacheControlPoints()
 		// Explicit entries first so the user-selected model is the primary
 		// under model.Rank's "first chat-capable wins" rule. Hermes cannot
@@ -1338,39 +1516,41 @@ func buildModelEntries(provider string, models []string) []ModelEntry {
 				ModelName: m,
 				LiteLLMParams: LiteLLMParams{
 					Model:                       m,
-					APIKey:                      "os.environ/ANTHROPIC_API_KEY",
+					APIKey:                      keyRef,
 					CacheControlInjectionPoints: cachePoints,
 				},
 			})
 		}
-		// Wildcard: routes any anthropic model without explicit registration.
 		entries = append(entries, ModelEntry{
 			ModelName: "anthropic/*",
 			LiteLLMParams: LiteLLMParams{
 				Model:                       "anthropic/*",
-				APIKey:                      "os.environ/ANTHROPIC_API_KEY",
+				APIKey:                      keyRef,
 				CacheControlInjectionPoints: cachePoints,
 			},
 		})
-	case ProviderOpenAI:
+	case modeOpenAI:
 		// Explicit-before-wildcard, same rationale as Anthropic above.
 		for _, m := range models {
 			entries = append(entries, ModelEntry{
 				ModelName:     m,
-				LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: "os.environ/OPENAI_API_KEY"},
+				LiteLLMParams: LiteLLMParams{Model: "openai/" + m, APIKey: keyRef},
 			})
 		}
 		entries = append(entries, ModelEntry{
 			ModelName:     "openai/*",
-			LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: "os.environ/OPENAI_API_KEY"},
+			LiteLLMParams: LiteLLMParams{Model: "openai/*", APIKey: keyRef},
 		})
-	default:
+	case modeOpenAICompatible:
+		// Explicit openai-shaped entries with an explicit api_base. No
+		// wildcard — the aggregator's catalog is huge and overlaps others.
 		for _, m := range models {
 			entries = append(entries, ModelEntry{
 				ModelName: m,
 				LiteLLMParams: LiteLLMParams{
-					Model:  provider + "/" + m,
-					APIKey: fmt.Sprintf("os.environ/%s_API_KEY", strings.ToUpper(provider)),
+					Model:   "openai/" + m,
+					APIBase: p.BaseURL,
+					APIKey:  keyRef,
 				},
 			})
 		}
@@ -1464,6 +1644,18 @@ func detectProvider(entry ModelEntry) string {
 	}
 
 	model := entry.LiteLLMParams.Model
+	// BYOK aggregator entries are openai-shaped (openai/<id>) but carry an
+	// explicit api_base — match it back to the registry so status groups
+	// them under their real provider (venice, openrouter, …) rather than
+	// "openai". Checked before the bare openai/ prefix below.
+	if base := entry.LiteLLMParams.APIBase; base != "" && strings.HasPrefix(model, ProviderOpenAI+"/") {
+		for _, p := range knownProviders {
+			if p.Mode == modeOpenAICompatible && p.BaseURL == base {
+				return p.ID
+			}
+		}
+	}
+
 	// Wildcard entries
 	if strings.HasPrefix(model, ProviderAnthropic+"/") {
 		return ProviderAnthropic
diff --git a/internal/model/model_test.go b/internal/model/model_test.go
index 210ef676..59ec99fa 100644
--- a/internal/model/model_test.go
+++ b/internal/model/model_test.go
@@ -1166,3 +1166,82 @@ func modelNames(entries []ModelEntry) []string {
 	}
 	return out
 }
+
+func TestBuildModelEntries_OpenAICompatible(t *testing.T) {
+	entries := buildModelEntries("venice", []string{"venice-uncensored"})
+	if len(entries) != 1 {
+		t.Fatalf("got %d entries, want 1 (aggregators get no wildcard)", len(entries))
+	}
+	e := entries[0]
+	if e.ModelName != "venice-uncensored" {
+		t.Errorf("model_name = %q", e.ModelName)
+	}
+	if e.LiteLLMParams.Model != "openai/venice-uncensored" {
+		t.Errorf("model = %q, want openai/venice-uncensored", e.LiteLLMParams.Model)
+	}
+	if e.LiteLLMParams.APIBase != "https://api.venice.ai/api/v1" {
+		t.Errorf("api_base = %q, want venice base_url", e.LiteLLMParams.APIBase)
+	}
+	if e.LiteLLMParams.APIKey != "os.environ/VENICE_API_KEY" {
+		t.Errorf("api_key = %q, want os.environ/VENICE_API_KEY", e.LiteLLMParams.APIKey)
+	}
+}
+
+func TestBuildModelEntries_UnknownProviderLegacyShape(t *testing.T) {
+	// Providers not in the registry keep the legacy generic shape (no api_base).
+	entries := buildModelEntries("somevendor", []string{"m1"})
+	if len(entries) != 1 || entries[0].LiteLLMParams.Model != "somevendor/m1" {
+		t.Fatalf("unexpected legacy entries: %+v", entries)
+	}
+	if entries[0].LiteLLMParams.APIBase != "" {
+		t.Errorf("legacy shape must not set api_base, got %q", entries[0].LiteLLMParams.APIBase)
+	}
+}
+
+func TestProviderByID(t *testing.T) {
+	p, ok := ProviderByID("openrouter")
+	if !ok {
+		t.Fatal("openrouter must be in the registry")
+	}
+	if p.BaseURL == "" || p.EnvVar != "OPENROUTER_API_KEY" || len(p.Free) == 0 {
+		t.Errorf("openrouter row incomplete: %+v", p)
+	}
+	if _, ok := ProviderByID("nope"); ok {
+		t.Error("unknown provider must not be found")
+	}
+}
+
+func TestDetectProvider_AggregatorByAPIBase(t *testing.T) {
+	venice := ModelEntry{ModelName: "x", LiteLLMParams: LiteLLMParams{
+		Model: "openai/x", APIBase: "https://api.venice.ai/api/v1",
+	}}
+	if got := detectProvider(venice); got != "venice" {
+		t.Errorf("venice entry detected as %q, want venice", got)
+	}
+	// A native OpenAI entry (no api_base) must still read as openai.
+	oai := ModelEntry{ModelName: "gpt-5.5", LiteLLMParams: LiteLLMParams{Model: "openai/gpt-5.5"}}
+	if got := detectProvider(oai); got != ProviderOpenAI {
+		t.Errorf("openai entry detected as %q, want openai", got)
+	}
+}
+
+func TestProviderRegistry_Invariants(t *testing.T) {
+	seen := map[string]bool{}
+	for _, p := range knownProviders {
+		if seen[p.ID] {
+			t.Errorf("duplicate provider id %q", p.ID)
+		}
+		seen[p.ID] = true
+		if p.Mode == modeOpenAICompatible && (p.BaseURL == "" || p.EnvVar == "") {
+			t.Errorf("BYOK provider %q must set BaseURL and EnvVar", p.ID)
+		}
+		if len(p.Free) > 0 && p.Mode != modeOpenAICompatible {
+			t.Errorf("provider %q has Free models but is not openai-compatible", p.ID)
+		}
+	}
+	for _, id := range []string{ProviderAnthropic, ProviderOpenAI, ProviderOllama} {
+		if _, ok := ProviderByID(id); !ok {
+			t.Errorf("native provider %q missing from registry", id)
+		}
+	}
+}

From 60568bc5e998b7eabbf512efeb1dc1f15e8b0b9f Mon Sep 17 00:00:00 2001
From: bussyjd <145845+bussyjd@users.noreply.github.com>
Date: Fri, 12 Jun 2026 19:42:46 +0400
Subject: [PATCH 2/2] =?UTF-8?q?feat(buy):=20obol=20buy=20inference=20<prov?=
 =?UTF-8?q?ider>=20=E2=80=94=20BYOK=20front=20door=20with=20open-URL=20onb?=
 =?UTF-8?q?oarding?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the requested getting-started verb: `obol buy inference venice`,
`obol buy inference openrouter --free`, etc. Dispatch keys on whether the
positional arg matches a registry provider id — provider → BYOK onboarding,
URL/none → the existing x402 crypto-paid seller flow (unchanged).

Onboarding mirrors hermes-agent's api-key UX: resolve the key (--api-key →
provider env var → prompt), open the provider's SignupURL in the browser
(reuses openBrowser; skipped when a key is already in hand or non-TTY), then
delegate to the shared setupCloudProvider engine (model resolution, --free
seeding, LiteLLM patch, agent sync). No wallet, no x402 for this path.

Also adds --api-key/--free to buy inference and corrects the CLAUDE.md
framing (buy inference is now BOTH the BYOK door and the x402 door).
---
 CLAUDE.md            | 11 +++--
 cmd/obol/buy.go      | 96 +++++++++++++++++++++++++++++++++++++-------
 cmd/obol/buy_test.go | 35 ++++++++++++++++
 3 files changed, 124 insertions(+), 18 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index 32fe9385..1b279e77 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -262,11 +262,16 @@ Caveats:
 
 **Auto-configuration**: `obol stack up` → `autoConfigureLLM()` detects host Ollama models, patches LiteLLM config. `obolup.sh` → `check_agent_model_api_key()` reads `~/.openclaw/openclaw.json`, resolves API key from `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` (Anthropic) or `OPENAI_API_KEY` (OpenAI), exports for downstream.
 
-**BYOK cloud providers** (easiest getting-started path) — provider knowledge is a single registry in `internal/model/model.go` (`knownProviders` / `ProviderInfo` with `Mode`/`BaseURL`/`Default`/`Free`); adding a provider is one row, no per-provider switch. `obol model setup <provider> --api-key <key>` wires the agent's LiteLLM brain in one command. Built-in: `anthropic`, `openai`, `ollama` (native/local) + OpenAI-compatible aggregators `venice`, `openrouter`, `nvidia`, `gmi`, `novita`, `huggingface` (`Mode=openai-compatible` → `model_list` entry `openai/<id>` + explicit `api_base` + key from the provider's env var; no wildcard). When `--model` is omitted, setup uses the registry `Default` or lists the live `GET <base>/v1/models` (TTY picker / non-TTY error naming real ids). `obol model setup openrouter --free` seeds only the curated free-tier model snapshot. Distinct from `obol buy inference`, which is x402 crypto-paid sellers, NOT BYOK. Unlisted endpoints still use `obol model setup custom`.
+**BYOK cloud providers** (easiest getting-started path) — provider knowledge is a single registry in `internal/model/model.go` (`knownProviders` / `ProviderInfo` with `Mode`/`BaseURL`/`Default`/`SignupURL`/`Free`); adding a provider is one row, no per-provider switch. Built-in: `anthropic`, `openai`, `ollama` (native/local) + OpenAI-compatible aggregators `venice`, `openrouter`, `nvidia`, `gmi`, `novita`, `huggingface` (`Mode=openai-compatible` → `model_list` entry `openai/<id>` + explicit `api_base` + key from the provider's env var; no wildcard). When `--model` is omitted, setup uses the registry `Default` or lists the live `GET <base>/v1/models` (TTY picker / non-TTY error naming real ids). `--free` seeds only the curated free-tier model snapshot (OpenRouter).
+
+Two front doors share one engine (`setupCloudProvider` in `cmd/obol/model.go`):
+- `obol buy inference <provider>` — friendly onboarding: takes the key (`--api-key` → env var → prompt), opening the provider's `SignupURL` in the browser (`openBrowser`, hermes-style) only when no key is already available and the session is an interactive TTY, then wires LiteLLM + syncs agents. `obol buy inference` with a URL/no-arg is still the **x402 crypto-paid seller** path — dispatch keys on whether the argument (`--seller`, else the positional) matches a registry provider id other than `ollama`.
+- `obol model setup <provider> --api-key <key>` — the scriptable, no-browser equivalent. Unlisted endpoints still use `obol model setup custom`.
 
 ```bash
-obol model setup venice     --api-key $VENICE_API_KEY        # one command, agent ready
-obol model setup openrouter --api-key $OPENROUTER_API_KEY --free
+obol buy inference venice                 # opens venice key page, prompts, wires up
+obol buy inference openrouter --free      # seeds curated free models
+obol model setup venice --api-key $VENICE_API_KEY   # scriptable / CI
 ```
 
 **External OpenAI-compatible LLM** (vLLM / sglang / mlx-lm / remote GPU) — canonical user flow, no ConfigMap surgery:
diff --git a/cmd/obol/buy.go b/cmd/obol/buy.go
index fb19d923..5356c4b9 100644
--- a/cmd/obol/buy.go
+++ b/cmd/obol/buy.go
@@ -66,28 +66,45 @@ func buyCommand(cfg *config.Config) *cli.Command {
 func buyInferenceCommand(cfg *config.Config) *cli.Command {
 	return &cli.Command{
 		Name:      "inference",
-		Usage:     "Buy paid inference from an x402-gated seller via the obol-agent",
-		ArgsUsage: "[<seller-url>]",
-		Description: `Pre-authorizes an x402-gated inference seller through an obol-agent's wallet.
+		Usage:     "Buy inference for your agents — a hosted BYOK provider (Venice, OpenRouter, …) or an x402-gated seller",
+		ArgsUsage: "[<provider>|<seller-url>]",
+		Description: `Two ways to give your agents inference:
 
-Hand the command a seller URL — either a storefront base
-("https://inference.v1337.org") or a specific offer
-("https://inference.v1337.org/services/aeon") — and the CLI will walk
-/api/services.json, pick the inference offer, and pre-sign authorizations
-via the agent's remote signer.
+  1. Hosted provider (BYOK) — hand the command a provider id and it opens
+     that provider's API-key page in your browser, takes the key, and wires
+     your agents' LiteLLM gateway to it:
 
-With no URL, the public ` + x402verifier.DefaultBuySellerURL + ` storefront is used.
+         obol buy inference venice
+         obol buy inference openrouter --free
 
-In a TTY, the CLI prompts for auto-refill, request count, and
-confirmation. Pass --yes / -y for non-interactive runs (CI, scripts) —
---count is required in that mode.
+     Built-in providers: venice, openrouter, nvidia, gmi, novita,
+     huggingface (plus anthropic, openai). The key is read from the
+     provider's env var when already set, so this stays non-interactive in CI.
+
+  2. x402-gated seller — hand it a seller URL (a storefront base like
+     "https://inference.v1337.org" or a specific offer ".../services/aeon")
+     and the CLI walks /api/services.json, picks the inference offer, and
+     pre-signs payment authorizations via the agent's remote signer. With no
+     argument, the public ` + x402verifier.DefaultBuySellerURL + ` storefront is used.
+
+In a TTY the seller flow prompts for auto-refill, request count, and
+confirmation. Pass --yes / -y for non-interactive runs (--count required).
 
 Examples:
-    obol buy inference
+    obol buy inference venice
+    obol buy inference openrouter --free
     obol buy inference https://inference.v1337.org/services/aeon
-    obol buy inference https://seller.example/services/foo --yes --count 100
-    obol buy inference https://seller.example/services/foo --auto-refill --refill-threshold 5 --refill-count 25`,
+    obol buy inference https://seller.example/services/foo --yes --count 100`,
 		Flags: []cli.Flag{
+			&cli.StringFlag{
+				Name:    "api-key",
+				Usage:   "API key for a hosted provider (BYOK). Also read from the provider's env var when set.",
+				Sources: cli.EnvVars("LLM_API_KEY"),
+			},
+			&cli.BoolFlag{
+				Name:  "free",
+				Usage: "For a hosted provider that has them, seed only the curated free-tier models (OpenRouter)",
+			},
 			&cli.StringFlag{
 				Name:  "seller",
 				Usage: "Seller URL (alternative to positional). When neither is set the default storefront is used.",
@@ -160,6 +177,43 @@ Examples:
 	}
 }
 
+// runBuyInferenceProvider is the BYOK front door for a registry provider.
+// It resolves the API key (--api-key flag, else model.ResolveAPIKey), opens
+// prof.SignupURL in a browser only when no key was found and the UI is an
+// interactive, non-JSON TTY, then hands off to setupCloudProvider with the
+// optional --model and --free values. No wallet or x402 call is made here.
+func runBuyInferenceProvider(cfg *config.Config, cmd *cli.Command, prof model.ProviderInfo) error {
+	u := getUI(cmd)
+	u.Infof("Connecting %s for your agents (bring-your-own-key)", prof.Name)
+
+	apiKey := strings.TrimSpace(cmd.String("api-key"))
+	if apiKey == "" {
+		if key, envVar := model.ResolveAPIKey(prof.ID); key != "" {
+			apiKey = key
+			u.Infof("Using %s API key from %s", prof.Name, envVar)
+		}
+	}
+
+	// openurl: send the operator to the provider's key page before any key
+	// prompt; skipped when a key is in hand, SignupURL is empty, non-TTY, or JSON.
+	if apiKey == "" && prof.SignupURL != "" && u.IsTTY() && !u.IsJSON() {
+		u.Infof("Opening %s to create an API key …", prof.SignupURL)
+		if err := openBrowser(prof.SignupURL); err != nil { // best-effort: fall back to printing the URL
+			u.Dim(fmt.Sprintf("(couldn't open a browser — visit %s)", prof.SignupURL))
+		}
+	}
+
+	var models []string
+	if m := strings.TrimSpace(cmd.String("model")); m != "" {
+		models = []string{m}
+	}
+
+	// Shared engine (not shown in this hunk): expected to prompt for the key
+	// if still empty, seed --free, resolve a model (registry default or live
+	// /v1/models), patch LiteLLM, and promote + sync the agents to use it.
+	return setupCloudProvider(cfg, u, prof, apiKey, models, cmd.Bool("free"))
+}
+
 // runBuyInference is the orchestrator for the new flow. Kept separate from
 // the cli.Command literal so the steps stay scannable: resolve agent →
 // resolve seller URL → pick catalog entry → resolve token+count+budget →
@@ -167,6 +221,18 @@ Examples:
 func runBuyInference(ctx context.Context, cfg *config.Config, cmd *cli.Command) error {
 	u := getUI(cmd)
 
+	// Front door: if the argument names a hosted provider in the registry
+	// (venice, openrouter, …) rather than a seller URL, run BYOK onboarding
+	// — open the provider's key page and wire the LiteLLM gateway. Ollama is
+	// local and free, so it's not a "buy" target.
+	arg := strings.TrimSpace(cmd.String("seller"))
+	if arg == "" {
+		arg = strings.TrimSpace(cmd.Args().First())
+	}
+	if prof, ok := model.ProviderByID(arg); ok && prof.ID != model.ProviderOllama {
+		return runBuyInferenceProvider(cfg, cmd, prof)
+	}
+
 	u.Info("Purchasing remote inference for running Obol Agents")
 
 	target, err := resolveBuyAgent(cfg, cmd)
diff --git a/cmd/obol/buy_test.go b/cmd/obol/buy_test.go
index 5b8bce16..a59f117e 100644
--- a/cmd/obol/buy_test.go
+++ b/cmd/obol/buy_test.go
@@ -8,6 +8,8 @@ import (
 
 	"github.com/ObolNetwork/obol-stack/internal/agentruntime"
 	"github.com/ObolNetwork/obol-stack/internal/buy"
+	"github.com/ObolNetwork/obol-stack/internal/config"
+	"github.com/ObolNetwork/obol-stack/internal/model"
 	"github.com/ObolNetwork/obol-stack/internal/schemas"
 )
 
@@ -600,3 +602,36 @@ func TestLooksLikeURL(t *testing.T) {
 		}
 	}
 }
+
+// TestBuyInference_BYOKFrontDoor pins the BYOK onboarding surface on
+// `obol buy inference`: the command exposes --api-key/--free/--model/--seller,
+// the six hosted aggregator ids below resolve in the registry to a non-Ollama
+// provider (the predicate runBuyInference dispatches on), and ollama resolves.
+func TestBuyInference_BYOKFrontDoor(t *testing.T) {
+	cmd := buyInferenceCommand(&config.Config{})
+
+	want := map[string]bool{"api-key": false, "free": false, "model": false, "seller": false} // flag name → seen
+	for _, f := range cmd.Flags {
+		for _, n := range f.Names() {
+			if _, ok := want[n]; ok {
+				want[n] = true
+			}
+		}
+	}
+	for n, found := range want {
+		if !found {
+			t.Errorf("buy inference missing --%s flag", n)
+		}
+	}
+
+	// Same predicate as runBuyInference: a registry hit whose ID is not ProviderOllama.
+	for _, id := range []string{"venice", "openrouter", "nvidia", "gmi", "novita", "huggingface"} {
+		p, ok := model.ProviderByID(id)
+		if !ok || p.ID == model.ProviderOllama {
+			t.Errorf("provider %q should be a BYOK buy-inference target", id)
+		}
+	}
+	if p, ok := model.ProviderByID("ollama"); !ok || p.ID != model.ProviderOllama {
+		t.Errorf("ollama must remain a local (non-buy) provider")
+	}
+}