From 74f896ebc6f566d133145ef075afac917985862f Mon Sep 17 00:00:00 2001
From: Ben White <ben@posthog.com>
Date: Wed, 24 Jun 2026 15:52:47 +0200
Subject: [PATCH 1/9] feat(agent-applications): model policy config UI +
 session model
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Surface and edit `spec.model_policy` (the auto/manual model picker) in the
agent config pane, and show which model a session used.

- agent-platform-types: add AgentModelPolicy (auto level / manual list) +
  ModelCatalog types; make spec.model optional (legacy).
- Model section: interactive policy editor (mode/level/reasoning dropdowns
  with descriptions + icons), an "auto level resolves to" preview with live
  pricing, and a searchable model browser (all served models + cost profiles).
  Manual mode supports add-from-browser and drag-to-reorder. Editing is
  local-state only — no save wired yet.
- useModelCatalog: catalog stand-in (snapshot of the gateway /v1/models), the
  single swap point for a real model-info endpoint.
- Session detail: add a "Model" KPI showing the model(s) that answered.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/shared/src/agent-platform-types.ts   |  55 +-
 .../components/AgentConfigurationPane.tsx     |   8 +-
 .../components/AgentModelConfig.tsx           | 620 ++++++++++++++++++
 .../components/AgentSessionDetailBody.tsx     |  25 +-
 .../hooks/useModelCatalog.ts                  | 446 +++++++++++++
 5 files changed, 1147 insertions(+), 7 deletions(-)
 create mode 100644 packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
 create mode 100644 packages/ui/src/features/agent-applications/hooks/useModelCatalog.ts

diff --git a/packages/shared/src/agent-platform-types.ts b/packages/shared/src/agent-platform-types.ts
index de61bdb7f..f961fd7ba 100644
--- a/packages/shared/src/agent-platform-types.ts
+++ b/packages/shared/src/agent-platform-types.ts
@@ -57,12 +57,63 @@ export interface AgentApplication {
   ingress_base_url: string | null;
 }
 
+export type AgentReasoningEffort =
+  | "minimal"
+  | "low"
+  | "medium"
+  | "high"
+  | "xhigh";
+
+export type AgentModelLevel = "low" | "medium" | "high";
+
+/** One model in a manual policy: a canonical model id (e.g.
+ *  `anthropic/claude-sonnet-4.6`) plus an optional per-model reasoning override. */
+export interface AgentModelEntry {
+  model: string;
+  reasoning?: AgentReasoningEffort;
+}
+
+/**
+ * How a revision picks its model. `auto` resolves a maintained, priority-ordered,
+ * cross-provider list from `level` at runtime; `manual` pins an author-ordered
+ * fallback list (primary first). Mirrors `spec.model_policy` in the backend.
+ */
+export type AgentModelPolicy =
+  | { mode: "auto"; level?: AgentModelLevel; reasoning?: AgentReasoningEffort }
+  | { mode: "manual"; models: AgentModelEntry[] };
+
+/**
+ * A served model + its cost profile, as the model browser shows it. Mirrors the
+ * ai-gateway catalog (`@posthog/agent-applications-models`). Pricing is USD per
+ * million tokens.
+ */
+export interface ModelCatalogEntry {
+  /** Canonical id, e.g. `anthropic/claude-sonnet-4.6`. */
+  model: string;
+  provider: string;
+  context_window: number;
+  input: number;
+  output: number;
+  cacheRead?: number;
+  cacheWrite?: number;
+}
+
+/** The full served catalog plus the curated `auto` level → model mapping. */
+export interface ModelCatalog {
+  models: ModelCatalogEntry[];
+  /** Canonical ids each auto level resolves to, in priority order. */
+  levels: Record<AgentModelLevel, string[]>;
+}
+
 /**
  * The agent spec carried on a revision. Known top-level fields are surfaced and
  * the rest passes through pending fully-typed elaboration.
  */
 export interface AgentSpec {
-  model: string;
+  /** Model selection. `model` is the legacy single-string form; current specs
+   *  carry `model_policy`. One or the other is present. */
+  model_policy?: AgentModelPolicy;
+  model?: string;
   triggers?: unknown[];
   tools?: unknown[];
   mcps?: unknown[];
@@ -75,7 +126,7 @@ export interface AgentSpec {
     max_wall_seconds?: number;
   };
   entrypoint?: string;
-  reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh";
+  reasoning?: AgentReasoningEffort;
   [key: string]: unknown;
 }
 
diff --git a/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx b/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
index 7c4206d6d..4df298119 100644
--- a/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
@@ -37,6 +37,7 @@ import { useAgentRevisionBundle } from "../hooks/useAgentRevisionBundle";
 import { useAgentRevisions } from "../hooks/useAgentRevisions";
 import { triggerRequiredSecretsFor } from "../utils/triggerSecrets";
 import { AgentDetailEmptyState, AgentDetailLayout } from "./AgentDetailLayout";
+import { AgentModelConfig } from "./AgentModelConfig";
 import { AgentRevisionBar } from "./AgentRevisionBar";
 import { CopyButton } from "./CopyButton";
 import { CronFireButton } from "./CronFireButton";
@@ -460,7 +461,7 @@ export function AgentConfigurationPane({
 
 const SECTION_INFO: Record<string, string> = {
   "cfg:model":
-    "The model every request goes to. `reasoning` sets the extended-thinking budget; limits cap a run's turns, tool calls and wall time.",
+    "How the agent picks its model. `auto` resolves a level (low/medium/high) to a maintained cross-provider list at runtime; `manual` pins an explicit priority list. `reasoning` sets the extended-thinking budget.",
   "cfg:instructions":
     "The agent's entrypoint prompt (agent.md) — the always-on system instructions.",
   "cfg:triggers": "What can start a session — chat, webhook, mcp, slack, cron.",
@@ -695,9 +696,8 @@ function byPath(files: BundleFile[], path: string): BundleFile | undefined {
 
 function ModelBody({ spec }: { spec: AgentSpec }) {
   return (
-    <Flex direction="column" gap="2">
-      <Row label="model" value={spec.model ?? "not set"} mono />
-      <Row label="reasoning" value={spec.reasoning ?? "default"} />
+    <Flex direction="column" gap="4">
+      <AgentModelConfig spec={spec} />
       {spec.entrypoint ? (
         <Row label="entrypoint" value={spec.entrypoint} mono />
       ) : null}
diff --git a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
new file mode 100644
index 000000000..578312834
--- /dev/null
+++ b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
@@ -0,0 +1,620 @@
+import { PointerSensor } from "@dnd-kit/dom";
+import { type DragDropEvents, DragDropProvider } from "@dnd-kit/react";
+import { useSortable } from "@dnd-kit/react/sortable";
+import {
+  BrainIcon,
+  CaretDownIcon,
+  GaugeIcon,
+  MagnifyingGlassIcon,
+  SlidersHorizontalIcon,
+} from "@phosphor-icons/react";
+import type {
+  AgentModelEntry,
+  AgentModelLevel,
+  AgentModelPolicy,
+  AgentReasoningEffort,
+  AgentSpec,
+  ModelCatalogEntry,
+} from "@posthog/shared/agent-platform-types";
+import { Badge } from "@posthog/ui/primitives/Badge";
+import { Flex, Popover, Text } from "@radix-ui/themes";
+import { type ReactNode, type RefCallback, useMemo, useState } from "react";
+import { useModelCatalog } from "../hooks/useModelCatalog";
+
+/**
+ * The rich model section: an interactive policy editor (mode + level +
+ * reasoning), a preview of what an `auto` level resolves to, and a searchable
+ * browser of every served model with its cost profile.
+ *
+ * Editing is local-state only (no save wired yet); the "not saved" hint shows
+ * once the policy differs from the one derived on first render. The catalog
+ * comes from `useModelCatalog` (a stand-in for the model-info endpoint).
+ */
+export function AgentModelConfig({ spec }: { spec: AgentSpec }) {
+  const { catalog } = useModelCatalog();
+  const initial = spec.model_policy;
+
+  const [mode, setMode] = useState<"auto" | "manual">(initial?.mode ?? "auto");
+  const [level, setLevel] = useState<AgentModelLevel>(
+    initial?.mode === "auto" ? (initial.level ?? "medium") : "medium",
+  );
+  const [reasoning, setReasoning] = useState<AgentReasoningEffort | undefined>(
+    initial?.mode === "auto" ? initial.reasoning : spec.reasoning,
+  );
+  const [manual, setManual] = useState<AgentModelEntry[]>(
+    initial?.mode === "manual" ? initial.models : [],
+  );
+
+  const policy: AgentModelPolicy =
+    mode === "auto"
+      ? { mode: "auto", level, ...(reasoning ? { reasoning } : {}) }
+      : { mode: "manual", models: manual };
+
+  // Baseline is the first-render policy, so filled-in defaults aren't edits.
+  const [baseline] = useState(() => JSON.stringify(policy));
+  const dirty = JSON.stringify(policy) !== baseline;
+
+  const byId = useMemo(
+    () => new Map(catalog.models.map((m) => [m.model, m])),
+    [catalog.models],
+  );
+
+  return (
+    <Flex direction="column" gap="4">
+      <Flex direction="column" gap="3">
+        <Select
+          label="mode"
+          icon={<SlidersHorizontalIcon size={14} />}
+          value={mode}
+          onChange={(v) => setMode(v as "auto" | "manual")}
+          options={MODE_OPTIONS}
+        />
+
+        {mode === "auto" ? (
+          <>
+            <Select
+              label="level"
+              icon={<GaugeIcon size={14} />}
+              value={level}
+              onChange={(v) => setLevel(v as AgentModelLevel)}
+              options={LEVEL_OPTIONS}
+            />
+            <Select
+              label="reasoning"
+              icon={<BrainIcon size={14} />}
+              value={reasoning ?? "default"}
+              onChange={(v) =>
+                setReasoning(
+                  v === "default" ? undefined : (v as AgentReasoningEffort),
+                )
+              }
+              options={REASONING_OPTIONS}
+            />
+          </>
+        ) : null}
+      </Flex>
+
+      {mode === "auto" ? (
+        <AutoLevelPreview
+          level={level}
+          ids={catalog.levels[level]}
+          byId={byId}
+        />
+      ) : (
+        <ManualEditor models={manual} byId={byId} onChange={setManual} />
+      )}
+
+      <Flex align="center" justify="between" gap="2" wrap="wrap">
+        <Subhead>browse all models · {catalog.models.length}</Subhead>
+        {dirty ? (
+          <Text className="text-[11px] text-amber-11">
+            preview — not saved yet
+          </Text>
+        ) : null}
+      </Flex>
+      <ModelBrowser
+        models={catalog.models}
+        canAdd={mode === "manual"}
+        selected={mode === "manual" ? manual.map((m) => m.model) : []}
+        onAdd={(id) =>
+          setManual((prev) =>
+            prev.some((m) => m.model === id) ? prev : [...prev, { model: id }],
+          )
+        }
+      />
+    </Flex>
+  );
+}
+
+const MODE_OPTIONS = [
+  {
+    value: "auto",
+    title: "Auto",
+    description: "Platform-managed list, resolved across providers at runtime.",
+  },
+  {
+    value: "manual",
+    title: "Manual",
+    description: "Explicit, author-ordered fallback list you pin yourself.",
+  },
+] as const;
+
+const LEVEL_OPTIONS = [
+  {
+    value: "low",
+    title: "Low",
+    description: "Cheapest — short, formulaic, no-reasoning jobs.",
+  },
+  {
+    value: "medium",
+    title: "Medium",
+    description: "Balanced default — multi-step but bounded work.",
+  },
+  {
+    value: "high",
+    title: "High",
+    description: "Top-tier — long, branching, reasoning-heavy work.",
+  },
+] as const;
+
+const REASONING_OPTIONS = [
+  {
+    value: "default",
+    title: "Default",
+    description: "Provider / spec default — no explicit budget.",
+  },
+  {
+    value: "minimal",
+    title: "Minimal",
+    description: "No deliberation — cheapest, fastest.",
+  },
+  { value: "low", title: "Low", description: "Light deliberation." },
+  { value: "medium", title: "Medium", description: "Moderate deliberation." },
+  { value: "high", title: "High", description: "Deep deliberation." },
+  {
+    value: "xhigh",
+    title: "Xhigh",
+    description: "Maximal — research-grade, ~5–10× the per-turn cost.",
+  },
+] as const;
+
+const LEVEL_BLURB: Record<AgentModelLevel, string> = {
+  low: "Cheapest — short, formulaic, no-reasoning jobs (lookups, FAQ bots).",
+  medium: "Balanced default — multi-step but bounded work.",
+  high: "Top-tier — long, branching, reasoning-heavy work.",
+};
+
+function AutoLevelPreview({
+  level,
+  ids,
+  byId,
+}: {
+  level: AgentModelLevel;
+  ids: string[];
+  byId: Map<string, ModelCatalogEntry>;
+}) {
+  return (
+    <Flex direction="column" gap="2">
+      <Muted>
+        <b className="text-gray-12">auto</b> resolves the level to a maintained,
+        priority-ordered, cross-provider list at runtime — the runner tries each
+        in order until one answers, so the agent rides upgrades and survives a
+        provider outage without a spec change. <b>{level}</b>:{" "}
+        {LEVEL_BLURB[level]}
+      </Muted>
+      <Subhead>{level} resolves to · priority order</Subhead>
+      {ids.map((id, i) => {
+        const m = byId.get(id);
+        return (
+          <Flex
+            key={id}
+            align="center"
+            justify="between"
+            gap="3"
+            className="rounded-(--radius-2) border border-border bg-(--gray-2) px-3 py-2"
+          >
+            <Flex align="center" gap="2" className="min-w-0">
+              <Text className="shrink-0 text-[11px] text-gray-10">
+                {i === 0 ? "primary" : `#${i + 1}`}
+              </Text>
+              <Text className="truncate text-[12.5px] text-gray-12 [font-family:var(--font-mono)]">
+                {id}
+              </Text>
+            </Flex>
+            {m ? <CostInline m={m} /> : <Muted>not in catalog</Muted>}
+          </Flex>
+        );
+      })}
+    </Flex>
+  );
+}
+
+function ManualEditor({
+  models,
+  byId,
+  onChange,
+}: {
+  models: AgentModelEntry[];
+  byId: Map<string, ModelCatalogEntry>;
+  onChange: (next: AgentModelEntry[]) => void;
+}) {
+  const ids = models.map((m) => m.model);
+  const handleDragOver: DragDropEvents["dragover"] = (event) => {
+    const sourceId = event.operation.source?.id;
+    const targetId = event.operation.target?.id;
+    if (!sourceId || !targetId || sourceId === targetId) return;
+    const from = ids.indexOf(String(sourceId));
+    const to = ids.indexOf(String(targetId));
+    if (from === -1 || to === -1 || from === to) return;
+    const next = [...models];
+    const [moved] = next.splice(from, 1);
+    next.splice(to, 0, moved);
+    onChange(next);
+  };
+  return (
+    <Flex direction="column" gap="2">
+      <Muted>
+        <b className="text-gray-12">manual</b> pins an explicit fallback list
+        (primary first). Add models from the browser below; order them
+        provider-diverse so a single provider outage degrades instead of
+        failing.
+      </Muted>
+      <Subhead>models · priority order · drag to reorder</Subhead>
+      {models.length === 0 ? (
+        <Text className="rounded-(--radius-2) border border-(--gray-5) border-dashed px-3 py-3 text-[12px] text-gray-10">
+          No models yet — add one from the browser below.
+        </Text>
+      ) : (
+        <DragDropProvider
+          onDragOver={handleDragOver}
+          sensors={[
+            {
+              plugin: PointerSensor,
+              options: { activationConstraints: { distance: { value: 5 } } },
+            },
+          ]}
+        >
+          <Flex direction="column" gap="2">
+            {models.map((entry, i) => (
+              <SortableModelRow
+                key={entry.model}
+                id={entry.model}
+                index={i}
+                entry={entry}
+                m={byId.get(entry.model)}
+                onRemove={() => onChange(models.filter((_, k) => k !== i))}
+              />
+            ))}
+          </Flex>
+        </DragDropProvider>
+      )}
+    </Flex>
+  );
+}
+
+function SortableModelRow({
+  id,
+  index,
+  entry,
+  m,
+  onRemove,
+}: {
+  id: string;
+  index: number;
+  entry: AgentModelEntry;
+  m?: ModelCatalogEntry;
+  onRemove: () => void;
+}) {
+  const { ref, handleRef, isDragging } = useSortable({
+    id,
+    index,
+    group: "manual-models",
+    transition: { duration: 200, easing: "ease" },
+  });
+  return (
+    <div ref={ref} style={{ opacity: isDragging ? 0.5 : 1 }}>
+      <Flex
+        align="center"
+        justify="between"
+        gap="3"
+        className="rounded-(--radius-2) border border-border bg-(--gray-2) px-3 py-2"
+      >
+        <Flex align="center" gap="2" className="min-w-0">
+          <button
+            ref={handleRef as RefCallback<HTMLButtonElement>}
+            type="button"
+            title="Drag to reorder"
+            className="shrink-0 cursor-grab text-[13px] text-gray-9 leading-none hover:text-gray-11"
+          >
+            ⠿
+          </button>
+          <Text className="shrink-0 text-[11px] text-gray-10">
+            {index === 0 ? "primary" : `#${index + 1}`}
+          </Text>
+          <Text className="truncate text-[12.5px] text-gray-12 [font-family:var(--font-mono)]">
+            {entry.model}
+          </Text>
+        </Flex>
+        <Flex align="center" gap="2" className="shrink-0">
+          {m ? <CostInline m={m} /> : null}
+          <MiniBtn label="remove" title="Remove" onClick={onRemove} />
+        </Flex>
+      </Flex>
+    </div>
+  );
+}
+
+type SortKey = "name" | "cheapest" | "priciest";
+
+function ModelBrowser({
+  models,
+  canAdd,
+  selected,
+  onAdd,
+}: {
+  models: ModelCatalogEntry[];
+  canAdd: boolean;
+  selected: string[];
+  onAdd: (id: string) => void;
+}) {
+  const [q, setQ] = useState("");
+  const [sort, setSort] = useState<SortKey>("name");
+
+  const rows = useMemo(() => {
+    const needle = q.trim().toLowerCase();
+    const filtered = needle
+      ? models.filter(
+          (m) =>
+            m.model.toLowerCase().includes(needle) ||
+            m.provider.toLowerCase().includes(needle),
+        )
+      : models;
+    const sorted = [...filtered];
+    if (sort === "name") sorted.sort((a, b) => a.model.localeCompare(b.model));
+    if (sort === "cheapest") sorted.sort((a, b) => a.input - b.input);
+    if (sort === "priciest") sorted.sort((a, b) => b.input - a.input);
+    return sorted;
+  }, [models, q, sort]);
+
+  return (
+    <Flex direction="column" gap="2">
+      <Flex align="center" gap="2" wrap="wrap">
+        <div className="relative min-w-0 flex-1">
+          <MagnifyingGlassIcon
+            size={13}
+            className="-translate-y-1/2 pointer-events-none absolute top-1/2 left-2.5 text-gray-10"
+          />
+          <input
+            type="search"
+            value={q}
+            onChange={(e) => setQ(e.currentTarget.value)}
+            placeholder="Search models…"
+            aria-label="Search models"
+            className="h-8 w-full rounded-(--radius-2) border border-border bg-(--color-panel-solid) pr-2 pl-8 text-[12.5px]"
+          />
+        </div>
+        <Seg
+          value={sort}
+          onChange={(v) => setSort(v as SortKey)}
+          options={[
+            { value: "name", label: "Name" },
+            { value: "cheapest", label: "Cheapest" },
+            { value: "priciest", label: "Priciest" },
+          ]}
+        />
+      </Flex>
+
+      <Flex direction="column" gap="1">
+        {rows.map((m) => {
+          const added = selected.includes(m.model);
+          return (
+            <Flex
+              key={m.model}
+              direction="column"
+              gap="1"
+              className="rounded-(--radius-2) border border-border bg-(--gray-2) px-3 py-2"
+            >
+              <Flex align="center" justify="between" gap="2">
+                <Text className="truncate text-[12.5px] text-gray-12 [font-family:var(--font-mono)]">
+                  {m.model}
+                </Text>
+                {canAdd ? (
+                  <MiniBtn
+                    label={added ? "added" : "+ add"}
+                    title={added ? "Already in the list" : "Add to manual list"}
+                    onClick={() => onAdd(m.model)}
+                    disabled={added}
+                  />
+                ) : null}
+              </Flex>
+              <Flex align="center" gap="2" wrap="wrap">
+                <Badge color="gray">{m.provider}</Badge>
+                <Stat label="ctx" value={fmtCtx(m.context_window)} />
+                <Stat label="in" value={fmtUsd(m.input)} />
+                <Stat label="out" value={fmtUsd(m.output)} />
+                {m.cacheRead != null ? (
+                  <Stat label="cache" value={fmtUsd(m.cacheRead)} />
+                ) : null}
+              </Flex>
+            </Flex>
+          );
+        })}
+        {rows.length === 0 ? (
+          <Text className="px-1 py-2 text-[12px] text-gray-10">
+            No models match “{q}”.
+          </Text>
+        ) : null}
+      </Flex>
+    </Flex>
+  );
+}
+
+// --- small presentational helpers ---
+
+function CostInline({ m }: { m: ModelCatalogEntry }) {
+  return (
+    <Text className="shrink-0 text-[11px] text-gray-10">
+      in {fmtUsd(m.input)} · out {fmtUsd(m.output)}
+      <span className="text-gray-9"> /Mtok</span>
+    </Text>
+  );
+}
+
+function Stat({ label, value }: { label: string; value: string }) {
+  return (
+    <Text className="text-[11px] text-gray-10">
+      {label} <span className="text-gray-12">{value}</span>
+    </Text>
+  );
+}
+
+function Select({
+  label,
+  icon,
+  value,
+  onChange,
+  options,
+}: {
+  label: string;
+  icon?: ReactNode;
+  value: string;
+  onChange: (v: string) => void;
+  options: readonly { value: string; title: string; description: string }[];
+}) {
+  const [open, setOpen] = useState(false);
+  const current = options.find((o) => o.value === value) ?? options[0];
+  return (
+    <Flex direction="column" gap="1">
+      <Flex align="center" justify="between" gap="3">
+        <Flex align="center" gap="2" className="shrink-0 text-gray-10">
+          {icon}
+          <Text className="text-[11px] uppercase tracking-wide">{label}</Text>
+        </Flex>
+        <Popover.Root open={open} onOpenChange={setOpen}>
+          <Popover.Trigger>
+            <button
+              type="button"
+              className="inline-flex w-fit items-center gap-2 rounded-(--radius-2) border border-border bg-(--color-panel-solid) px-2.5 py-1.5 text-left hover:border-(--gray-7)"
+            >
+              <Text className="text-[12.5px] text-gray-12">
+                {current?.title}
+              </Text>
+              <CaretDownIcon size={12} className="shrink-0 text-gray-10" />
+            </button>
+          </Popover.Trigger>
+          <Popover.Content size="1" width="360px" className="p-0">
+            <ul className="max-h-72 divide-y divide-(--gray-4) overflow-auto">
+              {options.map((o) => (
+                <li key={o.value}>
+                  <button
+                    type="button"
+                    onClick={() => {
+                      onChange(o.value);
+                      setOpen(false);
+                    }}
+                    aria-current={o.value === value ? "true" : undefined}
+                    className={`flex w-full flex-col gap-0.5 px-3 py-2 text-left ${
+                      o.value === value
+                        ? "bg-(--accent-3)"
+                        : "hover:bg-(--gray-3)"
+                    }`}
+                  >
+                    <Text className="text-[12.5px] text-gray-12">
+                      {o.title}
+                    </Text>
+                    <Text className="text-[11px] text-gray-10 leading-snug">
+                      {o.description}
+                    </Text>
+                  </button>
+                </li>
+              ))}
+            </ul>
+          </Popover.Content>
+        </Popover.Root>
+      </Flex>
+      {current?.description ? (
+        <Text className="text-[11px] text-gray-10 leading-snug">
+          {current.description}
+        </Text>
+      ) : null}
+    </Flex>
+  );
+}
+
+function Seg<T extends string>({
+  value,
+  onChange,
+  options,
+}: {
+  value: T;
+  onChange: (v: T) => void;
+  options: { value: T; label: string }[];
+}) {
+  return (
+    <Flex gap="1" wrap="wrap">
+      {options.map((o) => (
+        <button
+          key={o.value}
+          type="button"
+          onClick={() => onChange(o.value)}
+          className={`rounded-full border px-3 py-1 text-[12px] capitalize ${
+            value === o.value
+              ? "border-(--accent-7) bg-(--accent-3) text-gray-12"
+              : "border-border bg-(--color-panel-solid) text-gray-11 hover:text-gray-12"
+          }`}
+        >
+          {o.label}
+        </button>
+      ))}
+    </Flex>
+  );
+}
+
+function MiniBtn({
+  label,
+  title,
+  onClick,
+  disabled,
+}: {
+  label: string;
+  title: string;
+  onClick: () => void;
+  disabled?: boolean;
+}) {
+  return (
+    <button
+      type="button"
+      title={title}
+      onClick={onClick}
+      disabled={disabled}
+      className="rounded-(--radius-1) border border-border px-2 py-0.5 text-[11px] text-gray-11 hover:text-gray-12 disabled:opacity-40"
+    >
+      {label}
+    </button>
+  );
+}
+
+function Subhead({ children }: { children: ReactNode }) {
+  return (
+    <Text className="block text-[11px] text-gray-10 uppercase tracking-wide [font-family:var(--font-mono)]">
+      {children}
+    </Text>
+  );
+}
+
+function Muted({ children }: { children: ReactNode }) {
+  return (
+    <Text className="text-[12px] text-gray-10 leading-snug">{children}</Text>
+  );
+}
+
+function fmtUsd(n: number): string {
+  return `$${n}`;
+}
+
+function fmtCtx(n: number): string {
+  // Compact token count ("200K", "1M"). The M branch starts at 999_500 because
+  // anything from there up rounds to 1000K, which should read "1M" instead.
+  if (n >= 999_500) return `${(n / 1_000_000).toFixed(1).replace(/\.0$/, "")}M`;
+  return `${Math.round(n / 1000)}K`;
+}
diff --git a/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx b/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
index 2de536756..99fc3193d 100644
--- a/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
@@ -19,6 +19,9 @@ interface SessionMetrics {
   messages: number;
   toolCalls: number;
   errors: number;
+  /** Distinct models that answered, in first-seen order. Usually one; more than
+   *  one means the turn(s) fell back across the policy list. */
+  models: string[];
 }
 
 function computeMetrics(
@@ -26,12 +29,14 @@ function computeMetrics(
 ): SessionMetrics {
   let toolCalls = 0;
   let errors = 0;
+  const models: string[] = [];
   for (const msg of session.conversation) {
     if (msg.role === "assistant") {
       for (const part of msg.content) {
         if (part.type === "toolCall") toolCalls += 1;
       }
       if (msg.errorMessage) errors += 1;
+      if (msg.model && !models.includes(msg.model)) models.push(msg.model);
     } else if (msg.role === "toolResult" && msg.isError) {
       errors += 1;
     }
@@ -40,6 +45,7 @@ function computeMetrics(
     messages: session.conversation_total_turns ?? session.conversation.length,
     toolCalls,
     errors,
+    models,
   };
 }
 
@@ -134,6 +140,18 @@ export function AgentSessionDetailBody({
                   label="Tool calls"
                   value={String(metrics.toolCalls)}
                 />
+                {metrics.models.length > 0 ? (
+                  <MetricItem
+                    label="Model"
+                    value={
+                      metrics.models.length === 1
+                        ? metrics.models[0]
+                        : `${metrics.models[0]} +${metrics.models.length - 1}`
+                    }
+                    title={metrics.models.join(", ")}
+                    mono
+                  />
+                ) : null}
                 <MetricItem
                   label="Cost"
                   value={formatSpendUsd(session.usage_total.cost_total)}
@@ -219,10 +237,14 @@ function MetricItem({
   label,
   value,
   tone,
+  mono,
+  title,
 }: {
   label: string;
   value: string;
   tone?: "bad";
+  mono?: boolean;
+  title?: string;
 }) {
   return (
     <Flex direction="column" gap="0.5">
@@ -230,9 +252,10 @@ function MetricItem({
         {label}
       </Text>
       <Text
+        title={title}
         className={`font-semibold text-[14px] leading-none ${
           tone === "bad" ? "text-(--red-11)" : "text-gray-12"
-        }`}
+        } ${mono ? "text-[12.5px] [font-family:var(--font-mono)]" : ""}`}
       >
         {value}
       </Text>
diff --git a/packages/ui/src/features/agent-applications/hooks/useModelCatalog.ts b/packages/ui/src/features/agent-applications/hooks/useModelCatalog.ts
new file mode 100644
index 000000000..f6d74fd59
--- /dev/null
+++ b/packages/ui/src/features/agent-applications/hooks/useModelCatalog.ts
@@ -0,0 +1,446 @@
+import type {
+  ModelCatalog,
+  ModelCatalogEntry,
+} from "@posthog/shared/agent-platform-types";
+
+/**
+ * The served-model catalog for the model browser + auto-level preview.
+ *
+ * STAND-IN: this is a snapshot of the live ai-gateway `/v1/models` catalog. When
+ * the catalog endpoint lands (e.g. `GET …/agent_applications/models`), replace
+ * the body with a real query — `ModelCatalog` already matches the wire shape, so
+ * nothing downstream changes.
+ */
+const MODELS: ModelCatalogEntry[] = [
+  {
+    model: "anthropic/claude-haiku-4.5",
+    provider: "anthropic",
+    context_window: 200000,
+    input: 1.0,
+    output: 5.0,
+    cacheRead: 0.1,
+    cacheWrite: 1.25,
+  },
+  {
+    model: "anthropic/claude-opus-4.1",
+    provider: "anthropic",
+    context_window: 200000,
+    input: 15.0,
+    output: 75.0,
+    cacheRead: 1.5,
+    cacheWrite: 18.75,
+  },
+  {
+    model: "anthropic/claude-opus-4.5",
+    provider: "anthropic",
+    context_window: 200000,
+    input: 5.0,
+    output: 25.0,
+    cacheRead: 0.5,
+    cacheWrite: 6.25,
+  },
+  {
+    model: "anthropic/claude-opus-4.6",
+    provider: "anthropic",
+    context_window: 1000000,
+    input: 5.0,
+    output: 25.0,
+    cacheRead: 0.5,
+    cacheWrite: 6.25,
+  },
+  {
+    model: "anthropic/claude-opus-4.7",
+    provider: "anthropic",
+    context_window: 1000000,
+    input: 5.0,
+    output: 25.0,
+    cacheRead: 0.5,
+    cacheWrite: 6.25,
+  },
+  {
+    model: "anthropic/claude-opus-4.8",
+    provider: "anthropic",
+    context_window: 1000000,
+    input: 5.0,
+    output: 25.0,
+    cacheRead: 0.5,
+    cacheWrite: 6.25,
+  },
+  {
+    model: "anthropic/claude-sonnet-4.5",
+    provider: "anthropic",
+    context_window: 1000000,
+    input: 3.0,
+    output: 15.0,
+    cacheRead: 0.3,
+    cacheWrite: 3.75,
+  },
+  {
+    model: "anthropic/claude-sonnet-4.6",
+    provider: "anthropic",
+    context_window: 1000000,
+    input: 3.0,
+    output: 15.0,
+    cacheRead: 0.3,
+    cacheWrite: 3.75,
+  },
+  {
+    model: "openai/gpt-3.5-turbo",
+    provider: "openai",
+    context_window: 16385,
+    input: 0.5,
+    output: 1.5,
+  },
+  {
+    model: "openai/gpt-3.5-turbo-16k",
+    provider: "openai",
+    context_window: 16385,
+    input: 3.0,
+    output: 4.0,
+  },
+  {
+    model: "openai/gpt-3.5-turbo-instruct",
+    provider: "openai",
+    context_window: 4095,
+    input: 1.5,
+    output: 2.0,
+  },
+  {
+    model: "openai/gpt-4",
+    provider: "openai",
+    context_window: 8191,
+    input: 30.0,
+    output: 60.0,
+  },
+  {
+    model: "openai/gpt-4-turbo",
+    provider: "openai",
+    context_window: 128000,
+    input: 10.0,
+    output: 30.0,
+  },
+  {
+    model: "openai/gpt-4.1",
+    provider: "openai",
+    context_window: 1047576,
+    input: 2.0,
+    output: 8.0,
+    cacheRead: 0.5,
+  },
+  {
+    model: "openai/gpt-4.1-mini",
+    provider: "openai",
+    context_window: 1047576,
+    input: 0.4,
+    output: 1.6,
+    cacheRead: 0.1,
+  },
+  {
+    model: "openai/gpt-4.1-nano",
+    provider: "openai",
+    context_window: 1047576,
+    input: 0.1,
+    output: 0.4,
+    cacheRead: 0.025,
+  },
+  {
+    model: "openai/gpt-4o",
+    provider: "openai",
+    context_window: 128000,
+    input: 2.5,
+    output: 10.0,
+    cacheRead: 1.25,
+  },
+  {
+    model: "openai/gpt-4o-2024-05-13",
+    provider: "openai",
+    context_window: 128000,
+    input: 5.0,
+    output: 15.0,
+  },
+  {
+    model: "openai/gpt-4o-2024-08-06",
+    provider: "openai",
+    context_window: 128000,
+    input: 2.5,
+    output: 10.0,
+    cacheRead: 1.25,
+  },
+  {
+    model: "openai/gpt-4o-2024-11-20",
+    provider: "openai",
+    context_window: 128000,
+    input: 2.5,
+    output: 10.0,
+    cacheRead: 1.25,
+  },
+  {
+    model: "openai/gpt-4o-mini",
+    provider: "openai",
+    context_window: 128000,
+    input: 0.15,
+    output: 0.6,
+    cacheRead: 0.075,
+  },
+  {
+    model: "openai/gpt-4o-mini-2024-07-18",
+    provider: "openai",
+    context_window: 128000,
+    input: 0.15,
+    output: 0.6,
+    cacheRead: 0.075,
+  },
+  {
+    model: "openai/gpt-4o-mini-search-preview",
+    provider: "openai",
+    context_window: 128000,
+    input: 0.15,
+    output: 0.6,
+  },
+  {
+    model: "openai/gpt-4o-search-preview",
+    provider: "openai",
+    context_window: 128000,
+    input: 2.5,
+    output: 10.0,
+  },
+  {
+    model: "openai/gpt-5",
+    provider: "openai",
+    context_window: 400000,
+    input: 1.25,
+    output: 10.0,
+    cacheRead: 0.125,
+  },
+  {
+    model: "openai/gpt-5-codex",
+    provider: "openai",
+    context_window: 400000,
+    input: 1.25,
+    output: 10.0,
+    cacheRead: 0.125,
+  },
+  {
+    model: "openai/gpt-5-mini",
+    provider: "openai",
+    context_window: 400000,
+    input: 0.25,
+    output: 2.0,
+    cacheRead: 0.025,
+  },
+  {
+    model: "openai/gpt-5-nano",
+    provider: "openai",
+    context_window: 400000,
+    input: 0.05,
+    output: 0.4,
+    cacheRead: 0.005,
+  },
+  {
+    model: "openai/gpt-5-pro",
+    provider: "openai",
+    context_window: 400000,
+    input: 15.0,
+    output: 120.0,
+  },
+  {
+    model: "openai/gpt-5.1",
+    provider: "openai",
+    context_window: 400000,
+    input: 1.25,
+    output: 10.0,
+    cacheRead: 0.125,
+  },
+  {
+    model: "openai/gpt-5.1-codex",
+    provider: "openai",
+    context_window: 400000,
+    input: 1.25,
+    output: 10.0,
+    cacheRead: 0.125,
+  },
+  {
+    model: "openai/gpt-5.1-codex-max",
+    provider: "openai",
+    context_window: 400000,
+    input: 1.25,
+    output: 10.0,
+    cacheRead: 0.125,
+  },
+  {
+    model: "openai/gpt-5.1-codex-mini",
+    provider: "openai",
+    context_window: 400000,
+    input: 0.25,
+    output: 2.0,
+    cacheRead: 0.025,
+  },
+  {
+    model: "openai/gpt-5.2",
+    provider: "openai",
+    context_window: 400000,
+    input: 1.75,
+    output: 14.0,
+    cacheRead: 0.175,
+  },
+  {
+    model: "openai/gpt-5.2-codex",
+    provider: "openai",
+    context_window: 400000,
+    input: 1.75,
+    output: 14.0,
+    cacheRead: 0.175,
+  },
+  {
+    model: "openai/gpt-5.2-pro",
+    provider: "openai",
+    context_window: 400000,
+    input: 21.0,
+    output: 168.0,
+  },
+  {
+    model: "openai/gpt-5.3-codex",
+    provider: "openai",
+    context_window: 400000,
+    input: 1.75,
+    output: 14.0,
+    cacheRead: 0.175,
+  },
+  {
+    model: "openai/gpt-5.4",
+    provider: "openai",
+    context_window: 1050000,
+    input: 2.5,
+    output: 15.0,
+    cacheRead: 0.25,
+  },
+  {
+    model: "openai/gpt-5.4-mini",
+    provider: "openai",
+    context_window: 400000,
+    input: 0.75,
+    output: 4.5,
+    cacheRead: 0.075,
+  },
+  {
+    model: "openai/gpt-5.4-nano",
+    provider: "openai",
+    context_window: 400000,
+    input: 0.2,
+    output: 1.25,
+    cacheRead: 0.02,
+  },
+  {
+    model: "openai/gpt-5.4-pro",
+    provider: "openai",
+    context_window: 1050000,
+    input: 30.0,
+    output: 180.0,
+  },
+  {
+    model: "openai/gpt-5.5",
+    provider: "openai",
+    context_window: 1050000,
+    input: 5.0,
+    output: 30.0,
+    cacheRead: 0.5,
+  },
+  {
+    model: "openai/gpt-5.5-pro",
+    provider: "openai",
+    context_window: 1050000,
+    input: 30.0,
+    output: 180.0,
+  },
+  {
+    model: "openai/gpt-audio",
+    provider: "openai",
+    context_window: 128000,
+    input: 2.5,
+    output: 10.0,
+  },
+  {
+    model: "openai/gpt-audio-mini",
+    provider: "openai",
+    context_window: 128000,
+    input: 0.6,
+    output: 2.4,
+  },
+  {
+    model: "openai/o1",
+    provider: "openai",
+    context_window: 200000,
+    input: 15.0,
+    output: 60.0,
+    cacheRead: 7.5,
+  },
+  {
+    model: "openai/o1-pro",
+    provider: "openai",
+    context_window: 200000,
+    input: 150.0,
+    output: 600.0,
+  },
+  {
+    model: "openai/o3",
+    provider: "openai",
+    context_window: 200000,
+    input: 2.0,
+    output: 8.0,
+    cacheRead: 0.5,
+  },
+  {
+    model: "openai/o3-deep-research",
+    provider: "openai",
+    context_window: 200000,
+    input: 10.0,
+    output: 40.0,
+    cacheRead: 2.5,
+  },
+  {
+    model: "openai/o3-mini",
+    provider: "openai",
+    context_window: 200000,
+    input: 1.1,
+    output: 4.4,
+    cacheRead: 0.55,
+  },
+  {
+    model: "openai/o3-pro",
+    provider: "openai",
+    context_window: 200000,
+    input: 20.0,
+    output: 80.0,
+  },
+  {
+    model: "openai/o4-mini",
+    provider: "openai",
+    context_window: 200000,
+    input: 1.1,
+    output: 4.4,
+    cacheRead: 0.275,
+  },
+  {
+    model: "openai/o4-mini-deep-research",
+    provider: "openai",
+    context_window: 200000,
+    input: 2.0,
+    output: 8.0,
+    cacheRead: 0.5,
+  },
+];
+
+// Curated auto levels → canonical ids in priority order. Mirrors the backend's
+// MODEL_POLICY_LEVELS (resolved to the catalog's canonical ids).
+const LEVELS: ModelCatalog["levels"] = {
+  low: ["anthropic/claude-haiku-4.5", "openai/gpt-5-mini"],
+  medium: ["anthropic/claude-sonnet-4.6", "openai/gpt-5"],
+  high: ["anthropic/claude-opus-4.7", "openai/gpt-5-pro"],
+};
+
+export function useModelCatalog(): {
+  catalog: ModelCatalog;
+  isLoading: boolean;
+} {
+  return { catalog: { models: MODELS, levels: LEVELS }, isLoading: false };
+}

From 49838643b245ca038c080ddc33ab118caae91c86 Mon Sep 17 00:00:00 2001
From: Ben White <ben@posthog.com>
Date: Wed, 24 Jun 2026 16:40:46 +0200
Subject: [PATCH 2/9] feat(agent-applications): save model policy + live
 catalog endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make the model picker functional: save edits and read the real catalog.

- api-client: updateAgentRevisionSpec (draft-only PATCH) + getAgentModelCatalog.
- useApplyAgentSpec: "create draft and apply changes" — PATCH a draft in place,
  else clone the revision to a fresh draft, apply, and select it. Freeze/promote
  stay on the existing lifecycle buttons.
- AgentModelConfig: Save / Reset bar (auto-branches a draft on non-draft
  revisions); threaded through the config pane.
- useModelCatalog: read GET …/agent_applications/models/ (drops the stand-in
  snapshot; small levels fallback while loading).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/api-client/src/posthog-client.ts     |  32 ++
 .../components/AgentConfigurationPane.tsx     |  23 +-
 .../components/AgentModelConfig.tsx           |  82 +++-
 .../hooks/useApplyAgentSpec.ts                |  57 +++
 .../hooks/useModelCatalog.ts                  | 462 +-----------------
 5 files changed, 207 insertions(+), 449 deletions(-)
 create mode 100644 packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts

diff --git a/packages/api-client/src/posthog-client.ts b/packages/api-client/src/posthog-client.ts
index 3573de35b..5c4b8d89b 100644
--- a/packages/api-client/src/posthog-client.ts
+++ b/packages/api-client/src/posthog-client.ts
@@ -32,9 +32,11 @@ import type {
   AgentSessionLogsParams,
   AgentSessionsListParams,
   AgentSlackManifest,
+  AgentSpec,
   AgentUsersListResponse,
   BundleFile,
   DecideApprovalRequest,
+  ModelCatalog,
 } from "@posthog/shared/agent-platform-types";
 import type {
   ActionabilityJudgmentArtefact,
@@ -4661,6 +4663,36 @@ export class PostHogAPIClient {
     return (await response.json()) as AgentRevision;
   }
 
+  /** The served-model catalog + curated auto-level → model map (project-agnostic;
+   * proxies the AI gateway catalog). Powers the config-pane model browser. */
+  async getAgentModelCatalog(): Promise<ModelCatalog> {
+    const teamId = await this.getTeamId();
+    const path = `${this.agentApplicationsPath(teamId)}models/`;
+    const url = new URL(`${this.api.baseUrl}${path}`);
+    const response = await this.api.fetcher.fetch({ method: "get", url, path });
+    return (await response.json()) as ModelCatalog;
+  }
+
+  /** Update a draft revision's spec (PATCH). Draft-only on the server — a
+   * ready/live spec is frozen. Replaces `spec` wholesale, so callers send the
+   * full updated spec. Returns the updated revision. */
+  async updateAgentRevisionSpec(
+    idOrSlug: string,
+    revisionId: string,
+    spec: AgentSpec,
+  ): Promise<AgentRevision> {
+    const teamId = await this.getTeamId();
+    const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/revisions/${encodeURIComponent(revisionId)}/`;
+    const url = new URL(`${this.api.baseUrl}${path}`);
+    const response = await this.api.fetcher.fetch({
+      method: "patch",
+      url,
+      path,
+      overrides: { body: JSON.stringify({ spec }) },
+    });
+    return (await response.json()) as AgentRevision;
+  }
+
   /** Run a revision lifecycle transition: freeze (draft→ready), promote
    * (ready→live, demoting the old live), or archive. Returns the updated revision. */
   async transitionAgentRevision(
diff --git a/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx b/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
index 4df298119..ba0031e55 100644
--- a/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
@@ -21,6 +21,7 @@ import {
   WrenchIcon,
 } from "@phosphor-icons/react";
 import type {
+  AgentRevisionState,
   AgentSpec,
   BundleFile,
 } from "@posthog/shared/agent-platform-types";
@@ -63,9 +64,15 @@ const USAGE_HOST = "https://<ingress-host>";
 interface Ctx {
   idOrSlug: string;
   revisionId: string;
+  /** Application UUID — needed to branch a new draft on save. */
+  applicationId?: string;
+  /** State of the viewed revision — drives draft-only edit vs auto-clone. */
+  revisionState?: AgentRevisionState;
   ingressBaseUrl?: string;
   setKeys: string[];
   onSelect: (node: string) => void;
+  /** Select a revision in the picker (used to jump to a freshly branched draft). */
+  onSelectRevision?: (revisionId: string) => void;
   onOpenSession?: (sessionId: string) => void;
 }
 
@@ -399,9 +406,12 @@ export function AgentConfigurationPane({
     ? {
         idOrSlug,
         revisionId,
+        applicationId: application?.id,
+        revisionState: revision?.state,
         ingressBaseUrl: application?.ingress_base_url ?? undefined,
         setKeys,
         onSelect: onSelectNode,
+        onSelectRevision,
         onOpenSession,
       }
     : null;
@@ -615,7 +625,7 @@ function DetailBody({
 }) {
   switch (section) {
     case "model":
-      return <ModelBody spec={spec} />;
+      return <ModelBody key={ctx.revisionId} spec={spec} ctx={ctx} />;
     case "instructions":
       return (
         <BundleFileBody
@@ -694,10 +704,17 @@ function byPath(files: BundleFile[], path: string): BundleFile | undefined {
   return files.find((f) => f.path === path);
 }
 
-function ModelBody({ spec }: { spec: AgentSpec }) {
+function ModelBody({ spec, ctx }: { spec: AgentSpec; ctx: Ctx }) {
   return (
     <Flex direction="column" gap="4">
-      <AgentModelConfig spec={spec} />
+      <AgentModelConfig
+        spec={spec}
+        idOrSlug={ctx.idOrSlug}
+        applicationId={ctx.applicationId}
+        revisionId={ctx.revisionId}
+        revisionState={ctx.revisionState}
+        onSelectRevision={ctx.onSelectRevision}
+      />
       {spec.entrypoint ? (
         <Row label="entrypoint" value={spec.entrypoint} mono />
       ) : null}
diff --git a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
index 578312834..87595ee0c 100644
--- a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
@@ -13,12 +13,15 @@ import type {
   AgentModelLevel,
   AgentModelPolicy,
   AgentReasoningEffort,
+  AgentRevisionState,
   AgentSpec,
   ModelCatalogEntry,
 } from "@posthog/shared/agent-platform-types";
 import { Badge } from "@posthog/ui/primitives/Badge";
+import { Button } from "@posthog/ui/primitives/Button";
 import { Flex, Popover, Text } from "@radix-ui/themes";
 import { type ReactNode, type RefCallback, useMemo, useState } from "react";
+import { useApplyAgentSpec } from "../hooks/useApplyAgentSpec";
 import { useModelCatalog } from "../hooks/useModelCatalog";
 
 /**
@@ -30,8 +33,23 @@ import { useModelCatalog } from "../hooks/useModelCatalog";
  * the UX. The catalog comes from `useModelCatalog` (a stand-in for the
  * model-info endpoint).
  */
-export function AgentModelConfig({ spec }: { spec: AgentSpec }) {
+export function AgentModelConfig({
+  spec,
+  idOrSlug,
+  applicationId,
+  revisionId,
+  revisionState,
+  onSelectRevision,
+}: {
+  spec: AgentSpec;
+  idOrSlug: string;
+  applicationId?: string;
+  revisionId: string;
+  revisionState?: AgentRevisionState;
+  onSelectRevision?: (revisionId: string) => void;
+}) {
   const { catalog } = useModelCatalog();
+  const apply = useApplyAgentSpec(idOrSlug, applicationId);
   const initial = spec.model_policy;
 
   const [mode, setMode] = useState<"auto" | "manual">(initial?.mode ?? "auto");
@@ -53,14 +71,67 @@ export function AgentModelConfig({ spec }: { spec: AgentSpec }) {
   const dirty =
     JSON.stringify(policy) !==
     JSON.stringify(initial ?? { mode: "auto", level: "medium" });
+  const willBranch = (revisionState ?? "draft") !== "draft"; // match save()
 
   const byId = useMemo(
     () => new Map(catalog.models.map((m) => [m.model, m])),
     [catalog.models],
   );
 
+  function reset() {
+    setMode(initial?.mode ?? "auto");
+    setLevel(initial?.mode === "auto" ? (initial.level ?? "medium") : "medium");
+    setReasoning(initial?.mode === "auto" ? initial.reasoning : spec.reasoning);
+    setManual(initial?.mode === "manual" ? initial.models : []);
+  }
+
+  function save() {
+    apply.mutate(
+      {
+        revision: { id: revisionId, state: revisionState ?? "draft" },
+        spec: { ...spec, model_policy: policy },
+      },
+      { onSuccess: (rev) => onSelectRevision?.(rev.id) },
+    );
+  }
+
   return (
     <Flex direction="column" gap="4">
+      {dirty ? (
+        <Flex
+          direction="column"
+          gap="1.5"
+          className="rounded-(--radius-2) border border-(--amber-6) bg-(--amber-2) px-3 py-2"
+        >
+          <Flex align="center" justify="between" gap="2">
+            <Text className="text-[12px] text-amber-11">
+              {willBranch
+                ? "Unsaved changes — saving branches a new draft."
+                : "Unsaved changes."}
+            </Text>
+            <Flex gap="2" className="shrink-0">
+              <Button
+                size="1"
+                variant="soft"
+                color="gray"
+                disabled={apply.isPending}
+                onClick={reset}
+              >
+                Reset
+              </Button>
+              <Button size="1" loading={apply.isPending} onClick={save}>
+                {willBranch ? "Save to new draft" : "Save"}
+              </Button>
+            </Flex>
+          </Flex>
+          {apply.isError ? (
+            <Text className="text-(--red-11) text-[11px]">
+              {apply.error?.message ?? "Save failed"}
+            </Text>
+          ) : null}
+        </Flex>
+      ) : null}
+
       <Flex direction="column" gap="3">
         <Select
           label="mode"
@@ -104,14 +175,7 @@ export function AgentModelConfig({ spec }: { spec: AgentSpec }) {
         <ManualEditor models={manual} byId={byId} onChange={setManual} />
       )}
 
-      <Flex align="center" justify="between" gap="2" wrap="wrap">
-        <Subhead>browse all models · {catalog.models.length}</Subhead>
-        {dirty ? (
-          <Text className="text-[11px] text-amber-11">
-            preview — not saved yet
-          </Text>
-        ) : null}
-      </Flex>
+      <Subhead>browse all models · {catalog.models.length}</Subhead>
       <ModelBrowser
         models={catalog.models}
         canAdd={mode === "manual"}
diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts
new file mode 100644
index 000000000..32103bacb
--- /dev/null
+++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts
@@ -0,0 +1,57 @@
+import type {
+  AgentRevision,
+  AgentRevisionState,
+  AgentSpec,
+} from "@posthog/shared/agent-platform-types";
+import { useAuthenticatedClient } from "@posthog/ui/features/auth/authClient";
+import { useMutation, useQueryClient } from "@tanstack/react-query";
+import { useAuthStateValue } from "../../auth/store";
+import { agentApplicationsKeys } from "./agentApplicationsKeys";
+
+/**
+ * Apply a spec change ("create draft and apply changes"): if the target
+ * revision is already a draft, PATCH its spec in place; otherwise clone it to a
+ * fresh draft first and PATCH that. Freeze/promote stay separate (the revision
+ * bar's lifecycle buttons) — this only lands the edit on an editable draft.
+ *
+ * Returns the revision the change landed on so the caller can select it (it's a
+ * new draft whenever the source wasn't a draft).
+ */
+export function useApplyAgentSpec(
+  idOrSlug: string,
+  applicationId: string | undefined,
+) {
+  const client = useAuthenticatedClient();
+  const queryClient = useQueryClient();
+  const projectId = useAuthStateValue((state) => state.currentProjectId);
+
+  return useMutation<
+    AgentRevision,
+    Error,
+    { revision: { id: string; state: AgentRevisionState }; spec: AgentSpec }
+  >({
+    mutationFn: async ({ revision, spec }) => {
+      let targetId = revision.id;
+      if (revision.state !== "draft") {
+        if (!applicationId) throw new Error("Application not loaded yet");
+        const draft = await client.createAgentDraftRevisionFrom(
+          applicationId,
+          revision.id,
+        );
+        targetId = draft.id;
+      }
+      return client.updateAgentRevisionSpec(idOrSlug, targetId, spec);
+    },
+    // onSettled, not onSuccess: a failed PATCH after a successful clone still
+    // leaves a new draft on the server, so the lists must refetch either way.
+    onSettled: () => {
+      for (const key of [
+        agentApplicationsKeys.detail(projectId, idOrSlug),
+        agentApplicationsKeys.revisions(projectId, idOrSlug),
+        ["agent-applications", "revision", projectId, idOrSlug],
+      ]) {
+        void queryClient.invalidateQueries({ queryKey: key });
+      }
+    },
+  });
+}
diff --git a/packages/ui/src/features/agent-applications/hooks/useModelCatalog.ts b/packages/ui/src/features/agent-applications/hooks/useModelCatalog.ts
index f6d74fd59..c805c4725 100644
--- a/packages/ui/src/features/agent-applications/hooks/useModelCatalog.ts
+++ b/packages/ui/src/features/agent-applications/hooks/useModelCatalog.ts
@@ -1,446 +1,34 @@
-import type {
-  ModelCatalog,
-  ModelCatalogEntry,
-} from "@posthog/shared/agent-platform-types";
+import type { ModelCatalog } from "@posthog/shared/agent-platform-types";
+import { useAuthenticatedQuery } from "@posthog/ui/hooks/useAuthenticatedQuery";
+import { useAuthStateValue } from "../../auth/store";
 
-/**
- * The served-model catalog for the model browser + auto-level preview.
- *
- * STAND-IN: this is a snapshot of the live ai-gateway `/v1/models` catalog. When
- * the catalog endpoint lands (e.g. `GET …/agent_applications/models`), replace
- * the body with a real query — `ModelCatalog` already matches the wire shape, so
- * nothing downstream changes.
- */
-const MODELS: ModelCatalogEntry[] = [
-  {
-    model: "anthropic/claude-haiku-4.5",
-    provider: "anthropic",
-    context_window: 200000,
-    input: 1.0,
-    output: 5.0,
-    cacheRead: 0.1,
-    cacheWrite: 1.25,
-  },
-  {
-    model: "anthropic/claude-opus-4.1",
-    provider: "anthropic",
-    context_window: 200000,
-    input: 15.0,
-    output: 75.0,
-    cacheRead: 1.5,
-    cacheWrite: 18.75,
-  },
-  {
-    model: "anthropic/claude-opus-4.5",
-    provider: "anthropic",
-    context_window: 200000,
-    input: 5.0,
-    output: 25.0,
-    cacheRead: 0.5,
-    cacheWrite: 6.25,
-  },
-  {
-    model: "anthropic/claude-opus-4.6",
-    provider: "anthropic",
-    context_window: 1000000,
-    input: 5.0,
-    output: 25.0,
-    cacheRead: 0.5,
-    cacheWrite: 6.25,
-  },
-  {
-    model: "anthropic/claude-opus-4.7",
-    provider: "anthropic",
-    context_window: 1000000,
-    input: 5.0,
-    output: 25.0,
-    cacheRead: 0.5,
-    cacheWrite: 6.25,
-  },
-  {
-    model: "anthropic/claude-opus-4.8",
-    provider: "anthropic",
-    context_window: 1000000,
-    input: 5.0,
-    output: 25.0,
-    cacheRead: 0.5,
-    cacheWrite: 6.25,
-  },
-  {
-    model: "anthropic/claude-sonnet-4.5",
-    provider: "anthropic",
-    context_window: 1000000,
-    input: 3.0,
-    output: 15.0,
-    cacheRead: 0.3,
-    cacheWrite: 3.75,
-  },
-  {
-    model: "anthropic/claude-sonnet-4.6",
-    provider: "anthropic",
-    context_window: 1000000,
-    input: 3.0,
-    output: 15.0,
-    cacheRead: 0.3,
-    cacheWrite: 3.75,
-  },
-  {
-    model: "openai/gpt-3.5-turbo",
-    provider: "openai",
-    context_window: 16385,
-    input: 0.5,
-    output: 1.5,
-  },
-  {
-    model: "openai/gpt-3.5-turbo-16k",
-    provider: "openai",
-    context_window: 16385,
-    input: 3.0,
-    output: 4.0,
-  },
-  {
-    model: "openai/gpt-3.5-turbo-instruct",
-    provider: "openai",
-    context_window: 4095,
-    input: 1.5,
-    output: 2.0,
-  },
-  {
-    model: "openai/gpt-4",
-    provider: "openai",
-    context_window: 8191,
-    input: 30.0,
-    output: 60.0,
-  },
-  {
-    model: "openai/gpt-4-turbo",
-    provider: "openai",
-    context_window: 128000,
-    input: 10.0,
-    output: 30.0,
-  },
-  {
-    model: "openai/gpt-4.1",
-    provider: "openai",
-    context_window: 1047576,
-    input: 2.0,
-    output: 8.0,
-    cacheRead: 0.5,
-  },
-  {
-    model: "openai/gpt-4.1-mini",
-    provider: "openai",
-    context_window: 1047576,
-    input: 0.4,
-    output: 1.6,
-    cacheRead: 0.1,
-  },
-  {
-    model: "openai/gpt-4.1-nano",
-    provider: "openai",
-    context_window: 1047576,
-    input: 0.1,
-    output: 0.4,
-    cacheRead: 0.025,
-  },
-  {
-    model: "openai/gpt-4o",
-    provider: "openai",
-    context_window: 128000,
-    input: 2.5,
-    output: 10.0,
-    cacheRead: 1.25,
-  },
-  {
-    model: "openai/gpt-4o-2024-05-13",
-    provider: "openai",
-    context_window: 128000,
-    input: 5.0,
-    output: 15.0,
-  },
-  {
-    model: "openai/gpt-4o-2024-08-06",
-    provider: "openai",
-    context_window: 128000,
-    input: 2.5,
-    output: 10.0,
-    cacheRead: 1.25,
-  },
-  {
-    model: "openai/gpt-4o-2024-11-20",
-    provider: "openai",
-    context_window: 128000,
-    input: 2.5,
-    output: 10.0,
-    cacheRead: 1.25,
-  },
-  {
-    model: "openai/gpt-4o-mini",
-    provider: "openai",
-    context_window: 128000,
-    input: 0.15,
-    output: 0.6,
-    cacheRead: 0.075,
-  },
-  {
-    model: "openai/gpt-4o-mini-2024-07-18",
-    provider: "openai",
-    context_window: 128000,
-    input: 0.15,
-    output: 0.6,
-    cacheRead: 0.075,
-  },
-  {
-    model: "openai/gpt-4o-mini-search-preview",
-    provider: "openai",
-    context_window: 128000,
-    input: 0.15,
-    output: 0.6,
-  },
-  {
-    model: "openai/gpt-4o-search-preview",
-    provider: "openai",
-    context_window: 128000,
-    input: 2.5,
-    output: 10.0,
-  },
-  {
-    model: "openai/gpt-5",
-    provider: "openai",
-    context_window: 400000,
-    input: 1.25,
-    output: 10.0,
-    cacheRead: 0.125,
-  },
-  {
-    model: "openai/gpt-5-codex",
-    provider: "openai",
-    context_window: 400000,
-    input: 1.25,
-    output: 10.0,
-    cacheRead: 0.125,
-  },
-  {
-    model: "openai/gpt-5-mini",
-    provider: "openai",
-    context_window: 400000,
-    input: 0.25,
-    output: 2.0,
-    cacheRead: 0.025,
-  },
-  {
-    model: "openai/gpt-5-nano",
-    provider: "openai",
-    context_window: 400000,
-    input: 0.05,
-    output: 0.4,
-    cacheRead: 0.005,
+// Levels rarely change and the auto-level preview needs them even while the
+// catalog request is in flight; the authoritative values still come from the
+// endpoint. Models are left empty until the fetch resolves.
+const FALLBACK: ModelCatalog = {
+  models: [],
+  levels: {
+    low: ["anthropic/claude-haiku-4.5", "openai/gpt-5-mini"],
+    medium: ["anthropic/claude-sonnet-4.6", "openai/gpt-5"],
+    high: ["anthropic/claude-opus-4.7", "openai/gpt-5-pro"],
   },
-  {
-    model: "openai/gpt-5-pro",
-    provider: "openai",
-    context_window: 400000,
-    input: 15.0,
-    output: 120.0,
-  },
-  {
-    model: "openai/gpt-5.1",
-    provider: "openai",
-    context_window: 400000,
-    input: 1.25,
-    output: 10.0,
-    cacheRead: 0.125,
-  },
-  {
-    model: "openai/gpt-5.1-codex",
-    provider: "openai",
-    context_window: 400000,
-    input: 1.25,
-    output: 10.0,
-    cacheRead: 0.125,
-  },
-  {
-    model: "openai/gpt-5.1-codex-max",
-    provider: "openai",
-    context_window: 400000,
-    input: 1.25,
-    output: 10.0,
-    cacheRead: 0.125,
-  },
-  {
-    model: "openai/gpt-5.1-codex-mini",
-    provider: "openai",
-    context_window: 400000,
-    input: 0.25,
-    output: 2.0,
-    cacheRead: 0.025,
-  },
-  {
-    model: "openai/gpt-5.2",
-    provider: "openai",
-    context_window: 400000,
-    input: 1.75,
-    output: 14.0,
-    cacheRead: 0.175,
-  },
-  {
-    model: "openai/gpt-5.2-codex",
-    provider: "openai",
-    context_window: 400000,
-    input: 1.75,
-    output: 14.0,
-    cacheRead: 0.175,
-  },
-  {
-    model: "openai/gpt-5.2-pro",
-    provider: "openai",
-    context_window: 400000,
-    input: 21.0,
-    output: 168.0,
-  },
-  {
-    model: "openai/gpt-5.3-codex",
-    provider: "openai",
-    context_window: 400000,
-    input: 1.75,
-    output: 14.0,
-    cacheRead: 0.175,
-  },
-  {
-    model: "openai/gpt-5.4",
-    provider: "openai",
-    context_window: 1050000,
-    input: 2.5,
-    output: 15.0,
-    cacheRead: 0.25,
-  },
-  {
-    model: "openai/gpt-5.4-mini",
-    provider: "openai",
-    context_window: 400000,
-    input: 0.75,
-    output: 4.5,
-    cacheRead: 0.075,
-  },
-  {
-    model: "openai/gpt-5.4-nano",
-    provider: "openai",
-    context_window: 400000,
-    input: 0.2,
-    output: 1.25,
-    cacheRead: 0.02,
-  },
-  {
-    model: "openai/gpt-5.4-pro",
-    provider: "openai",
-    context_window: 1050000,
-    input: 30.0,
-    output: 180.0,
-  },
-  {
-    model: "openai/gpt-5.5",
-    provider: "openai",
-    context_window: 1050000,
-    input: 5.0,
-    output: 30.0,
-    cacheRead: 0.5,
-  },
-  {
-    model: "openai/gpt-5.5-pro",
-    provider: "openai",
-    context_window: 1050000,
-    input: 30.0,
-    output: 180.0,
-  },
-  {
-    model: "openai/gpt-audio",
-    provider: "openai",
-    context_window: 128000,
-    input: 2.5,
-    output: 10.0,
-  },
-  {
-    model: "openai/gpt-audio-mini",
-    provider: "openai",
-    context_window: 128000,
-    input: 0.6,
-    output: 2.4,
-  },
-  {
-    model: "openai/o1",
-    provider: "openai",
-    context_window: 200000,
-    input: 15.0,
-    output: 60.0,
-    cacheRead: 7.5,
-  },
-  {
-    model: "openai/o1-pro",
-    provider: "openai",
-    context_window: 200000,
-    input: 150.0,
-    output: 600.0,
-  },
-  {
-    model: "openai/o3",
-    provider: "openai",
-    context_window: 200000,
-    input: 2.0,
-    output: 8.0,
-    cacheRead: 0.5,
-  },
-  {
-    model: "openai/o3-deep-research",
-    provider: "openai",
-    context_window: 200000,
-    input: 10.0,
-    output: 40.0,
-    cacheRead: 2.5,
-  },
-  {
-    model: "openai/o3-mini",
-    provider: "openai",
-    context_window: 200000,
-    input: 1.1,
-    output: 4.4,
-    cacheRead: 0.55,
-  },
-  {
-    model: "openai/o3-pro",
-    provider: "openai",
-    context_window: 200000,
-    input: 20.0,
-    output: 80.0,
-  },
-  {
-    model: "openai/o4-mini",
-    provider: "openai",
-    context_window: 200000,
-    input: 1.1,
-    output: 4.4,
-    cacheRead: 0.275,
-  },
-  {
-    model: "openai/o4-mini-deep-research",
-    provider: "openai",
-    context_window: 200000,
-    input: 2.0,
-    output: 8.0,
-    cacheRead: 0.5,
-  },
-];
-
-// Curated auto levels → canonical ids in priority order. Mirrors the backend's
-// MODEL_POLICY_LEVELS (resolved to the catalog's canonical ids).
-const LEVELS: ModelCatalog["levels"] = {
-  low: ["anthropic/claude-haiku-4.5", "openai/gpt-5-mini"],
-  medium: ["anthropic/claude-sonnet-4.6", "openai/gpt-5"],
-  high: ["anthropic/claude-opus-4.7", "openai/gpt-5-pro"],
 };
 
+/**
+ * The served-model catalog + curated auto-level → model map, from
+ * `GET …/agent_applications/models/` (which proxies the AI gateway catalog).
+ * Feeds the model browser and the auto-level preview. Falls back to an empty
+ * catalog (with the known levels) while loading or if the endpoint is down.
+ */
 export function useModelCatalog(): {
   catalog: ModelCatalog;
   isLoading: boolean;
 } {
-  return { catalog: { models: MODELS, levels: LEVELS }, isLoading: false };
+  const projectId = useAuthStateValue((state) => state.currentProjectId);
+  const { data, isLoading } = useAuthenticatedQuery<ModelCatalog>(
+    ["agent-applications", "model-catalog", projectId],
+    (client) => client.getAgentModelCatalog(),
+    { enabled: !!projectId, staleTime: 5 * 60_000 },
+  );
+  return { catalog: data ?? FALLBACK, isLoading };
 }

From dab0ee8bb4628589c43a75f5ec3d92e1652f6101 Mon Sep 17 00:00:00 2001
From: Ben White <ben@posthog.com>
Date: Wed, 24 Jun 2026 17:32:12 +0200
Subject: [PATCH 3/9] fix(agent-applications): unwrap new_draft envelope; tests
 for save + catalog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- createAgentDraftRevisionFrom: new_draft returns `{ revision, source_revision_id }`,
  not a flat revision — returning the wrapper left `.id` undefined and 404'd the
  follow-up spec PATCH (also affected "Clone to draft"). Unwrap it.
- tests: api-client (unwrap regression, updateAgentRevisionSpec PATCH,
  getAgentModelCatalog GET) + useApplyAgentSpec (draft-in-place vs clone+patch,
  missing-appId guard).

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../api-client/src/posthog-client.test.ts     | 85 ++++++++++++++++++
 packages/api-client/src/posthog-client.ts     |  4 +-
 .../hooks/useApplyAgentSpec.test.ts           | 90 +++++++++++++++++++
 3 files changed, 178 insertions(+), 1 deletion(-)
 create mode 100644 packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts

diff --git a/packages/api-client/src/posthog-client.test.ts b/packages/api-client/src/posthog-client.test.ts
index cd660438d..4bd599881 100644
--- a/packages/api-client/src/posthog-client.test.ts
+++ b/packages/api-client/src/posthog-client.test.ts
@@ -797,4 +797,89 @@ describe("PostHogAPIClient", () => {
       ).rejects.toThrow("Unexpected response");
     });
   });
+
+  describe("agent model policy + catalog", () => {
+    function makeClient(fetch: ReturnType<typeof vi.fn>) {
+      const client = new PostHogAPIClient(
+        "http://localhost:8000",
+        async () => "token",
+        async () => "token",
+        123,
+      );
+      (
+        client as unknown as {
+          api: { baseUrl: string; fetcher: { fetch: typeof fetch } };
+        }
+      ).api = { baseUrl: "http://localhost:8000", fetcher: { fetch } };
+      return client;
+    }
+
+    it("createAgentDraftRevisionFrom unwraps the { revision } envelope", async () => {
+      // Regression: new_draft returns `{ revision, source_revision_id }`, not a
+      // flat revision — returning the wrapper left `.id` undefined and broke the
+      // follow-up PATCH (404 on /revisions/undefined/).
+      const fetch = vi.fn().mockResolvedValue({
+        json: async () => ({
+          revision: { id: "draft-1", state: "draft" },
+          source_revision_id: "rev-0",
+        }),
+      });
+      const client = makeClient(fetch);
+
+      const rev = await client.createAgentDraftRevisionFrom("app-1", "rev-0");
+
+      expect(rev.id).toBe("draft-1");
+      expect(fetch).toHaveBeenCalledWith(
+        expect.objectContaining({
+          method: "post",
+          path: "/api/projects/123/agent_applications/app-1/revisions/new_draft/",
+          overrides: {
+            body: JSON.stringify({
+              application_id: "app-1",
+              source_revision_id: "rev-0",
+            }),
+          },
+        }),
+      );
+    });
+
+    it("updateAgentRevisionSpec PATCHes the revision with the full spec", async () => {
+      const fetch = vi.fn().mockResolvedValue({
+        json: async () => ({ id: "draft-1", state: "draft" }),
+      });
+      const client = makeClient(fetch);
+      const spec = { model_policy: { mode: "auto", level: "high" } };
+
+      await client.updateAgentRevisionSpec(
+        "agent-slug",
+        "draft-1",
+        spec as never,
+      );
+
+      expect(fetch).toHaveBeenCalledWith(
+        expect.objectContaining({
+          method: "patch",
+          path: "/api/projects/123/agent_applications/agent-slug/revisions/draft-1/",
+          overrides: { body: JSON.stringify({ spec }) },
+        }),
+      );
+    });
+
+    it("getAgentModelCatalog GETs the project-level models endpoint", async () => {
+      const catalog = {
+        models: [{ model: "anthropic/claude-haiku-4.5" }],
+        levels: { low: ["anthropic/claude-haiku-4.5"] },
+      };
+      const fetch = vi.fn().mockResolvedValue({ json: async () => catalog });
+      const client = makeClient(fetch);
+
+      await expect(client.getAgentModelCatalog()).resolves.toEqual(catalog);
+      expect(fetch).toHaveBeenCalledWith(
+        expect.objectContaining({
+          method: "get",
+          path: "/api/projects/123/agent_applications/models/",
+        }),
+      );
+    });
+  });
 });
diff --git a/packages/api-client/src/posthog-client.ts b/packages/api-client/src/posthog-client.ts
index 5c4b8d89b..c193f0f1a 100644
--- a/packages/api-client/src/posthog-client.ts
+++ b/packages/api-client/src/posthog-client.ts
@@ -4660,7 +4660,9 @@ export class PostHogAPIClient {
         }),
       },
     });
-    return (await response.json()) as AgentRevision;
+    // new_draft wraps the created revision: `{ revision, source_revision_id }`.
+    const data = (await response.json()) as { revision: AgentRevision };
+    return data.revision;
   }
 
   /** The served-model catalog + curated auto-level → model map (project-agnostic;
diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
new file mode 100644
index 000000000..8e1e558c0
--- /dev/null
+++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
@@ -0,0 +1,90 @@
+import { renderHook } from "@testing-library/react";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+
+// Capture the mutationFn the hook hands to react-query so we can exercise the
+// create-draft-vs-patch branching directly, without a live QueryClient.
+let mutationFn: (vars: {
+  revision: { id: string; state: string };
+  spec: unknown;
+}) => Promise<unknown>;
+
+vi.mock("@tanstack/react-query", () => ({
+  useMutation: (opts: { mutationFn: typeof mutationFn }) => {
+    mutationFn = opts.mutationFn;
+    return { mutate: vi.fn() };
+  },
+  useQueryClient: () => ({ invalidateQueries: vi.fn() }),
+}));
+
+const client = {
+  createAgentDraftRevisionFrom: vi.fn(),
+  updateAgentRevisionSpec: vi.fn(),
+};
+
+vi.mock("@posthog/ui/features/auth/authClient", () => ({
+  useAuthenticatedClient: () => client,
+}));
+vi.mock("../../auth/store", () => ({
+  useAuthStateValue: () => 1,
+}));
+
+import { useApplyAgentSpec } from "./useApplyAgentSpec";
+
+describe("useApplyAgentSpec", () => {
+  beforeEach(() => {
+    client.createAgentDraftRevisionFrom.mockReset();
+    client.updateAgentRevisionSpec.mockReset();
+  });
+
+  it("PATCHes a draft in place — no new draft branched", async () => {
+    client.updateAgentRevisionSpec.mockResolvedValue({
+      id: "d1",
+      state: "draft",
+    });
+    renderHook(() => useApplyAgentSpec("agent-slug", "app-1"));
+    const spec = { model_policy: { mode: "auto", level: "high" } };
+
+    await mutationFn({ revision: { id: "d1", state: "draft" }, spec });
+
+    expect(client.createAgentDraftRevisionFrom).not.toHaveBeenCalled();
+    expect(client.updateAgentRevisionSpec).toHaveBeenCalledWith(
+      "agent-slug",
+      "d1",
+      spec,
+    );
+  });
+
+  it("clones to a fresh draft then PATCHes it when the source isn't a draft", async () => {
+    client.createAgentDraftRevisionFrom.mockResolvedValue({
+      id: "new-draft",
+      state: "draft",
+    });
+    client.updateAgentRevisionSpec.mockResolvedValue({
+      id: "new-draft",
+      state: "draft",
+    });
+    renderHook(() => useApplyAgentSpec("agent-slug", "app-1"));
+    const spec = { model_policy: { mode: "manual", models: [{ model: "x" }] } };
+
+    await mutationFn({ revision: { id: "live-1", state: "live" }, spec });
+
+    expect(client.createAgentDraftRevisionFrom).toHaveBeenCalledWith(
+      "app-1",
+      "live-1",
+    );
+    expect(client.updateAgentRevisionSpec).toHaveBeenCalledWith(
+      "agent-slug",
+      "new-draft",
+      spec,
+    );
+  });
+
+  it("throws when a clone is needed but the application id is missing", async () => {
+    renderHook(() => useApplyAgentSpec("agent-slug", undefined));
+
+    await expect(
+      mutationFn({ revision: { id: "live-1", state: "live" }, spec: {} }),
+    ).rejects.toThrow(/Application/);
+    expect(client.createAgentDraftRevisionFrom).not.toHaveBeenCalled();
+  });
+});

From c391c29ab959fc1bd4d8a31b7bf2244b7863504f Mon Sep 17 00:00:00 2001
From: Ben White <ben@posthog.com>
Date: Wed, 24 Jun 2026 18:02:04 +0200
Subject: [PATCH 4/9] =?UTF-8?q?refactor(agent-applications):=20rename=20sp?=
 =?UTF-8?q?ec.model=5Fpolicy=20=E2=86=92=20models;=20drop=20entrypoint?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirror the backend spec change in the agent-builder UI + wire shapes.

- Rename the `model_policy` field → `models` (AgentSpec + accessors + tests).
  The `AgentModelPolicy` type keeps its name.
- Drop `spec.entrypoint` (removed from the spec) — and its config-pane row.
- Manual mode now seeds its model list from the level you were on when you
  switch from auto, so you start from auto's choices and edit rather than a
  blank slate.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../api-client/src/posthog-client.test.ts     |  2 +-
 packages/shared/src/agent-platform-types.ts   |  7 +++----
 .../components/AgentConfigurationPane.tsx     | 21 +++++++------------
 .../components/AgentModelConfig.tsx           | 15 ++++++++++---
 .../hooks/useApplyAgentSpec.test.ts           |  4 ++--
 5 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/packages/api-client/src/posthog-client.test.ts b/packages/api-client/src/posthog-client.test.ts
index 4bd599881..d5ad449e2 100644
--- a/packages/api-client/src/posthog-client.test.ts
+++ b/packages/api-client/src/posthog-client.test.ts
@@ -848,7 +848,7 @@ describe("PostHogAPIClient", () => {
         json: async () => ({ id: "draft-1", state: "draft" }),
       });
       const client = makeClient(fetch);
-      const spec = { model_policy: { mode: "auto", level: "high" } };
+      const spec = { models: { mode: "auto", level: "high" } };
 
       await client.updateAgentRevisionSpec(
         "agent-slug",
diff --git a/packages/shared/src/agent-platform-types.ts b/packages/shared/src/agent-platform-types.ts
index f961fd7ba..6f7056f06 100644
--- a/packages/shared/src/agent-platform-types.ts
+++ b/packages/shared/src/agent-platform-types.ts
@@ -76,7 +76,7 @@ export interface AgentModelEntry {
 /**
  * How a revision picks its model. `auto` resolves a maintained, priority-ordered,
  * cross-provider list from `level` at runtime; `manual` pins an author-ordered
- * fallback list (primary first). Mirrors `spec.model_policy` in the backend.
+ * fallback list (primary first). Mirrors `spec.models` in the backend.
  */
 export type AgentModelPolicy =
   | { mode: "auto"; level?: AgentModelLevel; reasoning?: AgentReasoningEffort }
@@ -111,8 +111,8 @@ export interface ModelCatalog {
  */
 export interface AgentSpec {
   /** Model selection. `model` is the legacy single-string form; current specs
-   *  carry `model_policy`. One or the other is present. */
-  model_policy?: AgentModelPolicy;
+   *  carry `models`. One or the other is present. */
+  models?: AgentModelPolicy;
   model?: string;
   triggers?: unknown[];
   tools?: unknown[];
@@ -125,7 +125,6 @@ export interface AgentSpec {
     max_tool_calls?: number;
     max_wall_seconds?: number;
   };
-  entrypoint?: string;
   reasoning?: AgentReasoningEffort;
   [key: string]: unknown;
 }
diff --git a/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx b/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
index ba0031e55..5f71310f9 100644
--- a/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx
@@ -706,19 +706,14 @@ function byPath(files: BundleFile[], path: string): BundleFile | undefined {
 
 function ModelBody({ spec, ctx }: { spec: AgentSpec; ctx: Ctx }) {
   return (
-    <Flex direction="column" gap="4">
-      <AgentModelConfig
-        spec={spec}
-        idOrSlug={ctx.idOrSlug}
-        applicationId={ctx.applicationId}
-        revisionId={ctx.revisionId}
-        revisionState={ctx.revisionState}
-        onSelectRevision={ctx.onSelectRevision}
-      />
-      {spec.entrypoint ? (
-        <Row label="entrypoint" value={spec.entrypoint} mono />
-      ) : null}
-    </Flex>
+    <AgentModelConfig
+      spec={spec}
+      idOrSlug={ctx.idOrSlug}
+      applicationId={ctx.applicationId}
+      revisionId={ctx.revisionId}
+      revisionState={ctx.revisionState}
+      onSelectRevision={ctx.onSelectRevision}
+    />
   );
 }
 
diff --git a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
index 87595ee0c..d1186b374 100644
--- a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
@@ -50,7 +50,7 @@ export function AgentModelConfig({
 }) {
   const { catalog } = useModelCatalog();
   const apply = useApplyAgentSpec(idOrSlug, applicationId);
-  const initial = spec.model_policy;
+  const initial = spec.models;
 
   const [mode, setMode] = useState<"auto" | "manual">(initial?.mode ?? "auto");
   const [level, setLevel] = useState<AgentModelLevel>(
@@ -85,11 +85,20 @@ export function AgentModelConfig({
     setManual(initial?.mode === "manual" ? initial.models : []);
   }
 
+  function changeMode(next: "auto" | "manual") {
+    // Switching to manual with an empty list seeds it from the level you were
+    // on, so you start from auto's choices and edit rather than a blank slate.
+    if (next === "manual" && manual.length === 0) {
+      setManual((catalog.levels[level] ?? []).map((model) => ({ model })));
+    }
+    setMode(next);
+  }
+
   function save() {
     apply.mutate(
       {
         revision: { id: revisionId, state: revisionState ?? "draft" },
-        spec: { ...spec, model_policy: policy },
+        spec: { ...spec, models: policy },
       },
       { onSuccess: (rev) => onSelectRevision?.(rev.id) },
     );
@@ -137,7 +146,7 @@ export function AgentModelConfig({
           label="mode"
           icon={<SlidersHorizontalIcon size={14} />}
           value={mode}
-          onChange={(v) => setMode(v as "auto" | "manual")}
+          onChange={(v) => changeMode(v as "auto" | "manual")}
           options={MODE_OPTIONS}
         />
 
diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
index 8e1e558c0..643ddc63e 100644
--- a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
+++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
@@ -42,7 +42,7 @@ describe("useApplyAgentSpec", () => {
       state: "draft",
     });
     renderHook(() => useApplyAgentSpec("agent-slug", "app-1"));
-    const spec = { model_policy: { mode: "auto", level: "high" } };
+    const spec = { models: { mode: "auto", level: "high" } };
 
     await mutationFn({ revision: { id: "d1", state: "draft" }, spec });
 
@@ -64,7 +64,7 @@ describe("useApplyAgentSpec", () => {
       state: "draft",
     });
     renderHook(() => useApplyAgentSpec("agent-slug", "app-1"));
-    const spec = { model_policy: { mode: "manual", models: [{ model: "x" }] } };
+    const spec = { models: { mode: "manual", models: [{ model: "x" }] } };
 
     await mutationFn({ revision: { id: "live-1", state: "live" }, spec });
 

From d778d072de9c48c61e2384680f757c70bdc7ace5 Mon Sep 17 00:00:00 2001
From: Ben White <ben@posthog.com>
Date: Wed, 24 Jun 2026 18:22:22 +0200
Subject: [PATCH 5/9] =?UTF-8?q?fix(agent-applications):=20address=20review?=
 =?UTF-8?q?=20=E2=80=94=20cost=20precision,=20stable=20dirty=20check,=20se?=
 =?UTF-8?q?t=20lookup?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- fmtUsd: bounded-precision formatting (min 2 / max 4 fraction digits) so the
  cost column reads consistently and survives float noise from the catalog API.
- dirty check: canonicalise via a recursive key-sorting serializer so the
  "not saved yet" banner doesn't fire when the server serialises spec.models
  with a different key order than the locally-built policy. (Greptile's
  replacer-array suggestion would have dropped nested manual-entry keys.)
- session metrics: dedupe distinct models with a Set instead of Array.includes
  inside the loop.

The P1 finding (stale local state when the selected revision changes) was
already handled by `key={ctx.revisionId}` on ModelBody; Greptile reviewed a
commit from before that key was added.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../components/AgentModelConfig.tsx           | 26 ++++++++++++++++---
 .../components/AgentSessionDetailBody.tsx     |  6 ++++-
 2 files changed, 28 insertions(+), 4 deletions(-)

diff --git a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
index d1186b374..c8e7b8c49 100644
--- a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
@@ -69,8 +69,8 @@ export function AgentModelConfig({
       : { mode: "manual", models: manual };
 
   const dirty =
-    JSON.stringify(policy) !==
-    JSON.stringify(initial ?? { mode: "auto", level: "medium" });
+    stableStringify(policy) !==
+    stableStringify(initial ?? { mode: "auto", level: "medium" });
   const willBranch = revisionState !== "draft";
 
   const byId = useMemo(
@@ -682,7 +682,27 @@ function Muted({ children }: { children: ReactNode }) {
 }
 
 function fmtUsd(n: number): string {
-  return `$${n}`;
+  // Fixed precision so the cost column reads consistently ($1.00, $0.075)
+  // and survives float noise from the catalog API.
+  return `$${n.toLocaleString("en-US", {
+    minimumFractionDigits: 2,
+    maximumFractionDigits: 4,
+  })}`;
+}
+
+/**
+ * Deterministic JSON: recursively sorts object keys so the dirty check
+ * doesn't fire just because the server serialised `spec.models` with a
+ * different key order than the locally-built policy. Arrays keep their order.
+ */
+function stableStringify(value: unknown): string {
+  return JSON.stringify(value, (_key, val) =>
+    val && typeof val === "object" && !Array.isArray(val)
+      ? Object.fromEntries(
+          Object.entries(val).sort(([a], [b]) => a.localeCompare(b)),
+        )
+      : val,
+  );
 }
 
 function fmtCtx(n: number): string {
diff --git a/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx b/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
index 99fc3193d..2c9b38957 100644
--- a/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
@@ -30,13 +30,17 @@ function computeMetrics(
   let toolCalls = 0;
   let errors = 0;
   const models: string[] = [];
+  const seenModels = new Set<string>();
   for (const msg of session.conversation) {
     if (msg.role === "assistant") {
       for (const part of msg.content) {
         if (part.type === "toolCall") toolCalls += 1;
       }
       if (msg.errorMessage) errors += 1;
-      if (msg.model && !models.includes(msg.model)) models.push(msg.model);
+      if (msg.model && !seenModels.has(msg.model)) {
+        seenModels.add(msg.model);
+        models.push(msg.model);
+      }
     } else if (msg.role === "toolResult" && msg.isError) {
       errors += 1;
     }

From ff19038fbeb0c1f3e2f778fb3ecd2a6f293ddd97 Mon Sep 17 00:00:00 2001
From: Ben White <ben@posthog.com>
Date: Thu, 25 Jun 2026 08:59:50 +0200
Subject: [PATCH 6/9] fix(agent-applications): archive orphaned draft when spec
 PATCH fails after clone

useApplyAgentSpec clones a fresh draft when the target isn't already a
draft, then PATCHes the spec onto it. If the PATCH failed, that draft was
left orphaned (a copy of the source with no edit landed), and repeated
failed applies piled up empty drafts. Now the just-cloned draft is archived
best-effort on PATCH failure; the original error is always rethrown, and a
pre-existing draft passed in by the caller is never touched.

Addresses greptile review on #2900.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../hooks/useApplyAgentSpec.test.ts           | 34 +++++++++++++++++++
 .../hooks/useApplyAgentSpec.ts                | 19 +++++++++--
 2 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
index 643ddc63e..c9a146a61 100644
--- a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
+++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
@@ -19,6 +19,7 @@ vi.mock("@tanstack/react-query", () => ({
 const client = {
   createAgentDraftRevisionFrom: vi.fn(),
   updateAgentRevisionSpec: vi.fn(),
+  transitionAgentRevision: vi.fn(),
 };
 
 vi.mock("@posthog/ui/features/auth/authClient", () => ({
@@ -34,6 +35,7 @@ describe("useApplyAgentSpec", () => {
   beforeEach(() => {
     client.createAgentDraftRevisionFrom.mockReset();
     client.updateAgentRevisionSpec.mockReset();
+    client.transitionAgentRevision.mockReset();
   });
 
   it("PATCHes a draft in place — no new draft branched", async () => {
@@ -87,4 +89,36 @@ describe("useApplyAgentSpec", () => {
     ).rejects.toThrow(/Application/);
     expect(client.createAgentDraftRevisionFrom).not.toHaveBeenCalled();
   });
+
+  it("archives the orphaned draft (and rethrows) when the PATCH fails after a clone", async () => {
+    client.createAgentDraftRevisionFrom.mockResolvedValue({
+      id: "new-draft",
+      state: "draft",
+    });
+    const patchErr = new Error("spec.models: invalid");
+    client.updateAgentRevisionSpec.mockRejectedValue(patchErr);
+    client.transitionAgentRevision.mockResolvedValue({ id: "new-draft" });
+    renderHook(() => useApplyAgentSpec("agent-slug", "app-1"));
+
+    await expect(
+      mutationFn({ revision: { id: "live-1", state: "live" }, spec: {} }),
+    ).rejects.toThrow(patchErr);
+    // The just-cloned, never-landed draft gets archived as cleanup.
+    expect(client.transitionAgentRevision).toHaveBeenCalledWith(
+      "agent-slug",
+      "new-draft",
+      "archive",
+    );
+  });
+
+  it("does NOT archive when an in-place draft PATCH fails (nothing was cloned)", async () => {
+    client.updateAgentRevisionSpec.mockRejectedValue(new Error("boom"));
+    renderHook(() => useApplyAgentSpec("agent-slug", "app-1"));
+
+    await expect(
+      mutationFn({ revision: { id: "d1", state: "draft" }, spec: {} }),
+    ).rejects.toThrow(/boom/);
+    expect(client.createAgentDraftRevisionFrom).not.toHaveBeenCalled();
+    expect(client.transitionAgentRevision).not.toHaveBeenCalled();
+  });
 });
diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts
index 32103bacb..4a181766b 100644
--- a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts
+++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts
@@ -32,7 +32,8 @@ export function useApplyAgentSpec(
   >({
     mutationFn: async ({ revision, spec }) => {
       let targetId = revision.id;
-      if (revision.state !== "draft") {
+      const clonedDraft = revision.state !== "draft";
+      if (clonedDraft) {
         if (!applicationId) {
           throw new Error("Application not loaded yet");
         }
@@ -42,7 +43,21 @@ export function useApplyAgentSpec(
         );
         targetId = draft.id;
       }
-      return client.updateAgentRevisionSpec(idOrSlug, targetId, spec);
+      try {
+        return await client.updateAgentRevisionSpec(idOrSlug, targetId, spec);
+      } catch (err) {
+        // If we cloned a fresh draft and the spec PATCH then failed, that
+        // draft is an orphan (a copy of the source with no edit landed).
+        // Archive it best-effort so repeated failed applies don't pile up
+        // empty drafts; never mask the original error. A pre-existing draft
+        // passed in by the caller is left untouched.
+        if (clonedDraft) {
+          await client
+            .transitionAgentRevision(idOrSlug, targetId, "archive")
+            .catch(() => undefined);
+        }
+        throw err;
+      }
     },
     onSuccess: () => {
       for (const key of [

From 274c97e594480d3c4e978790961a1c1ebf6539b7 Mon Sep 17 00:00:00 2001
From: Ben White <ben@posthog.com>
Date: Thu, 25 Jun 2026 10:42:27 +0200
Subject: [PATCH 7/9] feat(agent-applications): "optimize for" (cost |
 availability) control in model config
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Surfaces spec.models.optimize_for in the model editor as a dropdown alongside
mode/level/reasoning (applies to both auto and manual):
- Cost (default): pin the first working model for the session — warm cache, no
  mid-session failover.
- Availability: fail over to the next model if the session's model goes down.

Wired into the policy object, dirty check, reset, and the create-draft-and-apply
save path. Adds AgentModelOptimizeFor to the shared spec types.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/shared/src/agent-platform-types.ts   | 21 +++++++++-
 .../components/AgentModelConfig.tsx           | 42 +++++++++++++++++--
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/packages/shared/src/agent-platform-types.ts b/packages/shared/src/agent-platform-types.ts
index 6f7056f06..e79e8ac38 100644
--- a/packages/shared/src/agent-platform-types.ts
+++ b/packages/shared/src/agent-platform-types.ts
@@ -66,6 +66,14 @@ export type AgentReasoningEffort =
 
 export type AgentModelLevel = "low" | "medium" | "high";
 
+/**
+ * Session model stability vs. resilience. `cost` (default): pin the first served
+ * model for the whole session — warm prompt cache, no cross-model failover.
+ * `availability`: lead with the last-served model but fail over on failure.
+ * Mirrors `spec.models.optimize_for` in the backend.
+ */
+export type AgentModelOptimizeFor = "cost" | "availability";
+
 /** One model in a manual policy: a canonical model id (e.g.
  *  `anthropic/claude-sonnet-4-6`) plus an optional per-model reasoning override. */
 export interface AgentModelEntry {
@@ -79,8 +87,17 @@ export interface AgentModelEntry {
  * fallback list (primary first). Mirrors `spec.models` in the backend.
  */
 export type AgentModelPolicy =
-  | { mode: "auto"; level?: AgentModelLevel; reasoning?: AgentReasoningEffort }
-  | { mode: "manual"; models: AgentModelEntry[] };
+  | {
+      mode: "auto";
+      level?: AgentModelLevel;
+      reasoning?: AgentReasoningEffort;
+      optimize_for?: AgentModelOptimizeFor;
+    }
+  | {
+      mode: "manual";
+      models: AgentModelEntry[];
+      optimize_for?: AgentModelOptimizeFor;
+    };
 
 /**
  * A served model + its cost profile, as the model browser shows it. Mirrors the
diff --git a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
index c8e7b8c49..f83e36dc9 100644
--- a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
@@ -6,11 +6,13 @@ import {
   CaretDownIcon,
   GaugeIcon,
   MagnifyingGlassIcon,
+  ScalesIcon,
   SlidersHorizontalIcon,
 } from "@phosphor-icons/react";
 import type {
   AgentModelEntry,
   AgentModelLevel,
+  AgentModelOptimizeFor,
   AgentModelPolicy,
   AgentReasoningEffort,
   AgentRevisionState,
@@ -62,15 +64,25 @@ export function AgentModelConfig({
   const [manual, setManual] = useState<AgentModelEntry[]>(
     initial?.mode === "manual" ? initial.models : [],
   );
+  const [optimizeFor, setOptimizeFor] = useState<AgentModelOptimizeFor>(
+    initial?.optimize_for ?? "cost",
+  );
 
   const policy: AgentModelPolicy =
     mode === "auto"
-      ? { mode: "auto", level, ...(reasoning ? { reasoning } : {}) }
-      : { mode: "manual", models: manual };
+      ? {
+          mode: "auto",
+          level,
+          optimize_for: optimizeFor,
+          ...(reasoning ? { reasoning } : {}),
+        }
+      : { mode: "manual", models: manual, optimize_for: optimizeFor };
 
   const dirty =
     stableStringify(policy) !==
-    stableStringify(initial ?? { mode: "auto", level: "medium" });
+    stableStringify(
+      initial ?? { mode: "auto", level: "medium", optimize_for: "cost" },
+    );
   const willBranch = revisionState !== "draft";
 
   const byId = useMemo(
@@ -83,6 +95,7 @@ export function AgentModelConfig({
     setLevel(initial?.mode === "auto" ? (initial.level ?? "medium") : "medium");
     setReasoning(initial?.mode === "auto" ? initial.reasoning : spec.reasoning);
     setManual(initial?.mode === "manual" ? initial.models : []);
+    setOptimizeFor(initial?.optimize_for ?? "cost");
   }
 
   function changeMode(next: "auto" | "manual") {
@@ -150,6 +163,14 @@ export function AgentModelConfig({
           options={MODE_OPTIONS}
         />
 
+        <Select
+          label="optimize for"
+          icon={<ScalesIcon size={14} />}
+          value={optimizeFor}
+          onChange={(v) => setOptimizeFor(v as AgentModelOptimizeFor)}
+          options={OPTIMIZE_OPTIONS}
+        />
+
         {mode === "auto" ? (
           <>
             <Select
@@ -212,6 +233,21 @@ const MODE_OPTIONS = [
   },
 ] as const;
 
+const OPTIMIZE_OPTIONS = [
+  {
+    value: "cost",
+    title: "Cost",
+    description:
+      "Pin the first working model for the whole session — keeps the prompt cache warm, no mid-session failover.",
+  },
+  {
+    value: "availability",
+    title: "Availability",
+    description:
+      "Fail over to the next model if the session's model goes down — survives outages, but re-reads context cold.",
+  },
+] as const;
+
 const LEVEL_OPTIONS = [
   {
     value: "low",

From e3417045b853e71e3117957d134f4b34bf8ad501 Mon Sep 17 00:00:00 2001
From: dmarticus <dylan@posthog.com>
Date: Thu, 25 Jun 2026 13:54:21 -0700
Subject: [PATCH 8/9] fix(agent-applications): drop colliding Tailwind size
 class on mono Model KPI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Model `MetricItem` always emitted `text-[14px]` and additionally
`text-[12.5px]` when `mono` was true. Tailwind JIT generates both rules
and the resolved font-size depends on stylesheet emission order, not
className order — fragile across builds.

Make the size class conditional so only one is ever present.

Also drop a stale "no save wired yet" comment on AgentModelConfig — the
save path landed in the same series.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../agent-applications/components/AgentModelConfig.tsx    | 8 +++-----
 .../components/AgentSessionDetailBody.tsx                 | 6 +++---
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
index f83e36dc9..fd2f4fc11 100644
--- a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
@@ -29,11 +29,9 @@ import { useModelCatalog } from "../hooks/useModelCatalog";
 /**
  * The rich model section: an interactive policy editor (mode + level +
  * reasoning), a preview of what an `auto` level resolves to, and a searchable
- * browser of every served model with its cost profile.
- *
- * Editing is local-state only — there's no save wired yet; the point is to see
- * the UX. The catalog comes from `useModelCatalog` (a stand-in for the
- * model-info endpoint).
+ * browser of every served model with its cost profile. Save goes through
+ * `useApplyAgentSpec`, which PATCHes a draft in place or branches a fresh
+ * draft from a non-draft revision first.
  */
 export function AgentModelConfig({
   spec,
diff --git a/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx b/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
index 2c9b38957..a29ea1e11 100644
--- a/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx
@@ -257,9 +257,9 @@ function MetricItem({
       </Text>
       <Text
         title={title}
-        className={`font-semibold text-[14px] leading-none ${
-          tone === "bad" ? "text-(--red-11)" : "text-gray-12"
-        } ${mono ? "text-[12.5px] [font-family:var(--font-mono)]" : ""}`}
+        className={`font-semibold leading-none ${
+          mono ? "text-[12.5px] [font-family:var(--font-mono)]" : "text-[14px]"
+        } ${tone === "bad" ? "text-(--red-11)" : "text-gray-12"}`}
       >
         {value}
       </Text>

From f012d0956b6c9f19effed9ef1137ac57083b64f9 Mon Sep 17 00:00:00 2001
From: Ben White <ben@posthog.com>
Date: Fri, 26 Jun 2026 11:04:50 +0200
Subject: [PATCH 9/9] fix(agent-applications): address model-config review nits
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- ModelBrowser sort: rank cheapest/priciest by blended input+output cost,
  not input alone — input-only mis-ranks reasoning models (cheap input,
  dominant output), exactly the ones cost-conscious authors compare.
- useApplyAgentSpec: take the revision-prefix invalidation key from the
  shared agentApplicationsKeys factory (new revisionPrefix) instead of a
  hand-rolled array, so it sits beside revision() and is less likely to drift.
- test: capture and exercise onSuccess, asserting it invalidates the
  detail/revisions/revision-prefix keys via the factory — closes the gap
  where mocking useMutation left invalidation keys untested.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .../components/AgentModelConfig.tsx           |  8 +++-
 .../hooks/agentApplicationsKeys.ts            |  7 ++++
 .../hooks/useApplyAgentSpec.test.ts           | 40 +++++++++++++++++--
 .../hooks/useApplyAgentSpec.ts                |  2 +-
 4 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
index fd2f4fc11..2d14eb9d5 100644
--- a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
+++ b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx
@@ -476,10 +476,14 @@ function ModelBrowser({
             m.provider.toLowerCase().includes(needle),
         )
       : models;
+    // Blended per-Mtok cost (input + output), not input alone: reasoning
+    // models can have cheap input but dominant output, so input-only mis-ranks
+    // exactly the models cost-conscious authors most need to compare.
+    const blended = (m: ModelCatalogEntry) => m.input + m.output;
     const sorted = [...filtered];
     if (sort === "name") sorted.sort((a, b) => a.model.localeCompare(b.model));
-    if (sort === "cheapest") sorted.sort((a, b) => a.input - b.input);
-    if (sort === "priciest") sorted.sort((a, b) => b.input - a.input);
+    if (sort === "cheapest") sorted.sort((a, b) => blended(a) - blended(b));
+    if (sort === "priciest") sorted.sort((a, b) => blended(b) - blended(a));
     return sorted;
   }, [models, q, sort]);
 
diff --git a/packages/ui/src/features/agent-applications/hooks/agentApplicationsKeys.ts b/packages/ui/src/features/agent-applications/hooks/agentApplicationsKeys.ts
index 3ad880a70..86545a7da 100644
--- a/packages/ui/src/features/agent-applications/hooks/agentApplicationsKeys.ts
+++ b/packages/ui/src/features/agent-applications/hooks/agentApplicationsKeys.ts
@@ -54,6 +54,13 @@ export const agentApplicationsKeys = {
     ] as const,
   revisions: (projectId: number | null, idOrSlug: string) =>
     ["agent-applications", "revisions", projectId, idOrSlug] as const,
+  /**
+   * Prefix over every single-revision query (any `revisionId`) for one agent.
+   * Invalidate it to refresh all `revision(...)` caches at once. It must stay
+   * a prefix of the `revision` key below — update the two together.
+   */
+  revisionPrefix: (projectId: number | null, idOrSlug: string) =>
+    ["agent-applications", "revision", projectId, idOrSlug] as const,
   revision: (projectId: number | null, idOrSlug: string, revisionId: string) =>
     [
       "agent-applications",
diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
index c9a146a61..b3b60d544 100644
--- a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
+++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts
@@ -1,19 +1,29 @@
 import { renderHook } from "@testing-library/react";
 import { beforeEach, describe, expect, it, vi } from "vitest";
 
-// Capture the mutationFn the hook hands to react-query so we can exercise the
-// create-draft-vs-patch branching directly, without a live QueryClient.
+// Capture the mutationFn and onSuccess the hook hands to react-query so we can
+// exercise the create-draft-vs-patch branching and the cache invalidation
+// directly, without a live QueryClient.
 let mutationFn: (vars: {
   revision: { id: string; state: string };
   spec: unknown;
 }) => Promise<unknown>;
+let onSuccess: (() => void) | undefined;
+let invalidateQueries: ReturnType<typeof vi.fn>;
 
 vi.mock("@tanstack/react-query", () => ({
-  useMutation: (opts: { mutationFn: typeof mutationFn }) => {
+  useMutation: (opts: {
+    mutationFn: typeof mutationFn;
+    onSuccess?: () => void;
+  }) => {
     mutationFn = opts.mutationFn;
+    onSuccess = opts.onSuccess;
     return { mutate: vi.fn() };
   },
-  useQueryClient: () => ({ invalidateQueries: vi.fn() }),
+  useQueryClient: () => {
+    invalidateQueries = vi.fn();
+    return { invalidateQueries };
+  },
 }));
 
 const client = {
@@ -29,6 +39,7 @@ vi.mock("../../auth/store", () => ({
   useAuthStateValue: () => 1,
 }));
 
+import { agentApplicationsKeys } from "./agentApplicationsKeys";
 import { useApplyAgentSpec } from "./useApplyAgentSpec";
 
 describe("useApplyAgentSpec", () => {
@@ -121,4 +132,25 @@ describe("useApplyAgentSpec", () => {
     expect(client.createAgentDraftRevisionFrom).not.toHaveBeenCalled();
     expect(client.transitionAgentRevision).not.toHaveBeenCalled();
   });
+
+  it("onSuccess invalidates the detail, revisions, and per-revision caches via the shared key factory", () => {
+    // projectId is mocked to 1 (useAuthStateValue). Asserting against the
+    // factory rather than literal arrays means these keys can't silently drift
+    // from the fetch hooks that own the caches — the gap when useMutation is
+    // mocked away.
+    renderHook(() => useApplyAgentSpec("agent-slug", "app-1"));
+    expect(onSuccess).toBeDefined();
+    onSuccess?.();
+
+    const invalidated = invalidateQueries.mock.calls.map((c) => c[0].queryKey);
+    expect(invalidated).toContainEqual(
+      agentApplicationsKeys.detail(1, "agent-slug"),
+    );
+    expect(invalidated).toContainEqual(
+      agentApplicationsKeys.revisions(1, "agent-slug"),
+    );
+    expect(invalidated).toContainEqual(
+      agentApplicationsKeys.revisionPrefix(1, "agent-slug"),
+    );
+  });
 });
diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts
index 4a181766b..9f59794c9 100644
--- a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts
+++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts
@@ -63,7 +63,7 @@ export function useApplyAgentSpec(
       for (const key of [
         agentApplicationsKeys.detail(projectId, idOrSlug),
         agentApplicationsKeys.revisions(projectId, idOrSlug),
-        ["agent-applications", "revision", projectId, idOrSlug],
+        agentApplicationsKeys.revisionPrefix(projectId, idOrSlug),
       ]) {
         void queryClient.invalidateQueries({ queryKey: key });
       }