diff --git a/packages/api-client/src/posthog-client.test.ts b/packages/api-client/src/posthog-client.test.ts index 5c60ba2f7..1eacedc65 100644 --- a/packages/api-client/src/posthog-client.test.ts +++ b/packages/api-client/src/posthog-client.test.ts @@ -836,4 +836,89 @@ describe("PostHogAPIClient", () => { ).rejects.toThrow("Unexpected response"); }); }); + + describe("agent model policy + catalog", () => { + function makeClient(fetch: ReturnType) { + const client = new PostHogAPIClient( + "http://localhost:8000", + async () => "token", + async () => "token", + 123, + ); + ( + client as unknown as { + api: { baseUrl: string; fetcher: { fetch: typeof fetch } }; + } + ).api = { baseUrl: "http://localhost:8000", fetcher: { fetch } }; + return client; + } + + it("createAgentDraftRevisionFrom unwraps the { revision } envelope", async () => { + // Regression: new_draft returns `{ revision, source_revision_id }`, not a + // flat revision — returning the wrapper left `.id` undefined and broke the + // follow-up PATCH (404 on /revisions/undefined/). 
+ const fetch = vi.fn().mockResolvedValue({ + json: async () => ({ + revision: { id: "draft-1", state: "draft" }, + source_revision_id: "rev-0", + }), + }); + const client = makeClient(fetch); + + const rev = await client.createAgentDraftRevisionFrom("app-1", "rev-0"); + + expect(rev.id).toBe("draft-1"); + expect(fetch).toHaveBeenCalledWith( + expect.objectContaining({ + method: "post", + path: "/api/projects/123/agent_applications/app-1/revisions/new_draft/", + overrides: { + body: JSON.stringify({ + application_id: "app-1", + source_revision_id: "rev-0", + }), + }, + }), + ); + }); + + it("updateAgentRevisionSpec PATCHes the revision with the full spec", async () => { + const fetch = vi.fn().mockResolvedValue({ + json: async () => ({ id: "draft-1", state: "draft" }), + }); + const client = makeClient(fetch); + const spec = { models: { mode: "auto", level: "high" } }; + + await client.updateAgentRevisionSpec( + "agent-slug", + "draft-1", + spec as never, + ); + + expect(fetch).toHaveBeenCalledWith( + expect.objectContaining({ + method: "patch", + path: "/api/projects/123/agent_applications/agent-slug/revisions/draft-1/", + overrides: { body: JSON.stringify({ spec }) }, + }), + ); + }); + + it("getAgentModelCatalog GETs the project-level models endpoint", async () => { + const catalog = { + models: [{ model: "anthropic/claude-haiku-4.5" }], + levels: { low: ["anthropic/claude-haiku-4.5"] }, + }; + const fetch = vi.fn().mockResolvedValue({ json: async () => catalog }); + const client = makeClient(fetch); + + await expect(client.getAgentModelCatalog()).resolves.toEqual(catalog); + expect(fetch).toHaveBeenCalledWith( + expect.objectContaining({ + method: "get", + path: "/api/projects/123/agent_applications/models/", + }), + ); + }); + }); }); diff --git a/packages/api-client/src/posthog-client.ts b/packages/api-client/src/posthog-client.ts index 90942b2b7..05bec74cd 100644 --- a/packages/api-client/src/posthog-client.ts +++ b/packages/api-client/src/posthog-client.ts 
@@ -32,9 +32,11 @@ import type { AgentSessionLogsParams, AgentSessionsListParams, AgentSlackManifest, + AgentSpec, AgentUsersListResponse, BundleFile, DecideApprovalRequest, + ModelCatalog, } from "@posthog/shared/agent-platform-types"; import type { ActionabilityJudgmentArtefact, @@ -4667,6 +4669,38 @@ export class PostHogAPIClient { }), }, }); + // new_draft wraps the created revision: `{ revision, source_revision_id }`. + const data = (await response.json()) as { revision: AgentRevision }; + return data.revision; + } + + /** The served-model catalog + curated auto-level → model map (project-agnostic; + * proxies the AI gateway catalog). Powers the config-pane model browser. */ + async getAgentModelCatalog(): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}models/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ method: "get", url, path }); + return (await response.json()) as ModelCatalog; + } + + /** Update a draft revision's spec (PATCH). Draft-only on the server — a + * ready/live spec is frozen. Replaces `spec` wholesale, so callers send the + * full updated spec. Returns the updated revision. 
*/ + async updateAgentRevisionSpec( + idOrSlug: string, + revisionId: string, + spec: AgentSpec, + ): Promise { + const teamId = await this.getTeamId(); + const path = `${this.agentApplicationsPath(teamId)}${encodeURIComponent(idOrSlug)}/revisions/${encodeURIComponent(revisionId)}/`; + const url = new URL(`${this.api.baseUrl}${path}`); + const response = await this.api.fetcher.fetch({ + method: "patch", + url, + path, + overrides: { body: JSON.stringify({ spec }) }, + }); return (await response.json()) as AgentRevision; } diff --git a/packages/shared/src/agent-platform-types.ts b/packages/shared/src/agent-platform-types.ts index 52965dca1..380505e43 100644 --- a/packages/shared/src/agent-platform-types.ts +++ b/packages/shared/src/agent-platform-types.ts @@ -57,12 +57,80 @@ export interface AgentApplication { ingress_base_url: string | null; } +export type AgentReasoningEffort = + | "minimal" + | "low" + | "medium" + | "high" + | "xhigh"; + +export type AgentModelLevel = "low" | "medium" | "high"; + +/** + * Session model stability vs. resilience. `cost` (default): pin the first served + * model for the whole session — warm prompt cache, no cross-model failover. + * `availability`: lead with the last-served model but fail over on failure. + * Mirrors `spec.models.optimize_for` in the backend. + */ +export type AgentModelOptimizeFor = "cost" | "availability"; + +/** One model in a manual policy: a canonical model id (e.g. + * `anthropic/claude-sonnet-4.6`) plus an optional per-model reasoning override. */ +export interface AgentModelEntry { + model: string; + reasoning?: AgentReasoningEffort; +} + +/** + * How a revision picks its model. `auto` resolves a maintained, priority-ordered, + * cross-provider list from `level` at runtime; `manual` pins an author-ordered + * fallback list (primary first). Mirrors `spec.models` in the backend. 
+ */ +export type AgentModelPolicy = + | { + mode: "auto"; + level?: AgentModelLevel; + reasoning?: AgentReasoningEffort; + optimize_for?: AgentModelOptimizeFor; + } + | { + mode: "manual"; + models: AgentModelEntry[]; + optimize_for?: AgentModelOptimizeFor; + }; + +/** + * A served model + its cost profile, as the model browser shows it. Mirrors the + * ai-gateway catalog (`@posthog/agent-applications-models`). Pricing is USD per + * million tokens. + */ +export interface ModelCatalogEntry { + /** Canonical id, e.g. `anthropic/claude-sonnet-4.6`. */ + model: string; + provider: string; + context_window: number; + input: number; + output: number; + cacheRead?: number; + cacheWrite?: number; +} + +/** The full served catalog plus the curated `auto` level → model mapping. */ +export interface ModelCatalog { + models: ModelCatalogEntry[]; + /** Canonical ids each auto level resolves to, in priority order. */ + levels: Record; +} + /** * The agent spec carried on a revision. Known top-level fields are surfaced and * the rest passes through pending fully-typed elaboration. */ export interface AgentSpec { - model: string; + /** Model selection. `model` is the legacy single-string form; current specs + * carry `models`. One or the other is present. 
*/ + models?: AgentModelPolicy; + model?: string; triggers?: unknown[]; tools?: unknown[]; mcps?: unknown[]; @@ -74,8 +142,7 @@ export interface AgentSpec { max_tool_calls?: number; max_wall_seconds?: number; }; - entrypoint?: string; - reasoning?: "minimal" | "low" | "medium" | "high" | "xhigh"; + reasoning?: AgentReasoningEffort; [key: string]: unknown; } diff --git a/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx b/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx index 7c4206d6d..5f71310f9 100644 --- a/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx +++ b/packages/ui/src/features/agent-applications/components/AgentConfigurationPane.tsx @@ -21,6 +21,7 @@ import { WrenchIcon, } from "@phosphor-icons/react"; import type { + AgentRevisionState, AgentSpec, BundleFile, } from "@posthog/shared/agent-platform-types"; @@ -37,6 +38,7 @@ import { useAgentRevisionBundle } from "../hooks/useAgentRevisionBundle"; import { useAgentRevisions } from "../hooks/useAgentRevisions"; import { triggerRequiredSecretsFor } from "../utils/triggerSecrets"; import { AgentDetailEmptyState, AgentDetailLayout } from "./AgentDetailLayout"; +import { AgentModelConfig } from "./AgentModelConfig"; import { AgentRevisionBar } from "./AgentRevisionBar"; import { CopyButton } from "./CopyButton"; import { CronFireButton } from "./CronFireButton"; @@ -62,9 +64,15 @@ const USAGE_HOST = "https://"; interface Ctx { idOrSlug: string; revisionId: string; + /** Application UUID — needed to branch a new draft on save. */ + applicationId?: string; + /** State of the viewed revision — drives draft-only edit vs auto-clone. */ + revisionState?: AgentRevisionState; ingressBaseUrl?: string; setKeys: string[]; onSelect: (node: string) => void; + /** Select a revision in the picker (used to jump to a freshly branched draft). 
*/ + onSelectRevision?: (revisionId: string) => void; onOpenSession?: (sessionId: string) => void; } @@ -398,9 +406,12 @@ export function AgentConfigurationPane({ ? { idOrSlug, revisionId, + applicationId: application?.id, + revisionState: revision?.state, ingressBaseUrl: application?.ingress_base_url ?? undefined, setKeys, onSelect: onSelectNode, + onSelectRevision, onOpenSession, } : null; @@ -460,7 +471,7 @@ export function AgentConfigurationPane({ const SECTION_INFO: Record = { "cfg:model": - "The model every request goes to. `reasoning` sets the extended-thinking budget; limits cap a run's turns, tool calls and wall time.", + "How the agent picks its model. `auto` resolves a level (low/medium/high) to a maintained cross-provider list at runtime; `manual` pins an explicit priority list. `reasoning` sets the extended-thinking budget.", "cfg:instructions": "The agent's entrypoint prompt (agent.md) — the always-on system instructions.", "cfg:triggers": "What can start a session — chat, webhook, mcp, slack, cron.", @@ -614,7 +625,7 @@ function DetailBody({ }) { switch (section) { case "model": - return ; + return ; case "instructions": return ( f.path === path); } -function ModelBody({ spec }: { spec: AgentSpec }) { +function ModelBody({ spec, ctx }: { spec: AgentSpec; ctx: Ctx }) { return ( - - - - {spec.entrypoint ? 
( - - ) : null} - + ); } diff --git a/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx new file mode 100644 index 000000000..2d14eb9d5 --- /dev/null +++ b/packages/ui/src/features/agent-applications/components/AgentModelConfig.tsx @@ -0,0 +1,751 @@ +import { PointerSensor } from "@dnd-kit/dom"; +import { type DragDropEvents, DragDropProvider } from "@dnd-kit/react"; +import { useSortable } from "@dnd-kit/react/sortable"; +import { + BrainIcon, + CaretDownIcon, + GaugeIcon, + MagnifyingGlassIcon, + ScalesIcon, + SlidersHorizontalIcon, +} from "@phosphor-icons/react"; +import type { + AgentModelEntry, + AgentModelLevel, + AgentModelOptimizeFor, + AgentModelPolicy, + AgentReasoningEffort, + AgentRevisionState, + AgentSpec, + ModelCatalogEntry, +} from "@posthog/shared/agent-platform-types"; +import { Badge } from "@posthog/ui/primitives/Badge"; +import { Button } from "@posthog/ui/primitives/Button"; +import { Flex, Popover, Text } from "@radix-ui/themes"; +import { type ReactNode, type RefCallback, useMemo, useState } from "react"; +import { useApplyAgentSpec } from "../hooks/useApplyAgentSpec"; +import { useModelCatalog } from "../hooks/useModelCatalog"; + +/** + * The rich model section: an interactive policy editor (mode + level + + * reasoning), a preview of what an `auto` level resolves to, and a searchable + * browser of every served model with its cost profile. Save goes through + * `useApplyAgentSpec`, which PATCHes a draft in place or branches a fresh + * draft from a non-draft revision first. 
+ */ +export function AgentModelConfig({ + spec, + idOrSlug, + applicationId, + revisionId, + revisionState, + onSelectRevision, +}: { + spec: AgentSpec; + idOrSlug: string; + applicationId?: string; + revisionId: string; + revisionState?: AgentRevisionState; + onSelectRevision?: (revisionId: string) => void; +}) { + const { catalog } = useModelCatalog(); + const apply = useApplyAgentSpec(idOrSlug, applicationId); + const initial = spec.models; + + /* + * Editor state is seeded once from the incoming spec through the useState + * initial values below; later prop changes do not re-seed it. + * NOTE(review): presumably the parent remounts this component when the + * viewed revision changes; confirm, otherwise selections made for one + * revision carry over to the next. + */ + const [mode, setMode] = useState<"auto" | "manual">(initial?.mode ?? "auto"); + const [level, setLevel] = useState( + initial?.mode === "auto" ? (initial.level ?? "medium") : "medium", + ); + const [reasoning, setReasoning] = useState( + initial?.mode === "auto" ? initial.reasoning : spec.reasoning, + ); + const [manual, setManual] = useState( + initial?.mode === "manual" ? initial.models : [], + ); + const [optimizeFor, setOptimizeFor] = useState( + initial?.optimize_for ?? "cost", + ); + + const policy: AgentModelPolicy = + mode === "auto" + ? { + mode: "auto", + level, + optimize_for: optimizeFor, + ...(reasoning ? { reasoning } : {}), + } + : { mode: "manual", models: manual, optimize_for: optimizeFor }; + + /* + * Dirty = the locally built policy differs from the stored one, compared as + * key-sorted JSON. The local policy always carries `optimize_for` (and + * `level` in auto mode) with defaults filled in, and adds `reasoning` when + * set, whereas the baseline is the raw `spec.models`. + * NOTE(review): a stored policy that omits those optional fields, or a spec + * with no `models` but a top-level `reasoning`, therefore compares unequal + * before any edit, so the unsaved-changes state shows on load. Normalising + * the baseline through the same defaults would avoid that. + */ + const dirty = + stableStringify(policy) !== + stableStringify( + initial ?? { mode: "auto", level: "medium", optimize_for: "cost" }, + ); + /* Anything other than an explicit "draft" (including undefined) counts as branching. */ + const willBranch = revisionState !== "draft"; + + const byId = useMemo( + () => new Map(catalog.models.map((m) => [m.model, m])), + [catalog.models], + ); + + function reset() { + setMode(initial?.mode ?? "auto"); + setLevel(initial?.mode === "auto" ? (initial.level ?? "medium") : "medium"); + setReasoning(initial?.mode === "auto" ? initial.reasoning : spec.reasoning); + setManual(initial?.mode === "manual" ? initial.models : []); + setOptimizeFor(initial?.optimize_for ?? 
"cost"); + } + + function changeMode(next: "auto" | "manual") { + // Switching to manual with an empty list seeds it from the level you were + // on, so you start from auto's choices and edit rather than a blank slate. + if (next === "manual" && manual.length === 0) { + setManual((catalog.levels[level] ?? []).map((model) => ({ model }))); + } + setMode(next); + } + + /* + * Hands the viewed revision plus the spec (with `models` replaced by the + * edited policy) to the apply mutation, then selects the revision it returns. + * NOTE(review): an undefined `revisionState` is sent as "draft" here, while + * `willBranch` treats undefined as non-draft. The banner then announces a + * branch but the mutation is told the revision is already a draft. + * NOTE(review): `...spec` keeps a legacy `model` (and top-level `reasoning`) + * next to the new `models`; confirm the server accepts both together. + */ + function save() { + apply.mutate( + { + revision: { id: revisionId, state: revisionState ?? "draft" }, + spec: { ...spec, models: policy }, + }, + { onSuccess: (rev) => onSelectRevision?.(rev.id) }, + ); + } + + return ( + + {dirty ? ( + + + + {willBranch + ? "Unsaved changes — saving branches a new draft." + : "Unsaved changes."} + + + + + + + {apply.isError ? ( + + {apply.error?.message ?? "Save failed"} + + ) : null} + + ) : null} + + + } + value={optimizeFor} + onChange={(v) => setOptimizeFor(v as AgentModelOptimizeFor)} + options={OPTIMIZE_OPTIONS} + /> + + {mode === "auto" ? ( + <> + } + value={reasoning ?? "default"} + onChange={(v) => + setReasoning( + v === "default" ? undefined : (v as AgentReasoningEffort), + ) + } + options={REASONING_OPTIONS} + /> + + ) : null} + + + {mode === "auto" ? ( + + ) : ( + + )} + + browse all models · {catalog.models.length} + m.model) : []} + onAdd={(id) => + setManual((prev) => + prev.some((m) => m.model === id) ? 
prev : [...prev, { model: id }], + ) + } + /> + + ); +} + +const MODE_OPTIONS = [ + { + value: "auto", + title: "Auto", + description: "Platform-managed list, resolved across providers at runtime.", + }, + { + value: "manual", + title: "Manual", + description: "Explicit, author-ordered fallback list you pin yourself.", + }, +] as const; + +const OPTIMIZE_OPTIONS = [ + { + value: "cost", + title: "Cost", + description: + "Pin the first working model for the whole session — keeps the prompt cache warm, no mid-session failover.", + }, + { + value: "availability", + title: "Availability", + description: + "Fail over to the next model if the session's model goes down — survives outages, but re-reads context cold.", + }, +] as const; + +const LEVEL_OPTIONS = [ + { + value: "low", + title: "Low", + description: "Cheapest — short, formulaic, no-reasoning jobs.", + }, + { + value: "medium", + title: "Medium", + description: "Balanced default — multi-step but bounded work.", + }, + { + value: "high", + title: "High", + description: "Top-tier — long, branching, reasoning-heavy work.", + }, +] as const; + +const REASONING_OPTIONS = [ + { + value: "default", + title: "Default", + description: "Provider / spec default — no explicit budget.", + }, + { + value: "minimal", + title: "Minimal", + description: "No deliberation — cheapest, fastest.", + }, + { value: "low", title: "Low", description: "Light deliberation." }, + { value: "medium", title: "Medium", description: "Moderate deliberation." }, + { value: "high", title: "High", description: "Deep deliberation." 
}, + { + value: "xhigh", + title: "Xhigh", + description: "Maximal — research-grade, ~5–10× the per-turn cost.", + }, +] as const; + +const LEVEL_BLURB: Record = { + low: "Cheapest — short, formulaic, no-reasoning jobs (lookups, FAQ bots).", + medium: "Balanced default — multi-step but bounded work.", + high: "Top-tier — long, branching, reasoning-heavy work.", +}; + +function AutoLevelPreview({ + level, + ids, + byId, +}: { + level: AgentModelLevel; + ids: string[]; + byId: Map; +}) { + return ( + + + auto resolves the level to a maintained, + priority-ordered, cross-provider list at runtime — the runner tries each + in order until one answers, so the agent rides upgrades and survives a + provider outage without a spec change. {level}:{" "} + {LEVEL_BLURB[level]} + + {level} resolves to · priority order + {ids.map((id, i) => { + const m = byId.get(id); + return ( + + + + {i === 0 ? "primary" : `#${i + 1}`} + + + {id} + + + {m ? : not in catalog} + + ); + })} + + ); +} + +function ManualEditor({ + models, + byId, + onChange, +}: { + models: AgentModelEntry[]; + byId: Map; + onChange: (next: AgentModelEntry[]) => void; +}) { + const ids = models.map((m) => m.model); + const handleDragOver: DragDropEvents["dragover"] = (event) => { + const sourceId = event.operation.source?.id; + const targetId = event.operation.target?.id; + if (!sourceId || !targetId || sourceId === targetId) return; + const from = ids.indexOf(String(sourceId)); + const to = ids.indexOf(String(targetId)); + if (from === -1 || to === -1 || from === to) return; + const next = [...models]; + const [moved] = next.splice(from, 1); + next.splice(to, 0, moved); + onChange(next); + }; + return ( + + + manual pins an explicit fallback list + (primary first). Add models from the browser below; order them + provider-diverse so a single provider outage degrades instead of + failing. + + models · priority order · drag to reorder + {models.length === 0 ? ( + + No models yet — add one from the browser below. 
+ + ) : ( + + + {models.map((entry, i) => ( + onChange(models.filter((_, k) => k !== i))} + /> + ))} + + + )} + + ); +} + +function SortableModelRow({ + id, + index, + entry, + m, + onRemove, +}: { + id: string; + index: number; + entry: AgentModelEntry; + m?: ModelCatalogEntry; + onRemove: () => void; +}) { + const { ref, handleRef, isDragging } = useSortable({ + id, + index, + group: "manual-models", + transition: { duration: 200, easing: "ease" }, + }); + return ( +
+ + + + + {index === 0 ? "primary" : `#${index + 1}`} + + + {entry.model} + + + + {m ? : null} + + + +
+ ); +} + +type SortKey = "name" | "cheapest" | "priciest"; + +function ModelBrowser({ + models, + canAdd, + selected, + onAdd, +}: { + models: ModelCatalogEntry[]; + canAdd: boolean; + selected: string[]; + onAdd: (id: string) => void; +}) { + const [q, setQ] = useState(""); + const [sort, setSort] = useState("name"); + + const rows = useMemo(() => { + const needle = q.trim().toLowerCase(); + const filtered = needle + ? models.filter( + (m) => + m.model.toLowerCase().includes(needle) || + m.provider.toLowerCase().includes(needle), + ) + : models; + // Blended per-Mtok cost (input + output), not input alone: reasoning + // models can have cheap input but dominant output, so input-only mis-ranks + // exactly the models cost-conscious authors most need to compare. + const blended = (m: ModelCatalogEntry) => m.input + m.output; + const sorted = [...filtered]; + if (sort === "name") sorted.sort((a, b) => a.model.localeCompare(b.model)); + if (sort === "cheapest") sorted.sort((a, b) => blended(a) - blended(b)); + if (sort === "priciest") sorted.sort((a, b) => blended(b) - blended(a)); + return sorted; + }, [models, q, sort]); + + return ( + + +
+ + setQ(e.currentTarget.value)} + placeholder="Search models…" + aria-label="Search models" + className="h-8 w-full rounded-(--radius-2) border border-border bg-(--color-panel-solid) pr-2 pl-8 text-[12.5px]" + /> +
+ setSort(v as SortKey)} + options={[ + { value: "name", label: "Name" }, + { value: "cheapest", label: "Cheapest" }, + { value: "priciest", label: "Priciest" }, + ]} + /> +
+ + + {rows.map((m) => { + const added = selected.includes(m.model); + return ( + + + + {m.model} + + {canAdd ? ( + onAdd(m.model)} + disabled={added} + /> + ) : null} + + + {m.provider} + + + + {m.cacheRead != null ? ( + + ) : null} + + + ); + })} + {rows.length === 0 ? ( + + No models match “{q}”. + + ) : null} + +
+ ); +} + +// --- small presentational helpers --- + +function CostInline({ m }: { m: ModelCatalogEntry }) { + return ( + + in {fmtUsd(m.input)} · out {fmtUsd(m.output)} + /Mtok + + ); +} + +function Stat({ label, value }: { label: string; value: string }) { + return ( + + {label} {value} + + ); +} + +function Select({ + label, + icon, + value, + onChange, + options, +}: { + label: string; + icon?: ReactNode; + value: string; + onChange: (v: string) => void; + options: readonly { value: string; title: string; description: string }[]; +}) { + const [open, setOpen] = useState(false); + const current = options.find((o) => o.value === value) ?? options[0]; + return ( + + + + {icon} + {label} + + + + + + +
    + {options.map((o) => ( +
  • + +
  • + ))} +
+
+
+
+ {current?.description ? ( + + {current.description} + + ) : null} +
+ ); +} + +function Seg({ + value, + onChange, + options, +}: { + value: T; + onChange: (v: T) => void; + options: { value: T; label: string }[]; +}) { + return ( + + {options.map((o) => ( + + ))} + + ); +} + +function MiniBtn({ + label, + title, + onClick, + disabled, +}: { + label: string; + title: string; + onClick: () => void; + disabled?: boolean; +}) { + return ( + + ); +} + +function Subhead({ children }: { children: ReactNode }) { + return ( + + {children} + + ); +} + +function Muted({ children }: { children: ReactNode }) { + return ( + {children} + ); +} + +function fmtUsd(n: number): string { + // Fixed precision so the cost column reads consistently ($1.00, $0.075) + // and survives float noise from the catalog API. + return `$${n.toLocaleString("en-US", { + minimumFractionDigits: 2, + maximumFractionDigits: 4, + })}`; +} + +/** + * Deterministic JSON: recursively sorts object keys so the dirty check + * doesn't fire just because the server serialised `spec.models` with a + * different key order than the locally-built policy. Arrays keep their order. + */ +function stableStringify(value: unknown): string { + return JSON.stringify(value, (_key, val) => + val && typeof val === "object" && !Array.isArray(val) + ? 
Object.fromEntries( + Object.entries(val).sort(([a], [b]) => a.localeCompare(b)), + ) + : val, + ); +} + +/** + * Compact label for a token count: at or above one million it renders + * millions with one decimal and drops a trailing `.0` (`1M`, `1.5M`); below + * that it renders whole thousands (`200K`). + * NOTE(review): rounding edges: inputs under 500 render as `0K`, and + * 999_500 to 999_999 render as `1000K` rather than `1M`. + */ +function fmtCtx(n: number): string { + if (n >= 1_000_000) { + return `${(n / 1_000_000).toFixed(1).replace(/\.0$/, "")}M`; + } + return `${Math.round(n / 1000)}K`; +} diff --git a/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx b/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx index 2de536756..a29ea1e11 100644 --- a/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx +++ b/packages/ui/src/features/agent-applications/components/AgentSessionDetailBody.tsx @@ -19,6 +19,9 @@ interface SessionMetrics { messages: number; toolCalls: number; errors: number; + /** Distinct models that answered, in first-seen order. Usually one; more than + * one means the turn(s) fell back across the policy list. */ + models: string[]; } function computeMetrics( @@ -26,12 +29,18 @@ function computeMetrics( ): SessionMetrics { let toolCalls = 0; let errors = 0; + const models: string[] = []; + const seenModels = new Set(); for (const msg of session.conversation) { if (msg.role === "assistant") { for (const part of msg.content) { if (part.type === "toolCall") toolCalls += 1; } if (msg.errorMessage) errors += 1; + if (msg.model && !seenModels.has(msg.model)) { + seenModels.add(msg.model); + models.push(msg.model); + } } else if (msg.role === "toolResult" && msg.isError) { errors += 1; } @@ -40,6 +49,7 @@ function computeMetrics( messages: session.conversation_total_turns ?? session.conversation.length, toolCalls, errors, + models, }; } @@ -134,6 +144,18 @@ export function AgentSessionDetailBody({ label="Tool calls" value={String(metrics.toolCalls)} /> + {metrics.models.length > 0 ? 
( + + ) : null} @@ -230,9 +256,10 @@ function MetricItem({ {label} {value} diff --git a/packages/ui/src/features/agent-applications/hooks/agentApplicationsKeys.ts b/packages/ui/src/features/agent-applications/hooks/agentApplicationsKeys.ts index 3ad880a70..86545a7da 100644 --- a/packages/ui/src/features/agent-applications/hooks/agentApplicationsKeys.ts +++ b/packages/ui/src/features/agent-applications/hooks/agentApplicationsKeys.ts @@ -54,6 +54,13 @@ export const agentApplicationsKeys = { ] as const, revisions: (projectId: number | null, idOrSlug: string) => ["agent-applications", "revisions", projectId, idOrSlug] as const, + /** + * Prefix over every single-revision query (any `revisionId`) for one agent. + * Invalidate it to refresh all `revision(...)` caches at once — derive the + * prefix here so it can't drift from the `revision` key it must match. + */ + revisionPrefix: (projectId: number | null, idOrSlug: string) => + ["agent-applications", "revision", projectId, idOrSlug] as const, revision: (projectId: number | null, idOrSlug: string, revisionId: string) => [ "agent-applications", diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts new file mode 100644 index 000000000..b3b60d544 --- /dev/null +++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.test.ts @@ -0,0 +1,156 @@ +import { renderHook } from "@testing-library/react"; +import { beforeEach, describe, expect, it, vi } from "vitest"; + +// Capture the mutationFn and onSuccess the hook hands to react-query so we can +// exercise the create-draft-vs-patch branching and the cache invalidation +// directly, without a live QueryClient. 
+let mutationFn: (vars: { + revision: { id: string; state: string }; + spec: unknown; +}) => Promise; +let onSuccess: (() => void) | undefined; +let invalidateQueries: ReturnType; + +vi.mock("@tanstack/react-query", () => ({ + useMutation: (opts: { + mutationFn: typeof mutationFn; + onSuccess?: () => void; + }) => { + mutationFn = opts.mutationFn; + onSuccess = opts.onSuccess; + return { mutate: vi.fn() }; + }, + useQueryClient: () => { + invalidateQueries = vi.fn(); + return { invalidateQueries }; + }, +})); + +const client = { + createAgentDraftRevisionFrom: vi.fn(), + updateAgentRevisionSpec: vi.fn(), + transitionAgentRevision: vi.fn(), +}; + +vi.mock("@posthog/ui/features/auth/authClient", () => ({ + useAuthenticatedClient: () => client, +})); +vi.mock("../../auth/store", () => ({ + useAuthStateValue: () => 1, +})); + +import { agentApplicationsKeys } from "./agentApplicationsKeys"; +import { useApplyAgentSpec } from "./useApplyAgentSpec"; + +describe("useApplyAgentSpec", () => { + beforeEach(() => { + client.createAgentDraftRevisionFrom.mockReset(); + client.updateAgentRevisionSpec.mockReset(); + client.transitionAgentRevision.mockReset(); + }); + + it("PATCHes a draft in place — no new draft branched", async () => { + client.updateAgentRevisionSpec.mockResolvedValue({ + id: "d1", + state: "draft", + }); + renderHook(() => useApplyAgentSpec("agent-slug", "app-1")); + const spec = { models: { mode: "auto", level: "high" } }; + + await mutationFn({ revision: { id: "d1", state: "draft" }, spec }); + + expect(client.createAgentDraftRevisionFrom).not.toHaveBeenCalled(); + expect(client.updateAgentRevisionSpec).toHaveBeenCalledWith( + "agent-slug", + "d1", + spec, + ); + }); + + it("clones to a fresh draft then PATCHes it when the source isn't a draft", async () => { + client.createAgentDraftRevisionFrom.mockResolvedValue({ + id: "new-draft", + state: "draft", + }); + client.updateAgentRevisionSpec.mockResolvedValue({ + id: "new-draft", + state: "draft", + }); + 
renderHook(() => useApplyAgentSpec("agent-slug", "app-1")); + const spec = { models: { mode: "manual", models: [{ model: "x" }] } }; + + await mutationFn({ revision: { id: "live-1", state: "live" }, spec }); + + expect(client.createAgentDraftRevisionFrom).toHaveBeenCalledWith( + "app-1", + "live-1", + ); + expect(client.updateAgentRevisionSpec).toHaveBeenCalledWith( + "agent-slug", + "new-draft", + spec, + ); + }); + + it("throws when a clone is needed but the application id is missing", async () => { + renderHook(() => useApplyAgentSpec("agent-slug", undefined)); + + await expect( + mutationFn({ revision: { id: "live-1", state: "live" }, spec: {} }), + ).rejects.toThrow(/Application/); + expect(client.createAgentDraftRevisionFrom).not.toHaveBeenCalled(); + }); + + it("archives the orphaned draft (and rethrows) when the PATCH fails after a clone", async () => { + client.createAgentDraftRevisionFrom.mockResolvedValue({ + id: "new-draft", + state: "draft", + }); + const patchErr = new Error("spec.models: invalid"); + client.updateAgentRevisionSpec.mockRejectedValue(patchErr); + client.transitionAgentRevision.mockResolvedValue({ id: "new-draft" }); + renderHook(() => useApplyAgentSpec("agent-slug", "app-1")); + + await expect( + mutationFn({ revision: { id: "live-1", state: "live" }, spec: {} }), + ).rejects.toThrow(patchErr); + // The just-cloned, never-landed draft gets archived as cleanup. 
+ expect(client.transitionAgentRevision).toHaveBeenCalledWith( + "agent-slug", + "new-draft", + "archive", + ); + }); + + it("does NOT archive when an in-place draft PATCH fails (nothing was cloned)", async () => { + client.updateAgentRevisionSpec.mockRejectedValue(new Error("boom")); + renderHook(() => useApplyAgentSpec("agent-slug", "app-1")); + + await expect( + mutationFn({ revision: { id: "d1", state: "draft" }, spec: {} }), + ).rejects.toThrow(/boom/); + expect(client.createAgentDraftRevisionFrom).not.toHaveBeenCalled(); + expect(client.transitionAgentRevision).not.toHaveBeenCalled(); + }); + + it("onSuccess invalidates the detail, revisions, and per-revision caches via the shared key factory", () => { + // projectId is mocked to 1 (useAuthStateValue). Asserting against the + // factory rather than literal arrays means these keys can't silently drift + // from the fetch hooks that own the caches — the gap when useMutation is + // mocked away. + renderHook(() => useApplyAgentSpec("agent-slug", "app-1")); + expect(onSuccess).toBeDefined(); + onSuccess?.(); + + const invalidated = invalidateQueries.mock.calls.map((c) => c[0].queryKey); + expect(invalidated).toContainEqual( + agentApplicationsKeys.detail(1, "agent-slug"), + ); + expect(invalidated).toContainEqual( + agentApplicationsKeys.revisions(1, "agent-slug"), + ); + expect(invalidated).toContainEqual( + agentApplicationsKeys.revisionPrefix(1, "agent-slug"), + ); + }); +}); diff --git a/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts new file mode 100644 index 000000000..9f59794c9 --- /dev/null +++ b/packages/ui/src/features/agent-applications/hooks/useApplyAgentSpec.ts @@ -0,0 +1,72 @@ +import type { + AgentRevision, + AgentRevisionState, + AgentSpec, +} from "@posthog/shared/agent-platform-types"; +import { useAuthenticatedClient } from "@posthog/ui/features/auth/authClient"; +import { useMutation, 
/**
 * Mutation hook for "create draft and apply changes".
 *
 * When the given revision is already a draft, its spec is PATCHed in place.
 * For any other state the revision is first cloned into a new draft and the
 * spec is PATCHed onto that clone. Freeze/promote are not performed here.
 *
 * The mutation resolves with the value returned by `updateAgentRevisionSpec`,
 * i.e. the revision the edit landed on, so the caller can select it.
 *
 * @param idOrSlug - Agent identifier used for the PATCH and archive calls and
 *   for the cache keys invalidated on success.
 * @param applicationId - Id handed to the clone call. When it is missing and a
 *   clone is required, the mutation rejects before any request is made.
 */
export function useApplyAgentSpec(
  idOrSlug: string,
  applicationId: string | undefined,
) {
  type ApplyArgs = {
    revision: { id: string; state: AgentRevisionState };
    spec: AgentSpec;
  };

  const client = useAuthenticatedClient();
  const queryClient = useQueryClient();
  const projectId = useAuthStateValue((state) => state.currentProjectId);

  // Clones `sourceId` into a new draft and resolves with that draft's id.
  const cloneIntoDraft = async (sourceId: string): Promise<string> => {
    if (!applicationId) {
      throw new Error("Application not loaded yet");
    }
    const { id } = await client.createAgentDraftRevisionFrom(
      applicationId,
      sourceId,
    );
    return id;
  };

  const applySpec = async ({
    revision,
    spec,
  }: ApplyArgs): Promise<AgentRevision> => {
    const wasCloned = revision.state !== "draft";
    const landingId = wasCloned
      ? await cloneIntoDraft(revision.id)
      : revision.id;

    try {
      return await client.updateAgentRevisionSpec(idOrSlug, landingId, spec);
    } catch (patchError) {
      // A clone whose PATCH failed is an orphan: a copy of the source with no
      // edit on it. Archive it best-effort so failed applies don't accumulate
      // drafts. The archive outcome is ignored so the PATCH error is the one
      // that surfaces. A draft supplied by the caller is never archived.
      if (wasCloned) {
        await client
          .transitionAgentRevision(idOrSlug, landingId, "archive")
          .catch(() => undefined);
      }
      throw patchError;
    }
  };

  // Marks the detail, revision-list and per-revision caches as stale.
  const refreshCaches = (): void => {
    const staleKeys = [
      agentApplicationsKeys.detail(projectId, idOrSlug),
      agentApplicationsKeys.revisions(projectId, idOrSlug),
      agentApplicationsKeys.revisionPrefix(projectId, idOrSlug),
    ];
    staleKeys.forEach((queryKey) => {
      void queryClient.invalidateQueries({ queryKey });
    });
  };

  return useMutation<AgentRevision, Error, ApplyArgs>({
    mutationFn: applySpec,
    onSuccess: refreshCaches,
  });
}

// Placeholder catalog handed out by `useModelCatalog` whenever it has no
// fetched data: the model list is empty and the level → model-id map is
// hard-coded. The endpoint's response remains the authoritative source.
const FALLBACK: ModelCatalog = {
  models: [],
  levels: {
    low: ["anthropic/claude-haiku-4.5", "openai/gpt-5-mini"],
    medium: ["anthropic/claude-sonnet-4.6", "openai/gpt-5"],
    high: ["anthropic/claude-opus-4.7", "openai/gpt-5-pro"],
  },
};
/**
 * Supplies the model catalog (served models plus the auto-level → model map)
 * obtained through `client.getAgentModelCatalog()`.
 *
 * The query only runs when a current project id is present, and results are
 * treated as fresh for five minutes. Whenever the query has no data, the
 * module-level `FALLBACK` catalog is returned instead.
 *
 * @returns `catalog` — fetched data or `FALLBACK`; `isLoading` — the query's
 *   own loading flag, passed through unchanged.
 */
export function useModelCatalog(): {
  catalog: ModelCatalog;
  isLoading: boolean;
} {
  const projectId = useAuthStateValue((state) => state.currentProjectId);
  const staleTime = 5 * 60_000;

  const query = useAuthenticatedQuery(
    ["agent-applications", "model-catalog", projectId],
    (client) => client.getAgentModelCatalog(),
    { enabled: Boolean(projectId), staleTime },
  );

  const catalog = query.data ?? FALLBACK;
  return { catalog, isLoading: query.isLoading };
}