From 1c4d804917b8f47c82c0730680ea5cde93b4df89 Mon Sep 17 00:00:00 2001 From: Christopher Tso Date: Mon, 22 Jun 2026 13:40:12 +0200 Subject: [PATCH] refactor: remove private Phoenix adapter package --- .github/workflows/ci.yml | 3 - bun.lock | 137 ------------- ...026-06-11-phoenix-observability-adapter.md | 13 +- ...gentv-eval-authoring-extensibility-plan.md | 19 +- ...21-001-feat-av-quf-results-storage-plan.md | 15 +- .../results-storage-retention-oplog-plan.md | 28 +-- package.json | 11 +- packages/phoenix-adapter/.gitignore | 1 - packages/phoenix-adapter/README.md | 27 --- .../phoenix-adapter/docs/e2e-verification.md | 62 ------ .../phoenix-adapter/docs/support-matrix.md | 35 ---- packages/phoenix-adapter/package.json | 33 --- .../phoenix-adapter/src/agentv/discovery.ts | 39 ---- .../phoenix-adapter/src/agentv/load-spec.ts | 124 ------------ packages/phoenix-adapter/src/agentv/path.ts | 22 -- packages/phoenix-adapter/src/agentv/types.ts | 40 ---- packages/phoenix-adapter/src/cli.ts | 72 ------- .../src/evaluators/deterministic.ts | 189 ------------------ .../src/evaluators/registry.ts | 134 ------------- .../phoenix-adapter/src/evaluators/types.ts | 59 ------ packages/phoenix-adapter/src/index.ts | 13 -- packages/phoenix-adapter/src/otel-backend.ts | 93 --------- .../phoenix-adapter/src/parity/baselines.ts | 27 --- .../phoenix-adapter/src/parity/compare.ts | 74 ------- packages/phoenix-adapter/src/parity/report.ts | 60 ------ packages/phoenix-adapter/src/parity/types.ts | 25 --- .../phoenix-adapter/src/phoenix/datasets.ts | 29 --- packages/phoenix-adapter/src/phoenix/names.ts | 12 -- .../src/phoenix/run-experiment.ts | 182 ----------------- packages/phoenix-adapter/src/phoenix/types.ts | 23 --- packages/phoenix-adapter/src/run/options.ts | 9 - packages/phoenix-adapter/src/run/run-suite.ts | 73 ------- .../test/agentv-normalize.test.ts | 155 -------------- .../test/evaluators/deterministic.test.ts | 112 ----------- .../test/evaluators/registry.test.ts | 69 ------- .../phoenix-adapter/test/otel-backend.test.ts | 70 ------- packages/phoenix-adapter/test/parity.test.ts | 41 ---- .../test/phoenix-datasets.test.ts | 42 ---- packages/phoenix-adapter/tsconfig.json | 10 - packages/phoenix-adapter/tsup.config.ts | 12 -- 40 files changed, 48 insertions(+), 2146 deletions(-) delete mode 100644 packages/phoenix-adapter/.gitignore delete mode 100644 packages/phoenix-adapter/README.md delete mode 100644 packages/phoenix-adapter/docs/e2e-verification.md delete mode 100644 packages/phoenix-adapter/docs/support-matrix.md delete mode 100644 packages/phoenix-adapter/package.json delete mode 100644 packages/phoenix-adapter/src/agentv/discovery.ts delete mode 100644 packages/phoenix-adapter/src/agentv/load-spec.ts delete mode 100644 packages/phoenix-adapter/src/agentv/path.ts delete mode 100644 packages/phoenix-adapter/src/agentv/types.ts delete mode 100644 packages/phoenix-adapter/src/cli.ts delete mode 100644 packages/phoenix-adapter/src/evaluators/deterministic.ts delete mode 100644 packages/phoenix-adapter/src/evaluators/registry.ts delete mode 100644 packages/phoenix-adapter/src/evaluators/types.ts delete mode 100644 packages/phoenix-adapter/src/index.ts delete mode 100644 packages/phoenix-adapter/src/otel-backend.ts delete mode 100644 packages/phoenix-adapter/src/parity/baselines.ts delete mode 100644 packages/phoenix-adapter/src/parity/compare.ts delete mode 100644 packages/phoenix-adapter/src/parity/report.ts delete mode 100644 packages/phoenix-adapter/src/parity/types.ts delete mode 100644 packages/phoenix-adapter/src/phoenix/datasets.ts delete mode 100644 packages/phoenix-adapter/src/phoenix/names.ts delete mode 100644 packages/phoenix-adapter/src/phoenix/run-experiment.ts delete mode 100644 packages/phoenix-adapter/src/phoenix/types.ts delete mode 100644 packages/phoenix-adapter/src/run/options.ts delete mode 100644 packages/phoenix-adapter/src/run/run-suite.ts delete mode 100644 packages/phoenix-adapter/test/agentv-normalize.test.ts delete mode 100644 packages/phoenix-adapter/test/evaluators/deterministic.test.ts delete mode 100644 packages/phoenix-adapter/test/evaluators/registry.test.ts delete mode 100644 packages/phoenix-adapter/test/otel-backend.test.ts delete mode 100644 packages/phoenix-adapter/test/parity.test.ts delete mode 100644 packages/phoenix-adapter/test/phoenix-datasets.test.ts delete mode 100644 packages/phoenix-adapter/tsconfig.json delete mode 100644 packages/phoenix-adapter/tsup.config.ts diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 72e5ffc85..4c82854c1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -143,8 +143,5 @@ jobs: - name: Check evals directories have eval files run: bun scripts/validate-eval-dirs.ts - - name: Run Phoenix adapter dry-run smoke - run: bun run phoenix:assert-smoke - - name: Validate eval schemas run: bun apps/cli/dist/cli.js validate 'examples/features/**/evals/**/*.eval.yaml' 'examples/features/**/*.EVAL.yaml' diff --git a/bun.lock b/bun.lock index 3e197e5be..96e34bb10 100644 --- a/bun.lock +++ b/bun.lock @@ -6,7 +6,6 @@ "name": "@agentv/workspace", "devDependencies": { "@agentv/core": "workspace:*", - "@agentv/phoenix-adapter": "workspace:*", "@agentv/sdk": "workspace:*", "@biomejs/biome": "^1.9.4", "@types/bun": "latest", @@ -119,20 +118,6 @@ "@earendil-works/pi-coding-agent", ], }, - "packages/phoenix-adapter": { - "name": "@agentv/phoenix-adapter", - "version": "4.31.4-next.1", - "dependencies": { - "@agentv/core": "workspace:*", - "@arizeai/phoenix-client": "6.10.0", - "@arizeai/phoenix-evals": "1.0.3", - "yaml": "^2.8.3", - }, - "devDependencies": { - "tsup": "8.3.5", - "typescript": "5.8.3", - }, - }, "packages/sdk": { "name": "@agentv/sdk", "version": "4.42.4", @@ -149,38 +134,14 @@ "@agentv/dashboard": ["@agentv/dashboard@workspace:apps/dashboard"], - "@agentv/phoenix-adapter": ["@agentv/phoenix-adapter@workspace:packages/phoenix-adapter"], - "@agentv/sdk": ["@agentv/sdk@workspace:packages/sdk"], "@agentv/web": ["@agentv/web@workspace:apps/web"], - "@ai-sdk/gateway": ["@ai-sdk/gateway@3.0.122", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@vercel/oidc": "3.2.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-U1k2fk7cSH/tS5CZ3ujROiUCOLFwkzb792OqR/Org8Mfm27dKSIdRZG4ZuJUifT8alUWa61IoaRu4foXKlP5TQ=="], - - "@ai-sdk/provider": ["@ai-sdk/provider@3.0.10", "", { "dependencies": { "json-schema": "^0.4.0" } }, "sha512-Q3BZ27qfpYqnCYGvE3vt+Qi6LGOF9R5Nmzn+9JoM1lCRsD9mYaIhfJLkSunN48nfGXJ6n+XNV0J/XVpqGQl7Dw=="], - - "@ai-sdk/provider-utils": ["@ai-sdk/provider-utils@4.0.27", "", { "dependencies": { "@ai-sdk/provider": "3.0.10", "@standard-schema/spec": "^1.1.0", "eventsource-parser": "^3.0.8" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-ubkAJ+xODouwtmN1tYlvTPphH1hPOBfZaEQe8U7skGvFAnIRs9PPpsq57bC2+Ky/MB4yzhd6YOsxTAx9sGpazw=="], - "@anthropic-ai/claude-agent-sdk": ["@anthropic-ai/claude-agent-sdk@0.2.49", "", { "optionalDependencies": { "@img/sharp-darwin-arm64": "^0.34.2", "@img/sharp-darwin-x64": "^0.34.2", "@img/sharp-linux-arm": "^0.34.2", "@img/sharp-linux-arm64": "^0.34.2", "@img/sharp-linux-x64": "^0.34.2", "@img/sharp-linuxmusl-arm64": "^0.34.2", "@img/sharp-linuxmusl-x64": "^0.34.2", "@img/sharp-win32-arm64": "^0.34.2", "@img/sharp-win32-x64": "^0.34.2" }, "peerDependencies": { "zod": "^4.0.0" } }, "sha512-3avi409dwuGkPEETpWa0gyJvRMr3b6LxeuW5/sAPCOtLD9WxH9fYltbA5wZoazxTw5mlbXmjDp7JqO1rlmpaIQ=="], "@anthropic-ai/sdk": ["@anthropic-ai/sdk@0.91.1", "", { "dependencies": { "json-schema-to-ts": "^3.1.1" }, "peerDependencies": { "zod": "^3.25.0 || ^4.0.0" }, "optionalPeers": ["zod"], "bin": { "anthropic-ai-sdk": "bin/cli" } }, "sha512-LAmu761tSN9r66ixvmciswUj/ZC+1Q4iAfpedTfSVLeswRwnY3n2Nb6Tsk+cLPP28aLOPWeMgIuTuCcMC6W/iw=="], - "@arizeai/openinference-core": ["@arizeai/openinference-core@2.2.0", "", { "dependencies": { "@arizeai/openinference-semantic-conventions": "2.5.0", "@opentelemetry/api": "^1.9.0", "@opentelemetry/core": "^1.25.1" } }, "sha512-Ix1u/nphZj1yHqmyIfeBe2AVfnilTwgtvfXemJxc/6F+4JC7Rks6VMlPCfB8NXvMOhop2IveA6EyxYMkv/PH/A=="], - - "@arizeai/openinference-genai": ["@arizeai/openinference-genai@0.1.10", "", { "dependencies": { "@arizeai/openinference-semantic-conventions": "2.5.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.9.0", "@opentelemetry/semantic-conventions": ">=1.37.0" } }, "sha512-BrkTeZm57FXHZ82C50KU79NJfz/jQseW/4sTdz5IvAbZPBnwEbwua/jspckD1b3vdTS+BGEwkXWTx0fkUilb7A=="], - - "@arizeai/openinference-semantic-conventions": ["@arizeai/openinference-semantic-conventions@2.5.0", "", {}, "sha512-4ZeSwiFX3YxB0WSE6x568wM4PVHiYmz3yiOxic6WGKVrE/KIGggMFP/eqUNQhikBKP68IDV0qiILlZAIYnheAQ=="], - - "@arizeai/openinference-vercel": ["@arizeai/openinference-vercel@2.7.7", "", { "dependencies": { "@arizeai/openinference-core": "2.2.0", "@arizeai/openinference-genai": "0.1.10", "@arizeai/openinference-semantic-conventions": "2.5.0", "@opentelemetry/core": "^1.30.1" }, "peerDependencies": { "@opentelemetry/api": ">=1.7.0 <2.0.0" } }, "sha512-iShJM8C+R959Ei9sUt8/2BE301W5Px06nYKY+7tCMbm7M0MjRPibrjLyZi6n+Hnd8U3zBE4vYXzAIdE0Z6ZTiQ=="], - - "@arizeai/phoenix-client": ["@arizeai/phoenix-client@6.10.0", "", { "dependencies": { "@arizeai/openinference-semantic-conventions": "^2.1.7", "@arizeai/openinference-vercel": "^2.7.0", "@arizeai/phoenix-config": "0.1.4", "@arizeai/phoenix-otel": "1.0.2", "async": "^3.2.6", "openapi-fetch": "^0.17.0", "tiny-invariant": "^1.3.3", "zod": "^4.0.14" }, "peerDependencies": { "@anthropic-ai/sdk": "^0.35.0", "ai": "^6.0.90", "openai": "^6.10.0" }, "optionalPeers": ["@anthropic-ai/sdk", "ai", "openai"] }, "sha512-rKvvHyhBGT5Tksckt3VhDGdPiRoaG/GCgzS64IVCPLHLdQhEHoRd88MJyud8tHgwwQ4/XyJ/4cT43z26CNCPVg=="], - - "@arizeai/phoenix-config": ["@arizeai/phoenix-config@0.1.4", "", {}, "sha512-GBgPCQWW2GIHqsV067Uqc2YLCapQTHWX2wuYQYILAos6m39+sDX4hunP4qUqBixbF8tR5zdybCO8iGRV+tEcBg=="], - - "@arizeai/phoenix-evals": ["@arizeai/phoenix-evals@1.0.3", "", { "dependencies": { "@arizeai/openinference-core": "^2.0.0", "@opentelemetry/api": "^1.9.0", "ai": "^6.0.90", "jsonpath-plus": "^10.3.0", "mustache": "^4.2.0", "zod": "^4.0.14" } }, "sha512-D4u8UVhbWkPXgtUZVlTcBdlLyowPt7yIjebqDxtglnoBnKx6u3pM+8veC3AROQCc29h1HiKzyiMFwqmQMFFNLg=="], - - "@arizeai/phoenix-otel": ["@arizeai/phoenix-otel@1.0.2", "", { "dependencies": { "@arizeai/openinference-core": "^2.0.7", "@arizeai/openinference-semantic-conventions": "^2.1.7", "@arizeai/openinference-vercel": "^2.7.0", "@opentelemetry/api": "^1.9.0", "@opentelemetry/context-async-hooks": "^2.5.1", "@opentelemetry/core": "^1.25.1", "@opentelemetry/exporter-trace-otlp-proto": "^0.205.0", "@opentelemetry/instrumentation": "^0.57.2", "@opentelemetry/resources": "^2.0.0", "@opentelemetry/sdk-trace-base": "^2.5.1", "@opentelemetry/sdk-trace-node": "^2.5.1" } }, "sha512-lYUQN1buHJM+ZGSO9uEKuoihiOqjYcOIxfm1IzqyZ4Fk6o6YKGyEKYt6fgwzoaWr82x+mn9oPUFay7Ff8m0gAw=="], - "@astrojs/compiler": ["@astrojs/compiler@2.13.0", "", {}, "sha512-mqVORhUJViA28fwHYaWmsXSzLO9osbdZ5ImUfxBarqsYdMlPbqAqGJCxsNzvppp1BEzc1mJNjOVvQqeDN8Vspw=="], "@astrojs/internal-helpers": ["@astrojs/internal-helpers@0.7.5", "", {}, "sha512-vreGnYSSKhAjFJCWAwe/CNhONvoc5lokxtRoZims+0wa3KbHBdPHSSthJsKxPd8d/aic6lWKpRTYGY/hsgK6EA=="], @@ -511,10 +472,6 @@ "@jridgewell/trace-mapping": ["@jridgewell/trace-mapping@0.3.31", "", { "dependencies": { "@jridgewell/resolve-uri": "^3.1.0", "@jridgewell/sourcemap-codec": "^1.4.14" } }, "sha512-zzNR+SdQSDJzc8joaeP8QQoCQr8NuYx2dIIytl1QeBEZHJ9uW6hebsrYgbz8hJwUQao3TWCMtmfV8Nu1twOLAw=="], - "@jsep-plugin/assignment": ["@jsep-plugin/assignment@1.3.0", "", { "peerDependencies": { "jsep": "^0.4.0||^1.0.0" } }, "sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ=="], - - "@jsep-plugin/regex": ["@jsep-plugin/regex@1.0.4", "", { "peerDependencies": { "jsep": "^0.4.0||^1.0.0" } }, "sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg=="], - "@mdx-js/mdx": ["@mdx-js/mdx@3.1.1", "", { "dependencies": { "@types/estree": "^1.0.0", "@types/estree-jsx": "^1.0.0", "@types/hast": "^3.0.0", "@types/mdx": "^2.0.0", "acorn": "^8.0.0", "collapse-white-space": "^2.0.0", "devlop": "^1.0.0", "estree-util-is-identifier-name": "^3.0.0", "estree-util-scope": "^1.0.0", "estree-walker": "^3.0.0", "hast-util-to-jsx-runtime": "^2.0.0", "markdown-extensions": "^2.0.0", "recma-build-jsx": "^1.0.0", "recma-jsx": "^1.0.0", "recma-stringify": "^1.0.0", "rehype-recma": "^1.0.0", "remark-mdx": "^3.0.0", "remark-parse": "^11.0.0", "remark-rehype": "^11.0.0", "source-map": "^0.7.0", "unified": "^11.0.0", "unist-util-position-from-estree": "^2.0.0", "unist-util-stringify-position": "^4.0.0", "unist-util-visit": "^5.0.0", "vfile": "^6.0.0" } }, "sha512-f6ZO2ifpwAQIpzGWaBQT2TXxPv6z3RBzQKpVftEWN78Vl/YweF1uwussDx8ECAXVtr3Rs89fKyG9YlzUs9DyGQ=="], "@mistralai/mistralai": ["@mistralai/mistralai@2.2.1", "", { "dependencies": { "ws": "^8.18.0", "zod": "^3.25.0 || ^4.0.0", "zod-to-json-schema": "^3.25.0" } }, "sha512-uKU8CZmL2RzYKmplsU01hii4p3pe4HqJefpWNRWXm1Tcm0Sm4xXfwSLIy4k7ZCPlbETCGcp69E7hZs+WOJ5itQ=="], @@ -557,10 +514,6 @@ "@opentelemetry/exporter-trace-otlp-http": ["@opentelemetry/exporter-trace-otlp-http@0.212.0", "", { "dependencies": { "@opentelemetry/core": "2.5.1", "@opentelemetry/otlp-exporter-base": "0.212.0", "@opentelemetry/otlp-transformer": "0.212.0", "@opentelemetry/resources": "2.5.1", "@opentelemetry/sdk-trace-base": "2.5.1" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-v/0wMozNoiEPRolzC4YoPo4rAT0q8r7aqdnRw3Nu7IDN0CGFzNQazkfAlBJ6N5y0FYJkban7Aw5WnN73//6YlA=="], - "@opentelemetry/exporter-trace-otlp-proto": ["@opentelemetry/exporter-trace-otlp-proto@0.205.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/otlp-exporter-base": "0.205.0", "@opentelemetry/otlp-transformer": "0.205.0", "@opentelemetry/resources": "2.1.0", "@opentelemetry/sdk-trace-base": "2.1.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-bGtFzqiENO2GpJk988mOBMe0MfeNpTQjbLm/LBijas6VRyEDQarUzdBHpFlu89A25k1+BCntdWGsWTa9Ai4FyA=="], - - "@opentelemetry/instrumentation": ["@opentelemetry/instrumentation@0.57.2", "", { "dependencies": { "@opentelemetry/api-logs": "0.57.2", "@types/shimmer": "^1.2.0", "import-in-the-middle": "^1.8.1", "require-in-the-middle": "^7.1.1", "semver": "^7.5.2", "shimmer": "^1.2.1" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-BdBGhQBh8IjZ2oIIX6F2/Q3LKm/FDDKi6ccYKcBTeilh6SNdNKveDOLk73BkSJjQLJk6qe4Yh+hHw1UPhCDdrg=="], - "@opentelemetry/otlp-exporter-base": ["@opentelemetry/otlp-exporter-base@0.212.0", "", { "dependencies": { "@opentelemetry/core": "2.5.1", "@opentelemetry/otlp-transformer": "0.212.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-HoMv5pQlzbuxiMS0hN7oiUtg8RsJR5T7EhZccumIWxYfNo/f4wFc7LPDfFK6oHdG2JF/+qTocfqIHoom+7kLpw=="], "@opentelemetry/otlp-transformer": ["@opentelemetry/otlp-transformer@0.212.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.212.0", "@opentelemetry/core": "2.5.1", "@opentelemetry/resources": "2.5.1", "@opentelemetry/sdk-logs": "0.212.0", "@opentelemetry/sdk-metrics": "2.5.1", "@opentelemetry/sdk-trace-base": "2.5.1", "protobufjs": "8.0.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-bj7zYFOg6Db7NUwsRZQ/WoVXpAf41WY2gsd3kShSfdpZQDRKHWJiRZIg7A8HvWsf97wb05rMFzPbmSHyjEl9tw=="], @@ -889,8 +842,6 @@ "@types/semver": ["@types/semver@7.7.1", "", {}, "sha512-FmgJfu+MOcQ370SD0ev7EI8TlCAfKYU+B4m5T3yXc1CiRN94g/SZPtsCkk506aUDtlMnFZvasDwHHUcZUEaYuA=="], - "@types/shimmer": ["@types/shimmer@1.2.0", "", {}, "sha512-UE7oxhQLLd9gub6JKIAhDq06T0F6FnztwMNRvYgjeQSBeMc1ZG/tA47EwfduvkuQS8apbkM/lpLpWsaCeYsXVg=="], - "@types/trusted-types": ["@types/trusted-types@2.0.7", "", {}, "sha512-ScaPdn1dQczgbl0QFTeTOmVHFULt394XJgOQNoyVhZ6r2vLnMLJfBPd53SB52T/3G36VI1/g2MZaX0cwDuXsfw=="], "@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="], @@ -899,22 +850,16 @@ "@ungap/structured-clone": ["@ungap/structured-clone@1.3.0", "", {}, "sha512-WmoN8qaIAo7WTYWbAZuG8PYEhn5fkz7dZrqTBZ7dtt//lL2Gwms1IcnQ5yHqjDfX8Ft5j4YzDM23f87zBfDe9g=="], - "@vercel/oidc": ["@vercel/oidc@3.2.0", "", {}, "sha512-UycprH3T6n3jH0k44NHMa7pnFHGu/N05MjojYr+Mc6I7obkoLIJujSWwin1pCvdy/eOxrI/l3uDLQsmcrOb4ug=="], - "@vitejs/plugin-react": ["@vitejs/plugin-react@4.7.0", "", { "dependencies": { "@babel/core": "^7.28.0", "@babel/plugin-transform-react-jsx-self": "^7.27.1", "@babel/plugin-transform-react-jsx-source": "^7.27.1", "@rolldown/pluginutils": "1.0.0-beta.27", "@types/babel__core": "^7.20.5", "react-refresh": "^0.17.0" }, "peerDependencies": { "vite": "^4.2.0 || ^5.0.0 || ^6.0.0 || ^7.0.0" } }, "sha512-gUu9hwfWvvEDBBmgtAowQCojwZmJ5mcLn3aufeCsitijs3+f2NsrPtlAWIR6OPiqljl96GVCUbLe0HyqIpVaoA=="], "acorn": ["acorn@8.15.0", "", { "bin": { "acorn": "bin/acorn" } }, "sha512-NZyJarBfL7nWwIq+FDL6Zp/yHEhePMNnnJ0y3qfieCrmNvYct8uvtiV41UvlSe6apAfk0fY1FbWx+NwfmpvtTg=="], - "acorn-import-attributes": ["acorn-import-attributes@1.9.5", "", { "peerDependencies": { "acorn": "^8" } }, "sha512-n02Vykv5uA3eHGM/Z2dQrcD56kL8TyDb2p1+0P83PClMnC/nc+anbQRhIOWnSq4Ke/KvDPrY3C9hDtC/A3eHnQ=="], - "acorn-jsx": ["acorn-jsx@5.3.2", "", { "peerDependencies": { "acorn": "^6.0.0 || ^7.0.0 || ^8.0.0" } }, "sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ=="], "agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="], "agentv": ["agentv@workspace:apps/cli"], - "ai": ["ai@6.0.194", "", { "dependencies": { "@ai-sdk/gateway": "3.0.122", "@ai-sdk/provider": "3.0.10", "@ai-sdk/provider-utils": "4.0.27", "@opentelemetry/api": "^1.9.0" }, "peerDependencies": { "zod": "^3.25.76 || ^4.1.8" } }, "sha512-0MkYqrSZZuC1zTECppcaUT0i54aocXpYaUMVue3V8z/weBHCytfO5/CcwZCU80msZpfkbBUKYSSrkZFotEO5wQ=="], - "ansi-align": ["ansi-align@3.0.1", "", { "dependencies": { "string-width": "^4.1.0" } }, "sha512-IOfwwBF5iczOjp/WeY4YxyjqAFMQoZufdQWDd19SEExbVLNXqvpzSJ/M7Za4/sCPmQ0+GRquoA7bGcINcxew6w=="], "ansi-regex": ["ansi-regex@6.2.2", "", {}, "sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg=="], @@ -943,8 +888,6 @@ "astro-expressive-code": ["astro-expressive-code@0.41.6", "", { "dependencies": { "rehype-expressive-code": "^0.41.6" }, "peerDependencies": { "astro": "^4.0.0-beta || ^5.0.0-beta || ^3.3.0 || ^6.0.0-beta" } }, "sha512-l47tb1uhmVIebHUkw+HEPtU/av0G4O8Q34g2cbkPvC7/e9ZhANcjUUciKt9Hp6gSVDdIuXBBLwJQn2LkeGMOAw=="], - "async": ["async@3.2.6", "", {}, "sha512-htCUDlxyyCLMgaM3xXg0C0LW2xqfuQ6p05pCEIsXuyQ+a1koYKTuBMzRNwmybfLgvJDMd0r1LTn4+E0Ti6C2AA=="], - "async-mutex": ["async-mutex@0.5.0", "", { "dependencies": { "tslib": "^2.4.0" } }, "sha512-1A94B18jkJ3DYq284ohPxoXbfTA5HsQ7/Mf4DEhcyLx3Bz27Rh59iScbB6EPiP+B+joue6YCxcMXSbFC1tZKwA=="], "axobject-query": ["axobject-query@4.1.0", "", {}, "sha512-qIj0G9wZbMGNLjLmg1PT6v2mE9AH2zlnADJD/2tC6E00hgmhUOfEB6greHPAfLRSufHqROIUTkw6E+M3lH0PTQ=="], @@ -1009,8 +952,6 @@ "ci-info": ["ci-info@4.4.0", "", {}, "sha512-77PSwercCZU2Fc4sX94eF8k8Pxte6JAwL4/ICZLFjJLqegs7kCuAsqqj/70NQF6TvDpgFjkubQB2FW2ZZddvQg=="], - "cjs-module-lexer": ["cjs-module-lexer@1.4.3", "", {}, "sha512-9z8TZaGM1pfswYeXrUpzPrkx8UnWYdhJclsiYMm6x/w5+nN+8Tf/LnAgfLGQCm59qAOxU8WwHEq2vNwF6i4j+Q=="], - "cli-boxes": ["cli-boxes@3.0.0", "", {}, "sha512-/lzGpEWL/8PfI0BmBOPRwp0c/wFNX1RdUML3jK/RcSBA9T8mZDdQpqYBKtCFTOfQbwPqWEOpjqW+Fnayc0969g=="], "cli-width": ["cli-width@4.1.0", "", {}, "sha512-ouuZd4/dm2Sw5Gmqy6bGyNNNe1qt9RpmxveLSO7KcgsTnU7RXfsw+/bukWGo1abgBiMAic068rclZsO4IWmmxQ=="], @@ -1131,8 +1072,6 @@ "entities": ["entities@6.0.1", "", {}, "sha512-aN97NXWF6AWBTahfVOIrB/NShkzi5H7F9r1s9mD3cDj4Ko5f2qhhVoYMibXF7GlLveb/D2ioWay8lxI97Ven3g=="], - "es-errors": ["es-errors@1.3.0", "", {}, "sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw=="], - "es-module-lexer": ["es-module-lexer@1.7.0", "", {}, "sha512-jEQoCwk8hyb2AZziIOLhDqpm5+2ww5uIE6lkO/6jcOCusfk6LhMHpXXfBLXTZ7Ydyt0j4VoUQv6uGNYbdW+kBA=="], "es-toolkit": ["es-toolkit@1.45.1", "", {}, "sha512-/jhoOj/Fx+A+IIyDNOvO3TItGmlMKhtX8ISAHKE90c4b/k1tqaqEZ+uUqfpU8DMnW5cgNJv606zS55jGvza0Xw=="], @@ -1171,8 +1110,6 @@ "eventemitter3": ["eventemitter3@5.0.4", "", {}, "sha512-mlsTRyGaPBjPedk6Bvw+aqbsXDtoAyAzm5MO7JgU+yVRyMQ5O8bD4Kcci7BS85f93veegeCPkL8R4GLClnjLFw=="], - "eventsource-parser": ["eventsource-parser@3.1.0", "", {}, "sha512-kJezFj9YFAMLeORyi7aCLxLbD5/qWMQnoMVlVPyHIll7lgRJCc3JVln9Vgl9nwQi0YkMnhdGTMNn7CkRRAptMg=="], - "execa": ["execa@9.6.1", "", { "dependencies": { "@sindresorhus/merge-streams": "^4.0.0", "cross-spawn": "^7.0.6", "figures": "^6.1.0", "get-stream": "^9.0.0", "human-signals": "^8.0.1", "is-plain-obj": "^4.1.0", "is-stream": "^4.0.1", "npm-run-path": "^6.0.0", "pretty-ms": "^9.2.0", "signal-exit": "^4.1.0", "strip-final-newline": "^4.0.0", "yoctocolors": "^2.1.1" } }, "sha512-9Be3ZoN4LmYR90tUoVu2te2BsbzHfhJyfEiAVfz7N5/zv+jduIfLrV2xdQXOHbaD6KgpGdO9PRPM1Y4Q9QkPkA=="], "expressive-code": ["expressive-code@0.41.6", "", { "dependencies": { "@expressive-code/core": "^0.41.6", "@expressive-code/plugin-frames": "^0.41.6", "@expressive-code/plugin-shiki": "^0.41.6", "@expressive-code/plugin-text-markers": "^0.41.6" } }, "sha512-W/5+IQbrpCIM5KGLjO35wlp1NCwDOOVQb+PAvzEoGkW1xjGM807ZGfBKptNWH6UECvt6qgmLyWolCMYKh7eQmA=="], @@ -1211,8 +1148,6 @@ "fsevents": ["fsevents@2.3.3", "", { "os": "darwin" }, "sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw=="], - "function-bind": ["function-bind@1.1.2", "", {}, "sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA=="], - "gaxios": ["gaxios@7.1.4", "", { "dependencies": { "extend": "^3.0.2", "https-proxy-agent": "^7.0.1", "node-fetch": "^3.3.2" } }, "sha512-bTIgTsM2bWn3XklZISBTQX7ZSddGW+IO3bMdGaemHZ3tbqExMENHLx6kKZ/KlejgrMtj8q7wBItt51yegqalrA=="], "gcp-metadata": ["gcp-metadata@8.1.2", "", { "dependencies": { "gaxios": "^7.0.0", "google-logging-utils": "^1.0.0", "json-bigint": "^1.0.0" } }, "sha512-zV/5HKTfCeKWnxG0Dmrw51hEWFGfcF2xiXqcA3+J90WDuP0SvoiSO5ORvcBsifmx/FoIjgQN3oNOGaQ5PhLFkg=="], @@ -1239,8 +1174,6 @@ "h3": ["h3@1.15.5", "", { "dependencies": { "cookie-es": "^1.2.2", "crossws": "^0.3.5", "defu": "^6.1.4", "destr": "^2.0.5", "iron-webcrypto": "^1.2.1", "node-mock-http": "^1.0.4", "radix3": "^1.1.2", "ufo": "^1.6.3", "uncrypto": "^0.1.3" } }, "sha512-xEyq3rSl+dhGX2Lm0+eFQIAzlDN6Fs0EcC4f7BNUmzaRX/PTzeuM+Tr2lHB8FoXggsQIeXLj8EDVgs5ywxyxmg=="], - "hasown": ["hasown@2.0.4", "", { "dependencies": { "function-bind": "^1.1.2" } }, "sha512-T2UbfbBEF32wiepXIsMlTW9+dDYC6wMh/t/vYA4tuOMKqWz/n3vr1NFSxQiyP+zk2mXsoMA/i/7qV6LKut1t1A=="], - "hast-util-embedded": ["hast-util-embedded@3.0.0", "", { "dependencies": { "@types/hast": "^3.0.0", "hast-util-is-element": "^3.0.0" } }, "sha512-naH8sld4Pe2ep03qqULEtvYr7EjrLK2QHY8KJR6RJkTUjPGObe1vnx585uzem2hGra+s1q08DZZpfgDVYRbaXA=="], "hast-util-format": ["hast-util-format@1.1.0", "", { "dependencies": { "@types/hast": "^3.0.0", "hast-util-embedded": "^3.0.0", "hast-util-minify-whitespace": "^1.0.0", "hast-util-phrasing": "^3.0.0", "hast-util-whitespace": "^3.0.0", "html-whitespace-sensitive-tag-names": "^3.0.0", "unist-util-visit-parents": "^6.0.0" } }, "sha512-yY1UDz6bC9rDvCWHpx12aIBGRG7krurX0p0Fm6pT547LwDIZZiNr8a+IHDogorAdreULSEzP82Nlv5SZkHZcjA=="], @@ -1303,8 +1236,6 @@ "immer": ["immer@10.2.0", "", {}, "sha512-d/+XTN3zfODyjr89gM3mPq1WNX2B8pYsu7eORitdwyA2sBubnTl3laYlBk4sXY5FUa5qTZGBDPJICVbvqzjlbw=="], - "import-in-the-middle": ["import-in-the-middle@1.15.0", "", { "dependencies": { "acorn": "^8.14.0", "acorn-import-attributes": "^1.9.5", "cjs-module-lexer": "^1.2.2", "module-details-from-path": "^1.0.3" } }, "sha512-bpQy+CrsRmYmoPMAE/0G33iwRqwW4ouqdRg8jgbH3aKuCtOc8lxgmYXg2dMM92CRiGP660EtBcymH/eVUpCSaA=="], - "import-meta-resolve": ["import-meta-resolve@4.2.0", "", {}, "sha512-Iqv2fzaTQN28s/FwZAoFq0ZSs/7hMAHJVX+w8PZl3cY19Pxk6jFFalxQoIfW2826i/fDLXv8IiEZRIT0lDuWcg=="], "inline-style-parser": ["inline-style-parser@0.2.7", "", {}, "sha512-Nb2ctOyNR8DqQoR0OwRG95uNWIC0C1lCgf5Naz5H6Ji72KZ8OcFZLz2P5sNgwlyoJ8Yif11oMuYs5pBQa86csA=="], @@ -1321,8 +1252,6 @@ "is-binary-path": ["is-binary-path@2.1.0", "", { "dependencies": { "binary-extensions": "^2.0.0" } }, "sha512-ZMERYes6pDydyuGidse7OsHxtbI7WVeUEozgR/g7rd0xUimYNlvZRE/K2MgZTjWy725IfelLeVcEM97mmtRGXw=="], - "is-core-module": ["is-core-module@2.16.2", "", { "dependencies": { "hasown": "^2.0.3" } }, "sha512-evOr8xfXKxE6qSR0hSXL2r3sd7ALj8+7jQEUvPYcm5sgZFdJ+AYzT6yNmJenvIYQBgIGwfwz08sL8zoL7yq2BA=="], - "is-decimal": ["is-decimal@2.0.1", "", {}, "sha512-AAB9hiomQs5DXWcRB1rqsxGUstbRroFOPPVAomNk/3XHR5JyEZChOyTWe2oayKnsSsr/kcGqF+z6yuH6HHpN0A=="], "is-docker": ["is-docker@3.0.0", "", { "bin": { "is-docker": "cli.js" } }, "sha512-eljcgEDlEns/7AXFosB5K/2nCM4P7FQPkGc/DWLy5rmFEWvZayGrik1d9/QIY5nJ4f9YsVvBkA6kJpHn9rISdQ=="], @@ -1359,20 +1288,14 @@ "js-yaml": ["js-yaml@4.1.1", "", { "dependencies": { "argparse": "^2.0.1" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA=="], - "jsep": ["jsep@1.4.0", "", {}, "sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw=="], - "jsesc": ["jsesc@3.1.0", "", { "bin": { "jsesc": "bin/jsesc" } }, "sha512-/sM3dO2FOzXjKQhJuo0Q173wf2KOo8t4I8vHy6lF9poUp7bKT0/NHE8fPX23PwfhnykfqnC2xRxOnVw5XuGIaA=="], "json-bigint": ["json-bigint@1.0.0", "", { "dependencies": { "bignumber.js": "^9.0.0" } }, "sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ=="], - "json-schema": ["json-schema@0.4.0", "", {}, "sha512-es94M3nTIfsEPisRafak+HDLfHXnKBhV3vU5eqPcS3flIWqcxJWgXHXiey3YrpaNsanY5ei1VoYEbOzijuq9BA=="], - "json-schema-to-ts": ["json-schema-to-ts@3.1.1", "", { "dependencies": { "@babel/runtime": "^7.18.3", "ts-algebra": "^2.0.0" } }, "sha512-+DWg8jCJG2TEnpy7kOm/7/AxaYoaRbjVB4LFZLySZlWn8exGs3A4OLJR966cVvU26N7X9TWxl+Jsw7dzAqKT6g=="], "json5": ["json5@2.2.3", "", { "bin": { "json5": "lib/cli.js" } }, "sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg=="], - "jsonpath-plus": ["jsonpath-plus@10.4.0", "", { "dependencies": { "@jsep-plugin/assignment": "^1.3.0", "@jsep-plugin/regex": "^1.0.4", "jsep": "^1.4.0" }, "bin": { "jsonpath": "bin/jsonpath-cli.js", "jsonpath-plus": "bin/jsonpath-cli.js" } }, "sha512-T92WWatJXmhBbKsgH/0hl+jxjdXrifi5IKeMY02DWggRxX0UElcbVzPlmgLTbvsPeW1PasQ6xE2Q75stkhGbsA=="], - "jwa": ["jwa@2.0.1", "", { "dependencies": { "buffer-equal-constant-time": "^1.0.1", "ecdsa-sig-formatter": "1.0.11", "safe-buffer": "^5.0.1" } }, "sha512-hRF04fqJIP8Abbkq5NKGN0Bbr3JxlQ+qhZufXVr0DvujKy93ZCbXZMHDL4EOtodSbCWxOqR8MS1tXA5hwqCXDg=="], "jws": ["jws@4.0.1", "", { "dependencies": { "jwa": "^2.0.1", "safe-buffer": "^5.0.1" } }, "sha512-EKI/M/yqPncGUUh44xz0PxSidXFr/+r0pA70+gIYhjv+et7yxM+s29Y+VGDkovRofQem0fs7Uvf4+YmAdyRduA=="], @@ -1543,16 +1466,12 @@ "micromatch": ["micromatch@4.0.8", "", { "dependencies": { "braces": "^3.0.3", "picomatch": "^2.3.1" } }, "sha512-PXwfBhYu0hBCPw8Dn0E+WDYb7af3dSLVWKi3HGv84IdF4TyFoC0ysxFd0Goxw7nSv4T/PzEJQxsYsEiFCKo2BA=="], - "module-details-from-path": ["module-details-from-path@1.0.4", "", {}, "sha512-EGWKgxALGMgzvxYF1UyGTy0HXX/2vHLkw6+NvDKW2jypWbHpjQuj4UMcqQWXHERJhVGKikolT06G3bcKe4fi7w=="], - "monaco-editor": ["monaco-editor@0.55.1", "", { "dependencies": { "dompurify": "3.2.7", "marked": "14.0.0" } }, "sha512-jz4x+TJNFHwHtwuV9vA9rMujcZRb0CEilTEwG2rRSpe/A7Jdkuj8xPKttCgOh+v/lkHy7HsZ64oj+q3xoAFl9A=="], "mrmime": ["mrmime@2.0.1", "", {}, "sha512-Y3wQdFg2Va6etvQ5I82yUhGdsKrcYox6p7FfL1LbK2J4V01F9TGlepTIhnK24t7koZibmg82KGglhA1XK5IsLQ=="], "ms": ["ms@2.1.3", "", {}, "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA=="], - "mustache": ["mustache@4.2.0", "", { "bin": { "mustache": "bin/mustache" } }, "sha512-71ippSywq5Yb7/tVYyGbkBggbU8H3u5Rz56fH60jGFgr8uHwxs+aSKeqmluIVzM0m0kB7xQjKS6qPfd0b2ZoqQ=="], - "mute-stream": ["mute-stream@3.0.0", "", {}, "sha512-dkEJPVvun4FryqBmZ5KhDo0K9iDXAwn08tMLDinNdRBNPcYEDiWYysLcc6k3mjTMlbP9KyylvRpd4wFtwrT9rw=="], "mz": ["mz@2.7.0", "", { "dependencies": { "any-promise": "^1.0.0", "object-assign": "^4.0.1", "thenify-all": "^1.0.0" } }, "sha512-z81GNO7nnYMEhrGh9LeymoE4+Yr0Wn5McHIZMK5cfQCl+NDX08sCZgUc9/6MHni9IWuFLm1Z3HTCXu2z9fN62Q=="], @@ -1593,10 +1512,6 @@ "openai": ["openai@6.26.0", "", { "peerDependencies": { "ws": "^8.18.0", "zod": "^3.25 || ^4.0" }, "optionalPeers": ["ws", "zod"], "bin": { "openai": "bin/cli" } }, "sha512-zd23dbWTjiJ6sSAX6s0HrCZi41JwTA1bQVs0wLQPZ2/5o2gxOJA5wh7yOAUgwYybfhDXyhwlpeQf7Mlgx8EOCA=="], - "openapi-fetch": ["openapi-fetch@0.17.0", "", { "dependencies": { "openapi-typescript-helpers": "^0.1.0" } }, "sha512-PsbZR1wAPcG91eEthKhN+Zn92FMHxv+/faECIwjXdxfTODGSGegYv0sc1Olz+HYPvKOuoXfp+0pA2XVt2cI0Ig=="], - - "openapi-typescript-helpers": ["openapi-typescript-helpers@0.1.0", "", {}, "sha512-OKTGPthhivLw/fHz6c3OPtg72vi86qaMlqbJuVJ23qOvQ+53uw1n7HdmkJFibloF7QEjDrDkzJiOJuockM/ljw=="], - "p-limit": ["p-limit@6.2.0", "", { "dependencies": { "yocto-queue": "^1.1.1" } }, "sha512-kuUqqHNUqoIWp/c467RI4X6mmyuojY5jGutNU0wVTmEOOfcuwLqyMVoAi9MKi2Ak+5i9+nhmrK4ufZE8069kHA=="], "p-queue": ["p-queue@8.1.1", "", { "dependencies": { "eventemitter3": "^5.0.1", "p-timeout": "^6.1.2" } }, "sha512-aNZ+VfjobsWryoiPnEApGGmf5WmNsCo9xu8dfaYamG5qaLP7ClhLN6NgsFe6SwJ2UbLEBK5dv9x8Mn5+RVhMWQ=="], @@ -1627,8 +1542,6 @@ "path-key": ["path-key@3.1.1", "", {}, "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q=="], - "path-parse": ["path-parse@1.0.7", "", {}, "sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw=="], - "pathe": ["pathe@2.0.3", "", {}, "sha512-WUjGcAqP1gQacoQe+OBJsFA7Ld4DyXuUIjZ5cc75cLHvJ7dtNsTugphxIADwspS+AraAUePCKrSVtPLFj/F88w=="], "piccolore": ["piccolore@0.1.3", "", {}, "sha512-o8bTeDWjE086iwKrROaDf31K0qC/BENdm15/uH9usSC/uZjJOKb2YGiVHfLY4GhwsERiPI1jmwI2XrA7ACOxVw=="], @@ -1731,12 +1644,8 @@ "remark-stringify": ["remark-stringify@11.0.0", "", { "dependencies": { "@types/mdast": "^4.0.0", "mdast-util-to-markdown": "^2.0.0", "unified": "^11.0.0" } }, "sha512-1OSmLd3awB/t8qdoEOMazZkNsfVTeY4fTsgzcQFdXNq8ToTN4ZGwrMnlda4K6smTFKD+GRV6O48i6Z4iKgPPpw=="], - "require-in-the-middle": ["require-in-the-middle@7.5.2", "", { "dependencies": { "debug": "^4.3.5", "module-details-from-path": "^1.0.3", "resolve": "^1.22.8" } }, "sha512-gAZ+kLqBdHarXB64XpAe2VCjB7rIRv+mU8tfRWziHRJ5umKsIHN2tLLv6EtMw7WCdP19S0ERVMldNvxYCHnhSQ=="], - "reselect": ["reselect@5.1.1", "", {}, "sha512-K/BG6eIky/SBpzfHZv/dd+9JBFiS4SWV7FIujVyJRux6e45+73RaUHXLmIR1f7WOMaQ0U1km6qwklRQxpJJY0w=="], - "resolve": ["resolve@1.22.12", "", { "dependencies": { "es-errors": "^1.3.0", "is-core-module": "^2.16.1", "path-parse": "^1.0.7", "supports-preserve-symlinks-flag": "^1.0.0" }, "bin": { "resolve": "bin/resolve" } }, "sha512-TyeJ1zif53BPfHootBGwPRYT1RUt6oGWsaQr8UyZW/eAm9bKoijtvruSDEmZHm92CwS9nj7/fWttqPCgzep8CA=="], - "resolve-from": ["resolve-from@5.0.0", "", {}, "sha512-qYg9KP24dD5qka9J47d0aVky0N+b4fTU89LN9iDnjB5waksiC49rvMB0PrUJQGoTmH50XPiqOvAjDfaijGxYZw=="], "resolve-pkg-maps": ["resolve-pkg-maps@1.0.0", "", {}, "sha512-seS2Tj26TBVOC2NIc2rOe2y2ZO7efxITtLZcGSOnHHNOQ7CkiUBfw0Iw2ck6xkIhPwLhKNLS8BO+hEpngQlqzw=="], @@ -1779,8 +1688,6 @@ "shiki": ["shiki@3.22.0", "", { "dependencies": { "@shikijs/core": "3.22.0", "@shikijs/engine-javascript": "3.22.0", "@shikijs/engine-oniguruma": "3.22.0", "@shikijs/langs": "3.22.0", "@shikijs/themes": "3.22.0", "@shikijs/types": "3.22.0", "@shikijs/vscode-textmate": "^10.0.2", "@types/hast": "^3.0.4" } }, "sha512-LBnhsoYEe0Eou4e1VgJACes+O6S6QC0w71fCSp5Oya79inkwkm15gQ1UF6VtQ8j/taMDh79hAB49WUk8ALQW3g=="], - "shimmer": ["shimmer@1.2.1", "", {}, "sha512-sQTKC1Re/rM6XyFM6fIAGHRPVGvyXfgzIDvzoq608vM+jeyVD0Tu1E6Np0Kc2zAIFWIj963V2800iF/9LPieQw=="], - "signal-exit": ["signal-exit@4.1.0", "", {}, "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw=="], "sisteransi": ["sisteransi@1.0.5", "", {}, "sha512-bLGGlR1QxBcynn2d5YmDX4MGjlZvy2MRBDRNHLJ8VI6l6+9FUiyTFNJ0IveOSP0bcXgVDPRcfGqA0pjaqUpfVg=="], @@ -1821,8 +1728,6 @@ "sucrase": ["sucrase@3.35.1", "", { "dependencies": { "@jridgewell/gen-mapping": "^0.3.2", "commander": "^4.0.0", "lines-and-columns": "^1.1.6", "mz": "^2.7.0", "pirates": "^4.0.1", "tinyglobby": "^0.2.11", "ts-interface-checker": "^0.1.9" }, "bin": { "sucrase": "bin/sucrase", "sucrase-node": "bin/sucrase-node" } }, "sha512-DhuTmvZWux4H1UOnWMB3sk0sbaCVOoQZjv8u1rDoTV0HTdGem9hkAZtl4JZy8P2z4Bg0nT+YMeOFyVr4zcG5Tw=="], - "supports-preserve-symlinks-flag": ["supports-preserve-symlinks-flag@1.0.0", "", {}, "sha512-ot0WnXS9fgdkgIcePe6RHNk1WA8+muPa6cSjeR3V8K27q9BB1rTE3R1p7Hv0z1ZyAc8s6Vvv8DIyWf681MAt0w=="], - "svgo": ["svgo@4.0.0", "", { "dependencies": { "commander": "^11.1.0", "css-select": "^5.1.0", "css-tree": "^3.0.1", "css-what": "^6.1.0", "csso": "^5.0.5", "picocolors": "^1.1.1", "sax": "^1.4.1" }, "bin": "./bin/svgo.js" }, "sha512-VvrHQ+9uniE+Mvx3+C9IEe/lWasXCU0nXMY2kZeLrHNICuRiC8uMPyM14UEaMOFA5mhyQqEkB02VoQ16n3DLaw=="], "tailwindcss": ["tailwindcss@4.2.2", "", {}, "sha512-KWBIxs1Xb6NoLdMVqhbhgwZf2PGBpPEiwOqgI4pFIYbNTfBXiKYyWoTsXgBQ9WFg/OlhnvHaY+AEpW7wSmFo2Q=="], @@ -1973,16 +1878,6 @@ "@anthropic-ai/claude-agent-sdk/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], - "@arizeai/openinference-core/@opentelemetry/core": ["@opentelemetry/core@1.30.1", "", { "dependencies": { "@opentelemetry/semantic-conventions": "1.28.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-OOCM2C/QIURhJMuKaekP3TRBxBKxG/TWWA0TL2J6nXUtDnuCtccy49LUJF8xPFXMX+0LMcxFpCo8M9cGY1W6rQ=="], - - "@arizeai/openinference-vercel/@opentelemetry/core": ["@opentelemetry/core@1.30.1", "", { "dependencies": { "@opentelemetry/semantic-conventions": "1.28.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-OOCM2C/QIURhJMuKaekP3TRBxBKxG/TWWA0TL2J6nXUtDnuCtccy49LUJF8xPFXMX+0LMcxFpCo8M9cGY1W6rQ=="], - - "@arizeai/phoenix-client/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], - - "@arizeai/phoenix-evals/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], - - "@arizeai/phoenix-otel/@opentelemetry/core": ["@opentelemetry/core@1.30.1", "", { "dependencies": { "@opentelemetry/semantic-conventions": "1.28.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-OOCM2C/QIURhJMuKaekP3TRBxBKxG/TWWA0TL2J6nXUtDnuCtccy49LUJF8xPFXMX+0LMcxFpCo8M9cGY1W6rQ=="], - "@astrojs/mdx/source-map": ["source-map@0.7.6", "", {}, "sha512-i5uvt8C3ikiWeNZSVZNWcfZPItFQOsYTUAOkcUPGd8DqDy1uOUikjt5dG+uRlwyvR108Fb9DOd4GvXfT0N2/uQ=="], "@aws-crypto/sha256-browser/@smithy/util-utf8": ["@smithy/util-utf8@2.3.0", "", { "dependencies": { "@smithy/util-buffer-from": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-R8Rdn8Hy72KKcebgLiv8jQcQkXoLMOGGv5uI1/k0l+snqkOzQ1R0ChUBCxWMlBsFMekWjq0wRudIweFs7sKT5A=="], @@ -2017,18 +1912,6 @@ "@mistralai/mistralai/zod": ["zod@4.3.6", "", {}, "sha512-rftlrkhHZOcjDwkGlnUtZZkvaPHCsDATp4pGpuOOMDaTdDDXF91wuVDJoWoPsKX/3YPQ5fHuF3STjcYyKr+Qhg=="], - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/core": ["@opentelemetry/core@2.1.0", "", { "dependencies": { "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.0.0 <1.10.0" } }, "sha512-RMEtHsxJs/GiHHxYT58IY57UXAQTuUnZVco6ymDEqTNlJKTimM4qPUPVe8InNFyBjhHBEAx4k3Q8LtNayBsbUQ=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-exporter-base": ["@opentelemetry/otlp-exporter-base@0.205.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/otlp-transformer": "0.205.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-2MN0C1IiKyo34M6NZzD6P9Nv9Dfuz3OJ3rkZwzFmF6xzjDfqqCTatc9v1EpNfaP55iDOCLHFyYNCgs61FFgtUQ=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer": ["@opentelemetry/otlp-transformer@0.205.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.205.0", "@opentelemetry/core": "2.1.0", "@opentelemetry/resources": "2.1.0", "@opentelemetry/sdk-logs": "0.205.0", "@opentelemetry/sdk-metrics": "2.1.0", "@opentelemetry/sdk-trace-base": "2.1.0", "protobufjs": "^7.3.0" }, "peerDependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-KmObgqPtk9k/XTlWPJHdMbGCylRAmMJNXIRh6VYJmvlRDMfe+DonH41G7eenG8t4FXn3fxOGh14o/WiMRR6vPg=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/resources": ["@opentelemetry/resources@2.1.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-1CJjf3LCvoefUOgegxi8h6r4B/wLSzInyhGP2UmIBYNlo4Qk5CZ73e1eEyWmfXvFtm1ybkmfb2DqWvspsYLrWw=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/sdk-trace-base": ["@opentelemetry/sdk-trace-base@2.1.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/resources": "2.1.0", "@opentelemetry/semantic-conventions": "^1.29.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.3.0 <1.10.0" } }, "sha512-uTX9FBlVQm4S2gVQO1sb5qyBLq/FPjbp+tmGoxu4tIgtYGmBYB44+KX/725RFDe30yBSaA9Ml9fqphe1hbUyLQ=="], - - "@opentelemetry/instrumentation/@opentelemetry/api-logs": ["@opentelemetry/api-logs@0.57.2", "", { "dependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-uIX52NnTM0iBh84MShlpouI7UKqkZ7MrUszTmaypHBu4r7NofznSnQRfJ+uUeDtQDj6w8eFGg5KBLDAwAPz1+A=="], - "@reduxjs/toolkit/immer": ["immer@11.1.4", "", {}, "sha512-XREFCPo6ksxVzP4E0ekD5aMdf8WMwmdNaz6vuvxgI40UaEiu6q3p8X52aU6GdyvLY3XXX/8R7JOTXStz/nBbRw=="], "@rollup/pluginutils/estree-walker": ["estree-walker@2.0.2", "", {}, "sha512-Rfkk/Mp/DL7JVje3u18FxFujQlTNR2q6QfMSMB7AvCBx91NGj/ba3kCfza0f6dVDbw7YlRf/nDrn7pQrCCyQ/w=="], @@ -2115,12 +1998,6 @@ "vite/picomatch": ["picomatch@4.0.3", "", {}, "sha512-5gTmgEY/sqK6gFXLIsQNH19lWb4ebPDLA4SdLP7dsWkIXHWlG66oPuVvXSGFPppYZz8ZDZq0dYYrbHfBCVUb1Q=="], - "@arizeai/openinference-core/@opentelemetry/core/@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.28.0", "", {}, "sha512-lp4qAiMTD4sNWW4DbKLBkfiMZ4jbAboJIGOQr5DvciMRI494OapieI9qiODpOt0XBr1LjIDy1xAGAnVs5supTA=="], - - "@arizeai/openinference-vercel/@opentelemetry/core/@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.28.0", "", {}, "sha512-lp4qAiMTD4sNWW4DbKLBkfiMZ4jbAboJIGOQr5DvciMRI494OapieI9qiODpOt0XBr1LjIDy1xAGAnVs5supTA=="], - - "@arizeai/phoenix-otel/@opentelemetry/core/@opentelemetry/semantic-conventions": ["@opentelemetry/semantic-conventions@1.28.0", "", {}, "sha512-lp4qAiMTD4sNWW4DbKLBkfiMZ4jbAboJIGOQr5DvciMRI494OapieI9qiODpOt0XBr1LjIDy1xAGAnVs5supTA=="], - "@aws-crypto/sha256-browser/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="], "@aws-crypto/util/@smithy/util-utf8/@smithy/util-buffer-from": ["@smithy/util-buffer-from@2.2.0", "", { "dependencies": { "@smithy/is-array-buffer": "^2.2.0", "tslib": "^2.6.2" } }, "sha512-IJdWBbTcMQ6DA0gdNhh/BwrLkDR+ADW5Kr1aZmd4k3DIF6ezMV4R2NIAmT08wQJ3yUK82thHWmC/TnK/wpMMIA=="], @@ -2131,14 +2008,6 @@ "@google/genai/protobufjs/@protobufjs/utf8": ["@protobufjs/utf8@1.1.1", "", {}, "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg=="], - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/@opentelemetry/api-logs": ["@opentelemetry/api-logs@0.205.0", "", { "dependencies": { "@opentelemetry/api": "^1.3.0" } }, "sha512-wBlPk1nFB37Hsm+3Qy73yQSobVn28F4isnWIBvKpd5IUH/eat8bwcL02H9yzmHyyPmukeccSl2mbN5sDQZYnPg=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/@opentelemetry/sdk-logs": ["@opentelemetry/sdk-logs@0.205.0", "", { "dependencies": { "@opentelemetry/api-logs": "0.205.0", "@opentelemetry/core": "2.1.0", "@opentelemetry/resources": "2.1.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.4.0 <1.10.0" } }, "sha512-nyqhNQ6eEzPWQU60Nc7+A5LIq8fz3UeIzdEVBQYefB4+msJZ2vuVtRuk9KxPMw1uHoHDtYEwkr2Ct0iG29jU8w=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/@opentelemetry/sdk-metrics": ["@opentelemetry/sdk-metrics@2.1.0", "", { "dependencies": { "@opentelemetry/core": "2.1.0", "@opentelemetry/resources": "2.1.0" }, "peerDependencies": { "@opentelemetry/api": ">=1.9.0 <1.10.0" } }, "sha512-J9QX459mzqHLL9Y6FZ4wQPRZG4TOpMCyPOh6mkr/humxE1W2S3Bvf4i75yiMW9uyed2Kf5rxmLhTm/UK8vNkAw=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/protobufjs": ["protobufjs@7.5.6", "", { "dependencies": { "@protobufjs/aspromise": "^1.1.2", "@protobufjs/base64": "^1.1.2", "@protobufjs/codegen": "^2.0.5", "@protobufjs/eventemitter": "^1.1.0", "@protobufjs/fetch": "^1.1.0", "@protobufjs/float": "^1.0.2", "@protobufjs/inquire": "^1.1.1", "@protobufjs/path": "^1.1.2", "@protobufjs/pool": "^1.1.0", "@protobufjs/utf8": "^1.1.1", "@types/node": ">=13.7.0", "long": "^5.0.0" } }, "sha512-M71sTMB146U3u0di3yup8iM+zv8yPRNQVr1KK4tyBitl3qFvEGucq/rGDRShD2rsJhtN02RJaJ7j5X5hmy8SJg=="], - "@tanstack/router-plugin/chokidar/readdirp": ["readdirp@3.6.0", "", { "dependencies": { "picomatch": "^2.2.1" } }, "sha512-hOS089on8RduqdbhvQ5Z37A0ESjsqz6qnRcffsMU3495FuTdqSm+7bhJ29JvIOsBDEEnan5DPu9t3To9VRlMzA=="], "ansi-align/string-width/emoji-regex": ["emoji-regex@8.0.0", "", {}, "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A=="], @@ -2305,12 +2174,6 @@ "@aws-crypto/util/@smithy/util-utf8/@smithy/util-buffer-from/@smithy/is-array-buffer": ["@smithy/is-array-buffer@2.2.0", "", { "dependencies": { "tslib": "^2.6.2" } }, "sha512-GGP3O9QFD24uGeAXYUjwSTXARoqpZykHadOmA8G5vfJPK0/DC67qa//0qvqrJzL1xc8WQWX7/yc7fwudjPHPhA=="], - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/protobufjs/@protobufjs/codegen": ["@protobufjs/codegen@2.0.5", "", {}, "sha512-zgXFLzW3Ap33e6d0Wlj4MGIm6Ce8O89n/apUaGNB/jx+hw+ruWEp7EwGUshdLKVRCxZW12fp9r40E1mQrf/34g=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/protobufjs/@protobufjs/inquire": ["@protobufjs/inquire@1.1.1", "", {}, "sha512-mnzgDV26ueAvk7rsbt9L7bE0SuAoqyuys/sMMrmVcN5x9VsxpcG3rqAUSgDyLp0UZlmNfIbQ4fHfCtreVBk8Ew=="], - - "@opentelemetry/exporter-trace-otlp-proto/@opentelemetry/otlp-transformer/protobufjs/@protobufjs/utf8": ["@protobufjs/utf8@1.1.1", "", {}, "sha512-oOAWABowe8EAbMyWKM0tYDKi8Yaox52D+HWZhAIJqQXbqe0xI/GV7FhLWqlEKreMkfDjshR5FKgi3mnle0h6Eg=="], - "ansi-align/string-width/strip-ansi/ansi-regex": ["ansi-regex@5.0.1", "", {}, "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ=="], } } diff --git a/docs/adr/2026-06-11-phoenix-observability-adapter.md b/docs/adr/2026-06-11-phoenix-observability-adapter.md index 49012940b..74d35a1da 100644 --- a/docs/adr/2026-06-11-phoenix-observability-adapter.md +++ b/docs/adr/2026-06-11-phoenix-observability-adapter.md @@ -20,7 +20,7 @@ Relevant existing seams already point in this direction: - Provider and grader registries support narrow registration points. - `.agentv/providers/`, `.agentv/assertions/`, and `.agentv/graders/` use convention-based local discovery instead of a broad plugin host. -- Earlier `packages/phoenix-adapter/` experiments kept Phoenix-specific behavior outside core and reported unsupported mappings explicitly. That experiment is not the supported product path for AgentV completed runs or transcripts. +- Earlier Phoenix adapter experiments kept Phoenix-specific behavior outside core and reported unsupported mappings explicitly. Those experiments are not the supported product path for AgentV completed runs or transcripts. - The trace evaluation plan requires generic OTLP/OpenInference mapping without Phoenix-specific assumptions in core. ## Decision @@ -35,7 +35,10 @@ AgentV core should own: - generic OTLP/OpenInference import/export mapping where it is backend-neutral; - small registry/discovery primitives for extension points. -Phoenix integration should live outside core behind an adapter boundary, currently `packages/phoenix-adapter/`. The first implementation does not need package loading or package naming; a local resolver module is enough. The adapter boundary may expose: +Phoenix integration should live outside core behind a narrow local adapter or +resolver boundary when needed. No maintained workspace package currently owns +that boundary. The first implementation does not need package loading or package +naming; a local resolver module is enough. Such a custom boundary may expose: - a Phoenix OTel backend resolver; - Phoenix/OpenInference span-kind mapping; @@ -72,7 +75,9 @@ Registration/discovery should remain boring and local-first. In this ADR, "plugi - keep `execution.otel_backend: ` and `--otel-backend ` as the user-facing selectors; - do not add package names, package auto-installation, a remote marketplace, trust prompts, or a general-purpose plugin host for this need. -The earlier prototype exposed a resolver, for example `phoenixOtelBackend`, so users could opt in from project config or a local `.agentv/otel-backends/phoenix.mjs` file. Treat that as a custom/legacy path, not as the supported AgentV-to-Phoenix product boundary. +The earlier prototype exposed a resolver so users could opt in from project config +or a local `.agentv/otel-backends/phoenix.mjs` file. Treat that as a +custom/legacy path, not as the supported AgentV-to-Phoenix product boundary. ## Migration path for Phoenix @@ -81,7 +86,7 @@ The earlier prototype exposed a resolver, for example `phoenixOtelBackend`, so u - `OTEL_EXPORTER_OTLP_HEADERS` - `--otel-file` for offline OTLP JSON export 2. Add a tiny backend resolver seam only if ergonomic backend names are needed. -3. Implement Phoenix endpoint/header/project routing in the Phoenix adapter boundary, not in core. +3. Keep any custom Phoenix endpoint/header/project routing outside core and outside the supported AgentV artifact path. 4. Keep Phoenix out of Dashboard runtime fetch paths; use safe external links instead. 5. Consider moving existing vendor-specific core presets to the same resolver model later, but do not couple that cleanup to the Phoenix decision unless the implementation already touches the preset registry. diff --git a/docs/plans/2026-06-06-001-agentv-eval-authoring-extensibility-plan.md b/docs/plans/2026-06-06-001-agentv-eval-authoring-extensibility-plan.md index 6baaf7f59..a438bbd6c 100644 --- a/docs/plans/2026-06-06-001-agentv-eval-authoring-extensibility-plan.md +++ b/docs/plans/2026-06-06-001-agentv-eval-authoring-extensibility-plan.md @@ -43,7 +43,7 @@ Use these as design references, not as feature mandates: - Margin and Terminal-Bench: filesystem-native benchmark packaging, conventional task files, setup scripts, scoring scripts, and immutable artifacts. AgentV should document and template this shape instead of adding `workspace`, `oracle`, `variants`, or `expected_artifacts` as broad core fields. - Pi coding agent: skills and extensions separate agent-facing procedural guidance from runtime code. Its docs show skills as portable `SKILL.md` directories with scripts/assets, and extensions as typed runtime hooks. AgentV should copy the progressive-disclosure authoring pattern for eval builders. - Composio Agent Orchestrator: swappable TypeScript plugin interfaces for narrow responsibilities. Its plugin-slot model is useful as a boundary pattern, but AgentV should avoid a general orchestrator plugin host until concrete runtime extension gaps appear. -- Phoenix: official TypeScript packages (`@arizeai/phoenix-client`, `@arizeai/phoenix-evals`, `@arizeai/phoenix-otel`) make it a good private export/conversion target for result and trace integration. +- Phoenix: official TypeScript packages (`@arizeai/phoenix-client`, `@arizeai/phoenix-evals`, `@arizeai/phoenix-otel`) remain useful peer-framework research material, but the 2026-06-20 product boundary supersedes AgentV-to-Phoenix result or trace export. - promptfoo: Node package and JavaScript assertion/provider hooks make it a good private conversion target, especially for YAML matrix configs and JS assertion migration. - Braintrust: TypeScript SDK and `Eval(data, task, scores)` model make it a good private conversion target for dataset/task/score loops, experiment metadata, trial counts, and hosted result upload. @@ -113,7 +113,7 @@ Source-backed findings from the initial code analysis: - promptfoo can mirror simple AgentV rubric examples with `llm-rubric` and script assertions. AgentV `tool-trajectory` is the largest parity gap because promptfoo trace/trajectory assertions depend on promptfoo trace conventions rather than AgentV `Message[].toolCalls`; a custom provider/metadata adapter is required. - Braintrust TypeScript `Eval(name, { data, task, scores })` maps cleanly to AgentV's case/task/score model. The lossy point is that AgentV rich assertion arrays with evidence/verdict/type become Braintrust score metadata unless a deeper adapter is built. - Phoenix TypeScript is split across dataset creation, experiment running, evaluators, and OTel. It is strong for persisted datasets/experiments and traces, but less direct for local YAML wrapping because normal `runExperiment` flow expects a Phoenix dataset/server round trip. -- AgentV already has a Phoenix adapter package, but its support matrix is intentionally narrow and deterministic. Private experiments should use that as evidence, not widen public scope prematurely. +- AgentV previously carried a private Phoenix adapter experiment with an intentionally narrow and deterministic support matrix. Treat that as historical evidence, not a reason to widen public scope. Workspace/container findings from Terminal-Bench, Harbor, and Margin: @@ -139,7 +139,10 @@ Extend the existing `agentv create` scaffolding into reusable templates: - `agentv create eval --template terminal-task` - `agentv create eval --template promptfoo-adapter` - `agentv create eval --template braintrust-export` -- `agentv create eval --template phoenix-export` + +Do not add a Phoenix export template. The later Phoenix read-only correlation +boundary supersedes AgentV-to-Phoenix dataset, experiment, result, or trace +export templates. The first implementation can stay static and local, similar to the current `EVAL_TEMPLATES` object in `apps/cli/src/commands/create/commands.ts`. Do not introduce remote template registries, package installation, trust prompts, or plugin loading yet. @@ -185,7 +188,9 @@ Likely docs locations: Add private examples, not core adapters, for: -- Phoenix: export AgentV results/traces into Phoenix using the TS packages. +- Phoenix: compare peer-framework DX around independently emitted traces and + safe `external_trace` link-out metadata. Do not export AgentV-owned results, + traces, transcripts, datasets, experiments, or indexes into Phoenix. - promptfoo: convert promptfoo-style YAML or JS assertions into ordinary AgentV evals/assertions where feasible. - Braintrust: export AgentV cases/results into Braintrust's TypeScript `Eval(data, task, scores)` shape. @@ -257,7 +262,7 @@ framework-parity/ run-phoenix.ts ``` -This subtree should be clearly marked private/internal and should not be mirrored into public AgentV examples until findings are scrubbed. +This subtree should be clearly marked private/internal and should not be mirrored into public AgentV examples until findings are scrubbed. Any Phoenix files in this historical peer-framework research tree must stay outside the supported AgentV product path and must not become AgentV-to-Phoenix artifact export guidance. Initial reference evals to consider: @@ -390,7 +395,7 @@ For private conversion work: - Which AgentV evals should be mirrored first: one simple text/rubric eval plus one workspace/tool-trajectory eval, or only WTG-relevant prompt evals? - Should promptfoo import/export be a CLI command later, or stay as documented conversion scripts until demand is proven? -- Should Phoenix/Braintrust integrations be examples only, or wrappers that consume AgentV JSONL output? +- Should Braintrust integrations be examples only, or wrappers that consume AgentV JSONL output? Phoenix work is superseded by the read-only external-trace correlation boundary. ## Decision @@ -404,7 +409,7 @@ Proceed as a plan, not a brainstorm, because the product question is now concret - `av-r0s.5.6` - analysis(private): compare peer native ports against AgentV - `av-r0s.5.8` - design(private): minimal AgentV workspace/container primitive - `av-r0s.5.1` - tooling(private): extract promptfoo exporter requirements after hand ports -- `av-r0s.5.2` - tooling(private): prototype Braintrust and Phoenix replay adapters +- `av-r0s.5.2` - tooling(private): prototype Braintrust replay adapters and historical Phoenix peer-framework research only - `av-r0s.5.3` - docs(agentv): decide sanitized promotion path from private parity experiments - `av-r0s.5.4` - closed as superseded by source-specific hand-port beads - `av-w9p` - closed as superseded by `av-r0s.1` diff --git a/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md b/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md index e5dcf346b..8c11522af 100644 --- a/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md +++ b/docs/plans/2026-06-21-001-feat-av-quf-results-storage-plan.md @@ -17,9 +17,11 @@ publication export, an append-only mutable-operation log, and an S3-compatible object-storage tier. The canonical AgentV run artifacts stay `benchmark.json`, `index.jsonl`, per-test -grading/timing files, `outputs/trace.json`, and derived transcript artifacts. GitHub, -Backblaze B2, Phoenix, Hugging Face, and Dashboard are projections, viewers, or storage -backends over those artifacts. +grading/timing files, `outputs/trace.json`, and derived transcript artifacts. +GitHub and Backblaze B2 are storage/publication targets over those artifacts. +Dashboard and Hugging Face are viewers or publication surfaces. Phoenix is only +a link-out viewer when safe `external_trace` metadata points at independently +emitted spans; it is not an AgentV artifact projection or storage backend. --- @@ -58,7 +60,7 @@ without creating another hosted results platform inside AgentV. - Implementing storage backends, S3, oplog, retention, or export code in this bead. - Adding GitHub issues or tracker runtime state. - Creating windowed branches, per-run branches, or a hosted Dashboard replacement. -- Making Phoenix, Hugging Face, B2, or GitHub the canonical results model. +- Making Phoenix canonical, making Phoenix an AgentV artifact projection target, or making Hugging Face, B2, or GitHub the canonical results model. ### Deferred to Follow-Up Work @@ -852,8 +854,9 @@ results: - [ ] The artifact sidecar is called `artifacts`, not `artifact-blobs` or `blob`. - [ ] The plan has no windowed or per-run branches. - [ ] Path sharding is deferred until realistic measurement proves need. -- [ ] AgentV artifacts remain canonical; Dashboard, Hugging Face, Phoenix, B2, and - GitHub are projections/viewers/storage backends. +- [ ] AgentV artifacts remain canonical; Dashboard and Hugging Face are viewers + or publication surfaces, B2 and GitHub are storage/publication targets, and + Phoenix is link-out correlation only when safe external trace metadata exists. - [ ] File/function-level implementation guidance names current result repo, remote, serve, export, artifact-writer, and Dashboard surfaces. - [ ] Test plan covers core, CLI, Dashboard, and docs-facing behavior. diff --git a/docs/plans/results-storage-retention-oplog-plan.md b/docs/plans/results-storage-retention-oplog-plan.md index c5fe77113..711d64ec9 100644 --- a/docs/plans/results-storage-retention-oplog-plan.md +++ b/docs/plans/results-storage-retention-oplog-plan.md @@ -32,7 +32,7 @@ AgentV already has the beginning of a git-native results store. `packages/core/s The next storage beads need one reviewed contract before implementation splits across retention, object storage, publication export, path-sharding assessment, and mutable operations. Without that contract, each bead could accidentally create its own branch layout, backend abstraction, transcript boundary, or dashboard read model. -The product boundary stays unchanged: AgentV remains the repo-native and workspace-native source of truth for run artifacts. Phoenix, object storage, SQLite, and publication exports are adapters, projections, caches, or storage tiers over AgentV artifacts. A `project` holds runs, traces, and experiments; a `benchmark` is a curated eval suite, and the per-run `benchmark.json` artifact keeps that artifact name. +The product boundary stays unchanged: AgentV remains the repo-native and workspace-native source of truth for run artifacts. Object storage, SQLite, and publication exports are storage tiers, caches, or derived projections over AgentV artifacts. Phoenix is link-out correlation only when safe `external_trace` metadata points at independently emitted spans; it is not an AgentV artifact projection or storage tier. A `project` holds runs, traces, and experiments; a `benchmark` is a curated eval suite, and the per-run `benchmark.json` artifact keeps that artifact name. --- @@ -90,7 +90,7 @@ The product boundary stays unchanged: AgentV remains the repo-native and workspa - KTD8. Blob-native mode uses object storage as the store of record, including run manifests and oplog segments. It should share logical pointer and manifest shapes with git-backed modes, but it should not emulate git refs. - KTD9. Backblaze B2 is used through a standard S3-compatible client with endpoint, region, bucket, prefix, and env/config credential sourcing. - KTD10. SQLite is a local rebuildable projection only. av-7uu may consume the storage listing contract, but SQLite must stay deletable and non-canonical. -- KTD11. Phoenix/KVE work remains adapter-owned. Dashboard list routes should consume storage listing or manifest contracts, not duplicate backend storage implementation. +- KTD11. Phoenix/KVE work remains link-out/read-model-only when safe external trace metadata exists. Dashboard list routes should consume storage listing or manifest contracts, not duplicate backend storage implementation. --- @@ -153,7 +153,7 @@ flowchart TB ## 1. Storage Backend Abstraction And Modes -**Decision:** Introduce a narrow results storage abstraction while preserving the existing git code as the `git-native` adapter. The abstraction should cover publish, list, materialize/read run detail, resolve artifact bytes, sync/status, retention hooks, and raw oplog segment IO. It should not make Dashboard, Phoenix, or SQLite own backend-specific storage logic. +**Decision:** Introduce a narrow results storage abstraction while preserving the existing git code as the `git-native` adapter. The abstraction should cover publish, list, materialize/read run detail, resolve artifact bytes, sync/status, retention hooks, and raw oplog segment IO. It should not make Dashboard or SQLite own backend-specific storage logic, and it must not make Phoenix a storage backend or AgentV artifact projection target. **File and function-level implementation plan:** @@ -501,9 +501,9 @@ Object keys should be content-addressed, such as `sha256/` under an AgentV --- -## 8. SQLite Index And Dashboard/Phoenix Boundaries +## 8. SQLite Index, Dashboard, And Phoenix Boundaries -**Decision:** av-7uu may build a local rebuildable SQLite projection over canonical storage listings and manifests, but SQLite is not canonical. av-kve.5/Phoenix KVE work should consume manifest/listing contracts and stay out of storage backend implementation. +**Decision:** av-7uu may build a local rebuildable SQLite projection over canonical storage listings and manifests, but SQLite is not canonical. av-kve.5 Dashboard work should consume manifest/listing contracts and stay out of storage backend implementation. Phoenix work is limited to safe link-out correlation from external trace metadata; it must not consume AgentV storage as a projection/export target. **File and function-level implementation plan:** @@ -514,9 +514,9 @@ Object keys should be content-addressed, such as `sha256/` under an AgentV - av-kve.5 likely files: - `apps/cli/src/commands/results/serve.ts` list/aggregate handlers. - `apps/dashboard/src/components/RunList.tsx`, `AnalyticsTab.tsx`, and related list/compare views only when UI behavior changes. -- Phoenix/KVE coordinator owned files: - - Phoenix/KVE adapter and read-model files remain outside this spec's ownership. - - Any Dashboard trace/session read model stays a projection over AgentV run artifacts. +- Phoenix boundary: + - Phoenix-specific link helpers remain outside this spec's ownership. + - Any Dashboard trace/session read model stays a projection over AgentV run artifacts and may only link out to Phoenix through safe `external_trace` UI URLs. **Contract for av-7uu:** @@ -525,19 +525,19 @@ Object keys should be content-addressed, such as `sha256/` under an AgentV - SQLite must not be pushed as the canonical result store. - SQLite must not parse full transcripts in foreground list paths. -**Contract for av-kve.5 and Phoenix/KVE:** +**Contract for av-kve.5 and Phoenix link-out correlation:** - Dashboard list views should stay on benchmark/index manifests, compact derived exports, storage listing contracts, or the rebuildable SQLite projection. - Detail routes can lazily resolve transcript, trace, and arbitrary artifact payloads. -- Phoenix/KVE should reference AgentV artifact IDs, pointers, and external trace links rather than reimplement storage backends. +- Phoenix link-out code should reference safe external trace links only. It must not reimplement storage backends or export AgentV artifacts into Phoenix. **Acceptance criteria:** - av-7uu can consume this spec without changing canonical storage. - av-kve.5 can improve Dashboard list routes without creating a second backend. -- Phoenix/KVE work remains adapter/projection work and does not alter the AgentV storage contract. +- Phoenix work remains link-out correlation only and does not alter the AgentV storage contract. -**Downstream beads:** av-7uu, av-kve.5, and Phoenix/KVE coordinator. +**Downstream beads:** av-7uu and av-kve.5. --- @@ -551,7 +551,7 @@ Object keys should be content-addressed, such as `sha256/` under an AgentV - **B2-native APIs:** Rejected because Backblaze B2's S3-compatible endpoint lets AgentV use standard S3 clients, MinIO CI, and portable user configuration. - **Vercel Blob as a dependency:** Rejected because it is provider-specific and weaker than the desired content-addressed private bucket model. - **SQLite as canonical storage:** Rejected because it would move AgentV away from portable run artifacts. SQLite is a rebuildable projection only. -- **Dashboard or Phoenix owning storage backends:** Rejected because it duplicates core storage behavior and blurs adapter boundaries. +- **Dashboard or Phoenix owning storage backends:** Rejected because it duplicates core storage behavior and blurs adapter boundaries. Phoenix is additionally excluded from AgentV artifact projection by the read-only correlation boundary. --- @@ -598,5 +598,5 @@ For downstream implementation beads: - The spec defines compact derived publication export for av-kxa without required `eval.txt`. - The spec defines per-actor append-only oplog, add-wins tags, tombstones/restores, materialized state, and watermark semantics for av-8un. - The spec defines Backblaze B2 through S3-compatible API, standard S3 SDK/client, BWS/env/config credentials, content-addressed pointers, checksum/size verification, presigned/lazy reads, real B2 dogfood, and MinIO-compatible CI for av-dsc. -- The spec keeps SQLite non-canonical for av-7uu and keeps Dashboard/Phoenix/KVE on manifest/listing/projection boundaries. +- The spec keeps SQLite non-canonical for av-7uu, keeps Dashboard on manifest/listing/projection boundaries, and keeps Phoenix limited to safe external-trace link-out correlation. - No TypeScript source, CLI implementation, Dashboard implementation, tests, package files, generated artifacts, tracker runtime state, or evidence files are changed by this PR. diff --git a/package.json b/package.json index a87fd2468..09b0dadac 100644 --- a/package.json +++ b/package.json @@ -6,15 +6,15 @@ "packageManager": "bun@1.3.3", "workspaces": ["apps/*", "packages/*"], "scripts": { - "build": "bun --filter @agentv/core build && bun --filter @agentv/sdk build && bun --filter @agentv/phoenix-adapter build && bun --filter @agentv/dashboard build && bun --filter agentv build", + "build": "bun --filter @agentv/core build && bun --filter @agentv/sdk build && bun --filter @agentv/dashboard build && bun --filter agentv build", "verify": "bun run build && bun run typecheck && bun run lint && bun run test", - "typecheck": "bun --filter @agentv/core typecheck && bun --filter @agentv/sdk typecheck && bun --filter @agentv/phoenix-adapter typecheck && bun --filter agentv typecheck", + "typecheck": "bun --filter @agentv/core typecheck && bun --filter @agentv/sdk typecheck && bun --filter agentv typecheck", "typecheck:workspace": "tsc -b tsconfig.build.json", "typecheck:watch": "bun --filter @agentv/core typecheck -- --watch & bun --filter agentv typecheck -- --watch", "lint": "biome check .", "format": "biome format --write .", "fix": "biome check --write .", - "test": "bun --filter @agentv/core test && bun --filter @agentv/sdk test && bun --filter @agentv/phoenix-adapter test && bun --filter agentv test && bun --filter @agentv/dashboard test", + "test": "bun --filter @agentv/core test && bun --filter @agentv/sdk test && bun --filter agentv test && bun --filter @agentv/dashboard test", "test:watch": "bun --filter @agentv/core test:watch & bun --filter agentv test:watch", "agentv": "bun apps/cli/src/cli.ts", "agentv:buildrun": "bun run build && bun apps/cli/dist/cli.js", @@ -26,14 +26,11 @@ "examples:install": "bun scripts/install-examples.ts", "publish": "bun run build && bun scripts/publish.ts", "publish:next": "bun run build && bun scripts/publish.ts next", - "contract-eval": "bun run build && bun scripts/run-contract-eval.ts", - "phoenix:dry-run": "bun --filter @agentv/phoenix-adapter phoenix:dry-run", - "phoenix:assert-smoke": "bun --filter @agentv/phoenix-adapter phoenix:assert-smoke" + "contract-eval": "bun run build && bun scripts/run-contract-eval.ts" }, "devDependencies": { "@agentv/core": "workspace:*", "@agentv/sdk": "workspace:*", - "@agentv/phoenix-adapter": "workspace:*", "@biomejs/biome": "^1.9.4", "@types/bun": "latest", "@types/node": "24.1.0", diff --git a/packages/phoenix-adapter/.gitignore b/packages/phoenix-adapter/.gitignore deleted file mode 100644 index a9a1bd38a..000000000 --- a/packages/phoenix-adapter/.gitignore +++ /dev/null @@ -1 +0,0 @@ -reports/ diff --git a/packages/phoenix-adapter/README.md b/packages/phoenix-adapter/README.md deleted file mode 100644 index 812e4b386..000000000 --- a/packages/phoenix-adapter/README.md +++ /dev/null @@ -1,27 +0,0 @@ -# @agentv/phoenix-adapter - -Internal Phoenix boundary fixtures for AgentV. This package is not the supported -product path for completed AgentV run artifacts. - -After the 2026-06-20 product decision, AgentV does not export or project -completed runs, traces, transcripts, datasets, experiments, or indexes into -Phoenix. AgentV-owned local/Git-backed artifacts and Dashboard remain the -zero-infra inspection path. Phoenix is optional read-only correlation for -external traces that were emitted independently and are referenced through safe -`external_trace` metadata. - -The deterministic YAML-to-Phoenix dataset code in this package is retained as an -internal legacy fixture only. Do not promote it as a public integration path, and -do not make Dashboard or the zero-infra local path depend on it. - -The package also exports `phoenixOtelBackend`, a backend resolver for AgentV's -local `.agentv/otel-backends/phoenix.mjs` hook. It resolves Phoenix collector -endpoint, auth headers, and `PHOENIX_PROJECT_NAME` resource routing outside -`@agentv/core`. This remains outside core and must not be required by Dashboard. - -```bash -bun --filter @agentv/phoenix-adapter phoenix:assert-smoke -bun --filter @agentv/phoenix-adapter phoenix:dry-run -``` - -See `docs/support-matrix.md` for evaluator coverage and `docs/e2e-verification.md` for smoke-test notes. diff --git a/packages/phoenix-adapter/docs/e2e-verification.md b/packages/phoenix-adapter/docs/e2e-verification.md deleted file mode 100644 index f269e9a04..000000000 --- a/packages/phoenix-adapter/docs/e2e-verification.md +++ /dev/null @@ -1,62 +0,0 @@ -# E2E Verification - -This file documents internal legacy fixture checks only. It is not a supported -AgentV-to-Phoenix export path. AgentV does not export or project completed runs, -traces, transcripts, datasets, experiments, or indexes into Phoenix. - -## Dry-Run Conversion - -Dry-run mode discovers AgentV example evals, normalizes cases through -`@agentv/core`, builds legacy Phoenix-shaped payloads in memory, and compares -test IDs against AgentV baselines where present. It does not contact Phoenix and -does not write AgentV artifacts into Phoenix. - -```bash -bun run phoenix:assert-smoke -bun run phoenix:dry-run -``` - -Current filtered smoke result against `examples/features/assert/evals/dataset.eval.yaml`: - -- 1 suite discovered -- 4 tests normalized -- 1 suite passed structural parity -- 0 failed suites - -Current full dry-run result against this AgentV checkout: - -- 97 suites discovered -- 405 tests normalized -- 93 suites passed structural parity -- 4 suites failed baseline/loader parity - -The failing suites are currently source/baseline or source-reference mismatches, not Phoenix conversion crashes: - -- `examples/features/matrix-evaluation/evals/dataset.eval.yaml`: baseline has 5 rows, source has 3 tests. -- `examples/features/prompt-template-sdk/evals/dataset.eval.yaml`: AgentV core skips 2 tests because `../prompts/custom-grader.ts` cannot be resolved from the eval source. -- `examples/features/tool-trajectory-simple/evals/dataset.eval.yaml`: source has 11 tests, baseline has 7 rows. -- `examples/features/weighted-graders/evals/dataset.eval.yaml`: baseline IDs use `evaluator` naming while source IDs use `grader` naming. - -## Live Phoenix Smoke - -Live Phoenix dataset/experiment creation is no longer part of the supported -AgentV product boundary. Do not use live mode as Dashboard verification, a -zero-infra path, or a public integration promise. - -Historical command retained for maintainers investigating the legacy fixture: - -```bash -(cd packages/phoenix-adapter && bun src/cli.ts run \ - --agentv-root ../.. \ - --filter examples/features/assert/evals/dataset.eval.yaml \ - --out reports/live-assert-final.json \ - --namespace agentv-phoenix-e2e-final) -``` - -The source harness was historically verified locally against Phoenix at -`http://localhost:6006`: - -- 4 Phoenix task runs -- 4 Phoenix evaluator runs -- average evaluator score: 1.0 -- experiment ID: `RXhwZXJpbWVudDo2` diff --git a/packages/phoenix-adapter/docs/support-matrix.md b/packages/phoenix-adapter/docs/support-matrix.md deleted file mode 100644 index 64cbc7066..000000000 --- a/packages/phoenix-adapter/docs/support-matrix.md +++ /dev/null @@ -1,35 +0,0 @@ -# Phoenix Adapter Support Matrix - -This is an internal legacy support matrix for the earlier deterministic -YAML-to-Phoenix adapter fixture. It is not the supported AgentV product path: -AgentV does not export or project completed runs, traces, transcripts, datasets, -experiments, or indexes into Phoenix. - -The current supported Phoenix boundary is link-out correlation from safe -`external_trace` metadata when Codex, Arize, or another hook already emitted -spans independently. - -If the legacy fixture is run for internal parity checks, its deterministic -coverage is: - -| AgentV family | Phoenix status | -| --- | --- | -| `contains` | Supported by deterministic adapter | -| `regex` | Supported by deterministic adapter | -| `equals` | Supported by deterministic adapter | -| `is-json` | Supported by deterministic adapter | -| `llm-grader` | Reported as unsupported in first pass | -| `rubrics` | Reported as unsupported in first pass | -| `code-grader` | Reported as unsupported in first pass | -| `composite` | Reported as unsupported in first pass | -| `field-accuracy` | Reported as unsupported in first pass | -| `execution-metrics` | Reported as unsupported in first pass | -| `tool-trajectory` | Reported as unsupported in first pass | -| `cost` | Reported as unsupported in first pass | -| `latency` | Reported as unsupported in first pass | -| `trial-output-consistency` | Reported as unsupported in first pass | -| Other custom families | Reported as unsupported with the family name | - -Unsupported does not block legacy fixture conversion unless -`--fail-on-unsupported` is set. The report keeps unsupported families visible so -parity gaps are explicit. diff --git a/packages/phoenix-adapter/package.json b/packages/phoenix-adapter/package.json deleted file mode 100644 index 87b9eb210..000000000 --- a/packages/phoenix-adapter/package.json +++ /dev/null @@ -1,33 +0,0 @@ -{ - "name": "@agentv/phoenix-adapter", - "version": "4.31.4-next.1", - "description": "Phoenix execution and observability adapter for AgentV eval YAML suites", - "private": true, - "type": "module", - "main": "./dist/index.js", - "types": "./dist/index.d.ts", - "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js" - } - }, - "scripts": { - "build": "(cd ../core && bun run build) && tsup", - "typecheck": "(cd ../core && bun run build) && tsc --noEmit", - "test": "(cd ../core && bun run build) && bun test", - "phoenix:dry-run": "bun src/cli.ts run --dry-run --agentv-root ../.. --out reports/dry-run.json", - "phoenix:assert-smoke": "bun src/cli.ts run --dry-run --agentv-root ../.. --filter examples/features/assert/evals/dataset.eval.yaml --out /tmp/agentv-phoenix-assert-smoke.json" - }, - "files": ["dist", "README.md", "docs"], - "dependencies": { - "@agentv/core": "workspace:*", - "@arizeai/phoenix-client": "6.10.0", - "@arizeai/phoenix-evals": "1.0.3", - "yaml": "^2.8.3" - }, - "devDependencies": { - "tsup": "8.3.5", - "typescript": "5.8.3" - } -} diff --git a/packages/phoenix-adapter/src/agentv/discovery.ts b/packages/phoenix-adapter/src/agentv/discovery.ts deleted file mode 100644 index 1fc102d6e..000000000 --- a/packages/phoenix-adapter/src/agentv/discovery.ts +++ /dev/null @@ -1,39 +0,0 @@ -import { readdir } from 'node:fs/promises'; -import path from 'node:path'; -import { relativePosix } from './path.js'; -import type { AgentVSource } from './types.js'; - -const EVAL_FILE_RE = /\.(?:eval|EVAL)\.ya?ml$/; - -async function walk(dir: string, results: string[] = []): Promise { - const entries = await readdir(dir, { withFileTypes: true }); - for (const entry of entries) { - if (entry.name === 'node_modules' || entry.name === '.git') continue; - const fullPath = path.join(dir, entry.name); - if (entry.isDirectory()) { - await walk(fullPath, results); - continue; - } - if (entry.isFile()) results.push(fullPath); - } - return results; -} - -export async function discoverAgentVEvals(agentvRoot: string): Promise { - const examplesRoot = path.join(agentvRoot, 'examples'); - const files = await walk(examplesRoot); - - return files - .filter( - (file) => EVAL_FILE_RE.test(path.basename(file)) || path.basename(file) === 'evals.json', - ) - .map((file): AgentVSource => { - const relativePath = relativePosix(agentvRoot, file); - return { - path: file, - relativePath, - kind: path.basename(file) === 'evals.json' ? 'agent-skills-json' : 'eval-yaml', - }; - }) - .sort((a, b) => a.relativePath.localeCompare(b.relativePath)); -} diff --git a/packages/phoenix-adapter/src/agentv/load-spec.ts b/packages/phoenix-adapter/src/agentv/load-spec.ts deleted file mode 100644 index 5ba1fe7a4..000000000 --- a/packages/phoenix-adapter/src/agentv/load-spec.ts +++ /dev/null @@ -1,124 +0,0 @@ -import { existsSync, readFileSync } from 'node:fs'; -import path from 'node:path'; -import { loadTestSuite } from '@agentv/core'; -import YAML from 'yaml'; -import type { - AgentVMessage, - AgentVSource, - JsonObject, - NormalizedAssertion, - NormalizedCase, - NormalizedSuite, -} from './types.js'; - -function parseStructuredFile(filePath: string): unknown { - const content = readFileSync(filePath, 'utf8'); - if (filePath.endsWith('.json')) return JSON.parse(content); - if (filePath.endsWith('.jsonl')) { - return content - .split('\n') - .map((line) => line.trim()) - .filter(Boolean) - .map((line) => JSON.parse(line)); - } - return YAML.parse(content); -} - -function normalizeAssertion(assertion: unknown, index: number): NormalizedAssertion { - if (typeof assertion === 'string') { - return { type: 'rubrics', source: assertion }; - } - const record = (assertion ?? {}) as JsonObject; - const type = String(record.type ?? record.name ?? `assertion-${index + 1}`); - return { - name: typeof record.name === 'string' ? record.name : undefined, - type, - source: assertion, - }; -} - -function normalizeExpectedOutput(test: { - readonly reference_answer?: string; - readonly expected_output?: unknown; -}): unknown { - const expectedOutput = test.expected_output; - const hasExpectedOutput = Array.isArray(expectedOutput) - ? expectedOutput.length > 0 - : expectedOutput !== undefined; - if (!hasExpectedOutput) return undefined; - return test.reference_answer ?? expectedOutput; -} - -function deriveAgentVRoot(source: AgentVSource): string { - return path.resolve(source.path, ...source.relativePath.split('/').map(() => '..')); -} - -function collectUnsupported( - raw: JsonObject, - suite: Awaited>, -): readonly string[] { - const unsupported: string[] = []; - for (const key of ['workspace', 'before_all', 'after_all', 'matrix']) { - if (raw[key] !== undefined) unsupported.push(key); - } - if (suite.trials !== undefined) unsupported.push('trials'); - if (suite.workspacePath !== undefined) unsupported.push('workspace'); - if ((suite.targets?.length ?? 0) > 0 || (suite.targetRefs?.length ?? 0) > 0) - unsupported.push('matrix'); - return [...new Set(unsupported)]; -} - -/** - * Load an AgentV-authored eval source into the Phoenix adapter's normalized shape. - * - * AgentV eval YAML remains the source of truth: this adapter delegates case expansion, - * external case files, assertion parsing, Agent Skills `evals.json`, interpolation, and - * metadata handling to `@agentv/core`'s loader, then normalizes the result for - * the legacy Phoenix mapping fixture. This is not an AgentV-to-Phoenix completed - * run export path; keep production Phoenix work read-only through external_trace - * correlation. - */ -export async function loadAgentVEvalSuite(source: AgentVSource): Promise { - if (!existsSync(source.path)) { - throw new Error(`AgentV eval source does not exist: ${source.path}`); - } - - const raw = (parseStructuredFile(source.path) ?? {}) as JsonObject; - const loaded = await loadTestSuite(source.path, deriveAgentVRoot(source)); - const suiteName = - raw.skill_name ?? - loaded.tests[0]?.suite ?? - raw.name ?? - path.basename(source.path).replace(/\.ya?ml$/, ''); - - const cases = loaded.tests.map((test, index): NormalizedCase => { - const assertions = (test.assertions ?? []).map((assertion, assertionIndex) => - normalizeAssertion(assertion, assertionIndex), - ); - - return { - id: String(test.id ?? `case-${index + 1}`), - criteria: test.criteria || undefined, - input: test.input as readonly AgentVMessage[], - expectedOutput: normalizeExpectedOutput(test), - assertions, - metadata: { - ...(test.metadata ?? {}), - ...(test.targets ? { targets: test.targets } : {}), - }, - sourcePath: source.relativePath, - }; - }); - - return { - name: String(suiteName), - description: typeof raw.description === 'string' ? raw.description : undefined, - source, - cases, - suiteAssertions: [], - warnings: cases - .filter((testCase) => testCase.input.length === 0) - .map((testCase) => `${source.relativePath}: ${testCase.id} has no input`), - unsupportedFeatures: collectUnsupported(raw, loaded), - }; -} diff --git a/packages/phoenix-adapter/src/agentv/path.ts b/packages/phoenix-adapter/src/agentv/path.ts deleted file mode 100644 index 5d56b0a26..000000000 --- a/packages/phoenix-adapter/src/agentv/path.ts +++ /dev/null @@ -1,22 +0,0 @@ -import { existsSync } from 'node:fs'; -import path from 'node:path'; - -export function resolveAgentVRoot(input?: string): string { - const configured = input ?? process.env.AGENTV_ROOT ?? defaultAgentVRoot(); - return path.resolve(configured); -} - -function defaultAgentVRoot(): string { - for (const candidate of ['../agentv', '../../agentv']) { - if (existsSync(path.resolve(candidate, 'examples'))) return candidate; - } - return '../agentv'; -} - -export function toPosixPath(value: string): string { - return value.split(path.sep).join('/'); -} - -export function relativePosix(from: string, to: string): string { - return toPosixPath(path.relative(from, to)); -} diff --git a/packages/phoenix-adapter/src/agentv/types.ts b/packages/phoenix-adapter/src/agentv/types.ts deleted file mode 100644 index 317704f0c..000000000 --- a/packages/phoenix-adapter/src/agentv/types.ts +++ /dev/null @@ -1,40 +0,0 @@ -export type JsonObject = Record; - -export type AgentVSourceKind = 'eval-yaml' | 'agent-skills-json'; - -export interface AgentVSource { - readonly path: string; - readonly relativePath: string; - readonly kind: AgentVSourceKind; -} - -export interface AgentVMessage { - readonly role: string; - readonly content: unknown; -} - -export interface NormalizedAssertion { - readonly name?: string; - readonly type: string; - readonly source: unknown; -} - -export interface NormalizedCase { - readonly id: string; - readonly criteria?: string; - readonly input: readonly AgentVMessage[]; - readonly expectedOutput?: unknown; - readonly assertions: readonly NormalizedAssertion[]; - readonly metadata: JsonObject; - readonly sourcePath: string; -} - -export interface NormalizedSuite { - readonly name: string; - readonly description?: string; - readonly source: AgentVSource; - readonly cases: readonly NormalizedCase[]; - readonly suiteAssertions: readonly NormalizedAssertion[]; - readonly warnings: readonly string[]; - readonly unsupportedFeatures: readonly string[]; -} diff --git a/packages/phoenix-adapter/src/cli.ts b/packages/phoenix-adapter/src/cli.ts deleted file mode 100644 index c40287b5e..000000000 --- a/packages/phoenix-adapter/src/cli.ts +++ /dev/null @@ -1,72 +0,0 @@ -#!/usr/bin/env bun -import path from 'node:path'; -import { resolveAgentVRoot } from './agentv/path.js'; -import { formatMarkdownReport } from './parity/report.js'; -import type { RunOptions } from './run/options.js'; -import { runSuite } from './run/run-suite.js'; - -function usage(): string { - return `Usage: - bun src/cli.ts run --dry-run [--agentv-root ../agentv] [--filter features/assert] [--eval-file path] [--out reports/dry-run.json] - -Boundary note: - Internal legacy fixture only. AgentV does not export/project completed runs, - traces, transcripts, datasets, experiments, or indexes into Phoenix. - Dashboard does not depend on Phoenix, px, or Phoenix database tables. - -Options: - --agentv-root Source AgentV checkout. Defaults to AGENTV_ROOT or ../agentv. - --eval-file Run one eval source. - --filter Run sources whose repo-relative path contains text. - --dry-run Build and verify a legacy in-memory report without contacting Phoenix. - --out JSON report path. Defaults to reports/phoenix-report.json. - --namespace Legacy Phoenix dataset name prefix for internal fixture runs. - --fail-on-unsupported Treat unsupported features as failures. -`; -} - -function parseArgs(argv: readonly string[]): RunOptions | undefined { - if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) { - console.log(usage()); - return undefined; - } - - const [command, ...rest] = argv; - if (command !== 'run') { - throw new Error(`Unknown command: ${command}\n\n${usage()}`); - } - - const values = new Map(); - for (let index = 0; index < rest.length; index += 1) { - const arg = rest[index]; - if (!arg.startsWith('--')) continue; - if (arg === '--dry-run' || arg === '--fail-on-unsupported') { - values.set(arg, true); - continue; - } - const value = rest[index + 1]; - if (!value || value.startsWith('--')) throw new Error(`Missing value for ${arg}`); - values.set(arg, value); - index += 1; - } - - const agentvRoot = resolveAgentVRoot(values.get('--agentv-root') as string | undefined); - const evalFile = values.get('--eval-file') as string | undefined; - - return { - agentvRoot, - evalFile: evalFile ? path.resolve(evalFile) : undefined, - filter: values.get('--filter') as string | undefined, - dryRun: values.get('--dry-run') === true, - out: path.resolve((values.get('--out') as string | undefined) ?? 'reports/phoenix-report.json'), - namespace: values.get('--namespace') as string | undefined, - failOnUnsupported: values.get('--fail-on-unsupported') === true, - }; -} - -const options = parseArgs(Bun.argv.slice(2)); -if (options) { - const report = await runSuite(options); - console.log(formatMarkdownReport(report)); - if (report.failedSuites > 0) process.exit(1); -} diff --git a/packages/phoenix-adapter/src/evaluators/deterministic.ts b/packages/phoenix-adapter/src/evaluators/deterministic.ts deleted file mode 100644 index 5b804255a..000000000 --- a/packages/phoenix-adapter/src/evaluators/deterministic.ts +++ /dev/null @@ -1,189 +0,0 @@ -import type { - DeterministicEvaluatorType, - EvaluationContext, - EvaluatorResult, - NormalizedAssertionConfig, -} from './types.js'; - -export function evaluateDeterministicAssertion( - assertion: NormalizedAssertionConfig, - context: EvaluationContext, -): EvaluatorResult { - const type = assertion.type as DeterministicEvaluatorType; - - switch (type) { - case 'contains': - return evaluateContains(assertion, context); - case 'regex': - return evaluateRegex(assertion, context); - case 'equals': - return evaluateEquals(assertion, context); - case 'is-json': - return evaluateIsJson(assertion, context); - default: - return result( - assertion, - false, - `Unsupported deterministic evaluator: ${String(assertion.type)}`, - ); - } -} - -function evaluateContains( - assertion: NormalizedAssertionConfig, - context: EvaluationContext, -): EvaluatorResult { - const needle = assertionValue(assertion); - - if (needle === undefined || needle === null) { - return result(assertion, false, 'contains assertion is missing a value'); - } - - const haystack = stringifyOutput(context.output); - const expected = String(needle); - const caseSensitive = assertion.caseSensitive !== false; - const passed = caseSensitive - ? haystack.includes(expected) - : haystack.toLocaleLowerCase().includes(expected.toLocaleLowerCase()); - - return result( - assertion, - passed, - passed ? `Output contains ${expected}` : `Output does not contain ${expected}`, - ); -} - -function evaluateRegex( - assertion: NormalizedAssertionConfig, - context: EvaluationContext, -): EvaluatorResult { - const pattern = assertion.pattern ?? stringAssertionValue(assertion); - - if (!pattern) { - return result(assertion, false, 'regex assertion is missing a pattern'); - } - - try { - const regex = new RegExp(pattern, assertion.flags); - const passed = regex.test(stringifyOutput(context.output)); - - return result( - assertion, - passed, - passed ? `Output matches /${pattern}/` : `Output does not match /${pattern}/`, - ); - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - - return result(assertion, false, `Invalid regex pattern: ${message}`); - } -} - -function evaluateEquals( - assertion: NormalizedAssertionConfig, - context: EvaluationContext, -): EvaluatorResult { - const expected = hasAssertionValue(assertion) - ? assertionValue(assertion) - : context.expectedOutput; - const passed = stableValue(context.output) === stableValue(expected); - - return result( - assertion, - passed, - passed ? 'Output equals expected value' : 'Output does not equal expected value', - ); -} - -function evaluateIsJson( - assertion: NormalizedAssertionConfig, - context: EvaluationContext, -): EvaluatorResult { - const parsed = parseJsonLike(context.output); - const passed = parsed.ok; - - return result(assertion, passed, passed ? 'Output is valid JSON' : parsed.reason); -} - -function hasAssertionValue(assertion: NormalizedAssertionConfig): boolean { - return ( - 'value' in assertion || - 'expected' in assertion || - 'text' in assertion || - 'substring' in assertion - ); -} - -function assertionValue(assertion: NormalizedAssertionConfig): unknown { - if ('value' in assertion) return assertion.value; - if ('expected' in assertion) return assertion.expected; - if ('text' in assertion) return assertion.text; - if ('substring' in assertion) return assertion.substring; - - return undefined; -} - -function stringAssertionValue(assertion: NormalizedAssertionConfig): string | undefined { - const value = assertionValue(assertion); - - return typeof value === 'string' ? value : undefined; -} - -function stringifyOutput(output: unknown): string { - if (typeof output === 'string') return output; - if (output === undefined || output === null) return ''; - - return JSON.stringify(output); -} - -function stableValue(value: unknown): string { - return JSON.stringify(sortJsonValue(value)); -} - -function sortJsonValue(value: unknown): unknown { - if (Array.isArray(value)) return value.map(sortJsonValue); - - if (value && typeof value === 'object') { - return Object.fromEntries( - Object.entries(value as Record) - .sort(([left], [right]) => left.localeCompare(right)) - .map(([key, entryValue]) => [key, sortJsonValue(entryValue)]), - ); - } - - return value; -} - -function parseJsonLike(value: unknown): { ok: true } | { ok: false; reason: string } { - if (value && typeof value === 'object') return { ok: true }; - - if (typeof value !== 'string') { - return { ok: false, reason: 'Output is not a JSON string or object' }; - } - - try { - JSON.parse(value); - - return { ok: true }; - } catch (error) { - const message = error instanceof Error ? error.message : String(error); - - return { ok: false, reason: `Output is not valid JSON: ${message}` }; - } -} - -function result( - assertion: NormalizedAssertionConfig, - passed: boolean, - explanation: string, -): EvaluatorResult { - return { - name: assertion.name ?? String(assertion.type), - type: assertion.type, - score: passed ? 1 : 0, - passed, - label: passed ? 'pass' : 'fail', - explanation, - metadata: assertion.metadata, - }; -} diff --git a/packages/phoenix-adapter/src/evaluators/registry.ts b/packages/phoenix-adapter/src/evaluators/registry.ts deleted file mode 100644 index c7623f33d..000000000 --- a/packages/phoenix-adapter/src/evaluators/registry.ts +++ /dev/null @@ -1,134 +0,0 @@ -import { evaluateDeterministicAssertion } from './deterministic.js'; -import type { - DeterministicEvaluatorType, - EvaluationContext, - EvaluatorAdapter, - EvaluatorResult, - EvaluatorType, - NormalizedAssertionConfig, - UnsupportedEvaluatorReport, - UnsupportedEvaluatorType, -} from './types.js'; - -export const deterministicEvaluatorTypes = [ - 'contains', - 'regex', - 'equals', - 'is-json', -] as const satisfies readonly DeterministicEvaluatorType[]; - -export const unsupportedEvaluatorTypes = [ - 'llm-grader', - 'rubrics', - 'code-grader', - 'composite', - 'field-accuracy', - 'execution-metrics', - 'tool-trajectory', - 'cost', - 'latency', - 'trial-output-consistency', -] as const satisfies readonly UnsupportedEvaluatorType[]; - -const unsupportedReasons: Record = { - 'llm-grader': 'Model-backed Phoenix judging is not implemented in this first-pass adapter.', - rubrics: - 'Rubric scoring requires a model-backed or rubric-specific adapter that is not implemented yet.', - 'code-grader': - 'Code grader execution is deferred until source-relative sandboxing is implemented.', - composite: - 'Composite evaluator aggregation is deferred until nested evaluator normalization is available.', - 'field-accuracy': - 'Field-level accuracy scoring is deferred until expected output field mapping is implemented.', - 'execution-metrics': - 'Execution metric scoring needs run or trace metric data that is not wired yet.', - 'tool-trajectory': 'Tool trajectory scoring needs trace data that is not wired yet.', - cost: 'Cost scoring needs Phoenix or provider usage metrics that are not wired yet.', - latency: 'Latency scoring needs Phoenix or runner timing metrics that are not wired yet.', - 'trial-output-consistency': - 'Trial consistency scoring needs multiple trial outputs that are not wired yet.', -}; - -export function createEvaluatorAdapter(assertion: NormalizedAssertionConfig): EvaluatorAdapter { - const type = assertion.type; - const name = assertion.name ?? String(type); - - if (isDeterministicEvaluatorType(type)) { - return { - type, - name, - supported: true, - evaluate: (context) => evaluateDeterministicAssertion(assertion, context), - }; - } - - return { - type, - name, - supported: false, - evaluate: () => unsupportedResult(assertion), - }; -} - -export function createEvaluatorRegistry( - assertions: readonly NormalizedAssertionConfig[], -): EvaluatorAdapter[] { - return assertions.map(createEvaluatorAdapter); -} - -export function evaluateAssertion( - assertion: NormalizedAssertionConfig, - context: EvaluationContext, -): EvaluatorResult { - return createEvaluatorAdapter(assertion).evaluate(context); -} - -export function unsupportedEvaluatorReports( - assertions: readonly NormalizedAssertionConfig[], -): UnsupportedEvaluatorReport[] { - return assertions.filter(isUnsupportedAssertion).map((assertion) => ({ - name: assertion.name ?? String(assertion.type), - type: assertion.type, - reason: unsupportedReason(assertion.type), - metadata: assertion.metadata, - })); -} - -export function isSupportedEvaluatorType(type: EvaluatorType): boolean { - return isDeterministicEvaluatorType(type); -} - -export function isDeterministicEvaluatorType( - type: EvaluatorType, -): type is DeterministicEvaluatorType { - return (deterministicEvaluatorTypes as readonly string[]).includes(String(type)); -} - -export function isKnownUnsupportedEvaluatorType( - type: EvaluatorType, -): type is UnsupportedEvaluatorType { - return (unsupportedEvaluatorTypes as readonly string[]).includes(String(type)); -} - -function isUnsupportedAssertion(assertion: NormalizedAssertionConfig): boolean { - return !isSupportedEvaluatorType(assertion.type); -} - -function unsupportedResult(assertion: NormalizedAssertionConfig): EvaluatorResult { - return { - name: assertion.name ?? String(assertion.type), - type: assertion.type, - score: 0, - passed: false, - label: 'unsupported', - explanation: unsupportedReason(assertion.type), - unsupported: true, - metadata: assertion.metadata, - }; -} - -function unsupportedReason(type: EvaluatorType): string { - if (isKnownUnsupportedEvaluatorType(type)) return unsupportedReasons[type]; - - return `Unknown evaluator family: ${String(type)}`; -} diff --git a/packages/phoenix-adapter/src/evaluators/types.ts b/packages/phoenix-adapter/src/evaluators/types.ts deleted file mode 100644 index e0658c6c9..000000000 --- a/packages/phoenix-adapter/src/evaluators/types.ts +++ /dev/null @@ -1,59 +0,0 @@ -export type DeterministicEvaluatorType = 'contains' | 'regex' | 'equals' | 'is-json'; - -export type UnsupportedEvaluatorType = - | 'llm-grader' - | 'rubrics' - | 'code-grader' - | 'composite' - | 'field-accuracy' - | 'execution-metrics' - | 'tool-trajectory' - | 'cost' - | 'latency' - | 'trial-output-consistency'; - -export type EvaluatorType = DeterministicEvaluatorType | UnsupportedEvaluatorType | string; - -export interface NormalizedAssertionConfig { - type: EvaluatorType; - name?: string; - value?: unknown; - expected?: unknown; - pattern?: string; - flags?: string; - caseSensitive?: boolean; - metadata?: Record; - [key: string]: unknown; -} - -export interface EvaluationContext { - output: unknown; - expectedOutput?: unknown; - input?: unknown; - metadata?: Record; -} - -export interface EvaluatorResult { - name: string; - type: EvaluatorType; - score: number; - passed: boolean; - label: 'pass' | 'fail' | 'unsupported'; - explanation: string; - unsupported?: boolean; - metadata?: Record; -} - -export interface EvaluatorAdapter { - type: EvaluatorType; - name: string; - supported: boolean; - evaluate(context: EvaluationContext): EvaluatorResult; -} - -export interface UnsupportedEvaluatorReport { - name: string; - type: EvaluatorType; - reason: string; - metadata?: Record; -} diff --git a/packages/phoenix-adapter/src/index.ts b/packages/phoenix-adapter/src/index.ts deleted file mode 100644 index 34e0143c6..000000000 --- a/packages/phoenix-adapter/src/index.ts +++ /dev/null @@ -1,13 +0,0 @@ -export { discoverAgentVEvals } from './agentv/discovery.js'; -export { loadAgentVEvalSuite } from './agentv/load-spec.js'; -export { phoenixOtelBackend } from './otel-backend.js'; -export { createPhoenixDatasetPayload } from './phoenix/datasets.js'; -export { runSuite } from './run/run-suite.js'; - -export type { - AgentVSource, - NormalizedAssertion, - NormalizedCase, - NormalizedSuite, -} from './agentv/types.js'; -export type { PhoenixDatasetPayload } from './phoenix/types.js'; diff --git a/packages/phoenix-adapter/src/otel-backend.ts b/packages/phoenix-adapter/src/otel-backend.ts deleted file mode 100644 index 7ef96c7aa..000000000 --- a/packages/phoenix-adapter/src/otel-backend.ts +++ /dev/null @@ -1,93 +0,0 @@ -/** - * Phoenix OTel backend resolver. - * - * This file is the Phoenix-specific boundary for AgentV trace export routing. - * Core receives only generic OTLP endpoint, headers, and resource attributes. - */ - -import type { OtelBackendResolver } from '@agentv/core'; - -const DEFAULT_PHOENIX_COLLECTOR_ENDPOINT = 'http://localhost:6006'; -const OPENINFERENCE_PROJECT_NAME = 'openinference.project.name'; - -export const phoenixOtelBackend: OtelBackendResolver = { - name: 'phoenix', - resolve: ({ env }) => { - const warnings: string[] = []; - const headers = parsePhoenixClientHeaders(env.PHOENIX_CLIENT_HEADERS, warnings); - const apiKey = trimOptional(env.PHOENIX_API_KEY); - - if (apiKey && !hasHeader(headers, 'authorization')) { - headers.authorization = `Bearer ${apiKey}`; - } - - return { - endpoint: normalizePhoenixTraceEndpoint( - trimOptional(env.PHOENIX_COLLECTOR_ENDPOINT) ?? DEFAULT_PHOENIX_COLLECTOR_ENDPOINT, - ), - headers, - resourceAttributes: { - [OPENINFERENCE_PROJECT_NAME]: trimOptional(env.PHOENIX_PROJECT_NAME) ?? 'default', - }, - warnings, - }; - }, -}; - -function normalizePhoenixTraceEndpoint(endpoint: string): string { - const trimmed = endpoint.replace(/\/+$/, ''); - if (trimmed.endsWith('/v1/traces')) { - return trimmed; - } - return `${trimmed}/v1/traces`; -} - -function parsePhoenixClientHeaders( - value: string | undefined, - warnings: string[], -): Record { - const headers: Record = {}; - const raw = trimOptional(value); - - if (!raw) { - return headers; - } - - for (const segment of raw.split(',')) { - const entry = segment.trim(); - if (!entry) { - continue; - } - - const separatorIndex = entry.indexOf('='); - if (separatorIndex <= 0) { - warnings.push(`Ignoring invalid PHOENIX_CLIENT_HEADERS entry: ${entry}`); - continue; - } - - const rawName = entry.slice(0, separatorIndex).trim(); - const rawHeaderValue = entry.slice(separatorIndex + 1).trim(); - - try { - const name = decodeURIComponent(rawName).trim().toLowerCase(); - const headerValue = decodeURIComponent(rawHeaderValue).trim(); - if (name) { - headers[name] = headerValue; - } - } catch { - warnings.push(`Ignoring invalid PHOENIX_CLIENT_HEADERS entry: ${entry}`); - } - } - - return headers; -} - -function hasHeader(headers: Record, name: string): boolean { - const normalized = name.toLowerCase(); - return Object.keys(headers).some((header) => header.toLowerCase() === normalized); -} - -function trimOptional(value: string | undefined): string | undefined { - const trimmed = value?.trim(); - return trimmed ? trimmed : undefined; -} diff --git a/packages/phoenix-adapter/src/parity/baselines.ts b/packages/phoenix-adapter/src/parity/baselines.ts deleted file mode 100644 index 5a3acfdc3..000000000 --- a/packages/phoenix-adapter/src/parity/baselines.ts +++ /dev/null @@ -1,27 +0,0 @@ -import { existsSync, readFileSync } from 'node:fs'; -import path from 'node:path'; - -export interface BaselineSummary { - readonly path: string; - readonly testIds: readonly string[]; -} - -export function baselinePathFor(evalSourcePath: string): string { - return evalSourcePath.replace(/\.ya?ml$/i, '.baseline.jsonl'); -} - -export function readBaselineSummary(evalSourcePath: string): BaselineSummary | undefined { - if (!/\.ya?ml$/i.test(evalSourcePath)) return undefined; - const baselinePath = baselinePathFor(evalSourcePath); - if (!existsSync(baselinePath)) return undefined; - - const lines = readFileSync(baselinePath, 'utf8') - .split('\n') - .map((line) => line.trim()) - .filter(Boolean); - - return { - path: path.basename(baselinePath), - testIds: lines.map((line) => String(JSON.parse(line).test_id ?? JSON.parse(line).testId ?? '')), - }; -} diff --git a/packages/phoenix-adapter/src/parity/compare.ts b/packages/phoenix-adapter/src/parity/compare.ts deleted file mode 100644 index bc30805b2..000000000 --- a/packages/phoenix-adapter/src/parity/compare.ts +++ /dev/null @@ -1,74 +0,0 @@ -import type { NormalizedSuite } from '../agentv/types.js'; -import { unsupportedEvaluatorReports } from '../evaluators/registry.js'; -import type { NormalizedAssertionConfig } from '../evaluators/types.js'; -import type { PhoenixDatasetPayload } from '../phoenix/types.js'; -import { readBaselineSummary } from './baselines.js'; -import type { SuiteRunSummary } from './types.js'; - -export function compareDryRunSuite( - suite: NormalizedSuite, - dataset: PhoenixDatasetPayload, -): SuiteRunSummary { - const failures: string[] = []; - const baseline = readBaselineSummary(suite.source.path); - const caseIds = new Set(suite.cases.map((testCase) => testCase.id)); - const unsupportedFeatures = [ - ...suite.unsupportedFeatures, - ...unsupportedEvaluatorReports( - suite.cases.flatMap((testCase) => testCase.assertions.map(toAssertionConfig)), - ).map((report) => `${report.type}: ${report.name}`), - ]; - - if (dataset.examples.length !== suite.cases.length) { - failures.push( - `Dataset example count ${dataset.examples.length} does not match case count ${suite.cases.length}`, - ); - } - - if (baseline) { - const baselineIds = new Set(baseline.testIds); - for (const id of baselineIds) { - if (!caseIds.has(id)) - failures.push(`Baseline test id is missing from converted suite: ${id}`); - } - for (const id of caseIds) { - if (!baselineIds.has(id)) failures.push(`Converted test id is missing from baseline: ${id}`); - } - } - - if (suite.cases.length === 0) failures.push('Suite contains no normalized cases'); - - return { - source: suite.source.relativePath, - datasetName: dataset.name, - testCount: suite.cases.length, - baselineCount: baseline?.testIds.length, - warningCount: suite.warnings.length, - unsupportedFeatures: [...new Set(unsupportedFeatures)].sort(), - status: failures.length === 0 ? 'passed' : 'failed', - failures, - }; -} - -function toAssertionConfig(assertion: { - readonly type: string; - readonly name?: string; - readonly source: unknown; -}): NormalizedAssertionConfig { - if ( - assertion.source && - typeof assertion.source === 'object' && - !Array.isArray(assertion.source) - ) { - return { - ...(assertion.source as Record), - type: assertion.type, - name: assertion.name, - }; - } - return { - type: assertion.type, - name: assertion.name, - value: assertion.source, - }; -} diff --git a/packages/phoenix-adapter/src/parity/report.ts b/packages/phoenix-adapter/src/parity/report.ts deleted file mode 100644 index c03171f1a..000000000 --- a/packages/phoenix-adapter/src/parity/report.ts +++ /dev/null @@ -1,60 +0,0 @@ -import { mkdir, writeFile } from 'node:fs/promises'; -import path from 'node:path'; -import type { RunReport, SuiteRunSummary } from './types.js'; - -export function buildRunReport(input: { - readonly dryRun: boolean; - readonly agentvRoot: string; - readonly suites: readonly SuiteRunSummary[]; -}): RunReport { - const unsupported = new Set(); - for (const suite of input.suites) { - for (const feature of suite.unsupportedFeatures) unsupported.add(feature); - } - - return { - generatedAt: new Date().toISOString(), - dryRun: input.dryRun, - agentvRoot: input.agentvRoot, - suiteCount: input.suites.length, - testCount: input.suites.reduce((sum, suite) => sum + suite.testCount, 0), - passedSuites: input.suites.filter((suite) => suite.status === 'passed').length, - failedSuites: input.suites.filter((suite) => suite.status === 'failed').length, - unsupportedFeatures: [...unsupported].sort(), - suites: input.suites, - }; -} - -export async function writeJsonReport(report: RunReport, outPath: string): Promise { - await mkdir(path.dirname(outPath), { recursive: true }); - await writeFile(outPath, `${JSON.stringify(report, null, 2)}\n`, 'utf8'); -} - -export function formatMarkdownReport(report: RunReport): string { - const lines = [ - '# Phoenix AgentV Eval Report', - '', - `Generated: ${report.generatedAt}`, - `Dry run: ${String(report.dryRun)}`, - `Suites: ${report.suiteCount}`, - `Tests: ${report.testCount}`, - `Passed suites: ${report.passedSuites}`, - `Failed suites: ${report.failedSuites}`, - '', - '| Status | Source | Tests | Baseline | Unsupported |', - '| --- | --- | ---: | ---: | --- |', - ]; - - for (const suite of report.suites) { - lines.push( - `| ${suite.status} | \`${suite.source}\` | ${suite.testCount} | ${suite.baselineCount ?? ''} | ${suite.unsupportedFeatures.join(', ')} |`, - ); - if (suite.phoenixExperimentId) { - lines.push( - `| | Phoenix experiment \`${suite.phoenixExperimentId}\` | ${suite.phoenixRunCount ?? ''} | ${suite.phoenixEvaluationRunCount ?? ''} | |`, - ); - } - } - - return `${lines.join('\n')}\n`; -} diff --git a/packages/phoenix-adapter/src/parity/types.ts b/packages/phoenix-adapter/src/parity/types.ts deleted file mode 100644 index 27f4365f5..000000000 --- a/packages/phoenix-adapter/src/parity/types.ts +++ /dev/null @@ -1,25 +0,0 @@ -export interface SuiteRunSummary { - readonly source: string; - readonly datasetName: string; - readonly testCount: number; - readonly baselineCount?: number; - readonly warningCount: number; - readonly unsupportedFeatures: readonly string[]; - readonly phoenixExperimentId?: string; - readonly phoenixRunCount?: number; - readonly phoenixEvaluationRunCount?: number; - readonly status: 'passed' | 'failed'; - readonly failures: readonly string[]; -} - -export interface RunReport { - readonly generatedAt: string; - readonly dryRun: boolean; - readonly agentvRoot: string; - readonly suiteCount: number; - readonly testCount: number; - readonly passedSuites: number; - readonly failedSuites: number; - readonly unsupportedFeatures: readonly string[]; - readonly suites: readonly SuiteRunSummary[]; -} diff --git a/packages/phoenix-adapter/src/phoenix/datasets.ts b/packages/phoenix-adapter/src/phoenix/datasets.ts deleted file mode 100644 index d14da7914..000000000 --- a/packages/phoenix-adapter/src/phoenix/datasets.ts +++ /dev/null @@ -1,29 +0,0 @@ -import type { NormalizedSuite } from '../agentv/types.js'; -import { stableDatasetName } from './names.js'; -import type { PhoenixDatasetPayload } from './types.js'; - -export function createPhoenixDatasetPayload( - suite: NormalizedSuite, - options: { namespace?: string } = {}, -): PhoenixDatasetPayload { - return { - name: stableDatasetName(suite.source.relativePath, options.namespace), - description: suite.description, - assertions: suite.suiteAssertions, - examples: suite.cases.map((testCase) => ({ - input: { - messages: testCase.input, - criteria: testCase.criteria, - agentv_assertion_configs: testCase.assertions.map((assertion) => assertion.source), - }, - output: testCase.expectedOutput, - metadata: { - ...testCase.metadata, - agentv_source: testCase.sourcePath, - agentv_test_id: testCase.id, - agentv_assertions: testCase.assertions.map((assertion) => assertion.name ?? assertion.type), - agentv_assertion_configs: testCase.assertions.map((assertion) => assertion.source), - }, - })), - }; -} diff --git a/packages/phoenix-adapter/src/phoenix/names.ts b/packages/phoenix-adapter/src/phoenix/names.ts deleted file mode 100644 index 3fea54b21..000000000 --- a/packages/phoenix-adapter/src/phoenix/names.ts +++ /dev/null @@ -1,12 +0,0 @@ -import crypto from 'node:crypto'; - -export function stableDatasetName(sourcePath: string, namespace = 'agentv-examples'): string { - const slug = sourcePath - .replace(/\.[^.]+$/, '') - .toLowerCase() - .replace(/[^a-z0-9]+/g, '-') - .replace(/^-|-$/g, '') - .slice(0, 80); - const hash = crypto.createHash('sha1').update(sourcePath).digest('hex').slice(0, 8); - return `${namespace}-${slug}-${hash}`; -} diff --git a/packages/phoenix-adapter/src/phoenix/run-experiment.ts b/packages/phoenix-adapter/src/phoenix/run-experiment.ts deleted file mode 100644 index 7fd0ec922..000000000 --- a/packages/phoenix-adapter/src/phoenix/run-experiment.ts +++ /dev/null @@ -1,182 +0,0 @@ -import { createDataset } from '@arizeai/phoenix-client/datasets'; -import { asExperimentEvaluator, runExperiment } from '@arizeai/phoenix-client/experiments'; -import type { Example } from '@arizeai/phoenix-client/types/datasets'; -import { evaluateAssertion } from '../evaluators/registry.js'; -import type { NormalizedAssertionConfig } from '../evaluators/types.js'; -import type { PhoenixDatasetPayload } from './types.js'; - -export interface PhoenixExperimentSummary { - readonly experimentId: string; - readonly runCount: number; - readonly evaluationRunCount: number; -} - -type PhoenixExample = { - readonly input: { - readonly messages?: readonly { readonly role: string; readonly content: unknown }[]; - readonly criteria?: string; - readonly agentv_assertion_configs?: readonly unknown[]; - }; - readonly output?: Record | null; - readonly metadata?: { - readonly agentv_assertion_configs?: readonly unknown[]; - } | null; -}; - -export async function runPhoenixExperiment( - dataset: PhoenixDatasetPayload, -): Promise { - const created = await createDataset({ - name: dataset.name, - description: dataset.description ?? dataset.name, - examples: dataset.examples.map((example) => ({ - input: example.input, - output: normalizeExpected(example.output), - metadata: example.metadata, - })) satisfies Example[], - }); - - const experiment = await runExperiment({ - dataset: { datasetId: created.datasetId }, - experimentName: `${dataset.name}-${Date.now()}`, - experimentDescription: `Phoenix equivalent run for ${dataset.name}`, - experimentMetadata: { - source: 'agentv-evals-phoenix', - }, - concurrency: 2, - task: async (example) => { - const typedExample = example as PhoenixExample; - if ( - typedExample.output !== undefined && - typedExample.output !== null && - typedExample.output.answer !== undefined && - typedExample.output.answer !== null - ) { - return stringifyAnswer(typedExample.output.answer); - } - const synthesized = synthesizeOutputFromAssertions( - typedExample.input.agentv_assertion_configs ?? - typedExample.metadata?.agentv_assertion_configs, - ); - if (synthesized !== undefined) return synthesized; - const lastMessage = typedExample.input.messages?.at(-1); - return stringifyAnswer(lastMessage?.content ?? typedExample.input.criteria ?? ''); - }, - evaluators: [ - asExperimentEvaluator({ - name: 'agentv-adapter', - kind: 'CODE', - evaluate: async ({ output, expected, metadata }) => { - const safeMetadata = metadata ?? undefined; - const configs = normalizeAssertionConfigs(safeMetadata?.agentv_assertion_configs); - if (configs.length === 0) { - return { - label: 'pass', - score: 1, - explanation: 'No AgentV assertions declared for this example.', - metadata: {}, - }; - } - - const expectedOutput = unwrapPhoenixExpectedOutput(expected); - const results = configs.map((config) => - evaluateAssertion(config, { - output, - expectedOutput, - metadata: safeMetadata, - }), - ); - const supportedResults = results.filter((result) => !result.unsupported); - const scoredResults = supportedResults.length > 0 ? supportedResults : results; - const score = - scoredResults.reduce((sum, result) => sum + result.score, 0) / - Math.max(scoredResults.length, 1); - const unsupportedCount = results.filter((result) => result.unsupported).length; - - return { - label: unsupportedCount > 0 ? 'unsupported' : score >= 1 ? 'pass' : 'fail', - score, - explanation: results - .map((result) => `${result.name}: ${result.explanation}`) - .join(' | '), - metadata: { - unsupported_count: unsupportedCount, - assertion_count: results.length, - }, - }; - }, - }), - ], - }); - - return { - experimentId: experiment.id, - runCount: Object.keys(experiment.runs).length, - evaluationRunCount: experiment.evaluationRuns?.length ?? 0, - }; -} - -export function unwrapPhoenixExpectedOutput(expected: unknown): unknown { - if (expected && typeof expected === 'object' && 'answer' in expected) { - return (expected as { readonly answer?: unknown }).answer; - } - return expected; -} - -function normalizeExpected(output: unknown): Record { - if (Array.isArray(output) && output.length === 1) { - const first = output[0] as { readonly content?: unknown } | undefined; - if (first && typeof first === 'object' && 'content' in first) return { answer: first.content }; - } - return { answer: output ?? null }; -} - -function stringifyAnswer(value: unknown): string { - if (typeof value === 'string') return value; - if (Array.isArray(value) && value.length === 1) { - const first = value[0] as { readonly content?: unknown } | undefined; - if (first && typeof first === 'object' && 'content' in first) - return stringifyAnswer(first.content); - } - return JSON.stringify(value); -} - -function synthesizeOutputFromAssertions(value: unknown): string | undefined { - const configs = normalizeAssertionConfigs(value); - if (configs.length === 0) return undefined; - if (configs.some((config) => config.type === 'is-json')) { - return '{"status":"ok","code":200}'; - } - - const parts: string[] = []; - for (const config of configs) { - if (config.type === 'equals') return stringifyAnswer(config.value ?? config.expected ?? ''); - if (config.type === 'contains' && config.value !== undefined) parts.push(String(config.value)); - if (config.type === 'regex') - parts.push(sampleForRegex(String(config.pattern ?? config.value ?? ''))); - } - - return parts.length > 0 ? parts.join(' ') : undefined; -} - -function sampleForRegex(pattern: string): string { - if (pattern.includes('Good (morning|afternoon|evening)')) return 'Good morning'; - if (pattern.includes('[Hh]ello')) return 'Hello'; - return pattern.replace(/[[\]()+?^$\\]/g, '').replace(/\|/g, ' '); -} - -function normalizeAssertionConfigs(value: unknown): NormalizedAssertionConfig[] { - if (!Array.isArray(value)) return []; - return value.map((entry, index) => { - if (typeof entry === 'string') return { type: 'rubrics', value: entry }; - if (entry && typeof entry === 'object') { - const record = entry as Record; - return { - ...record, - type: String(record.type ?? record.name ?? `assertion-${index + 1}`), - name: typeof record.name === 'string' ? record.name : undefined, - }; - } - return { type: `assertion-${index + 1}`, value: entry }; - }); -} diff --git a/packages/phoenix-adapter/src/phoenix/types.ts b/packages/phoenix-adapter/src/phoenix/types.ts deleted file mode 100644 index cdfb468a8..000000000 --- a/packages/phoenix-adapter/src/phoenix/types.ts +++ /dev/null @@ -1,23 +0,0 @@ -import type { AgentVMessage, JsonObject, NormalizedAssertion } from '../agentv/types.js'; - -export interface PhoenixDatasetExamplePayload { - readonly input: { - readonly messages: readonly AgentVMessage[]; - readonly criteria?: string; - readonly agentv_assertion_configs: readonly unknown[]; - }; - readonly output?: unknown; - readonly metadata: JsonObject & { - readonly agentv_source: string; - readonly agentv_test_id: string; - readonly agentv_assertions: readonly string[]; - readonly agentv_assertion_configs: readonly unknown[]; - }; -} - -export interface PhoenixDatasetPayload { - readonly name: string; - readonly description?: string; - readonly examples: readonly PhoenixDatasetExamplePayload[]; - readonly assertions: readonly NormalizedAssertion[]; -} diff --git a/packages/phoenix-adapter/src/run/options.ts b/packages/phoenix-adapter/src/run/options.ts deleted file mode 100644 index 286a24108..000000000 --- a/packages/phoenix-adapter/src/run/options.ts +++ /dev/null @@ -1,9 +0,0 @@ -export interface RunOptions { - readonly agentvRoot: string; - readonly evalFile?: string; - readonly filter?: string; - readonly dryRun: boolean; - readonly out: string; - readonly namespace?: string; - readonly failOnUnsupported: boolean; -} diff --git a/packages/phoenix-adapter/src/run/run-suite.ts b/packages/phoenix-adapter/src/run/run-suite.ts deleted file mode 100644 index 24f12df1c..000000000 --- a/packages/phoenix-adapter/src/run/run-suite.ts +++ /dev/null @@ -1,73 +0,0 @@ -import path from 'node:path'; -import { discoverAgentVEvals } from '../agentv/discovery.js'; -import { loadAgentVEvalSuite } from '../agentv/load-spec.js'; -import { relativePosix } from '../agentv/path.js'; -import { compareDryRunSuite } from '../parity/compare.js'; -import { buildRunReport, writeJsonReport } from '../parity/report.js'; -import type { RunReport } from '../parity/types.js'; -import { createPhoenixDatasetPayload } from '../phoenix/datasets.js'; -import { runPhoenixExperiment } from '../phoenix/run-experiment.js'; -import type { RunOptions } from './options.js'; - -function sourceMatches(relativePath: string, options: RunOptions): boolean { - if (options.evalFile) { - const requested = relativePosix(options.agentvRoot, path.resolve(options.evalFile)); - return relativePath === requested || relativePath === options.evalFile; - } - if (options.filter) return relativePath.includes(options.filter); - return true; -} - -export async function runSuite(options: RunOptions): Promise { - const sources = (await discoverAgentVEvals(options.agentvRoot)).filter((source) => - sourceMatches(source.relativePath, options), - ); - if (sources.length === 0) { - throw new Error('No AgentV eval sources matched the requested options.'); - } - - const summaries = []; - for (const source of sources) { - const suite = await loadAgentVEvalSuite(source); - const dataset = createPhoenixDatasetPayload(suite, { namespace: options.namespace }); - let summary = compareDryRunSuite(suite, dataset); - if (options.failOnUnsupported && summary.unsupportedFeatures.length > 0) { - summary = { - ...summary, - status: 'failed' as const, - failures: [ - ...summary.failures, - `Unsupported features present: ${summary.unsupportedFeatures.join(', ')}`, - ], - }; - } - if (!options.dryRun) { - const experiment = await runPhoenixExperiment(dataset); - summary = { - ...summary, - phoenixExperimentId: experiment.experimentId, - phoenixRunCount: experiment.runCount, - phoenixEvaluationRunCount: experiment.evaluationRunCount, - }; - if (experiment.runCount !== suite.cases.length) { - summary = { - ...summary, - status: 'failed', - failures: [ - ...summary.failures, - `Phoenix run count ${experiment.runCount} does not match case count ${suite.cases.length}`, - ], - }; - } - } - summaries.push(summary); - } - - const report = buildRunReport({ - dryRun: options.dryRun, - agentvRoot: options.agentvRoot, - suites: summaries, - }); - await writeJsonReport(report, options.out); - return report; -} diff --git a/packages/phoenix-adapter/test/agentv-normalize.test.ts b/packages/phoenix-adapter/test/agentv-normalize.test.ts deleted file mode 100644 index 89c7035f0..000000000 --- a/packages/phoenix-adapter/test/agentv-normalize.test.ts +++ /dev/null @@ -1,155 +0,0 @@ -import { describe, expect, test } from 'bun:test'; -import { mkdirSync, writeFileSync } from 'node:fs'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; -import { discoverAgentVEvals } from '../src/agentv/discovery.js'; -import { loadAgentVEvalSuite } from '../src/agentv/load-spec.js'; - -function fixtureRoot(name: string): string { - return path.join(tmpdir(), `agentv-phoenix-${name}-${crypto.randomUUID()}`); -} - -describe('AgentV eval normalization', () => { - test('discovers yaml and agent skills eval sources', async () => { - const root = fixtureRoot('discovery'); - mkdirSync(path.join(root, 'examples', 'features', 'basic', 'evals'), { recursive: true }); - mkdirSync(path.join(root, 'examples', 'features', 'skills'), { recursive: true }); - writeFileSync( - path.join(root, 'examples', 'features', 'basic', 'evals', 'dataset.eval.yaml'), - 'tests: []\n', - ); - writeFileSync( - path.join(root, 'examples', 'features', 'skills', 'evals.json'), - '{"evals": []}\n', - ); - - const sources = await discoverAgentVEvals(root); - - expect(sources.map((source) => source.relativePath)).toEqual([ - 'examples/features/basic/evals/dataset.eval.yaml', - 'examples/features/skills/evals.json', - ]); - }); - - test('expands suite input, external yaml, jsonl, and suite assertions', async () => { - const root = fixtureRoot('normalize'); - const evalDir = path.join(root, 'examples', 'features', 'external', 'evals'); - mkdirSync(path.join(evalDir, 'cases'), { recursive: true }); - writeFileSync( - path.join(evalDir, 'dataset.eval.yaml'), - `name: external -input: - - role: system - content: shared -assertions: - - type: contains - value: ok -tests: - - id: inline - criteria: inline criteria - input: hello - - file://cases/more.jsonl -`, - ); - writeFileSync( - path.join(evalDir, 'cases', 'more.jsonl'), - '{"id":"from-jsonl","criteria":"jsonl criteria","input":"hi","expected_output":"ok"}\n', - ); - - const suite = await loadAgentVEvalSuite({ - path: path.join(evalDir, 'dataset.eval.yaml'), - relativePath: 'examples/features/external/evals/dataset.eval.yaml', - kind: 'eval-yaml', - }); - - expect(suite.cases).toHaveLength(2); - expect(suite.cases[0]?.input.map((message) => message.role)).toEqual(['system', 'user']); - expect(suite.cases[1]?.expectedOutput).toBe('ok'); - expect(suite.cases[1]?.assertions[0]?.type).toBe('contains'); - }); - - test('preserves present object and empty-string expected output values', async () => { - const root = fixtureRoot('expected-values'); - const evalPath = path.join( - root, - 'examples', - 'features', - 'expected-values', - 'evals', - 'dataset.eval.yaml', - ); - mkdirSync(path.dirname(evalPath), { recursive: true }); - writeFileSync( - evalPath, - `name: expected-values -tests: - - id: object-output - input: hi - expected_output: - ok: true - - id: empty-string-output - input: hi - expected_output: "" - - id: assertion-only - input: hi - assertions: - - type: contains - value: ok -`, - ); - - const suite = await loadAgentVEvalSuite({ - path: evalPath, - relativePath: 'examples/features/expected-values/evals/dataset.eval.yaml', - kind: 'eval-yaml', - }); - - expect(suite.cases.map((testCase) => testCase.expectedOutput)).toEqual([ - '{\n "ok": true\n}', - '', - undefined, - ]); - }); - - test('leaves assertion-only expected output absent for Phoenix synthesis', async () => { - const sourcePath = path.resolve('../../examples/features/assert/evals/dataset.eval.yaml'); - const suite = await loadAgentVEvalSuite({ - path: sourcePath, - relativePath: 'examples/features/assert/evals/dataset.eval.yaml', - kind: 'eval-yaml', - }); - - expect(suite.cases).toHaveLength(4); - expect(suite.cases.map((testCase) => testCase.expectedOutput)).toEqual([ - undefined, - undefined, - undefined, - undefined, - ]); - }); - - test('normalizes Agent Skills evals.json', async () => { - const root = fixtureRoot('skills'); - const evalPath = path.join(root, 'examples', 'features', 'agent-skills-evals', 'evals.json'); - mkdirSync(path.dirname(evalPath), { recursive: true }); - writeFileSync( - evalPath, - JSON.stringify({ - skill_name: 'csv-analyzer', - evals: [ - { id: 1, prompt: 'Read CSV', expected_output: 'Done', assertions: ['Reads the file'] }, - ], - }), - ); - - const suite = await loadAgentVEvalSuite({ - path: evalPath, - relativePath: 'examples/features/agent-skills-evals/evals.json', - kind: 'agent-skills-json', - }); - - expect(suite.name).toBe('csv-analyzer'); - expect(suite.cases[0]?.id).toBe('1'); - expect(suite.cases[0]?.assertions[0]?.type).toBe('llm-grader'); - }); -}); diff --git a/packages/phoenix-adapter/test/evaluators/deterministic.test.ts b/packages/phoenix-adapter/test/evaluators/deterministic.test.ts deleted file mode 100644 index 1e233b2b9..000000000 --- a/packages/phoenix-adapter/test/evaluators/deterministic.test.ts +++ /dev/null @@ -1,112 +0,0 @@ -import { describe, expect, test } from 'bun:test'; -import { evaluateAssertion } from '../../src/evaluators/registry.js'; - -describe('deterministic evaluator adapters', () => { - test('contains returns pass and score 1 when output includes the expected text', () => { - const result = evaluateAssertion( - { type: 'contains', name: 'has greeting', value: 'hello' }, - { output: 'well hello there' }, - ); - - expect(result).toMatchObject({ - name: 'has greeting', - type: 'contains', - passed: true, - score: 1, - label: 'pass', - }); - }); - - test('contains returns fail and score 0 when output does not include the expected text', () => { - const result = evaluateAssertion( - { type: 'contains', value: 'goodbye' }, - { output: 'hello there' }, - ); - - expect(result.passed).toBe(false); - expect(result.score).toBe(0); - expect(result.label).toBe('fail'); - }); - - test('contains can compare case-insensitively', () => { - const result = evaluateAssertion( - { type: 'contains', value: 'HELLO', caseSensitive: false }, - { output: 'hello there' }, - ); - - expect(result.passed).toBe(true); - expect(result.score).toBe(1); - }); - - test('regex returns pass for matching output', () => { - const result = evaluateAssertion( - { type: 'regex', pattern: 'order-[0-9]+$' }, - { output: 'created order-123' }, - ); - - expect(result.passed).toBe(true); - expect(result.score).toBe(1); - }); - - test('regex returns fail for invalid patterns', () => { - const result = evaluateAssertion({ type: 'regex', pattern: '[' }, { output: 'anything' }); - - expect(result.passed).toBe(false); - expect(result.score).toBe(0); - expect(result.explanation).toContain('Invalid regex pattern'); - }); - - test('equals performs stable deep equality for object outputs', () => { - const result = evaluateAssertion( - { type: 'equals', expected: { b: 2, a: ['x', { c: true }] } }, - { output: { a: ['x', { c: true }], b: 2 } }, - ); - - expect(result.passed).toBe(true); - expect(result.score).toBe(1); - }); - - test('equals preserves explicit null expected values', () => { - const pass = evaluateAssertion( - { type: 'equals', expected: null }, - { output: null, expectedOutput: 'fallback' }, - ); - const fail = evaluateAssertion( - { type: 'equals', expected: null }, - { output: 'fallback', expectedOutput: 'fallback' }, - ); - - expect(pass.passed).toBe(true); - expect(pass.score).toBe(1); - expect(fail.passed).toBe(false); - expect(fail.score).toBe(0); - }); - - test('equals can fall back to context expected output', () => { - const result = evaluateAssertion( - { type: 'equals' }, - { output: 'done', expectedOutput: 'done' }, - ); - - expect(result.passed).toBe(true); - expect(result.score).toBe(1); - }); - - test('is-json passes JSON strings and object outputs', () => { - const jsonString = evaluateAssertion({ type: 'is-json' }, { output: '{"ok":true}' }); - const objectOutput = evaluateAssertion({ type: 'is-json' }, { output: { ok: true } }); - - expect(jsonString.passed).toBe(true); - expect(jsonString.score).toBe(1); - expect(objectOutput.passed).toBe(true); - expect(objectOutput.score).toBe(1); - }); - - test('is-json fails non-JSON text', () => { - const result = evaluateAssertion({ type: 'is-json' }, { output: 'not json' }); - - expect(result.passed).toBe(false); - expect(result.score).toBe(0); - expect(result.explanation).toContain('Output is not valid JSON'); - }); -}); diff --git a/packages/phoenix-adapter/test/evaluators/registry.test.ts b/packages/phoenix-adapter/test/evaluators/registry.test.ts deleted file mode 100644 index 5417b6d7b..000000000 --- a/packages/phoenix-adapter/test/evaluators/registry.test.ts +++ /dev/null @@ -1,69 +0,0 @@ -import { describe, expect, test } from 'bun:test'; -import { - createEvaluatorRegistry, - isSupportedEvaluatorType, - unsupportedEvaluatorReports, - unsupportedEvaluatorTypes, -} from '../../src/evaluators/registry.js'; -import type { NormalizedAssertionConfig } from '../../src/evaluators/types.js'; - -describe('evaluator registry', () => { - test('marks deterministic evaluator families as supported', () => { - expect(isSupportedEvaluatorType('contains')).toBe(true); - expect(isSupportedEvaluatorType('regex')).toBe(true); - expect(isSupportedEvaluatorType('equals')).toBe(true); - expect(isSupportedEvaluatorType('is-json')).toBe(true); - }); - - test('builds adapters for supported and unsupported evaluators', () => { - const registry = createEvaluatorRegistry([ - { type: 'contains', value: 'ok' }, - { type: 'llm-grader', name: 'judge answer' }, - ]); - - expect(registry).toHaveLength(2); - expect(registry[0]?.supported).toBe(true); - expect(registry[1]?.supported).toBe(false); - - const unsupportedResult = registry[1]?.evaluate({ output: 'ok' }); - - expect(unsupportedResult).toMatchObject({ - name: 'judge answer', - type: 'llm-grader', - passed: false, - score: 0, - label: 'unsupported', - unsupported: true, - }); - }); - - test('reports every first-pass unsupported evaluator family with a reason', () => { - const assertions: NormalizedAssertionConfig[] = unsupportedEvaluatorTypes.map((type) => ({ - type, - name: `${type} assertion`, - metadata: { testId: type }, - })); - - const reports = unsupportedEvaluatorReports(assertions); - - expect(reports).toHaveLength(unsupportedEvaluatorTypes.length); - - for (const type of unsupportedEvaluatorTypes) { - const report = reports.find((entry) => entry.type === type); - - expect(report?.name).toBe(`${type} assertion`); - expect(report?.reason.length).toBeGreaterThan(0); - expect(report?.metadata).toEqual({ testId: type }); - } - }); - - test('reports unknown evaluator families instead of silently treating them as supported', () => { - const [report] = unsupportedEvaluatorReports([{ type: 'custom-family', name: 'custom' }]); - - expect(report).toMatchObject({ - name: 'custom', - type: 'custom-family', - reason: 'Unknown evaluator family: custom-family', - }); - }); -}); diff --git a/packages/phoenix-adapter/test/otel-backend.test.ts b/packages/phoenix-adapter/test/otel-backend.test.ts deleted file mode 100644 index d6555e487..000000000 --- a/packages/phoenix-adapter/test/otel-backend.test.ts +++ /dev/null @@ -1,70 +0,0 @@ -import { describe, expect, test } from 'bun:test'; - -import { phoenixOtelBackend } from '../src/otel-backend.js'; - -describe('phoenixOtelBackend', () => { - test('resolves default local Phoenix endpoint and project resource attribute', () => { - const resolved = phoenixOtelBackend.resolve({ cwd: process.cwd(), env: {} }); - - expect(resolved).toEqual({ - endpoint: 'http://localhost:6006/v1/traces', - headers: {}, - resourceAttributes: { - 'openinference.project.name': 'default', - }, - warnings: [], - }); - }); - - test('normalizes Phoenix endpoint, API key, client headers, and project name', () => { - const resolved = phoenixOtelBackend.resolve({ - cwd: process.cwd(), - env: { - PHOENIX_COLLECTOR_ENDPOINT: 'https://app.phoenix.arize.com/s/my-space/', - PHOENIX_API_KEY: 'px-key', - PHOENIX_PROJECT_NAME: 'agentv-evals', - PHOENIX_CLIENT_HEADERS: 'x-custom=one%20two', - }, - }); - - expect(resolved).toEqual({ - endpoint: 'https://app.phoenix.arize.com/s/my-space/v1/traces', - headers: { - 'x-custom': 'one two', - authorization: 'Bearer px-key', - }, - resourceAttributes: { - 'openinference.project.name': 'agentv-evals', - }, - warnings: [], - }); - }); - - test('does not append duplicate traces path or override explicit auth header', () => { - const resolved = phoenixOtelBackend.resolve({ - cwd: process.cwd(), - env: { - PHOENIX_COLLECTOR_ENDPOINT: 'http://phoenix.example.com/v1/traces', - PHOENIX_API_KEY: 'px-key', - PHOENIX_CLIENT_HEADERS: 'authorization=Bearer%20override', - }, - }); - - expect(resolved.endpoint).toBe('http://phoenix.example.com/v1/traces'); - expect(resolved.headers).toEqual({ authorization: 'Bearer override' }); - }); - - test('reports invalid client header entries as warnings', () => { - const resolved = phoenixOtelBackend.resolve({ - cwd: process.cwd(), - env: { - PHOENIX_CLIENT_HEADERS: 'valid=value,not-a-header', - }, - }); - - expect(resolved.headers).toEqual({ valid: 'value' }); - expect(resolved.warnings).toEqual([ - 'Ignoring invalid PHOENIX_CLIENT_HEADERS entry: not-a-header', - ]); - }); -}); diff --git a/packages/phoenix-adapter/test/parity.test.ts b/packages/phoenix-adapter/test/parity.test.ts deleted file mode 100644 index 5e0fdfe36..000000000 --- a/packages/phoenix-adapter/test/parity.test.ts +++ /dev/null @@ -1,41 +0,0 @@ -import { expect, test } from 'bun:test'; -import { mkdirSync, writeFileSync } from 'node:fs'; -import { tmpdir } from 'node:os'; -import path from 'node:path'; -import type { NormalizedSuite } from '../src/agentv/types.js'; -import { compareDryRunSuite } from '../src/parity/compare.js'; -import { createPhoenixDatasetPayload } from '../src/phoenix/datasets.js'; - -test('dry-run parity compares baseline ids with normalized cases', () => { - const dir = path.join(tmpdir(), `agentv-phoenix-parity-${crypto.randomUUID()}`); - mkdirSync(dir, { recursive: true }); - const evalPath = path.join(dir, 'dataset.eval.yaml'); - writeFileSync(evalPath, 'tests: []\n'); - writeFileSync(path.join(dir, 'dataset.eval.baseline.jsonl'), '{"test_id":"known"}\n'); - - const suite: NormalizedSuite = { - name: 'suite', - source: { - path: evalPath, - relativePath: 'examples/x/evals/dataset.eval.yaml', - kind: 'eval-yaml', - }, - cases: [ - { - id: 'known', - input: [{ role: 'user', content: 'hi' }], - assertions: [], - metadata: {}, - sourcePath: 'examples/x/evals/dataset.eval.yaml', - }, - ], - suiteAssertions: [], - warnings: [], - unsupportedFeatures: [], - }; - - const summary = compareDryRunSuite(suite, createPhoenixDatasetPayload(suite)); - - expect(summary.status).toBe('passed'); - expect(summary.baselineCount).toBe(1); -}); diff --git a/packages/phoenix-adapter/test/phoenix-datasets.test.ts b/packages/phoenix-adapter/test/phoenix-datasets.test.ts deleted file mode 100644 index 7d15f118d..000000000 --- a/packages/phoenix-adapter/test/phoenix-datasets.test.ts +++ /dev/null @@ -1,42 +0,0 @@ -import { expect, test } from 'bun:test'; -import type { NormalizedSuite } from '../src/agentv/types.js'; -import { createPhoenixDatasetPayload } from '../src/phoenix/datasets.js'; -import { unwrapPhoenixExpectedOutput } from '../src/phoenix/run-experiment.js'; - -test('creates deterministic Phoenix dataset payloads from normalized suites', () => { - const suite: NormalizedSuite = { - name: 'assert-demo', - source: { - path: '/tmp/dataset.eval.yaml', - relativePath: 'examples/features/assert/evals/dataset.eval.yaml', - kind: 'eval-yaml', - }, - cases: [ - { - id: 'contains-check', - criteria: 'Must contain Hello', - input: [{ role: 'user', content: 'Say hello' }], - expectedOutput: 'Hello', - assertions: [{ type: 'contains', source: { type: 'contains', value: 'Hello' } }], - metadata: { tag: 'demo' }, - sourcePath: 'examples/features/assert/evals/dataset.eval.yaml', - }, - ], - suiteAssertions: [], - warnings: [], - unsupportedFeatures: [], - }; - - const dataset = createPhoenixDatasetPayload(suite); - - expect(dataset.name).toStartWith('agentv-examples-examples-features-assert-evals-dataset-eval'); - expect(dataset.examples[0]?.input.messages[0]?.content).toBe('Say hello'); - expect(dataset.examples[0]?.metadata.agentv_test_id).toBe('contains-check'); - expect(dataset.examples[0]?.metadata.agentv_assertions).toEqual(['contains']); -}); - -test('unwraps Phoenix expected answer payloads for AgentV deterministic graders', () => { - expect(unwrapPhoenixExpectedOutput({ answer: 'done' })).toBe('done'); - expect(unwrapPhoenixExpectedOutput({ answer: { ok: true } })).toEqual({ ok: true }); - expect(unwrapPhoenixExpectedOutput({ other: 'shape' })).toEqual({ other: 'shape' }); -}); diff --git a/packages/phoenix-adapter/tsconfig.json b/packages/phoenix-adapter/tsconfig.json deleted file mode 100644 index 984b50599..000000000 --- a/packages/phoenix-adapter/tsconfig.json +++ /dev/null @@ -1,10 +0,0 @@ -{ - "extends": "../../tsconfig.base.json", - "compilerOptions": { - "module": "NodeNext", - "moduleResolution": "NodeNext", - "noEmit": true, - "types": ["bun"] - }, - "include": ["src/**/*.ts", "test/**/*.ts"] -} diff --git a/packages/phoenix-adapter/tsup.config.ts b/packages/phoenix-adapter/tsup.config.ts deleted file mode 100644 index edc4764f3..000000000 --- a/packages/phoenix-adapter/tsup.config.ts +++ /dev/null @@ -1,12 +0,0 @@ -import { defineConfig } from 'tsup'; - -export default defineConfig({ - entry: ['src/index.ts'], - format: ['esm'], - sourcemap: true, - clean: true, - dts: true, - target: 'node20', - tsconfig: './tsconfig.json', - external: ['@agentv/core'], -});