diff --git a/.agents/publish.md b/.agents/publish.md index 59be02a8f..90743ecc3 100644 --- a/.agents/publish.md +++ b/.agents/publish.md @@ -32,6 +32,7 @@ Contract gates: ## Published Packages - `packages/core/` publishes as `@agentv/core` +- `packages/sdk/` publishes as `@agentv/sdk` - `apps/cli/` publishes as `agentv` - The CLI bundles workspace dependencies via tsup with `noExternal: ["@agentv/core"]` - Install with `bun install -g agentv` or `npm install -g agentv` diff --git a/Dockerfile b/Dockerfile index 65f3b8082..d7d1eab96 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,6 @@ WORKDIR /app COPY package.json bun.lock ./ COPY packages/core/package.json packages/core/ COPY packages/sdk/package.json packages/sdk/ -COPY packages/eval/package.json packages/eval/ COPY apps/cli/package.json apps/cli/ COPY apps/dashboard/package.json apps/dashboard/ COPY apps/web/package.json apps/web/ @@ -60,8 +59,6 @@ COPY --from=build /app/packages/core/dist ./packages/core/dist COPY --from=build /app/packages/core/package.json ./packages/core/ COPY --from=build /app/packages/sdk/dist ./packages/sdk/dist COPY --from=build /app/packages/sdk/package.json ./packages/sdk/ -COPY --from=build /app/packages/eval/dist ./packages/eval/dist -COPY --from=build /app/packages/eval/package.json ./packages/eval/ COPY --from=build /app/apps/cli/dist ./apps/cli/dist COPY --from=build /app/apps/cli/package.json ./apps/cli/ COPY --from=build /app/apps/cli/node_modules ./apps/cli/node_modules diff --git a/apps/web/src/content/docs/docs/evaluation/sdk.mdx b/apps/web/src/content/docs/docs/evaluation/sdk.mdx index 04febba13..7f9db1758 100644 --- a/apps/web/src/content/docs/docs/evaluation/sdk.mdx +++ b/apps/web/src/content/docs/docs/evaluation/sdk.mdx @@ -35,7 +35,7 @@ npm install @agentv/sdk import { defineCodeGrader } from '@agentv/sdk'; ``` -The general policy is hard convergence for same-week or unreleased surface names: use the correct package, field, or wire name instead of carrying aliases. The package rename is the exception because `@agentv/eval` was already published. It remains a temporary deprecated compatibility package that re-exports `@agentv/sdk` for existing consumers, but it should not appear in new docs, examples, scaffolds, or skills except as migration guidance. +The general policy is hard convergence for same-week or unreleased surface names: use the correct package, field, or wire name instead of carrying aliases. `@agentv/eval` was already published, then deprecated on npm, and has been removed from this repository. New docs, examples, scaffolds, and skills should use `@agentv/sdk` directly. ## Choose a Surface diff --git a/bun.lock b/bun.lock index 4d8cdd29d..366f850c9 100644 --- a/bun.lock +++ b/bun.lock @@ -118,13 +118,6 @@ "@earendil-works/pi-coding-agent", ], }, - "packages/eval": { - "name": "@agentv/eval", - "version": "4.42.4", - "dependencies": { - "@agentv/sdk": "workspace:*", - }, - }, "packages/phoenix-adapter": { "name": "@agentv/phoenix-adapter", "version": "4.31.4-next.1", @@ -155,8 +148,6 @@ "@agentv/dashboard": ["@agentv/dashboard@workspace:apps/dashboard"], - "@agentv/eval": ["@agentv/eval@workspace:packages/eval"], - "@agentv/phoenix-adapter": ["@agentv/phoenix-adapter@workspace:packages/phoenix-adapter"], "@agentv/sdk": ["@agentv/sdk@workspace:packages/sdk"], diff --git a/docs/adr/2026-06-18-sdk-surface-decision.md b/docs/adr/2026-06-18-sdk-surface-decision.md index de670c5d8..9ff4ed564 100644 --- a/docs/adr/2026-06-18-sdk-surface-decision.md +++ b/docs/adr/2026-06-18-sdk-surface-decision.md @@ -4,6 +4,10 @@ Date: 2026-06-18 Status: Accepted +Update 2026-06-22: `@agentv/eval` has been deprecated on npm and removed from +this repository. `@agentv/sdk` is the only lightweight TypeScript SDK package +published by the release workflow. + Supersedes: the earlier 2026-06-18 decision in this file that rejected a separate `@agentv/sdk` package. @@ -61,13 +65,14 @@ For this package rename, npm evidence changes the compatibility choice: 2026-05-19 through 2026-06-17. - `@agentv/sdk` is not yet published at the time of this decision. -Therefore `@agentv/eval` remains only as a thin deprecated compatibility -package that re-exports `@agentv/sdk` for existing consumers. It should not be -used by new docs, examples, scaffolds, or skills except when explaining the -migration. +Therefore `@agentv/eval` was kept temporarily as a thin deprecated +compatibility package that re-exported `@agentv/sdk` for existing consumers. It +must not be used by new docs, examples, scaffolds, or skills except when +explaining the migration. -Future removal of `@agentv/eval` requires an explicit release/migration -decision. The compatibility package must not grow new API surface. +After npm deprecation, the explicit removal decision was made on 2026-06-22. +The compatibility package is no longer part of the workspace, release script, +publish script, or runtime Docker image. ## Non-Goals @@ -95,15 +100,12 @@ Positive: Negative: -- one temporary compatibility package remains until a later removal decision -- release scripts and examples need to carry both package paths during the - migration window +- users of the deprecated package must migrate imports to `@agentv/sdk` ## Tracker Impact - `av-bv4.11`: this ADR supersedes the previous no-new-sdk decision and records the new package-boundary decision. -- `av-bv4.12`: implementation should move the SDK surface to `packages/sdk` / - `@agentv/sdk`, keep `@agentv/eval` only as a deprecated shim because it was - already published, and update repo references so the old name is not taught as - primary. +- `av-bv4.12`: implementation moved the SDK surface to `packages/sdk` / + `@agentv/sdk`. The deprecated shim has since been removed after npm + deprecation. diff --git a/docs/plans/2026-06-06-001-agentv-eval-authoring-extensibility-plan.md b/docs/plans/2026-06-06-001-agentv-eval-authoring-extensibility-plan.md index f39014864..6baaf7f59 100644 --- a/docs/plans/2026-06-06-001-agentv-eval-authoring-extensibility-plan.md +++ b/docs/plans/2026-06-06-001-agentv-eval-authoring-extensibility-plan.md @@ -23,7 +23,7 @@ Prior research on `av-r0s` found that AgentV already has the right low-level com - Custom assertions are discovered from `.agentv/assertions/` in `packages/core/src/evaluation/registry/assertion-discovery.ts`. - Custom graders are discovered from `.agentv/graders/` in `packages/core/src/evaluation/registry/grader-discovery.ts`. - `agentv create` already scaffolds evals, assertions, and providers in `apps/cli/src/commands/create/commands.ts`. -- The lightweight SDK contract lives in `packages/eval/src/assertion.ts`. +- The lightweight SDK contract lives in `packages/sdk/src/assertion.ts`. The main ceremony problem is not that the schema cannot represent tasks. It is that users must repeatedly hand-write the same layout, provenance metadata, adapter scripts, and integration glue. diff --git a/docs/plans/trace-evaluation-architecture.md b/docs/plans/trace-evaluation-architecture.md index 084b42699..4d557da1b 100644 --- a/docs/plans/trace-evaluation-architecture.md +++ b/docs/plans/trace-evaluation-architecture.md @@ -188,7 +188,7 @@ The exact schema belongs in implementation, but these concepts should be stable: ### U1. Trace Artifact Model - **Goal:** Introduce the core TypeScript model, Zod validation, and snake_case boundary conversion for trace artifacts. -- **Files:** `packages/core/src/evaluation/trace.ts`, `packages/core/src/evaluation/types.ts`, `packages/eval/src/schemas.ts`, new focused files under `packages/core/src/evaluation/trace/` if the existing file becomes too large. +- **Files:** `packages/core/src/evaluation/trace.ts`, `packages/core/src/evaluation/types.ts`, `packages/sdk/src/schemas.ts`, new focused files under `packages/core/src/evaluation/trace/` if the existing file becomes too large. - **Patterns:** Follow the existing `TraceSummary`, `TokenUsage`, and project boundary conversion conventions. Keep internal fields camelCase and persisted fields snake_case. - **Test Scenarios:** Add tests that validate round-trip conversion, missing optional content, inferred duration flags, branch metadata, and raw evidence handles. - **Verification:** Unit tests should prove summaries can be derived from trace artifacts without changing current summary behavior, and that trace artifacts do not embed a separate summary payload. @@ -268,7 +268,7 @@ The exact schema belongs in implementation, but these concepts should be stable: ### U7. Grader Context Upgrade - **Goal:** Let built-in and code graders receive trace artifacts in addition to compact summaries and output messages. -- **Files:** `packages/core/src/evaluation/graders/types.ts`, `packages/core/src/evaluation/graders/tool-trajectory.ts`, `packages/core/src/evaluation/graders/execution-metrics.ts`, `packages/core/src/evaluation/graders/code-grader.ts`, `packages/eval/src/index.ts`, `packages/eval/src/schemas.ts`. +- **Files:** `packages/core/src/evaluation/graders/types.ts`, `packages/core/src/evaluation/graders/tool-trajectory.ts`, `packages/core/src/evaluation/graders/execution-metrics.ts`, `packages/core/src/evaluation/graders/code-grader.ts`, `packages/sdk/src/index.ts`, `packages/sdk/src/schemas.ts`. - **Patterns:** Keep existing graders that only read `trace` or `output` working. Trace-aware graders use the richer object. - **Test Scenarios:** Existing `tool-trajectory` modes should pass from live output and from trace artifact input. Argument matching, ordering, latency, status/error matching, and evidence text should be covered. - **Verification:** `trace score` should run `tool-trajectory` against imported traces, not only metrics-only graders. diff --git a/package.json b/package.json index 70d35d429..53c575408 100644 --- a/package.json +++ b/package.json @@ -6,15 +6,15 @@ "packageManager": "bun@1.3.3", "workspaces": ["apps/*", "packages/*"], "scripts": { - "build": "bun --filter @agentv/core build && bun --filter @agentv/sdk build && bun --filter @agentv/eval build && bun --filter @agentv/phoenix-adapter build && bun --filter @agentv/dashboard build && bun --filter agentv build", + "build": "bun --filter @agentv/core build && bun --filter @agentv/sdk build && bun --filter @agentv/phoenix-adapter build && bun --filter @agentv/dashboard build && bun --filter agentv build", "verify": "bun run build && bun run typecheck && bun run lint && bun run test", - "typecheck": "bun --filter @agentv/core typecheck && bun --filter @agentv/sdk typecheck && bun --filter @agentv/eval typecheck && bun --filter @agentv/phoenix-adapter typecheck && bun --filter agentv typecheck", + "typecheck": "bun --filter @agentv/core typecheck && bun --filter @agentv/sdk typecheck && bun --filter @agentv/phoenix-adapter typecheck && bun --filter agentv typecheck", "typecheck:workspace": "tsc -b tsconfig.build.json", "typecheck:watch": "bun --filter @agentv/core typecheck -- --watch & bun --filter agentv typecheck -- --watch", "lint": "biome check .", "format": "biome format --write .", "fix": "biome check --write .", - "test": "bun --filter @agentv/core test && bun --filter @agentv/sdk test && bun --filter @agentv/eval test && bun --filter @agentv/phoenix-adapter test && bun --filter agentv test && bun --filter @agentv/dashboard test", + "test": "bun --filter @agentv/core test && bun --filter @agentv/sdk test && bun --filter @agentv/phoenix-adapter test && bun --filter agentv test && bun --filter @agentv/dashboard test", "test:watch": "bun --filter @agentv/core test:watch & bun --filter agentv test:watch", "agentv": "bun apps/cli/src/cli.ts", "agentv:buildrun": "bun run build && bun apps/cli/dist/cli.js", diff --git a/packages/eval/README.md b/packages/eval/README.md deleted file mode 100644 index 39c51bbe6..000000000 --- a/packages/eval/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# @agentv/eval - -Deprecated compatibility package for the AgentV TypeScript SDK. - -Use `@agentv/sdk` for new code: - -```bash -npm uninstall @agentv/eval -npm install @agentv/sdk -``` - -```typescript -import { defineCodeGrader } from '@agentv/sdk'; -``` - -This package temporarily re-exports the same helper surface because -`@agentv/eval` shipped before the SDK rename. It exists only as a migration -bridge for existing consumers. - -## License - -MIT License - see [LICENSE](../../LICENSE) for details. diff --git a/packages/eval/package.json b/packages/eval/package.json deleted file mode 100644 index 41b0326df..000000000 --- a/packages/eval/package.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "name": "@agentv/eval", - "version": "4.42.4", - "description": "Deprecated compatibility package for @agentv/sdk", - "type": "module", - "repository": { - "type": "git", - "url": "https://github.com/EntityProcess/agentv.git" - }, - "homepage": "https://agentv.dev", - "bugs": { - "url": "https://github.com/EntityProcess/agentv/issues" - }, - "main": "./dist/index.js", - "types": "./dist/index.d.ts", - "exports": { - ".": { - "types": "./dist/index.d.ts", - "import": "./dist/index.js", - "require": "./dist/index.cjs" - } - }, - "scripts": { - "prepublishOnly": "node -e \"if(process.env.ALLOW_PUBLISH!=='1'){console.error('ERROR: Use bun run publish:next, then bun run promote:latest');process.exit(1)}\"", - "build:deps": "bun --filter @agentv/sdk build", - "build": "tsup", - "dev": "tsup --watch", - "typecheck": "bun run build:deps && tsc --noEmit", - "lint": "biome check .", - "format": "biome format --write .", - "fix": "biome check --write .", - "test": "bun run build:deps && bun test" - }, - "files": ["dist", "README.md"], - "dependencies": { - "@agentv/sdk": "workspace:*" - } -} diff --git a/packages/eval/src/index.ts b/packages/eval/src/index.ts deleted file mode 100644 index b7ed53b30..000000000 --- a/packages/eval/src/index.ts +++ /dev/null @@ -1,9 +0,0 @@ -/** - * Deprecated @agentv/eval compatibility entry point. - * - * New code should import from @agentv/sdk. This package remains as a temporary - * bridge for users who installed the previously published package name. - * - * @deprecated Use @agentv/sdk. - */ -export * from '@agentv/sdk'; diff --git a/packages/eval/test/compatibility.test.ts b/packages/eval/test/compatibility.test.ts deleted file mode 100644 index 80ad60dcf..000000000 --- a/packages/eval/test/compatibility.test.ts +++ /dev/null @@ -1,17 +0,0 @@ -import { describe, expect, it } from 'bun:test'; - -import { - createTargetClient, - defineAssertion, - defineCodeGrader, - definePromptTemplate, -} from '../src/index.js'; - -describe('@agentv/eval compatibility package', () => { - it('re-exports the public SDK helpers', () => { - expect(typeof defineAssertion).toBe('function'); - expect(typeof defineCodeGrader).toBe('function'); - expect(typeof definePromptTemplate).toBe('function'); - expect(typeof createTargetClient).toBe('function'); - }); -}); diff --git a/packages/eval/tsconfig.json b/packages/eval/tsconfig.json deleted file mode 100644 index a013e0c30..000000000 --- a/packages/eval/tsconfig.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "extends": "../../tsconfig.base.json", - "compilerOptions": { - "outDir": "dist", - "rootDir": "src" - }, - "include": ["src/**/*"] -} diff --git a/packages/eval/tsup.config.ts b/packages/eval/tsup.config.ts deleted file mode 100644 index d2ea405f4..000000000 --- a/packages/eval/tsup.config.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { defineConfig } from 'tsup'; - -export default defineConfig({ - entry: ['src/index.ts'], - external: ['@agentv/sdk'], - format: ['esm', 'cjs'], - sourcemap: true, - clean: true, - dts: { - resolve: true, - compilerOptions: { - composite: false, - }, - }, - target: 'node20', - outExtension({ format }) { - return { - js: format === 'cjs' ? '.cjs' : '.js', - }; - }, -}); diff --git a/packages/sdk/README.md b/packages/sdk/README.md index 8ef29a8aa..cbe4a77a4 100644 --- a/packages/sdk/README.md +++ b/packages/sdk/README.md @@ -21,7 +21,7 @@ npm install @agentv/sdk import { defineCodeGrader } from '@agentv/sdk'; ``` -`@agentv/eval` remains only as a temporary deprecated compatibility package that re-exports this SDK for existing consumers. New docs, examples, scaffolds, and skills should not import from it. +`@agentv/eval` was a temporary deprecated compatibility package for this SDK. It is no longer published from this repository. Use `@agentv/sdk` directly. ## Quick Start diff --git a/scripts/publish.ts b/scripts/publish.ts index e832d3c42..c06e968ca 100644 --- a/scripts/publish.ts +++ b/scripts/publish.ts @@ -25,7 +25,7 @@ if (requestedTag !== undefined && requestedTag !== 'next') { const npmTag = requestedTag ?? 'latest'; const publishArgs = ['--tag', npmTag, '--access', 'public']; -const PACKAGES = ['packages/core', 'packages/sdk', 'packages/eval', 'apps/cli']; +const PACKAGES = ['packages/core', 'packages/sdk', 'apps/cli']; interface PackageJson { name: string; diff --git a/scripts/release.ts b/scripts/release.ts index f69873301..29bf93175 100644 --- a/scripts/release.ts +++ b/scripts/release.ts @@ -35,7 +35,6 @@ const NEXT_PRERELEASE_TAG = 'next'; const PACKAGE_PATHS = [ 'packages/core/package.json', 'packages/sdk/package.json', - 'packages/eval/package.json', 'apps/cli/package.json', ]; diff --git a/skills-data/agentv-eval-writer/SKILL.md b/skills-data/agentv-eval-writer/SKILL.md index a4ee071d7..823670ff6 100644 --- a/skills-data/agentv-eval-writer/SKILL.md +++ b/skills-data/agentv-eval-writer/SKILL.md @@ -18,7 +18,7 @@ Comprehensive docs: https://agentv.dev Treat YAML as the canonical portable model. Prefer authoring `.eval.yaml` / `EVAL.yaml` first, then use TypeScript helpers, Python scripts, or executable graders only when they lower to the same fields or when the evaluation logic must actually run code. -Use `@agentv/sdk` for TypeScript helper imports. Do not use `@agentv/eval` for new evals, examples, scaffolds, or skill guidance; it is only a deprecated compatibility shim for existing consumers during migration. +Use `@agentv/sdk` for TypeScript helper imports. Do not use `@agentv/eval` for new evals, examples, scaffolds, or skill guidance; it was a deprecated compatibility package and has been removed from this repository. ## Evaluation Types