EntityProcess · christso · Jun 19, 2026 · Jun 19, 2026
diff --git a/apps/web/src/content/docs/docs/evaluation/sdk.mdx b/apps/web/src/content/docs/docs/evaluation/sdk.mdx
@@ -9,7 +9,7 @@ YAML remains AgentV's canonical, portable eval format. The SDK surfaces below ar
 
 AgentV currently provides two npm packages for programmatic use:
 
-- **`@agentv/sdk`** — custom assertions and code graders
+- **`@agentv/sdk`** — YAML-aligned eval authoring, custom assertions, and code graders
 - **`@agentv/core`** — programmatic evaluation API and typed configuration
 
 ## Installation
@@ -27,12 +27,52 @@ npm install @agentv/core
 Use the simplest surface that matches the job:
 
 - **YAML / JSONL first** for portable eval specs you want to run from the CLI, check into a repo, or share across TypeScript and Python workflows.
+- **`defineEval()` / `evalSuite()`** when you want a `.eval.ts` file that mirrors YAML concepts and lowers back to the canonical snake_case contract.
 - **`evaluate({ specFile })`** when you want library control around an existing YAML suite.
 - **Inline `evaluate({ tests })`** when the eval definition truly belongs inside application code. The programmatic API mirrors YAML, but uses current TypeScript naming such as `expectedOutput` and `assert`.
 - **`defineAssertion` / `defineCodeGrader`** when the grading logic itself must execute code.
 
 There is no separate first-party Python authoring SDK today. Python-facing workflows should either emit canonical YAML/JSONL or implement executable graders that consume the standard `snake_case` wire format.
 
+## YAML-Aligned `.eval.ts` Authoring
+
+Use `defineEval()` from `@agentv/sdk` when you want TypeScript ergonomics without creating a second eval vocabulary. The helper keeps authoring in camelCase where TypeScript needs it, then lowers back to the canonical snake_case eval object contract when AgentV loads the file.
+
+```typescript
+// evals/greeting.eval.ts
+import { defineEval } from '@agentv/sdk';
+
+export default defineEval({
+  name: 'hello-suite',
+  execution: {
+    targets: ['mock-sdk'],
+  },
+  workspace: {
+    hooks: {
+      beforeAll: {
+        command: ['echo', 'suite-start'],
+      },
+    },
+  },
+  tests: [
+    {
+      id: 'hello',
+      input: 'Say hello',
+      inputFiles: ['../fixtures/per-test-note.md'],
+      expectedOutput: 'Hello from the mock target',
+      assertions: [{ type: 'contains', value: 'Hello' }],
+    },
+  ],
+});
+```
+
+Useful companion helpers:
+
+- `toEvalYamlObject()` returns the canonical snake_case object.
+- `serializeEvalYaml()` returns YAML text using the same canonical field names.
+
+Assertions are still declared under the canonical `assertions` field, exactly as in YAML; `defineEval()` does not introduce a second YAML vocabulary.
+
 ## Custom Assertions
 
 Use `defineAssertion` from `@agentv/sdk` to create reusable assertion types. Place them in `.agentv/assertions/` — they're auto-discovered by filename.

diff --git a/bun.lock b/bun.lock
diff --git a/examples/README.md b/examples/README.md
@@ -54,6 +54,7 @@ Focused demonstrations of specific AgentV capabilities. Each example includes it
 - [code-grader-sdk](features/code-grader-sdk/) - TypeScript SDK for code graders using `defineCodeGrader()`
 - [sdk-custom-assertion](features/sdk-custom-assertion/) - Custom assertion types using `defineAssertion()`
 - [sdk-programmatic-api](features/sdk-programmatic-api/) - Programmatic evaluation using `evaluate()`
+- [sdk-eval-authoring](features/sdk-eval-authoring/) - YAML-aligned `.eval.ts` authoring using `defineEval()`
 - [sdk-config-file](features/sdk-config-file/) - Typed configuration with `defineConfig()`
 - [prompt-template-sdk](features/prompt-template-sdk/) - Custom LLM grader prompts using `definePromptTemplate()`
 

diff --git a/examples/features/README.md b/examples/features/README.md
@@ -122,6 +122,7 @@ Focused examples for specific AgentV capabilities. Find your use case below, the
 | Example | Description |
 |---------|-------------|
 | [sdk-custom-assertion](sdk-custom-assertion/) | Custom assertion types using `defineAssertion()` |
+| [sdk-eval-authoring](sdk-eval-authoring/) | YAML-aligned `.eval.ts` authoring using `defineEval()` |
 | [sdk-programmatic-api](sdk-programmatic-api/) | Programmatic evaluation using `evaluate()` |
 | [sdk-config-file](sdk-config-file/) | Typed configuration with `defineConfig()` |
 | [prompt-template-sdk](prompt-template-sdk/) | Custom LLM grader prompts using `definePromptTemplate()` |
@@ -167,6 +168,7 @@ Focused examples for specific AgentV capabilities. Find your use case below, the
 | [rubric](rubric/) | LLM grading |
 | [sdk-config-file](sdk-config-file/) | TypeScript SDK |
 | [sdk-custom-assertion](sdk-custom-assertion/) | TypeScript SDK |
+| [sdk-eval-authoring](sdk-eval-authoring/) | TypeScript SDK |
 | [sdk-programmatic-api](sdk-programmatic-api/) | TypeScript SDK |
 | [suite-level-input](suite-level-input/) | Dataset & input |
 | [suite-level-input-files](suite-level-input-files/) | Dataset & input |

diff --git a/examples/features/sdk-eval-authoring/.agentv/targets.yaml b/examples/features/sdk-eval-authoring/.agentv/targets.yaml
@@ -0,0 +1,4 @@
+targets:
+  - name: mock-sdk
+    provider: mock
+    response: Hello from the mock target
diff --git a/examples/features/sdk-eval-authoring/README.md b/examples/features/sdk-eval-authoring/README.md
@@ -0,0 +1,27 @@
+# SDK Example: YAML-Aligned Eval Authoring
+
+Demonstrates authoring a `.eval.ts` suite with `defineEval()` from `@agentv/sdk` while still lowering to AgentV's canonical snake_case YAML/runtime contract.
+
+## What It Shows
+
+1. `defineEval()` brands a TypeScript suite for the `.eval.ts` loader.
+2. CamelCase authoring fields such as `inputFiles`, `expectedOutput`, `beforeAll`, and `beforeEach` lower to the canonical YAML/runtime keys.
+3. The suite still runs through the standard CLI and YAML parser path instead of a separate SDK runner.
+
+## Files
+
+- `evals/greeting.eval.ts` — the YAML-aligned TypeScript suite
+- `.agentv/targets.yaml` — local mock target for a zero-credential run
+- `fixtures/*.md` — attached input files used by the suite
+
+## How to Run
+
+```bash
+# From repository root
+cd examples/features/sdk-eval-authoring
+bun install
+
+bun ../../../apps/cli/src/cli.ts eval evals/greeting.eval.ts
+```
+
+The example uses a local `mock` target, so it does not require API credentials.
diff --git a/examples/features/sdk-eval-authoring/evals/greeting.eval.ts b/examples/features/sdk-eval-authoring/evals/greeting.eval.ts
@@ -0,0 +1,34 @@
+import { defineEval } from '@agentv/sdk';
+
+// Example suite exported as the module's default export.
+// The `inputFiles` paths point from this `evals/` directory to the sibling `fixtures/`
+// folder (presumably resolved relative to this file — confirm against the loader).
+export default defineEval({
+  name: 'sdk-eval-authoring',
+  description: 'YAML-aligned TypeScript eval authoring with @agentv/sdk',
+  inputFiles: ['../fixtures/shared-context.md'],
+  execution: {
+    // `mock-sdk` is declared in this example's `.agentv/targets.yaml` with a fixed response.
+    targets: ['mock-sdk'],
+  },
+  workspace: {
+    hooks: {
+      // Suite-level hook.
+      beforeAll: {
+        command: ['echo', 'suite-start'],
+      },
+    },
+  },
+  tests: [
+    {
+      id: 'hello-from-typescript',
+      input: 'Use the attached notes and say hello.',
+      inputFiles: ['../fixtures/per-test-note.md'],
+      expectedOutput: 'Hello from the mock target',
+      // The mock target's configured response contains "Hello", so this assertion passes.
+      assertions: [{ type: 'contains', value: 'Hello' }],
+      workspace: {
+        hooks: {
+          // Per-test hook declared on the individual test case.
+          beforeEach: {
+            command: ['echo', 'per-test-setup'],
+            timeoutMs: 1_000,
+          },
+        },
+      },
+    },
+  ],
+});
diff --git a/examples/features/sdk-eval-authoring/fixtures/per-test-note.md b/examples/features/sdk-eval-authoring/fixtures/per-test-note.md
@@ -0,0 +1 @@
+Include the word "hello" in the response.
diff --git a/examples/features/sdk-eval-authoring/fixtures/shared-context.md b/examples/features/sdk-eval-authoring/fixtures/shared-context.md
@@ -0,0 +1 @@
+Use a friendly tone.
diff --git a/examples/features/sdk-eval-authoring/package.json b/examples/features/sdk-eval-authoring/package.json
@@ -0,0 +1,8 @@
+{
+  "name": "agentv-example-sdk-eval-authoring",
+  "private": true,
+  "type": "module",
+  "dependencies": {
+    "@agentv/sdk": "file:../../../packages/sdk"
+  }
+}
diff --git a/packages/core/src/evaluation/loaders/ts-eval-loader.ts b/packages/core/src/evaluation/loaders/ts-eval-loader.ts
@@ -15,12 +15,19 @@ import { type EvalConfig, materializeEvalConfig } from '../evaluate.js';
 import { createFunctionProvider } from '../providers/function-provider.js';
 import type { ProviderFactoryFn } from '../providers/provider-registry.js';
 import type { TargetDefinition } from '../providers/types.js';
-import type { EvalSuiteResult } from '../yaml-parser.js';
+import { type EvalSuiteResult, loadTestSuiteFromYamlObject } from '../yaml-parser.js';
 
 const EXPORT_NAMES = ['default', 'config', 'evalConfig'] as const;
+// Symbol keys used to recognise SDK-authored eval suites. `Symbol.for` looks the key up
+// in the global symbol registry, so every module using the same key string gets the
+// same symbol.
+// NOTE(review): presumably `defineEval()` in `@agentv/sdk` attaches these same keys — confirm.
+const SDK_EVAL_SUITE_SYMBOL = Symbol.for('@agentv/sdk/eval-suite');
+const SDK_TO_EVAL_YAML_OBJECT_SYMBOL = Symbol.for('@agentv/sdk/to-eval-yaml-object');
+
+/**
+ * Shape of an SDK-branded eval suite export: an arbitrary object that carries a `true`
+ * brand under `SDK_EVAL_SUITE_SYMBOL` and a zero-argument function under
+ * `SDK_TO_EVAL_YAML_OBJECT_SYMBOL` that returns the suite as a plain object.
+ */
+type SdkEvalSuiteExport = Record<string, unknown> & {
+  readonly [SDK_EVAL_SUITE_SYMBOL]: true;
+  readonly [SDK_TO_EVAL_YAML_OBJECT_SYMBOL]: () => Record<string, unknown>;
+};
 
 export interface TsEvalResult {
-  readonly config: EvalConfig;
+  readonly config: EvalConfig | SdkEvalSuiteExport;
   readonly filePath: string;
 }
 
@@ -38,18 +45,18 @@ export async function loadTsEvalFile(filePath: string): Promise<TsEvalResult> {
   const moduleUrl = pathToFileURL(absolutePath).href;
   const module = await import(moduleUrl);
 
-  let config: EvalConfig | undefined;
+  let config: EvalConfig | SdkEvalSuiteExport | undefined;
   for (const name of EXPORT_NAMES) {
     const candidate = module[name];
-    if (isEvalConfigLike(candidate)) {
+    if (isSupportedTsEvalExport(candidate)) {
       config = candidate;
       break;
     }
   }
 
   if (!config) {
     throw new Error(
-      `${filePath}: no EvalConfig export found. Export an EvalConfig as default, 'config', or 'evalConfig'.`,
+      `${filePath}: no supported eval export found. Export defineEval(...) or an EvalConfig as default, 'config', or 'evalConfig'.`,
     );
   }
 
@@ -66,6 +73,16 @@ export async function loadTsEvalSuite(
   },
 ): Promise<TsEvalSuiteResult> {
   const { config, filePath: absolutePath } = await loadTsEvalFile(filePath);
+
+  if (isSdkEvalSuiteExport(config)) {
+    return loadTestSuiteFromYamlObject(
+      absolutePath,
+      config[SDK_TO_EVAL_YAML_OBJECT_SYMBOL](),
+      repoRoot,
+      options,
+    );
+  }
+
   const materialized = await materializeEvalConfig(config, {
     repoRoot,
     baseDir: path.dirname(absolutePath),
@@ -98,6 +115,19 @@ export async function loadTsEvalSuite(
   };
 }
 
+/**
+ * Type guard for SDK-branded eval suite exports.
+ *
+ * A value qualifies when it is a non-null object whose `SDK_EVAL_SUITE_SYMBOL` property
+ * is exactly `true` and whose `SDK_TO_EVAL_YAML_OBJECT_SYMBOL` property is a function.
+ */
+function isSdkEvalSuiteExport(value: unknown): value is SdkEvalSuiteExport {
+  if (!value || typeof value !== 'object') {
+    return false;
+  }
+
+  const candidate = value as SdkEvalSuiteExport;
+  if (candidate[SDK_EVAL_SUITE_SYMBOL] !== true) {
+    return false;
+  }
+  return typeof candidate[SDK_TO_EVAL_YAML_OBJECT_SYMBOL] === 'function';
+}
+
+/**
+ * Returns true when `value` is something the `.eval.ts` loader accepts as an export:
+ * either an SDK-branded eval suite or an EvalConfig-like object. The SDK brand is
+ * checked first.
+ */
+function isSupportedTsEvalExport(value: unknown): value is EvalConfig | SdkEvalSuiteExport {
+  if (isSdkEvalSuiteExport(value)) {
+    return true;
+  }
+  return isEvalConfigLike(value);
+}
+
 /**
  * Duck-type check for EvalConfig-like objects.
  * An EvalConfig must have at least one of: tests, specFile, or target.

diff --git a/packages/core/src/evaluation/yaml-parser.ts b/packages/core/src/evaluation/yaml-parser.ts
@@ -315,27 +315,28 @@ export async function loadTestSuite(
     repoRoot,
     options,
   );
-  const metadata = parseMetadata(parsed);
-  const failOnError = extractFailOnError(parsed);
-  const threshold = extractThreshold(parsed);
-  return {
-    tests,
-    trials: extractTrialsConfig(parsed),
-    targets: extractTargetsFromSuite(parsed),
-    targetRefs: extractTargetRefsFromSuite(parsed),
-    workers: extractWorkersFromSuite(parsed),
-    cacheConfig: extractCacheConfig(parsed),
-    budgetUsd: extractBudgetUsd(parsed),
-    ...(metadata !== undefined && { metadata }),
-    ...(failOnError !== undefined && { failOnError }),
-    ...(threshold !== undefined && { threshold }),
-    ...(suiteWorkspacePath !== undefined && { workspacePath: suiteWorkspacePath }),
-  };
+  return buildEvalSuiteResult(parsed, tests, suiteWorkspacePath);
 }
 
 /** @deprecated Use `loadTestSuite` instead */
 export const loadEvalSuite = loadTestSuite;
 
+/**
+ * Loads a test suite from an already-parsed suite object instead of reading and parsing
+ * a YAML file from disk.
+ *
+ * @param evalFilePath - Path of the file the suite object originates from; forwarded to
+ *   the shared loader together with the object.
+ * @param suiteObject - The suite value, in the same shape the YAML parser would produce.
+ * @param repoRoot - Repository root forwarded to the shared loader.
+ * @param options - Optional load options forwarded to the shared loader.
+ * @returns The suite result built from the loaded tests and the parsed suite object.
+ */
+export async function loadTestSuiteFromYamlObject(
+  evalFilePath: string,
+  suiteObject: unknown,
+  repoRoot: URL | string,
+  options?: LoadOptions,
+): Promise<EvalSuiteResult> {
+  const loaded = await loadTestsFromParsedYamlValue(suiteObject, evalFilePath, repoRoot, options);
+  return buildEvalSuiteResult(loaded.parsed, loaded.tests, loaded.suiteWorkspacePath);
+}
+
 export async function loadTests(
   evalFilePath: string,
   repoRoot: URL | string,
@@ -366,7 +367,18 @@ async function loadTestsFromYaml(
   repoRoot: URL | string,
   options?: LoadOptions,
 ): Promise<{ tests: readonly EvalTest[]; parsed: JsonObject; suiteWorkspacePath?: string }> {
-  // YAML parsing (existing implementation)
+  const absoluteTestPath = path.resolve(evalFilePath);
+  const rawFile = await readFile(absoluteTestPath, 'utf8');
+
+  return loadTestsFromParsedYamlValue(parseYamlValue(rawFile), evalFilePath, repoRoot, options);
+}
+
+async function loadTestsFromParsedYamlValue(
+  rawParsed: unknown,
+  evalFilePath: string,
+  repoRoot: URL | string,
+  options?: LoadOptions,
+): Promise<{ tests: readonly EvalTest[]; parsed: JsonObject; suiteWorkspacePath?: string }> {
   const verbose = options?.verbose ?? false;
   const filterPattern = options?.filter;
   const absoluteTestPath = path.resolve(evalFilePath);
@@ -377,8 +389,6 @@ async function loadTestsFromYaml(
   // Load configuration (walks up directory tree to repo root)
   const config = await loadConfig(absoluteTestPath, repoRootPath);
 
-  const rawFile = await readFile(absoluteTestPath, 'utf8');
-  const rawParsed = parseYamlValue(rawFile) as unknown;
   const rawCaseSnapshots = buildRawInlineTestSnapshots(rawParsed);
   const interpolated = interpolateEnv(rawParsed, process.env) as unknown;
   if (!isJsonObject(interpolated)) {
@@ -715,6 +725,30 @@ async function loadTestsFromYaml(
   return { tests: results, parsed: suite, suiteWorkspacePath: suiteWorkspace?.path };
 }
 
+/**
+ * Assembles the suite-level result from a parsed suite object and its loaded tests.
+ *
+ * Trials, targets, target refs, workers, cache config and budget are always read from
+ * `parsed`. `metadata`, `failOnError`, `threshold` and `workspacePath` are only added to
+ * the result when their values are defined.
+ *
+ * @param parsed - The suite object the settings are extracted from.
+ * @param tests - The tests already loaded for this suite.
+ * @param suiteWorkspacePath - Optional workspace path, exposed as `workspacePath`.
+ * @returns The combined suite result.
+ */
+function buildEvalSuiteResult(
+  parsed: JsonObject,
+  tests: readonly EvalTest[],
+  suiteWorkspacePath?: string,
+): EvalSuiteResult {
+  const suiteMetadata = parseMetadata(parsed);
+  const suiteFailOnError = extractFailOnError(parsed);
+  const suiteThreshold = extractThreshold(parsed);
+
+  const result: EvalSuiteResult = {
+    tests,
+    trials: extractTrialsConfig(parsed),
+    targets: extractTargetsFromSuite(parsed),
+    targetRefs: extractTargetRefsFromSuite(parsed),
+    workers: extractWorkersFromSuite(parsed),
+    cacheConfig: extractCacheConfig(parsed),
+    budgetUsd: extractBudgetUsd(parsed),
+    ...(suiteMetadata === undefined ? {} : { metadata: suiteMetadata }),
+    ...(suiteFailOnError === undefined ? {} : { failOnError: suiteFailOnError }),
+    ...(suiteThreshold === undefined ? {} : { threshold: suiteThreshold }),
+    ...(suiteWorkspacePath === undefined ? {} : { workspacePath: suiteWorkspacePath }),
+  };
+  return result;
+}
+
 const SOURCE_SECRET_KEY_PATTERN =
   /(api[_-]?key|authorization|bearer|credential|password|private[_-]?key|secret|token)/i;
 const REDACTED_SOURCE_VALUE = '[redacted]';

diff --git a/packages/core/test/evaluation/loaders/fixtures/sdk-define-eval.eval.ts b/packages/core/test/evaluation/loaders/fixtures/sdk-define-eval.eval.ts
@@ -0,0 +1,37 @@
+import { defineEval } from '../../../../../sdk/src/index.ts';
+
+// Loader test fixture: a suite authored with `defineEval()` and exported as the module
+// default. It sets suite-level execution options plus one suite-level (`beforeAll`) and
+// one per-test (`beforeEach`) workspace hook, all written in camelCase.
+export default defineEval({
+  name: 'sdk-define-eval-suite',
+  description: 'YAML-aligned TypeScript suite authored with @agentv/sdk',
+  tags: ['sdk', 'typescript', 'yaml'],
+  execution: {
+    targets: ['mock-target'],
+    workers: 2,
+    skipDefaults: true,
+    budgetUsd: 2,
+    threshold: 0.75,
+  },
+  workspace: {
+    hooks: {
+      beforeAll: {
+        command: ['echo', 'suite-setup'],
+      },
+    },
+  },
+  tests: [
+    {
+      id: 'sdk-define-eval',
+      input: 'Say hello',
+      expectedOutput: 'hello there',
+      assertions: [{ type: 'contains', value: 'hello' }],
+      workspace: {
+        hooks: {
+          beforeEach: {
+            command: ['echo', 'case-setup'],
+            timeoutMs: 1_000,
+          },
+        },
+      },
+    },
+  ],
+});