EntityProcess · christso · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026 · Jun 26, 2026
diff --git a/.agents/verification.md b/.agents/verification.md
@@ -127,7 +127,7 @@ Use live dogfood before marking PRs ready when they affect eval execution, exper
 - Live means both sides are real: a live agent/provider target and a live grader target. Do not count `mock`, `--dry-run`, or deterministic-only assertions as dogfood for these changes.
 - Prefer the smallest realistic eval: one or two cases, bounded timeouts, and `workers: 1` for heavyweight agent providers.
 - For native experiment changes, run through `agentv eval run ... --experiment <experiment.yaml|ts>` so resolution, setup, scripts, target selection, run knobs, and artifact metadata are exercised together.
-- For repeat-run changes, use an experiment-level repeat config with `count >= 2`, `early_exit: false` when validating all attempts are persisted. Inspect root `index.jsonl`, root `benchmark.json`, and the repeated case folder. The repeated case folder should carry aggregate `summary.json` with flattened snake_case timing fields plus AgentV aggregate `grading.json`; attempt-specific outputs and transcripts live under `run-N/`. Each `run-N/` folder should contain `result.json`, `grading.json`, `transcript.json`, `transcript-raw.jsonl`, and `outputs/answer.md`. Do not write per-run `metrics.json`; timing and o11y fields belong in `result.json`, and `result.json` points at `./grading.json` through `grading_path`.
+- For repeat-run changes, use an experiment-level repeat config with `count >= 2`, and set `early_exit: false` when validating that all attempts are persisted. Inspect root `index.jsonl`, root `benchmark.json`, and the repeated case folder. The repeated case folder should carry aggregate `summary.json` with flattened snake_case timing fields plus AgentV aggregate `grading.json`; attempt-specific outputs, transcripts, and metrics live under `run-N/`. Each `run-N/` folder should contain `result.json`, `grading.json`, `metrics.json`, `transcript.jsonl`, `transcript-raw.jsonl`, and, when answer output is available, `outputs/answer.md`. `result.json` should point at `./grading.json`, `./metrics.json`, `./transcript.jsonl`, and `./transcript-raw.jsonl` through the corresponding path fields.
 - For local OpenAI-compatible grading through the OAuth proxy, use `endpoint: http://127.0.0.1:10531/v1`, but still route `api_key` and `model` through environment references such as `${{ LOCAL_OPENAI_PROXY_API_KEY }}` and `${{ LOCAL_OPENAI_PROXY_MODEL }}`. Literal secrets and literal model values are intentionally rejected by target validation unless a resolver explicitly allows them.
 - Preserve review evidence in `agentv-private` on an `evidence/<bead-or-feature-slug>` branch. Include the run bundle, source eval/experiment/targets files, a short README, an artifact tree, and screenshots when folder structure or UI behavior is under review.
 - If comparing against an external convention such as Vercel `agent-eval`, verify both semantic provenance and the physical `run-N` artifact layout for repeat runs.

diff --git a/apps/cli/src/commands/eval/artifact-writer.ts b/apps/cli/src/commands/eval/artifact-writer.ts
@@ -99,6 +99,7 @@ export function buildIndexArtifactEntry(
     answerPath?: string;
     tracePath?: string;
     transcriptPath?: string;
+    transcriptRawPath?: string;
     metricsPath?: string;
     rawProviderLogPath?: string;
     responsePath?: string;

diff --git a/apps/cli/src/commands/results/combine-run.ts b/apps/cli/src/commands/results/combine-run.ts
@@ -369,6 +369,7 @@ const MANIFEST_PATH_FIELDS = [
   'response_path',
   'trace_path',
   'transcript_path',
+  'transcript_raw_path',
   'metrics_path',
   'raw_provider_log_path',
   'task_dir',

diff --git a/apps/cli/src/commands/results/manifest.ts b/apps/cli/src/commands/results/manifest.ts
@@ -6,10 +6,11 @@ import {
   type ExternalTraceMetadataWire,
   type ResultArtifactPointersWire,
   type TraceSummary,
-  type TranscriptJsonLine,
   buildTraceFromMessages,
+  fromTraceEnvelopeWire,
   toCamelCaseDeep,
-  traceFromTranscriptJsonLines,
+  traceEnvelopeToTraceSummary,
+  traceEnvelopeToTranscriptMessages,
 } from '@agentv/core';
 
 import type { GradingArtifact, TimingArtifact } from '../eval/artifact-writer.js';
@@ -55,6 +56,7 @@ export interface ResultManifestRecord {
   readonly answer_path?: string;
   readonly trace_path?: string;
   readonly transcript_path?: string;
+  readonly transcript_raw_path?: string;
   readonly metrics_path?: string;
   readonly raw_provider_log_path?: string;
   readonly artifact_pointers?: ResultArtifactPointersWire;
@@ -86,6 +88,7 @@ export type ArtifactPointer =
 
 export interface ArtifactPointerMap {
   readonly transcript_path?: string;
+  readonly transcript_raw_path?: string;
   readonly answer_path?: string;
   readonly transcript?: ArtifactPointer;
   readonly answer?: ArtifactPointer;
@@ -101,14 +104,6 @@ export interface ManifestHydrationOptions {
   readonly hydrateTranscriptTrace?: boolean;
 }
 
-function parseJsonlLines<T>(content: string): T[] {
-  return content
-    .split(/\r?\n/)
-    .map((line) => line.trim())
-    .filter((line) => line.length > 0)
-    .map((line) => JSON.parse(line) as T);
-}
-
 function parseResultRows(content: string, sourceLabel?: string): ResultManifestRecord[] {
   return content
     .split(/\r?\n/)
@@ -162,33 +157,6 @@ function readOptionalJson<T>(baseDir: string, relativePath: string | undefined):
   }
 }
 
-function nonEmptyString(value: unknown): string | undefined {
-  return typeof value === 'string' && value.trim().length > 0 ? value : undefined;
-}
-
-function artifactPointerPath(pointer: ArtifactPointer | undefined): string | undefined {
-  if (typeof pointer === 'string') {
-    return nonEmptyString(pointer);
-  }
-  if (!pointer) {
-    return undefined;
-  }
-  return (
-    nonEmptyString(pointer.path) ??
-    nonEmptyString(pointer.artifact_path) ??
-    nonEmptyString(pointer.relative_path)
-  );
-}
-
-function resolveTranscriptPath(record: ResultManifestRecord): string | undefined {
-  return (
-    record.transcript_path ??
-    record.artifact_pointers?.transcript?.path ??
-    record.artifacts?.transcript_path ??
-    artifactPointerPath(record.transcript ?? record.artifacts?.transcript)
-  );
-}
-
 function hydrateInput(
   baseDir: string,
   record: ResultManifestRecord,
@@ -217,19 +185,46 @@ function hydrateOutput(
   return responseText.trimEnd();
 }
 
+function hydrateTraceEnvelope(
+  baseDir: string,
+  record: ResultManifestRecord,
+): EvaluationResult['trace'] | undefined { // undefined = no usable trace envelope; hydrateTrace only returns a truthy result
+  const traceWire = readOptionalJson<unknown>(baseDir, record.trace_path);
+  if (!traceWire) { // falsy result from readOptionalJson for record.trace_path
+    return undefined;
+  }
+
+  try {
+    const envelope = fromTraceEnvelopeWire(traceWire);
+    const summary = traceEnvelopeToTraceSummary(envelope);
+    return buildTraceFromMessages({
+      output: traceEnvelopeToTranscriptMessages(envelope),
+      summary: summary.trace,
+      finalOutput: hydrateOutput(baseDir, record),
+      tokenUsage: summary.tokenUsage,
+      costUsd: summary.costUsd,
+      durationMs: summary.durationMs,
+      startTime: summary.startTime,
+      endTime: summary.endTime,
+      provider: envelope.source.provider,
+      target: record.target ?? envelope.eval.target, // manifest record value takes precedence; envelope is the fallback
+      testId: record.test_id ?? envelope.eval.testId, // same precedence as target
+      conversationId: envelope.eval.runId,
+    });
+  } catch { // best-effort: a throw anywhere in the conversion above is swallowed and reported as undefined
+    return undefined;
+  }
+}
+
 function hydrateTrace(
   baseDir: string,
   record: ResultManifestRecord,
   options: ManifestHydrationOptions,
 ): EvaluationResult['trace'] {
   if (options.hydrateTranscriptTrace !== false) {
-    const transcriptText = readOptionalText(baseDir, resolveTranscriptPath(record));
-    if (transcriptText) {
-      try {
-        return traceFromTranscriptJsonLines(parseJsonlLines<TranscriptJsonLine>(transcriptText));
-      } catch {
-        // Fall through to a minimal trace below.
-      }
+    const trace = hydrateTraceEnvelope(baseDir, record);
+    if (trace) {
+      return trace;
     }
   }
 

diff --git a/apps/cli/src/commands/results/projection-bundle.ts b/apps/cli/src/commands/results/projection-bundle.ts
@@ -94,6 +94,7 @@ export type ProjectionBundleArtifactRefs = Partial<
     | 'output_path'
     | 'answer_path'
     | 'transcript_path'
+    | 'transcript_raw_path'
     | 'metrics_path'
     | 'task_dir'
     | 'eval_path'
@@ -178,6 +179,7 @@ function artifactRefs(
     output_path: indexEntry.output_path,
     answer_path: indexEntry.answer_path,
     transcript_path: indexEntry.transcript_path,
+    transcript_raw_path: indexEntry.transcript_raw_path,
     metrics_path: indexEntry.metrics_path,
     trace_path: tracePathFor(indexEntry),
     task_dir: indexEntry.task_dir,

diff --git a/apps/cli/src/commands/results/serve.ts b/apps/cli/src/commands/results/serve.ts
@@ -858,6 +858,7 @@ function buildResultArtifactCatalog(
   addDirectArtifactCatalogEntry(entries, seen, record.response_path, 'artifact');
   addDirectArtifactCatalogEntry(entries, seen, record.answer_path, 'answer');
   addDirectArtifactCatalogEntry(entries, seen, record.transcript_path, 'transcript');
+  addDirectArtifactCatalogEntry(entries, seen, record.transcript_raw_path, 'artifact');
   addDirectArtifactCatalogEntry(entries, seen, recordWithTrace.trace_path, 'trace');
   addDirectArtifactCatalogEntry(entries, seen, record.eval_path, 'artifact');
   addDirectArtifactCatalogEntry(entries, seen, record.targets_path, 'artifact');
@@ -1153,7 +1154,8 @@ function buildRepeatTrialReadModels(
     const metricsPath = caseTrialArtifactPath(artifactDir, runPath, 'metrics.json');
     const timingPath = caseTrialArtifactPath(artifactDir, runPath, 'timing.json');
     const gradingPath = caseTrialArtifactPath(artifactDir, runPath, 'grading.json');
-    const transcriptPath = caseTrialArtifactPath(artifactDir, runPath, 'transcript-raw.jsonl');
+    const transcriptPath = caseTrialArtifactPath(artifactDir, runPath, 'transcript.jsonl');
+    const transcriptRawPath = caseTrialArtifactPath(artifactDir, runPath, 'transcript-raw.jsonl');
     const answerPath = caseTrialArtifactPath(artifactDir, runPath, 'outputs/answer.md');
     const metrics = readArtifactJsonObject(baseDir, metricsPath);
     const timing = readArtifactJsonObject(baseDir, timingPath);
@@ -1180,6 +1182,7 @@ function buildRepeatTrialReadModels(
       ...(timingPath && { timing_path: timingPath }),
       ...(gradingPath && { grading_path: gradingPath }),
       ...(transcriptPath && { transcript_path: transcriptPath }),
+      ...(transcriptRawPath && { transcript_raw_path: transcriptRawPath }),
       ...(answerPath && { answer_path: answerPath }),
     };
   });
@@ -1203,6 +1206,7 @@ function attachRunDetailReadModelFields<T extends Record<string, unknown>>(
       ...(record.timing_path && { timing_path: record.timing_path }),
       ...(record.metrics_path && { metrics_path: record.metrics_path }),
       ...(record.transcript_path && { transcript_path: record.transcript_path }),
+      ...(record.transcript_raw_path && { transcript_raw_path: record.transcript_raw_path }),
       ...(record.output_path && { output_path: record.output_path }),
       ...(record.answer_path && { answer_path: record.answer_path }),
       ...(trials && { trials }),