EntityProcess · christso · Jun 26, 2026 · Jun 26, 2026
diff --git a/apps/web/src/content/docs/docs/graders/composite.mdx b/apps/web/src/content/docs/docs/graders/composite.mdx
@@ -100,15 +100,18 @@ Because this is an average, the final score is the fraction of passing children
 
 ### OR Logic (Strict)
 
-For a strict OR, add a custom code-grader aggregator and return `1.0` when any child score passes:
+For a strict OR, add a custom code-grader aggregator and return `1.0` when any child score passes.
+
+The aggregator's `path` accepts either a direct script path or a full shell command.
+The `bun run <script>` form is recommended:
 
 ```yaml
 assertions:
   - name: strict_or
     type: composite
     aggregator:
       type: code-grader
-      path: ./scripts/or-aggregator.js
+      path: bun run ../scripts/or-aggregator.js
     assertions:
       - name: mentions-paris
         type: contains
@@ -132,8 +135,7 @@ console.log(
     verdict: anyPassed ? 'pass' : 'fail',
     assertions: [{ text: `Any-or gate: ${anyPassed ? 'passed' : 'failed'}`, passed: anyPassed }],
   }),
-);
-```
+);
 ```
 
 ### Code Grader Aggregator
@@ -143,7 +145,7 @@ Run a custom command to decide the final score based on all grader results:
 ```yaml
 aggregator:
   type: code-grader
-  path: node ./scripts/safety-gate.js
+  path: bun run ./scripts/safety-gate.js
   cwd: ./graders  # optional working directory
 ```
 

diff --git a/examples/features/composite/README.md b/examples/features/composite/README.md
@@ -13,10 +13,13 @@ Demonstrates composite grader patterns for combining multiple evaluation criteri
 
 ```bash
 # From repository root
-bun agentv eval examples/features/composite/evals/dataset.eval.yaml
+bun agentv eval run examples/features/composite/evals/dataset.eval.yaml
+# Run only the strict-OR example (`strict-or-local`) in dry-run mode (no live LLM targets required)
+bun agentv eval run examples/features/composite/evals/dataset.eval.yaml --test-id strict-or-local --dry-run
 ```
 
 ## Key Files
 
 - `evals/dataset.eval.yaml` - Test cases with composite grader patterns
+- `scripts/or-aggregator.js` - Strict OR aggregator script used by the `strict-or-local` example
 - `apps/web/src/content/docs/docs/graders/composite.mdx` - Detailed AND/OR and strict-OR composition guidance
diff --git a/examples/features/composite/evals/dataset.eval.yaml b/examples/features/composite/evals/dataset.eval.yaml
@@ -56,9 +56,34 @@ tests:
             prompt: ../prompts/technical-accuracy.md
         aggregator:
           type: code-grader
-          path: node ../scripts/safety-gate-aggregator.js
+          path: bun run ../scripts/safety-gate-aggregator.js
 
-  # Example 3: LLM Grader Aggregator
+  # Example 3: Strict OR with a local code-grader aggregator
+  - id: strict-or-local
+    input:
+      - role: user
+        content: "Where is Paris?"
+    expected_output:
+      - role: assistant
+        content: |
+          Paris is the capital city of France.
+    criteria: |
+      The response should mention either "Paris" or the word "capital".
+    assertions:
+      - name: strict_or
+        type: composite
+        assertions:
+          - name: mentions-paris
+            type: contains
+            value: Paris
+          - name: mentions-capital
+            type: contains
+            value: capital
+        aggregator:
+          type: code-grader
+          path: bun run ../scripts/or-aggregator.js
+
+  # Example 4: LLM Grader Aggregator
   - id: llm-grader-conflict-resolution
     # Baseline note: aggregator may report minor omissions (score ~0.9).
     input:
@@ -84,7 +109,7 @@ tests:
           type: llm-grader
           prompt: ../prompts/conflict-resolution.md
 
-  # Example 4: Nested Composite Graders
+  # Example 5: Nested Composite Graders
   - id: nested-composite
     input:
       - role: user

diff --git a/examples/features/composite/scripts/or-aggregator.js b/examples/features/composite/scripts/or-aggregator.js
@@ -0,0 +1,53 @@
+const fs = require('node:fs');
+
+function getScore(result) { // Maps one child grader result to 1 (counts as passed) or 0 (does not).
+  if (result === null || typeof result !== 'object') { // null, undefined and primitives are not usable results
+    return 0;
+  }
+
+  if (result.verdict === 'pass') { // an explicit 'pass' verdict wins without looking at the score
+    return 1;
+  }
+
+  if (typeof result.verdict === 'string' && result.verdict === 'skip') { // a skipped child never counts as passed, even if it carries a numeric score
+    return 0;
+  }
+
+  if (typeof result.score === 'number') { // any other verdict (or none): fall back to the numeric score
+    return result.score >= 0.5 ? 1 : 0; // 0.5 is the pass threshold; NaN compares false and yields 0
+  }
+
+  return 0; // neither a 'pass' verdict nor a numeric score
+}
+
+try { // Script body: read JSON from stdin, apply strict OR over the child results, print one JSON result to stdout.
+  const input = JSON.parse(fs.readFileSync(0, 'utf8')); // file descriptor 0 is stdin
+  const results = Object.values(input.results ?? {}); // a missing or nullish `results` is treated as having no children
+  const anyPassed = results.some(getScore); // getScore returns 1/0, which `some` reads as truthy/falsy; no children gives false
+
+  console.log(
+    JSON.stringify({
+      score: anyPassed ? 1 : 0, // binary score: 1 as soon as any child passed
+      verdict: anyPassed ? 'pass' : 'fail',
+      assertions: [
+        {
+          text: `Strict OR passed if any child passed: ${anyPassed ? 'true' : 'false'}`,
+          passed: anyPassed,
+        },
+      ],
+    }),
+  );
+} catch (error) { // any failure above (stdin read, JSON parse, property access) lands here
+  console.log( // still emit a well-formed result object, reported as a failure, instead of throwing
+    JSON.stringify({
+      score: 0,
+      verdict: 'fail',
+      assertions: [
+        {
+          text: `Failed to evaluate strict OR: ${error instanceof Error ? error.message : String(error)}`,
+          passed: false,
+        },
+      ],
+    }),
+  );
+}