Skip to content

Commit a12a6bf

Browse files
fix(bench): add workspaces coercion and forced synthesis for cross-repo tasks
1 parent c103a3d commit a12a6bf

2 files changed

Lines changed: 48 additions & 3 deletions

File tree

benchmarks/agent_eval/src/agents.ts

Lines changed: 36 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -327,10 +327,28 @@ export async function reactLoop(
327327

328328
// Build system prompt with optional targetRepos hint
329329
let systemPrompt: string | undefined;
330-
if (task.targetRepos && task.targetRepos.length > 0 && task.targetRepos.length < 3) {
331-
systemPrompt = `You are an elite coding agent with access to VectorCode MCP tools. Solve the task step-by-step. Use tools when needed.
330+
if (task.targetRepos && task.targetRepos.length > 0) {
331+
const repos = JSON.stringify(task.targetRepos);
332+
if (task.targetRepos.length >= 3) {
333+
// Cross-repo task: guide the agent to search each repo individually
334+
systemPrompt = `You are an elite coding agent with access to VectorCode MCP tools. Solve the task step-by-step. Use tools when needed.
335+
336+
IMPORTANT — CROSS-REPO SEARCH STRATEGY:
337+
The task involves ${task.targetRepos.length} repositories: ${repos}.
338+
When using vec_search, results are merged from ALL repos unless you scope them. To get useful results for each repository:
339+
340+
1. Make SEPARATE vec_search calls for EACH repository using the "workspaces" parameter. For example:
341+
- vec_search({ query: "public API entry point", workspaces: ["thiserror"] })
342+
- vec_search({ query: "public API entry point", workspaces: ["defu"] })
343+
- vec_search({ query: "public API entry point", workspaces: ["itsdangerous"] })
344+
2. After gathering information per-repo, synthesize your findings into a comparison or cross-repo analysis.
345+
3. Be efficient — you have limited steps. Make parallel calls when independent.
346+
4. After gathering enough information, STOP exploring and produce your final answer. Do not use all available steps on exploration.`;
347+
} else {
348+
systemPrompt = `You are an elite coding agent with access to VectorCode MCP tools. Solve the task step-by-step. Use tools when needed.
332349
333-
IMPORTANT: When using vec_search, always pass the "workspaces" parameter with value ${JSON.stringify(task.targetRepos)} to scope your search to the relevant repositories.`;
350+
IMPORTANT: When using vec_search, always pass the "workspaces" parameter with value ${repos} to scope your search to the relevant repositories.`;
351+
}
334352
}
335353

336354
let isReplaying = cacheMode === 'cached' || (cacheMode === 'live' && cachedEntries.length > 0);
@@ -446,6 +464,21 @@ IMPORTANT: When using vec_search, always pass the "workspaces" parameter with va
446464
});
447465
}
448466

467+
// If the agent exhausted all steps without producing a final answer,
468+
// force one more LLM call (without tools) to synthesize findings.
469+
if (!finalAnswer && steps >= maxSteps) {
470+
console.log(`[reactLoop] Agent used all ${maxSteps} steps without final answer. Forcing synthesis...`);
471+
messages.push({
472+
role: 'user',
473+
content: [{ type: 'text', text: 'You have used all available steps. Stop using tools and synthesize everything you have found into your final answer now.' }]
474+
});
475+
const forcedResponse = await llmCall(messages, systemPrompt);
476+
finalAnswer = forcedResponse.text;
477+
inputTokens += forcedResponse.tokens.input;
478+
outputTokens += forcedResponse.tokens.output;
479+
steps++;
480+
}
481+
449482
return { steps, finalAnswer, inputTokens, outputTokens, toolCalls };
450483
})();
451484

benchmarks/agent_eval/src/tools/vectorcode.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,18 @@ export class VectorCodeProvider implements ToolProvider {
5151
if (!this.client) {
5252
throw new Error('VectorCodeProvider not initialized');
5353
}
54+
// Coerce workspaces from stringified JSON array to actual array.
55+
// LLMs sometimes pass workspaces as "[\"repo\"]" (string) instead of ["repo"] (array).
56+
if (args.workspaces && typeof args.workspaces === 'string') {
57+
try {
58+
const parsed = JSON.parse(args.workspaces);
59+
if (Array.isArray(parsed)) {
60+
args = { ...args, workspaces: parsed };
61+
}
62+
} catch {
63+
// Not valid JSON — leave as-is, let server handle it
64+
}
65+
}
5466
const response = await this.client.callTool({
5567
name,
5668
arguments: args

0 commit comments

Comments
 (0)