-
Notifications
You must be signed in to change notification settings - Fork 45
Run specific Question ID's #26
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -128,6 +128,75 @@ export async function handleBenchmarksRoutes(req: Request, url: URL): Promise<Re | |
| } | ||
| } | ||
|
|
||
| // POST /api/benchmarks/:name/expand-ids - Expand conversation/session patterns to question IDs | ||
| const expandIdsMatch = pathname.match(/^\/api\/benchmarks\/([^/]+)\/expand-ids$/) | ||
| if (method === "POST" && expandIdsMatch) { | ||
| const benchmarkName = expandIdsMatch[1] | ||
|
|
||
| try { | ||
| const body = await req.json() | ||
| const { patterns } = body as { patterns: string[] } | ||
|
|
||
| if (!patterns || !Array.isArray(patterns)) { | ||
| return json({ error: "patterns array is required" }, 400) | ||
| } | ||
|
|
||
| const benchmark = createBenchmark(benchmarkName as any) | ||
| await benchmark.load() | ||
| const allQuestions = benchmark.getQuestions() | ||
|
|
||
| const expandedIds = new Set<string>() | ||
| const patternResults: Record<string, string[]> = {} | ||
|
|
||
| for (const pattern of patterns) { | ||
| const trimmed = pattern.trim() | ||
| if (!trimmed) continue | ||
|
|
||
| const expanded: string[] = [] | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (aside) This regex Consider either:
|
||
|
|
||
| // Pattern 1: Conversation ID (e.g., "conv-26") - expand to all questions | ||
| // Check if pattern ends with a number and doesn't have -q or -session suffix | ||
| if (/^[a-zA-Z]+-\d+$/.test(trimmed)) { | ||
| const matchingQuestions = allQuestions.filter((q) => | ||
| q.questionId.startsWith(trimmed + "-q") | ||
| ) | ||
| matchingQuestions.forEach((q) => { | ||
| expanded.push(q.questionId) | ||
| expandedIds.add(q.questionId) | ||
| }) | ||
| } | ||
| // Pattern 2: Session ID (e.g., "conv-26-session_1" or "001be529-session-0") | ||
| // Find all questions that reference this session | ||
| else if (trimmed.includes("-session")) { | ||
| const matchingQuestions = allQuestions.filter((q) => | ||
| q.haystackSessionIds.includes(trimmed) | ||
| ) | ||
| matchingQuestions.forEach((q) => { | ||
| expanded.push(q.questionId) | ||
| expandedIds.add(q.questionId) | ||
| }) | ||
| } | ||
| // Pattern 3: Direct question ID - add as-is if it exists | ||
| else { | ||
| const exactMatch = allQuestions.find((q) => q.questionId === trimmed) | ||
| if (exactMatch) { | ||
| expanded.push(trimmed) | ||
| expandedIds.add(trimmed) | ||
| } | ||
| } | ||
|
|
||
| patternResults[pattern] = expanded | ||
| } | ||
|
|
||
| return json({ | ||
| expandedIds: Array.from(expandedIds), | ||
| patternResults, | ||
| }) | ||
| } catch (e) { | ||
| return json({ error: e instanceof Error ? e.message : "Failed to expand IDs" }, 400) | ||
| } | ||
| } | ||
|
|
||
| // GET /api/models - List available models | ||
| if (method === "GET" && pathname === "/api/models") { | ||
| const openai = listModelsByProvider("openai").map((alias) => ({ | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -190,12 +190,14 @@ export async function handleRunsRoutes(req: Request, url: URL): Promise<Response | |
| answeringModel, | ||
| limit, | ||
| sampling, | ||
| questionIds, | ||
| concurrency, | ||
| force, | ||
| fromPhase, | ||
| sourceRunId, | ||
| } = body | ||
| console.log("[API] Extracted sampling:", sampling) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. (aside) Nit: |
||
| console.log("[API] Extracted questionIds:", questionIds) | ||
| console.log("[API] Extracted concurrency:", concurrency) | ||
|
|
||
| if (!provider || !benchmark || !runId || !judgeModel) { | ||
|
|
@@ -279,6 +281,7 @@ export async function handleRunsRoutes(req: Request, url: URL): Promise<Response | |
| answeringModel, | ||
| limit, | ||
| sampling, | ||
| questionIds, | ||
| concurrency, | ||
| force: sourceRunId ? false : force, | ||
| fromPhase: fromPhase as PhaseId | undefined, | ||
|
|
@@ -374,6 +377,7 @@ async function runBenchmark(options: { | |
| answeringModel?: string | ||
| limit?: number | ||
| sampling?: SamplingConfig | ||
| questionIds?: string[] | ||
| concurrency?: ConcurrencyConfig | ||
| force?: boolean | ||
| fromPhase?: PhaseId | ||
|
|
@@ -396,6 +400,7 @@ async function runBenchmark(options: { | |
| answeringModel: options.answeringModel, | ||
| limit: options.limit, | ||
| sampling: options.sampling, | ||
| questionIds: options.questionIds, | ||
| concurrency: options.concurrency, | ||
| force: options.force, | ||
| phases, | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(aside) The validation logic here (lines 215-248) is nearly identical to the one added in
`batch.ts` (lines 158-188). This is a ~30-line block duplicated verbatim. Consider extracting a shared `validateQuestionIds(allQuestions, questionIds, benchmarkName)` utility function.