supermemoryai · vorflux · Mar 23, 2026
diff --git a/src/orchestrator/batch.ts b/src/orchestrator/batch.ts
@@ -5,6 +5,7 @@ import type { BenchmarkResult } from "../types/unified"
 import { orchestrator, CheckpointManager } from "./index"
 import { createBenchmark } from "../benchmarks"
 import { logger } from "../utils/logger"
+import { validateQuestionIds } from "../utils/question-ids"
 import { existsSync, mkdirSync, readFileSync, writeFileSync, rmSync } from "fs"
 import { join } from "path"
 import { startRun, endRun } from "../server/runState"
@@ -157,35 +158,8 @@ export class BatchManager {
 
     let targetQuestionIds: string[]
     if (questionIds && questionIds.length > 0) {
-      // Validate that all provided IDs exist in the benchmark
-      const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId))
-      const validIds: string[] = []
-      const invalidIds: string[] = []
-
-      for (const id of questionIds) {
-        if (allQuestionIdsSet.has(id)) {
-          validIds.push(id)
-        } else {
-          invalidIds.push(id)
-        }
-      }
-
-      if (invalidIds.length > 0) {
-        logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
-      }
-
-      if (validIds.length === 0) {
-        throw new Error(
-          `All provided questionIds are invalid. No matching questions found in benchmark "${benchmark}". ` +
-            `Invalid IDs: ${invalidIds.join(", ")}`
-        )
-      }
-
+      const { validIds } = validateQuestionIds(questionIds, allQuestions, benchmark)
       targetQuestionIds = validIds
-      logger.info(
-        `Using explicit questionIds: ${validIds.length} valid questions` +
-          (invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
-      )
     } else if (sampling) {
       targetQuestionIds = selectQuestionsBySampling(allQuestions, sampling)
     } else {

diff --git a/src/orchestrator/index.ts b/src/orchestrator/index.ts
@@ -10,6 +10,7 @@ import { CheckpointManager } from "./checkpoint"
 import { getProviderConfig, getJudgeConfig } from "../utils/config"
 import { resolveModel } from "../utils/models"
 import { logger } from "../utils/logger"
+import { validateQuestionIds } from "../utils/question-ids"
 import { runIngestPhase } from "./phases/ingest"
 import { runIndexingPhase } from "./phases/indexing"
 import { runSearchPhase } from "./phases/search"
@@ -213,35 +214,8 @@ export class Orchestrator {
       effectiveLimit = limit
 
       if (questionIds && questionIds.length > 0) {
-        // Validate that all provided IDs exist in the benchmark
-        const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId))
-        const validIds: string[] = []
-        const invalidIds: string[] = []
-
-        for (const id of questionIds) {
-          if (allQuestionIdsSet.has(id)) {
-            validIds.push(id)
-          } else {
-            invalidIds.push(id)
-          }
-        }
-
-        if (invalidIds.length > 0) {
-          logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
-        }
-
-        if (validIds.length === 0) {
-          throw new Error(
-            `All provided questionIds are invalid. No matching questions found in benchmark "${benchmarkName}". ` +
-              `Invalid IDs: ${invalidIds.join(", ")}`
-          )
-        }
-
+        const { validIds } = validateQuestionIds(questionIds, allQuestions, benchmarkName)
         targetQuestionIds = validIds
-        logger.info(
-          `Using explicit questionIds: ${validIds.length} valid questions` +
-            (invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
-        )
       } else if (sampling) {
         logger.info(`Using sampling mode: ${sampling.mode}`)
         targetQuestionIds = selectQuestionsBySampling(allQuestions, sampling)

diff --git a/src/server/routes/benchmarks.ts b/src/server/routes/benchmarks.ts
@@ -128,7 +128,8 @@ export async function handleBenchmarksRoutes(req: Request, url: URL): Promise<Re
     }
   }
 
-  // POST /api/benchmarks/:name/expand-ids - Expand conversation/session patterns to question IDs
+  // POST /api/benchmarks/:name/expand-ids - Expand patterns to question IDs
+  // Supports: exact question IDs, session IDs, and prefix matching (works across all benchmarks)
   const expandIdsMatch = pathname.match(/^\/api\/benchmarks\/([^/]+)\/expand-ids$/)
   if (method === "POST" && expandIdsMatch) {
     const benchmarkName = expandIdsMatch[1]
@@ -153,39 +154,27 @@ export async function handleBenchmarksRoutes(req: Request, url: URL): Promise<Re
         if (!trimmed) continue
 
         const expanded: string[] = []
-
-        // Pattern 1: Conversation ID (e.g., "conv-26") - expand to all questions
-        // Check if pattern ends with a number and doesn't have -q or -session suffix
-        if (/^[a-zA-Z]+-\d+$/.test(trimmed)) {
-          const matchingQuestions = allQuestions.filter((q) =>
-            q.questionId.startsWith(trimmed + "-q")
-          )
-          matchingQuestions.forEach((q) => {
-            expanded.push(q.questionId)
-            expandedIds.add(q.questionId)
-          })
-        }
-        // Pattern 2: Session ID (e.g., "conv-26-session_1" or "001be529-session-0")
-        // Find all questions that reference this session
-        else if (trimmed.includes("-session")) {
-          const matchingQuestions = allQuestions.filter((q) =>
-            q.haystackSessionIds.includes(trimmed)
-          )
-          matchingQuestions.forEach((q) => {
-            expanded.push(q.questionId)
-            expandedIds.add(q.questionId)
-          })
-        }
-        // Pattern 3: Direct question ID - add as-is if it exists
-        else {
-          const exactMatch = allQuestions.find((q) => q.questionId === trimmed)
-          if (exactMatch) {
-            expanded.push(trimmed)
-            expandedIds.add(trimmed)
-          }
+        const addMatch = (id: string) => { expanded.push(id); expandedIds.add(id) }
+
+        // Priority: exact match > session lookup > prefix expansion
+        const exactMatch = allQuestions.find((q) => q.questionId === trimmed)
+        if (exactMatch) {
+          addMatch(trimmed)
+        } else if (trimmed.includes("-session")) {
+          // Session ID — find all questions that reference this session
+          allQuestions
+            .filter((q) => q.haystackSessionIds.includes(trimmed))
+            .forEach((q) => addMatch(q.questionId))
+        } else {
+          // Prefix match — works across all benchmarks (e.g., "conv-26" matches
+          // "conv-26-q0", "convomem-user_evidence" matches "convomem-user_evidence-0")
+          const prefix = trimmed.endsWith("-") ? trimmed : trimmed + "-"
+          allQuestions
+            .filter((q) => q.questionId.startsWith(prefix))
+            .forEach((q) => addMatch(q.questionId))
         }
 
-        patternResults[pattern] = expanded
+        patternResults[trimmed] = expanded
       }
 
       return json({

diff --git a/src/utils/question-ids.ts b/src/utils/question-ids.ts
@@ -0,0 +1,46 @@
+import { logger } from "./logger"
+
+export interface ValidateQuestionIdsResult { // outcome returned by validateQuestionIds
+  validIds: string[] // requested IDs that matched a question in the benchmark
+  invalidIds: string[] // requested IDs that matched no question
+}
+
+/**
+ * Splits `questionIds` into IDs present in `allQuestions` and IDs that are absent, keeping input order (duplicates are kept).
+ * Warns about absent IDs and logs a summary; throws an Error naming `benchmarkName` when no ID is valid (an empty list included).
+ */
+export function validateQuestionIds(
+  questionIds: string[],
+  allQuestions: { questionId: string }[],
+  benchmarkName: string
+): ValidateQuestionIdsResult {
+  const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId))
+  const validIds: string[] = []
+  const invalidIds: string[] = []
+  // Partition the requested IDs in input order using the lookup set built above.
+  for (const id of questionIds) {
+    if (allQuestionIdsSet.has(id)) {
+      validIds.push(id)
+    } else {
+      invalidIds.push(id)
+    }
+  }
+  // Invalid IDs are only reported here; they do not fail the call unless nothing valid remains.
+  if (invalidIds.length > 0) {
+    logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
+  }
+  // Nothing matched (also the case for an empty questionIds array): fail instead of returning an empty list.
+  if (validIds.length === 0) {
+    throw new Error(
+      `All provided questionIds are invalid. No matching questions found in benchmark "${benchmarkName}". ` +
+        `Invalid IDs: ${invalidIds.join(", ")}`
+    )
+  }
+  // Summary line; the "invalid skipped" suffix is appended only when some IDs were rejected.
+  logger.info(
+    `Using explicit questionIds: ${validIds.length} valid questions` +
+      (invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
+  )
+
+  return { validIds, invalidIds }
+}