From a7c46a8ce891a43087b586a0c11a40eb007c65c7 Mon Sep 17 00:00:00 2001
From: Vorflux AI <noreply@vorflux.com>
Date: Mon, 23 Mar 2026 22:53:48 +0000
Subject: [PATCH] refactor: deduplicate question ID validation, fix
 cross-benchmark pattern matching

- Extract shared validateQuestionIds utility (src/utils/question-ids.ts)
  to deduplicate identical ~30-line validation blocks in batch.ts and index.ts

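- Rewrite expand-ids endpoint to use generic prefix matching instead of
  hardcoded regex (^[a-zA-Z]+-\d+$) that only worked for LoCoMo. Now works
  across all benchmarks (LoCoMo, ConvoMem, LongMemEval): each pattern is
  resolved as an exact question ID first, then as a session ID (if it
  contains "-session"), and otherwise as a "<pattern>-" prefix of question
  IDs. patternResults is now keyed by the trimmed pattern.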

- Extract shared QuestionIdSelector component and validation utility to
  deduplicate identical code between runs/new and compare/new pages

- Remove redundant client-side re-validation that fetched all benchmark
  questions page-by-page (server already validates in expand-ids endpoint)
---
 src/orchestrator/batch.ts              |  30 +----
 src/orchestrator/index.ts              |  30 +----
 src/server/routes/benchmarks.ts        |  53 +++-----
 src/utils/question-ids.ts              |  46 +++++++
 ui/app/compare/new/page.tsx            | 176 ++----------------------
 ui/app/runs/new/page.tsx               | 180 ++-----------------------
 ui/components/question-id-selector.tsx | 123 +++++++++++++++++
 ui/lib/question-id-validation.ts       |  43 ++++++
 8 files changed, 259 insertions(+), 422 deletions(-)
 create mode 100644 src/utils/question-ids.ts
 create mode 100644 ui/components/question-id-selector.tsx
 create mode 100644 ui/lib/question-id-validation.ts

diff --git a/src/orchestrator/batch.ts b/src/orchestrator/batch.ts
index 5febfdd..0226b79 100644
--- a/src/orchestrator/batch.ts
+++ b/src/orchestrator/batch.ts
@@ -5,6 +5,7 @@ import type { BenchmarkResult } from "../types/unified"
 import { orchestrator, CheckpointManager } from "./index"
 import { createBenchmark } from "../benchmarks"
 import { logger } from "../utils/logger"
+import { validateQuestionIds } from "../utils/question-ids"
 import { existsSync, mkdirSync, readFileSync, writeFileSync, rmSync } from "fs"
 import { join } from "path"
 import { startRun, endRun } from "../server/runState"
@@ -157,35 +158,8 @@ export class BatchManager {
 
     let targetQuestionIds: string[]
     if (questionIds && questionIds.length > 0) {
-      // Validate that all provided IDs exist in the benchmark
-      const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId))
-      const validIds: string[] = []
-      const invalidIds: string[] = []
-
-      for (const id of questionIds) {
-        if (allQuestionIdsSet.has(id)) {
-          validIds.push(id)
-        } else {
-          invalidIds.push(id)
-        }
-      }
-
-      if (invalidIds.length > 0) {
-        logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
-      }
-
-      if (validIds.length === 0) {
-        throw new Error(
-          `All provided questionIds are invalid. No matching questions found in benchmark "${benchmark}". ` +
-            `Invalid IDs: ${invalidIds.join(", ")}`
-        )
-      }
-
+      const { validIds } = validateQuestionIds(questionIds, allQuestions, benchmark)
       targetQuestionIds = validIds
-      logger.info(
-        `Using explicit questionIds: ${validIds.length} valid questions` +
-          (invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
-      )
     } else if (sampling) {
       targetQuestionIds = selectQuestionsBySampling(allQuestions, sampling)
     } else {
diff --git a/src/orchestrator/index.ts b/src/orchestrator/index.ts
index f19123b..758067a 100644
--- a/src/orchestrator/index.ts
+++ b/src/orchestrator/index.ts
@@ -10,6 +10,7 @@ import { CheckpointManager } from "./checkpoint"
 import { getProviderConfig, getJudgeConfig } from "../utils/config"
 import { resolveModel } from "../utils/models"
 import { logger } from "../utils/logger"
+import { validateQuestionIds } from "../utils/question-ids"
 import { runIngestPhase } from "./phases/ingest"
 import { runIndexingPhase } from "./phases/indexing"
 import { runSearchPhase } from "./phases/search"
@@ -213,35 +214,8 @@ export class Orchestrator {
       effectiveLimit = limit
 
       if (questionIds && questionIds.length > 0) {
-        // Validate that all provided IDs exist in the benchmark
-        const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId))
-        const validIds: string[] = []
-        const invalidIds: string[] = []
-
-        for (const id of questionIds) {
-          if (allQuestionIdsSet.has(id)) {
-            validIds.push(id)
-          } else {
-            invalidIds.push(id)
-          }
-        }
-
-        if (invalidIds.length > 0) {
-          logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
-        }
-
-        if (validIds.length === 0) {
-          throw new Error(
-            `All provided questionIds are invalid. No matching questions found in benchmark "${benchmarkName}". ` +
-              `Invalid IDs: ${invalidIds.join(", ")}`
-          )
-        }
-
+        const { validIds } = validateQuestionIds(questionIds, allQuestions, benchmarkName)
         targetQuestionIds = validIds
-        logger.info(
-          `Using explicit questionIds: ${validIds.length} valid questions` +
-            (invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
-        )
       } else if (sampling) {
         logger.info(`Using sampling mode: ${sampling.mode}`)
         targetQuestionIds = selectQuestionsBySampling(allQuestions, sampling)
diff --git a/src/server/routes/benchmarks.ts b/src/server/routes/benchmarks.ts
index 6860315..4a46cb6 100644
--- a/src/server/routes/benchmarks.ts
+++ b/src/server/routes/benchmarks.ts
@@ -128,7 +128,8 @@ export async function handleBenchmarksRoutes(req: Request, url: URL): Promise<Re
     }
   }
 
-  // POST /api/benchmarks/:name/expand-ids - Expand conversation/session patterns to question IDs
+  // POST /api/benchmarks/:name/expand-ids - Expand patterns to question IDs
+  // Supports: exact question IDs, session IDs, and prefix matching (works across all benchmarks)
   const expandIdsMatch = pathname.match(/^\/api\/benchmarks\/([^/]+)\/expand-ids$/)
   if (method === "POST" && expandIdsMatch) {
     const benchmarkName = expandIdsMatch[1]
@@ -153,39 +154,27 @@ export async function handleBenchmarksRoutes(req: Request, url: URL): Promise<Re
         if (!trimmed) continue
 
         const expanded: string[] = []
-
-        // Pattern 1: Conversation ID (e.g., "conv-26") - expand to all questions
-        // Check if pattern ends with a number and doesn't have -q or -session suffix
-        if (/^[a-zA-Z]+-\d+$/.test(trimmed)) {
-          const matchingQuestions = allQuestions.filter((q) =>
-            q.questionId.startsWith(trimmed + "-q")
-          )
-          matchingQuestions.forEach((q) => {
-            expanded.push(q.questionId)
-            expandedIds.add(q.questionId)
-          })
-        }
-        // Pattern 2: Session ID (e.g., "conv-26-session_1" or "001be529-session-0")
-        // Find all questions that reference this session
-        else if (trimmed.includes("-session")) {
-          const matchingQuestions = allQuestions.filter((q) =>
-            q.haystackSessionIds.includes(trimmed)
-          )
-          matchingQuestions.forEach((q) => {
-            expanded.push(q.questionId)
-            expandedIds.add(q.questionId)
-          })
-        }
-        // Pattern 3: Direct question ID - add as-is if it exists
-        else {
-          const exactMatch = allQuestions.find((q) => q.questionId === trimmed)
-          if (exactMatch) {
-            expanded.push(trimmed)
-            expandedIds.add(trimmed)
-          }
+        const addMatch = (id: string) => { expanded.push(id); expandedIds.add(id) }
+
+        // Priority: exact match > session lookup > prefix expansion
+        const exactMatch = allQuestions.find((q) => q.questionId === trimmed)
+        if (exactMatch) {
+          addMatch(trimmed)
+        } else if (trimmed.includes("-session")) {
+          // Session ID — find all questions that reference this session
+          allQuestions
+            .filter((q) => q.haystackSessionIds.includes(trimmed))
+            .forEach((q) => addMatch(q.questionId))
+        } else {
+          // Prefix match — works across all benchmarks (e.g., "conv-26" matches
+          // "conv-26-q0", "convomem-user_evidence" matches "convomem-user_evidence-0")
+          const prefix = trimmed.endsWith("-") ? trimmed : trimmed + "-"
+          allQuestions
+            .filter((q) => q.questionId.startsWith(prefix))
+            .forEach((q) => addMatch(q.questionId))
         }
 
-        patternResults[pattern] = expanded
+        patternResults[trimmed] = expanded
       }
 
       return json({
diff --git a/src/utils/question-ids.ts b/src/utils/question-ids.ts
new file mode 100644
index 0000000..8db1430
--- /dev/null
+++ b/src/utils/question-ids.ts
@@ -0,0 +1,46 @@
+import { logger } from "./logger"
+
+export interface ValidateQuestionIdsResult {
+  validIds: string[]
+  invalidIds: string[]
+}
+
+/**
+ * Partitions `questionIds` into IDs that exactly match a `questionId` in `allQuestions` (validIds) and IDs that do not (invalidIds).
+ * Logs a warning listing invalid IDs and an info summary on success. Throws an Error naming `benchmarkName` when no ID is valid.
+ */
+export function validateQuestionIds(
+  questionIds: string[],
+  allQuestions: { questionId: string }[],
+  benchmarkName: string
+): ValidateQuestionIdsResult {
+  const allQuestionIdsSet = new Set(allQuestions.map((q) => q.questionId)) // exact-match lookup of known IDs
+  const validIds: string[] = []
+  const invalidIds: string[] = []
+
+  for (const id of questionIds) { // keeps input order; duplicates are not collapsed
+    if (allQuestionIdsSet.has(id)) {
+      validIds.push(id)
+    } else {
+      invalidIds.push(id)
+    }
+  }
+
+  if (invalidIds.length > 0) {
+    logger.warn(`Invalid question IDs (will be skipped): ${invalidIds.join(", ")}`)
+  }
+
+  if (validIds.length === 0) { // also the case when questionIds is empty
+    throw new Error(
+      `All provided questionIds are invalid. No matching questions found in benchmark "${benchmarkName}". ` +
+        `Invalid IDs: ${invalidIds.join(", ")}`
+    )
+  }
+
+  logger.info(
+    `Using explicit questionIds: ${validIds.length} valid questions` +
+      (invalidIds.length > 0 ? ` (${invalidIds.length} invalid skipped)` : "")
+  )
+
+  return { validIds, invalidIds }
+}
diff --git a/ui/app/compare/new/page.tsx b/ui/app/compare/new/page.tsx
index e095b84..a1abf73 100644
--- a/ui/app/compare/new/page.tsx
+++ b/ui/app/compare/new/page.tsx
@@ -8,14 +8,14 @@ import {
   getBenchmarks,
   getModels,
   startCompare,
-  expandQuestionIdPatterns,
-  getBenchmarkQuestions,
   type SelectionMode,
   type SampleType,
   type SamplingConfig,
 } from "@/lib/api"
+import { type QuestionIdValidationResult } from "@/lib/question-id-validation"
 import { SingleSelect } from "@/components/single-select"
 import { MultiSelect } from "@/components/multi-select"
+import { QuestionIdSelector } from "@/components/question-id-selector"
 
 export default function NewComparePage() {
   const router = useRouter()
@@ -41,14 +41,7 @@ export default function NewComparePage() {
   })
 
   const [editingCompareId, setEditingCompareId] = useState(false)
-  const [validatingQuestionIds, setValidatingQuestionIds] = useState(false)
-  const [questionIdValidation, setQuestionIdValidation] = useState<{
-    valid: string[]
-    invalid: string[]
-    total: number
-    expanded: string[]
-    patternResults: Record<string, string[]>
-  } | null>(null)
+  const [questionIdValidation, setQuestionIdValidation] = useState<QuestionIdValidationResult | null>(null)
   const compareIdInputRef = useRef<HTMLInputElement>(null)
 
   useEffect(() => {
@@ -83,70 +76,6 @@ export default function NewComparePage() {
     }
   }
 
-  async function validateQuestionIds(
-    benchmark: string,
-    questionIdsInput: string
-  ): Promise<{
-    valid: string[]
-    invalid: string[]
-    total: number
-    expanded: string[]
-    patternResults: Record<string, string[]>
-  }> {
-    // Parse input: split by comma, trim, remove duplicates
-    const inputPatterns = questionIdsInput
-      .split(",")
-      .map((id) => id.trim())
-      .filter((id) => id.length > 0)
-    const uniquePatterns = [...new Set(inputPatterns)]
-
-    // Call pattern expansion endpoint
-    const expansionResult = await expandQuestionIdPatterns(benchmark, uniquePatterns)
-    const expandedIds = expansionResult.expandedIds
-
-    // Fetch all questions to validate expanded IDs exist
-    const allQuestionIds = new Set<string>()
-    let page = 1
-    let hasMore = true
-
-    while (hasMore) {
-      const response = await getBenchmarkQuestions(benchmark, {
-        page,
-        limit: 100,
-      })
-      response.questions.forEach((q) => allQuestionIds.add(q.questionId))
-      hasMore = page < response.pagination.totalPages
-      page++
-    }
-
-    // Validate expanded IDs
-    const valid: string[] = []
-    const invalid: string[] = []
-
-    expandedIds.forEach((id) => {
-      if (allQuestionIds.has(id)) {
-        valid.push(id)
-      } else {
-        invalid.push(id)
-      }
-    })
-
-    // Find patterns that didn't expand to anything
-    const patternsWithNoResults = uniquePatterns.filter(
-      (pattern) =>
-        !expansionResult.patternResults[pattern] ||
-        expansionResult.patternResults[pattern].length === 0
-    )
-
-    return {
-      valid,
-      invalid: [...invalid, ...patternsWithNoResults],
-      total: uniquePatterns.length,
-      expanded: expandedIds,
-      patternResults: expansionResult.patternResults,
-    }
-  }
-
   function generateCompareId() {
     const now = new Date()
     const date = now.toISOString().slice(0, 10).replace(/-/g, "")
@@ -444,97 +373,14 @@ export default function NewComparePage() {
           )}
 
           {form.selectionMode === "questionIds" && (
-            <div className="space-y-3">
-              <div>
-                <label className="block text-sm text-text-secondary mb-2">
-                  Question IDs (comma-separated)
-                </label>
-                <textarea
-                  className="w-full px-3 py-2 text-sm bg-[#222222] border border-[#444444] rounded text-text-primary placeholder-text-muted focus:outline-none focus:border-accent font-mono"
-                  rows={4}
-                  value={form.questionIds}
-                  onChange={(e) => {
-                    setForm({ ...form, questionIds: e.target.value })
-                    setQuestionIdValidation(null)
-                  }}
-                  placeholder="e.g., conv-30, conv-30-q0, conv-30-session_1"
-                />
-                <p className="text-xs text-text-muted mt-1">
-                  Enter question IDs, conversation IDs (e.g., conv-26), or session IDs (e.g.,
-                  conv-26-session_1), separated by commas
-                </p>
-              </div>
-
-              {/* Validation Button */}
-              <button
-                type="button"
-                onClick={async () => {
-                  if (!form.questionIds.trim()) {
-                    setError("Please enter at least one question ID")
-                    return
-                  }
-                  if (!form.benchmark) {
-                    setError("Please select a benchmark first")
-                    return
-                  }
-
-                  setValidatingQuestionIds(true)
-                  setError(null)
-                  try {
-                    const validation = await validateQuestionIds(form.benchmark, form.questionIds)
-                    setQuestionIdValidation(validation)
-
-                    if (validation.invalid.length > 0) {
-                      setError(`Invalid question IDs: ${validation.invalid.join(", ")}`)
-                    }
-                  } catch (e) {
-                    setError(e instanceof Error ? e.message : "Failed to validate question IDs")
-                  } finally {
-                    setValidatingQuestionIds(false)
-                  }
-                }}
-                disabled={validatingQuestionIds || !form.benchmark || !form.questionIds.trim()}
-                className="px-3 py-1.5 text-sm bg-[#222222] border border-[#444444] rounded text-text-primary hover:border-accent disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
-              >
-                {validatingQuestionIds ? (
-                  <>
-                    <div className="inline-block w-3 h-3 border-2 border-accent border-t-transparent rounded-full animate-spin mr-2" />
-                    Validating...
-                  </>
-                ) : (
-                  "Validate Question IDs"
-                )}
-              </button>
-
-              {/* Validation Result */}
-              {questionIdValidation && (
-                <div
-                  className={`p-3 rounded text-sm border ${
-                    questionIdValidation.invalid.length === 0
-                      ? "bg-green-500/10 border-green-500/20 text-green-400"
-                      : "bg-yellow-500/10 border-yellow-500/20 text-yellow-400"
-                  }`}
-                >
-                  <div className="font-medium mb-1">
-                    {questionIdValidation.invalid.length === 0
-                      ? `✓ Valid: ${questionIdValidation.total} patterns expanded to ${questionIdValidation.expanded.length} questions`
-                      : `⚠ ${questionIdValidation.valid.length} valid, ${questionIdValidation.invalid.length} invalid patterns`}
-                  </div>
-                  {questionIdValidation.invalid.length > 0 && (
-                    <div className="text-xs mt-1">
-                      Invalid: {questionIdValidation.invalid.join(", ")}
-                    </div>
-                  )}
-                  {questionIdValidation.expanded.length > 0 && (
-                    <div className="text-xs mt-2 opacity-80">
-                      Sample expanded IDs: {questionIdValidation.expanded.slice(0, 5).join(", ")}
-                      {questionIdValidation.expanded.length > 5 &&
-                        ` ...and ${questionIdValidation.expanded.length - 5} more`}
-                    </div>
-                  )}
-                </div>
-              )}
-            </div>
+            <QuestionIdSelector
+              benchmark={form.benchmark}
+              value={form.questionIds}
+              onChange={(value) => setForm({ ...form, questionIds: value })}
+              onValidationChange={setQuestionIdValidation}
+              validation={questionIdValidation}
+              onError={setError}
+            />
           )}
         </div>
 
diff --git a/ui/app/runs/new/page.tsx b/ui/app/runs/new/page.tsx
index 5087962..8cd5bbb 100644
--- a/ui/app/runs/new/page.tsx
+++ b/ui/app/runs/new/page.tsx
@@ -9,8 +9,6 @@ import {
   getModels,
   startRun,
   getCompletedRuns,
-  expandQuestionIdPatterns,
-  getBenchmarkQuestions,
   type RunSummary,
   type PhaseId,
   PHASE_ORDER,
@@ -19,7 +17,9 @@ import {
   type SamplingConfig,
   type Provider,
 } from "@/lib/api"
+import { type QuestionIdValidationResult } from "@/lib/question-id-validation"
 import { SingleSelect } from "@/components/single-select"
+import { QuestionIdSelector } from "@/components/question-id-selector"
 
 type Tab = "new" | "advanced"
 
@@ -70,14 +70,7 @@ export default function NewRunPage() {
   const [showAdvancedConcurrencyNew, setShowAdvancedConcurrencyNew] = useState(false)
   const [showAdvancedConcurrencyAdvanced, setShowAdvancedConcurrencyAdvanced] = useState(false)
   const [editingPhase, setEditingPhase] = useState<string | null>(null)
-  const [validatingQuestionIds, setValidatingQuestionIds] = useState(false)
-  const [questionIdValidation, setQuestionIdValidation] = useState<{
-    valid: string[]
-    invalid: string[]
-    total: number
-    expanded: string[]
-    patternResults: Record<string, string[]>
-  } | null>(null)
+  const [questionIdValidation, setQuestionIdValidation] = useState<QuestionIdValidationResult | null>(null)
   const runIdInputRef = useRef<HTMLInputElement>(null)
   const advancedRunIdInputRef = useRef<HTMLInputElement>(null)
   const concurrencyInputRef = useRef<HTMLInputElement>(null)
@@ -223,70 +216,6 @@ export default function NewRunPage() {
     }
   }
 
-  async function validateQuestionIds(
-    benchmark: string,
-    questionIdsInput: string
-  ): Promise<{
-    valid: string[]
-    invalid: string[]
-    total: number
-    expanded: string[]
-    patternResults: Record<string, string[]>
-  }> {
-    // Parse input: split by comma, trim, remove duplicates
-    const inputPatterns = questionIdsInput
-      .split(",")
-      .map((id) => id.trim())
-      .filter((id) => id.length > 0)
-    const uniquePatterns = [...new Set(inputPatterns)]
-
-    // Call pattern expansion endpoint
-    const expansionResult = await expandQuestionIdPatterns(benchmark, uniquePatterns)
-    const expandedIds = expansionResult.expandedIds
-
-    // Fetch all questions to validate expanded IDs exist
-    const allQuestionIds = new Set<string>()
-    let page = 1
-    let hasMore = true
-
-    while (hasMore) {
-      const response = await getBenchmarkQuestions(benchmark, {
-        page,
-        limit: 100,
-      })
-      response.questions.forEach((q) => allQuestionIds.add(q.questionId))
-      hasMore = page < response.pagination.totalPages
-      page++
-    }
-
-    // Validate expanded IDs
-    const valid: string[] = []
-    const invalid: string[] = []
-
-    expandedIds.forEach((id) => {
-      if (allQuestionIds.has(id)) {
-        valid.push(id)
-      } else {
-        invalid.push(id)
-      }
-    })
-
-    // Find patterns that didn't expand to anything
-    const patternsWithNoResults = uniquePatterns.filter(
-      (pattern) =>
-        !expansionResult.patternResults[pattern] ||
-        expansionResult.patternResults[pattern].length === 0
-    )
-
-    return {
-      valid,
-      invalid: [...invalid, ...patternsWithNoResults],
-      total: uniquePatterns.length,
-      expanded: expandedIds,
-      patternResults: expansionResult.patternResults,
-    }
-  }
-
   function generateRunId() {
     const timestamp = new Date().toISOString().slice(0, 10).replace(/-/g, "")
     const random = Math.random().toString(36).slice(2, 6)
@@ -1042,101 +971,14 @@ export default function NewRunPage() {
               )}
 
               {form.selectionMode === "questionIds" && (
-                <div className="space-y-3">
-                  <div>
-                    <label className="block text-sm text-text-secondary mb-2">
-                      Question IDs (comma-separated)
-                    </label>
-                    <textarea
-                      className="w-full px-3 py-2 text-sm bg-[#222222] border border-[#444444] rounded text-text-primary placeholder-text-muted focus:outline-none focus:border-accent font-mono"
-                      rows={4}
-                      value={form.questionIds}
-                      onChange={(e) => {
-                        setForm({ ...form, questionIds: e.target.value })
-                        setQuestionIdValidation(null)
-                      }}
-                      placeholder="e.g., conv-30, conv-30-q0, conv-30-session_1"
-                    />
-                    <p className="text-xs text-text-muted mt-1">
-                      Enter question IDs, conversation IDs (e.g., conv-26), or session IDs (e.g.,
-                      conv-26-session_1), separated by commas
-                    </p>
-                  </div>
-
-                  {/* Validation Button */}
-                  <button
-                    type="button"
-                    onClick={async () => {
-                      if (!form.questionIds.trim()) {
-                        setError("Please enter at least one question ID")
-                        return
-                      }
-                      if (!form.benchmark) {
-                        setError("Please select a benchmark first")
-                        return
-                      }
-
-                      setValidatingQuestionIds(true)
-                      setError(null)
-                      try {
-                        const validation = await validateQuestionIds(
-                          form.benchmark,
-                          form.questionIds
-                        )
-                        setQuestionIdValidation(validation)
-
-                        if (validation.invalid.length > 0) {
-                          setError(`Invalid question IDs: ${validation.invalid.join(", ")}`)
-                        }
-                      } catch (e) {
-                        setError(e instanceof Error ? e.message : "Failed to validate question IDs")
-                      } finally {
-                        setValidatingQuestionIds(false)
-                      }
-                    }}
-                    disabled={validatingQuestionIds || !form.benchmark || !form.questionIds.trim()}
-                    className="px-3 py-1.5 text-sm bg-[#222222] border border-[#444444] rounded text-text-primary hover:border-accent disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
-                  >
-                    {validatingQuestionIds ? (
-                      <>
-                        <div className="inline-block w-3 h-3 border-2 border-accent border-t-transparent rounded-full animate-spin mr-2" />
-                        Validating...
-                      </>
-                    ) : (
-                      "Validate Question IDs"
-                    )}
-                  </button>
-
-                  {/* Validation Result */}
-                  {questionIdValidation && (
-                    <div
-                      className={`p-3 rounded text-sm border ${
-                        questionIdValidation.invalid.length === 0
-                          ? "bg-green-500/10 border-green-500/20 text-green-400"
-                          : "bg-yellow-500/10 border-yellow-500/20 text-yellow-400"
-                      }`}
-                    >
-                      <div className="font-medium mb-1">
-                        {questionIdValidation.invalid.length === 0
-                          ? `✓ Valid: ${questionIdValidation.total} patterns expanded to ${questionIdValidation.expanded.length} questions`
-                          : `⚠ ${questionIdValidation.valid.length} valid, ${questionIdValidation.invalid.length} invalid patterns`}
-                      </div>
-                      {questionIdValidation.invalid.length > 0 && (
-                        <div className="text-xs mt-1">
-                          Invalid: {questionIdValidation.invalid.join(", ")}
-                        </div>
-                      )}
-                      {questionIdValidation.expanded.length > 0 && (
-                        <div className="text-xs mt-2 opacity-80">
-                          Sample expanded IDs:{" "}
-                          {questionIdValidation.expanded.slice(0, 5).join(", ")}
-                          {questionIdValidation.expanded.length > 5 &&
-                            ` ...and ${questionIdValidation.expanded.length - 5} more`}
-                        </div>
-                      )}
-                    </div>
-                  )}
-                </div>
+                <QuestionIdSelector
+                  benchmark={form.benchmark}
+                  value={form.questionIds}
+                  onChange={(value) => setForm({ ...form, questionIds: value })}
+                  onValidationChange={setQuestionIdValidation}
+                  validation={questionIdValidation}
+                  onError={setError}
+                />
               )}
             </div>
 
diff --git a/ui/components/question-id-selector.tsx b/ui/components/question-id-selector.tsx
new file mode 100644
index 0000000..64484a1
--- /dev/null
+++ b/ui/components/question-id-selector.tsx
@@ -0,0 +1,149 @@
+"use client"
+
+import { useEffect, useRef, useState } from "react"
+import {
+  validateQuestionIdPatterns,
+  type QuestionIdValidationResult,
+} from "@/lib/question-id-validation"
+
+/** Props for QuestionIdSelector. All form state is owned by the parent. */
+interface QuestionIdSelectorProps {
+  /** Benchmark the patterns are validated against; the button is disabled while empty. */
+  benchmark: string
+  /** Raw comma-separated pattern text shown in the textarea. */
+  value: string
+  /** Called with the new textarea text on every edit. */
+  onChange: (value: string) => void
+  /** Called with a fresh validation result, or null when the text is edited. */
+  onValidationChange: (result: QuestionIdValidationResult | null) => void
+  /** Validation result to render; null hides the result panel. */
+  validation: QuestionIdValidationResult | null
+  /** Called with an error message to surface, or null to clear it. */
+  onError: (error: string | null) => void
+}
+
+/**
+ * Textarea plus "Validate Question IDs" button for entering question ID patterns.
+ *
+ * Validation is delegated to validateQuestionIdPatterns. A result (or error) is
+ * only reported if `benchmark` and `value` are unchanged since the request was
+ * started, so a slow response can never mark edited input as validated.
+ */
+export function QuestionIdSelector({
+  benchmark,
+  value,
+  onChange,
+  onValidationChange,
+  validation,
+  onError,
+}: QuestionIdSelectorProps) {
+  const [validating, setValidating] = useState(false)
+  // Bumped whenever the inputs change; lets an in-flight request detect that it is stale.
+  const inputVersion = useRef(0)
+
+  useEffect(() => {
+    inputVersion.current += 1
+  }, [benchmark, value])
+
+  async function handleValidate() {
+    if (!value.trim()) {
+      onError("Please enter at least one question ID")
+      return
+    }
+    if (!benchmark) {
+      onError("Please select a benchmark first")
+      return
+    }
+
+    const versionAtStart = inputVersion.current
+    const isStale = () => inputVersion.current !== versionAtStart
+
+    setValidating(true)
+    onError(null)
+    try {
+      const result = await validateQuestionIdPatterns(benchmark, value)
+      // The text was edited (or the benchmark changed) while the request was pending.
+      if (isStale()) return
+      onValidationChange(result)
+
+      if (result.invalid.length > 0) {
+        onError(`Invalid patterns: ${result.invalid.join(", ")}`)
+      }
+    } catch (e) {
+      if (isStale()) return
+      onError(e instanceof Error ? e.message : "Failed to validate question IDs")
+    } finally {
+      setValidating(false)
+    }
+  }
+
+  return (
+    <div className="space-y-3">
+      <div>
+        <label className="block text-sm text-text-secondary mb-2">
+          Question IDs (comma-separated)
+        </label>
+        <textarea
+          className="w-full px-3 py-2 text-sm bg-[#222222] border border-[#444444] rounded text-text-primary placeholder-text-muted focus:outline-none focus:border-accent font-mono"
+          rows={4}
+          value={value}
+          onChange={(e) => {
+            onChange(e.target.value)
+            onValidationChange(null)
+          }}
+          placeholder="e.g., conv-30, conv-30-q0, conv-30-session_1"
+        />
+        <p className="text-xs text-text-muted mt-1">
+          Enter question IDs, conversation/group prefixes (e.g., conv-26), or session IDs (e.g.,
+          conv-26-session_1), separated by commas
+        </p>
+      </div>
+
+      {/* Validation Button */}
+      <button
+        type="button"
+        onClick={handleValidate}
+        disabled={validating || !benchmark || !value.trim()}
+        className="px-3 py-1.5 text-sm bg-[#222222] border border-[#444444] rounded text-text-primary hover:border-accent disabled:opacity-50 disabled:cursor-not-allowed transition-colors"
+      >
+        {validating ? (
+          <>
+            <div className="inline-block w-3 h-3 border-2 border-accent border-t-transparent rounded-full animate-spin mr-2" />
+            Validating...
+          </>
+        ) : (
+          "Validate Question IDs"
+        )}
+      </button>
+
+      {/* Validation Result */}
+      {validation && (
+        <div
+          className={`p-3 rounded text-sm border ${
+            validation.invalid.length === 0
+              ? "bg-green-500/10 border-green-500/20 text-green-400"
+              : "bg-yellow-500/10 border-yellow-500/20 text-yellow-400"
+          }`}
+        >
+          <div className="font-medium mb-1">
+            {validation.invalid.length === 0
+              ? `✓ Valid: ${validation.total} patterns expanded to ${validation.expanded.length} questions`
+              : `⚠ ${validation.valid.length} valid, ${validation.invalid.length} invalid patterns`}
+          </div>
+          {validation.invalid.length > 0 && (
+            <div className="text-xs mt-1">
+              Invalid: {validation.invalid.join(", ")}
+            </div>
+          )}
+          {validation.expanded.length > 0 && (
+            <div className="text-xs mt-2 opacity-80">
+              Sample expanded IDs: {validation.expanded.slice(0, 5).join(", ")}
+              {validation.expanded.length > 5 &&
+                ` ...and ${validation.expanded.length - 5} more`}
+            </div>
+          )}
+        </div>
+      )}
+    </div>
+  )
+}
diff --git a/ui/lib/question-id-validation.ts b/ui/lib/question-id-validation.ts
new file mode 100644
index 0000000..c2007e8
--- /dev/null
+++ b/ui/lib/question-id-validation.ts
@@ -0,0 +1,62 @@
+import { expandQuestionIdPatterns } from "./api"
+
+/** Outcome of validating user-entered question ID patterns against a benchmark. */
+export interface QuestionIdValidationResult {
+  /** Question IDs the patterns expanded to (same list as `expanded`). */
+  valid: string[]
+  /** Input patterns that matched no question. */
+  invalid: string[]
+  /** Number of unique, non-empty input patterns. */
+  total: number
+  /** Question IDs the patterns expanded to, as returned by the expand-ids call. */
+  expanded: string[]
+  /** Per-pattern expansion returned by the expand-ids call, keyed by pattern. */
+  patternResults: Record<string, string[]>
+}
+
+/**
+ * Validates question ID patterns against a benchmark by calling the server's expand-ids endpoint.
+ * The server handles all validation — patterns that don't match any questions are reported as invalid.
+ *
+ * @param benchmark - Benchmark name passed through to the expand-ids request.
+ * @param questionIdsInput - Comma-separated patterns; entries are trimmed and de-duplicated.
+ * @returns The expanded question IDs plus the patterns that matched nothing.
+ * @throws Error if the input contains no non-empty pattern (e.g. only commas or
+ *   whitespace); errors from the expand-ids request propagate unchanged.
+ */
+export async function validateQuestionIdPatterns(
+  benchmark: string,
+  questionIdsInput: string
+): Promise<QuestionIdValidationResult> {
+  // Parse input: split by comma, trim, remove empty, deduplicate
+  const inputPatterns = questionIdsInput
+    .split(",")
+    .map((id) => id.trim())
+    .filter((id) => id.length > 0)
+  const uniquePatterns = [...new Set(inputPatterns)]
+
+  // Input such as "," or " , " survives a plain trim() check but holds no pattern.
+  // Fail fast rather than sending an empty pattern list, which (if the server
+  // accepts it) produces a result with no invalid patterns.
+  if (uniquePatterns.length === 0) {
+    throw new Error("Please enter at least one question ID")
+  }
+
+  // Server expands patterns and validates against the benchmark's questions
+  const expansionResult = await expandQuestionIdPatterns(benchmark, uniquePatterns)
+
+  // Patterns that didn't expand to anything are invalid
+  const patternsWithNoResults = uniquePatterns.filter(
+    (pattern) =>
+      !expansionResult.patternResults[pattern] ||
+      expansionResult.patternResults[pattern].length === 0
+  )
+
+  return {
+    valid: expansionResult.expandedIds,
+    invalid: patternsWithNoResults,
+    total: uniquePatterns.length,
+    expanded: expansionResult.expandedIds,
+    patternResults: expansionResult.patternResults,
+  }
+}