diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index ed9d5a1ba52..47cb20b6893 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -271,6 +271,9 @@ jobs:
     needs: [lints]
     name: Keynote Bench
     runs-on: spacetimedb-benchmark-runner
+    concurrency:  # Same group name as the Index Scan Bench job below.
+      group: ci-benchmark-runner
+      queue: max  # NOTE(review): semantics not visible here; confirm against the Actions concurrency docs.
     timeout-minutes: 60
     env:
       CARGO_TARGET_DIR: ${{ github.workspace }}/target
@@ -325,6 +328,50 @@ jobs:
       - name: Run keynote-2 benchmark regression check
         run: cargo ci keynote-bench
 
+  index_scan_bench:  # Runs the index_scan_gate bench; a non-zero exit fails the job.
+    needs: [lints]
+    name: Index Scan Bench
+    runs-on: spacetimedb-benchmark-runner
+    concurrency:  # Same group name as the Keynote Bench job.
+      group: ci-benchmark-runner
+      queue: max
+    timeout-minutes: 60
+    env:
+      CARGO_TARGET_DIR: ${{ github.workspace }}/target
+      RUST_BACKTRACE: full
+    steps:
+      - name: Find Git ref
+        env:  # Inputs reach the script as env vars, never as inline expressions.
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          PR_NUMBER: ${{ github.event.inputs.pr_number }}
+        run: |
+          if test -n "${PR_NUMBER}"; then
+            GIT_REF="$( gh pr view --repo clockworklabs/SpacetimeDB "${PR_NUMBER}" --json headRefName --jq .headRefName )"
+          else
+            GIT_REF="${GITHUB_REF}"
+          fi
+          echo "GIT_REF=${GIT_REF}" >>"$GITHUB_ENV"
+
+      - name: Checkout sources
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ env.GIT_REF }}
+
+      - uses: dsherret/rust-toolchain-file@v1
+      - name: Set default rust toolchain
+        run: rustup default $(rustup show active-toolchain | cut -d' ' -f1)
+
+      - name: Cache Rust dependencies
+        uses: Swatinem/rust-cache@v2
+        with:
+          workspaces: ${{ github.workspace }}
+          shared-key: spacetimedb
+          save-if: false
+          prefix-key: v1
+
+      - name: Run index scan benchmark regression check
+        run: cargo bench -p spacetimedb-bench --bench index_scan_gate
+
   lints:
     name: Lints
     runs-on: spacetimedb-new-runner-2
diff --git a/crates/bench/Cargo.toml b/crates/bench/Cargo.toml
index 115eb115022..ea8d4960227 100644
--- a/crates/bench/Cargo.toml
+++ b/crates/bench/Cargo.toml
@@ -30,6 +30,10 @@ harness = false
 name = "index"
 harness = false
 
+[[bench]]
+name = "index_scan_gate"
+harness = false # The bench target defines its own `main`.
+
 [[bin]]
 name = "summarize"
 
diff --git a/crates/bench/benches/index_scan_gate.rs b/crates/bench/benches/index_scan_gate.rs
new file mode 100644
index 00000000000..a03977f61ed
--- /dev/null
+++ b/crates/bench/benches/index_scan_gate.rs
@@ -0,0 +1,94 @@
+//! CI gate for the index scan workloads of the `perf-test` module.
+//!
+//! Each reducer in `REDUCERS` is warmed up and then timed `MEASURED_RUNS` times.
+//! `main` returns an error if any median reaches `MEDIAN_THRESHOLD`.
+
+use std::time::Duration;
+
+use anyhow::{bail, Context, Result};
+use spacetimedb_sats::product;
+use spacetimedb_testing::modules::{start_runtime, CompilationMode, CompiledModule, IN_MEMORY_CONFIG};
+
+#[cfg(not(target_env = "msvc"))]
+use tikv_jemallocator::Jemalloc;
+
+#[cfg(not(target_env = "msvc"))]
+#[global_allocator]
+static GLOBAL: Jemalloc = Jemalloc;
+
+/// Calls made to each reducer before sampling; their results are discarded.
+const WARMUP_RUNS: usize = 5;
+/// Timed calls per reducer; an odd count gives a single middle sample.
+const MEASURED_RUNS: usize = 31;
+/// A reducer fails the gate when its median execution duration is at or above this.
+const MEDIAN_THRESHOLD: Duration = Duration::from_micros(100);
+
+/// `perf-test` reducers timed by the gate, in execution order.
+const REDUCERS: &[&str] = &[
+    "test_index_scan_on_id",
+    "test_index_scan_on_chunk",
+    "test_index_scan_on_x_z_dimension",
+    "test_index_scan_on_x_z",
+];
+
+/// Loads `perf-test`, populates its location table, and checks every reducer's median.
+///
+/// Returns an error naming each reducer whose median is at or above `MEDIAN_THRESHOLD`,
+/// or the first error raised by a reducer call.
+fn main() -> Result<()> {
+    let compiled = CompiledModule::compile("perf-test", CompilationMode::Release);
+    let runtime = start_runtime();
+
+    runtime.block_on(async {
+        let module = compiled.load_module(IN_MEMORY_CONFIG, None).await;
+        let no_args = product![];
+
+        println!("loading perf-test location table...");
+        module
+            .call_reducer_binary("load_location_table", &no_args)
+            .await
+            .context("failed to load perf-test location table")?;
+
+        let mut failures = Vec::new();
+        for reducer in REDUCERS.iter().copied() {
+            for _ in 0..WARMUP_RUNS {
+                module
+                    .call_reducer_binary_result(reducer, &no_args)
+                    .await
+                    .with_context(|| format!("failed during warmup for {reducer}"))?;
+            }
+
+            let mut samples = Vec::with_capacity(MEASURED_RUNS);
+            while samples.len() < MEASURED_RUNS {
+                let outcome = module
+                    .call_reducer_binary_result(reducer, &no_args)
+                    .await
+                    .with_context(|| format!("failed during measured run for {reducer}"))?;
+                samples.push(outcome.execution_duration);
+            }
+
+            // Partial selection places the same element at the midpoint as a full sort would.
+            let midpoint = samples.len() / 2;
+            let median = *samples.select_nth_unstable(midpoint).1;
+
+            println!("{reducer:<36} median={median:?}");
+            if median >= MEDIAN_THRESHOLD {
+                failures.push(format!("{reducer} median {median:?}"));
+            }
+        }
+
+        if failures.is_empty() {
+            println!(
+                "index scan benchmark passed; all medians are below {:?}",
+                MEDIAN_THRESHOLD
+            );
+            Ok(())
+        } else {
+            bail!(
+                "index scan benchmark failed; median threshold is {:?}; failures: {}",
+                MEDIAN_THRESHOLD,
+                failures.join(", ")
+            )
+        }
+    })
+}
diff --git a/crates/table/src/page_pool.rs b/crates/table/src/page_pool.rs
index 872654a0268..00c46704c4e 100644
--- a/crates/table/src/page_pool.rs
+++ b/crates/table/src/page_pool.rs
@@ -43,14 +43,6 @@ impl PagePool {
     /// if no size is provided, a default of 1 page is used.
     pub fn new(max_size: Option<usize>) -> Self {
         const PAGE_SIZE: usize = size_of::<Page>();
-        // TODO(centril): Currently, we have a test `test_index_scans`.
-        // The test sets up a `Location` table, like in BitCraft, with a `chunk` field,
-        // and populates it with 1000 different chunks with 1200 rows each.
-        // Then it asserts that the cold latency of an index scan on `chunk` takes < 1 ms.
-        // However, for reasons currently unknown to us,
-        // a large page pool, with capacity `1 << 26` bytes, on i7-7700K, 64GB RAM,
-        // will turn the latency into 30-40 ms.
-        // As a precaution, we use a smaller page pool by default.
         const DEFAULT_MAX_SIZE: usize = 128 * PAGE_SIZE; // 128 pages
 
         let queue_size = max_size.unwrap_or(DEFAULT_MAX_SIZE) / PAGE_SIZE;
diff --git a/crates/testing/src/modules.rs b/crates/testing/src/modules.rs
index 21fea57fe96..7dcf58df89c 100644
--- a/crates/testing/src/modules.rs
+++ b/crates/testing/src/modules.rs
@@ -25,6 +25,7 @@ use spacetimedb::client::{
 use spacetimedb::db::{Config, Storage};
 use spacetimedb::host::module_host::EventStatus;
 use spacetimedb::host::FunctionArgs;
+use spacetimedb::host::ReducerCallResult;
 use spacetimedb_client_api::{ControlStateReadAccess, ControlStateWriteAccess, DatabaseDef, NodeDelegate};
 use spacetimedb_client_api_messages::websocket::v1 as ws_v1;
 use spacetimedb_lib::identity::RequestId;
@@ -61,21 +62,30 @@ pub struct ModuleHandle {
 }
 
 impl ModuleHandle {
-    async fn call_reducer(&self, reducer: &str, args: FunctionArgs) -> anyhow::Result<()> {
+    /// Calls `reducer` with `args` and returns the call result when `is_ok()` holds for it.
+    /// A failed call, or a result that is not ok, is returned as an error
+    /// whose context carries the module log read via `read_log`.
+    async fn call_reducer_result(&self, reducer: &str, args: FunctionArgs) -> anyhow::Result<ReducerCallResult> {
         let result = self
             .client
             .call_reducer(reducer, args, 0, Instant::now(), ws_v1::CallReducerFlags::FullUpdate)
-            .await;
-        let result = match result {
-            Ok(result) => result.into(),
-            Err(err) => Err(err.into()),
-        };
+            .await
+            .map_err(anyhow::Error::from);
         match result {
-            Ok(()) => Ok(()),
+            Ok(result) if result.is_ok() => Ok(result),
+            Ok(result) => {
+                let err = Result::<(), anyhow::Error>::from(result)
+                    .expect_err("non-committed reducer outcome should produce an error");
+                Err(err.context(format!("Logs:\n{}", self.read_log(None).await)))
+            }
             Err(err) => Err(err.context(format!("Logs:\n{}", self.read_log(None).await))),
         }
     }
 
+    async fn call_reducer(&self, reducer: &str, args: FunctionArgs) -> anyhow::Result<()> {
+        self.call_reducer_result(reducer, args).await.map(|_| ()) // Keep only success or failure.
+    }
+
     pub async fn call_reducer_json(&self, reducer: &str, args: &sats::ProductValue) -> anyhow::Result<()> {
         let args = serde_json::to_string(&args).unwrap();
         self.call_reducer(reducer, FunctionArgs::Json(args.into())).await
@@ -86,6 +96,16 @@ impl ModuleHandle {
         self.call_reducer(reducer, FunctionArgs::Bsatn(args.into())).await
     }
 
+    /// BSATN-encodes `args` (panicking if encoding fails), calls `reducer`, and returns the call result.
+    pub async fn call_reducer_binary_result(
+        &self,
+        reducer: &str,
+        args: &sats::ProductValue,
+    ) -> anyhow::Result<ReducerCallResult> {
+        let encoded = FunctionArgs::Bsatn(bsatn::to_vec(&args).unwrap().into());
+        self.call_reducer_result(reducer, encoded).await
+    }
+
     pub async fn send(&self, message: impl Into<DataMessage>) -> anyhow::Result<()> {
         let timer = Instant::now();
         self.client.handle_message(message, timer).await.map_err(Into::into)
@@ -314,10 +334,6 @@ pub static DEFAULT_CONFIG: Config = Config {
 /// For performance tests, do not persist to disk.
 pub static IN_MEMORY_CONFIG: Config = Config {
     storage: Storage::Disk,
-    // For some reason, a large page pool capacity causes `test_index_scans` to slow down,
-    // and makes the perf test for `chunk` go over 1ms.
-    // The threshold for failure on i7-7700K, 64GB RAM seems to be at 1 << 26.
-    // TODO(centril): investigate further why this size affects the benchmark.
     page_pool_max_size: Some(1 << 16),
 };
 
diff --git a/crates/testing/tests/standalone_integration_test.rs b/crates/testing/tests/standalone_integration_test.rs
index db5b9bedc3a..8de5cfe5bc4 100644
--- a/crates/testing/tests/standalone_integration_test.rs
+++ b/crates/testing/tests/standalone_integration_test.rs
@@ -2,7 +2,7 @@ use serial_test::serial;
 use spacetimedb_lib::sats::{product, AlgebraicValue};
 use spacetimedb_testing::modules::{
     CompilationMode, CompiledModule, Cpp, Csharp, LogLevel, LoggerRecord, ModuleHandle, ModuleLanguage, Rust,
-    TypeScript, DEFAULT_CONFIG, IN_MEMORY_CONFIG,
+    TypeScript, DEFAULT_CONFIG,
 };
 use std::{
     future::Future,
@@ -356,58 +356,6 @@ fn test_call_query_macro() {
     });
 }
 
-#[test]
-#[serial]
-/// This test runs the index scan workloads in the `perf-test` module.
-/// Timing spans should be < 1ms if the correct index was used.
-/// Otherwise these workloads will degenerate into full table scans.
-fn test_index_scans() {
-    init();
-    CompiledModule::compile("perf-test", CompilationMode::Release).with_module_async(
-        IN_MEMORY_CONFIG,
-        |module| async move {
-            let no_args = &product![];
-
-            module
-                .call_reducer_binary("load_location_table", no_args)
-                .await
-                .unwrap();
-
-            module
-                .call_reducer_binary("test_index_scan_on_id", no_args)
-                .await
-                .unwrap();
-
-            module
-                .call_reducer_binary("test_index_scan_on_chunk", no_args)
-                .await
-                .unwrap();
-
-            module
-                .call_reducer_binary("test_index_scan_on_x_z_dimension", no_args)
-                .await
-                .unwrap();
-
-            module
-                .call_reducer_binary("test_index_scan_on_x_z", no_args)
-                .await
-                .unwrap();
-
-            let logs = read_logs(&module).await;
-
-            // Each timing span should be < 1ms
-            let timing = |line: &str| {
-                line.starts_with("Timing span")
-                    && (line.ends_with("ns") || line.ends_with("us") || line.ends_with("µs"))
-            };
-            assert!(timing(&logs[0]));
-            assert!(timing(&logs[1]));
-            assert!(timing(&logs[2]));
-            assert!(timing(&logs[3]));
-        },
-    );
-}
-
 async fn bench_call(module: &ModuleHandle, call: &str, count: &u32) -> Duration {
     let now = Instant::now();
 
diff --git a/modules/perf-test/README.md b/modules/perf-test/README.md
index 6852de25e09..daba35e72fd 100644
--- a/modules/perf-test/README.md
+++ b/modules/perf-test/README.md
@@ -1,14 +1,13 @@
-# `perf-test` *Rust* test
+# `perf-test` *Rust* benchmark module
 
 A module with various `index scan` workloads for SpacetimeDB.
 
-Called as part of our tests to ensure the system is working as expected.
+Called by the `index_scan_gate` benchmark, which fails when the median execution duration of any index scan reducer reaches its threshold.
 
 ## How to Run
 
-Execute the test `test_index_scans`
-at [standalone_integration_test](../../crates/testing/tests/standalone_integration_test.rs):
+Execute the benchmark gate:
 
 ```bash
-cargo test -p spacetimedb-testing test_index_scans
-```
\ No newline at end of file
+cargo bench -p spacetimedb-bench --bench index_scan_gate
+```