From 4bba39207596db5b6e47344c7c7f9413f6731530 Mon Sep 17 00:00:00 2001
From: Hassan Abdel-Rahman <hassan.abdelrahman@gmail.com>
Date: Mon, 18 May 2026 18:03:37 -0400
Subject: [PATCH 1/2] Cache _federated-search-prerendered live-search fallback
 symmetrically
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Today the JobScopedSearchCache covers _federated-search only; the
prerendered endpoint's `searchPrerenderedRealms()` path runs uncached,
so any realm whose card templates use `PrerenderedSearchResource`
(`host/app/resources/prerendered-search.ts`) bypasses the per-job dedup
even during a worker-driven reindex. Extending the cache to
`_federated-search-prerendered` brings the two endpoints into symmetry.

Gating mirrors `handle-search.ts`: the cache is consulted only when
both `x-boxel-job-id` AND `x-boxel-consuming-realm` are present and
well-formed, i.e. the request was stamped by indexer-driven prerender
traffic. User-facing API callers never carry both headers and continue
to observe live SQL state.

The prerendered handler's request shape carries `htmlFormat`,
`cardUrls`, and `renderType` which materially change the response.
These are passed through `opts` so the existing `sortKeysDeep`-
canonicalised inner key segregates entries that differ on any of them
— and so the shared cache instance also segregates entries from the
two endpoints by key (no collision possible without identical
parameters across both shapes).

The cache instance is shared between the two handlers
(`routes.ts:188` and `routes.ts:200`). `CachedEntry.result` is now
typed `unknown` and `getOrPopulate<T>` is generic so the same store
holds both `LinkableCollectionDocument` (federated-search) and
`PrerenderedCardCollectionDocument` (prerendered) results.

Also folds in observability: per-job hit/miss counters that emit a
single `log.debug` line once a job's last entry leaves the cache —
either via `clearJob` (worker signals completion) or via TTL eviction
of the last surviving entry, whichever fires first. Off by default;
enable via `LOG_LEVELS=job-scoped-search-cache=debug` to confirm cache
utilisation on a specific batch.

Host-side: the three prerender-context header helpers
(`duringPrerenderHeaders`, `consumingRealmHeader`, `jobIdHeader`)
moved from `store.ts` into a shared `host/app/lib/prerender-fetch-headers.ts`
so the new `PrerenderedSearchResource.fetchPrerenderedCards` call can
stamp them on outbound requests using the same code path as
`store.search`. No behaviour change for live (non-prerender) SPA
fetches — the globals are unset and the headers stay absent, so those
calls keep bypassing the cache.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../host/app/lib/prerender-fetch-headers.ts   |  46 +++
 .../host/app/resources/prerendered-search.ts  |  13 +
 packages/host/app/services/store.ts           |  46 +--
 .../handlers/handle-search-prerendered.ts     |  73 +++-
 .../realm-server/job-scoped-search-cache.ts   |  76 ++++-
 packages/realm-server/routes.ts               |   2 +-
 .../search-prerendered-test.ts                | 321 ++++++++++++++++++
 7 files changed, 513 insertions(+), 64 deletions(-)
 create mode 100644 packages/host/app/lib/prerender-fetch-headers.ts
diff --git a/packages/host/app/lib/prerender-fetch-headers.ts b/packages/host/app/lib/prerender-fetch-headers.ts
new file mode 100644
index 00000000000..4f5d800a2d3
--- /dev/null
+++ b/packages/host/app/lib/prerender-fetch-headers.ts
@@ -0,0 +1,46 @@
+import {
+  DURING_PRERENDER_HEADER,
+  X_BOXEL_CONSUMING_REALM_HEADER,
+  X_BOXEL_JOB_ID_HEADER,
+} from '@cardstack/runtime-common';
+
+// Set by the prerender server's `evaluateOnNewDocument` before the
+// SPA boots — `__boxelDuringPrerender = true`. Read here so the
+// realm-server fetch wrappers can attach the marker header on
+// search calls only, narrowly scoping the signal to the endpoints
+// that need it. See realm.ts:DURING_PRERENDER_HEADER for the full
+// chain.
+export function duringPrerenderHeaders(): Record<string, string> {
+  let flag = (globalThis as unknown as { __boxelDuringPrerender?: boolean })
+    .__boxelDuringPrerender;
+  return flag ? { [DURING_PRERENDER_HEADER]: '1' } : {};
+}
+
+// While rendering inside a prerender tab the render route writes
+// `__boxelConsumingRealm` with the URL of the realm whose card is
+// being rendered. Attach it to outbound search requests so the
+// realm-server's job-scoped cache layer can gate caching on the
+// indexer-traffic shape. Read each fetch (not cached at module
+// scope) so a tab that renders cards from multiple realms in
+// sequence sends the correct header per request. Returns an empty
+// object when the global is not set so non-prerender (live SPA)
+// fetches behave exactly as before.
+export function consumingRealmHeader(): Record<string, string> {
+  let r = (globalThis as unknown as { __boxelConsumingRealm?: string })
+    .__boxelConsumingRealm;
+  return r ? { [X_BOXEL_CONSUMING_REALM_HEADER]: r } : {};
+}
+
+// Companion to `consumingRealmHeader()`. The prerender server's
+// `prerenderVisitAttempt` injects `__boxelJobId` onto the page
+// before transitioning into the render route — see
+// `packages/realm-server/prerender/render-runner.ts`. Read it on
+// each fetch (not module-scope-cached) so a page reused across
+// multiple visits picks up the current visit's job id. Outside a
+// prerender tab the global is undefined and we send no header, so
+// user / API callers continue to bypass the realm-server's
+// job-scoped cache.
+export function jobIdHeader(): Record<string, string> {
+  let j = (globalThis as unknown as { __boxelJobId?: string }).__boxelJobId;
+  return j ? { [X_BOXEL_JOB_ID_HEADER]: j } : {};
+}
diff --git a/packages/host/app/resources/prerendered-search.ts b/packages/host/app/resources/prerendered-search.ts
index a631ab7dc29..b184eea602f 100644
--- a/packages/host/app/resources/prerendered-search.ts
+++ b/packages/host/app/resources/prerendered-search.ts
@@ -29,6 +29,11 @@ import { isPrerenderedCardCollectionDocument } from '@cardstack/runtime-common/d
 import type { RealmEventContent } from 'https://cardstack.com/base/matrix-event';
 
 import { PrerenderedCard } from '../components/prerendered-card-search';
+import {
+  consumingRealmHeader,
+  duringPrerenderHeaders,
+  jobIdHeader,
+} from '../lib/prerender-fetch-headers';
 import { normalizeRealms, resolveCardRealmUrl } from '../lib/realm-utils';
 
 import type LoaderService from '../services/loader-service';
@@ -274,6 +279,14 @@ export class PrerenderedSearchResource extends Resource<Args> {
       headers: {
         Accept: SupportedMimeType.CardJson,
         'Content-Type': 'application/json',
+        // Plumb the prerender-context headers so a worker-driven
+        // render hits the realm-server's job-scoped search cache
+        // (the same instance shared with `_federated-search`).
+        // Live (non-prerender) SPA fetches send no values for any of
+        // these globals and continue to bypass the cache.
+        ...duringPrerenderHeaders(),
+        ...consumingRealmHeader(),
+        ...jobIdHeader(),
       },
       body: JSON.stringify({
         ...query,
diff --git a/packages/host/app/services/store.ts b/packages/host/app/services/store.ts
index 38b2ca893c1..820ee42c47f 100644
--- a/packages/host/app/services/store.ts
+++ b/packages/host/app/services/store.ts
@@ -23,7 +23,6 @@ import {
   baseFileRef,
   CardError,
   cardIdToURL,
-  DURING_PRERENDER_HEADER,
   isRegisteredPrefix,
   hasExecutableExtension,
   isCardError,
@@ -33,8 +32,6 @@ import {
   isSingleCardDocument,
   isLinkableCollectionDocument,
   resolveFileDefCodeRef,
-  X_BOXEL_CONSUMING_REALM_HEADER,
-  X_BOXEL_JOB_ID_HEADER,
   Deferred,
   delay,
   mergeRelationships,
@@ -86,6 +83,11 @@ import type { RealmEventContent } from 'https://cardstack.com/base/matrix-event'
 
 import CardStore, { getDeps, type ReferenceCount } from '../lib/gc-card-store';
 
+import {
+  consumingRealmHeader,
+  duringPrerenderHeaders,
+  jobIdHeader,
+} from '../lib/prerender-fetch-headers';
 import { errorJsonApiToErrorEntry } from '../lib/window-error-handler';
 import { getSearch } from '../resources/search';
 import {
@@ -121,44 +123,6 @@ let waiter = buildWaiter('store-service');
 const realmEventsLogger = logger('realm:events');
 const storeLogger = logger('store');
 
-// Set by the prerender server's `evaluateOnNewDocument` before the
-// SPA boots — `__boxelDuringPrerender = true`. Read here so the
-// federated-search fetch wrapper can attach the marker header on
-// realm-server-bound calls only, narrowly scoping the signal to the
-// endpoint that needs it. See realm.ts:DURING_PRERENDER_HEADER for
-// the full chain.
-function duringPrerenderHeaders(): Record<string, string> {
-  let flag = (globalThis as unknown as { __boxelDuringPrerender?: boolean })
-    .__boxelDuringPrerender;
-  return flag ? { [DURING_PRERENDER_HEADER]: '1' } : {};
-}
-
-// While rendering inside a prerender tab the render route writes
-// `__boxelConsumingRealm` with the URL of the realm whose card is being
-// rendered. Attach it to outbound `_federated-search` requests so the
-// realm-server's job-scoped cache layer can gate same-realm-only
-// caching. Read each fetch (not cached at module scope) so a tab that
-// renders cards from multiple realms in sequence sends the correct
-// header per request. Returns an empty object when the global is not
-// set so non-prerender (live SPA) fetches behave exactly as before.
-function consumingRealmHeader(): Record<string, string> {
-  let r = (globalThis as unknown as { __boxelConsumingRealm?: string })
-    .__boxelConsumingRealm;
-  return r ? { [X_BOXEL_CONSUMING_REALM_HEADER]: r } : {};
-}
-
-// Companion to `consumingRealmHeader()`. The prerender server's
-// `prerenderVisitAttempt` injects `__boxelJobId` onto the page before
-// transitioning into the render route — see
-// `packages/realm-server/prerender/render-runner.ts`. Read it on each
-// fetch (not module-scope-cached) so a page reused across multiple
-// visits picks up the current visit's job id. Outside a prerender
-// tab the global is undefined and we send no header, so user / API
-// callers continue to bypass the realm-server's job-scoped cache.
-function jobIdHeader(): Record<string, string> {
-  let j = (globalThis as unknown as { __boxelJobId?: string }).__boxelJobId;
-  return j ? { [X_BOXEL_JOB_ID_HEADER]: j } : {};
-}
 const queryFieldSeedFromSearchSymbol = Symbol.for(
   'cardstack-query-field-seed-from-search',
 );
diff --git a/packages/realm-server/handlers/handle-search-prerendered.ts b/packages/realm-server/handlers/handle-search-prerendered.ts
index aa7baf15c8d..9c7eb1eb3d4 100644
--- a/packages/realm-server/handlers/handle-search-prerendered.ts
+++ b/packages/realm-server/handlers/handle-search-prerendered.ts
@@ -2,8 +2,10 @@ import type Koa from 'koa';
 import {
   buildSearchErrorResponse,
   SupportedMimeType,
+  X_BOXEL_CONSUMING_REALM_HEADER,
   parsePrerenderedSearchRequestFromPayload,
   parsePrerenderedSearchRequestFromRequest,
+  sanitizeConsumingRealmHeader,
   SearchRequestError,
   searchPrerenderedRealms,
 } from '@cardstack/runtime-common';
@@ -16,10 +18,16 @@ import {
   getMultiRealmAuthorization,
   getSearchRequestPayload,
 } from '../middleware/multi-realm-authorization';
+import type { JobScopedSearchCache } from '../job-scoped-search-cache';
+import {
+  PRERENDER_JOB_ID_HEADER,
+  sanitizePrerenderJobId,
+} from '../prerender/prerender-constants';
 
-export default function handleSearchPrerendered(): (
-  ctxt: Koa.Context,
-) => Promise<void> {
+export default function handleSearchPrerendered(opts?: {
+  searchCache?: JobScopedSearchCache;
+}): (ctxt: Koa.Context) => Promise<void> {
+  let searchCache = opts?.searchCache;
   return async function (ctxt: Koa.Context) {
     let { realmList, realmByURL } = getMultiRealmAuthorization(ctxt);
 
@@ -43,15 +51,56 @@ export default function handleSearchPrerendered(): (
       throw e;
     }
 
-    let combined = await searchPrerenderedRealms(
-      realmList.map((realmURL) => realmByURL.get(realmURL)),
-      parsed.cardsQuery,
-      {
-        htmlFormat: parsed.htmlFormat,
-        cardUrls: parsed.cardUrls,
-        renderType: parsed.renderType,
-      },
-    );
+    let searchOpts = {
+      htmlFormat: parsed.htmlFormat,
+      cardUrls: parsed.cardUrls,
+      renderType: parsed.renderType,
+    };
+    let runSearch = () =>
+      searchPrerenderedRealms(
+        realmList.map((realmURL) => realmByURL.get(realmURL)),
+        parsed.cardsQuery,
+        searchOpts,
+      );
+
+    // Symmetric to `_federated-search`'s gating. Cache is consulted
+    // only when both indexer-traffic headers are present and well-
+    // formed:
+    //   (a) `x-boxel-job-id` — only the indexer worker stamps this,
+    //   (b) `x-boxel-consuming-realm` — the host's render route only
+    //       sets it during prerender.
+    // User-facing API callers never carry both, so they always
+    // bypass the cache and observe live SQL state.
+    //
+    // The prerendered handler's request shape carries
+    // `htmlFormat` / `cardUrls` / `renderType` which materially
+    // change the response body. These are passed through `opts` so
+    // the cache's `sortKeysDeep`-canonicalised inner key segregates
+    // entries that differ on any of them. `cardsQuery` is whatever
+    // remains after those three keys are stripped by
+    // `parsePrerenderedSearchRequest…` — so the cache key reflects
+    // the full request shape, not just the query body.
+    //
+    // `multiRealmAuthorization` has already validated read access
+    // for every entry of `realmList`, so the cache cannot surface
+    // results across an authorization boundary.
+    let jobId = searchCache
+      ? sanitizePrerenderJobId(ctxt.get(PRERENDER_JOB_ID_HEADER))
+      : null;
+    let consumingRealm = searchCache
+      ? sanitizeConsumingRealmHeader(ctxt.get(X_BOXEL_CONSUMING_REALM_HEADER))
+      : null;
+    let cacheable = searchCache && jobId && consumingRealm;
+
+    let combined = cacheable
+      ? await searchCache!.getOrPopulate({
+          jobId: jobId!,
+          realms: realmList,
+          query: parsed.cardsQuery,
+          opts: searchOpts,
+          populate: runSearch,
+        })
+      : await runSearch();
 
     await setContextResponse(
       ctxt,
diff --git a/packages/realm-server/job-scoped-search-cache.ts b/packages/realm-server/job-scoped-search-cache.ts
index 2a97a02a67d..f151677119f 100644
--- a/packages/realm-server/job-scoped-search-cache.ts
+++ b/packages/realm-server/job-scoped-search-cache.ts
@@ -1,10 +1,12 @@
 import {
+  logger,
   normalizeQueryForSignature,
   sortKeysDeep,
-  type LinkableCollectionDocument,
   type Query,
 } from '@cardstack/runtime-common';
 
+const log = logger('job-scoped-search-cache');
+
 // Default entry TTL. Picked to comfortably outlive a single indexing
 // batch (workers cap from-scratch jobs at 6 min, incremental jobs are
 // shorter) while bounding the worst case where a job ends without a
@@ -28,7 +30,14 @@ const DEFAULT_TTL_MS = 10 * 60 * 1000;
 const DEFAULT_MAX_ENTRIES = 5000;
 
 type CachedEntry = {
-  result: LinkableCollectionDocument;
+  // Result is stored opaquely so both `_federated-search`'s
+  // `LinkableCollectionDocument` and `_federated-search-prerendered`'s
+  // `PrerenderedCardCollectionDocument` can share the same cache
+  // instance. Inner-key canonicalisation already includes the
+  // endpoint-distinguishing params (htmlFormat / cardUrls / renderType
+  // are passed through `opts`), so two endpoints' entries cannot
+  // collide on a key they don't both fully share.
+  result: unknown;
   timer: ReturnType<typeof setTimeout>;
   // Position in the FIFO eviction ring. Stored on the entry so a
   // cache hit doesn't need a separate map lookup to know its slot.
@@ -37,10 +46,16 @@ type CachedEntry = {
 
 // Per-batch read cache used during indexing. Each entry is keyed by
 // `(jobId, normalizedRealms, normalizedQuery, normalizedOpts)` and
-// represents one `_federated-search` populate computed during the
-// lifetime of one indexing job. The job-id boundary scopes the cache
-// to a single batch; a subsequent job hashes to different keys and
-// never reuses a stale value.
+// represents one search populate computed during the lifetime of one
+// indexing job. The cache is shared across both `_federated-search`
+// (`LinkableCollectionDocument` results) and
+// `_federated-search-prerendered` (`PrerenderedCardCollectionDocument`
+// results) — the endpoint-specific request shape (`htmlFormat`,
+// `cardUrls`, `renderType` for the prerendered handler) is folded into
+// `opts` before the call here, so the canonicalised inner key already
+// segregates the two endpoints' entries. The job-id boundary scopes
+// the cache to a single batch; a subsequent job hashes to different
+// keys and never reuses a stale value.
 //
 // Same-realm reads are safe by construction: within an indexing batch
 // the writer touches `boxel_index_working`, not `boxel_index`, so
@@ -88,25 +103,40 @@ export class JobScopedSearchCache {
   #nextFifoSeq = 0;
   readonly #ttlMs: number;
   readonly #maxEntries: number;
+  // Per-job hit/miss counters for observability. Recorded on every
+  // `getOrPopulate` call and emitted as a single debug-level log line
+  // when the job's last entry leaves the cache — either via
+  // `clearJob` (worker signals job completion) or via TTL eviction of
+  // the last surviving entry, whichever fires first. Off by default;
+  // enable via `LOG_LEVELS=job-scoped-search-cache=debug` when an
+  // operator wants to confirm cache utilisation on a specific batch.
+  #stats = new Map<string, { hits: number; misses: number }>();
 
   constructor(opts?: { ttlMs?: number; maxEntries?: number }) {
     this.#ttlMs = opts?.ttlMs ?? DEFAULT_TTL_MS;
     this.#maxEntries = opts?.maxEntries ?? DEFAULT_MAX_ENTRIES;
   }
 
-  async getOrPopulate(args: {
+  async getOrPopulate<T>(args: {
     jobId: string;
     realms: string[];
     query: Query;
     opts: unknown | undefined;
-    populate: () => Promise<LinkableCollectionDocument>;
-  }): Promise<LinkableCollectionDocument> {
+    populate: () => Promise<T>;
+  }): Promise<T> {
     let innerKey = buildInnerKey(args.realms, args.query, args.opts);
     let jobMap = this.#byJob.get(args.jobId);
     let existing = jobMap?.get(innerKey);
+    let stat = this.#stats.get(args.jobId);
+    if (!stat) {
+      stat = { hits: 0, misses: 0 };
+      this.#stats.set(args.jobId, stat);
+    }
     if (existing) {
-      return existing.result;
+      stat.hits += 1;
+      return existing.result as T;
     }
+    stat.misses += 1;
 
     let result = await args.populate();
 
@@ -171,6 +201,21 @@ export class JobScopedSearchCache {
       jm.delete(innerKey);
       if (jm.size === 0) {
         this.#byJob.delete(jobId);
+        // The last entry for this job has TTL-evicted — surface
+        // accumulated hit/miss stats so operators can tell whether
+        // the cache was doing work for the batch. Stats are cleared
+        // alongside the entry map so a subsequent job reuse of the
+        // same jobId (rare; jobIds are monotonic) starts fresh.
+        let stat = this.#stats.get(jobId);
+        if (stat) {
+          let total = stat.hits + stat.misses;
+          let hitRate =
+            total === 0 ? '0%' : `${Math.round((100 * stat.hits) / total)}%`;
+          log.debug(
+            `job-scoped search cache stats job=${jobId} hits=${stat.hits} misses=${stat.misses} hitRate=${hitRate} (TTL-evicted)`,
+          );
+          this.#stats.delete(jobId);
+        }
       }
     }
   }
@@ -179,7 +224,18 @@ export class JobScopedSearchCache {
   // eviction path so the cache releases memory as soon as the worker
   // signals job completion, rather than waiting on TTL.
   clearJob(jobId: string): void {
+    let stat = this.#stats.get(jobId);
     let jobMap = this.#byJob.get(jobId);
+    let entries = jobMap?.size ?? 0;
+    if (stat) {
+      let total = stat.hits + stat.misses;
+      let hitRate =
+        total === 0 ? '0%' : `${Math.round((100 * stat.hits) / total)}%`;
+      log.debug(
+        `job-scoped search cache stats job=${jobId} hits=${stat.hits} misses=${stat.misses} hitRate=${hitRate} entries=${entries}`,
+      );
+      this.#stats.delete(jobId);
+    }
     if (!jobMap) return;
     for (let entry of jobMap.values()) {
       clearTimeout(entry.timer);
diff --git a/packages/realm-server/routes.ts b/packages/realm-server/routes.ts
index f3bd90c4719..6b0b3604d3f 100644
--- a/packages/realm-server/routes.ts
+++ b/packages/realm-server/routes.ts
@@ -200,7 +200,7 @@ export function createRoutes(args: CreateRoutesArgs) {
   router.all(
     '/_federated-search-prerendered',
     multiRealmAuthorization(args),
-    handleSearchPrerendered(),
+    handleSearchPrerendered({ searchCache }),
   );
   router.post(
     '/_prerender-card',
diff --git a/packages/realm-server/tests/server-endpoints/search-prerendered-test.ts b/packages/realm-server/tests/server-endpoints/search-prerendered-test.ts
index 7a87c529c64..ff007211cb5 100644
--- a/packages/realm-server/tests/server-endpoints/search-prerendered-test.ts
+++ b/packages/realm-server/tests/server-endpoints/search-prerendered-test.ts
@@ -205,6 +205,327 @@ module(`server-endpoints/${basename(__filename)}`, function (_hooks) {
         );
       });
 
+      // Verifies the shared `JobScopedSearchCache` hits at the
+      // `_federated-search-prerendered` handler boundary too. Counts
+      // populates by spying on each realm's `searchPrerendered` method
+      // — a cache hit short-circuits before `searchPrerenderedRealms`
+      // reaches the realm, so spy invocations are the unambiguous
+      // tell.
+      test('QUERY /_federated-search-prerendered caches reads under one jobId and bypasses other jobs', async function (assert) {
+        let realmServerToken = createRealmServerJWT(
+          { user: ownerUserId, sessionRoom: 'session-room-test' },
+          realmSecretSeed,
+        );
+
+        let primaryCalls = 0;
+        let secondaryCalls = 0;
+        let primaryProto = testRealm.searchPrerendered;
+        let secondaryProto = secondaryRealm.searchPrerendered;
+        (
+          testRealm as unknown as {
+            searchPrerendered: typeof testRealm.searchPrerendered;
+          }
+        ).searchPrerendered = function (
+          this: typeof testRealm,
+          ...args: Parameters<typeof testRealm.searchPrerendered>
+        ) {
+          primaryCalls++;
+          return primaryProto.apply(this, args);
+        };
+        (
+          secondaryRealm as unknown as {
+            searchPrerendered: typeof secondaryRealm.searchPrerendered;
+          }
+        ).searchPrerendered = function (
+          this: typeof secondaryRealm,
+          ...args: Parameters<typeof secondaryRealm.searchPrerendered>
+        ) {
+          secondaryCalls++;
+          return secondaryProto.apply(this, args);
+        };
+
+        try {
+          let query: Query = {
+            filter: {
+              on: baseCardRef,
+              eq: { cardTitle: 'Shared Card' },
+            },
+          };
+          let searchURL = new URL(
+            '/_federated-search-prerendered',
+            testRealm.url,
+          );
+          let post = (jobId: string) =>
+            request
+              .post(`${searchURL.pathname}${searchURL.search}`)
+              .set('Accept', 'application/vnd.card+json')
+              .set('Content-Type', 'application/json')
+              .set('X-HTTP-Method-Override', 'QUERY')
+              .set('Authorization', `Bearer ${realmServerToken}`)
+              .set('x-boxel-job-id', jobId)
+              .set('x-boxel-consuming-realm', testRealm.url)
+              .send({
+                ...query,
+                realms: [testRealm.url, secondaryRealm.url],
+                prerenderedHtmlFormat: 'embedded',
+              });
+
+          let first = await post('42.1');
+          assert.strictEqual(first.status, 200, 'first request: HTTP 200');
+          assert.strictEqual(
+            first.body.data.length,
+            2,
+            'first request returns both realms’ results',
+          );
+          assert.strictEqual(
+            primaryCalls,
+            1,
+            'first request hit testRealm exactly once',
+          );
+          assert.strictEqual(
+            secondaryCalls,
+            1,
+            'first request hit secondaryRealm exactly once',
+          );
+
+          let second = await post('42.1');
+          assert.strictEqual(second.status, 200, 'second request: HTTP 200');
+          assert.strictEqual(
+            second.body.data.length,
+            2,
+            'second request returns the cached result',
+          );
+          assert.strictEqual(
+            primaryCalls,
+            1,
+            'second request was a cache hit (testRealm not re-queried)',
+          );
+          assert.strictEqual(
+            secondaryCalls,
+            1,
+            'second request was a cache hit (secondaryRealm not re-queried)',
+          );
+
+          // A different jobId is a different batch — fresh populate.
+          let third = await post('43.1');
+          assert.strictEqual(third.status, 200, 'third request: HTTP 200');
+          assert.strictEqual(
+            primaryCalls,
+            2,
+            'different jobId re-queried testRealm',
+          );
+          assert.strictEqual(
+            secondaryCalls,
+            2,
+            'different jobId re-queried secondaryRealm',
+          );
+        } finally {
+          delete (testRealm as unknown as { searchPrerendered?: unknown })
+            .searchPrerendered;
+          delete (secondaryRealm as unknown as { searchPrerendered?: unknown })
+            .searchPrerendered;
+        }
+      });
+
+      // Cache key reflects the endpoint-specific request shape. Changing
+      // any of `prerenderedHtmlFormat`, `cardUrls`, or `renderType` under
+      // an otherwise identical (jobId, realms, query) tuple must miss
+      // the cache and fire a fresh populate.
+      test('QUERY /_federated-search-prerendered cache key segregates entries by htmlFormat / cardUrls / renderType', async function (assert) {
+        let realmServerToken = createRealmServerJWT(
+          { user: ownerUserId, sessionRoom: 'session-room-test' },
+          realmSecretSeed,
+        );
+
+        let primaryCalls = 0;
+        let primaryProto = testRealm.searchPrerendered;
+        (
+          testRealm as unknown as {
+            searchPrerendered: typeof testRealm.searchPrerendered;
+          }
+        ).searchPrerendered = function (
+          this: typeof testRealm,
+          ...args: Parameters<typeof testRealm.searchPrerendered>
+        ) {
+          primaryCalls++;
+          return primaryProto.apply(this, args);
+        };
+
+        try {
+          let query: Query = {
+            filter: {
+              on: baseCardRef,
+              eq: { cardTitle: 'Shared Card' },
+            },
+          };
+          let searchURL = new URL(
+            '/_federated-search-prerendered',
+            testRealm.url,
+          );
+          let post = (body: Record<string, unknown>) =>
+            request
+              .post(`${searchURL.pathname}${searchURL.search}`)
+              .set('Accept', 'application/vnd.card+json')
+              .set('Content-Type', 'application/json')
+              .set('X-HTTP-Method-Override', 'QUERY')
+              .set('Authorization', `Bearer ${realmServerToken}`)
+              .set('x-boxel-job-id', '42.1')
+              .set('x-boxel-consuming-realm', testRealm.url)
+              .send({
+                ...query,
+                realms: [testRealm.url],
+                ...body,
+              });
+
+          let first = await post({ prerenderedHtmlFormat: 'embedded' });
+          assert.strictEqual(first.status, 200, 'first request: HTTP 200');
+          assert.strictEqual(primaryCalls, 1, 'first request populated');
+
+          // Same jobId, same query, same realms — but htmlFormat differs.
+          // Must miss the cache.
+          let second = await post({ prerenderedHtmlFormat: 'fitted' });
+          assert.strictEqual(second.status, 200, 'second request: HTTP 200');
+          assert.strictEqual(
+            primaryCalls,
+            2,
+            'different htmlFormat fired a fresh populate',
+          );
+
+          // Identical to `second` — must hit the cache.
+          let third = await post({ prerenderedHtmlFormat: 'fitted' });
+          assert.strictEqual(third.status, 200, 'third request: HTTP 200');
+          assert.strictEqual(
+            primaryCalls,
+            2,
+            'repeat of `second` was a cache hit',
+          );
+
+          // Adding a `cardUrls` filter changes the response → miss.
+          let fourth = await post({
+            prerenderedHtmlFormat: 'fitted',
+            cardUrls: [`${testRealm.url}test-card`],
+          });
+          assert.strictEqual(fourth.status, 200, 'fourth request: HTTP 200');
+          assert.strictEqual(
+            primaryCalls,
+            3,
+            'different cardUrls fired a fresh populate',
+          );
+        } finally {
+          delete (testRealm as unknown as { searchPrerendered?: unknown })
+            .searchPrerendered;
+        }
+      });
+
+      // Without both the job-id and consuming-realm headers a request
+      // is treated as user-facing traffic and bypasses the cache.
+      test('QUERY /_federated-search-prerendered bypasses cache when either prerender-context header is absent', async function (assert) {
+        let realmServerToken = createRealmServerJWT(
+          { user: ownerUserId, sessionRoom: 'session-room-test' },
+          realmSecretSeed,
+        );
+
+        let primaryCalls = 0;
+        let primaryProto = testRealm.searchPrerendered;
+        (
+          testRealm as unknown as {
+            searchPrerendered: typeof testRealm.searchPrerendered;
+          }
+        ).searchPrerendered = function (
+          this: typeof testRealm,
+          ...args: Parameters<typeof testRealm.searchPrerendered>
+        ) {
+          primaryCalls++;
+          return primaryProto.apply(this, args);
+        };
+
+        try {
+          let query: Query = {
+            filter: {
+              on: baseCardRef,
+              eq: { cardTitle: 'Shared Card' },
+            },
+          };
+          let searchURL = new URL(
+            '/_federated-search-prerendered',
+            testRealm.url,
+          );
+
+          // No prerender-context headers — every request must re-populate.
+          let plain = () =>
+            request
+              .post(`${searchURL.pathname}${searchURL.search}`)
+              .set('Accept', 'application/vnd.card+json')
+              .set('Content-Type', 'application/json')
+              .set('X-HTTP-Method-Override', 'QUERY')
+              .set('Authorization', `Bearer ${realmServerToken}`)
+              .send({
+                ...query,
+                realms: [testRealm.url],
+                prerenderedHtmlFormat: 'embedded',
+              });
+
+          let first = await plain();
+          assert.strictEqual(first.status, 200, 'first request: HTTP 200');
+          assert.strictEqual(primaryCalls, 1, 'first request populated');
+
+          let second = await plain();
+          assert.strictEqual(second.status, 200, 'second request: HTTP 200');
+          assert.strictEqual(
+            primaryCalls,
+            2,
+            'no headers → cache bypassed, populate ran again',
+          );
+
+          // jobId present, but consumingRealm missing → bypass.
+          let jobIdOnly = await request
+            .post(`${searchURL.pathname}${searchURL.search}`)
+            .set('Accept', 'application/vnd.card+json')
+            .set('Content-Type', 'application/json')
+            .set('X-HTTP-Method-Override', 'QUERY')
+            .set('Authorization', `Bearer ${realmServerToken}`)
+            .set('x-boxel-job-id', '42.1')
+            .send({
+              ...query,
+              realms: [testRealm.url],
+              prerenderedHtmlFormat: 'embedded',
+            });
+          assert.strictEqual(jobIdOnly.status, 200, 'job-id only: HTTP 200');
+          assert.strictEqual(
+            primaryCalls,
+            3,
+            'job-id without consuming-realm → cache bypassed',
+          );
+
+          // consumingRealm present, but jobId missing → bypass.
+          let consumingOnly = await request
+            .post(`${searchURL.pathname}${searchURL.search}`)
+            .set('Accept', 'application/vnd.card+json')
+            .set('Content-Type', 'application/json')
+            .set('X-HTTP-Method-Override', 'QUERY')
+            .set('Authorization', `Bearer ${realmServerToken}`)
+            .set('x-boxel-consuming-realm', testRealm.url)
+            .send({
+              ...query,
+              realms: [testRealm.url],
+              prerenderedHtmlFormat: 'embedded',
+            });
+          assert.strictEqual(
+            consumingOnly.status,
+            200,
+            'consuming-realm only: HTTP 200',
+          );
+          assert.strictEqual(
+            primaryCalls,
+            4,
+            'consuming-realm without job-id → cache bypassed',
+          );
+        } finally {
+          delete (testRealm as unknown as { searchPrerendered?: unknown })
+            .searchPrerendered;
+        }
+      });
+
       test('GET /_federated-search-prerendered returns 400 for unsupported method', async function (assert) {
         let realmServerToken = createRealmServerJWT(
           { user: ownerUserId, sessionRoom: 'session-room-test' },

From b257fa533282f4e707ffbd095fa0c4a0417db305 Mon Sep 17 00:00:00 2001
From: Hassan Abdel-Rahman <hassan.abdelrahman@gmail.com>
Date: Mon, 18 May 2026 18:11:46 -0400
Subject: [PATCH 2/2] Address Copilot review: stats-flush from all eviction
 paths, defer miss-count
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two findings from review:

1. `#stats` map could leak: miss-count was incremented before
   `await populate()`. If populate threw before any entry got
   stored, the jobId never acquired a `#byJob` map and the entry
   in `#stats` would persist forever (no TTL timer, no later
   eviction path to trigger cleanup). Fix: defer the `miss += 1`
   until after populate resolves. If populate throws we count
   nothing — observationally equivalent to "never happened" from
   the cache's perspective, with no leaked stats entry.

2. The "last entry for this job left the cache" stats log only
   fired on the TTL-eviction path. Cap eviction (the FIFO loop in
   `getOrPopulate` when `#fifo.size > #maxEntries`) could remove
   the last entry for a job without ever emitting the summary
   line. Same bug shape for `clearJob`. Fix: factor the flush
   into a private `#flushStats(jobId, reason)` helper and call it
   from all three paths (TTL eviction, cap eviction, clearJob),
   passing the eviction reason as a tag in the log line so
   operators can tell why the flush fired.

Hit-count integrity is preserved: a hit implies a prior
`populate()` resolved (no other code path stores entries), so the
stats entry was allocated then. The hit-side `if (stat) stat.hits
+= 1` is defensive against hand-rolled tests that might pre-seed
entries.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../realm-server/job-scoped-search-cache.ts   | 66 ++++++++++---------
 1 file changed, 35 insertions(+), 31 deletions(-)

diff --git a/packages/realm-server/job-scoped-search-cache.ts b/packages/realm-server/job-scoped-search-cache.ts
index f151677119f..c94ff5de36a 100644
--- a/packages/realm-server/job-scoped-search-cache.ts
+++ b/packages/realm-server/job-scoped-search-cache.ts
@@ -127,19 +127,28 @@ export class JobScopedSearchCache {
     let innerKey = buildInnerKey(args.realms, args.query, args.opts);
     let jobMap = this.#byJob.get(args.jobId);
     let existing = jobMap?.get(innerKey);
+    if (existing) {
+      // A hit implies a prior store, which must have allocated the
+      // jobId's stats entry — but be defensive in case a hand-rolled
+      // test ever pre-seeds entries without going through populate.
+      let stat = this.#stats.get(args.jobId);
+      if (stat) stat.hits += 1;
+      return existing.result as T;
+    }
+
+    let result = await args.populate();
+    // Count the miss only after populate resolves. If populate throws
+    // we count nothing — the call is observationally equivalent to
+    // "never happened" from the cache's perspective, and we avoid
+    // leaking a `#stats` entry for a jobId that never produced a
+    // cache entry whose eviction would clear it.
     let stat = this.#stats.get(args.jobId);
     if (!stat) {
       stat = { hits: 0, misses: 0 };
       this.#stats.set(args.jobId, stat);
     }
-    if (existing) {
-      stat.hits += 1;
-      return existing.result as T;
-    }
     stat.misses += 1;
 
-    let result = await args.populate();
-
     // Late-arriving check: the populate may have just settled while a
     // peer's populate (same key) also settled and stored its result
     // first. Last-write-wins; either of the two resolved docs is
@@ -181,6 +190,7 @@ export class JobScopedSearchCache {
         jm!.delete(slot.innerKey);
         if (jm!.size === 0) {
           this.#byJob.delete(slot.jobId);
+          this.#flushStats(slot.jobId, 'cap-evicted');
         }
       }
     }
@@ -201,21 +211,7 @@ export class JobScopedSearchCache {
       jm.delete(innerKey);
       if (jm.size === 0) {
         this.#byJob.delete(jobId);
-        // The last entry for this job has TTL-evicted — surface
-        // accumulated hit/miss stats so operators can tell whether
-        // the cache was doing work for the batch. Stats are cleared
-        // alongside the entry map so a subsequent job reuse of the
-        // same jobId (rare; jobIds are monotonic) starts fresh.
-        let stat = this.#stats.get(jobId);
-        if (stat) {
-          let total = stat.hits + stat.misses;
-          let hitRate =
-            total === 0 ? '0%' : `${Math.round((100 * stat.hits) / total)}%`;
-          log.debug(
-            `job-scoped search cache stats job=${jobId} hits=${stat.hits} misses=${stat.misses} hitRate=${hitRate} (TTL-evicted)`,
-          );
-          this.#stats.delete(jobId);
-        }
+        this.#flushStats(jobId, 'TTL-evicted');
       }
     }
   }
@@ -224,18 +220,9 @@ export class JobScopedSearchCache {
   // eviction path so the cache releases memory as soon as the worker
   // signals job completion, rather than waiting on TTL.
   clearJob(jobId: string): void {
-    let stat = this.#stats.get(jobId);
     let jobMap = this.#byJob.get(jobId);
     let entries = jobMap?.size ?? 0;
-    if (stat) {
-      let total = stat.hits + stat.misses;
-      let hitRate =
-        total === 0 ? '0%' : `${Math.round((100 * stat.hits) / total)}%`;
-      log.debug(
-        `job-scoped search cache stats job=${jobId} hits=${stat.hits} misses=${stat.misses} hitRate=${hitRate} entries=${entries}`,
-      );
-      this.#stats.delete(jobId);
-    }
+    this.#flushStats(jobId, `clearJob entries=${entries}`);
     if (!jobMap) return;
     for (let entry of jobMap.values()) {
       clearTimeout(entry.timer);
@@ -244,6 +231,23 @@ export class JobScopedSearchCache {
     this.#byJob.delete(jobId);
   }
 
+  // Single sink for the once-per-job stats log. Called from every
+  // path that can leave a jobId with no remaining cache entries —
+  // TTL eviction, cap eviction, and clearJob. The stats map's entry
+  // is dropped in lockstep so a subsequent jobId reuse (rare, since
+  // jobIds are monotonic) starts fresh.
+  #flushStats(jobId: string, reason: string): void {
+    let stat = this.#stats.get(jobId);
+    if (!stat) return;
+    let total = stat.hits + stat.misses;
+    let hitRate =
+      total === 0 ? '0%' : `${Math.round((100 * stat.hits) / total)}%`;
+    log.debug(
+      `job-scoped search cache stats job=${jobId} hits=${stat.hits} misses=${stat.misses} hitRate=${hitRate} (${reason})`,
+    );
+    this.#stats.delete(jobId);
+  }
+
   // Total entry count across all jobs. Useful for tests + observability.
   size(): number {
     let total = 0;