diff --git a/.claude/skills/prerender-sizing/SKILL.md b/.claude/skills/prerender-sizing/SKILL.md index 28263eed15d..1fcb9c52cff 100644 --- a/.claude/skills/prerender-sizing/SKILL.md +++ b/.claude/skills/prerender-sizing/SKILL.md @@ -8,14 +8,14 @@ allowed-tools: Read, Grep, Glob, Bash The prerender pool's tab capacity is governed by a small set of SSM-driven knobs: -| Env var | What it controls | -|---|---| -| `PRERENDER_PAGE_POOL_MIN` | Idle floor — pool never contracts below this. | -| `PRERENDER_PAGE_POOL_MAX` | Burst ceiling reachable by any priority. | -| `PRERENDER_PAGE_POOL_HIGH_PRIORITY_MAX` | Extra ceiling, reachable only when caller `priority >= HIGH_PRIORITY_THRESHOLD`. | -| `PRERENDER_HIGH_PRIORITY_THRESHOLD` | Priority bar that unlocks the upper tier. | -| `PRERENDER_PAGE_POOL_IDLE_CONTRACTION_MS` | Hysteresis window before each contraction tick. | -| `PRERENDER_SHARED_CONTEXT_CAP` | Absolute LRU cap for cached BrowserContexts. | +| Env var | What it controls | +| ----------------------------------------- | -------------------------------------------------------------------------------- | +| `PRERENDER_PAGE_POOL_MIN` | Idle floor — pool never contracts below this. | +| `PRERENDER_PAGE_POOL_MAX` | Burst ceiling reachable by any priority. | +| `PRERENDER_PAGE_POOL_HIGH_PRIORITY_MAX` | Extra ceiling, reachable only when caller `priority >= HIGH_PRIORITY_THRESHOLD`. | +| `PRERENDER_HIGH_PRIORITY_THRESHOLD` | Priority bar that unlocks the upper tier. | +| `PRERENDER_PAGE_POOL_IDLE_CONTRACTION_MS` | Hysteresis window before each contraction tick. | +| `PRERENDER_SHARED_CONTEXT_CAP` | Absolute LRU cap for cached BrowserContexts. | Plus the ECS task definition's `cpu` and `memory`. All these together form the **memory envelope** that bounds how many warmed BrowserContexts the system can hold and how much burst headroom it has. @@ -31,7 +31,7 @@ Trigger on any of: - "Why does the dashboard show prerender memory peak at X%?" 
- "Should I bump `PRERENDER_PAGE_POOL_MAX` from N to M?" -If the user is asking "why did this single render time out", that's the `indexing-diagnostics` skill, not this one. This skill is for *capacity planning*. +If the user is asking "why did this single render time out", that's the `indexing-diagnostics` skill, not this one. This skill is for _capacity planning_. ## The sizing model @@ -47,7 +47,7 @@ where: - `N`: number of warmed pool entries (active tabs + standby contexts the LRU is holding). - `marginal_per_tab`: cost of one additional warmed BrowserContext + its cached fetches + tab queue state. Empirically derived per environment. -**CPU follows a different shape.** Each *actively rendering* tab consumes approximately one busy CPU core (Chromium docs / observed). But tabs alternate between rendering, host-side waits (fetches, store loads), and idle. So: +**CPU follows a different shape.** Each _actively rendering_ tab consumes approximately one busy CPU core (Chromium docs / observed). But tabs alternate between rendering, host-side waits (fetches, store loads), and idle. So: ``` cpu_peak ≈ (# tabs rendering simultaneously) × 1 vCPU @@ -148,7 +148,7 @@ Confirms whether the system held under pressure (zero render-timeouts) or was at -- skip the malformed rows: e.g. `AND diagnostics->'waits' ? -- 'tabQueueMs'` (the JSONB `?` operator tests for a key) keeps -- only rows with that key present. -SELECT +SELECT count(*) AS rows_with_diag, count(*) FILTER (WHERE (diagnostics->>'totalElapsedMs')::int >= 145000) AS at_or_over_timeout, percentile_cont(0.95) WITHIN GROUP (ORDER BY (diagnostics->>'totalElapsedMs')::int) AS p95_total_ms, @@ -167,7 +167,7 @@ WHERE diagnostics IS NOT NULL Key signals to look for: -- `at_or_over_timeout > 0`: the system is *already* dropping renders. Sizing change is needed urgently. +- `at_or_over_timeout > 0`: the system is _already_ dropping renders. Sizing change is needed urgently. 
- `max_tabq_ms` of seconds-to-tens-of-seconds: the user was waiting for a tab. This is the UX-visible pressure that priority routing + dynamic expansion exists to mitigate. - `max_sem_ms` of seconds-to-tens-of-seconds: global render-semaphore saturation. Indicates pool is too small or fleet is too small. - `p99_total_ms` near `145000` (the timeout budget): system was at the edge. Even if no timeouts fired, you're one bad burst from a 504. @@ -217,7 +217,7 @@ Now apply the projection plus operational judgment: - **`MIN`** — the idle floor. From queue-snapshot data: what's the typical low-load tab count? Pick `MIN` slightly above that so the manager's warm-vacancy routing has cached affinities to route to. On boxel today, MIN=2 covers the steady state. - **`MAX`** — the any-priority burst ceiling. From queue-snapshot data: what's the observed peak `totalTabs`? Pick `MAX` at-or-just-above that. Values above the 80 % memory line are undersized; values below the observed peak will throttle under existing workload. On boxel today, MAX=6 covers all 7 d observations. - **`HP_MAX`** — the high-priority ceiling. Should give priority-10 traffic 1–2 reserved expansion slots beyond `MAX` for the worst-case "low-priority workload has saturated MAX, user comes in" scenario. Has to fit memory at 80 %. On boxel today, HP_MAX=8 fits 16 GB at 55 %. -- **`HIGH_PRIORITY_THRESHOLD`** — the bar that unlocks HP tier. Above `systemInitiatedPriority = 0`, below `userInitiatedPriority = 10`. Default 5 leaves room for an intermediate priority level (e.g. live-refresh) to also benefit without re-tuning. +- **`HIGH_PRIORITY_THRESHOLD`** — the bar that unlocks HP tier. Above the system-initiated tiers (`systemInitiatedPriority = 1`, `systemInitiatedPrerenderHtmlPriority = 0`), at-or-below the user-initiated tiers (`userInitiatedPriority = 10`, `userInitiatedPrerenderHtmlPriority = 9`). Default 5 leaves room for an intermediate priority level (e.g. live-refresh) to also benefit without re-tuning. 
- **`IDLE_CONTRACTION_MS`** — the hysteresis window. Long enough to absorb sequential render trains from a typical fan-out; short enough that contraction reaches MIN within a few minutes. Default 60 000 ms (1 minute) works for most workloads. - **`SHARED_CONTEXT_CAP`** — the absolute LRU cap on cached BrowserContexts. Default `HP_MAX × 1.5` keeps the LRU stable across expansion + contraction cycles. @@ -230,13 +230,13 @@ If the resize affects task size, do a Fargate pricing comparison. us-east-1 on-d So: -| Task size | $/hr | /month per task | -|---|---:|---:| -| 1 vCPU / 4 GB | $0.058 | $42 | -| 2 vCPU / 8 GB | $0.117 | $85 | -| 2 vCPU / 16 GB | $0.152 | $111 | -| 4 vCPU / 8 GB | $0.197 | $144 | -| 4 vCPU / 16 GB | $0.233 | $170 | +| Task size | $/hr | /month per task | +| -------------- | -----: | --------------: | +| 1 vCPU / 4 GB | $0.058 | $42 | +| 2 vCPU / 8 GB | $0.117 | $85 | +| 2 vCPU / 16 GB | $0.152 | $111 | +| 4 vCPU / 8 GB | $0.197 | $144 | +| 4 vCPU / 16 GB | $0.233 | $170 | If the resize is "swap memory for CPU" (the typical case for prerender — memory-bound, CPU over-provisioned), the cost may actually drop. **Always show the pricing delta in the PR description.** It's a meaningful data point for the resize decision. @@ -246,12 +246,12 @@ Captured on 2026-04-30 ~20:00 UTC for the CS-10976 PR 12 staging activation. 
### Telemetry -| Metric | 24 h | 7 d | -|---|---:|---:| -| CPU avg of 5-min Avg | 1.1 % | 1.5 % | -| CPU 5-min peak | 67.5 % | 97.5 % | -| Memory avg of 5-min Avg | 35 % | 39 % | -| Memory 5-min peak | 64 % | 98.3 % | +| Metric | 24 h | 7 d | +| ----------------------- | -----: | -----: | +| CPU avg of 5-min Avg | 1.1 % | 1.5 % | +| CPU 5-min peak | 67.5 % | 97.5 % | +| Memory avg of 5-min Avg | 35 % | 39 % | +| Memory 5-min peak | 64 % | 98.3 % | 7-d render-timing histogram from `boxel_index.diagnostics`: @@ -285,12 +285,12 @@ Queue-snapshot at the memory peak: ### Memory projection -| N tabs | Memory used | 8 GB (today) | 16 GB (resized) | -|---:|---:|:---:|:---:| -| 2 (MIN) | 3.7 GB | 46 % ✓ | 23 % ✓ | -| 4 | 5.4 GB | 67 % ✓ | 34 % ✓ | -| **6 (MAX)** | **7.1 GB** | **89 % ✗** | **44 % ✓** | -| **8 (HP_MAX)** | **8.7 GB** | **109 % ✗ OOM** | **55 % ✓** | +| N tabs | Memory used | 8 GB (today) | 16 GB (resized) | +| -------------: | ----------: | :-------------: | :-------------: | +| 2 (MIN) | 3.7 GB | 46 % ✓ | 23 % ✓ | +| 4 | 5.4 GB | 67 % ✓ | 34 % ✓ | +| **6 (MAX)** | **7.1 GB** | **89 % ✗** | **44 % ✓** | +| **8 (HP_MAX)** | **8.7 GB** | **109 % ✗ OOM** | **55 % ✓** | The 16 GB resize is what makes HP_MAX=8 safe. On the existing 8 GB task, MAX=6 is already tight; HP_MAX=8 would OOM. diff --git a/README.md b/README.md index 571b83ba576..0a277477f2f 100644 --- a/README.md +++ b/README.md @@ -142,7 +142,7 @@ Live reloads are not available in this mode, however, if you use start the serve #### Using `mise run dev` -Instead of running `mise run services:realm-server-base`, you can alternatively use `mise run dev` which also serves a few other realms on other ports--this is convenient if you wish to switch between the app and the tests without having to restart servers. For faster startup, `mise run dev-minimal` skips experiments, catalog, homepage, and submission realms. 
`dev-minimal` does not start the host app — run it in a second terminal with `mise exec -- pnpm -C packages/host start` (the `mise exec` prefix loads the HTTPS dev cert env so host comes up on `https://localhost:4200`). Use the environment variable `WORKER_HIGH_PRIORITY_COUNT` to add additional workers that service only user initiated requests and `WORKER_ALL_PRIORITY_COUNT` to add workers that service all jobs (system or user initiated). By default there is 1 all priority worker for each realm server. +Instead of running `mise run services:realm-server-base`, you can alternatively use `mise run dev` which also serves a few other realms on other ports--this is convenient if you wish to switch between the app and the tests without having to restart servers. For faster startup, `mise run dev-minimal` skips experiments, catalog, homepage, and submission realms. `dev-minimal` does not start the host app — run it in a second terminal with `mise exec -- pnpm -C packages/host start` (the `mise exec` prefix loads the HTTPS dev cert env so host comes up on `https://localhost:4200`). Use the environment variable `WORKER_HIGH_PRIORITY_COUNT` to add additional workers that service only user-initiated requests and `WORKER_ALL_PRIORITY_COUNT` to add workers that service all jobs regardless of priority. By default there is 1 all-priority worker for each realm server. ##### Turbo mode diff --git a/docs/aws-operations.md b/docs/aws-operations.md index 6d21dcbd61b..36be155d994 100644 --- a/docs/aws-operations.md +++ b/docs/aws-operations.md @@ -2,7 +2,7 @@ Operational runbooks for boxel services on AWS. Each section is a self-contained procedure: pre-requisites, the steps to run, what to verify after, and how to roll back. -These procedures assume you already have AWS access set up — the `aws-access` claude skill walks new teammates through the one-time setup. Most procedures here need an AWS profile with `ssm:PutParameter` / `ecs:UpdateService` etc. 
on the target prefix; the read-only `claude-staging` / `claude-prod` profiles from that skill are *not* sufficient. +These procedures assume you already have AWS access set up — the `aws-access` claude skill walks new teammates through the one-time setup. Most procedures here need an AWS profile with `ssm:PutParameter` / `ecs:UpdateService` etc. on the target prefix; the read-only `claude-staging` / `claude-prod` profiles from that skill are _not_ sufficient. Conventions used throughout: @@ -68,17 +68,20 @@ This rolls the running tasks one at a time; expect a 1–2 minute window where s ### Validation gate Before promoting to prod, run a synthetic saturating workload on staging: -- Concurrent catalog full reindex (priority 0) -- Simulated user-driven incremental reindex on a different realm (priority 10) + +- Concurrent catalog full reindex (`systemInitiatedPriority`) +- Simulated user-driven incremental reindex on a different realm (`userInitiatedPriority`) - Both for ~5 minutes **Pass criteria:** + - High-priority p95 `tabQueueMs` < 1 s during the burst. - Memory peak < 80 % of allocated. - CPU peak < 80 % of allocated, sustained over a 1-minute window (brief overshoots OK). - Zero 145-second render-timeouts (`SELECT count(*) FROM boxel_index WHERE (diagnostics->>'totalElapsedMs')::int >= 145000`). **Adjustment paths:** + - CPU peak > 80 % sustained → drop `MAX` by 1, retest. - Memory peak > 80 % → drop `HIGH_PRIORITY_MAX` by 1, retest. - Tab-queue wait spike at high priority → investigate manager-side routing; the priority-aware `scoreCandidate` should be picking the right server. @@ -87,14 +90,14 @@ Before promoting to prod, run a synthetic saturating workload on staging: If the ECS task is still 4 vCPU / 8 GB, the recommended values would OOM at `HP_MAX = 8` (memory model: 8 × 836 MB + 2 GB ≈ 8.7 GB → 109 % of 8 GB). 
Use these instead — modest improvement over today's behaviour, no infra change required: -| Knob | 16 GB task (recommended) | 8 GB task (fallback) | -|---|---:|---:| -| `PRERENDER_PAGE_POOL_MIN` | 2 | 2 | -| `PRERENDER_PAGE_POOL_MAX` | 6 | 4 | -| `PRERENDER_PAGE_POOL_HIGH_PRIORITY_MAX` | 8 | 5 | -| `PRERENDER_HIGH_PRIORITY_THRESHOLD` | 5 | 5 | -| `PRERENDER_POOL_IDLE_CONTRACTION_MS` | 60000 | 60000 | -| `PRERENDER_SHARED_CONTEXT_CAP` | 12 | 8 | +| Knob | 16 GB task (recommended) | 8 GB task (fallback) | +| --------------------------------------- | -----------------------: | -------------------: | +| `PRERENDER_PAGE_POOL_MIN` | 2 | 2 | +| `PRERENDER_PAGE_POOL_MAX` | 6 | 4 | +| `PRERENDER_PAGE_POOL_HIGH_PRIORITY_MAX` | 8 | 5 | +| `PRERENDER_HIGH_PRIORITY_THRESHOLD` | 5 | 5 | +| `PRERENDER_POOL_IDLE_CONTRACTION_MS` | 60000 | 60000 | +| `PRERENDER_SHARED_CONTEXT_CAP` | 12 | 8 | ### Rollback diff --git a/packages/host/app/services/store.ts b/packages/host/app/services/store.ts index 20357df5528..a14cd312ba5 100644 --- a/packages/host/app/services/store.ts +++ b/packages/host/app/services/store.ts @@ -137,22 +137,22 @@ const storeLogger = logger('store'); // // 1. Inside a prerender tab: forward the worker job's priority as-is. // The render-runner injects `__boxelJobPriority` alongside -// `__boxelJobId` on each visit — a priority of 0 is meaningful -// (the originating job is system-initiated background indexing) +// `__boxelJobId` on each visit — a low priority is meaningful +// (the originating job is system-initiated background work) // and must be preserved, not upgraded. Sub-`prerenderModule` // calls fired by the federated search for a `lookupDefinition` // cache miss inherit this priority so they don't outrun the // parent. If `__boxelJobPriority` is missing here (older // render-runner build, test fixture, etc.) treat as 0 — the -// safe default for prerender-context work. +// lowest tier, the safe default for prerender-context work. // // 2. 
Outside a prerender tab (the host SPA in a real user's browser): -// stamp `userInitiatedPriority` (10). User clicks driving a +// stamp `userInitiatedPriority`. User clicks driving a // search are by definition user-initiated work and should outrank // background indexing on the realm-server's PagePool. Without // this, a user search whose definition lookup misses the modules -// cache would fire its sub-prerender at priority 0 and queue -// behind concurrent indexing fan-out. +// cache would fire its sub-prerender at background priority and +// queue behind concurrent indexing fan-out. // // External (non-host) HTTP callers — anything that doesn't run in // the host SPA's JS runtime — bypass this helper entirely and set diff --git a/packages/realm-server/lib/cron-scheduler.ts b/packages/realm-server/lib/cron-scheduler.ts index bb1229e38fe..f7867574c5a 100644 --- a/packages/realm-server/lib/cron-scheduler.ts +++ b/packages/realm-server/lib/cron-scheduler.ts @@ -1,4 +1,4 @@ -import { logger } from '@cardstack/runtime-common'; +import { logger, systemInitiatedPriority } from '@cardstack/runtime-common'; import * as Sentry from '@sentry/node'; import type { CronJob } from 'cron'; import { enqueueDailyCreditGrant } from '../scripts/daily-credit-grant.ts'; @@ -47,7 +47,7 @@ function startDailyCreditGrantCron(): CronJob | undefined { try { await enqueueDailyCreditGrant({ lowCreditThreshold, - priority: 4, + priority: systemInitiatedPriority, }); } catch (error) { Sentry.captureException(error); diff --git a/packages/realm-server/prerender/manager-app.ts b/packages/realm-server/prerender/manager-app.ts index 313b0f7ab47..988f58476cc 100644 --- a/packages/realm-server/prerender/manager-app.ts +++ b/packages/realm-server/prerender/manager-app.ts @@ -1055,8 +1055,8 @@ export function buildPrerenderManagerApp(options?: { // Priority comes from the worker job (stamped onto the request // attributes by the wire-format threading). 
Pass through to // scoreCandidate so a high-priority request prefers servers - // without higher-priority pending work. Defaults to 0 (system - // priority) when absent for back-compat with older callers / + // without higher-priority pending work. Defaults to the lowest + // tier (0) when absent for back-compat with older callers / // direct curl. Only accept non-negative safe integers — priority // buckets are integer-keyed, and floats / negatives / values // beyond 2^53 would produce misleading routing comparisons. diff --git a/packages/realm-server/worker-manager.ts b/packages/realm-server/worker-manager.ts index f03db7eaf1c..92c6ef0f7c6 100644 --- a/packages/realm-server/worker-manager.ts +++ b/packages/realm-server/worker-manager.ts @@ -41,8 +41,8 @@ writeSync( import { logger, - userInitiatedPriority, - systemInitiatedPriority, + userInitiatedPrerenderHtmlPriority, + systemInitiatedPrerenderHtmlPriority, query as _query, param, separatedByCommas, @@ -138,7 +138,7 @@ let { }, highPriorityCount: { description: - 'The number of workers that service high priority jobs (user initiated) to start (default 0)', + 'The number of workers that service user-initiated jobs, including user-initiated prerender-html, and nothing below that tier (default 0)', type: 'number', }, allPriorityCount: { @@ -656,11 +656,16 @@ let adapter: PgAdapter; // is set. eventSink.setAdapter(adapter); + // Each pool's minimum priority is a dequeue floor: its workers only + // claim jobs at or above it. The high-priority pool floors at the + // user-initiated prerender-html tier so it serves all user-initiated + // work — prerender-html included — and never system-tier jobs; the + // all-priority pool floors at the lowest tier and serves everything. 
for (let i = 0; i < highPriorityCount; i++) { - await startWorker(userInitiatedPriority, urlMappings); + await startWorker(userInitiatedPrerenderHtmlPriority, urlMappings); } for (let i = 0; i < allPriorityCount; i++) { - await startWorker(systemInitiatedPriority, urlMappings); + await startWorker(systemInitiatedPrerenderHtmlPriority, urlMappings); } isReady = true; log.info('All workers have been started'); diff --git a/packages/runtime-common/definition-lookup.ts b/packages/runtime-common/definition-lookup.ts index 9720f226b63..cff758de6a6 100644 --- a/packages/runtime-common/definition-lookup.ts +++ b/packages/runtime-common/definition-lookup.ts @@ -296,8 +296,9 @@ export interface PopulateCoordinator { // Public option shape for definition lookup calls. `priority` is forwarded to // the prerender server when a cache miss requires a sub-prerender — same -// numeric scale as worker-job priority (0 = system-initiated background, -// 10 = userInitiatedPriority). Callers in the indexer thread their job +// numeric scale as worker-job priority (`systemInitiatedPriority` for +// background work, `userInitiatedPriority` for user-driven work). Callers in +// the indexer thread their job // priority through here so user-initiated reindex work doesn't silently // downgrade to background priority for its module sub-renders. export interface DefinitionLookupOptions { diff --git a/packages/runtime-common/index-runner/visit-file.ts b/packages/runtime-common/index-runner/visit-file.ts index 34089f4bba5..aeab13db333 100644 --- a/packages/runtime-common/index-runner/visit-file.ts +++ b/packages/runtime-common/index-runner/visit-file.ts @@ -32,8 +32,9 @@ interface VisitFileFusedOptions { jobInfo: JobInfo; // Worker-job priority threaded from `IndexRunner`. Forwarded into // the `prerenderVisit` request so the prerender server can route by - // priority. `0` for system-priority indexing, `10` for user- - // initiated; defaults to `0` when not provided. + // priority. 
On the tier scale in `queue.ts`: `systemInitiatedPriority` + // for background indexing, `userInitiatedPriority` for user-initiated; + // defaults to the lowest tier (`0`) when not provided. jobPriority?: number; auth: string; // Indexing batch identifier (CS-10758 step 3). Threaded into diff --git a/packages/runtime-common/index.ts b/packages/runtime-common/index.ts index 11893571704..aa563b02584 100644 --- a/packages/runtime-common/index.ts +++ b/packages/runtime-common/index.ts @@ -189,10 +189,11 @@ export interface RenderTimeoutDiagnostics { // join all three stacks for this call. requestId?: string; // Worker-job priority of the request that produced this render. - // Plumbed from the producer side via `Job.priority`. `0` is the - // system-initiated default; `10` is user-initiated. Read in post- - // mortems and in `prerender-queue-snapshot` triage to tell whether a - // stalled render was background or user-priority work. + // Plumbed from the producer side via `Job.priority`, on the tier + // scale defined in `queue.ts` (system tiers `0`/`1` below the user + // tiers `9`/`10`). Read in post-mortems and in + // `prerender-queue-snapshot` triage to tell whether a stalled render + // was background or user-initiated work. priority?: number; // Whether this render landed on a tab that was already bound to its // affinity. `true` = warm tab, fast launch + cached BrowserContext @@ -544,8 +545,8 @@ export type ModulePrerenderArgs = { // Higher priority requests dequeue ahead of lower-priority pending // work on the prerender server (per-tab queues + per-affinity file- // admission semaphore + global render semaphore). No preemption: an - // in-flight low-priority render runs to completion. Defaults to 0 - // when absent (system-priority). + // in-flight low-priority render runs to completion. Defaults to the + // lowest tier (0) when absent. 
priority?: number; }; diff --git a/packages/runtime-common/prerender-headers.ts b/packages/runtime-common/prerender-headers.ts index dacedb8427f..b2862db2489 100644 --- a/packages/runtime-common/prerender-headers.ts +++ b/packages/runtime-common/prerender-headers.ts @@ -68,10 +68,12 @@ export function sanitizeConsumingRealmHeader( // wrapper → realm-server's `handle-search`. The realm-server forwards // it into `LookupContext.priority` so any sub-`prerenderModule` fired // by `CachingDefinitionLookup` for a missed definition inherits the -// originating job's priority instead of silently dropping to 0. +// originating job's priority instead of silently dropping to the +// lowest tier. // -// Same scale as worker-job priority — 0 = system-initiated, 10 = -// userInitiatedPriority — small non-negative integers. +// Same scale as worker-job priority — `systemInitiatedPrerenderHtmlPriority` +// (the lowest tier) up to `userInitiatedPriority` — small non-negative +// integers. export const X_BOXEL_JOB_PRIORITY_HEADER = 'x-boxel-job-priority'; // Sanitize the inbound job-priority header value. The producer side diff --git a/packages/runtime-common/queue.ts b/packages/runtime-common/queue.ts index 0d99215d622..ee9d57cffca 100644 --- a/packages/runtime-common/queue.ts +++ b/packages/runtime-common/queue.ts @@ -1,8 +1,32 @@ import type { PgPrimitive } from './index.ts'; import type { Deferred } from './deferred.ts'; -export const systemInitiatedPriority = 0; +// Job priority is a worker-reservation floor, not an ordering: a worker +// dequeues only jobs whose priority is at or above its configured +// minimum, oldest-first among those. A higher number therefore reserves +// a job to more (and to more-dedicated) worker pools. 
+// +// The tiers: +// +// | priority | job | +// | -------- | ---------------------------------- | +// | 10 | any other user-initiated job | +// | 9 | user-initiated prerender-html | +// | 1 | any other system-initiated job | +// | 0 | system-initiated prerender-html | +// +// The prerender-html tiers sit one notch below their initiator tier so +// a pool's floor can take in an initiator's plain jobs with or without +// its (orders-of-magnitude slower) HTML rendering work. Two pools +// exist: the high-priority pool floors at +// `userInitiatedPrerenderHtmlPriority`, serving all user-initiated +// work — prerender-html included — and never system-tier jobs; the +// all-priority pool floors at `systemInitiatedPrerenderHtmlPriority` +// and serves everything. export const userInitiatedPriority = 10; +export const userInitiatedPrerenderHtmlPriority = 9; +export const systemInitiatedPriority = 1; +export const systemInitiatedPrerenderHtmlPriority = 0; export interface QueueRunner { start: () => Promise; @@ -97,7 +121,9 @@ export function getQueueJobCoalesceHandler(jobType: string) { export function normalizeQueueJobSpec(args: QueuePublishRequest): QueueJobSpec { return { ...args, - priority: args.priority ?? 0, + // A publish that doesn't state a priority is background work, so it + // takes the system-initiated tier. + priority: args.priority ?? systemInitiatedPriority, }; } diff --git a/packages/runtime-common/worker.ts b/packages/runtime-common/worker.ts index 54c4891bafb..712460652b3 100644 --- a/packages/runtime-common/worker.ts +++ b/packages/runtime-common/worker.ts @@ -55,9 +55,9 @@ export interface JobInfo extends JSONTypes.Object { jobId: number; reservationId: number; // Priority of the job this handler is running for, threaded from - // the queue row. `0` is the system default; user-initiated jobs use - // `10`. Forwarded into the prerenderer call chain so the prerender - // server can route by priority. 
Required because + // the queue row (the tier constants live in `queue.ts`). Forwarded + // into the prerenderer call chain so the prerender server can route + // by priority. Required because // `JSONTypes.Object`'s index signature doesn't accept `undefined`; // the queue layer always supplies the value from the row, and tests // / non-job callers that mint a synthetic JobInfo can pass