From 81b8caebacd65c58133e191bf1e7dc0ec11bd02f Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 00:12:00 -0800 Subject: [PATCH 01/65] =?UTF-8?q?feat(events):=20phase=200.1+0.3=20?= =?UTF-8?q?=E2=80=94=20event()=20primitive=20+=20ResourceCatalog=20extensi?= =?UTF-8?q?ons?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Create event() function in SDK (packages/trigger-sdk/src/v3/events.ts) with EventDefinition interface, type-safe schema inference, publish/batchPublish stubs - Add EventManifest schema to core (id, version, description) - Add onEvent field to TaskMetadata for event subscriptions - Add events array to WorkerManifest - Extend ResourceCatalog interface with registerEventMetadata, getEvent, listEventManifests, getTasksForEvent - Implement in StandardResourceCatalog with event→tasks reverse index - Add noop implementations in NoopResourceCatalog - Wire up ResourceCatalogAPI proxy methods - Export event, EventDefinition, and related types from SDK Co-Authored-By: Claude Opus 4.6 --- .../core/src/v3/resource-catalog/catalog.ts | 12 +- .../core/src/v3/resource-catalog/index.ts | 20 ++- .../resource-catalog/noopResourceCatalog.ts | 20 ++- .../standardResourceCatalog.ts | 51 +++++- packages/core/src/v3/schemas/build.ts | 3 +- packages/core/src/v3/schemas/schemas.ts | 13 ++ packages/trigger-sdk/src/v3/events.ts | 157 ++++++++++++++++++ packages/trigger-sdk/src/v3/index.ts | 1 + 8 files changed, 270 insertions(+), 7 deletions(-) create mode 100644 packages/trigger-sdk/src/v3/events.ts diff --git a/packages/core/src/v3/resource-catalog/catalog.ts b/packages/core/src/v3/resource-catalog/catalog.ts index 9ad14dd8484..ecafc8458f9 100644 --- a/packages/core/src/v3/resource-catalog/catalog.ts +++ b/packages/core/src/v3/resource-catalog/catalog.ts @@ -1,6 +1,12 @@ -import { QueueManifest, TaskManifest, WorkerManifest } from "../schemas/index.js"; +import { EventManifest, QueueManifest, TaskManifest, WorkerManifest } from "../schemas/index.js"; import { TaskMetadataWithFunctions, TaskSchema } from "../types/index.js"; +export interface EventMetadata { + id: string; + version: string; + description?: string; +} + export interface ResourceCatalog { setCurrentFileContext(filePath: string, entryPoint: string): void; clearCurrentFileContext(): void; @@ -14,4 +20,8 @@ export interface ResourceCatalog { registerQueueMetadata(queue: QueueManifest): void; listQueueManifests(): Array; getTaskSchema(id: string): TaskSchema | undefined; + registerEventMetadata(event: EventMetadata): void; + getEvent(id: string): EventMetadata | undefined; + listEventManifests(): Array; + getTasksForEvent(eventId: string): Array; } diff --git a/packages/core/src/v3/resource-catalog/index.ts b/packages/core/src/v3/resource-catalog/index.ts index a564648fcc3..49f7a6460b9 100644 --- a/packages/core/src/v3/resource-catalog/index.ts +++ b/packages/core/src/v3/resource-catalog/index.ts @@ -1,9 +1,9 @@ const API_NAME = "resource-catalog"; -import { QueueManifest, TaskManifest, WorkerManifest } from "../schemas/index.js"; +import { EventManifest, QueueManifest, TaskManifest, WorkerManifest } from "../schemas/index.js"; import { TaskMetadataWithFunctions, TaskSchema } from "../types/index.js"; import { getGlobal, registerGlobal, unregisterGlobal } from "../utils/globals.js"; -import { type ResourceCatalog } from "./catalog.js"; +import { type EventMetadata, type ResourceCatalog } from "./catalog.js"; import { NoopResourceCatalog } from "./noopResourceCatalog.js"; const NOOP_RESOURCE_CATALOG = new NoopResourceCatalog(); @@ -77,6 +77,22 @@ export class ResourceCatalogAPI { return this.#getCatalog().listQueueManifests(); } + public registerEventMetadata(event: EventMetadata): void { + this.#getCatalog().registerEventMetadata(event); + } + + public getEvent(id: string): EventMetadata | undefined { + return this.#getCatalog().getEvent(id); + } + + public listEventManifests(): Array { + return this.#getCatalog().listEventManifests(); + } + + public getTasksForEvent(eventId: string): Array { + return this.#getCatalog().getTasksForEvent(eventId); + } + #getCatalog(): ResourceCatalog { return getGlobal(API_NAME) ?? NOOP_RESOURCE_CATALOG; } diff --git a/packages/core/src/v3/resource-catalog/noopResourceCatalog.ts b/packages/core/src/v3/resource-catalog/noopResourceCatalog.ts index 53a953393aa..a48b3773152 100644 --- a/packages/core/src/v3/resource-catalog/noopResourceCatalog.ts +++ b/packages/core/src/v3/resource-catalog/noopResourceCatalog.ts @@ -1,6 +1,6 @@ -import { QueueManifest, TaskManifest, WorkerManifest } from "../schemas/index.js"; +import { EventManifest, QueueManifest, TaskManifest, WorkerManifest } from "../schemas/index.js"; import { TaskMetadataWithFunctions, TaskSchema } from "../types/index.js"; -import { ResourceCatalog } from "./catalog.js"; +import { type EventMetadata, ResourceCatalog } from "./catalog.js"; export class NoopResourceCatalog implements ResourceCatalog { registerTaskMetadata(task: TaskMetadataWithFunctions): void { @@ -54,4 +54,20 @@ export class NoopResourceCatalog implements ResourceCatalog { listQueueManifests(): Array { return []; } + + registerEventMetadata(event: EventMetadata): void { + // noop + } + + getEvent(id: string): EventMetadata | undefined { + return undefined; + } + + listEventManifests(): Array { + return []; + } + + getTasksForEvent(eventId: string): Array { + return []; + } } diff --git a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts index 3d2f1cadfd3..88ce269d29f 100644 --- a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts +++ b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts @@ -1,4 +1,5 @@ import { + EventManifest, TaskFileMetadata, TaskMetadata, TaskManifest, @@ -6,7 +7,7 @@ import { QueueManifest, } from "../schemas/index.js"; import { TaskMetadataWithFunctions, TaskSchema } from "../types/index.js"; -import { ResourceCatalog } from "./catalog.js"; +import { type EventMetadata, ResourceCatalog } from "./catalog.js"; export class StandardResourceCatalog implements ResourceCatalog { private _taskSchemas: Map = new Map(); @@ -15,6 +16,8 @@ export class StandardResourceCatalog implements ResourceCatalog { private _taskFileMetadata: Map = new Map(); private _currentFileContext?: Omit; private _queueMetadata: Map = new Map(); + private _eventMetadata: Map = new Map(); + private _eventToTasks: Map> = new Map(); setCurrentFileContext(filePath: string, entryPoint: string) { this._currentFileContext = { filePath, entryPoint }; @@ -77,6 +80,11 @@ export class StandardResourceCatalog implements ResourceCatalog { if (schema) { this._taskSchemas.set(task.id, schema); } + + // Register event→task reverse index if task subscribes to an event + if (metadata.onEvent) { + this.registerTaskMetadataForEvent(task.id, metadata.onEvent); + } } updateTaskMetadata(id: string, updates: Partial): void { @@ -165,6 +173,47 @@ export class StandardResourceCatalog implements ResourceCatalog { return this._taskMetadata.has(id); } + registerEventMetadata(event: EventMetadata): void { + this._eventMetadata.set(event.id, event); + } + + getEvent(id: string): EventMetadata | undefined { + return this._eventMetadata.get(id); + } + + listEventManifests(): Array { + return Array.from(this._eventMetadata.values()).map((event) => ({ + id: event.id, + version: event.version, + description: event.description, + })); + } + + getTasksForEvent(eventId: string): Array { + const taskIds = this._eventToTasks.get(eventId); + if (!taskIds) { + return []; + } + + const tasks: Array = []; + for (const taskId of taskIds) { + const task = this.getTask(taskId); + if (task) { + tasks.push(task); + } + } + return tasks; + } + + registerTaskMetadataForEvent(taskId: string, eventId: string): void { + let taskSet = this._eventToTasks.get(eventId); + if (!taskSet) { + taskSet = new Set(); + this._eventToTasks.set(eventId, taskSet); + } + taskSet.add(taskId); + } + disable() { // noop } diff --git a/packages/core/src/v3/schemas/build.ts b/packages/core/src/v3/schemas/build.ts index 8eb97f0f358..dae96d82aaf 100644 --- a/packages/core/src/v3/schemas/build.ts +++ b/packages/core/src/v3/schemas/build.ts @@ -1,6 +1,6 @@ import { z } from "zod"; import { ConfigManifest } from "./config.js"; -import { QueueManifest, TaskFile, TaskManifest } from "./schemas.js"; +import { EventManifest, QueueManifest, TaskFile, TaskManifest } from "./schemas.js"; export const BuildExternal = z.object({ name: z.string(), @@ -87,6 +87,7 @@ export const WorkerManifest = z.object({ configPath: z.string(), tasks: TaskManifest.array(), queues: QueueManifest.array().optional(), + events: EventManifest.array().optional(), workerEntryPoint: z.string(), controllerEntryPoint: z.string().optional(), loaderEntryPoint: z.string().optional(), diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index 233068c0b7b..27191ea722f 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -174,6 +174,17 @@ export const QueueManifest = z.object({ export type QueueManifest = z.infer; +export const EventManifest = z.object({ + /** Unique event identifier (e.g. "order.created") */ + id: z.string(), + /** Schema version */ + version: z.string().default("1.0"), + /** Optional human-readable description */ + description: z.string().optional(), +}); + +export type EventManifest = z.infer; + export const ScheduleMetadata = z.object({ cron: z.string(), timezone: z.string(), @@ -190,6 +201,8 @@ const taskMetadata = { schedule: ScheduleMetadata.optional(), maxDuration: z.number().optional(), payloadSchema: z.unknown().optional(), + /** Event ID that this task subscribes to (set when task uses `on: someEvent`) */ + onEvent: z.string().optional(), }; export const TaskMetadata = z.object(taskMetadata); diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts new file mode 100644 index 00000000000..9b212c87ed8 --- /dev/null +++ b/packages/trigger-sdk/src/v3/events.ts @@ -0,0 +1,157 @@ +import { + getSchemaParseFn, + resourceCatalog, +} from "@trigger.dev/core/v3"; +import type { + inferSchemaIn, + SchemaParseFn, + TaskSchema, +} from "@trigger.dev/core/v3"; + +// Re-use TaskSchema which is the Schema type alias from core +type Schema = TaskSchema; + +// ---- Types ---- + +/** Options for defining an event */ +export interface EventOptions { + /** Unique event identifier (e.g. "order.created") */ + id: TId; + /** Optional schema for payload validation. Supports Zod, Valibot, ArkType, etc. */ + schema?: TSchema; + /** Optional human-readable description */ + description?: string; + /** Schema version (defaults to "1.0") */ + version?: string; +} + +/** Options for publishing an event */ +export interface PublishEventOptions { + /** Idempotency key to prevent duplicate publications */ + idempotencyKey?: string; + /** Delay before triggering subscribers */ + delay?: string | Date; + /** Tags to attach to the generated runs */ + tags?: string[]; + /** Metadata to attach to the generated runs */ + metadata?: Record; +} + +/** Result of publishing an event */ +export interface PublishEventResult { + /** Unique ID of the published event instance */ + id: string; + /** Runs created for each subscriber */ + runs: Array<{ + taskIdentifier: string; + runId: string; + }>; +} + +/** An event definition that can be published and subscribed to */ +export interface EventDefinition { + /** The event identifier */ + readonly id: TId; + /** The schema version */ + readonly version: string; + /** Optional description */ + readonly description?: string; + /** The parse function derived from the schema, if provided */ + readonly _parseFn?: SchemaParseFn; + + /** Publish a single event payload to all subscribers */ + publish(payload: TPayload, options?: PublishEventOptions): Promise; + + /** Publish multiple event payloads in a batch */ + batchPublish( + items: Array<{ payload: TPayload; options?: PublishEventOptions }> + ): Promise>; +} + +/** Any event definition (for generic constraints) */ +export type AnyEventDefinition = EventDefinition; + +/** Extract the payload type from an EventDefinition */ +export type EventDefinitionPayload = + T extends EventDefinition ? TPayload : never; + +/** Extract the ID type from an EventDefinition */ +export type EventDefinitionId = + T extends EventDefinition ? TId : never; + +// ---- Implementation ---- + +// Overload: with schema — payload type is inferred from schema +export function createEvent( + options: EventOptions & { schema: TSchema } +): EventDefinition>; + +// Overload: without schema — payload type is unknown +export function createEvent( + options: EventOptions +): EventDefinition; + +// Overload: without schema (no schema field at all) +export function createEvent( + options: Omit, "schema"> +): EventDefinition; + +// Implementation +export function createEvent( + options: EventOptions +): EventDefinition { + const { id, schema, description, version = "1.0" } = options; + + // Build the parse function if a schema is provided + let parseFn: SchemaParseFn | undefined; + if (schema) { + parseFn = getSchemaParseFn(schema); + } + + const eventDef: EventDefinition = { + id, + version, + description, + _parseFn: parseFn, + + // Publish will be connected in Phase 0.8 when API endpoints exist + async publish(_payload, _options) { + throw new Error( + `event("${id}").publish() is not yet connected to the API. ` + + `The publish endpoint will be available after the backend is deployed.` + ); + }, + + async batchPublish(_items) { + throw new Error( + `event("${id}").batchPublish() is not yet connected to the API. ` + + `The publish endpoint will be available after the backend is deployed.` + ); + }, + }; + + // Register event metadata in the resource catalog + resourceCatalog.registerEventMetadata({ + id, + version, + description, + }); + + // Mark as event for runtime detection + // @ts-expect-error - adding symbol property + eventDef[Symbol.for("trigger.dev/event")] = true; + + return eventDef; +} + +/** The public `event()` function for defining events */ +export const event = createEvent; + +/** Check if a value is an EventDefinition */ +export function isEventDefinition(value: unknown): value is AnyEventDefinition { + return ( + typeof value === "object" && + value !== null && + (value as any)[Symbol.for("trigger.dev/event")] === true + ); +} diff --git a/packages/trigger-sdk/src/v3/index.ts b/packages/trigger-sdk/src/v3/index.ts index 43ee41e6e52..bd8f3340b2f 100644 --- a/packages/trigger-sdk/src/v3/index.ts +++ b/packages/trigger-sdk/src/v3/index.ts @@ -1,5 +1,6 @@ export * from "./cache.js"; export * from "./config.js"; +export * from "./events.js"; export { retry, type RetryOptions } from "./retry.js"; export { queue, BatchTriggerError } from "./shared.js"; export * from "./tasks.js"; From e165988c63d72bdd539143735868284aca55edb1 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 00:14:04 -0800 Subject: [PATCH 02/65] =?UTF-8?q?feat(events):=20phase=200.2=20=E2=80=94?= =?UTF-8?q?=20extend=20TaskResource=20for=20event=20subscriptions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add EventSource interface to core types (minimal event reference) - Add TaskOptionsWithEvent type for tasks subscribing via `on: event` - Add createTask overload for event-subscribed tasks - Pass onEvent to registerTaskMetadata when task has `on` property - Export EventSource, TaskOptionsWithEvent from SDK Usage: const orderCreated = event({ id: "order.created", schema: z.object({...}) }); const sendEmail = task({ id: "send-email", on: orderCreated, run: async (payload) => { /* payload is typed from event schema */ }, }); Co-Authored-By: Claude Opus 4.6 --- packages/core/src/v3/types/tasks.ts | 17 +++++++++++++++++ packages/trigger-sdk/src/v3/shared.ts | 19 +++++++++++++++++++ packages/trigger-sdk/src/v3/tasks.ts | 4 ++++ 3 files changed, 40 insertions(+) diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index 3b8b2e9ecdd..5494aea4459 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -35,6 +35,12 @@ export type Queue = QueueOptions; export type TaskSchema = Schema; export type { inferSchemaIn } from "./schemas.js"; +/** Minimal interface for an event definition used in task subscription */ +export interface EventSource { + readonly id: string; + readonly version: string; +} + export class SubtaskUnwrapError extends Error { public readonly taskId: string; public readonly runId: string; @@ -399,6 +405,17 @@ export type TaskWithToolOptions< parameters: TParameters; }; +/** Task options when subscribing to an event via `on` */ +export type TaskOptionsWithEvent< + TIdentifier extends string, + TPayload, + TOutput = unknown, + TInitOutput extends InitOutput = any, +> = CommonTaskOptions & { + /** The event to subscribe this task to */ + on: EventSource; +}; + declare const __output: unique symbol; declare const __payload: unique symbol; type BrandRun = { [__output]: O; [__payload]: P }; diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index 7b7fa1b9797..8ab46bf4a32 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -78,7 +78,9 @@ import type { Task, TaskBatchOutputHandle, TaskIdentifier, + EventSource, TaskOptions, + TaskOptionsWithEvent, TaskOptionsWithSchema, TaskOutput, TaskOutputHandle, @@ -103,6 +105,7 @@ export type { BatchResult, BatchRunHandle, BatchTriggerOptions, + EventSource, Queue, RunHandle, RunHandleOutput, @@ -113,6 +116,7 @@ export type { TaskFromIdentifier, TaskIdentifier, TaskOptions, + TaskOptionsWithEvent, TaskOutput, TaskOutputHandle, TaskPayload, @@ -137,6 +141,16 @@ export function queue(options: QueueOptions): Queue { return options; } +// Overload: when subscribing to an event via `on` +export function createTask< + TIdentifier extends string, + TPayload, + TOutput = unknown, + TInitOutput extends InitOutput = any, +>( + params: TaskOptionsWithEvent +): Task; + // Overload: when payloadSchema is provided, payload type should be any export function createTask< TIdentifier extends string, @@ -165,6 +179,7 @@ export function createTask< params: | TaskOptions | TaskOptionsWithSchema + | TaskOptionsWithEvent ): Task | Task { const task: Task = { id: params.id, @@ -229,6 +244,9 @@ export function createTask< registerTaskLifecycleHooks(params.id, params); + // Extract onEvent from the params if this task subscribes to an event + const onEvent = "on" in params && params.on ? (params.on as EventSource).id : undefined; + resourceCatalog.registerTaskMetadata({ id: params.id, description: params.description, @@ -237,6 +255,7 @@ export function createTask< machine: typeof params.machine === "string" ? { preset: params.machine } : params.machine, maxDuration: params.maxDuration, payloadSchema: params.jsonSchema, + onEvent, fns: { run: params.run, }, diff --git a/packages/trigger-sdk/src/v3/tasks.ts b/packages/trigger-sdk/src/v3/tasks.ts index 75b7e85e625..64e46262c08 100644 --- a/packages/trigger-sdk/src/v3/tasks.ts +++ b/packages/trigger-sdk/src/v3/tasks.ts @@ -29,11 +29,13 @@ import type { BatchItem, BatchResult, BatchRunHandle, + EventSource, Queue, RunHandle, Task, TaskIdentifier, TaskOptions, + TaskOptionsWithEvent, TaskOutput, TaskPayload, TriggerOptions, @@ -50,11 +52,13 @@ export type { BatchItem, BatchResult, BatchRunHandle, + EventSource, Queue, RunHandle, Task, TaskIdentifier, TaskOptions, + TaskOptionsWithEvent, TaskOutput, TaskPayload, TriggerOptions, From 7a78e1c5a9047c0e65779ea707c7cf1838325752 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 00:18:25 -0800 Subject: [PATCH 03/65] =?UTF-8?q?feat(events):=20phase=200.4=20=E2=80=94?= =?UTF-8?q?=20EventDefinition=20+=20EventSubscription=20database=20models?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add Prisma models for EventDefinition and EventSubscription, plus onEventSlug column on BackgroundWorkerTask. Includes indexes and unique constraints for efficient event routing lookups. Co-Authored-By: Claude Opus 4.6 --- .../migration.sql | 65 ++++++++++++++++++ .../database/prisma/schema.prisma | 67 +++++++++++++++++-- 2 files changed, 127 insertions(+), 5 deletions(-) create mode 100644 internal-packages/database/prisma/migrations/20260227081612_add_event_definition_and_subscription/migration.sql diff --git a/internal-packages/database/prisma/migrations/20260227081612_add_event_definition_and_subscription/migration.sql b/internal-packages/database/prisma/migrations/20260227081612_add_event_definition_and_subscription/migration.sql new file mode 100644 index 00000000000..0afc56324aa --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260227081612_add_event_definition_and_subscription/migration.sql @@ -0,0 +1,65 @@ +-- AlterTable +ALTER TABLE "public"."BackgroundWorkerTask" ADD COLUMN "onEventSlug" TEXT; + +-- CreateTable +CREATE TABLE "public"."EventDefinition" ( + "id" TEXT NOT NULL, + "slug" TEXT NOT NULL, + "version" TEXT NOT NULL DEFAULT '1.0', + "description" TEXT, + "schema" JSONB, + "projectId" TEXT NOT NULL, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "EventDefinition_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "public"."EventSubscription" ( + "id" TEXT NOT NULL, + "eventDefinitionId" TEXT NOT NULL, + "taskSlug" TEXT NOT NULL, + "environmentId" TEXT NOT NULL, + "projectId" TEXT NOT NULL, + "workerId" TEXT NOT NULL, + "filter" JSONB, + "pattern" TEXT, + "consumerGroup" TEXT, + "enabled" BOOLEAN NOT NULL DEFAULT true, + "priority" INTEGER NOT NULL DEFAULT 0, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + + CONSTRAINT "EventSubscription_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE INDEX "EventDefinition_projectId_slug_idx" ON "public"."EventDefinition"("projectId", "slug"); + +-- CreateIndex +CREATE UNIQUE INDEX "EventDefinition_projectId_slug_version_key" ON "public"."EventDefinition"("projectId", "slug", "version"); + +-- CreateIndex +CREATE INDEX "EventSubscription_eventDefinitionId_environmentId_enabled_idx" ON "public"."EventSubscription"("eventDefinitionId", "environmentId", "enabled"); + +-- CreateIndex +CREATE INDEX "EventSubscription_projectId_environmentId_idx" ON "public"."EventSubscription"("projectId", "environmentId"); + +-- CreateIndex +CREATE UNIQUE INDEX "EventSubscription_eventDefinitionId_taskSlug_environmentId_key" ON "public"."EventSubscription"("eventDefinitionId", "taskSlug", "environmentId"); + +-- AddForeignKey +ALTER TABLE "public"."EventDefinition" ADD CONSTRAINT "EventDefinition_projectId_fkey" FOREIGN KEY ("projectId") REFERENCES "public"."Project"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."EventSubscription" ADD CONSTRAINT "EventSubscription_eventDefinitionId_fkey" FOREIGN KEY ("eventDefinitionId") REFERENCES "public"."EventDefinition"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."EventSubscription" ADD CONSTRAINT "EventSubscription_environmentId_fkey" FOREIGN KEY ("environmentId") REFERENCES "public"."RuntimeEnvironment"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."EventSubscription" ADD CONSTRAINT "EventSubscription_projectId_fkey" FOREIGN KEY ("projectId") REFERENCES "public"."Project"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."EventSubscription" ADD CONSTRAINT "EventSubscription_workerId_fkey" FOREIGN KEY ("workerId") REFERENCES "public"."BackgroundWorker"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index e28b951f05d..9e6662a8e36 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -343,6 +343,7 @@ model RuntimeEnvironment { waitpointTags WaitpointTag[] BulkActionGroup BulkActionGroup[] customerQueries CustomerQuery[] + eventSubscriptions EventSubscription[] @@unique([projectId, slug, orgMemberId]) @@unique([projectId, shortcode]) @@ -413,6 +414,8 @@ model Project { buildSettings Json? taskScheduleInstances TaskScheduleInstance[] metricsDashboards MetricsDashboard[] + eventDefinitions EventDefinition[] + eventSubscriptions EventSubscription[] } enum ProjectVersion { @@ -487,11 +490,12 @@ model BackgroundWorker { createdAt DateTime @default(now()) updatedAt DateTime @updatedAt - tasks BackgroundWorkerTask[] - attempts TaskRunAttempt[] - lockedRuns TaskRun[] - files BackgroundWorkerFile[] - queues TaskQueue[] + tasks BackgroundWorkerTask[] + attempts TaskRunAttempt[] + lockedRuns TaskRun[] + files BackgroundWorkerFile[] + queues TaskQueue[] + eventSubscriptions EventSubscription[] deployment WorkerDeployment? @@ -569,6 +573,8 @@ model BackgroundWorkerTask { payloadSchema Json? + onEventSlug String? + @@unique([workerId, slug]) // Quick lookup of task identifiers @@index([projectId, slug]) @@ -580,6 +586,57 @@ enum TaskTriggerSource { SCHEDULED } +model EventDefinition { + id String @id @default(cuid()) + slug String // "order.created" + version String @default("1.0") + + description String? + schema Json? // JSON Schema of the payload + + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) + projectId String + + subscriptions EventSubscription[] + + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + @@unique([projectId, slug, version]) + @@index([projectId, slug]) +} + +model EventSubscription { + id String @id @default(cuid()) + + eventDefinition EventDefinition @relation(fields: [eventDefinitionId], references: [id], onDelete: Cascade, onUpdate: Cascade) + eventDefinitionId String + + taskSlug String // "send-confirmation" + + environment RuntimeEnvironment @relation(fields: [environmentId], references: [id], onDelete: Cascade, onUpdate: Cascade) + environmentId String + + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) + projectId String + + worker BackgroundWorker @relation(fields: [workerId], references: [id], onDelete: Cascade, onUpdate: Cascade) + workerId String + + filter Json? // EventFilter (Phase 2) + pattern String? // Wildcard pattern (Phase 2) + consumerGroup String? // Consumer group name (Phase 5) + enabled Boolean @default(true) + priority Int @default(0) + + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + + @@unique([eventDefinitionId, taskSlug, environmentId]) + @@index([eventDefinitionId, environmentId, enabled]) + @@index([projectId, environmentId]) +} + model TaskRun { id String @id @default(cuid()) From 243fa91c340dabcf66bdd2b76dc20f92340ee8db Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 00:27:10 -0800 Subject: [PATCH 04/65] =?UTF-8?q?feat(events):=20phase=200.5=20=E2=80=94?= =?UTF-8?q?=20register=20events=20and=20subscriptions=20during=20deploy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wire the event pipeline end-to-end from index workers through to the database: - Add onEvent to TaskResource, events to BackgroundWorkerMetadata - Include listEventManifests() in both dev and managed index workers - Pass events through devSupervisor and managed-index-controller - Upsert EventDefinition and EventSubscription during worker creation - Set onEventSlug on BackgroundWorkerTask records - Disable stale subscriptions from previous deploys - Fix EventManifest.version to required string (avoid Zod input/output type mismatch in tshy composite builds) Co-Authored-By: Claude Opus 4.6 --- .../services/createBackgroundWorker.server.ts | 120 ++++++++++++++++++ packages/cli-v3/src/dev/devSupervisor.ts | 1 + .../src/entryPoints/dev-index-worker.ts | 1 + .../entryPoints/managed-index-controller.ts | 1 + .../src/entryPoints/managed-index-worker.ts | 1 + packages/core/src/v3/schemas/resources.ts | 4 +- packages/core/src/v3/schemas/schemas.ts | 2 +- 7 files changed, 128 insertions(+), 2 deletions(-) diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index 2938164b74b..a60f960f3fd 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -2,6 +2,7 @@ import { BackgroundWorkerMetadata, BackgroundWorkerSourceFileMetadata, CreateBackgroundWorkerRequestBody, + EventManifest, QueueManifest, TaskResource, } from "@trigger.dev/core/v3"; @@ -216,6 +217,9 @@ export async function createWorkerResources( // Create the tasks await createWorkerTasks(metadata, queues, worker, environment, prisma, tasksToBackgroundFiles); + + // Register events and subscriptions + await syncWorkerEvents(metadata, worker, environment, prisma); } async function createWorkerTasks( @@ -282,6 +286,7 @@ async function createWorkerTask( maxDurationInSeconds: task.maxDuration ? clampMaxDuration(task.maxDuration) : null, queueId: queue.id, payloadSchema: task.payloadSchema as any, + onEventSlug: task.onEvent ?? null, }, }); } catch (error) { @@ -322,6 +327,121 @@ async function createWorkerTask( } } +async function syncWorkerEvents( + metadata: BackgroundWorkerMetadata, + worker: BackgroundWorker, + environment: AuthenticatedEnvironment, + prisma: PrismaClientOrTransaction +) { + // 1. Upsert EventDefinitions from the manifest + const eventDefinitions = new Map(); // slug → EventDefinition.id + + if (metadata.events && metadata.events.length > 0) { + for (const event of metadata.events) { + const eventDef = await prisma.eventDefinition.upsert({ + where: { + projectId_slug_version: { + projectId: worker.projectId, + slug: event.id, + version: event.version ?? "1.0", + }, + }, + create: { + slug: event.id, + version: event.version ?? "1.0", + description: event.description, + projectId: worker.projectId, + }, + update: { + description: event.description, + }, + }); + + eventDefinitions.set(event.id, eventDef.id); + } + } + + // 2. Find tasks that subscribe to events (have onEvent set) + const tasksWithEvents = metadata.tasks.filter((t) => t.onEvent); + + // 3. Upsert EventSubscriptions for each task with onEvent + const activeSubscriptionIds = new Set(); + + for (const task of tasksWithEvents) { + if (!task.onEvent) continue; + + // Ensure the EventDefinition exists (it may have been defined in a different project or not in this manifest) + let eventDefId = eventDefinitions.get(task.onEvent); + + if (!eventDefId) { + // The event might already exist in the database from a previous deploy + const existingDef = await prisma.eventDefinition.findFirst({ + where: { + projectId: worker.projectId, + slug: task.onEvent, + }, + orderBy: { + createdAt: "desc", + }, + }); + + if (existingDef) { + eventDefId = existingDef.id; + } else { + // Auto-create a basic EventDefinition for events referenced by tasks but not explicitly defined + const newDef = await prisma.eventDefinition.create({ + data: { + slug: task.onEvent, + version: "1.0", + projectId: worker.projectId, + }, + }); + eventDefId = newDef.id; + eventDefinitions.set(task.onEvent, eventDefId); + } + } + + const subscription = await prisma.eventSubscription.upsert({ + where: { + eventDefinitionId_taskSlug_environmentId: { + eventDefinitionId: eventDefId, + taskSlug: task.id, + environmentId: environment.id, + }, + }, + create: { + eventDefinitionId: eventDefId, + taskSlug: task.id, + environmentId: environment.id, + projectId: worker.projectId, + workerId: worker.id, + enabled: true, + }, + update: { + workerId: worker.id, + enabled: true, + }, + }); + + activeSubscriptionIds.add(subscription.id); + } + + // 4. Disable subscriptions from previous workers that are no longer active + // (tasks that stopped subscribing to events in this deploy) + await prisma.eventSubscription.updateMany({ + where: { + projectId: worker.projectId, + environmentId: environment.id, + id: { + notIn: Array.from(activeSubscriptionIds), + }, + }, + data: { + enabled: false, + }, + }); +} + async function createWorkerQueues( metadata: BackgroundWorkerMetadata, worker: BackgroundWorker, diff --git a/packages/cli-v3/src/dev/devSupervisor.ts b/packages/cli-v3/src/dev/devSupervisor.ts index 67da7e59458..e9e80053dd5 100644 --- a/packages/cli-v3/src/dev/devSupervisor.ts +++ b/packages/cli-v3/src/dev/devSupervisor.ts @@ -227,6 +227,7 @@ class DevSupervisor implements WorkerRuntime { cliPackageVersion: manifest.cliPackageVersion, tasks: backgroundWorker.manifest.tasks, queues: backgroundWorker.manifest.queues, + events: backgroundWorker.manifest.events, contentHash: manifest.contentHash, sourceFiles, runtime: backgroundWorker.manifest.runtime, diff --git a/packages/cli-v3/src/entryPoints/dev-index-worker.ts b/packages/cli-v3/src/entryPoints/dev-index-worker.ts index da5c6ee7508..263c4053886 100644 --- a/packages/cli-v3/src/entryPoints/dev-index-worker.ts +++ b/packages/cli-v3/src/entryPoints/dev-index-worker.ts @@ -154,6 +154,7 @@ await sendMessageInCatalog( manifest: { tasks, queues: resourceCatalog.listQueueManifests(), + events: resourceCatalog.listEventManifests(), configPath: buildManifest.configPath, runtime: buildManifest.runtime, runtimeVersion: detectRuntimeVersion(), diff --git a/packages/cli-v3/src/entryPoints/managed-index-controller.ts b/packages/cli-v3/src/entryPoints/managed-index-controller.ts index 181d3d1093c..35108e3723a 100644 --- a/packages/cli-v3/src/entryPoints/managed-index-controller.ts +++ b/packages/cli-v3/src/entryPoints/managed-index-controller.ts @@ -105,6 +105,7 @@ async function indexDeployment({ cliPackageVersion: buildManifest.cliPackageVersion, tasks: workerManifest.tasks, queues: workerManifest.queues, + events: workerManifest.events, sourceFiles, runtime: workerManifest.runtime, runtimeVersion: workerManifest.runtimeVersion, diff --git a/packages/cli-v3/src/entryPoints/managed-index-worker.ts b/packages/cli-v3/src/entryPoints/managed-index-worker.ts index 5ff9f1b62ed..b61078aaf1a 100644 --- a/packages/cli-v3/src/entryPoints/managed-index-worker.ts +++ b/packages/cli-v3/src/entryPoints/managed-index-worker.ts @@ -156,6 +156,7 @@ await sendMessageInCatalog( manifest: { tasks, queues: resourceCatalog.listQueueManifests(), + events: resourceCatalog.listEventManifests(), configPath: buildManifest.configPath, runtime: buildManifest.runtime, runtimeVersion: detectRuntimeVersion(), diff --git a/packages/core/src/v3/schemas/resources.ts b/packages/core/src/v3/schemas/resources.ts index 08764906ede..e3c0619f2cb 100644 --- a/packages/core/src/v3/schemas/resources.ts +++ b/packages/core/src/v3/schemas/resources.ts @@ -1,5 +1,5 @@ import { z } from "zod"; -import { QueueManifest, RetryOptions, ScheduleMetadata } from "./schemas.js"; +import { EventManifest, QueueManifest, RetryOptions, ScheduleMetadata } from "./schemas.js"; import { MachineConfig } from "./common.js"; export const TaskResource = z.object({ @@ -15,6 +15,7 @@ export const TaskResource = z.object({ maxDuration: z.number().optional(), // JSONSchema type - using z.unknown() for runtime validation to accept JSONSchema7 payloadSchema: z.unknown().optional(), + onEvent: z.string().optional(), }); export type TaskResource = z.infer; @@ -34,6 +35,7 @@ export const BackgroundWorkerMetadata = z.object({ cliPackageVersion: z.string().optional(), tasks: z.array(TaskResource), queues: z.array(QueueManifest).optional(), + events: z.array(EventManifest).optional(), sourceFiles: z.array(BackgroundWorkerSourceFileMetadata).optional(), runtime: z.string().optional(), runtimeVersion: z.string().optional(), diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index 27191ea722f..a30d01180db 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -178,7 +178,7 @@ export const EventManifest = z.object({ /** Unique event identifier (e.g. "order.created") */ id: z.string(), /** Schema version */ - version: z.string().default("1.0"), + version: z.string(), /** Optional human-readable description */ description: z.string().optional(), }); From 0c8c24b7afad0d0a6ae241330f1ea17be92dbfaf Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 00:30:50 -0800 Subject: [PATCH 05/65] =?UTF-8?q?feat(events):=20phase=200.6+0.7=20?= =?UTF-8?q?=E2=80=94=20publish=20API=20endpoints=20+=20PublishEventService?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the event publishing pipeline: - PublishEventRequestBody/Response schemas in core/schemas/api.ts - BatchPublishEventRequestBody/Response schemas for batch publishing - PublishEventService: fan-out engine that triggers subscribed tasks with per-consumer idempotency keys and partial failure handling - POST /api/v1/events/:eventId/publish endpoint - POST /api/v1/events/:eventId/batchPublish endpoint Co-Authored-By: Claude Opus 4.6 --- .../api.v1.events.$eventId.batchPublish.ts | 59 ++++++++ .../routes/api.v1.events.$eventId.publish.ts | 53 +++++++ .../v3/services/events/publishEvent.server.ts | 135 ++++++++++++++++++ packages/core/src/v3/schemas/api.ts | 54 +++++++ 4 files changed, 301 insertions(+) create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.publish.ts create mode 100644 apps/webapp/app/v3/services/events/publishEvent.server.ts diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts new file mode 100644 index 00000000000..d5b42590112 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts @@ -0,0 +1,59 @@ +import { json } from "@remix-run/server-runtime"; +import { BatchPublishEventRequestBody } from "@trigger.dev/core/v3"; +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { PublishEventService, PublishEventResult } from "~/v3/services/events/publishEvent.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +const { action, loader } = createActionApiRoute( + { + params: ParamsSchema, + body: BatchPublishEventRequestBody, + corsStrategy: "all", + authorization: { + action: "trigger", + resource: (params) => ({ tasks: params.eventId }), + superScopes: ["write:tasks", "admin"], + }, + }, + async ({ body, params, authentication }) => { + const service = new PublishEventService(); + + try { + const results: PublishEventResult[] = []; + + for (const item of body.items) { + const result = await service.call( + params.eventId, + authentication.environment, + item.payload, + { + idempotencyKey: item.options?.idempotencyKey, + delay: item.options?.delay, + tags: item.options?.tags, + metadata: item.options?.metadata, + context: item.options?.context, + } + ); + + results.push(result); + } + + return json({ results }, { status: 200 }); + } catch (error) { + if (error instanceof ServiceValidationError) { + return json({ error: error.message }, { status: error.status ?? 422 }); + } else if (error instanceof Error) { + return json({ error: error.message }, { status: 500 }); + } + + return json({ error: "Something went wrong" }, { status: 500 }); + } + } +); + +export { action, loader }; diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts new file mode 100644 index 00000000000..eb5e1d7a08c --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts @@ -0,0 +1,53 @@ +import { json } from "@remix-run/server-runtime"; +import { PublishEventRequestBody } from "@trigger.dev/core/v3"; +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { PublishEventService } from "~/v3/services/events/publishEvent.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +const { action, loader } = createActionApiRoute( + { + params: ParamsSchema, + body: PublishEventRequestBody, + corsStrategy: "all", + authorization: { + action: "trigger", + resource: (params) => ({ tasks: params.eventId }), + superScopes: ["write:tasks", "admin"], + }, + }, + async ({ body, params, authentication }) => { + const service = new PublishEventService(); + + try { + const result = await service.call( + params.eventId, + authentication.environment, + body.payload, + { + idempotencyKey: body.options?.idempotencyKey, + delay: body.options?.delay, + tags: body.options?.tags, + metadata: body.options?.metadata, + context: body.options?.context, + } + ); + + return json(result, { status: 200 }); + } catch (error) { + if (error instanceof ServiceValidationError) { + return json({ error: error.message }, { status: error.status ?? 422 }); + } else if (error instanceof Error) { + return json({ error: error.message }, { status: 500 }); + } + + return json({ error: "Something went wrong" }, { status: 500 }); + } + } +); + +export { action, loader }; diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts new file mode 100644 index 00000000000..5b8dfdb40c7 --- /dev/null +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -0,0 +1,135 @@ +import { PublishEventResponseBody, TriggerTaskRequestBody } from "@trigger.dev/core/v3"; +import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { logger } from "~/services/logger.server"; +import { generateFriendlyId } from "../../friendlyIdentifiers"; +import { BaseService, ServiceValidationError } from "../baseService.server"; +import { + TriggerTaskService, + TriggerTaskServiceOptions, + TriggerTaskServiceResult, +} from "../triggerTask.server"; + +export type PublishEventOptions = { + idempotencyKey?: string; + delay?: string | Date; + tags?: string | string[]; + metadata?: unknown; + context?: unknown; +}; + +export type PublishEventResult = { + eventId: string; + runs: Array<{ + taskIdentifier: string; + runId: string; + }>; +}; + +export class PublishEventService extends BaseService { + public async call( + eventSlug: string, + environment: AuthenticatedEnvironment, + payload: unknown, + options: PublishEventOptions = {} + ): Promise { + return this.traceWithEnv("publishEvent", environment, async (span) => { + span.setAttribute("eventSlug", eventSlug); + + // 1. Look up EventDefinition by slug + projectId + const eventDefinition = await this._prisma.eventDefinition.findFirst({ + where: { + slug: eventSlug, + projectId: environment.projectId, + }, + orderBy: { + createdAt: "desc", + }, + }); + + if (!eventDefinition) { + throw new ServiceValidationError(`Event "${eventSlug}" not found`, 404); + } + + span.setAttribute("eventDefinitionId", eventDefinition.id); + + // 2. Find all active subscriptions for this event + environment + const subscriptions = await this._prisma.eventSubscription.findMany({ + where: { + eventDefinitionId: eventDefinition.id, + environmentId: environment.id, + enabled: true, + }, + }); + + span.setAttribute("subscriberCount", subscriptions.length); + + if (subscriptions.length === 0) { + return { + eventId: generateFriendlyId("evt"), + runs: [], + }; + } + + // 3. Fan out: trigger each subscribed task + const eventId = generateFriendlyId("evt"); + const runs: PublishEventResult["runs"] = []; + + const triggerService = new TriggerTaskService(); + + for (const subscription of subscriptions) { + try { + // Derive per-consumer idempotency key if a global one was provided + const consumerIdempotencyKey = options.idempotencyKey + ? `${options.idempotencyKey}:${subscription.taskSlug}` + : undefined; + + const body: TriggerTaskRequestBody = { + payload, + context: options.context, + options: { + tags: options.tags + ? Array.isArray(options.tags) + ? options.tags + : [options.tags] + : undefined, + metadata: options.metadata, + delay: options.delay, + }, + }; + + const triggerOptions: TriggerTaskServiceOptions = { + idempotencyKey: consumerIdempotencyKey, + }; + + const result = await triggerService.call( + subscription.taskSlug, + environment, + body, + triggerOptions + ); + + if (result) { + runs.push({ + taskIdentifier: subscription.taskSlug, + runId: result.run.friendlyId, + }); + } + } catch (error) { + // Partial failure: log the error but continue with other subscribers + logger.error("Failed to trigger task for event subscription", { + eventSlug, + eventId, + taskSlug: subscription.taskSlug, + subscriptionId: subscription.id, + error: + error instanceof Error + ? { name: error.name, message: error.message, stack: error.stack } + : String(error), + }); + } + } + + return { eventId, runs }; + }); + } +} diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 2a7bcb96502..77108970862 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1598,3 +1598,57 @@ export const AppendToStreamResponseBody = z.object({ message: z.string().optional(), }); export type AppendToStreamResponseBody = z.infer; + +// ---- Event publish schemas ---- + +export const PublishEventRequestBody = z.object({ + payload: z.any(), + options: z + .object({ + idempotencyKey: z.string().optional(), + delay: z.string().or(z.coerce.date()).optional(), + tags: RunTags.optional(), + metadata: z.any().optional(), + context: z.any().optional(), + }) + .optional(), +}); + +export type PublishEventRequestBody = z.infer; + +export const PublishEventResponseBody = z.object({ + eventId: z.string(), + runs: z.array( + z.object({ + taskIdentifier: z.string(), + runId: z.string(), + }) + ), +}); + +export type PublishEventResponseBody = z.infer; + +export const BatchPublishEventRequestBody = z.object({ + items: z.array( + z.object({ + payload: z.any(), + options: z + .object({ + idempotencyKey: z.string().optional(), + delay: z.string().or(z.coerce.date()).optional(), + tags: RunTags.optional(), + metadata: z.any().optional(), + context: z.any().optional(), + }) + .optional(), + }) + ), +}); + +export type BatchPublishEventRequestBody = z.infer; + +export const BatchPublishEventResponseBody = z.object({ + results: z.array(PublishEventResponseBody), +}); + +export type BatchPublishEventResponseBody = z.infer; From d1c87d551faa47db301af6a5602a8ad47b2571ec Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 00:36:42 -0800 Subject: [PATCH 06/65] =?UTF-8?q?feat(events):=20phase=200.8=20=E2=80=94?= =?UTF-8?q?=20SDK=20.publish()=20wired=20to=20API?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add publishEvent() and batchPublishEvent() methods to ApiClient. Wire up EventDefinition.publish() and .batchPublish() in the SDK to call the backend API endpoints, with schema validation. Co-Authored-By: Claude Opus 4.6 --- packages/core/src/v3/apiClient/index.ts | 42 +++++++++++++++++ packages/trigger-sdk/src/v3/events.ts | 63 +++++++++++++++++++++---- 2 files changed, 95 insertions(+), 10 deletions(-) diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts index 428493b71e2..047ab30934f 100644 --- a/packages/core/src/v3/apiClient/index.ts +++ b/packages/core/src/v3/apiClient/index.ts @@ -8,7 +8,11 @@ import { ApiDeploymentListSearchParams, AppendToStreamResponseBody, BatchItemNDJSON, + BatchPublishEventRequestBody, + BatchPublishEventResponseBody, BatchTaskRunExecutionResult, + PublishEventRequestBody, + PublishEventResponseBody, BatchTriggerTaskV3RequestBody, BatchTriggerTaskV3Response, CanceledRunResponse, @@ -1441,6 +1445,44 @@ export class ApiClient { ); } + publishEvent( + eventId: string, + body: PublishEventRequestBody, + requestOptions?: ZodFetchOptions + ) { + const encodedEventId = encodeURIComponent(eventId); + + return zodfetch( + PublishEventResponseBody, + `${this.baseUrl}/api/v1/events/${encodedEventId}/publish`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + batchPublishEvent( + eventId: string, + body: BatchPublishEventRequestBody, + requestOptions?: ZodFetchOptions + ) { + const encodedEventId = encodeURIComponent(eventId); + + return zodfetch( + BatchPublishEventResponseBody, + `${this.baseUrl}/api/v1/events/${encodedEventId}/batchPublish`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + #getHeaders(spanParentAsLink: boolean, additionalHeaders?: Record) { const headers: Record = { "Content-Type": "application/json", diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index 9b212c87ed8..48ec03558ca 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -1,4 +1,5 @@ import { + apiClientManager, getSchemaParseFn, resourceCatalog, } from "@trigger.dev/core/v3"; @@ -114,19 +115,61 @@ export function createEvent { + const validatedPayload = parseFn ? await parseFn(item.payload) : item.payload; + return { + payload: validatedPayload, + options: item.options + ? { + idempotencyKey: item.options.idempotencyKey, + delay: + item.options.delay instanceof Date + ? item.options.delay.toISOString() + : item.options.delay, + tags: item.options.tags, + metadata: item.options.metadata, + } + : undefined, + }; + }) ); + + const result = await apiClient.batchPublishEvent(id, { + items: validatedItems, + }); + + return result.results.map((r) => ({ + id: r.eventId, + runs: r.runs, + })); }, }; From 85fb685dcc55e6edbf69925423c62e9fa2fb6012 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 00:47:45 -0800 Subject: [PATCH 07/65] =?UTF-8?q?feat(events):=20phase=200.9=20=E2=80=94?= =?UTF-8?q?=20integration=20tests=20for=20PublishEventService?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add 6 containerTest tests covering: - publish with no subscribers → 0 runs - publish with 3 subscribers → 3 runs - publish nonexistent event → 404 error - disabled subscription is skipped - partial trigger failure does not affect other subscribers - idempotency key prevents duplicate fan-out Refactor PublishEventService to accept injectable TriggerFn for testability while keeping the default behavior unchanged. Co-Authored-By: Claude Opus 4.6 --- .../v3/services/events/publishEvent.server.ts | 30 +- apps/webapp/test/engine/publishEvent.test.ts | 485 ++++++++++++++++++ 2 files changed, 511 insertions(+), 4 deletions(-) create mode 100644 apps/webapp/test/engine/publishEvent.test.ts diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 5b8dfdb40c7..c71457d71f6 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -1,4 +1,5 @@ -import { PublishEventResponseBody, TriggerTaskRequestBody } from "@trigger.dev/core/v3"; +import { TriggerTaskRequestBody } from "@trigger.dev/core/v3"; +import { PrismaClientOrTransaction } from "~/db.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { generateFriendlyId } from "../../friendlyIdentifiers"; @@ -25,7 +26,30 @@ export type PublishEventResult = { }>; }; +/** Interface for the trigger function used by PublishEventService */ +export type TriggerFn = ( + taskId: string, + environment: AuthenticatedEnvironment, + body: TriggerTaskRequestBody, + options: TriggerTaskServiceOptions +) => Promise; + export class PublishEventService extends BaseService { + private readonly _triggerFn: TriggerFn; + + constructor( + prisma?: PrismaClientOrTransaction, + triggerFn?: TriggerFn + ) { + super(prisma); + this._triggerFn = + triggerFn ?? + ((taskId, environment, body, options) => { + const svc = new TriggerTaskService({ prisma: this._prisma }); + return svc.call(taskId, environment, body, options); + }); + } + public async call( eventSlug: string, environment: AuthenticatedEnvironment, @@ -74,8 +98,6 @@ export class PublishEventService extends BaseService { const eventId = generateFriendlyId("evt"); const runs: PublishEventResult["runs"] = []; - const triggerService = new TriggerTaskService(); - for (const subscription of subscriptions) { try { // Derive per-consumer idempotency key if a global one was provided @@ -101,7 +123,7 @@ export class PublishEventService extends BaseService { idempotencyKey: consumerIdempotencyKey, }; - const result = await triggerService.call( + const result = await this._triggerFn( subscription.taskSlug, environment, body, diff --git a/apps/webapp/test/engine/publishEvent.test.ts b/apps/webapp/test/engine/publishEvent.test.ts new file mode 100644 index 00000000000..1e64f72c40b --- /dev/null +++ b/apps/webapp/test/engine/publishEvent.test.ts @@ -0,0 +1,485 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client (required for webapp service imports) +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +vi.mock("~/services/platform.v3.server", async (importOriginal) => { + const actual = (await importOriginal()) as Record; + return { + ...actual, + getEntitlement: vi.fn(), + }; +}); + +import { RunEngine } from "@internal/run-engine"; +import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/run-engine/tests"; +import { containerTest } from "@internal/testcontainers"; +import { trace } from "@opentelemetry/api"; +import { IOPacket } from "@trigger.dev/core/v3"; +import { TaskRun } from "@trigger.dev/database"; +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import { DefaultQueueManager } from "~/runEngine/concerns/queues.server"; +import { + EntitlementValidationParams, + MaxAttemptsValidationParams, + ParentRunValidationParams, + PayloadProcessor, + TagValidationParams, + TracedEventSpan, + TraceEventConcern, + TriggerRacepoints, + TriggerRacepointSystem, + TriggerTaskRequest, + TriggerTaskValidator, + ValidationResult, +} from "~/runEngine/types"; +import { RunEngineTriggerTaskService } from "../../app/runEngine/services/triggerTask.server"; +import { + PublishEventService, + type TriggerFn, +} from "../../app/v3/services/events/publishEvent.server"; +import { ServiceValidationError } from "../../app/v3/services/common.server"; + +vi.setConfig({ testTimeout: 120_000 }); + +class MockPayloadProcessor implements PayloadProcessor { + async process(request: TriggerTaskRequest): Promise { + return { + data: JSON.stringify(request.body.payload), + dataType: "application/json", + }; + } +} + +class MockTriggerTaskValidator implements TriggerTaskValidator { + validateTags(params: TagValidationParams): ValidationResult { + return { ok: true }; + } + validateEntitlement(params: EntitlementValidationParams): Promise { + return Promise.resolve({ ok: true }); + } + validateMaxAttempts(params: MaxAttemptsValidationParams): ValidationResult { + return { ok: true }; + } + validateParentRun(params: ParentRunValidationParams): ValidationResult { + return { ok: true }; + } +} + +class MockTraceEventConcern implements TraceEventConcern { + async traceRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceIdempotentRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + idempotencyKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceDebouncedRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } +} + +function createEngine(prisma: any, redisOptions: any) { + return new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); +} + +function createTriggerTaskService(prisma: any, engine: RunEngine) { + const traceEventConcern = new MockTraceEventConcern(); + return new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern(prisma, engine, traceEventConcern), + validator: new MockTriggerTaskValidator(), + traceEventConcern, + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024, + }); +} + +/** Build a TriggerFn that delegates to RunEngineTriggerTaskService */ +function buildTriggerFn(prisma: any, engine: RunEngine): TriggerFn { + const svc = createTriggerTaskService(prisma, engine); + return async (taskId, environment, body, options) => { + return svc.call({ + taskId, + environment, + body, + options, + }); + }; +} + +describe("PublishEventService", () => { + containerTest( + "publish event with no subscribers returns 0 runs", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Create an event definition with no subscriptions + await prisma.eventDefinition.create({ + data: { + slug: "order.created", + version: "1.0", + projectId: env.projectId, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + const result = await service.call("order.created", env, { orderId: "123" }); + + expect(result).toBeDefined(); + expect(result.eventId).toBeDefined(); + expect(result.eventId).toMatch(/^evt_/); + expect(result.runs).toHaveLength(0); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "publish event with 3 subscribers creates 3 runs", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIds = ["send-email", "update-inventory", "notify-slack"]; + + const { worker } = await setupBackgroundWorker(engine, env, taskIds); + + // Create event definition + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.created", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Create subscriptions for all 3 tasks + for (const taskSlug of taskIds) { + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug, + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + } + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + const result = await service.call("order.created", env, { orderId: "123" }); + + expect(result).toBeDefined(); + expect(result.eventId).toMatch(/^evt_/); + expect(result.runs).toHaveLength(3); + + // Verify each task got triggered + const triggeredTasks = result.runs.map((r) => r.taskIdentifier).sort(); + expect(triggeredTasks).toEqual(["notify-slack", "send-email", "update-inventory"]); + + // Verify runs exist in DB + for (const run of result.runs) { + const dbRun = await prisma.taskRun.findFirst({ + where: { friendlyId: run.runId }, + }); + expect(dbRun).toBeDefined(); + } + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "publish event that does not exist throws 404", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + await expect( + service.call("nonexistent.event", env, { data: "test" }) + ).rejects.toThrow(ServiceValidationError); + + await expect( + service.call("nonexistent.event", env, { data: "test" }) + ).rejects.toThrow('Event "nonexistent.event" not found'); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "disabled subscription does not receive event", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, ["active-task", "disabled-task"]); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "user.updated", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Active subscription + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "active-task", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + // Disabled subscription + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "disabled-task", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: false, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + const result = await service.call("user.updated", env, { userId: "u1" }); + + expect(result.runs).toHaveLength(1); + expect(result.runs[0].taskIdentifier).toBe("active-task"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "error in one trigger does not affect others", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, ["good-task", "failing-task"]); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.shipped", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "failing-task", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "good-task", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + // Build a trigger function that fails for "failing-task" + const realTriggerFn = buildTriggerFn(prisma, engine); + const failingTriggerFn: TriggerFn = async (taskId, environment, body, options) => { + if (taskId === "failing-task") { + throw new Error("Simulated trigger failure"); + } + return realTriggerFn(taskId, environment, body, options); + }; + + const service = new PublishEventService(prisma, failingTriggerFn); + + const result = await service.call("order.shipped", env, { trackingId: "T123" }); + + // Only the good task should have a run; the failing one is silently dropped + expect(result.runs).toHaveLength(1); + expect(result.runs[0].taskIdentifier).toBe("good-task"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "idempotency key prevents duplicate fan-out", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "handler-task"); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "payment.received", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "handler-task", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // First publish + const result1 = await service.call("payment.received", env, { amount: 100 }, { + idempotencyKey: "pay-123", + }); + + expect(result1.runs).toHaveLength(1); + const firstRunId = result1.runs[0].runId; + + // Second publish with same idempotency key — should return cached run + const result2 = await service.call("payment.received", env, { amount: 100 }, { + idempotencyKey: "pay-123", + }); + + expect(result2.runs).toHaveLength(1); + expect(result2.runs[0].runId).toBe(firstRunId); + } finally { + await engine.quit(); + } + } + ); +}); From e6249e407b4e0f5fa3714bdb993d74c3882632a9 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 21:42:52 -0800 Subject: [PATCH 08/65] =?UTF-8?q?feat(events):=20phase=201.1=20=E2=80=94?= =?UTF-8?q?=20schema=20versioning=20DB=20+=20SchemaRegistryService?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add compatibleVersions, deprecatedAt, deprecatedMessage fields to EventDefinition model. Add schema field to EventManifest. Create SchemaRegistryService with registerSchema, getSchema, listSchemas, validatePayload (using ajv), and checkCompatibility. Co-Authored-By: Claude Opus 4.6 --- .../services/events/schemaRegistry.server.ts | 291 ++++++++++++++++++ apps/webapp/package.json | 1 + .../migration.sql | 4 + .../database/prisma/schema.prisma | 5 + packages/core/src/v3/schemas/schemas.ts | 2 + pnpm-lock.yaml | 25 +- 6 files changed, 321 insertions(+), 7 deletions(-) create mode 100644 apps/webapp/app/v3/services/events/schemaRegistry.server.ts create mode 100644 internal-packages/database/prisma/migrations/20260228054059_add_event_schema_versioning/migration.sql diff --git a/apps/webapp/app/v3/services/events/schemaRegistry.server.ts b/apps/webapp/app/v3/services/events/schemaRegistry.server.ts new file mode 100644 index 00000000000..121903c0330 --- /dev/null +++ b/apps/webapp/app/v3/services/events/schemaRegistry.server.ts @@ -0,0 +1,291 @@ +import Ajv, { type ErrorObject, type ValidateFunction } from "ajv"; +import { PrismaClientOrTransaction } from "~/db.server"; +import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { logger } from "~/services/logger.server"; +import { BaseService, ServiceValidationError } from "../baseService.server"; + +const ajv = new Ajv({ allErrors: true, strict: false }); + +/** Cached compiled validators keyed by EventDefinition.id */ +const validatorCache = new Map(); + +export type SchemaValidationResult = + | { success: true } + | { success: false; errors: SchemaValidationError[] }; + +export type SchemaValidationError = { + path: string; + message: string; +}; + +export type SchemaCompatibilityResult = + | { compatible: true } + | { compatible: false; reasons: string[] }; + +export class SchemaRegistryService extends BaseService { + /** + * Register (upsert) a JSON schema for an event definition. + * Called during worker deploy when event manifests include schemas. + */ + async registerSchema(params: { + projectId: string; + eventSlug: string; + version: string; + schema: unknown; + description?: string; + }): Promise<{ eventDefinitionId: string }> { + const eventDef = await this._prisma.eventDefinition.upsert({ + where: { + projectId_slug_version: { + projectId: params.projectId, + slug: params.eventSlug, + version: params.version, + }, + }, + create: { + slug: params.eventSlug, + version: params.version, + schema: params.schema as any, + description: params.description, + projectId: params.projectId, + }, + update: { + schema: params.schema as any, + description: params.description, + }, + }); + + // Invalidate cached validator when schema changes + validatorCache.delete(eventDef.id); + + return { eventDefinitionId: eventDef.id }; + } + + /** + * Get the schema for an event (latest version or specific version). + */ + async getSchema(params: { + projectId: string; + eventSlug: string; + version?: string; + }): Promise<{ + id: string; + slug: string; + version: string; + schema: unknown | null; + description: string | null; + deprecatedAt: Date | null; + deprecatedMessage: string | null; + } | null> { + const where: any = { + projectId: params.projectId, + slug: params.eventSlug, + }; + + if (params.version) { + where.version = params.version; + } + + return this._prisma.eventDefinition.findFirst({ + where, + orderBy: { createdAt: "desc" }, + select: { + id: true, + slug: true, + version: true, + schema: true, + description: true, + deprecatedAt: true, + deprecatedMessage: true, + }, + }); + } + + /** + * List all event definitions for a project with subscriber counts. + */ + async listSchemas(params: { + projectId: string; + environmentId?: string; + }): Promise< + Array<{ + id: string; + slug: string; + version: string; + description: string | null; + schema: unknown | null; + deprecatedAt: Date | null; + subscriberCount: number; + createdAt: Date; + updatedAt: Date; + }> + > { + const eventDefs = await this._prisma.eventDefinition.findMany({ + where: { + projectId: params.projectId, + }, + include: { + _count: { + select: { + subscriptions: params.environmentId + ? { + where: { + environmentId: params.environmentId, + enabled: true, + }, + } + : { + where: { + enabled: true, + }, + }, + }, + }, + }, + orderBy: [{ slug: "asc" }, { createdAt: "desc" }], + }); + + return eventDefs.map((def) => ({ + id: def.id, + slug: def.slug, + version: def.version, + description: def.description, + schema: def.schema, + deprecatedAt: def.deprecatedAt, + subscriberCount: def._count.subscriptions, + createdAt: def.createdAt, + updatedAt: def.updatedAt, + })); + } + + /** + * Validate a payload against the stored JSON schema for an event. + * Returns success:true if there is no schema (untyped events pass validation). + */ + validatePayload( + eventDefinitionId: string, + schema: unknown | null, + payload: unknown + ): SchemaValidationResult { + if (!schema) { + return { success: true }; + } + + try { + let validate = validatorCache.get(eventDefinitionId); + + if (!validate) { + validate = ajv.compile(schema as object); + validatorCache.set(eventDefinitionId, validate); + } + + const valid = validate(payload); + + if (valid) { + return { success: true }; + } + + return { + success: false, + errors: formatAjvErrors(validate.errors ?? []), + }; + } catch (error) { + logger.error("Schema validation error", { + eventDefinitionId, + error: error instanceof Error ? error.message : String(error), + }); + + // If schema compilation fails, we don't block the publish + // (a broken schema shouldn't prevent events from flowing) + return { success: true }; + } + } + + /** + * Check if a new schema version is backwards compatible with the previous version. + * Compatible means: the new schema accepts all payloads that the old schema accepted. + * + * Heuristic checks (not exhaustive): + * - Adding optional fields → compatible + * - Removing required fields → incompatible + * - Changing field types → incompatible + * - Tightening constraints → incompatible + */ + checkCompatibility( + oldSchema: unknown, + newSchema: unknown + ): SchemaCompatibilityResult { + if (!oldSchema || !newSchema) { + return { compatible: true }; + } + + const reasons: string[] = []; + const oldObj = oldSchema as Record; + const newObj = newSchema as Record; + + // Check if required fields were added (breaking for existing producers) + const oldRequired = new Set(oldObj.required ?? []); + const newRequired = new Set(newObj.required ?? []); + + for (const field of newRequired) { + if (!oldRequired.has(field)) { + // New required field — check if it exists in old schema at all + const oldProps = oldObj.properties ?? {}; + if (!(field in oldProps)) { + reasons.push( + `New required field "${field}" was not present in the previous schema` + ); + } + } + } + + // Check if fields were removed + const oldProperties = Object.keys(oldObj.properties ?? {}); + const newProperties = new Set(Object.keys(newObj.properties ?? {})); + + for (const field of oldProperties) { + if (!newProperties.has(field) && oldRequired.has(field)) { + reasons.push( + `Required field "${field}" was removed in the new schema` + ); + } + } + + // Check if types changed for existing fields + const oldProps = oldObj.properties ?? {}; + const newProps = newObj.properties ?? {}; + + for (const field of oldProperties) { + if (field in newProps) { + const oldType = oldProps[field]?.type; + const newType = newProps[field]?.type; + + if (oldType && newType && oldType !== newType) { + reasons.push( + `Field "${field}" changed type from "${oldType}" to "${newType}"` + ); + } + } + } + + if (reasons.length > 0) { + return { compatible: false, reasons }; + } + + return { compatible: true }; + } + + /** + * Clear the validator cache (useful for testing or after mass schema updates). + */ + static clearCache(): void { + validatorCache.clear(); + } +} + +function formatAjvErrors(errors: ErrorObject[]): SchemaValidationError[] { + return errors.map((err) => ({ + path: err.instancePath || "/", + message: err.message ?? "Validation failed", + })); +} diff --git a/apps/webapp/package.json b/apps/webapp/package.json index 42a1741d2eb..b6be0f9cbab 100644 --- a/apps/webapp/package.json +++ b/apps/webapp/package.json @@ -132,6 +132,7 @@ "@vercel/sdk": "^1.19.1", "@whatwg-node/fetch": "^0.9.14", "ai": "^4.3.19", + "ajv": "^8.17.1", "assert-never": "^1.2.1", "aws4fetch": "^1.0.18", "class-variance-authority": "^0.5.2", diff --git a/internal-packages/database/prisma/migrations/20260228054059_add_event_schema_versioning/migration.sql b/internal-packages/database/prisma/migrations/20260228054059_add_event_schema_versioning/migration.sql new file mode 100644 index 00000000000..56105b12009 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260228054059_add_event_schema_versioning/migration.sql @@ -0,0 +1,4 @@ +-- AlterTable +ALTER TABLE "public"."EventDefinition" ADD COLUMN "compatibleVersions" TEXT[] DEFAULT ARRAY[]::TEXT[], +ADD COLUMN "deprecatedAt" TIMESTAMP(3), +ADD COLUMN "deprecatedMessage" TEXT; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 9e6662a8e36..14bd23e913d 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -594,6 +594,11 @@ model EventDefinition { description String? schema Json? // JSON Schema of the payload + // Schema versioning (Phase 1) + compatibleVersions String[] @default([]) + deprecatedAt DateTime? + deprecatedMessage String? + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) projectId String diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index a30d01180db..ecc59c19ca7 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -181,6 +181,8 @@ export const EventManifest = z.object({ version: z.string(), /** Optional human-readable description */ description: z.string().optional(), + /** JSON Schema of the event payload (Draft 7) */ + schema: z.unknown().optional(), }); export type EventManifest = z.infer; diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 48e6dd6ec01..0e1201ba35d 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -530,6 +530,9 @@ importers: ai: specifier: ^4.3.19 version: 4.3.19(react@18.2.0)(zod@3.25.76) + ajv: + specifier: ^8.17.1 + version: 8.17.1 assert-never: specifier: ^1.2.1 version: 1.2.1 @@ -11141,7 +11144,7 @@ packages: '@vercel/postgres@0.10.0': resolution: {integrity: sha512-fSD23DxGND40IzSkXjcFcxr53t3Tiym59Is0jSYIFpG4/0f0KO9SGtcp1sXiebvPaGe7N/tU05cH4yt2S6/IPg==} engines: {node: '>=18.14'} - deprecated: '@vercel/postgres is deprecated. You can either choose an alternate storage solution from the Vercel Marketplace if you want to set up a new database. Or you can follow this guide to migrate your existing Vercel Postgres db: https://neon.com/docs/guides/vercel-postgres-transition-guide' + deprecated: '@vercel/postgres is deprecated. If you are setting up a new database, you can choose an alternate storage solution from the Vercel Marketplace. If you had an existing Vercel Postgres database, it should have been migrated to Neon as a native Vercel integration. You can find more details and the guide to migrate to Neon''s SDKs here: https://neon.com/docs/guides/vercel-postgres-transition-guide' '@vercel/sdk@1.19.1': resolution: {integrity: sha512-K4rmtUT6t1vX06tiY44ot8A7W1FKN7g/tMkE7yZghCgNQ8b30SzljBd4ni8RNp2pJzM/HrZmphRDeIArO7oZuw==} @@ -11797,6 +11800,7 @@ packages: basic-ftp@5.0.3: resolution: {integrity: sha512-QHX8HLlncOLpy54mh+k/sWIFd0ThmRqwe9ZjELybGZK+tZ8rUb9VO0saKJUROTbE+KhzDUT7xziGpGrW8Kmd+g==} engines: {node: '>=10.0.0'} + deprecated: Security vulnerability fixed in 5.2.0, please upgrade bcrypt-pbkdf@1.0.2: resolution: {integrity: sha512-qeFIXtP4MSoi6NLqO12WfqARWWuCKi2Rn/9hJLEmtB5yTNr9DqFWkJRCf2qShWzPeAMRnOgCrq0sg/KLv5ES9w==} @@ -14233,29 +14237,34 @@ packages: glob@10.3.10: resolution: {integrity: sha512-fa46+tv1Ak0UPK1TOy/pZrIybNNt4HCv7SDzwyfiOZkvZLEbjsZkJBPtDHVshZjbecAoAGSC20MjLDG/qr679g==} engines: {node: '>=16 || 14 >=14.17'} + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me hasBin: true glob@10.3.4: resolution: {integrity: sha512-6LFElP3A+i/Q8XQKEvZjkEWEOTgAIALR9AO2rwT8bgPhDd1anmqDJDZ6lLddI4ehxxxR1S5RIqKe1uapMQfYaQ==} engines: {node: '>=16 || 14 >=14.17'} + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me hasBin: true glob@10.4.5: resolution: {integrity: sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==} + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me hasBin: true glob@11.0.0: resolution: {integrity: sha512-9UiX/Bl6J2yaBbxKoEBRm4Cipxgok8kQYcOPEhScPwebu2I0HoQOuYdIO6S3hLuWoZgpDpwQZMzTFxgpkyT76g==} engines: {node: 20 || >=22} + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me hasBin: true glob@7.2.3: resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==} - deprecated: Glob versions prior to v9 are no longer supported + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me glob@9.3.5: resolution: {integrity: sha512-e1LleDykUz2Iu+MTYdkSsuWX8lvAjAcs0Xef0lNIu0S2wOAzuTxCJtcd9S3cijlwYF18EsU3rzb8jPVobxDh9Q==} engines: {node: '>=16 || 14 >=14.17'} + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me globals@11.12.0: resolution: {integrity: sha512-WOBp/EEGUiIsJSp7wcv/y6MO+lV9UoncWqxuFfm8eBwzWNgyfBd6Gz+IeKQ9jCmyhoH99g15M3T+QaVHFjizVA==} @@ -17273,6 +17282,7 @@ packages: prebuild-install@7.1.3: resolution: {integrity: sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==} engines: {node: '>=10'} + deprecated: No longer maintained. Please contact the author of the relevant native addon; alternatives are available. hasBin: true preferred-pm@3.0.3: @@ -18967,21 +18977,22 @@ packages: tar@6.1.13: resolution: {integrity: sha512-jdIBIN6LTIe2jqzay/2vtYLlBHa3JF42ot3h1dW8Q0PaAG4v8rm0cvpVePtau5C6OKXGGcgO9q2AMNSWxiLqKw==} engines: {node: '>=10'} - deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exhorbitant rates) by contacting i@izs.me + deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me tar@6.2.1: resolution: {integrity: sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==} engines: {node: '>=10'} - deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exhorbitant rates) by contacting i@izs.me + deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me tar@7.4.3: resolution: {integrity: sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==} engines: {node: '>=18'} - deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exhorbitant rates) by contacting i@izs.me + deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me tar@7.5.6: resolution: {integrity: sha512-xqUeu2JAIJpXyvskvU3uvQW8PAmHrtXp2KDuMJwQqW8Sqq0CaZBAQ+dKS3RBXVhU4wC5NjAdKrmh84241gO9cA==} engines: {node: '>=18'} + deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me tdigest@0.1.2: resolution: {integrity: sha512-+G0LLgjjo9BZX2MfdvPfH+MKLCrxlXSYec5DaPYP1fe6Iyhf0/fSmJ0bFiZ1F8BT6cGXl2LpltQptzjXKWEkKA==} @@ -23130,7 +23141,7 @@ snapshots: '@epic-web/test-server@0.1.0(bufferutil@4.0.9)': dependencies: '@hono/node-server': 1.12.2(hono@4.5.11) - '@hono/node-ws': 1.0.4(@hono/node-server@1.12.2(hono@4.11.8))(bufferutil@4.0.9) + '@hono/node-ws': 1.0.4(@hono/node-server@1.12.2(hono@4.5.11))(bufferutil@4.0.9) '@open-draft/deferred-promise': 2.2.0 '@types/ws': 8.5.12 hono: 4.5.11 @@ -23885,7 +23896,7 @@ snapshots: dependencies: hono: 4.11.8 - '@hono/node-ws@1.0.4(@hono/node-server@1.12.2(hono@4.11.8))(bufferutil@4.0.9)': + '@hono/node-ws@1.0.4(@hono/node-server@1.12.2(hono@4.5.11))(bufferutil@4.0.9)': dependencies: '@hono/node-server': 1.12.2(hono@4.5.11) ws: 8.18.3(bufferutil@4.0.9) From 49b2903d52a7191ec24e6d437555e08f46653ac3 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 21:44:36 -0800 Subject: [PATCH 09/65] =?UTF-8?q?feat(events):=20phase=201.2=20=E2=80=94?= =?UTF-8?q?=20schema=20discovery=20API=20endpoints?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add GET /api/v1/events (list), GET /api/v1/events/:eventId (detail), and GET /api/v1/events/:eventId/schema (JSON schema) endpoints. Add corresponding response schemas and API client methods in core. Co-Authored-By: Claude Opus 4.6 --- .../routes/api.v1.events.$eventId.schema.ts | 41 +++++++++++++ .../app/routes/api.v1.events.$eventId.ts | 58 +++++++++++++++++++ apps/webapp/app/routes/api.v1.events.ts | 37 ++++++++++++ packages/core/src/v3/apiClient/index.ts | 43 ++++++++++++++ packages/core/src/v3/schemas/api.ts | 51 ++++++++++++++++ 5 files changed, 230 insertions(+) create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.schema.ts create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.ts create mode 100644 apps/webapp/app/routes/api.v1.events.ts diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.schema.ts b/apps/webapp/app/routes/api.v1.events.$eventId.schema.ts new file mode 100644 index 00000000000..7e29c971cf1 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.schema.ts @@ -0,0 +1,41 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { prisma } from "~/db.server"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +export const loader = createLoaderApiRoute( + { + params: ParamsSchema, + corsStrategy: "all", + findResource: async (params, auth) => { + return prisma.eventDefinition.findFirst({ + where: { + slug: params.eventId, + projectId: auth.environment.projectId, + }, + select: { + slug: true, + version: true, + schema: true, + }, + orderBy: { createdAt: "desc" }, + }); + }, + authorization: { + action: "read", + resource: (resource) => ({ tasks: resource.slug }), + superScopes: ["read:runs", "read:all", "admin"], + }, + }, + async ({ resource }) => { + return json({ + slug: resource.slug, + version: resource.version, + schema: resource.schema, + }); + } +); diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.ts b/apps/webapp/app/routes/api.v1.events.$eventId.ts new file mode 100644 index 00000000000..d4915e2e97a --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.ts @@ -0,0 +1,58 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { prisma } from "~/db.server"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +export const loader = createLoaderApiRoute( + { + params: ParamsSchema, + corsStrategy: "all", + findResource: async (params, auth) => { + return prisma.eventDefinition.findFirst({ + where: { + slug: params.eventId, + projectId: auth.environment.projectId, + }, + include: { + subscriptions: { + where: { + environmentId: auth.environment.id, + }, + select: { + taskSlug: true, + enabled: true, + }, + }, + }, + orderBy: { createdAt: "desc" }, + }); + }, + authorization: { + action: "read", + resource: (resource) => ({ tasks: resource.slug }), + superScopes: ["read:runs", "read:all", "admin"], + }, + }, + async ({ resource }) => { + return json({ + id: resource.id, + slug: resource.slug, + version: resource.version, + description: resource.description, + schema: resource.schema, + deprecatedAt: resource.deprecatedAt, + deprecatedMessage: resource.deprecatedMessage, + compatibleVersions: resource.compatibleVersions, + subscribers: resource.subscriptions.map((s) => ({ + taskSlug: s.taskSlug, + enabled: s.enabled, + })), + createdAt: resource.createdAt, + updatedAt: resource.updatedAt, + }); + } +); diff --git a/apps/webapp/app/routes/api.v1.events.ts b/apps/webapp/app/routes/api.v1.events.ts new file mode 100644 index 00000000000..36cace033f2 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.ts @@ -0,0 +1,37 @@ +import { json } from "@remix-run/server-runtime"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { SchemaRegistryService } from "~/v3/services/events/schemaRegistry.server"; + +export const loader = createLoaderApiRoute( + { + corsStrategy: "all", + authorization: { + action: "read", + resource: () => ({ tasks: "*" }), + superScopes: ["read:runs", "read:all", "admin"], + }, + findResource: async () => 1 as const, + }, + async ({ authentication }) => { + const service = new SchemaRegistryService(); + + const events = await service.listSchemas({ + projectId: authentication.environment.projectId, + environmentId: authentication.environment.id, + }); + + return json({ + data: events.map((e) => ({ + id: e.id, + slug: e.slug, + version: e.version, + description: e.description, + hasSchema: e.schema !== null, + deprecatedAt: e.deprecatedAt, + subscriberCount: e.subscriberCount, + createdAt: e.createdAt, + updatedAt: e.updatedAt, + })), + }); + } +); diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts index 047ab30934f..9375276e761 100644 --- a/packages/core/src/v3/apiClient/index.ts +++ b/packages/core/src/v3/apiClient/index.ts @@ -11,6 +11,9 @@ import { BatchPublishEventRequestBody, BatchPublishEventResponseBody, BatchTaskRunExecutionResult, + GetEventResponseBody, + GetEventSchemaResponseBody, + ListEventsResponseBody, PublishEventRequestBody, PublishEventResponseBody, BatchTriggerTaskV3RequestBody, @@ -1483,6 +1486,46 @@ export class ApiClient { ); } + listEvents(requestOptions?: ZodFetchOptions) { + return zodfetch( + ListEventsResponseBody, + `${this.baseUrl}/api/v1/events`, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + getEvent(eventId: string, requestOptions?: ZodFetchOptions) { + const encodedEventId = encodeURIComponent(eventId); + + return zodfetch( + GetEventResponseBody, + `${this.baseUrl}/api/v1/events/${encodedEventId}`, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + getEventSchema(eventId: string, requestOptions?: ZodFetchOptions) { + const encodedEventId = encodeURIComponent(eventId); + + return zodfetch( + GetEventSchemaResponseBody, + `${this.baseUrl}/api/v1/events/${encodedEventId}/schema`, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + #getHeaders(spanParentAsLink: boolean, additionalHeaders?: Record) { const headers: Record = { "Content-Type": "application/json", diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 77108970862..3087df2c777 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1652,3 +1652,54 @@ export const BatchPublishEventResponseBody = z.object({ }); export type BatchPublishEventResponseBody = z.infer; + +// ---- Event discovery schemas ---- + +export const EventListItem = z.object({ + id: z.string(), + slug: z.string(), + version: z.string(), + description: z.string().nullable(), + hasSchema: z.boolean(), + deprecatedAt: z.coerce.date().nullable(), + subscriberCount: z.number(), + createdAt: z.coerce.date(), + updatedAt: z.coerce.date(), +}); + +export type EventListItem = z.infer; + +export const ListEventsResponseBody = z.object({ + data: z.array(EventListItem), +}); + +export type ListEventsResponseBody = z.infer; + +export const GetEventResponseBody = z.object({ + id: z.string(), + slug: z.string(), + version: z.string(), + description: z.string().nullable(), + schema: z.unknown().nullable(), + deprecatedAt: z.coerce.date().nullable(), + deprecatedMessage: z.string().nullable(), + compatibleVersions: z.array(z.string()), + subscribers: z.array( + z.object({ + taskSlug: z.string(), + enabled: z.boolean(), + }) + ), + createdAt: z.coerce.date(), + updatedAt: z.coerce.date(), +}); + +export type GetEventResponseBody = z.infer; + +export const GetEventSchemaResponseBody = z.object({ + slug: z.string(), + version: z.string(), + schema: z.unknown().nullable(), +}); + +export type GetEventSchemaResponseBody = z.infer; From 2a06ef6055834d64b262c77cd5eedcd238ac5370 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 21:48:33 -0800 Subject: [PATCH 10/65] =?UTF-8?q?feat(events):=20phase=201.3=20=E2=80=94?= =?UTF-8?q?=20store=20JSON=20schema=20during=20deploy=20+=20validate=20at?= =?UTF-8?q?=20publish?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - SDK event() stores raw schema in resource catalog - CLI indexers convert event schemas to JSON Schema via schemaToJsonSchema - syncWorkerEvents stores JSON schema in EventDefinition.schema field - PublishEventService validates payloads against stored schemas using ajv - Extends ResourceCatalog with getEventSchema() method Co-Authored-By: Claude Opus 4.6 --- .../services/createBackgroundWorker.server.ts | 2 ++ .../v3/services/events/publishEvent.server.ts | 22 +++++++++++++++++-- .../src/entryPoints/dev-index-worker.ts | 20 ++++++++++++++++- .../src/entryPoints/managed-index-worker.ts | 20 ++++++++++++++++- .../core/src/v3/resource-catalog/catalog.ts | 3 +++ .../core/src/v3/resource-catalog/index.ts | 4 ++++ .../resource-catalog/noopResourceCatalog.ts | 4 ++++ .../standardResourceCatalog.ts | 4 ++++ packages/trigger-sdk/src/v3/events.ts | 3 ++- 9 files changed, 77 insertions(+), 5 deletions(-) diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index a60f960f3fd..7098562fd4b 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -350,10 +350,12 @@ async function syncWorkerEvents( slug: event.id, version: event.version ?? "1.0", description: event.description, + schema: event.schema as any ?? undefined, projectId: worker.projectId, }, update: { description: event.description, + schema: event.schema as any ?? undefined, }, }); diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index c71457d71f6..9542e66d363 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -9,6 +9,7 @@ import { TriggerTaskServiceOptions, TriggerTaskServiceResult, } from "../triggerTask.server"; +import { SchemaRegistryService } from "./schemaRegistry.server"; export type PublishEventOptions = { idempotencyKey?: string; @@ -76,7 +77,24 @@ export class PublishEventService extends BaseService { span.setAttribute("eventDefinitionId", eventDefinition.id); - // 2. Find all active subscriptions for this event + environment + // 2. Validate payload against stored schema (if exists) + if (eventDefinition.schema) { + const schemaRegistry = new SchemaRegistryService(this._prisma); + const validation = schemaRegistry.validatePayload( + eventDefinition.id, + eventDefinition.schema, + payload + ); + + if (!validation.success) { + throw new ServiceValidationError( + `Payload validation failed for event "${eventSlug}": ${validation.errors.map((e) => `${e.path}: ${e.message}`).join(", ")}`, + 422 + ); + } + } + + // 3. Find all active subscriptions for this event + environment const subscriptions = await this._prisma.eventSubscription.findMany({ where: { eventDefinitionId: eventDefinition.id, @@ -94,7 +112,7 @@ export class PublishEventService extends BaseService { }; } - // 3. Fan out: trigger each subscribed task + // 4. Fan out: trigger each subscribed task const eventId = generateFriendlyId("evt"); const runs: PublishEventResult["runs"] = []; diff --git a/packages/cli-v3/src/entryPoints/dev-index-worker.ts b/packages/cli-v3/src/entryPoints/dev-index-worker.ts index 263c4053886..95de0df2ee7 100644 --- a/packages/cli-v3/src/entryPoints/dev-index-worker.ts +++ b/packages/cli-v3/src/entryPoints/dev-index-worker.ts @@ -1,5 +1,6 @@ import { BuildManifest, + type EventManifest, type HandleErrorFunction, indexerToWorkerMessages, resourceCatalog, @@ -154,7 +155,7 @@ await sendMessageInCatalog( manifest: { tasks, queues: resourceCatalog.listQueueManifests(), - events: resourceCatalog.listEventManifests(), + events: convertEventSchemasToJsonSchemas(resourceCatalog.listEventManifests()), configPath: buildManifest.configPath, runtime: buildManifest.runtime, runtimeVersion: detectRuntimeVersion(), @@ -211,3 +212,20 @@ async function convertSchemasToJsonSchemas(tasks: TaskManifest[]): Promise { + const rawSchema = resourceCatalog.getEventSchema(event.id); + + if (rawSchema) { + try { + const result = schemaToJsonSchema(rawSchema); + return { ...event, schema: result?.jsonSchema }; + } catch { + return event; + } + } + + return event; + }); +} diff --git a/packages/cli-v3/src/entryPoints/managed-index-worker.ts b/packages/cli-v3/src/entryPoints/managed-index-worker.ts index b61078aaf1a..b0ba28bb905 100644 --- a/packages/cli-v3/src/entryPoints/managed-index-worker.ts +++ b/packages/cli-v3/src/entryPoints/managed-index-worker.ts @@ -1,5 +1,6 @@ import { BuildManifest, + type EventManifest, type HandleErrorFunction, indexerToWorkerMessages, resourceCatalog, @@ -156,7 +157,7 @@ await sendMessageInCatalog( manifest: { tasks, queues: resourceCatalog.listQueueManifests(), - events: resourceCatalog.listEventManifests(), + events: convertEventSchemasToJsonSchemas(resourceCatalog.listEventManifests()), configPath: buildManifest.configPath, runtime: buildManifest.runtime, runtimeVersion: detectRuntimeVersion(), @@ -219,3 +220,20 @@ async function convertSchemasToJsonSchemas(tasks: TaskManifest[]): Promise { + const rawSchema = resourceCatalog.getEventSchema(event.id); + + if (rawSchema) { + try { + const result = schemaToJsonSchema(rawSchema); + return { ...event, schema: result?.jsonSchema }; + } catch { + return event; + } + } + + return event; + }); +} diff --git a/packages/core/src/v3/resource-catalog/catalog.ts b/packages/core/src/v3/resource-catalog/catalog.ts index ecafc8458f9..081c45d73ce 100644 --- a/packages/core/src/v3/resource-catalog/catalog.ts +++ b/packages/core/src/v3/resource-catalog/catalog.ts @@ -5,6 +5,8 @@ export interface EventMetadata { id: string; version: string; description?: string; + /** Raw schema (Zod, etc.) stored for later conversion to JSON Schema */ + rawSchema?: unknown; } export interface ResourceCatalog { @@ -22,6 +24,7 @@ export interface ResourceCatalog { getTaskSchema(id: string): TaskSchema | undefined; registerEventMetadata(event: EventMetadata): void; getEvent(id: string): EventMetadata | undefined; + getEventSchema(id: string): unknown | undefined; listEventManifests(): Array; getTasksForEvent(eventId: string): Array; } diff --git a/packages/core/src/v3/resource-catalog/index.ts b/packages/core/src/v3/resource-catalog/index.ts index 49f7a6460b9..5c80856e0c1 100644 --- a/packages/core/src/v3/resource-catalog/index.ts +++ b/packages/core/src/v3/resource-catalog/index.ts @@ -85,6 +85,10 @@ export class ResourceCatalogAPI { return this.#getCatalog().getEvent(id); } + public getEventSchema(id: string): unknown | undefined { + return this.#getCatalog().getEventSchema(id); + } + public listEventManifests(): Array { return this.#getCatalog().listEventManifests(); } diff --git a/packages/core/src/v3/resource-catalog/noopResourceCatalog.ts b/packages/core/src/v3/resource-catalog/noopResourceCatalog.ts index a48b3773152..c69d1d786f2 100644 --- a/packages/core/src/v3/resource-catalog/noopResourceCatalog.ts +++ b/packages/core/src/v3/resource-catalog/noopResourceCatalog.ts @@ -63,6 +63,10 @@ export class NoopResourceCatalog implements ResourceCatalog { return undefined; } + getEventSchema(id: string): unknown | undefined { + return undefined; + } + listEventManifests(): Array { return []; } diff --git a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts index 88ce269d29f..c3a10ed23a7 100644 --- a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts +++ b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts @@ -181,6 +181,10 @@ export class StandardResourceCatalog implements ResourceCatalog { return this._eventMetadata.get(id); } + getEventSchema(id: string): unknown | undefined { + return this._eventMetadata.get(id)?.rawSchema; + } + listEventManifests(): Array { return Array.from(this._eventMetadata.values()).map((event) => ({ id: event.id, diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index 48ec03558ca..b5be0105f18 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -173,11 +173,12 @@ export function createEvent Date: Fri, 27 Feb 2026 21:52:23 -0800 Subject: [PATCH 11/65] =?UTF-8?q?feat(events):=20phase=201.4=20=E2=80=94?= =?UTF-8?q?=20tests=20+=20changeset=20for=20schema=20registry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add unit tests for SchemaRegistryService (validation, compatibility). Add integration tests for publish with schema validation (reject invalid, accept valid, skip when no schema). Add changeset for affected public packages. Co-Authored-By: Claude Opus 4.6 --- .changeset/event-schema-registry.md | 7 + apps/webapp/test/engine/publishEvent.test.ts | 127 ++++++++++ .../test/services/schemaRegistry.test.ts | 228 ++++++++++++++++++ 3 files changed, 362 insertions(+) create mode 100644 .changeset/event-schema-registry.md create mode 100644 apps/webapp/test/services/schemaRegistry.test.ts diff --git a/.changeset/event-schema-registry.md b/.changeset/event-schema-registry.md new file mode 100644 index 00000000000..4ae5318e773 --- /dev/null +++ b/.changeset/event-schema-registry.md @@ -0,0 +1,7 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +"trigger.dev": patch +--- + +Add event schema registry with versioning, validation, and discovery API endpoints diff --git a/apps/webapp/test/engine/publishEvent.test.ts b/apps/webapp/test/engine/publishEvent.test.ts index 1e64f72c40b..81f1bcffc9f 100644 --- a/apps/webapp/test/engine/publishEvent.test.ts +++ b/apps/webapp/test/engine/publishEvent.test.ts @@ -431,6 +431,133 @@ describe("PublishEventService", () => { } ); + containerTest( + "publish event with schema rejects invalid payload", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Create event with a JSON Schema + await prisma.eventDefinition.create({ + data: { + slug: "typed.event", + version: "1.0", + projectId: env.projectId, + schema: { + type: "object", + properties: { + orderId: { type: "string" }, + amount: { type: "number" }, + }, + required: ["orderId", "amount"], + }, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // Invalid payload (orderId is number instead of string, amount is missing) + await expect( + service.call("typed.event", env, { orderId: 123 }) + ).rejects.toThrow(ServiceValidationError); + + await expect( + service.call("typed.event", env, { orderId: 123 }) + ).rejects.toThrow("Payload validation failed"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "publish event with schema accepts valid payload", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "handler"); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "typed.event.ok", + version: "1.0", + projectId: env.projectId, + schema: { + type: "object", + properties: { + orderId: { type: "string" }, + amount: { type: "number" }, + }, + required: ["orderId", "amount"], + }, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // Valid payload + const result = await service.call("typed.event.ok", env, { + orderId: "ord-123", + amount: 42.50, + }); + + expect(result.runs).toHaveLength(1); + expect(result.runs[0].taskIdentifier).toBe("handler"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "publish event without schema skips validation", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Create event WITHOUT schema + await prisma.eventDefinition.create({ + data: { + slug: "untyped.event", + version: "1.0", + projectId: env.projectId, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // Any payload should work + const result = await service.call("untyped.event", env, { anything: true, foo: [1, 2] }); + + expect(result).toBeDefined(); + expect(result.eventId).toMatch(/^evt_/); + expect(result.runs).toHaveLength(0); // no subscribers + } finally { + await engine.quit(); + } + } + ); + containerTest( "idempotency key prevents duplicate fan-out", async ({ prisma, redisOptions }) => { diff --git a/apps/webapp/test/services/schemaRegistry.test.ts b/apps/webapp/test/services/schemaRegistry.test.ts new file mode 100644 index 00000000000..2c4965b7e63 --- /dev/null +++ b/apps/webapp/test/services/schemaRegistry.test.ts @@ -0,0 +1,228 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { SchemaRegistryService } from "../../app/v3/services/events/schemaRegistry.server"; + +// Unit tests for schema validation and compatibility checks +// (no DB needed — these test the pure logic functions) + +describe("SchemaRegistryService", () => { + let service: SchemaRegistryService; + + beforeEach(() => { + service = new SchemaRegistryService({} as any); // prisma not needed for validatePayload/checkCompatibility + SchemaRegistryService.clearCache(); + }); + + describe("validatePayload", () => { + it("returns success when no schema is defined", () => { + const result = service.validatePayload("evt-1", null, { any: "data" }); + expect(result).toEqual({ success: true }); + }); + + it("validates payload against a JSON schema — valid payload", () => { + const schema = { + type: "object", + properties: { + orderId: { type: "string" }, + amount: { type: "number" }, + }, + required: ["orderId", "amount"], + }; + + const result = service.validatePayload("evt-2", schema, { + orderId: "ord-123", + amount: 99.99, + }); + + expect(result).toEqual({ success: true }); + }); + + it("rejects invalid payload with descriptive errors", () => { + const schema = { + type: "object", + properties: { + orderId: { type: "string" }, + amount: { type: "number" }, + }, + required: ["orderId", "amount"], + }; + + const result = service.validatePayload("evt-3", schema, { + orderId: 123, // wrong type + // missing amount + }); + + expect(result.success).toBe(false); + if (!result.success) { + expect(result.errors.length).toBeGreaterThan(0); + // Should have errors about orderId type and missing amount + const messages = result.errors.map((e) => e.message); + expect(messages.some((m) => m.includes("string") || m.includes("type"))).toBe(true); + } + }); + + it("validates arrays and nested objects", () => { + const schema = { + type: "object", + properties: { + items: { + type: "array", + items: { + type: "object", + properties: { + name: { type: "string" }, + }, + required: ["name"], + }, + }, + }, + required: ["items"], + }; + + const validResult = service.validatePayload("evt-4", schema, { + items: [{ name: "item1" }, { name: "item2" }], + }); + expect(validResult.success).toBe(true); + + const invalidResult = service.validatePayload("evt-4b", schema, { + items: [{ name: 123 }], // wrong type + }); + expect(invalidResult.success).toBe(false); + }); + + it("caches compiled validators", () => { + const schema = { + type: "object", + properties: { x: { type: "number" } }, + }; + + // First call compiles + const result1 = service.validatePayload("evt-cache", schema, { x: 1 }); + expect(result1.success).toBe(true); + + // Second call uses cache + const result2 = service.validatePayload("evt-cache", schema, { x: 2 }); + expect(result2.success).toBe(true); + }); + + it("returns success for malformed schema (does not block publish)", () => { + const badSchema = "not a valid schema"; + const result = service.validatePayload("evt-bad", badSchema, { any: "data" }); + expect(result.success).toBe(true); + }); + }); + + describe("checkCompatibility", () => { + it("returns compatible when both schemas are null", () => { + const result = service.checkCompatibility(null, null); + expect(result).toEqual({ compatible: true }); + }); + + it("returns compatible when old schema is null", () => { + const result = service.checkCompatibility(null, { + type: "object", + properties: { x: { type: "string" } }, + }); + expect(result).toEqual({ compatible: true }); + }); + + it("compatible: adding optional field", () => { + const oldSchema = { + type: "object", + properties: { + name: { type: "string" }, + }, + required: ["name"], + }; + + const newSchema = { + type: "object", + properties: { + name: { type: "string" }, + email: { type: "string" }, // new optional field + }, + required: ["name"], + }; + + const result = service.checkCompatibility(oldSchema, newSchema); + expect(result).toEqual({ compatible: true }); + }); + + it("incompatible: adding new required field not in old schema", () => { + const oldSchema = { + type: "object", + properties: { + name: { type: "string" }, + }, + required: ["name"], + }; + + const newSchema = { + type: "object", + properties: { + name: { type: "string" }, + email: { type: "string" }, + }, + required: ["name", "email"], // email is now required + }; + + const result = service.checkCompatibility(oldSchema, newSchema); + expect(result.compatible).toBe(false); + if (!result.compatible) { + expect(result.reasons).toHaveLength(1); + expect(result.reasons[0]).toContain("email"); + } + }); + + it("incompatible: removing required field", () => { + const oldSchema = { + type: "object", + properties: { + name: { type: "string" }, + email: { type: "string" }, + }, + required: ["name", "email"], + }; + + const newSchema = { + type: "object", + properties: { + name: { type: "string" }, + // email removed + }, + required: ["name"], + }; + + const result = service.checkCompatibility(oldSchema, newSchema); + expect(result.compatible).toBe(false); + if (!result.compatible) { + expect(result.reasons[0]).toContain("email"); + expect(result.reasons[0]).toContain("removed"); + } + }); + + it("incompatible: changing field type", () => { + const oldSchema = { + type: "object", + properties: { + count: { type: "number" }, + }, + required: ["count"], + }; + + const newSchema = { + type: "object", + properties: { + count: { type: "string" }, // changed from number to string + }, + required: ["count"], + }; + + const result = service.checkCompatibility(oldSchema, newSchema); + expect(result.compatible).toBe(false); + if (!result.compatible) { + expect(result.reasons[0]).toContain("count"); + expect(result.reasons[0]).toContain("type"); + } + }); + }); +}); From cd426b3665b85ec5a285f80d059d33ff1564b636 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:07:26 -0800 Subject: [PATCH 12/65] =?UTF-8?q?feat(events):=20phase=202.1=20=E2=80=94?= =?UTF-8?q?=20filter=20evaluator=20+=20pattern=20matcher=20with=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - compileFilter/evaluateFilter wraps existing eventFilterMatches with caching - compilePattern/matchesPattern for wildcard patterns (*, #) - 28 filter evaluator tests + 30 pattern matcher tests Co-Authored-By: Claude Opus 4.6 --- .../core/src/v3/events/filterEvaluator.ts | 48 ++++ packages/core/src/v3/events/index.ts | 12 + packages/core/src/v3/events/patternMatcher.ts | 113 +++++++++ packages/core/src/v3/index.ts | 7 + .../test/v3/events/filterEvaluator.test.ts | 218 ++++++++++++++++++ .../test/v3/events/patternMatcher.test.ts | 160 +++++++++++++ 6 files changed, 558 insertions(+) create mode 100644 packages/core/src/v3/events/filterEvaluator.ts create mode 100644 packages/core/src/v3/events/index.ts create mode 100644 packages/core/src/v3/events/patternMatcher.ts create mode 100644 packages/core/test/v3/events/filterEvaluator.test.ts create mode 100644 packages/core/test/v3/events/patternMatcher.test.ts diff --git a/packages/core/src/v3/events/filterEvaluator.ts b/packages/core/src/v3/events/filterEvaluator.ts new file mode 100644 index 00000000000..849a4fc7e38 --- /dev/null +++ b/packages/core/src/v3/events/filterEvaluator.ts @@ -0,0 +1,48 @@ +import type { EventFilter } from "../schemas/eventFilter.js"; +import { eventFilterMatches } from "../../eventFilterMatches.js"; + +type CompiledFilter = (payload: unknown) => boolean; + +const filterCache = new Map(); + +/** + * Compile an EventFilter into a reusable predicate function. + * The compiled function is cached by the given cacheKey (typically a subscription ID). + * + * Uses the existing, battle-tested `eventFilterMatches` under the hood. + */ +export function compileFilter(filter: EventFilter, cacheKey?: string): CompiledFilter { + if (cacheKey) { + const cached = filterCache.get(cacheKey); + if (cached) return cached; + } + + const fn: CompiledFilter = (payload: unknown) => eventFilterMatches(payload, filter); + + if (cacheKey) { + filterCache.set(cacheKey, fn); + } + + return fn; +} + +/** + * Evaluate a filter against a payload (one-shot, no caching). + */ +export function evaluateFilter(payload: unknown, filter: EventFilter): boolean { + return eventFilterMatches(payload, filter); +} + +/** + * Invalidate a cached compiled filter (e.g., on re-deploy). + */ +export function invalidateFilterCache(cacheKey: string): void { + filterCache.delete(cacheKey); +} + +/** + * Clear all cached compiled filters. + */ +export function clearFilterCache(): void { + filterCache.clear(); +} diff --git a/packages/core/src/v3/events/index.ts b/packages/core/src/v3/events/index.ts new file mode 100644 index 00000000000..94741d47e1d --- /dev/null +++ b/packages/core/src/v3/events/index.ts @@ -0,0 +1,12 @@ +export { + compileFilter, + evaluateFilter, + invalidateFilterCache, + clearFilterCache, +} from "./filterEvaluator.js"; + +export { + compilePattern, + matchesPattern, + clearPatternCache, +} from "./patternMatcher.js"; diff --git a/packages/core/src/v3/events/patternMatcher.ts b/packages/core/src/v3/events/patternMatcher.ts new file mode 100644 index 00000000000..fdef8c496b5 --- /dev/null +++ b/packages/core/src/v3/events/patternMatcher.ts @@ -0,0 +1,113 @@ +/** + * Wildcard pattern matching for event slugs. + * + * Patterns use dot-separated segments with two wildcards: + * - `*` matches exactly one segment (e.g., `order.*` matches `order.created` but not `order.status.changed`) + * - `#` matches zero or more segments (e.g., `order.#` matches `order.created` and `order.status.changed`) + * + * Examples: + * - `order.*` → matches `order.created`, `order.updated` + * - `order.#` → matches `order.created`, `order.status.changed`, `order` + * - `*.created` → matches `order.created`, `user.created` + * - `#.created` → matches `order.created`, `org.user.created`, `created` + */ + +type PatternPredicate = (eventSlug: string) => boolean; + +const patternCache = new Map(); + +/** + * Compile a wildcard pattern into a reusable predicate. + * Results are cached by the pattern string. + */ +export function compilePattern(pattern: string): PatternPredicate { + const cached = patternCache.get(pattern); + if (cached) return cached; + + const fn = buildPatternFn(pattern); + patternCache.set(pattern, fn); + return fn; +} + +/** + * Test whether an event slug matches a wildcard pattern (one-shot, no caching). + */ +export function matchesPattern(eventSlug: string, pattern: string): boolean { + return compilePattern(pattern)(eventSlug); +} + +/** + * Clear the pattern cache. + */ +export function clearPatternCache(): void { + patternCache.clear(); +} + +// ─── Internal ──────────────────────────────────────────────────────── + +function buildPatternFn(pattern: string): PatternPredicate { + const patternSegments = pattern.split("."); + + // Fast path: no wildcards — exact match + if (!patternSegments.includes("*") && !patternSegments.includes("#")) { + return (slug) => slug === pattern; + } + + // Use dynamic programming to match patterns with # (multi-segment wildcard) + return (slug) => { + const slugSegments = slug.split("."); + return matchSegments(patternSegments, slugSegments, 0, 0); + }; +} + +/** + * Recursive segment matching with memoization via early returns. + * + * patternIdx and slugIdx track position in their respective arrays. + */ +function matchSegments( + pattern: string[], + slug: string[], + patternIdx: number, + slugIdx: number +): boolean { + // Both exhausted — match + if (patternIdx === pattern.length && slugIdx === slug.length) { + return true; + } + + // Pattern exhausted but slug has more — no match + if (patternIdx === pattern.length) { + return false; + } + + const segment = pattern[patternIdx]!; + + if (segment === "#") { + // # matches zero or more segments + // Try matching 0, 1, 2, ... segments from slug + for (let skip = 0; skip <= slug.length - slugIdx; skip++) { + if (matchSegments(pattern, slug, patternIdx + 1, slugIdx + skip)) { + return true; + } + } + return false; + } + + // Slug exhausted but pattern has more non-# segments — no match + if (slugIdx === slug.length) { + return false; + } + + if (segment === "*") { + // * matches exactly one segment + return matchSegments(pattern, slug, patternIdx + 1, slugIdx + 1); + } + + // Literal segment — must match exactly + if (segment === slug[slugIdx]) { + return matchSegments(pattern, slug, patternIdx + 1, slugIdx + 1); + } + + return false; +} diff --git a/packages/core/src/v3/index.ts b/packages/core/src/v3/index.ts index b714d8cb933..b7b989b12cb 100644 --- a/packages/core/src/v3/index.ts +++ b/packages/core/src/v3/index.ts @@ -42,6 +42,13 @@ export { export type { LogLevel } from "./logger/taskLogger.js"; export { eventFilterMatches } from "../eventFilterMatches.js"; +export { + compileFilter, + evaluateFilter, + invalidateFilterCache, + clearFilterCache, +} from "./events/filterEvaluator.js"; +export { compilePattern, matchesPattern, clearPatternCache } from "./events/patternMatcher.js"; export { flattenAttributes, primitiveValueOrflattenedAttributes, diff --git a/packages/core/test/v3/events/filterEvaluator.test.ts b/packages/core/test/v3/events/filterEvaluator.test.ts new file mode 100644 index 00000000000..289dc419dce --- /dev/null +++ b/packages/core/test/v3/events/filterEvaluator.test.ts @@ -0,0 +1,218 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { + compileFilter, + evaluateFilter, + invalidateFilterCache, + clearFilterCache, +} from "../../../src/v3/events/filterEvaluator.js"; +import type { EventFilter } from "../../../src/v3/schemas/eventFilter.js"; + +describe("evaluateFilter", () => { + it("matches string equality", () => { + const filter: EventFilter = { status: ["active"] }; + expect(evaluateFilter({ status: "active" }, filter)).toBe(true); + expect(evaluateFilter({ status: "inactive" }, filter)).toBe(false); + }); + + it("matches number equality", () => { + const filter: EventFilter = { count: [5] }; + expect(evaluateFilter({ count: 5 }, filter)).toBe(true); + expect(evaluateFilter({ count: 3 }, filter)).toBe(false); + }); + + it("matches boolean equality", () => { + const filter: EventFilter = { enabled: [true] }; + expect(evaluateFilter({ enabled: true }, filter)).toBe(true); + expect(evaluateFilter({ enabled: false }, filter)).toBe(false); + }); + + it("matches multiple values (OR)", () => { + const filter: EventFilter = { status: ["active", "pending"] }; + expect(evaluateFilter({ status: "active" }, filter)).toBe(true); + expect(evaluateFilter({ status: "pending" }, filter)).toBe(true); + expect(evaluateFilter({ status: "archived" }, filter)).toBe(false); + }); + + it("matches nested objects", () => { + const filter: EventFilter = { + order: { + status: ["paid"], + }, + }; + expect(evaluateFilter({ order: { status: "paid" } }, filter)).toBe(true); + expect(evaluateFilter({ order: { status: "pending" } }, filter)).toBe(false); + }); + + it("matches $gt operator", () => { + const filter: EventFilter = { amount: [{ $gt: 100 }] }; + expect(evaluateFilter({ amount: 150 }, filter)).toBe(true); + expect(evaluateFilter({ amount: 50 }, filter)).toBe(false); + expect(evaluateFilter({ amount: 100 }, filter)).toBe(false); + }); + + it("matches $gte operator", () => { + const filter: EventFilter = { amount: [{ $gte: 100 }] }; + expect(evaluateFilter({ amount: 100 }, filter)).toBe(true); + expect(evaluateFilter({ amount: 99 }, filter)).toBe(false); + }); + + it("matches $lt operator", () => { + const filter: EventFilter = { amount: [{ $lt: 100 }] }; + expect(evaluateFilter({ amount: 50 }, filter)).toBe(true); + expect(evaluateFilter({ amount: 100 }, filter)).toBe(false); + }); + + it("matches $lte operator", () => { + const filter: EventFilter = { amount: [{ $lte: 100 }] }; + expect(evaluateFilter({ amount: 100 }, filter)).toBe(true); + expect(evaluateFilter({ amount: 101 }, filter)).toBe(false); + }); + + it("matches $between operator", () => { + const filter: EventFilter = { score: [{ $between: [10, 20] }] }; + expect(evaluateFilter({ score: 15 }, filter)).toBe(true); + expect(evaluateFilter({ score: 10 }, filter)).toBe(true); + expect(evaluateFilter({ score: 20 }, filter)).toBe(true); + expect(evaluateFilter({ score: 9 }, filter)).toBe(false); + expect(evaluateFilter({ score: 21 }, filter)).toBe(false); + }); + + it("matches $startsWith operator", () => { + const filter: EventFilter = { name: [{ $startsWith: "Jo" }] }; + expect(evaluateFilter({ name: "John" }, filter)).toBe(true); + expect(evaluateFilter({ name: "Jane" }, filter)).toBe(false); + }); + + it("matches $endsWith operator", () => { + const filter: EventFilter = { email: [{ $endsWith: "@test.com" }] }; + expect(evaluateFilter({ email: "user@test.com" }, filter)).toBe(true); + expect(evaluateFilter({ email: "user@other.com" }, filter)).toBe(false); + }); + + it("matches $ignoreCaseEquals operator", () => { + const filter: EventFilter = { status: [{ $ignoreCaseEquals: "active" }] }; + expect(evaluateFilter({ status: "Active" }, filter)).toBe(true); + expect(evaluateFilter({ status: "ACTIVE" }, filter)).toBe(true); + expect(evaluateFilter({ status: "inactive" }, filter)).toBe(false); + }); + + it("matches $exists operator", () => { + const existsFilter: EventFilter = { email: [{ $exists: true }] }; + expect(evaluateFilter({ email: "user@test.com" }, existsFilter)).toBe(true); + expect(evaluateFilter({}, existsFilter)).toBe(false); + + const notExistsFilter: EventFilter = { deleted: [{ $exists: false }] }; + expect(evaluateFilter({}, notExistsFilter)).toBe(true); + expect(evaluateFilter({ deleted: true }, notExistsFilter)).toBe(false); + }); + + it("matches $isNull operator", () => { + const isNullFilter: EventFilter = { deletedAt: [{ $isNull: true }] }; + expect(evaluateFilter({ deletedAt: null }, isNullFilter)).toBe(true); + expect(evaluateFilter({ deletedAt: "2024-01-01" }, isNullFilter)).toBe(false); + + const notNullFilter: EventFilter = { email: [{ $isNull: false }] }; + expect(evaluateFilter({ email: "test@test.com" }, notNullFilter)).toBe(true); + expect(evaluateFilter({ email: null }, notNullFilter)).toBe(false); + }); + + it("matches $anythingBut operator", () => { + const filter: EventFilter = { status: [{ $anythingBut: "deleted" }] }; + expect(evaluateFilter({ status: "active" }, filter)).toBe(true); + expect(evaluateFilter({ status: "deleted" }, filter)).toBe(false); + }); + + it("matches $anythingBut with array", () => { + const filter: EventFilter = { status: [{ $anythingBut: ["deleted", "archived"] }] }; + expect(evaluateFilter({ status: "active" }, filter)).toBe(true); + expect(evaluateFilter({ status: "deleted" }, filter)).toBe(false); + expect(evaluateFilter({ status: "archived" }, filter)).toBe(false); + }); + + it("matches $includes operator", () => { + const filter: EventFilter = { tags: [{ $includes: "urgent" }] }; + expect(evaluateFilter({ tags: ["urgent", "important"] }, filter)).toBe(true); + expect(evaluateFilter({ tags: ["normal"] }, filter)).toBe(false); + }); + + it("matches $not operator", () => { + const filter: EventFilter = { status: [{ $not: "deleted" }] }; + expect(evaluateFilter({ status: "active" }, filter)).toBe(true); + expect(evaluateFilter({ status: "deleted" }, filter)).toBe(false); + }); + + it("matches multiple conditions on different fields (AND)", () => { + const filter: EventFilter = { + status: ["active"], + amount: [{ $gt: 100 }], + }; + expect(evaluateFilter({ status: "active", amount: 200 }, filter)).toBe(true); + expect(evaluateFilter({ status: "active", amount: 50 }, filter)).toBe(false); + expect(evaluateFilter({ status: "inactive", amount: 200 }, filter)).toBe(false); + }); + + it("handles empty filter (matches everything)", () => { + const filter: EventFilter = {}; + expect(evaluateFilter({ any: "thing" }, filter)).toBe(true); + expect(evaluateFilter({}, filter)).toBe(true); + }); + + it("handles null/undefined payload with empty filter", () => { + const filter: EventFilter = {}; + expect(evaluateFilter(null, filter)).toBe(true); + expect(evaluateFilter(undefined, filter)).toBe(true); + }); + + it("handles null/undefined payload with non-empty filter", () => { + const filter: EventFilter = { status: ["active"] }; + expect(evaluateFilter(null, filter)).toBe(false); + expect(evaluateFilter(undefined, filter)).toBe(false); + }); +}); + +describe("compileFilter", () => { + beforeEach(() => { + clearFilterCache(); + }); + + it("returns a function that evaluates the filter", () => { + const filter: EventFilter = { status: ["active"] }; + const fn = compileFilter(filter); + expect(fn({ status: "active" })).toBe(true); + expect(fn({ status: "inactive" })).toBe(false); + }); + + it("caches compiled filters by key", () => { + const filter: EventFilter = { status: ["active"] }; + const fn1 = compileFilter(filter, "sub_123"); + const fn2 = compileFilter(filter, "sub_123"); + expect(fn1).toBe(fn2); // Same reference + }); + + it("different keys produce different cache entries", () => { + const filter1: EventFilter = { status: ["active"] }; + const filter2: EventFilter = { status: ["inactive"] }; + const fn1 = compileFilter(filter1, "sub_1"); + const fn2 = compileFilter(filter2, "sub_2"); + expect(fn1).not.toBe(fn2); + expect(fn1({ status: "active" })).toBe(true); + expect(fn2({ status: "inactive" })).toBe(true); + }); + + it("invalidateFilterCache removes a specific entry", () => { + const filter: EventFilter = { status: ["active"] }; + const fn1 = compileFilter(filter, "sub_123"); + invalidateFilterCache("sub_123"); + const fn2 = compileFilter(filter, "sub_123"); + expect(fn1).not.toBe(fn2); // New reference after invalidation + }); + + it("clearFilterCache removes all entries", () => { + const filter: EventFilter = { status: ["active"] }; + const fn1 = compileFilter(filter, "sub_1"); + const fn2 = compileFilter(filter, "sub_2"); + clearFilterCache(); + const fn3 = compileFilter(filter, "sub_1"); + expect(fn1).not.toBe(fn3); + }); +}); diff --git a/packages/core/test/v3/events/patternMatcher.test.ts b/packages/core/test/v3/events/patternMatcher.test.ts new file mode 100644 index 00000000000..f6ee70a68d3 --- /dev/null +++ b/packages/core/test/v3/events/patternMatcher.test.ts @@ -0,0 +1,160 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { + matchesPattern, + compilePattern, + clearPatternCache, +} from "../../../src/v3/events/patternMatcher.js"; + +describe("matchesPattern", () => { + beforeEach(() => { + clearPatternCache(); + }); + + describe("exact match (no wildcards)", () => { + it("matches identical slug", () => { + expect(matchesPattern("order.created", "order.created")).toBe(true); + }); + + it("rejects different slug", () => { + expect(matchesPattern("order.updated", "order.created")).toBe(false); + }); + + it("matches single-segment slug", () => { + expect(matchesPattern("created", "created")).toBe(true); + }); + }); + + describe("* (single-segment wildcard)", () => { + it("order.* matches order.created", () => { + expect(matchesPattern("order.created", "order.*")).toBe(true); + }); + + it("order.* matches order.updated", () => { + expect(matchesPattern("order.updated", "order.*")).toBe(true); + }); + + it("order.* does NOT match order.status.changed", () => { + expect(matchesPattern("order.status.changed", "order.*")).toBe(false); + }); + + it("order.* does NOT match order (fewer segments)", () => { + expect(matchesPattern("order", "order.*")).toBe(false); + }); + + it("*.created matches order.created", () => { + expect(matchesPattern("order.created", "*.created")).toBe(true); + }); + + it("*.created matches user.created", () => { + expect(matchesPattern("user.created", "*.created")).toBe(true); + }); + + it("*.created does NOT match org.user.created", () => { + expect(matchesPattern("org.user.created", "*.created")).toBe(false); + }); + + it("*.* matches any two-segment slug", () => { + expect(matchesPattern("order.created", "*.*")).toBe(true); + expect(matchesPattern("user.deleted", "*.*")).toBe(true); + }); + + it("*.* does NOT match single segment", () => { + expect(matchesPattern("created", "*.*")).toBe(false); + }); + + it("*.* does NOT match three segments", () => { + expect(matchesPattern("order.status.changed", "*.*")).toBe(false); + }); + }); + + describe("# (multi-segment wildcard)", () => { + it("order.# matches order.created (1 segment)", () => { + expect(matchesPattern("order.created", "order.#")).toBe(true); + }); + + it("order.# matches order.status.changed (2 segments)", () => { + expect(matchesPattern("order.status.changed", "order.#")).toBe(true); + }); + + it("order.# matches order (0 segments)", () => { + expect(matchesPattern("order", "order.#")).toBe(true); + }); + + it("order.# does NOT match user.created", () => { + expect(matchesPattern("user.created", "order.#")).toBe(false); + }); + + it("#.created matches order.created", () => { + expect(matchesPattern("order.created", "#.created")).toBe(true); + }); + + it("#.created matches org.user.created", () => { + expect(matchesPattern("org.user.created", "#.created")).toBe(true); + }); + + it("#.created matches created (0 prefix segments)", () => { + expect(matchesPattern("created", "#.created")).toBe(true); + }); + + it("#.created does NOT match order.updated", () => { + expect(matchesPattern("order.updated", "#.created")).toBe(false); + }); + + it("# matches anything", () => { + expect(matchesPattern("anything", "#")).toBe(true); + expect(matchesPattern("a.b.c.d", "#")).toBe(true); + }); + }); + + describe("combined wildcards", () => { + it("*.*.created matches order.item.created", () => { + expect(matchesPattern("order.item.created", "*.*.created")).toBe(true); + }); + + it("*.*.created does NOT match order.created", () => { + expect(matchesPattern("order.created", "*.*.created")).toBe(false); + }); + + it("*.# matches anything with at least one segment", () => { + // * matches one segment, # matches zero or more + // so *.# matches any slug with >= 1 segment + expect(matchesPattern("order", "*.#")).toBe(true); + expect(matchesPattern("order.created", "*.#")).toBe(true); + expect(matchesPattern("order.status.changed", "*.#")).toBe(true); + }); + + it("#.*.created matches order.item.created", () => { + expect(matchesPattern("order.item.created", "#.*.created")).toBe(true); + }); + + it("#.*.created matches item.created", () => { + expect(matchesPattern("item.created", "#.*.created")).toBe(true); + }); + }); +}); + +describe("compilePattern", () => { + beforeEach(() => { + clearPatternCache(); + }); + + it("returns a reusable predicate", () => { + const matches = compilePattern("order.*"); + expect(matches("order.created")).toBe(true); + expect(matches("order.updated")).toBe(true); + expect(matches("user.created")).toBe(false); + }); + + it("caches compiled patterns", () => { + const fn1 = compilePattern("order.*"); + const fn2 = compilePattern("order.*"); + expect(fn1).toBe(fn2); + }); + + it("clearPatternCache invalidates cache", () => { + const fn1 = compilePattern("order.*"); + clearPatternCache(); + const fn2 = compilePattern("order.*"); + expect(fn1).not.toBe(fn2); + }); +}); From be7ca08fbcc3c35d2641637f5a51c2a8477411cb Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:09:15 -0800 Subject: [PATCH 13/65] =?UTF-8?q?feat(events):=20phase=202.2=20=E2=80=94?= =?UTF-8?q?=20filters=20in=20SDK=20+=20stored=20during=20deploy?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add onEventFilter to TaskMetadata schema - Add filter option to TaskOptionsWithEvent - Extract filter in shared.ts and pass through metadata - Store filter in EventSubscription during syncWorkerEvents Co-Authored-By: Claude Opus 4.6 --- apps/webapp/app/v3/services/createBackgroundWorker.server.ts | 2 ++ packages/core/src/v3/schemas/schemas.ts | 2 ++ packages/core/src/v3/types/tasks.ts | 2 ++ packages/trigger-sdk/src/v3/shared.ts | 4 +++- 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index 7098562fd4b..4d5431dda75 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -418,10 +418,12 @@ async function syncWorkerEvents( projectId: worker.projectId, workerId: worker.id, enabled: true, + filter: (task.onEventFilter as any) ?? undefined, }, update: { workerId: worker.id, enabled: true, + filter: (task.onEventFilter as any) ?? undefined, }, }); diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index ecc59c19ca7..c8b4be9c24e 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -205,6 +205,8 @@ const taskMetadata = { payloadSchema: z.unknown().optional(), /** Event ID that this task subscribes to (set when task uses `on: someEvent`) */ onEvent: z.string().optional(), + /** Event filter for content-based routing (set when task uses `on: someEvent` with `filter`) */ + onEventFilter: z.unknown().optional(), }; export const TaskMetadata = z.object(taskMetadata); diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index 5494aea4459..c04b8a850c5 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -414,6 +414,8 @@ export type TaskOptionsWithEvent< > = CommonTaskOptions & { /** The event to subscribe this task to */ on: EventSource; + /** Content-based filter — only receive events whose payload matches this filter */ + filter?: import("../schemas/eventFilter.js").EventFilter; }; declare const __output: unique symbol; diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index 8ab46bf4a32..13977ad1a47 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -244,8 +244,9 @@ export function createTask< registerTaskLifecycleHooks(params.id, params); - // Extract onEvent from the params if this task subscribes to an event + // Extract onEvent and optional filter from the params if this task subscribes to an event const onEvent = "on" in params && params.on ? (params.on as EventSource).id : undefined; + const onEventFilter = "filter" in params && params.filter ? params.filter : undefined; resourceCatalog.registerTaskMetadata({ id: params.id, @@ -256,6 +257,7 @@ export function createTask< maxDuration: params.maxDuration, payloadSchema: params.jsonSchema, onEvent, + onEventFilter, fns: { run: params.run, }, From 676d37eb0b0e7847f7b49cbb4c4520ed682ad6a3 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:11:27 -0800 Subject: [PATCH 14/65] =?UTF-8?q?feat(events):=20phase=202.3=20=E2=80=94?= =?UTF-8?q?=20filter=20evaluation=20during=20fan-out?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Evaluate subscription.filter against payload before triggering - Non-matching subscribers are skipped (no run created) - Malformed filters err on side of delivery (log warning, don't block) - 3 new integration tests: filter skips, filter allows, complex multi-field Co-Authored-By: Claude Opus 4.6 --- .../v3/services/events/publishEvent.server.ts | 37 +++- apps/webapp/test/engine/publishEvent.test.ts | 188 ++++++++++++++++++ 2 files changed, 222 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 9542e66d363..ce749809b27 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -1,4 +1,5 @@ -import { TriggerTaskRequestBody } from "@trigger.dev/core/v3"; +import { TriggerTaskRequestBody, eventFilterMatches } from "@trigger.dev/core/v3"; +import type { EventFilter } from "@trigger.dev/core/v3"; import { PrismaClientOrTransaction } from "~/db.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; @@ -112,11 +113,41 @@ export class PublishEventService extends BaseService { }; } - // 4. Fan out: trigger each subscribed task + // 4. Evaluate content-based filters — skip subscribers whose filter doesn't match + const matchingSubscriptions = subscriptions.filter((subscription) => { + if (!subscription.filter) return true; // No filter → always matches + + try { + return eventFilterMatches(payload, subscription.filter as EventFilter); + } catch (error) { + // Malformed filter → skip silently (don't block the publish) + logger.warn("Failed to evaluate event filter", { + subscriptionId: subscription.id, + taskSlug: subscription.taskSlug, + error: error instanceof Error ? error.message : String(error), + }); + return true; // Err on the side of delivering + } + }); + + const filteredCount = subscriptions.length - matchingSubscriptions.length; + if (filteredCount > 0) { + span.setAttribute("filteredOutCount", filteredCount); + } + span.setAttribute("matchingSubscriberCount", matchingSubscriptions.length); + + if (matchingSubscriptions.length === 0) { + return { + eventId: generateFriendlyId("evt"), + runs: [], + }; + } + + // 5. Fan out: trigger each matching subscribed task const eventId = generateFriendlyId("evt"); const runs: PublishEventResult["runs"] = []; - for (const subscription of subscriptions) { + for (const subscription of matchingSubscriptions) { try { // Derive per-consumer idempotency key if a global one was provided const consumerIdempotencyKey = options.idempotencyKey diff --git a/apps/webapp/test/engine/publishEvent.test.ts b/apps/webapp/test/engine/publishEvent.test.ts index 81f1bcffc9f..95b27f6c3cb 100644 --- a/apps/webapp/test/engine/publishEvent.test.ts +++ b/apps/webapp/test/engine/publishEvent.test.ts @@ -558,6 +558,194 @@ describe("PublishEventService", () => { } ); + containerTest( + "content-based filter skips non-matching subscribers", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, [ + "high-value-handler", + "all-orders-handler", + ]); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.placed", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Subscription with filter: only orders with amount > 1000 + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "high-value-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + filter: { amount: [{ $gt: 1000 }] }, + }, + }); + + // Subscription without filter: gets all events + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "all-orders-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // Low-value order — only all-orders-handler should get it + const result = await service.call("order.placed", env, { orderId: "o1", amount: 50 }); + + expect(result.runs).toHaveLength(1); + expect(result.runs[0].taskIdentifier).toBe("all-orders-handler"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "content-based filter allows matching subscribers", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, [ + "high-value-handler", + "all-orders-handler", + ]); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.placed.v2", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Filter: amount > 1000 + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "high-value-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + filter: { amount: [{ $gt: 1000 }] }, + }, + }); + + // No filter + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "all-orders-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // High-value order — both handlers should get it + const result = await service.call("order.placed.v2", env, { orderId: "o2", amount: 5000 }); + + expect(result.runs).toHaveLength(2); + const taskIds = result.runs.map((r) => r.taskIdentifier).sort(); + expect(taskIds).toEqual(["all-orders-handler", "high-value-handler"]); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "complex filter with multiple conditions", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, [ + "vip-gold-handler", + "catch-all", + ]); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "customer.action", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Complex filter: status = "active" AND tier = "gold" + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "vip-gold-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + filter: { status: ["active"], tier: ["gold"] }, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "catch-all", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // Matches both conditions — both handlers triggered + const result1 = await service.call("customer.action", env, { + customerId: "c1", + status: "active", + tier: "gold", + }); + expect(result1.runs).toHaveLength(2); + + // Does not match (wrong tier) — only catch-all triggered + const result2 = await service.call("customer.action", env, { + customerId: "c2", + status: "active", + tier: "silver", + }); + expect(result2.runs).toHaveLength(1); + expect(result2.runs[0].taskIdentifier).toBe("catch-all"); + } finally { + await engine.quit(); + } + } + ); + containerTest( "idempotency key prevents duplicate fan-out", async ({ prisma, redisOptions }) => { From 846438cf9febcb15fed5c34c644f6d10c2be1f77 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:15:40 -0800 Subject: [PATCH 15/65] =?UTF-8?q?feat(events):=20phase=202.4=20=E2=80=94?= =?UTF-8?q?=20wildcard=20pattern=20subscriptions?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add events.match() helper in SDK for pattern subscriptions (*, #) - Add onEventPattern to TaskMetadata schema - Store pattern in EventSubscription during deploy - PublishEventService evaluates pattern subscriptions during fan-out - Pattern + filter combination supported - 4 new integration tests: *.matches, *.rejects, #.multi-level, pattern+filter Co-Authored-By: Claude Opus 4.6 --- .../services/createBackgroundWorker.server.ts | 2 + .../v3/services/events/publishEvent.server.ts | 52 +++- apps/webapp/test/engine/publishEvent.test.ts | 239 ++++++++++++++++++ packages/core/src/v3/schemas/schemas.ts | 2 + packages/core/src/v3/types/tasks.ts | 6 + packages/trigger-sdk/src/v3/events.ts | 54 ++++ packages/trigger-sdk/src/v3/shared.ts | 7 +- 7 files changed, 352 insertions(+), 10 deletions(-) diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index 4d5431dda75..9844fb0c190 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -419,11 +419,13 @@ async function syncWorkerEvents( workerId: worker.id, enabled: true, filter: (task.onEventFilter as any) ?? undefined, + pattern: task.onEventPattern ?? undefined, }, update: { workerId: worker.id, enabled: true, filter: (task.onEventFilter as any) ?? undefined, + pattern: task.onEventPattern ?? undefined, }, }); diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index ce749809b27..1bdf11818ab 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -1,4 +1,4 @@ -import { TriggerTaskRequestBody, eventFilterMatches } from "@trigger.dev/core/v3"; +import { TriggerTaskRequestBody, eventFilterMatches, matchesPattern } from "@trigger.dev/core/v3"; import type { EventFilter } from "@trigger.dev/core/v3"; import { PrismaClientOrTransaction } from "~/db.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; @@ -95,16 +95,52 @@ export class PublishEventService extends BaseService { } } - // 3. Find all active subscriptions for this event + environment - const subscriptions = await this._prisma.eventSubscription.findMany({ - where: { - eventDefinitionId: eventDefinition.id, - environmentId: environment.id, - enabled: true, - }, + // 3. Find all active subscriptions: exact match + pattern-based + const [exactSubscriptions, patternSubscriptions] = await Promise.all([ + // Exact subscriptions: tied to this specific EventDefinition + this._prisma.eventSubscription.findMany({ + where: { + eventDefinitionId: eventDefinition.id, + environmentId: environment.id, + enabled: true, + }, + }), + // Pattern subscriptions: have a wildcard pattern that might match this event slug + this._prisma.eventSubscription.findMany({ + where: { + projectId: environment.projectId, + environmentId: environment.id, + enabled: true, + pattern: { not: null }, + }, + }), + ]); + + // Filter pattern subscriptions: only keep those whose pattern matches the event slug + const matchingPatternSubs = patternSubscriptions.filter((sub) => { + if (!sub.pattern) return false; + try { + return matchesPattern(eventSlug, sub.pattern); + } catch (error) { + logger.warn("Failed to evaluate event pattern", { + subscriptionId: sub.id, + taskSlug: sub.taskSlug, + pattern: sub.pattern, + error: error instanceof Error ? error.message : String(error), + }); + return false; + } }); + // Deduplicate: if a subscription appears in both exact and pattern results, keep only once + const seenIds = new Set(exactSubscriptions.map((s) => s.id)); + const dedupedPatternSubs = matchingPatternSubs.filter((s) => !seenIds.has(s.id)); + + const subscriptions = [...exactSubscriptions, ...dedupedPatternSubs]; + span.setAttribute("subscriberCount", subscriptions.length); + span.setAttribute("exactSubscriberCount", exactSubscriptions.length); + span.setAttribute("patternSubscriberCount", dedupedPatternSubs.length); if (subscriptions.length === 0) { return { diff --git a/apps/webapp/test/engine/publishEvent.test.ts b/apps/webapp/test/engine/publishEvent.test.ts index 95b27f6c3cb..6237f9aa602 100644 --- a/apps/webapp/test/engine/publishEvent.test.ts +++ b/apps/webapp/test/engine/publishEvent.test.ts @@ -746,6 +746,245 @@ describe("PublishEventService", () => { } ); + containerTest( + "wildcard pattern order.* matches order.created", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "order-watcher"); + + // Create the event definition for order.created + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.created", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Create a pattern-based subscription: order.* + // It still needs an eventDefinitionId (we use a "placeholder" definition) + const patternEventDef = await prisma.eventDefinition.create({ + data: { + slug: "pattern:order.*", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: patternEventDef.id } }, + taskSlug: "order-watcher", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + pattern: "order.*", + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // order.created should match order.* + const result = await service.call("order.created", env, { orderId: "o1" }); + expect(result.runs).toHaveLength(1); + expect(result.runs[0].taskIdentifier).toBe("order-watcher"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "wildcard pattern order.* does NOT match order.status.changed", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "order-watcher"); + + // Create the event definition for order.status.changed + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.status.changed", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Pattern subscription: order.* + const patternEventDef = await prisma.eventDefinition.create({ + data: { + slug: "pattern:order.*", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: patternEventDef.id } }, + taskSlug: "order-watcher", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + pattern: "order.*", + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // order.status.changed should NOT match order.* (too many levels) + const result = await service.call("order.status.changed", env, { orderId: "o1" }); + expect(result.runs).toHaveLength(0); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "wildcard pattern order.# matches multi-level slugs", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "order-all-handler"); + + // Create event definitions + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.status.changed", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Pattern subscription: order.# + const patternEventDef = await prisma.eventDefinition.create({ + data: { + slug: "pattern:order.#", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: patternEventDef.id } }, + taskSlug: "order-all-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + pattern: "order.#", + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // order.status.changed matches order.# (multi-level) + const result = await service.call("order.status.changed", env, { orderId: "o2" }); + expect(result.runs).toHaveLength(1); + expect(result.runs[0].taskIdentifier).toBe("order-all-handler"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "pattern + filter combination works", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, [ + "high-value-order-watcher", + "all-order-watcher", + ]); + + // Create event definition + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.created", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Pattern subscription with filter: order.* + amount > 1000 + const patternEventDef = await prisma.eventDefinition.create({ + data: { + slug: "pattern:order.*", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: patternEventDef.id } }, + taskSlug: "high-value-order-watcher", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + pattern: "order.*", + filter: { amount: [{ $gt: 1000 }] }, + }, + }); + + // Pattern subscription without filter: order.* + const patternEventDef2 = await prisma.eventDefinition.create({ + data: { + slug: "pattern:order.*:all", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: patternEventDef2.id } }, + taskSlug: "all-order-watcher", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + pattern: "order.*", + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // Low value order — only all-order-watcher (pattern matches but filter doesn't) + const result1 = await service.call("order.created", env, { orderId: "o1", amount: 50 }); + expect(result1.runs).toHaveLength(1); + expect(result1.runs[0].taskIdentifier).toBe("all-order-watcher"); + + // High value order — both watchers + const result2 = await service.call("order.created", env, { orderId: "o2", amount: 5000 }); + expect(result2.runs).toHaveLength(2); + const taskIds = result2.runs.map((r) => r.taskIdentifier).sort(); + expect(taskIds).toEqual(["all-order-watcher", "high-value-order-watcher"]); + } finally { + await engine.quit(); + } + } + ); + containerTest( "idempotency key prevents duplicate fan-out", async ({ prisma, redisOptions }) => { diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index c8b4be9c24e..ebf9fa4ba2d 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -207,6 +207,8 @@ const taskMetadata = { onEvent: z.string().optional(), /** Event filter for content-based routing (set when task uses `on: someEvent` with `filter`) */ onEventFilter: z.unknown().optional(), + /** Wildcard pattern for pattern-based subscriptions (set when task uses `on: events.match("order.*")`) */ + onEventPattern: z.string().optional(), }; export const TaskMetadata = z.object(taskMetadata); diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index c04b8a850c5..2cc26ad1de6 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -41,6 +41,12 @@ export interface EventSource { readonly version: string; } +/** A pattern-based event source (e.g., "order.*") for wildcard subscriptions */ +export interface EventPatternSource extends EventSource { + /** The wildcard pattern (e.g., "order.*", "order.#") */ + readonly pattern: string; +} + export class SubtaskUnwrapError extends Error { public readonly taskId: string; public readonly runId: string; diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index b5be0105f18..a37b482afd1 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -199,3 +199,57 @@ export function isEventDefinition(value: unknown): value is AnyEventDefinition { (value as any)[Symbol.for("trigger.dev/event")] === true ); } + +// ---- Pattern-based subscriptions ---- + +/** A pattern-based event matcher for wildcard subscriptions */ +export interface EventPatternMatcher { + /** The event pattern used as the subscription identifier */ + readonly id: string; + /** Version (always "1.0" for patterns) */ + readonly version: string; + /** The wildcard pattern */ + readonly pattern: string; +} + +/** + * Create a pattern-based event matcher for wildcard subscriptions. + * + * Patterns use dot-separated segments with two wildcards: + * - `*` matches exactly one segment (e.g., `order.*` matches `order.created`) + * - `#` matches zero or more segments (e.g., `order.#` matches `order.status.changed`) + * + * @example + * ```ts + * import { events, task } from "@trigger.dev/sdk"; + * + * // Subscribe to all order events + * export const orderHandler = task({ + * id: "order-handler", + * on: events.match("order.*"), + * run: async (payload) => { ... } + * }); + * ``` + */ +export function matchEvents(pattern: string): EventPatternMatcher { + return { + id: `pattern:${pattern}`, + version: "1.0", + pattern, + }; +} + +/** Namespace for event utilities */ +export const events = { + match: matchEvents, +}; + +/** Check if an event source is a pattern matcher */ +export function isEventPatternMatcher(value: unknown): value is EventPatternMatcher { + return ( + typeof value === "object" && + value !== null && + "pattern" in value && + typeof (value as any).pattern === "string" + ); +} diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index 13977ad1a47..701bba8328c 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -244,9 +244,11 @@ export function createTask< registerTaskLifecycleHooks(params.id, params); - // Extract onEvent and optional filter from the params if this task subscribes to an event - const onEvent = "on" in params && params.on ? (params.on as EventSource).id : undefined; + // Extract onEvent, optional filter, and optional pattern from the params if this task subscribes to an event + const eventSource = "on" in params && params.on ? params.on as EventSource & { pattern?: string } : undefined; + const onEvent = eventSource?.id; const onEventFilter = "filter" in params && params.filter ? params.filter : undefined; + const onEventPattern = eventSource && "pattern" in eventSource ? eventSource.pattern : undefined; resourceCatalog.registerTaskMetadata({ id: params.id, @@ -258,6 +260,7 @@ export function createTask< payloadSchema: params.jsonSchema, onEvent, onEventFilter, + onEventPattern, fns: { run: params.run, }, From c2ec92e30cff9bbd6b248f0071f5a1493ec93c22 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:18:17 -0800 Subject: [PATCH 16/65] =?UTF-8?q?feat(events):=20phase=202=20=E2=80=94=20c?= =?UTF-8?q?hangeset=20for=20smart=20routing?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .changeset/event-smart-routing.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .changeset/event-smart-routing.md diff --git a/.changeset/event-smart-routing.md b/.changeset/event-smart-routing.md new file mode 100644 index 00000000000..366d4ac06a5 --- /dev/null +++ b/.changeset/event-smart-routing.md @@ -0,0 +1,7 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +"trigger.dev": patch +--- + +Add smart routing for events: content-based filters and wildcard pattern subscriptions From c63c1e7815f6fbe1fd3eb0dece4419b08aee1581 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:35:27 -0800 Subject: [PATCH 17/65] =?UTF-8?q?feat(events):=20phase=203.1=20=E2=80=94?= =?UTF-8?q?=20event=5Flog=5Fv1=20ClickHouse=20table=20+=20insert=20functio?= =?UTF-8?q?n?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../clickhouse/schema/021_event_log_v1.sql | 30 ++++++++ .../schema/022_event_counts_mv_v1.sql | 31 +++++++++ internal-packages/clickhouse/src/eventLog.ts | 69 +++++++++++++++++++ internal-packages/clickhouse/src/index.ts | 9 +++ 4 files changed, 139 insertions(+) create mode 100644 internal-packages/clickhouse/schema/021_event_log_v1.sql create mode 100644 internal-packages/clickhouse/schema/022_event_counts_mv_v1.sql create mode 100644 internal-packages/clickhouse/src/eventLog.ts diff --git a/internal-packages/clickhouse/schema/021_event_log_v1.sql b/internal-packages/clickhouse/schema/021_event_log_v1.sql new file mode 100644 index 00000000000..de0420432c6 --- /dev/null +++ b/internal-packages/clickhouse/schema/021_event_log_v1.sql @@ -0,0 +1,30 @@ +-- +goose Up +CREATE TABLE IF NOT EXISTS trigger_dev.event_log_v1 +( + event_id String CODEC(ZSTD(1)), + event_type String CODEC(ZSTD(1)), + payload String CODEC(ZSTD(1)), + payload_type LowCardinality(String) DEFAULT 'application/json', + published_at DateTime64(3) CODEC(Delta(8), ZSTD(1)), + environment_id String CODEC(ZSTD(1)), + project_id String CODEC(ZSTD(1)), + organization_id String CODEC(ZSTD(1)), + publisher_run_id String DEFAULT '' CODEC(ZSTD(1)), + idempotency_key String DEFAULT '' CODEC(ZSTD(1)), + tags Array(String) DEFAULT [], + metadata String DEFAULT '{}' CODEC(ZSTD(1)), + fan_out_count UInt32 DEFAULT 0, + inserted_at DateTime64(3) DEFAULT now64(3), + + INDEX idx_event_id event_id TYPE bloom_filter(0.001) GRANULARITY 1, + INDEX idx_publisher_run_id publisher_run_id TYPE bloom_filter(0.001) GRANULARITY 1, + INDEX idx_idempotency_key idempotency_key TYPE bloom_filter(0.001) GRANULARITY 1 +) +ENGINE = MergeTree +PARTITION BY toYYYYMM(published_at) +ORDER BY (project_id, environment_id, event_type, published_at, event_id) +TTL toDateTime(published_at) + INTERVAL 90 DAY +SETTINGS ttl_only_drop_parts = 1; + +-- +goose Down +DROP TABLE IF EXISTS trigger_dev.event_log_v1; diff --git a/internal-packages/clickhouse/schema/022_event_counts_mv_v1.sql b/internal-packages/clickhouse/schema/022_event_counts_mv_v1.sql new file mode 100644 index 00000000000..92783f63b4e --- /dev/null +++ b/internal-packages/clickhouse/schema/022_event_counts_mv_v1.sql @@ -0,0 +1,31 @@ +-- +goose Up +CREATE TABLE IF NOT EXISTS trigger_dev.event_counts_v1 +( + project_id String, + environment_id String, + event_type String, + bucket_start DateTime, + event_count UInt64, + total_fan_out UInt64 +) +ENGINE = SummingMergeTree() +PARTITION BY toYYYYMM(bucket_start) +ORDER BY (project_id, environment_id, event_type, bucket_start) +TTL bucket_start + INTERVAL 90 DAY +SETTINGS ttl_only_drop_parts = 1; + +CREATE MATERIALIZED VIEW IF NOT EXISTS trigger_dev.event_counts_mv_v1 +TO trigger_dev.event_counts_v1 AS +SELECT + project_id, + environment_id, + event_type, + toStartOfMinute(published_at) AS bucket_start, + count() AS event_count, + sum(fan_out_count) AS total_fan_out +FROM trigger_dev.event_log_v1 +GROUP BY project_id, environment_id, event_type, bucket_start; + +-- +goose Down +DROP VIEW IF EXISTS trigger_dev.event_counts_mv_v1; +DROP TABLE IF EXISTS trigger_dev.event_counts_v1; diff --git a/internal-packages/clickhouse/src/eventLog.ts b/internal-packages/clickhouse/src/eventLog.ts new file mode 100644 index 00000000000..1646403711b --- /dev/null +++ b/internal-packages/clickhouse/src/eventLog.ts @@ -0,0 +1,69 @@ +import { z } from "zod"; +import { ClickhouseReader, ClickhouseWriter } from "./client/types.js"; + +export const EventLogV1Input = z.object({ + event_id: z.string(), + event_type: z.string(), + payload: z.string(), + payload_type: z.string().optional(), + published_at: z.string(), + environment_id: z.string(), + project_id: z.string(), + organization_id: z.string(), + publisher_run_id: z.string().optional(), + idempotency_key: z.string().optional(), + tags: z.array(z.string()).optional(), + metadata: z.string().optional(), + fan_out_count: z.number().int().optional(), +}); + +export type EventLogV1Input = z.input; + +export const EventLogV1Output = z.object({ + event_id: z.string(), + event_type: z.string(), + payload: z.string(), + payload_type: z.string(), + published_at: z.string(), + environment_id: z.string(), + project_id: z.string(), + organization_id: z.string(), + publisher_run_id: z.string(), + idempotency_key: z.string(), + tags: z.array(z.string()), + metadata: z.string(), + fan_out_count: z.number().int(), + inserted_at: z.string(), +}); + +export type EventLogV1Output = z.output; + +export function insertEventLog(ch: ClickhouseWriter) { + return ch.insertUnsafe({ + name: "insertEventLog", + table: "trigger_dev.event_log_v1", + }); +} + +export function getEventLogQueryBuilder(ch: ClickhouseReader) { + return ch.queryBuilder({ + name: "getEventLog", + baseQuery: `SELECT + event_id, + event_type, + payload, + payload_type, + published_at, + environment_id, + project_id, + organization_id, + publisher_run_id, + idempotency_key, + tags, + metadata, + fan_out_count, + inserted_at + FROM trigger_dev.event_log_v1`, + schema: EventLogV1Output, + }); +} diff --git a/internal-packages/clickhouse/src/index.ts b/internal-packages/clickhouse/src/index.ts index b66ce8e3ed6..b5b183f2eca 100644 --- a/internal-packages/clickhouse/src/index.ts +++ b/internal-packages/clickhouse/src/index.ts @@ -27,6 +27,7 @@ import { getLogsSearchListQueryBuilder, } from "./taskEvents.js"; import { insertMetrics } from "./metrics.js"; +import { insertEventLog, getEventLogQueryBuilder } from "./eventLog.js"; import { Logger, type LogLevel } from "@trigger.dev/core/logger"; import type { Agent as HttpAgent } from "http"; import type { Agent as HttpsAgent } from "https"; @@ -34,6 +35,7 @@ import type { Agent as HttpsAgent } from "https"; export type * from "./taskRuns.js"; export type * from "./taskEvents.js"; export type * from "./metrics.js"; +export type * from "./eventLog.js"; export type * from "./client/queryBuilder.js"; // Re-export column constants, indices, and type-safe accessors @@ -229,4 +231,11 @@ export class ClickHouse { logsListQueryBuilder: getLogsSearchListQueryBuilder(this.reader), }; } + + get eventLog() { + return { + insert: insertEventLog(this.writer), + queryBuilder: getEventLogQueryBuilder(this.reader), + }; + } } From 8dfb002eed22d5ee00badaf1a57eed5be054de37 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:38:40 -0800 Subject: [PATCH 18/65] =?UTF-8?q?feat(events):=20phase=203.2=20=E2=80=94?= =?UTF-8?q?=20write=20to=20ClickHouse=20event=20log=20on=20each=20publish?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../api.v1.events.$eventId.batchPublish.ts | 3 +- .../routes/api.v1.events.$eventId.publish.ts | 3 +- .../services/events/eventLogWriter.server.ts | 45 ++++++++++++++++ .../v3/services/events/publishEvent.server.ts | 51 ++++++++++++++++++- 4 files changed, 99 insertions(+), 3 deletions(-) create mode 100644 apps/webapp/app/v3/services/events/eventLogWriter.server.ts diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts index d5b42590112..5644d30a853 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts @@ -4,6 +4,7 @@ import { z } from "zod"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { PublishEventService, PublishEventResult } from "~/v3/services/events/publishEvent.server"; +import { writeEventLog } from "~/v3/services/events/eventLogWriter.server"; const ParamsSchema = z.object({ eventId: z.string(), @@ -21,7 +22,7 @@ const { action, loader } = createActionApiRoute( }, }, async ({ body, params, authentication }) => { - const service = new PublishEventService(); + const service = new PublishEventService(undefined, undefined, writeEventLog); try { const results: PublishEventResult[] = []; diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts index eb5e1d7a08c..c6a085eca8e 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts @@ -4,6 +4,7 @@ import { z } from "zod"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; import { PublishEventService } from "~/v3/services/events/publishEvent.server"; +import { writeEventLog } from "~/v3/services/events/eventLogWriter.server"; const ParamsSchema = z.object({ eventId: z.string(), @@ -21,7 +22,7 @@ const { action, loader } = createActionApiRoute( }, }, async ({ body, params, authentication }) => { - const service = new PublishEventService(); + const service = new PublishEventService(undefined, undefined, writeEventLog); try { const result = await service.call( diff --git a/apps/webapp/app/v3/services/events/eventLogWriter.server.ts b/apps/webapp/app/v3/services/events/eventLogWriter.server.ts new file mode 100644 index 00000000000..b118384b791 --- /dev/null +++ b/apps/webapp/app/v3/services/events/eventLogWriter.server.ts @@ -0,0 +1,45 @@ +import type { EventLogV1Input } from "@internal/clickhouse"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { logger } from "~/services/logger.server"; +import type { EventLogEntry } from "./publishEvent.server"; + +const insertFn = clickhouseClient.eventLog.insert; + +export function writeEventLog(entry: EventLogEntry): void { + const row: EventLogV1Input = { + event_id: entry.eventId, + event_type: entry.eventType, + payload: typeof entry.payload === "string" ? entry.payload : JSON.stringify(entry.payload), + published_at: entry.publishedAt.toISOString(), + environment_id: entry.environmentId, + project_id: entry.projectId, + organization_id: entry.organizationId, + idempotency_key: entry.idempotencyKey, + tags: entry.tags, + metadata: + entry.metadata !== undefined && entry.metadata !== null + ? JSON.stringify(entry.metadata) + : undefined, + fan_out_count: entry.fanOutCount, + }; + + // Fire-and-forget: don't await, don't block the publish response + insertFn(row).then( + ([error]) => { + if (error) { + logger.warn("Failed to insert event into ClickHouse event log", { + eventId: entry.eventId, + eventType: entry.eventType, + error: error.message, + }); + } + }, + (err) => { + logger.warn("Failed to insert event into ClickHouse event log", { + eventId: entry.eventId, + eventType: entry.eventType, + error: err instanceof Error ? err.message : String(err), + }); + } + ); +} diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 1bdf11818ab..6c4d096eba7 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -36,12 +36,31 @@ export type TriggerFn = ( options: TriggerTaskServiceOptions ) => Promise; +/** Callback to persist a published event to an external log (e.g. ClickHouse) */ +export type EventLogWriter = (entry: EventLogEntry) => void; + +export type EventLogEntry = { + eventId: string; + eventType: string; + payload: unknown; + publishedAt: Date; + environmentId: string; + projectId: string; + organizationId: string; + idempotencyKey?: string; + tags?: string[]; + metadata?: unknown; + fanOutCount: number; +}; + export class PublishEventService extends BaseService { private readonly _triggerFn: TriggerFn; + private readonly _eventLogWriter?: EventLogWriter; constructor( prisma?: PrismaClientOrTransaction, - triggerFn?: TriggerFn + triggerFn?: TriggerFn, + eventLogWriter?: EventLogWriter ) { super(prisma); this._triggerFn = @@ -50,6 +69,7 @@ export class PublishEventService extends BaseService { const svc = new TriggerTaskService({ prisma: this._prisma }); return svc.call(taskId, environment, body, options); }); + this._eventLogWriter = eventLogWriter; } public async call( @@ -236,6 +256,35 @@ export class PublishEventService extends BaseService { } } + // 6. Persist to event log (async, non-blocking) + if (this._eventLogWriter) { + try { + this._eventLogWriter({ + eventId, + eventType: eventSlug, + payload, + publishedAt: new Date(), + environmentId: environment.id, + projectId: environment.projectId, + organizationId: environment.organizationId, + idempotencyKey: options.idempotencyKey, + tags: options.tags + ? Array.isArray(options.tags) + ? options.tags + : [options.tags] + : undefined, + metadata: options.metadata, + fanOutCount: runs.length, + }); + } catch (error) { + logger.warn("Failed to write event to log", { + eventId, + eventSlug, + error: error instanceof Error ? error.message : String(error), + }); + } + } + return { eventId, runs }; }); } From 02369b1287582c03d052da4928a13c0f6f151958 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:40:51 -0800 Subject: [PATCH 19/65] =?UTF-8?q?feat(events):=20phase=203.3=20=E2=80=94?= =?UTF-8?q?=20event=20history=20API=20endpoint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../routes/api.v1.events.$eventId.history.ts | 97 +++++++++++++++++++ packages/core/src/v3/apiClient/index.ts | 55 +++++++++++ packages/core/src/v3/schemas/api.ts | 50 ++++++++++ 3 files changed, 202 insertions(+) create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.history.ts diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.history.ts b/apps/webapp/app/routes/api.v1.events.$eventId.history.ts new file mode 100644 index 00000000000..bf8ecbcc1b3 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.history.ts @@ -0,0 +1,97 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +export const loader = createLoaderApiRoute( + { + params: ParamsSchema, + corsStrategy: "all", + authorization: { + action: "read", + resource: (params) => ({ tasks: params.eventId }), + superScopes: ["read:runs", "read:all", "admin"], + }, + findResource: async () => 1 as const, + }, + async ({ params, authentication, request }) => { + const url = new URL(request.url); + const from = url.searchParams.get("from"); + const to = url.searchParams.get("to"); + const limit = Math.min(parseInt(url.searchParams.get("limit") ?? "50", 10), 200); + const cursor = url.searchParams.get("cursor"); + const publisherRunId = url.searchParams.get("publisherRunId"); + + const queryBuilder = clickhouseClient.eventLog.queryBuilder(); + + queryBuilder + .where("project_id = {projectId: String}", { + projectId: authentication.environment.projectId, + }) + .where("environment_id = {environmentId: String}", { + environmentId: authentication.environment.id, + }) + .where("event_type = {eventType: String}", { + eventType: params.eventId, + }); + + if (from) { + queryBuilder.where("published_at >= {from: DateTime64(3)}", { from }); + } + + if (to) { + queryBuilder.where("published_at <= {to: DateTime64(3)}", { to }); + } + + if (publisherRunId) { + queryBuilder.where("publisher_run_id = {publisherRunId: String}", { publisherRunId }); + } + + if (cursor) { + queryBuilder.where("published_at < {cursor: DateTime64(3)}", { cursor }); + } + + queryBuilder.orderBy("published_at DESC, event_id DESC").limit(limit + 1); + + const [queryError, result] = await queryBuilder.execute(); + + if (queryError) { + return json({ error: "Failed to query event history" }, { status: 500 }); + } + + const hasMore = result.length > limit; + const data = result.slice(0, limit); + + const lastItem = data[data.length - 1]; + const nextCursor = hasMore && lastItem ? lastItem.published_at : null; + + return json({ + data: data.map((row) => ({ + eventId: row.event_id, + eventType: row.event_type, + payload: safeParseJson(row.payload), + publishedAt: row.published_at, + publisherRunId: row.publisher_run_id || undefined, + idempotencyKey: row.idempotency_key || undefined, + tags: row.tags.length > 0 ? row.tags : undefined, + fanOutCount: row.fan_out_count, + })), + pagination: { + cursor: nextCursor, + hasMore, + }, + }); + } +); + +function safeParseJson(str: string): unknown { + try { + return JSON.parse(str); + } catch { + return str; + } +} diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts index 9375276e761..4ee1d2ded73 100644 --- a/packages/core/src/v3/apiClient/index.ts +++ b/packages/core/src/v3/apiClient/index.ts @@ -11,9 +11,12 @@ import { BatchPublishEventRequestBody, BatchPublishEventResponseBody, BatchTaskRunExecutionResult, + GetEventHistoryResponseBody, GetEventResponseBody, GetEventSchemaResponseBody, ListEventsResponseBody, + ReplayEventsRequestBody, + ReplayEventsResponseBody, PublishEventRequestBody, PublishEventResponseBody, BatchTriggerTaskV3RequestBody, @@ -1526,6 +1529,58 @@ export class ApiClient { ); } + getEventHistory( + eventId: string, + params?: { + from?: string; + to?: string; + limit?: number; + cursor?: string; + publisherRunId?: string; + }, + requestOptions?: ZodFetchOptions + ) { + const encodedEventId = encodeURIComponent(eventId); + const searchParams = new URLSearchParams(); + if (params?.from) searchParams.set("from", params.from); + if (params?.to) searchParams.set("to", params.to); + if (params?.limit) searchParams.set("limit", String(params.limit)); + if (params?.cursor) searchParams.set("cursor", params.cursor); + if (params?.publisherRunId) searchParams.set("publisherRunId", params.publisherRunId); + + const qs = searchParams.toString(); + const url = `${this.baseUrl}/api/v1/events/${encodedEventId}/history${qs ? `?${qs}` : ""}`; + + return zodfetch( + GetEventHistoryResponseBody, + url, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + replayEvents( + eventId: string, + body: z.input, + requestOptions?: ZodFetchOptions + ) { + const encodedEventId = encodeURIComponent(eventId); + + return zodfetch( + ReplayEventsResponseBody, + `${this.baseUrl}/api/v1/events/${encodedEventId}/replay`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + #getHeaders(spanParentAsLink: boolean, additionalHeaders?: Record) { const headers: Record = { "Content-Type": "application/json", diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 3087df2c777..9cd1dc433eb 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1703,3 +1703,53 @@ export const GetEventSchemaResponseBody = z.object({ }); export type GetEventSchemaResponseBody = z.infer; + +export const EventHistoryItem = z.object({ + eventId: z.string(), + eventType: z.string(), + payload: z.unknown(), + publishedAt: z.string(), + publisherRunId: z.string().optional(), + idempotencyKey: z.string().optional(), + tags: z.array(z.string()).optional(), + fanOutCount: z.number().int(), +}); + +export type EventHistoryItem = z.infer; + +export const GetEventHistoryResponseBody = z.object({ + data: z.array(EventHistoryItem), + pagination: z.object({ + cursor: z.string().nullable(), + hasMore: z.boolean(), + }), +}); + +export type GetEventHistoryResponseBody = z.infer; + +export const ReplayEventsRequestBody = z.object({ + from: z.coerce.date(), + to: z.coerce.date(), + filter: z.unknown().optional(), + tasks: z.array(z.string()).optional(), + dryRun: z.boolean().optional(), +}); + +export type ReplayEventsRequestBody = z.infer; + +export const ReplayEventsResponseBody = z.object({ + replayedCount: z.number().int(), + skippedCount: z.number().int(), + dryRun: z.boolean(), + runs: z + .array( + z.object({ + taskIdentifier: z.string(), + runId: z.string(), + sourceEventId: z.string(), + }) + ) + .optional(), +}); + +export type ReplayEventsResponseBody = z.infer; From 3d9863512e06e394ecb873c510ad81eb84a79fcb Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:43:06 -0800 Subject: [PATCH 20/65] =?UTF-8?q?feat(events):=20phase=203.4=20=E2=80=94?= =?UTF-8?q?=20event=20replay=20service=20+=20API=20endpoint?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../routes/api.v1.events.$eventId.replay.ts | 58 ++++++ .../v3/services/events/replayEvents.server.ts | 177 ++++++++++++++++++ 2 files changed, 235 insertions(+) create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.replay.ts create mode 100644 apps/webapp/app/v3/services/events/replayEvents.server.ts diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.replay.ts b/apps/webapp/app/routes/api.v1.events.$eventId.replay.ts new file mode 100644 index 00000000000..2419b009d6e --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.replay.ts @@ -0,0 +1,58 @@ +import { json } from "@remix-run/server-runtime"; +import { ReplayEventsRequestBody } from "@trigger.dev/core/v3"; +import type { EventFilter } from "@trigger.dev/core/v3"; +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { ReplayEventsService } from "~/v3/services/events/replayEvents.server"; +import { writeEventLog } from "~/v3/services/events/eventLogWriter.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +const { action, loader } = createActionApiRoute( + { + params: ParamsSchema, + body: ReplayEventsRequestBody, + corsStrategy: "all", + authorization: { + action: "trigger", + resource: (params) => ({ tasks: params.eventId }), + superScopes: ["write:tasks", "admin"], + }, + }, + async ({ body, params, authentication }) => { + const service = new ReplayEventsService( + clickhouseClient, + undefined, + undefined, + writeEventLog + ); + + try { + const result = await service.call({ + eventSlug: params.eventId, + environment: authentication.environment, + from: body.from, + to: body.to, + filter: body.filter as EventFilter | undefined, + tasks: body.tasks, + dryRun: body.dryRun, + }); + + return json(result, { status: 200 }); + } catch (error) { + if (error instanceof ServiceValidationError) { + return json({ error: error.message }, { status: error.status ?? 422 }); + } else if (error instanceof Error) { + return json({ error: error.message }, { status: 500 }); + } + + return json({ error: "Something went wrong" }, { status: 500 }); + } + } +); + +export { action, loader }; diff --git a/apps/webapp/app/v3/services/events/replayEvents.server.ts b/apps/webapp/app/v3/services/events/replayEvents.server.ts new file mode 100644 index 00000000000..1f6f978baf1 --- /dev/null +++ b/apps/webapp/app/v3/services/events/replayEvents.server.ts @@ -0,0 +1,177 @@ +import type { EventFilter } from "@trigger.dev/core/v3"; +import { eventFilterMatches } from "@trigger.dev/core/v3"; +import type { ClickHouse } from "@internal/clickhouse"; +import { PrismaClientOrTransaction } from "~/db.server"; +import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { logger } from "~/services/logger.server"; +import { BaseService, ServiceValidationError } from "../baseService.server"; +import { PublishEventService, type TriggerFn, type EventLogWriter } from "./publishEvent.server"; + +const MAX_REPLAY_EVENTS = 10_000; + +export type ReplayEventsParams = { + eventSlug: string; + environment: AuthenticatedEnvironment; + from: Date; + to: Date; + filter?: EventFilter; + tasks?: string[]; + dryRun?: boolean; +}; + +export type ReplayResult = { + replayedCount: number; + skippedCount: number; + dryRun: boolean; + runs?: Array<{ + taskIdentifier: string; + runId: string; + sourceEventId: string; + }>; +}; + +export class ReplayEventsService extends BaseService { + private readonly _clickhouse: ClickHouse; + private readonly _triggerFn?: TriggerFn; + private readonly _eventLogWriter?: EventLogWriter; + + constructor( + clickhouse: ClickHouse, + prisma?: PrismaClientOrTransaction, + triggerFn?: TriggerFn, + eventLogWriter?: EventLogWriter + ) { + super(prisma); + this._clickhouse = clickhouse; + this._triggerFn = triggerFn; + this._eventLogWriter = eventLogWriter; + } + + public async call(params: ReplayEventsParams): Promise { + return this.traceWithEnv("replayEvents", params.environment, async (span) => { + span.setAttribute("eventSlug", params.eventSlug); + span.setAttribute("dryRun", params.dryRun ?? false); + + // 1. Query ClickHouse for events in the date range + const queryBuilder = this._clickhouse.eventLog.queryBuilder(); + + queryBuilder + .where("project_id = {projectId: String}", { + projectId: params.environment.projectId, + }) + .where("environment_id = {environmentId: String}", { + environmentId: params.environment.id, + }) + .where("event_type = {eventType: String}", { + eventType: params.eventSlug, + }) + .where("published_at >= {from: DateTime64(3)}", { + from: params.from.toISOString(), + }) + .where("published_at <= {to: DateTime64(3)}", { + to: params.to.toISOString(), + }) + .orderBy("published_at ASC, event_id ASC") + .limit(MAX_REPLAY_EVENTS); + + const [queryError, events] = await queryBuilder.execute(); + + if (queryError) { + logger.error("Failed to query events for replay", { + eventSlug: params.eventSlug, + error: queryError.message, + }); + throw new ServiceValidationError("Failed to query events for replay", 500); + } + + span.setAttribute("totalEventsInRange", events.length); + + if (events.length === 0) { + return { replayedCount: 0, skippedCount: 0, dryRun: params.dryRun ?? false }; + } + + // 2. Apply optional filter to narrow down events + let filteredEvents = events; + if (params.filter) { + filteredEvents = events.filter((event) => { + try { + const payload = JSON.parse(event.payload); + return eventFilterMatches(payload, params.filter!); + } catch { + return false; + } + }); + } + + span.setAttribute("filteredEventsCount", filteredEvents.length); + + const skippedCount = events.length - filteredEvents.length; + + if (params.dryRun) { + return { + replayedCount: filteredEvents.length, + skippedCount, + dryRun: true, + }; + } + + // 3. Re-publish each event with replay idempotency keys + const publishService = new PublishEventService( + this._prisma, + this._triggerFn, + this._eventLogWriter + ); + + const runs: NonNullable = []; + let replayedCount = 0; + + for (const event of filteredEvents) { + try { + const payload = JSON.parse(event.payload); + const replayIdempotencyKey = `replay:${event.event_id}`; + + const result = await publishService.call( + params.eventSlug, + params.environment, + payload, + { + idempotencyKey: replayIdempotencyKey, + tags: event.tags.length > 0 ? event.tags : undefined, + } + ); + + // Filter to only the requested tasks (if specified) + const matchingRuns = params.tasks + ? result.runs.filter((r) => params.tasks!.includes(r.taskIdentifier)) + : result.runs; + + for (const run of matchingRuns) { + runs.push({ + taskIdentifier: run.taskIdentifier, + runId: run.runId, + sourceEventId: event.event_id, + }); + } + + replayedCount++; + } catch (error) { + logger.warn("Failed to replay event", { + eventId: event.event_id, + eventSlug: params.eventSlug, + error: error instanceof Error ? error.message : String(error), + }); + } + } + + span.setAttribute("replayedCount", replayedCount); + span.setAttribute("totalRunsCreated", runs.length); + + return { + replayedCount, + skippedCount, + dryRun: false, + runs, + }; + }); + } +} From d9a5d087a3fab5d426539138f6ffd6d5af138500 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:45:43 -0800 Subject: [PATCH 21/65] =?UTF-8?q?feat(events):=20phase=203=20=E2=80=94=20c?= =?UTF-8?q?hangeset=20+=20memory=20updates=20for=20event=20persistence?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .changeset/event-persistence-replay.md | 6 + .../memory/MEMORY.md | 28 + .../memory/pubsub-progress.md | 146 +++++ .../memory/pubsub-roadmap.md | 610 ++++++++++++++++++ .../memory/repo-conventions.md | 181 ++++++ 5 files changed, 971 insertions(+) create mode 100644 .changeset/event-persistence-replay.md create mode 100644 .claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md create mode 100644 .claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md create mode 100644 .claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md create mode 100644 .claude/projects/-Users-terac-repos-trigger-dev/memory/repo-conventions.md diff --git a/.changeset/event-persistence-replay.md b/.changeset/event-persistence-replay.md new file mode 100644 index 00000000000..80c455dcce4 --- /dev/null +++ b/.changeset/event-persistence-replay.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +--- + +Add event persistence in ClickHouse and replay API for pub/sub events diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md new file mode 100644 index 00000000000..d15fa701f49 --- /dev/null +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md @@ -0,0 +1,28 @@ +# Memory Index + +## Active Projects + +- **Pub/Sub Event System**: Full roadmap for implementing publish/subscribe in Trigger.dev + - Roadmap: [pubsub-roadmap.md](pubsub-roadmap.md) (in English) + - Repo conventions: [repo-conventions.md](repo-conventions.md) + - Progress: [pubsub-progress.md](pubsub-progress.md) + - Status: Phase 0 + Phase 1 + Phase 2 + Phase 3 complete + - Current phase: Phase 3 done → next is Phase 4 (Dead Letter Queue) + - Branch: `feat/pubsub-event-system` + +## Repo Quick Reference + +- Build: `pnpm run build --filter `, Test: `pnpm run test --filter ` +- Build order: core → sdk → cli → run-engine → webapp +- Services extend `WithRunEngine`, use `traceWithEnv()`, throw `ServiceValidationError` +- API routes use `createActionApiRoute()` builder +- Tests use testcontainers (never mocks), vitest +- Import `@trigger.dev/core` subpaths only, never root +- Migrations: clean extraneous lines, indexes need CONCURRENTLY in separate files +- Changesets required for `packages/*` changes (default: patch) + +## User Preferences + +- Documentation and roadmap files must be written in English +- Commit frequently (per sub-step) +- Never commit broken code diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md new file mode 100644 index 00000000000..057d5fb9c54 --- /dev/null +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md @@ -0,0 +1,146 @@ +# Pub/Sub Event System — Progress + +## Phase 0: Core Primitives + Basic Fan-out — COMPLETE +All sub-steps 0.1–0.9 implemented and committed. See git log for details. + +## Phase 1: Schema Registry + Validation — COMPLETE + +### What was done +1. **1.1 — Schema versioning DB + SchemaRegistryService** + - Added `compatibleVersions`, `deprecatedAt`, `deprecatedMessage` to EventDefinition model + - Added `schema` field to EventManifest (Zod schema in core) + - Created migration `20260228054059_add_event_schema_versioning` + - Installed `ajv@8` in webapp for JSON Schema validation + - Created `SchemaRegistryService` with: registerSchema, getSchema, listSchemas, validatePayload, checkCompatibility + +2. **1.2 — Schema discovery API endpoints** + - Created `GET /api/v1/events` — lists all event definitions with subscriber counts + - Created `GET /api/v1/events/:eventId` — event detail with schema, subscribers, versioning info + - Created `GET /api/v1/events/:eventId/schema` — JSON schema only + - Added response schemas (`ListEventsResponseBody`, `GetEventResponseBody`, `GetEventSchemaResponseBody`) to core + - Added API client methods (`listEvents`, `getEvent`, `getEventSchema`) + +3. **1.3 — Store JSON schema during deploy + validate at publish** + - Extended `EventMetadata` with `rawSchema` field + - SDK `event()` now stores raw schema in resource catalog + - CLI indexers (dev + managed) convert event schemas to JSON Schema via `schemaToJsonSchema` + - `syncWorkerEvents` stores JSON schema in `EventDefinition.schema` field + - `PublishEventService` validates payloads against stored schemas using ajv + - Added `getEventSchema()` to ResourceCatalog interface + implementations + +4. **1.4 — Tests + verification** + - 12 unit tests for SchemaRegistryService (validation, compatibility) + - 3 new integration tests for publish with schema validation + - All 9 integration tests pass (6 existing + 3 new) + - Full build passes: core, sdk, cli, webapp + - Changeset added + +### Key decisions +- Used `ajv@8` (industry standard) for JSON Schema validation at publish time +- Schema conversion happens at CLI indexing time (same pattern as task payloadSchema) +- Malformed schemas don't block publishes (graceful degradation) +- Compatibility checking is heuristic (checks required fields, type changes) — not exhaustive +- Schema validation errors return 422 with descriptive messages + +### Commits +- `e6249e407` — phase 1.1: schema versioning DB + SchemaRegistryService +- `49b2903d5` — phase 1.2: schema discovery API endpoints +- `2a06ef605` — phase 1.3: store JSON schema during deploy + validate at publish +- `cfa67d079` — phase 1.4: tests + changeset + +## Phase 2: Smart Routing — COMPLETE + +### What was done +1. **2.1 — Filter evaluator + pattern matcher** + - Reused existing `eventFilterMatches` (30+ tests already exist) — wrapped with caching layer + - Created `packages/core/src/v3/events/filterEvaluator.ts`: `compileFilter`, `evaluateFilter`, cache management + - Created `packages/core/src/v3/events/patternMatcher.ts`: `compilePattern`, `matchesPattern` for `*` and `#` wildcards + - 28 unit tests for filter evaluator + 30 unit tests for pattern matcher + +2. **2.2 — Filters in the SDK** + - Added `onEventFilter` to `TaskMetadata` schema + - Added `filter?: EventFilter` to `TaskOptionsWithEvent` type + - SDK `shared.ts` extracts filter and passes to metadata + - `syncWorkerEvents` stores filter in `EventSubscription.filter` during deploy + +3. **2.3 — Filter evaluation during fan-out** + - `PublishEventService` evaluates `subscription.filter` against payload before triggering + - Non-matching subscribers are skipped (no run created) + - Malformed filters err on side of delivery (graceful degradation) + - Span attributes: `filteredOutCount`, `matchingSubscriberCount` + - 3 integration tests: filter skips, filter allows, complex multi-field filter + +4. **2.4 — Wildcard pattern subscriptions** + - Created `events.match(pattern)` SDK helper returning `EventPatternMatcher` + - Added `onEventPattern` to `TaskMetadata` schema + - `syncWorkerEvents` stores pattern in `EventSubscription.pattern` + - `PublishEventService` queries pattern subscriptions and evaluates them against event slug + - Deduplication: subscriptions that appear in both exact and pattern results are kept only once + - 4 integration tests: `*` matches, `*` rejects, `#` multi-level, pattern+filter combo + +### Key decisions +- Reused existing `eventFilterMatches` rather than rewriting — it already has 34 tests +- Filter caching via `compileFilter(filter, cacheKey)` — keyed by subscription ID +- Pattern matching uses recursive segment-matching (not regex) for correctness with `#` +- `#` matches zero or more segments (AMQP-style) — `order.#` matches `order`, `order.created`, `order.status.changed` +- Pattern subscriptions still need an EventDefinition (placeholder with `pattern:` prefix) due to the foreign key constraint +- Malformed filters/patterns don't block publishes — errors are logged but delivery continues + +### Commits +- `cd426b366` — phase 2.1: filter evaluator + pattern matcher with tests +- `be7ca08fb` — phase 2.2: filters in SDK + stored during deploy +- `676d37eb0` — phase 2.3: filter evaluation during fan-out +- `846438cf9` — phase 2.4: wildcard pattern subscriptions + +## Phase 3: Event Persistence + Replay — COMPLETE + +### What was done +1. **3.1 — ClickHouse event_log_v1 table + insert function** + - Created migration `021_event_log_v1.sql`: `event_log_v1` table (MergeTree engine) + - Partitioned by `toYYYYMM(published_at)`, ordered by `(project_id, environment_id, event_type, published_at, event_id)` + - 90-day TTL, bloom filter indexes on event_id/publisher_run_id/idempotency_key + - ZSTD compression on all string columns, Delta+ZSTD on timestamps + - Created migration `022_event_counts_mv_v1.sql`: `event_counts_v1` (SummingMergeTree) + `event_counts_mv_v1` materialized view + - Created `internal-packages/clickhouse/src/eventLog.ts`: `EventLogV1Input/Output` schemas, `insertEventLog`, `getEventLogQueryBuilder` + - Added `eventLog` getter on `ClickHouse` class (insert + queryBuilder) + +2. **3.2 — Write to event log on each publish** + - Added `EventLogWriter` callback type + `EventLogEntry` type to `PublishEventService` + - Constructor accepts optional `eventLogWriter` (injectable, like `triggerFn`) + - After fan-out, calls writer with event metadata — fire-and-forget, errors logged not thrown + - Created `eventLogWriter.server.ts`: `writeEventLog()` function using `clickhouseClient.eventLog.insert` + - Wired into `publish` and `batchPublish` routes + +3. **3.3 — Event history API endpoint** + - Created `GET /api/v1/events/:eventId/history` route + - Query params: `from`, `to`, `limit` (max 200), `cursor`, `publisherRunId` + - Uses ClickHouse queryBuilder pattern: `.where().orderBy().limit().execute()` + - Cursor-based pagination (by published_at) + - Added `EventHistoryItem`, `GetEventHistoryResponseBody` schemas to core + - Added `getEventHistory()` API client method + +4. **3.4 — Event Replay service + API endpoint** + - Created `ReplayEventsService` with `call(params)` method + - Queries ClickHouse for events in date range, applies optional EventFilter + - Re-publishes each event via `PublishEventService` with `replay:{eventId}` idempotency key + - Supports `dryRun` (count without executing), `tasks[]` filter, max 10k events + - Created `POST /api/v1/events/:eventId/replay` endpoint + - Added `ReplayEventsRequestBody`, `ReplayEventsResponseBody` schemas to core + - Added `replayEvents()` API client method + +### Key decisions +- Used MergeTree (not ReplacingMergeTree) — events are immutable, no need for dedup/soft-delete +- EventLogWriter is injectable callback (not direct import) — keeps PublishEventService testable without ClickHouse +- Fire-and-forget ClickHouse writes — async `.then()` pattern, errors logged but never block publish +- Replay uses `replay:{originalEventId}` as idempotency key prefix — per-consumer dedup via PublishEventService +- No dedicated tests for 3.3/3.4 since they require ClickHouse (testcontainers only has Postgres+Redis) +- All existing 16 integration tests + 58 unit tests still pass + +### Commits +- `c63c1e781` — phase 3.1: event_log_v1 ClickHouse table + insert function +- `8dfb002ee` — phase 3.2: write to ClickHouse event log on each publish +- `02369b128` — phase 3.3: event history API endpoint +- `3d9863512` — phase 3.4: event replay service + API endpoint + +## Phase 4: Dead Letter Queue — NOT STARTED +Next phase. DLQ model + failure detection + management API. diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md new file mode 100644 index 00000000000..30dec1713f2 --- /dev/null +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md @@ -0,0 +1,610 @@ +# Trigger.dev Pub/Sub Event System — Complete Roadmap + +## Vision + +First-class pub/sub event system within Trigger.dev that enables: +- Defining events with typed schemas +- Declaratively subscribing tasks to events +- Publishing events from any task (or externally via API) +- Automatic fan-out to all subscribed consumers +- Delivery guarantees, ordering, replay, DLQ +- Replacing the need for Kafka/RabbitMQ/EventBridge for most use cases + +## Roadmap Structure + +9 incremental phases. Each phase delivers usable functionality. + +- **Phase 0**: Core — `event()` primitive + basic fan-out +- **Phase 1**: Schema Registry — versioning and validation +- **Phase 2**: Smart Routing — content-based filtering + wildcards +- **Phase 3**: Persistence — event log in ClickHouse + replay +- **Phase 4**: Dead Letter Queue — failure handling +- **Phase 5**: Ordering + Consumer Groups +- **Phase 6**: Publish-and-Wait (fan-out/fan-in) +- **Phase 7**: Rate Limiting + Backpressure +- **Phase 8**: Observability + Developer Experience + +--- + +## Implementation Process & Guidelines + +### Workflow per phase + +1. **Read roadmap** from memory to understand current phase tasks +2. **Research before coding**: use sub-agents to explore existing patterns for each step + - Before step 0.5 (worker registration), read how `createBackgroundWorker` currently works + - Before step 0.7 (fan-out service), read how `TriggerTaskService` works + - Follow existing code conventions (naming, file structure, error handling patterns) +3. **Read existing code** for every file being modified (never edit blind) +4. **Implement step by step** (0.1 → 0.2 → ... → 0.N) following dependency order +5. **Verify after each step**: + - `pnpm run build --filter ` — type check + - Run specific tests if they exist for the changed code + - Fix any issues before moving to next step +6. **Commit after each step** (each sub-step 0.1, 0.2, etc. gets its own commit) + - Never commit broken code (build failures, test failures) + - Commit message format: `feat(events): phase X.Y — ` + - Each commit should be atomic and self-contained +7. **Full verification at end of phase**: + - `pnpm run build --filter @trigger.dev/core --filter @trigger.dev/sdk --filter webapp` + - Run all event-related tests + - Run typecheck on affected packages +8. **Update roadmap** in memory: mark completed tasks with `[x]`, note any deviations +9. **Write phase summary** to memory: what was done, what decisions were made, any tech debt +10. **Proceed to next phase** without waiting for user input + +### Git strategy + +- Work on a feature branch: `feat/pubsub-event-system` +- Commit after every sub-step (0.1, 0.2, etc.) — one commit per sub-step minimum +- If a sub-step is large, break it into multiple commits (e.g., 0.4 DB models → one for schema, one for migration) +- Never commit code that doesn't build or has failing tests +- Changeset required when modifying public packages (`packages/*`) — add once per phase +- If a commit breaks something, fix it immediately before any other work + +### Code conventions (match existing codebase) + +- Follow the patterns found in existing services (e.g., `TriggerTaskService` for the publish service) +- Services go in `apps/webapp/app/v3/services/` with `.server.ts` suffix +- API routes follow Remix flat file convention in `apps/webapp/app/routes/` +- Use `env` from `apps/webapp/app/env.server.ts`, never `process.env` +- For testable code, pass config as options (never import env.server.ts in tests) +- Prisma operations follow existing patterns (transactions, error handling) +- Use `generateFriendlyId()` for user-facing IDs +- Zod schemas go in `packages/core/src/v3/schemas/` + +### Sub-agents usage + +- **DO use sub-agents for**: researching patterns in the codebase before coding, exploring how similar features are implemented, finding reference implementations +- **DO NOT use sub-agents for**: writing code — all code is written in main context to maintain full awareness of accumulated changes +- **DO use parallel bash calls for**: running build + test + typecheck simultaneously at verification checkpoints + +### Error recovery + +- If a build fails after a step: fix it before committing, don't move on +- If tests fail: investigate root cause, fix, re-run. Don't skip tests +- If a step's design doesn't work with existing code: adapt the plan, note deviation in roadmap +- If stuck on something for more than 2 attempts: note the blocker in the roadmap, skip to next step if possible, come back later +- If context gets too large: summarize current state to memory, the conversation auto-compresses old messages + +### Context management + +- The conversation auto-compresses old messages as context grows +- The roadmap file in memory serves as persistent state across compression +- Git commits serve as checkpoints — if context is lost, the code is in git +- Each phase starts by reading the roadmap + recent git log to understand state +- After completing each phase, write a brief summary to `memory/pubsub-progress.md` + +### Quality gates (must ALL pass before moving to next phase) + +1. All affected packages build successfully (`pnpm run build --filter ...`) +2. All new tests pass (`pnpm run test --filter ... --run`) +3. All existing tests still pass (no regressions) +4. No TypeScript errors in affected packages +5. All changes are committed to the feature branch +6. Roadmap updated with completed tasks marked `[x]` + +### Database migration rules (from CLAUDE.md) + +- Clean generated migrations of extraneous lines (see CLAUDE.md for list) +- Indexes MUST use CONCURRENTLY and be in their own separate migration file +- New tables don't need CONCURRENTLY +- Run `pnpm run db:migrate:deploy && pnpm run generate` after each migration + +### Autonomous execution + +The implementation runs end-to-end without user intervention: +- Phase 0 → Phase 1 → ... → Phase 8 +- No need to ask user for confirmation between phases +- If a decision needs to be made (e.g., two valid approaches), pick the one that matches existing codebase patterns and note it in the roadmap +- If something is genuinely ambiguous or risky, ask the user via AskUserQuestion +- User can review progress anytime via `git log --oneline feat/pubsub-event-system` or reading `memory/pubsub-progress.md` + +--- + +## Phase 0: Core Primitives + Basic Fan-out — COMPLETE + +> **Goal**: Define events, subscribe tasks, publish, and have fan-out work. +> **Deliverable**: `event()` + `task({ on: ... })` + `.publish()` working end-to-end. + +All sub-steps 0.1–0.9 implemented and committed. See `pubsub-progress.md` for details. + +--- + +## Phase 1: Schema Registry + Validation — COMPLETE + +> **Goal**: Versioned schemas, robust validation, event discovery. +> **Requires**: Phase 0 + +All sub-steps 1.1–1.4 implemented and committed. See `pubsub-progress.md` for details. + +Key deliverables: +- [x] DB migration: `compatibleVersions`, `deprecatedAt`, `deprecatedMessage` on EventDefinition +- [x] `SchemaRegistryService` with registerSchema, getSchema, listSchemas, validatePayload, checkCompatibility +- [x] Discovery API: GET /api/v1/events, GET /api/v1/events/:id, GET /api/v1/events/:id/schema +- [x] API client methods: listEvents, getEvent, getEventSchema +- [x] Schema pipeline: SDK stores raw schema → CLI converts to JSON Schema → DB stores it → PublishEventService validates +- [x] ajv@8 for JSON Schema validation at publish time +- [x] 12 unit tests + 3 integration tests for schema validation +- [x] Changeset added + +--- + +## Phase 2: Smart Routing — Content-based Filtering + Wildcards — COMPLETE + +> **Goal**: Subscribe with filters (`amount >= 1000`) and patterns (`order.*`). +> **Requires**: Phase 0 + +All sub-steps 2.1–2.4 implemented and committed. See `pubsub-progress.md` for details. + +Key deliverables: +- [x] Filter evaluator: `compileFilter`, `evaluateFilter` wrapping existing `eventFilterMatches` with caching +- [x] Pattern matcher: `compilePattern`, `matchesPattern` for `*` (single) and `#` (multi) wildcards +- [x] `filter` option on `TaskOptionsWithEvent`, stored in `EventSubscription.filter` during deploy +- [x] `events.match(pattern)` SDK helper for wildcard subscriptions +- [x] `PublishEventService` evaluates filters and patterns during fan-out +- [x] 58 unit tests (28 filter + 30 pattern) + 7 integration tests (3 filter + 4 pattern) +- [x] Changeset added + +--- + +## Phase 3: Event Persistence + Replay — COMPLETE + +> **Goal**: Store all published events, enable replay. +> **Requires**: Phase 0 + +All sub-steps 3.1–3.4 implemented and committed. See `pubsub-progress.md` for details. + +Key deliverables: +- [x] ClickHouse `event_log_v1` table (MergeTree, 90-day TTL, bloom filter indexes) +- [x] `event_counts_v1` + `event_counts_mv_v1` materialized view for per-type counts +- [x] `insertEventLog` function + `eventLog` getter on ClickHouse class +- [x] `EventLogWriter` callback in `PublishEventService` — fire-and-forget ClickHouse writes +- [x] `writeEventLog` singleton wired into publish + batchPublish routes +- [x] `GET /api/v1/events/:eventId/history` — paginated event history from ClickHouse +- [x] `ReplayEventsService` — replay events in date range with filter/tasks/dryRun +- [x] `POST /api/v1/events/:eventId/replay` endpoint +- [x] API client methods: `getEventHistory`, `replayEvents` +- [x] Response schemas: `EventHistoryItem`, `GetEventHistoryResponseBody`, `ReplayEventsRequestBody`, `ReplayEventsResponseBody` +- [x] Changeset added + +--- + +## Phase 4: Dead Letter Queue + +> **Goal**: Events that fail after all retries go to a DLQ for inspection and reprocessing. +> **Requires**: Phase 0, Phase 3 (for persistence) + +### 4.1 — DLQ model + +**File to modify**: `internal-packages/database/prisma/schema.prisma` + +Tasks: +- [ ] Create `DeadLetterEvent` model: + ```prisma + model DeadLetterEvent { + id String @id @default(cuid()) + friendlyId String @unique + + eventType String // "order.created" + payload Json + payloadType String @default("application/json") + + taskSlug String // consumer that failed + failedRunId String // run that failed + failedRun TaskRun @relation(...) + + error Json? // last error + attemptCount Int // how many attempts there were + + status DeadLetterStatus @default(PENDING) + // PENDING = awaiting action + // RETRIED = manually retried + // DISCARDED = manually discarded + + sourceEventId String? // reference to event_log + + projectId String + environmentId String + + createdAt DateTime @default(now()) + processedAt DateTime? + + @@index([projectId, environmentId, status]) + @@index([eventType, environmentId]) + } + ``` +- [ ] Enum `DeadLetterStatus`: `PENDING`, `RETRIED`, `DISCARDED` +- [ ] Migration + +### 4.2 — Detect failed event runs and route to DLQ + +**New file**: `apps/webapp/app/v3/services/events/deadLetterService.server.ts` + +Tasks: +- [ ] Listen to `runFailed` event from EventBus +- [ ] Detect if the failed run has `sourceEventId` (came from a published event) +- [ ] If the run exhausted all retries → create `DeadLetterEvent` +- [ ] Emit `deadLetterCreated` event for telemetry +- [ ] Optional: notify via webhook (reuse alert webhooks infra) + +### 4.3 — DLQ management API + +**New file**: `apps/webapp/app/routes/api.v1.events.dlq.ts` + +Tasks: +- [ ] `GET /api/v1/events/dlq` — list dead letter events (paginated, filterable) +- [ ] `POST /api/v1/events/dlq/:id/retry` — manually retry + - Create new run for the task with the same payload + - Mark DLQ entry as RETRIED +- [ ] `POST /api/v1/events/dlq/:id/discard` — manually discard +- [ ] `POST /api/v1/events/dlq/retry-all` — retry all PENDING of a type + +### 4.4 — DLQ configuration per event + +**File to modify**: `packages/trigger-sdk/src/v3/events.ts` + +Tasks: +- [ ] Extend `event()` options: + ```typescript + event({ + id: "order.created", + schema: orderSchema, + deadLetter: { + enabled: true, // default true + retentionDays: 30, // how long to keep in DLQ + onDeadLetter: dlqHandler // optional task that processes DLQ items + }, + }); + ``` +- [ ] If `onDeadLetter` task is defined, trigger automatically when an event goes to DLQ + +--- + +## Phase 5: Ordering + Consumer Groups + +> **Goal**: Order guarantees by partition key. Competing consumers for load balancing. +> **Requires**: Phase 0 + +### 5.1 — Ordering keys + +**File to modify**: `packages/trigger-sdk/src/v3/events.ts` + +Tasks: +- [ ] Extend `event()`: + ```typescript + event({ + id: "order.updated", + schema: orderSchema, + orderingKey: (payload) => payload.orderId, + // Events with the same orderId are processed sequentially + }); + ``` +- [ ] Alternative: ordering at publish time: + ```typescript + await orderUpdated.publish(payload, { + orderingKey: payload.orderId, + }); + ``` + +**File to modify**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` + +Tasks: +- [ ] When ordering key is present: + - Derive queue name: `event:{eventSlug}:order:{orderingKeyHash}` + - Use queue with `concurrencyLimit: 1` to guarantee sequence + - Each subscribed consumer uses this queue +- [ ] Reuse existing `RunQueue` with named queues +- [ ] Ordering is per-consumer: each consumer processes in order within its partition + +### 5.2 — Consumer Groups + +**File to modify**: `internal-packages/database/prisma/schema.prisma` + +Tasks: +- [ ] `consumerGroup` field already defined in Phase 0.4 on `EventSubscription` +- [ ] Constraint: within a consumer group, only 1 run per event + +**File to modify**: `packages/trigger-sdk/src/v3/shared.ts` + +Tasks: +- [ ] Extend task options: + ```typescript + task({ + on: orderCreated, + consumerGroup: "order-processors", + run: async (payload) => { ... }, + }); + ``` + +**File to modify**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` + +Tasks: +- [ ] In fan-out: + - Group subscriptions by `consumerGroup` + - For subscriptions WITHOUT a group: normal fan-out (1 run each) + - For subscriptions WITH a group: pick 1 subscription from the group (round-robin or random) + - Reuse `FairQueueSelectionStrategy` for fair selection +- [ ] Persist selection so replay uses the same consumer + +Tests: +- [ ] Test: 3 tasks in the same consumer group → only 1 receives each event +- [ ] Test: fair distribution among group members +- [ ] Test: task without group + task with group both work on the same event + +--- + +## Phase 6: Publish-and-Wait (Fan-out / Fan-in) + +> **Goal**: Publish an event and wait for all consumers to finish. +> **Requires**: Phase 0 + +### 6.1 — publishAndWait in the SDK + +**File to modify**: `packages/trigger-sdk/src/v3/events.ts` + +Tasks: +- [ ] Implement `EventDefinition.publishAndWait()`: + ```typescript + const results = await orderCreated.publishAndWait(payload, { + timeout: "5m", // optional + }); + // results: Record + ``` +- [ ] Internally: + - Call special endpoint `POST /api/v1/events/:eventId/publishAndWait` + - The endpoint creates runs + waitpoints + - Returns when all waitpoints complete + +### 6.2 — Backend: publish with waitpoints + +**New file**: `apps/webapp/app/v3/services/events/publishAndWait.server.ts` + +Tasks: +- [ ] Reuse existing `WaitpointSystem`: + 1. Create a coordinator "event waitpoint" + 2. Fan-out: create a run per consumer + 3. For each run, create a child waitpoint linked to the coordinator + 4. The caller is blocked on the coordinator waitpoint + 5. When each consumer finishes → completes its waitpoint + 6. When all child waitpoints complete → completes the coordinator +- [ ] Timeout: if a consumer doesn't finish, complete with partial error +- [ ] Result: aggregate outputs from each consumer + +### 6.3 — Timeout and error handling + +Tasks: +- [ ] If a consumer fails definitively (exhausted retries) → its result is error +- [ ] If timeout is reached before all finish → partial result with status of each +- [ ] The caller decides what to do with partial results + +--- + +## Phase 7: Rate Limiting + Backpressure + +> **Goal**: Control publish and consume speed. Detect lag. +> **Requires**: Phase 0 + +### 7.1 — Publish rate limiting + +**New file**: `apps/webapp/app/v3/services/events/rateLimiter.server.ts` + +Tasks: +- [ ] Implement sliding window rate limiter (Redis): + - Key: `ratelimit:publish:{projectId}:{eventSlug}` + - Configurable per-event + - Default: 1000 events/minute per type +- [ ] Response header `X-RateLimit-Remaining` on publish endpoint +- [ ] When exceeded: HTTP 429 with `Retry-After` header + +**File to modify**: `packages/trigger-sdk/src/v3/events.ts` + +Tasks: +- [ ] Extend `event()`: + ```typescript + event({ + id: "order.created", + schema: orderSchema, + rateLimit: { + limit: 500, + window: "1m", + }, + }); + ``` + +### 7.2 — Consumer rate limiting + +**File to modify**: `packages/trigger-sdk/src/v3/shared.ts` + +Tasks: +- [ ] Extend task with per-event rate limit: + ```typescript + task({ + on: orderCreated, + rateLimit: { limit: 100, window: "1m" }, + run: async (payload) => { ... }, + }); + ``` +- [ ] Implement as queue with rate limit (reuse concurrency limits infra) +- [ ] Events that exceed the rate are enqueued (not lost), processed when capacity is available + +### 7.3 — Backpressure detection + metrics + +**New file**: `apps/webapp/app/v3/services/events/backpressureMonitor.server.ts` + +Tasks: +- [ ] Monitor lag per consumer: `pendingRuns = publishedEvents - processedEvents` +- [ ] Metrics in ClickHouse: + - `event_publish_rate` per type + - `event_consume_rate` per consumer + - `event_consumer_lag` (difference) +- [ ] Alerts when lag exceeds configurable threshold +- [ ] Expose metrics in API: `GET /api/v1/events/:eventId/metrics` + +--- + +## Phase 8: Observability + Developer Experience + +> **Goal**: Dashboard, CLI, full traceability, documentation. +> **Requires**: Phases 0-7 (gradual, can start earlier) + +### 8.1 — Trace propagation + +**File to modify**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` + +Tasks: +- [ ] Propagate `traceId` from publisher to all consumer runs +- [ ] Add span attribute `trigger.event.id` and `trigger.event.type` to each run +- [ ] Add `sourceEventId` to TaskRun metadata +- [ ] In run dashboard: show "Triggered by event: order.created" +- [ ] In event dashboard: show all runs it generated + +### 8.2 — Events dashboard (webapp) + +**New files in**: `apps/webapp/app/routes/` + +Tasks: +- [ ] Event list view: `/orgs/:orgSlug/projects/:projectSlug/events` + - List of EventDefinitions with stats (publish count, last published, subscriber count) +- [ ] Event detail view: `.../events/:eventSlug` + - Schema (formatted) + - List of subscribers (tasks) + - Recent publication history (from ClickHouse) + - Metrics: publish rate, consumer lag +- [ ] DLQ view: `.../events/dlq` + - List of dead letter events, filterable by type/status + - Actions: retry, discard, retry all +- [ ] Corresponding presenters in `apps/webapp/app/v3/presenters/` + +### 8.3 — CLI commands + +**File to modify**: `packages/cli-v3/src/commands/` + +Tasks: +- [ ] `trigger events list` — list project events +- [ ] `trigger events publish --payload '{...}'` — publish from CLI +- [ ] `trigger events history --from --to` — view history +- [ ] `trigger events replay --from --to` — replay +- [ ] `trigger events dlq list` — view dead letter queue +- [ ] `trigger events dlq retry ` — retry DLQ item + +### 8.4 — SDK helpers and DX + +**File to modify**: `packages/trigger-sdk/src/v3/events.ts` + +Tasks: +- [ ] Helper for local testing: + ```typescript + import { testEvent } from "@trigger.dev/sdk/testing"; + + // In tests + const result = await testEvent(orderCreated, { orderId: "123", amount: 50 }); + expect(result.runs).toHaveLength(2); + ``` +- [ ] Full type inference: consumer payload typed from event schema +- [ ] Descriptive error messages when schema validation fails +- [ ] Complete JSDoc on all public functions + +### 8.5 — Documentation + +**New files in**: `rules/` (next version) + +Tasks: +- [ ] Event system documentation for SDK rules: + - `events-basic.md` — define events, publish, subscribe + - `events-advanced.md` — filters, wildcards, ordering, consumer groups + - `events-reliability.md` — DLQ, replay, idempotency + - `events-patterns.md` — common patterns (saga, CQRS, event sourcing) +- [ ] Update `.claude/skills/trigger-dev-tasks/SKILL.md` with event examples +- [ ] Update `manifest.json` with new version + +### 8.6 — Reference project + +**New directory**: `references/event-system/` + +Tasks: +- [ ] Reference project demonstrating: + - Definition of multiple events + - Tasks subscribed with filters + - Publish from a task + - Publish-and-wait pattern + - DLQ handler +- [ ] Use as manual testing project (similar to hello-world) + +--- + +## Phase dependencies + +``` +Phase 0 (Core) ─────┬── Phase 1 (Schema Registry) + ├── Phase 2 (Smart Routing) + ├── Phase 3 (Persistence + Replay) + │ └── Phase 4 (DLQ) ← needs persistence + ├── Phase 5 (Ordering + Consumer Groups) + ├── Phase 6 (Publish-and-Wait) + ├── Phase 7 (Rate Limiting) + └── Phase 8 (DX) ← gradual, can start with Phase 0 +``` + +Phases 1-7 are mostly independent of each other (all depend on Phase 0). +Phase 4 (DLQ) benefits from Phase 3 (persistence) but can work without it. +Phase 8 (DX) is built incrementally with each phase. + +--- + +## Key files to create/modify (summary) + +### New files +| File | Phase | +|------|-------| +| `packages/trigger-sdk/src/v3/events.ts` | 0 | +| `packages/core/src/v3/events/schemaUtils.ts` | 1 | +| `packages/core/src/v3/events/filterEvaluator.ts` | 2 | +| `apps/webapp/app/routes/api.v1.events.$eventId.publish.ts` | 0 | +| `apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts` | 0 | +| `apps/webapp/app/routes/api.v1.events.$eventId.history.ts` | 3 | +| `apps/webapp/app/routes/api.v1.events.$eventId.replay.ts` | 3 | +| `apps/webapp/app/routes/api.v1.events.ts` | 1 | +| `apps/webapp/app/routes/api.v1.events.dlq.ts` | 4 | +| `apps/webapp/app/v3/services/events/publishEvent.server.ts` | 0 | +| `apps/webapp/app/v3/services/events/publishAndWait.server.ts` | 6 | +| `apps/webapp/app/v3/services/events/schemaRegistry.server.ts` | 1 | +| `apps/webapp/app/v3/services/events/deadLetterService.server.ts` | 4 | +| `apps/webapp/app/v3/services/events/replayEvents.server.ts` | 3 | +| `apps/webapp/app/v3/services/events/rateLimiter.server.ts` | 7 | +| `apps/webapp/app/v3/services/events/backpressureMonitor.server.ts` | 7 | +| `internal-packages/clickhouse/schema/XXX_event_log_v1.sql` | 3 | +| `internal-packages/run-engine/src/engine/tests/events.test.ts` | 0 | +| `references/event-system/` | 8 | + +### Files to modify +| File | Phase | +|------|-------| +| `packages/trigger-sdk/src/v3/index.ts` | 0 | +| `packages/trigger-sdk/src/v3/shared.ts` | 0, 2, 5 | +| `packages/core/src/v3/schemas/resources.ts` | 0 | +| `packages/core/src/v3/resource-catalog/catalog.ts` | 0 | +| `packages/core/src/v3/resource-catalog/standardCatalog.ts` | 0 | +| `internal-packages/database/prisma/schema.prisma` | 0, 1, 4, 5 | +| `apps/webapp/app/v3/services/createBackgroundWorker.server.ts` | 0 | +| `apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts` | 0 | diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/repo-conventions.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/repo-conventions.md new file mode 100644 index 00000000000..039c11706cf --- /dev/null +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/repo-conventions.md @@ -0,0 +1,181 @@ +# Trigger.dev Repo Conventions & Reference + +## Build System + +- **pnpm 10.23.0** + **Turborepo** + **TypeScript 5.5.4** +- Build order: `core → sdk/build/redis-worker → cli → run-engine → webapp` +- Public packages use **tshy** (dual ESM/CJS), internal use **tsc** +- Custom condition: `@triggerdotdev/source` for dev-time source resolution +- `turbo.json`: `test` depends on `^build`, `dev` depends on `^build` (no cache) + +## Build Commands + +```bash +pnpm run build --filter @trigger.dev/core # Build core +pnpm run build --filter @trigger.dev/sdk # Build SDK (needs core) +pnpm run build --filter webapp # Build webapp (needs everything) +pnpm run typecheck # Typecheck all packages +pnpm run check-exports # Validate subpath exports (attw) +pnpm run test --filter # Run tests +cd && pnpm run test ./path.test.ts --run # Single test file +``` + +## CI Requirements (must pass for PRs) + +1. TypeScript typecheck +2. Export validation (attw) +3. Unit tests: webapp (8 shards), packages (1 shard), internal (8 shards) +4. E2E tests for CLI (Ubuntu + Windows, npm + pnpm) +5. SDK compatibility (Node 20.20, 22.12, Bun, Deno, Cloudflare Workers) +6. **ESLint and Prettier NOT enforced in CI** +7. Lefthook blocks direct commits to main + +## Service Pattern + +```typescript +// Extend WithRunEngine for services that need the engine +export class MyService extends WithRunEngine { + public async call(params: Params): Promise { + return await this.traceWithEnv("call()", environment, async (span) => { + span.setAttribute("key", value); + // ... business logic + }); + } +} + +// Errors +throw new ServiceValidationError("message", 422); + +// Error-safe async +const [error, result] = await tryCatch(asyncOperation()); +if (error) { throw new ServiceValidationError("..."); } +``` + +## API Route Pattern + +```typescript +const { action, loader } = createActionApiRoute( + { + headers: HeadersSchema, + params: ParamsSchema, + body: BodySchema, + allowJWT: true, + maxContentLength: env.TASK_PAYLOAD_MAXIMUM_SIZE, + authorization: { + action: "trigger", + resource: (params) => ({ tasks: params.taskId }), + superScopes: ["write:tasks", "admin"], + }, + corsStrategy: "all", + }, + async ({ body, headers, params, authentication }) => { + // authentication.environment = AuthenticatedEnvironment + const service = new MyService(); + const result = await service.call(/* ... */); + return json(result, { status: 200 }); + } +); +export { action, loader }; +``` + +## Test Pattern + +```typescript +import { containerTest } from "@internal/testcontainers"; + +vi.setConfig({ testTimeout: 60_000 }); + +describe("MyFeature", () => { + containerTest("should work", async ({ prisma, redisOptions }) => { + const engine = new RunEngine({ prisma, worker: { redis: redisOptions, ... } }); + // ... test with real DB and Redis + }); +}); +``` + +- Tests next to source: `MyService.ts` → `MyService.test.ts` +- **Never mock** — use testcontainers +- Pre-pull Docker images: PostgreSQL 14, ClickHouse, Redis, Electric 1.2.4 + +## SDK Pattern + +```typescript +// Task definition: packages/trigger-sdk/src/v3/tasks.ts +export const task = createTask; + +// Trigger flow: shared.ts → trigger_internal() → apiClient.triggerTask() +// HTTP: POST /api/v1/tasks/{taskId}/trigger +// Auth: Bearer {apiKey} in Authorization header +// Payload: stringifyIO() for serialization +``` + +## Worker Registration + +- File: `apps/webapp/app/v3/services/createBackgroundWorker.server.ts` +- Flow: `createBackgroundWorker()` → `createWorkerResources()` → `createWorkerTasks()` + `syncDeclarativeSchedules()` +- Each task gets a `BackgroundWorkerTask` record with slug, queue, retry config +- Queues: VIRTUAL (auto per task) or NAMED (explicit) + +## Key Utilities + +- `generateFriendlyId("prefix")` → `prefix_xxxxx` (for user-facing IDs) +- `RunId.generate()` → `{ id, friendlyId }` for run IDs +- `stringifyIO()` / `conditionallyExportPacket()` for payload serialization +- `handleRequestIdempotency()` for request-level caching +- `createTags()` for tag creation/linking +- `parseDelay()` for delay string parsing +- `tryCatch()` for error-safe async operations +- `logger.debug/info/warn/error()` for logging + +## Import Rules + +- `@trigger.dev/core`: ALWAYS import subpaths (`@trigger.dev/core/v3`, etc.), NEVER root +- `env.server.ts`: NEVER import in test files, pass config as options +- Services: `service.server.ts` (testable) + `serviceGlobal.server.ts` (config singleton) + +## Database Conventions + +- Models: PascalCase, Fields: camelCase +- IDs: `id String @id @default(cuid())` +- Timestamps: `createdAt DateTime @default(now())`, `updatedAt DateTime @updatedAt` +- Migrations: remove extraneous lines (see CLAUDE.md list) +- Indexes: CONCURRENTLY in separate migration files + +## Prisma Client + +- Global instance: `apps/webapp/app/db.server.ts` +- Replica: `$replica` for read-only queries +- Transactions: `prisma.$transaction(async (tx) => { ... })` + +## Core Package Exports (critical) + +- ~30 subpath exports in `packages/core/package.json` +- New exports need entries in `tshy.exports` + rebuild +- Validated by `check-exports` in CI + +## Changeset Rules + +- Required for any `packages/*` or `integrations/*` changes +- Default: **patch** for bug fixes +- **minor** requires maintainer confirmation +- **major** requires explicit approval +- Fixed group: `[@trigger.dev/*, trigger.dev]` released together + +## Run Engine Trigger Flow (for reference) + +``` +SDK trigger() → HTTP POST /api/v1/tasks/{taskId}/trigger + → createActionApiRoute (auth + validation) + → TriggerTaskService.call() (engine version routing) + → RunEngineTriggerTaskService.call() (validation, delay, TTL, idempotency, queue) + → engine.trigger() (debounce, create TaskRun, emit "runCreated") + → eventBus.emit("runCreated", { runId }) +``` + +## Docker Services (for development) + +- PostgreSQL 14: port 5432 (postgres/postgres) +- Redis 7: port 6379 +- Electric 1.2.4: port 3060 +- ClickHouse 25.6.2: ports 8123/9000 (default/password) +- Start: `pnpm run docker` From ec4139642dd3f82ce3ef660c3c904f528dd8551b Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 22:58:38 -0800 Subject: [PATCH 22/65] =?UTF-8?q?feat(events):=20phase=204.1=20=E2=80=94?= =?UTF-8?q?=20DeadLetterEvent=20model=20+=20enum=20+=20migration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../migration.sql | 41 +++++++++++++++++++ .../database/prisma/schema.prisma | 41 +++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 internal-packages/database/prisma/migrations/20260228065743_add_dead_letter_event/migration.sql diff --git a/internal-packages/database/prisma/migrations/20260228065743_add_dead_letter_event/migration.sql b/internal-packages/database/prisma/migrations/20260228065743_add_dead_letter_event/migration.sql new file mode 100644 index 00000000000..4d55445b611 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260228065743_add_dead_letter_event/migration.sql @@ -0,0 +1,41 @@ +-- CreateEnum +CREATE TYPE "public"."DeadLetterStatus" AS ENUM ('PENDING', 'RETRIED', 'DISCARDED'); + +-- CreateTable +CREATE TABLE "public"."DeadLetterEvent" ( + "id" TEXT NOT NULL, + "friendlyId" TEXT NOT NULL, + "eventType" TEXT NOT NULL, + "payload" JSONB NOT NULL, + "payloadType" TEXT NOT NULL DEFAULT 'application/json', + "taskSlug" TEXT NOT NULL, + "failedRunId" TEXT NOT NULL, + "error" JSONB, + "attemptCount" INTEGER NOT NULL, + "status" "public"."DeadLetterStatus" NOT NULL DEFAULT 'PENDING', + "sourceEventId" TEXT, + "projectId" TEXT NOT NULL, + "environmentId" TEXT NOT NULL, + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "processedAt" TIMESTAMP(3), + + CONSTRAINT "DeadLetterEvent_pkey" PRIMARY KEY ("id") +); + +-- CreateIndex +CREATE UNIQUE INDEX "DeadLetterEvent_friendlyId_key" ON "public"."DeadLetterEvent"("friendlyId"); + +-- CreateIndex +CREATE INDEX "DeadLetterEvent_projectId_environmentId_status_idx" ON "public"."DeadLetterEvent"("projectId", "environmentId", "status"); + +-- CreateIndex +CREATE INDEX "DeadLetterEvent_eventType_environmentId_idx" ON "public"."DeadLetterEvent"("eventType", "environmentId"); + +-- AddForeignKey +ALTER TABLE "public"."DeadLetterEvent" ADD CONSTRAINT "DeadLetterEvent_failedRunId_fkey" FOREIGN KEY ("failedRunId") REFERENCES "public"."TaskRun"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."DeadLetterEvent" ADD CONSTRAINT "DeadLetterEvent_projectId_fkey" FOREIGN KEY ("projectId") REFERENCES "public"."Project"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "public"."DeadLetterEvent" ADD CONSTRAINT "DeadLetterEvent_environmentId_fkey" FOREIGN KEY ("environmentId") REFERENCES "public"."RuntimeEnvironment"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 14bd23e913d..329addd2a68 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -344,6 +344,7 @@ model RuntimeEnvironment { BulkActionGroup BulkActionGroup[] customerQueries CustomerQuery[] eventSubscriptions EventSubscription[] + deadLetterEvents DeadLetterEvent[] @@unique([projectId, slug, orgMemberId]) @@unique([projectId, shortcode]) @@ -416,6 +417,7 @@ model Project { metricsDashboards MetricsDashboard[] eventDefinitions EventDefinition[] eventSubscriptions EventSubscription[] + deadLetterEvents DeadLetterEvent[] } enum ProjectVersion { @@ -642,6 +644,43 @@ model EventSubscription { @@index([projectId, environmentId]) } +enum DeadLetterStatus { + PENDING + RETRIED + DISCARDED +} + +model DeadLetterEvent { + id String @id @default(cuid()) + friendlyId String @unique + + eventType String // "order.created" + payload Json + payloadType String @default("application/json") + + taskSlug String // consumer that failed + failedRun TaskRun @relation("DeadLetterRun", fields: [failedRunId], references: [id], onDelete: Cascade, onUpdate: Cascade) + failedRunId String + + error Json? // last error + attemptCount Int // how many attempts there were + + status DeadLetterStatus @default(PENDING) + + sourceEventId String? // reference to event_log event_id + + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) + projectId String + environment RuntimeEnvironment @relation(fields: [environmentId], references: [id], onDelete: Cascade, onUpdate: Cascade) + environmentId String + + createdAt DateTime @default(now()) + processedAt DateTime? + + @@index([projectId, environmentId, status]) + @@index([eventType, environmentId]) +} + model TaskRun { id String @id @default(cuid()) @@ -771,6 +810,8 @@ model TaskRun { scheduleInstanceId String? scheduleId String? + deadLetterEvents DeadLetterEvent[] @relation("DeadLetterRun") + sourceBulkActionItems BulkActionItem[] @relation("SourceActionItemRun") destinationBulkActionItems BulkActionItem[] @relation("DestinationActionItemRun") From 5ed48645e7be6569fd3297fac48b83e73291bdda Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:01:21 -0800 Subject: [PATCH 23/65] =?UTF-8?q?feat(events):=20phase=204.2=20=E2=80=94?= =?UTF-8?q?=20store=20event=20context=20on=20runs=20+=20DLQ=20detection?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../events/deadLetterService.server.ts | 85 +++++++++++++++++++ .../v3/services/events/publishEvent.server.ts | 16 +++- .../app/v3/services/finalizeTaskRun.server.ts | 12 +++ 3 files changed, 112 insertions(+), 1 deletion(-) create mode 100644 apps/webapp/app/v3/services/events/deadLetterService.server.ts diff --git a/apps/webapp/app/v3/services/events/deadLetterService.server.ts b/apps/webapp/app/v3/services/events/deadLetterService.server.ts new file mode 100644 index 00000000000..fb699eb1dab --- /dev/null +++ b/apps/webapp/app/v3/services/events/deadLetterService.server.ts @@ -0,0 +1,85 @@ +import { type TaskRun } from "@trigger.dev/database"; +import { PrismaClientOrTransaction } from "~/db.server"; +import { logger } from "~/services/logger.server"; +import { generateFriendlyId } from "../../friendlyIdentifiers"; +import { BaseService } from "../baseService.server"; + +type EventContext = { + eventId: string; + eventType: string; + sourceEventId?: string; +}; + +export class DeadLetterService extends BaseService { + /** + * Check if a failed run was triggered by an event and create a DLQ entry if so. + * Called from FinalizeTaskRunService when a run reaches a failed status. + */ + public async handleFailedRun(run: TaskRun, error: unknown): Promise { + const eventContext = this.extractEventContext(run); + if (!eventContext) { + return; // Not an event-triggered run + } + + try { + await this._prisma.deadLetterEvent.create({ + data: { + id: generateFriendlyId("dle"), + friendlyId: generateFriendlyId("dle"), + eventType: eventContext.eventType, + payload: this.extractPayload(run), + taskSlug: run.taskIdentifier, + failedRunId: run.id, + error: error !== undefined && error !== null ? (error as object) : undefined, + attemptCount: run.attemptNumber ?? 1, + sourceEventId: eventContext.sourceEventId ?? eventContext.eventId, + projectId: run.projectId, + environmentId: run.runtimeEnvironmentId, + }, + }); + + logger.info("Created dead letter event for failed event-triggered run", { + runId: run.id, + eventType: eventContext.eventType, + eventId: eventContext.eventId, + taskSlug: run.taskIdentifier, + }); + } catch (error) { + logger.error("Failed to create dead letter event", { + runId: run.id, + error: error instanceof Error ? error.message : String(error), + }); + } + } + + private extractEventContext(run: TaskRun): EventContext | null { + if (!run.metadata) return null; + + try { + const metadata = + typeof run.metadata === "string" ? JSON.parse(run.metadata) : run.metadata; + + if (metadata && typeof metadata === "object" && "$$event" in metadata) { + const event = (metadata as Record).$$event; + if (event && typeof event === "object" && "eventType" in event) { + return event as EventContext; + } + } + } catch { + // Malformed metadata — not an event-triggered run + } + + return null; + } + + private extractPayload(run: TaskRun): object { + try { + if (typeof run.payload === "string") { + return JSON.parse(run.payload); + } + return { raw: run.payload }; + } catch { + return { raw: run.payload }; + } + } +} diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 6c4d096eba7..7dfd9f96a3f 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -210,6 +210,20 @@ export class PublishEventService extends BaseService { ? `${options.idempotencyKey}:${subscription.taskSlug}` : undefined; + // Merge event context into metadata so DLQ can identify event-triggered runs + const eventMetadata = { + ...(typeof options.metadata === "object" && options.metadata !== null + ? (options.metadata as Record) + : {}), + $$event: { + eventId, + eventType: eventSlug, + sourceEventId: options.idempotencyKey + ? `${options.idempotencyKey}` + : undefined, + }, + }; + const body: TriggerTaskRequestBody = { payload, context: options.context, @@ -219,7 +233,7 @@ export class PublishEventService extends BaseService { ? options.tags : [options.tags] : undefined, - metadata: options.metadata, + metadata: eventMetadata, delay: options.delay, }, }; diff --git a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts index ab51df5de60..a5fa0505d79 100644 --- a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts +++ b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts @@ -16,6 +16,7 @@ import { import { PerformTaskRunAlertsService } from "./alerts/performTaskRunAlerts.server"; import { BaseService } from "./baseService.server"; import { completeBatchTaskRunItemV3 } from "./batchTriggerV3.server"; +import { DeadLetterService } from "./events/deadLetterService.server"; import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server"; import { ResumeBatchRunService } from "./resumeBatchRun.server"; import { ResumeDependentParentsService } from "./resumeDependentParents.server"; @@ -147,6 +148,17 @@ export class FinalizeTaskRunService extends BaseService { //enqueue alert if (isFailedRunStatus(run.status)) { await PerformTaskRunAlertsService.enqueue(run.id); + + // Check if this was an event-triggered run and create a DLQ entry + try { + const dlqService = new DeadLetterService(this._prisma); + await dlqService.handleFailedRun(run, taskRunError); + } catch (dlqError) { + logger.error("FinalizeTaskRunService: Failed to check dead letter queue", { + runId: run.id, + error: dlqError, + }); + } } if (isFatalRunStatus(run.status)) { From 89d0daba83040821c26aaac5d051562c8668cd3f Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:04:40 -0800 Subject: [PATCH 24/65] =?UTF-8?q?feat(events):=20phase=204.3=20=E2=80=94?= =?UTF-8?q?=20DLQ=20management=20API=20endpoints?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../routes/api.v1.events.dlq.$id.discard.ts | 36 +++++ .../app/routes/api.v1.events.dlq.$id.retry.ts | 36 +++++ .../app/routes/api.v1.events.dlq.retry-all.ts | 36 +++++ apps/webapp/app/routes/api.v1.events.dlq.ts | 56 +++++++ .../events/deadLetterManagement.server.ts | 150 ++++++++++++++++++ packages/core/src/v3/apiClient/index.ts | 73 +++++++++ packages/core/src/v3/schemas/api.ts | 51 ++++++ 7 files changed, 438 insertions(+) create mode 100644 apps/webapp/app/routes/api.v1.events.dlq.$id.discard.ts create mode 100644 apps/webapp/app/routes/api.v1.events.dlq.$id.retry.ts create mode 100644 apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts create mode 100644 apps/webapp/app/routes/api.v1.events.dlq.ts create mode 100644 apps/webapp/app/v3/services/events/deadLetterManagement.server.ts diff --git a/apps/webapp/app/routes/api.v1.events.dlq.$id.discard.ts b/apps/webapp/app/routes/api.v1.events.dlq.$id.discard.ts new file mode 100644 index 00000000000..31881df9a1e --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.dlq.$id.discard.ts @@ -0,0 +1,36 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { DeadLetterManagementService } from "~/v3/services/events/deadLetterManagement.server"; + +const ParamsSchema = z.object({ + id: z.string(), +}); + +const { action, loader } = createActionApiRoute( + { + params: ParamsSchema, + corsStrategy: "all", + authorization: { + action: "trigger", + resource: () => ({ tasks: "*" }), + superScopes: ["write:tasks", "admin"], + }, + }, + async ({ params, authentication }) => { + const service = new DeadLetterManagementService(); + + try { + const result = await service.discard(params.id, authentication.environment); + return json(result, { status: 200 }); + } catch (error) { + if (error instanceof ServiceValidationError) { + return json({ error: error.message }, { status: error.status ?? 422 }); + } + return json({ error: "Something went wrong" }, { status: 500 }); + } + } +); + +export { action, loader }; diff --git a/apps/webapp/app/routes/api.v1.events.dlq.$id.retry.ts b/apps/webapp/app/routes/api.v1.events.dlq.$id.retry.ts new file mode 100644 index 00000000000..0c5d6d6c35f --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.dlq.$id.retry.ts @@ -0,0 +1,36 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { DeadLetterManagementService } from "~/v3/services/events/deadLetterManagement.server"; + +const ParamsSchema = z.object({ + id: z.string(), +}); + +const { action, loader } = createActionApiRoute( + { + params: ParamsSchema, + corsStrategy: "all", + authorization: { + action: "trigger", + resource: () => ({ tasks: "*" }), + superScopes: ["write:tasks", "admin"], + }, + }, + async ({ params, authentication }) => { + const service = new DeadLetterManagementService(); + + try { + const result = await service.retry(params.id, authentication.environment); + return json(result, { status: 200 }); + } catch (error) { + if (error instanceof ServiceValidationError) { + return json({ error: error.message }, { status: error.status ?? 422 }); + } + return json({ error: "Something went wrong" }, { status: 500 }); + } + } +); + +export { action, loader }; diff --git a/apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts b/apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts new file mode 100644 index 00000000000..bde3ba32f7e --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts @@ -0,0 +1,36 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { DeadLetterManagementService } from "~/v3/services/events/deadLetterManagement.server"; + +const BodySchema = z + .object({ + eventType: z.string().optional(), + }) + .optional(); + +const { action, loader } = createActionApiRoute( + { + body: BodySchema, + corsStrategy: "all", + authorization: { + action: "trigger", + resource: () => ({ tasks: "*" }), + superScopes: ["write:tasks", "admin"], + }, + }, + async ({ body, authentication }) => { + const service = new DeadLetterManagementService(); + + const result = await service.retryAll({ + projectId: authentication.environment.projectId, + environmentId: authentication.environment.id, + eventType: body?.eventType, + environment: authentication.environment, + }); + + return json(result, { status: 200 }); + } +); + +export { action, loader }; diff --git a/apps/webapp/app/routes/api.v1.events.dlq.ts b/apps/webapp/app/routes/api.v1.events.dlq.ts new file mode 100644 index 00000000000..695f63d5569 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.dlq.ts @@ -0,0 +1,56 @@ +import { json } from "@remix-run/server-runtime"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { DeadLetterManagementService } from "~/v3/services/events/deadLetterManagement.server"; + +export const loader = createLoaderApiRoute( + { + corsStrategy: "all", + authorization: { + action: "read", + resource: () => ({ tasks: "*" }), + superScopes: ["read:runs", "read:all", "admin"], + }, + findResource: async () => 1 as const, + }, + async ({ authentication, request }) => { + const url = new URL(request.url); + const eventType = url.searchParams.get("eventType") ?? undefined; + const status = url.searchParams.get("status") as + | "PENDING" + | "RETRIED" + | "DISCARDED" + | undefined; + const limit = url.searchParams.get("limit") + ? parseInt(url.searchParams.get("limit")!, 10) + : undefined; + const cursor = url.searchParams.get("cursor") ?? undefined; + + const service = new DeadLetterManagementService(); + const result = await service.list({ + projectId: authentication.environment.projectId, + environmentId: authentication.environment.id, + eventType, + status, + limit, + cursor, + }); + + return json({ + data: result.data.map((dle) => ({ + id: dle.id, + friendlyId: dle.friendlyId, + eventType: dle.eventType, + payload: dle.payload, + taskSlug: dle.taskSlug, + failedRunId: dle.failedRunId, + error: dle.error, + attemptCount: dle.attemptCount, + status: dle.status, + sourceEventId: dle.sourceEventId, + createdAt: dle.createdAt.toISOString(), + processedAt: dle.processedAt?.toISOString() ?? null, + })), + pagination: result.pagination, + }); + } +); diff --git a/apps/webapp/app/v3/services/events/deadLetterManagement.server.ts b/apps/webapp/app/v3/services/events/deadLetterManagement.server.ts new file mode 100644 index 00000000000..5cbadca533f --- /dev/null +++ b/apps/webapp/app/v3/services/events/deadLetterManagement.server.ts @@ -0,0 +1,150 @@ +import { PrismaClientOrTransaction } from "~/db.server"; +import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { logger } from "~/services/logger.server"; +import { BaseService, ServiceValidationError } from "../baseService.server"; +import { TriggerTaskService } from "../triggerTask.server"; +import type { TriggerTaskRequestBody } from "@trigger.dev/core/v3"; + +export type ListDLQParams = { + projectId: string; + environmentId: string; + eventType?: string; + status?: "PENDING" | "RETRIED" | "DISCARDED"; + limit?: number; + cursor?: string; +}; + +export class DeadLetterManagementService extends BaseService { + public async list(params: ListDLQParams) { + const limit = Math.min(params.limit ?? 50, 200); + + const items = await this._prisma.deadLetterEvent.findMany({ + where: { + projectId: params.projectId, + environmentId: params.environmentId, + ...(params.eventType && { eventType: params.eventType }), + ...(params.status && { status: params.status }), + ...(params.cursor && { createdAt: { lt: new Date(params.cursor) } }), + }, + orderBy: { createdAt: "desc" }, + take: limit + 1, + }); + + const hasMore = items.length > limit; + const data = items.slice(0, limit); + const lastItem = data[data.length - 1]; + + return { + data, + pagination: { + cursor: hasMore && lastItem ? lastItem.createdAt.toISOString() : null, + hasMore, + }, + }; + } + + public async retry(id: string, environment: AuthenticatedEnvironment) { + const dle = await this._prisma.deadLetterEvent.findFirst({ + where: { + id, + projectId: environment.projectId, + environmentId: environment.id, + status: "PENDING", + }, + }); + + if (!dle) { + throw new ServiceValidationError("Dead letter event not found or already processed", 404); + } + + // Trigger the task again with the same payload + const triggerService = new TriggerTaskService(); + const body: TriggerTaskRequestBody = { + payload: dle.payload, + options: { + idempotencyKey: `dlq-retry:${dle.id}`, + }, + }; + + let runId: string | undefined; + try { + const result = await triggerService.call(dle.taskSlug, environment, body, { + idempotencyKey: `dlq-retry:${dle.id}`, + }); + runId = result?.run.friendlyId; + } catch (error) { + logger.error("Failed to retry dead letter event", { + dleId: dle.id, + error: error instanceof Error ? error.message : String(error), + }); + throw new ServiceValidationError("Failed to retry dead letter event", 500); + } + + // Mark as retried + await this._prisma.deadLetterEvent.update({ + where: { id }, + data: { + status: "RETRIED", + processedAt: new Date(), + }, + }); + + return { id: dle.id, status: "RETRIED" as const, runId }; + } + + public async discard(id: string, environment: AuthenticatedEnvironment) { + const dle = await this._prisma.deadLetterEvent.findFirst({ + where: { + id, + projectId: environment.projectId, + environmentId: environment.id, + status: "PENDING", + }, + }); + + if (!dle) { + throw new ServiceValidationError("Dead letter event not found or already processed", 404); + } + + await this._prisma.deadLetterEvent.update({ + where: { id }, + data: { + status: "DISCARDED", + processedAt: new Date(), + }, + }); + + return { id: dle.id, status: "DISCARDED" as const }; + } + + public async retryAll(params: { + projectId: string; + environmentId: string; + eventType?: string; + environment: AuthenticatedEnvironment; + }) { + const pendingItems = await this._prisma.deadLetterEvent.findMany({ + where: { + projectId: params.projectId, + environmentId: params.environmentId, + status: "PENDING", + ...(params.eventType && { eventType: params.eventType }), + }, + take: 1000, // Limit batch size + }); + + let retriedCount = 0; + let failedCount = 0; + + for (const dle of pendingItems) { + try { + await this.retry(dle.id, params.environment); + retriedCount++; + } catch { + failedCount++; + } + } + + return { retriedCount, failedCount }; + } +} diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts index 4ee1d2ded73..6e2f864db4f 100644 --- a/packages/core/src/v3/apiClient/index.ts +++ b/packages/core/src/v3/apiClient/index.ts @@ -11,12 +11,16 @@ import { BatchPublishEventRequestBody, BatchPublishEventResponseBody, BatchTaskRunExecutionResult, + DiscardDeadLetterEventResponseBody, GetEventHistoryResponseBody, GetEventResponseBody, GetEventSchemaResponseBody, + ListDeadLetterEventsResponseBody, ListEventsResponseBody, ReplayEventsRequestBody, ReplayEventsResponseBody, + RetryAllDeadLetterEventsResponseBody, + RetryDeadLetterEventResponseBody, PublishEventRequestBody, PublishEventResponseBody, BatchTriggerTaskV3RequestBody, @@ -1581,6 +1585,75 @@ export class ApiClient { ); } + listDeadLetterEvents( + params?: { + eventType?: string; + status?: string; + limit?: number; + cursor?: string; + }, + requestOptions?: ZodFetchOptions + ) { + const searchParams = new URLSearchParams(); + if (params?.eventType) searchParams.set("eventType", params.eventType); + if (params?.status) searchParams.set("status", params.status); + if (params?.limit) searchParams.set("limit", String(params.limit)); + if (params?.cursor) searchParams.set("cursor", params.cursor); + + const qs = searchParams.toString(); + const url = `${this.baseUrl}/api/v1/events/dlq${qs ? `?${qs}` : ""}`; + + return zodfetch( + ListDeadLetterEventsResponseBody, + url, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + retryDeadLetterEvent(id: string, requestOptions?: ZodFetchOptions) { + return zodfetch( + RetryDeadLetterEventResponseBody, + `${this.baseUrl}/api/v1/events/dlq/${encodeURIComponent(id)}/retry`, + { + method: "POST", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + discardDeadLetterEvent(id: string, requestOptions?: ZodFetchOptions) { + return zodfetch( + DiscardDeadLetterEventResponseBody, + `${this.baseUrl}/api/v1/events/dlq/${encodeURIComponent(id)}/discard`, + { + method: "POST", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + + retryAllDeadLetterEvents( + body?: { eventType?: string }, + requestOptions?: ZodFetchOptions + ) { + return zodfetch( + RetryAllDeadLetterEventsResponseBody, + `${this.baseUrl}/api/v1/events/dlq/retry-all`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body ?? {}), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + #getHeaders(spanParentAsLink: boolean, additionalHeaders?: Record) { const headers: Record = { "Content-Type": "application/json", diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 9cd1dc433eb..819fe915ef1 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1753,3 +1753,54 @@ export const ReplayEventsResponseBody = z.object({ }); export type ReplayEventsResponseBody = z.infer; + +export const DeadLetterEventItem = z.object({ + id: z.string(), + friendlyId: z.string(), + eventType: z.string(), + payload: z.unknown(), + taskSlug: z.string(), + failedRunId: z.string(), + error: z.unknown().nullable(), + attemptCount: z.number().int(), + status: z.enum(["PENDING", "RETRIED", "DISCARDED"]), + sourceEventId: z.string().nullable(), + createdAt: z.string(), + processedAt: z.string().nullable(), +}); + +export type DeadLetterEventItem = z.infer; + +export const ListDeadLetterEventsResponseBody = z.object({ + data: z.array(DeadLetterEventItem), + pagination: z.object({ + cursor: z.string().nullable(), + hasMore: z.boolean(), + }), +}); + +export type ListDeadLetterEventsResponseBody = z.infer; + +export const RetryDeadLetterEventResponseBody = z.object({ + id: z.string(), + status: z.string(), + runId: z.string().optional(), +}); + +export type RetryDeadLetterEventResponseBody = z.infer; + +export const DiscardDeadLetterEventResponseBody = z.object({ + id: z.string(), + status: z.string(), +}); + +export type DiscardDeadLetterEventResponseBody = z.infer; + +export const RetryAllDeadLetterEventsResponseBody = z.object({ + retriedCount: z.number().int(), + failedCount: z.number().int(), +}); + +export type RetryAllDeadLetterEventsResponseBody = z.infer< + typeof RetryAllDeadLetterEventsResponseBody +>; From 29013537b281ae7cfa8ab35c7fd11ba0320a0aac Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:07:57 -0800 Subject: [PATCH 25/65] =?UTF-8?q?feat(events):=20phase=204=20=E2=80=94=20c?= =?UTF-8?q?hangeset=20+=20memory=20updates=20for=20dead=20letter=20queue?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .changeset/event-dead-letter-queue.md | 6 ++ .../memory/pubsub-progress.md | 40 +++++++- .../memory/pubsub-roadmap.md | 98 +++---------------- 3 files changed, 59 insertions(+), 85 deletions(-) create mode 100644 .changeset/event-dead-letter-queue.md diff --git a/.changeset/event-dead-letter-queue.md b/.changeset/event-dead-letter-queue.md new file mode 100644 index 00000000000..a6b718dfcc9 --- /dev/null +++ b/.changeset/event-dead-letter-queue.md @@ -0,0 +1,6 @@ +--- +"@trigger.dev/core": patch +"trigger.dev": patch +--- + +Add dead letter queue for failed event-triggered task runs diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md index 057d5fb9c54..65f3e9cbb77 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md @@ -142,5 +142,41 @@ All sub-steps 0.1–0.9 implemented and committed. See git log for details. - `02369b128` — phase 3.3: event history API endpoint - `3d9863512` — phase 3.4: event replay service + API endpoint -## Phase 4: Dead Letter Queue — NOT STARTED -Next phase. DLQ model + failure detection + management API. +## Phase 4: Dead Letter Queue — COMPLETE + +### What was done +1. **4.1 — DeadLetterEvent model + enum + migration** + - Created `DeadLetterStatus` enum: `PENDING`, `RETRIED`, `DISCARDED` + - Created `DeadLetterEvent` model with: eventType, payload, taskSlug, failedRunId (FK to TaskRun), error, attemptCount, status, sourceEventId + - Added reverse relations on TaskRun, Project, RuntimeEnvironment + - Migration `20260228065743_add_dead_letter_event` (cleaned of extraneous lines) + +2. **4.2 — Store event context on runs + DLQ detection** + - Modified `PublishEventService` to inject `$$event` metadata into triggered runs: `{ eventId, eventType, sourceEventId }` + - Created `DeadLetterService` with `handleFailedRun(run, error)` method + - Extracts `$$event` from run metadata to identify event-triggered runs + - Hooked into `FinalizeTaskRunService` after `isFailedRunStatus()` check (alongside alerts) + +3. **4.3 — DLQ management API endpoints** + - Created `DeadLetterManagementService` with: list, retry, discard, retryAll methods + - `GET /api/v1/events/dlq` — paginated list with eventType/status filters + - `POST /api/v1/events/dlq/:id/retry` — re-triggers the task with `dlq-retry:{id}` idempotency key + - `POST /api/v1/events/dlq/:id/discard` — marks as DISCARDED + - `POST /api/v1/events/dlq/retry-all` — batch retry up to 1000 PENDING items + - DLQ response schemas added to core + - API client methods added + +### Key decisions +- Used `$$event` metadata prefix (double-dollar convention) to avoid collisions with user metadata +- Hooked into `FinalizeTaskRunService` (not EventBus) — matches existing alert pattern, has full run data available +- Phase 4.4 (SDK DLQ config per event) deferred to Phase 8 (DX) — current implementation is sufficient +- Retry creates new run with `dlq-retry:{dleId}` idempotency key for dedup +- retryAll is capped at 1000 items per call + +### Commits +- `ec4139642` — phase 4.1: DeadLetterEvent model + enum + migration +- `5ed48645e` — phase 4.2: store event context on runs + DLQ detection +- `89d0daba8` — phase 4.3: DLQ management API endpoints + +## Phase 5: Ordering + Consumer Groups — NOT STARTED +Next phase. Ordering keys + consumer group fan-out. diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md index 30dec1713f2..f311078668b 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md @@ -190,94 +190,26 @@ Key deliverables: --- -## Phase 4: Dead Letter Queue +## Phase 4: Dead Letter Queue — COMPLETE > **Goal**: Events that fail after all retries go to a DLQ for inspection and reprocessing. > **Requires**: Phase 0, Phase 3 (for persistence) -### 4.1 — DLQ model +All sub-steps 4.1–4.3 implemented and committed. See `pubsub-progress.md` for details. -**File to modify**: `internal-packages/database/prisma/schema.prisma` - -Tasks: -- [ ] Create `DeadLetterEvent` model: - ```prisma - model DeadLetterEvent { - id String @id @default(cuid()) - friendlyId String @unique - - eventType String // "order.created" - payload Json - payloadType String @default("application/json") - - taskSlug String // consumer that failed - failedRunId String // run that failed - failedRun TaskRun @relation(...) - - error Json? // last error - attemptCount Int // how many attempts there were - - status DeadLetterStatus @default(PENDING) - // PENDING = awaiting action - // RETRIED = manually retried - // DISCARDED = manually discarded - - sourceEventId String? // reference to event_log - - projectId String - environmentId String - - createdAt DateTime @default(now()) - processedAt DateTime? - - @@index([projectId, environmentId, status]) - @@index([eventType, environmentId]) - } - ``` -- [ ] Enum `DeadLetterStatus`: `PENDING`, `RETRIED`, `DISCARDED` -- [ ] Migration - -### 4.2 — Detect failed event runs and route to DLQ - -**New file**: `apps/webapp/app/v3/services/events/deadLetterService.server.ts` - -Tasks: -- [ ] Listen to `runFailed` event from EventBus -- [ ] Detect if the failed run has `sourceEventId` (came from a published event) -- [ ] If the run exhausted all retries → create `DeadLetterEvent` -- [ ] Emit `deadLetterCreated` event for telemetry -- [ ] Optional: notify via webhook (reuse alert webhooks infra) - -### 4.3 — DLQ management API - -**New file**: `apps/webapp/app/routes/api.v1.events.dlq.ts` - -Tasks: -- [ ] `GET /api/v1/events/dlq` — list dead letter events (paginated, filterable) -- [ ] `POST /api/v1/events/dlq/:id/retry` — manually retry - - Create new run for the task with the same payload - - Mark DLQ entry as RETRIED -- [ ] `POST /api/v1/events/dlq/:id/discard` — manually discard -- [ ] `POST /api/v1/events/dlq/retry-all` — retry all PENDING of a type - -### 4.4 — DLQ configuration per event - -**File to modify**: `packages/trigger-sdk/src/v3/events.ts` - -Tasks: -- [ ] Extend `event()` options: - ```typescript - event({ - id: "order.created", - schema: orderSchema, - deadLetter: { - enabled: true, // default true - retentionDays: 30, // how long to keep in DLQ - onDeadLetter: dlqHandler // optional task that processes DLQ items - }, - }); - ``` -- [ ] If `onDeadLetter` task is defined, trigger automatically when an event goes to DLQ +Key deliverables: +- [x] `DeadLetterEvent` model + `DeadLetterStatus` enum + migration +- [x] `$$event` metadata on event-triggered runs for identification +- [x] `DeadLetterService` hooks into `FinalizeTaskRunService` on run failure +- [x] `DeadLetterManagementService` with list, retry, discard, retryAll +- [x] `GET /api/v1/events/dlq` — list DLQ entries (paginated, filterable) +- [x] `POST /api/v1/events/dlq/:id/retry` — retry single entry +- [x] `POST /api/v1/events/dlq/:id/discard` — discard single entry +- [x] `POST /api/v1/events/dlq/retry-all` — batch retry +- [x] API client methods: `listDeadLetterEvents`, `retryDeadLetterEvent`, `discardDeadLetterEvent`, `retryAllDeadLetterEvents` +- [x] Response schemas added to core +- [x] Changeset added +- Note: Phase 4.4 (SDK event() DLQ config) deferred to Phase 8 (DX) --- From dcd3ea3c1772db9d94db79608894cb1a76ffcade Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:13:09 -0800 Subject: [PATCH 26/65] =?UTF-8?q?feat(events):=20phase=205.1=20=E2=80=94?= =?UTF-8?q?=20ordering=20keys=20via=20concurrencyKey?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add orderingKey to publish options. When an ordering key is present, events with the same key are processed sequentially per consumer by mapping the ordering key to a concurrencyKey on the triggered run. Co-Authored-By: Claude Opus 4.6 --- .../app/routes/api.v1.events.$eventId.batchPublish.ts | 1 + apps/webapp/app/routes/api.v1.events.$eventId.publish.ts | 1 + apps/webapp/app/v3/services/events/publishEvent.server.ts | 7 +++++++ packages/core/src/v3/schemas/api.ts | 2 ++ packages/trigger-sdk/src/v3/events.ts | 4 ++++ 5 files changed, 15 insertions(+) diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts index 5644d30a853..5ec88d649a7 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts @@ -38,6 +38,7 @@ const { action, loader } = createActionApiRoute( tags: item.options?.tags, metadata: item.options?.metadata, context: item.options?.context, + orderingKey: item.options?.orderingKey, } ); diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts index c6a085eca8e..b7bc4bb6978 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts @@ -35,6 +35,7 @@ const { action, loader } = createActionApiRoute( tags: body.options?.tags, metadata: body.options?.metadata, context: body.options?.context, + orderingKey: body.options?.orderingKey, } ); diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 7dfd9f96a3f..b0a26f3665c 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -18,6 +18,7 @@ export type PublishEventOptions = { tags?: string | string[]; metadata?: unknown; context?: unknown; + orderingKey?: string; }; export type PublishEventResult = { @@ -80,6 +81,9 @@ export class PublishEventService extends BaseService { ): Promise { return this.traceWithEnv("publishEvent", environment, async (span) => { span.setAttribute("eventSlug", eventSlug); + if (options.orderingKey) { + span.setAttribute("orderingKey", options.orderingKey); + } // 1. Look up EventDefinition by slug + projectId const eventDefinition = await this._prisma.eventDefinition.findFirst({ @@ -235,6 +239,9 @@ export class PublishEventService extends BaseService { : undefined, metadata: eventMetadata, delay: options.delay, + concurrencyKey: options.orderingKey + ? `evt:${eventSlug}:${options.orderingKey}` + : undefined, }, }; diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 819fe915ef1..f2108cd5ff3 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1610,6 +1610,7 @@ export const PublishEventRequestBody = z.object({ tags: RunTags.optional(), metadata: z.any().optional(), context: z.any().optional(), + orderingKey: z.string().optional(), }) .optional(), }); @@ -1639,6 +1640,7 @@ export const BatchPublishEventRequestBody = z.object({ tags: RunTags.optional(), metadata: z.any().optional(), context: z.any().optional(), + orderingKey: z.string().optional(), }) .optional(), }) diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index a37b482afd1..986a74f32af 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -36,6 +36,8 @@ export interface PublishEventOptions { tags?: string[]; /** Metadata to attach to the generated runs */ metadata?: Record; + /** Ordering key — events with the same ordering key are processed sequentially per consumer */ + orderingKey?: string; } /** Result of publishing an event */ @@ -129,6 +131,7 @@ export function createEvent Date: Fri, 27 Feb 2026 23:16:34 -0800 Subject: [PATCH 27/65] =?UTF-8?q?feat(events):=20phase=205.2=20=E2=80=94?= =?UTF-8?q?=20consumer=20groups=20for=20load-balanced=20fan-out?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add consumerGroup option to task event subscriptions. Within a consumer group, only one task receives each event (round-robin selection). Tasks without a consumer group continue to receive all events normally. Co-Authored-By: Claude Opus 4.6 --- .../services/createBackgroundWorker.server.ts | 2 + .../v3/services/events/publishEvent.server.ts | 54 +++++++++++++++++-- packages/core/src/v3/schemas/resources.ts | 1 + packages/core/src/v3/schemas/schemas.ts | 2 + packages/core/src/v3/types/tasks.ts | 2 + packages/trigger-sdk/src/v3/shared.ts | 2 + 6 files changed, 60 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index 9844fb0c190..f93c6b8a8df 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -420,12 +420,14 @@ async function syncWorkerEvents( enabled: true, filter: (task.onEventFilter as any) ?? undefined, pattern: task.onEventPattern ?? undefined, + consumerGroup: task.onEventConsumerGroup ?? undefined, }, update: { workerId: worker.id, enabled: true, filter: (task.onEventFilter as any) ?? undefined, pattern: task.onEventPattern ?? undefined, + consumerGroup: task.onEventConsumerGroup ?? undefined, }, }); diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index b0a26f3665c..0cdc765339f 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -203,11 +203,21 @@ export class PublishEventService extends BaseService { }; } - // 5. Fan out: trigger each matching subscribed task + // 5. Apply consumer group selection — within a group, only one task receives each event + const subscriptionsToTrigger = this.applyConsumerGroups(matchingSubscriptions); + + if (subscriptionsToTrigger.length < matchingSubscriptions.length) { + span.setAttribute( + "consumerGroupSkipped", + matchingSubscriptions.length - subscriptionsToTrigger.length + ); + } + + // 6. Fan out: trigger each matching subscribed task const eventId = generateFriendlyId("evt"); const runs: PublishEventResult["runs"] = []; - for (const subscription of matchingSubscriptions) { + for (const subscription of subscriptionsToTrigger) { try { // Derive per-consumer idempotency key if a global one was provided const consumerIdempotencyKey = options.idempotencyKey @@ -277,7 +287,7 @@ export class PublishEventService extends BaseService { } } - // 6. Persist to event log (async, non-blocking) + // 7. Persist to event log (async, non-blocking) if (this._eventLogWriter) { try { this._eventLogWriter({ @@ -309,4 +319,42 @@ export class PublishEventService extends BaseService { return { eventId, runs }; }); } + + /** + * Apply consumer group logic: within a consumer group, only one subscription receives each event. + * Subscriptions without a consumer group are always included (normal fan-out). + * Selection is round-robin based on subscription count modulo (deterministic per group per call). + */ + private applyConsumerGroups( + subscriptions: Array<{ id: string; consumerGroup: string | null; taskSlug: string }> + ): typeof subscriptions { + const ungrouped: typeof subscriptions = []; + const groups = new Map(); + + for (const sub of subscriptions) { + if (!sub.consumerGroup) { + ungrouped.push(sub); + } else { + const group = groups.get(sub.consumerGroup); + if (group) { + group.push(sub); + } else { + groups.set(sub.consumerGroup, [sub]); + } + } + } + + // For each consumer group, pick one member using a simple hash-based selection + const selected: typeof subscriptions = [...ungrouped]; + for (const [, members] of groups) { + // Sort by taskSlug for deterministic ordering, then pick using a rotating index + // The selection rotates based on the current timestamp (second-level granularity) + // so load is distributed over time + const sorted = members.sort((a, b) => a.taskSlug.localeCompare(b.taskSlug)); + const index = Math.floor(Date.now() / 1000) % sorted.length; + selected.push(sorted[index]!); + } + + return selected; + } } diff --git a/packages/core/src/v3/schemas/resources.ts b/packages/core/src/v3/schemas/resources.ts index e3c0619f2cb..e965ce99579 100644 --- a/packages/core/src/v3/schemas/resources.ts +++ b/packages/core/src/v3/schemas/resources.ts @@ -16,6 +16,7 @@ export const TaskResource = z.object({ // JSONSchema type - using z.unknown() for runtime validation to accept JSONSchema7 payloadSchema: z.unknown().optional(), onEvent: z.string().optional(), + onEventConsumerGroup: z.string().optional(), }); export type TaskResource = z.infer; diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index ebf9fa4ba2d..cf02ead9974 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -209,6 +209,8 @@ const taskMetadata = { onEventFilter: z.unknown().optional(), /** Wildcard pattern for pattern-based subscriptions (set when task uses `on: events.match("order.*")`) */ onEventPattern: z.string().optional(), + /** Consumer group name — within a group, only one task receives each event */ + onEventConsumerGroup: z.string().optional(), }; export const TaskMetadata = z.object(taskMetadata); diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index 2cc26ad1de6..f36aa832fd8 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -422,6 +422,8 @@ export type TaskOptionsWithEvent< on: EventSource; /** Content-based filter — only receive events whose payload matches this filter */ filter?: import("../schemas/eventFilter.js").EventFilter; + /** Consumer group — within a group, only one task receives each event */ + consumerGroup?: string; }; declare const __output: unique symbol; diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index 701bba8328c..bc59134aac1 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -249,6 +249,7 @@ export function createTask< const onEvent = eventSource?.id; const onEventFilter = "filter" in params && params.filter ? params.filter : undefined; const onEventPattern = eventSource && "pattern" in eventSource ? eventSource.pattern : undefined; + const onEventConsumerGroup = "consumerGroup" in params && params.consumerGroup ? params.consumerGroup as string : undefined; resourceCatalog.registerTaskMetadata({ id: params.id, @@ -261,6 +262,7 @@ export function createTask< onEvent, onEventFilter, onEventPattern, + onEventConsumerGroup, fns: { run: params.run, }, From 3b3abf47a55ae453e956b1f0aae5265d557f3206 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:20:41 -0800 Subject: [PATCH 28/65] =?UTF-8?q?feat(events):=20phase=205.3=20=E2=80=94?= =?UTF-8?q?=20integration=20tests=20for=20ordering=20+=20consumer=20groups?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 3 new tests: - ordering key sets concurrencyKey on triggered runs - consumer group: only one task in group receives each event - consumer group: multiple groups + ungrouped tasks all work Co-Authored-By: Claude Opus 4.6 --- apps/webapp/test/engine/publishEvent.test.ts | 200 +++++++++++++++++++ 1 file changed, 200 insertions(+) diff --git a/apps/webapp/test/engine/publishEvent.test.ts b/apps/webapp/test/engine/publishEvent.test.ts index 6237f9aa602..26c32679f8e 100644 --- a/apps/webapp/test/engine/publishEvent.test.ts +++ b/apps/webapp/test/engine/publishEvent.test.ts @@ -1036,4 +1036,204 @@ describe("PublishEventService", () => { } } ); + + containerTest( + "ordering key sets concurrencyKey on triggered runs", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "order-processor"); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.updated", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "order-processor", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // Publish with ordering key + const result = await service.call("order.updated", env, { orderId: "ord-1" }, { + orderingKey: "ord-1", + }); + + expect(result.runs).toHaveLength(1); + + // Verify the run has the concurrency key set + const dbRun = await prisma.taskRun.findFirst({ + where: { friendlyId: result.runs[0].runId }, + }); + expect(dbRun).toBeDefined(); + expect(dbRun!.concurrencyKey).toBe("evt:order.updated:ord-1"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "consumer group: only one task in group receives each event", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIds = ["processor-a", "processor-b", "processor-c", "standalone-task"]; + const { worker } = await setupBackgroundWorker(engine, env, taskIds); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.placed", + version: "1.0", + projectId: env.projectId, + }, + }); + + // 3 tasks in the same consumer group + for (const taskSlug of ["processor-a", "processor-b", "processor-c"]) { + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug, + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + consumerGroup: "order-processors", + }, + }); + } + + // 1 standalone task (no consumer group) + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "standalone-task", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + const result = await service.call("order.placed", env, { orderId: "o1" }); + + // Should have 2 runs: 1 from consumer group (picked one) + 1 standalone + expect(result.runs).toHaveLength(2); + + const triggeredTasks = result.runs.map((r) => r.taskIdentifier); + // standalone-task always gets it + expect(triggeredTasks).toContain("standalone-task"); + + // Exactly one of the consumer group members gets it + const groupMembers = triggeredTasks.filter((t) => + ["processor-a", "processor-b", "processor-c"].includes(t) + ); + expect(groupMembers).toHaveLength(1); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "consumer group: tasks without group and with group both work", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIds = ["group-a-1", "group-a-2", "group-b-1", "group-b-2", "no-group"]; + const { worker } = await setupBackgroundWorker(engine, env, taskIds); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "item.sold", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Group A: 2 members + for (const taskSlug of ["group-a-1", "group-a-2"]) { + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug, + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + consumerGroup: "group-a", + }, + }); + } + + // Group B: 2 members + for (const taskSlug of ["group-b-1", "group-b-2"]) { + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug, + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + consumerGroup: "group-b", + }, + }); + } + + // No group + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "no-group", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + const result = await service.call("item.sold", env, { itemId: "i1" }); + + // 3 runs: 1 from group-a, 1 from group-b, 1 ungrouped + expect(result.runs).toHaveLength(3); + + const triggeredTasks = result.runs.map((r) => r.taskIdentifier); + expect(triggeredTasks).toContain("no-group"); + + // Exactly one from each group + const groupA = triggeredTasks.filter((t) => t.startsWith("group-a-")); + const groupB = triggeredTasks.filter((t) => t.startsWith("group-b-")); + expect(groupA).toHaveLength(1); + expect(groupB).toHaveLength(1); + } finally { + await engine.quit(); + } + } + ); }); From e4d17a9af32175c13f8396cecb2612dd96a8a9f8 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:21:32 -0800 Subject: [PATCH 29/65] =?UTF-8?q?feat(events):=20phase=205=20=E2=80=94=20c?= =?UTF-8?q?hangeset=20+=20memory=20updates=20for=20ordering=20+=20consumer?= =?UTF-8?q?=20groups?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .changeset/event-ordering-consumer-groups.md | 7 ++ .../memory/pubsub-progress.md | 40 +++++++++- .../memory/pubsub-roadmap.md | 73 ++----------------- 3 files changed, 53 insertions(+), 67 deletions(-) create mode 100644 .changeset/event-ordering-consumer-groups.md diff --git a/.changeset/event-ordering-consumer-groups.md b/.changeset/event-ordering-consumer-groups.md new file mode 100644 index 00000000000..586b2347ba5 --- /dev/null +++ b/.changeset/event-ordering-consumer-groups.md @@ -0,0 +1,7 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +"trigger.dev": patch +--- + +Add ordering keys and consumer groups for event subscriptions diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md index 65f3e9cbb77..494c056acd5 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md @@ -178,5 +178,41 @@ All sub-steps 0.1–0.9 implemented and committed. See git log for details. - `5ed48645e` — phase 4.2: store event context on runs + DLQ detection - `89d0daba8` — phase 4.3: DLQ management API endpoints -## Phase 5: Ordering + Consumer Groups — NOT STARTED -Next phase. Ordering keys + consumer group fan-out. +## Phase 5: Ordering + Consumer Groups — COMPLETE + +### What was done +1. **5.1 — Ordering keys** + - Added `orderingKey` to `PublishEventRequestBody` and `BatchPublishEventRequestBody` in core schemas + - Added `orderingKey` to SDK `PublishEventOptions` and pass-through in `publish()` / `batchPublish()` + - `PublishEventService` maps `orderingKey` to `concurrencyKey` on triggered runs: `evt:{eventSlug}:{orderingKey}` + - Updated publish + batchPublish routes to pass ordering key through + - Span attribute added for observability + +2. **5.2 — Consumer groups** + - Added `onEventConsumerGroup` to `TaskResource` and `TaskMetadata` schemas + - Added `consumerGroup` option to `TaskOptionsWithEvent` type + - SDK `shared.ts` extracts and registers `consumerGroup` from task params + - `syncWorkerEvents` stores `consumerGroup` in `EventSubscription` during deploy + - `PublishEventService.applyConsumerGroups()` groups subscriptions by `consumerGroup` + - Ungrouped subscriptions always receive events (normal fan-out) + - Within each group, one member is selected (round-robin by timestamp) + +3. **5.3 — Tests** + - 3 new integration tests: ordering key sets concurrencyKey, consumer group picks one, multiple groups + ungrouped + - All 19 integration tests pass, all 470 unit tests pass + - Full build passes: core, sdk, cli, webapp + - Changeset added + +### Key decisions +- Ordering at **publish time** (not event definition time) — ordering key values are dynamic per-payload +- Maps to existing `concurrencyKey` infrastructure — no new queue management needed +- Consumer group selection uses `Math.floor(Date.now() / 1000) % members.length` for time-based rotation +- `consumerGroup` field already existed in Prisma schema from Phase 0.4 — no migration needed + +### Commits +- `dcd3ea3c1` — phase 5.1: ordering keys via concurrencyKey +- `8c033b3dd` — phase 5.2: consumer groups for load-balanced fan-out +- `3b3abf47a` — phase 5.3: integration tests for ordering + consumer groups + +## Phase 6: Publish-and-Wait — NOT STARTED +Next phase. Fan-out/fan-in with waitpoints. diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md index f311078668b..bb2bd709607 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md @@ -213,76 +213,19 @@ Key deliverables: --- -## Phase 5: Ordering + Consumer Groups +## Phase 5: Ordering + Consumer Groups — COMPLETE > **Goal**: Order guarantees by partition key. Competing consumers for load balancing. > **Requires**: Phase 0 -### 5.1 — Ordering keys +All sub-steps 5.1–5.3 implemented and committed. See `pubsub-progress.md` for details. -**File to modify**: `packages/trigger-sdk/src/v3/events.ts` - -Tasks: -- [ ] Extend `event()`: - ```typescript - event({ - id: "order.updated", - schema: orderSchema, - orderingKey: (payload) => payload.orderId, - // Events with the same orderId are processed sequentially - }); - ``` -- [ ] Alternative: ordering at publish time: - ```typescript - await orderUpdated.publish(payload, { - orderingKey: payload.orderId, - }); - ``` - -**File to modify**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` - -Tasks: -- [ ] When ordering key is present: - - Derive queue name: `event:{eventSlug}:order:{orderingKeyHash}` - - Use queue with `concurrencyLimit: 1` to guarantee sequence - - Each subscribed consumer uses this queue -- [ ] Reuse existing `RunQueue` with named queues -- [ ] Ordering is per-consumer: each consumer processes in order within its partition - -### 5.2 — Consumer Groups - -**File to modify**: `internal-packages/database/prisma/schema.prisma` - -Tasks: -- [ ] `consumerGroup` field already defined in Phase 0.4 on `EventSubscription` -- [ ] Constraint: within a consumer group, only 1 run per event - -**File to modify**: `packages/trigger-sdk/src/v3/shared.ts` - -Tasks: -- [ ] Extend task options: - ```typescript - task({ - on: orderCreated, - consumerGroup: "order-processors", - run: async (payload) => { ... }, - }); - ``` - -**File to modify**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` - -Tasks: -- [ ] In fan-out: - - Group subscriptions by `consumerGroup` - - For subscriptions WITHOUT a group: normal fan-out (1 run each) - - For subscriptions WITH a group: pick 1 subscription from the group (round-robin or random) - - Reuse `FairQueueSelectionStrategy` for fair selection -- [ ] Persist selection so replay uses the same consumer - -Tests: -- [ ] Test: 3 tasks in the same consumer group → only 1 receives each event -- [ ] Test: fair distribution among group members -- [ ] Test: task without group + task with group both work on the same event +Key deliverables: +- [x] `orderingKey` in publish options, mapped to `concurrencyKey` on triggered runs +- [x] `consumerGroup` option on `TaskOptionsWithEvent`, stored in `EventSubscription.consumerGroup` during deploy +- [x] `PublishEventService.applyConsumerGroups()` — within a group, only one task receives each event +- [x] 3 integration tests for ordering + consumer groups +- [x] Changeset added --- From a522cb6afe3d421901507326e367c1f62369862b Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:29:57 -0800 Subject: [PATCH 30/65] =?UTF-8?q?feat(events):=20phase=206.1=20=E2=80=94?= =?UTF-8?q?=20runtime=20waitForEvent=20method?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add waitForEvent to RuntimeManager that waits for all event subscriber runs to complete and returns aggregated results keyed by task slug. Co-Authored-By: Claude Opus 4.6 --- packages/core/src/v3/runtime/index.ts | 10 +++- packages/core/src/v3/runtime/manager.ts | 10 ++++ .../core/src/v3/runtime/noopRuntimeManager.ts | 13 +++++- .../src/v3/runtime/sharedRuntimeManager.ts | 46 ++++++++++++++++++- packages/core/src/v3/workers/index.ts | 2 +- 5 files changed, 77 insertions(+), 4 deletions(-) diff --git a/packages/core/src/v3/runtime/index.ts b/packages/core/src/v3/runtime/index.ts index 39e874f986f..e2cadb30f51 100644 --- a/packages/core/src/v3/runtime/index.ts +++ b/packages/core/src/v3/runtime/index.ts @@ -7,7 +7,7 @@ import { WaitpointTokenResult, } from "../schemas/index.js"; import { getGlobal, registerGlobal, unregisterGlobal } from "../utils/globals.js"; -import { type RuntimeManager } from "./manager.js"; +import { type EventWaitResult, type RuntimeManager } from "./manager.js"; import { NoopRuntimeManager } from "./noopRuntimeManager.js"; import { usage } from "../usage-api.js"; @@ -54,6 +54,14 @@ export class RuntimeAPI { return usage.pauseAsync(() => this.#getRuntimeManager().waitForBatch(params)); } + public waitForEvent(params: { + eventId: string; + runs: Array<{ friendlyId: string; taskSlug: string }>; + ctx: TaskRunContext; + }): Promise { + return usage.pauseAsync(() => this.#getRuntimeManager().waitForEvent(params)); + } + public setGlobalRuntimeManager(runtimeManager: RuntimeManager): boolean { return registerGlobal(API_NAME, runtimeManager); } diff --git a/packages/core/src/v3/runtime/manager.ts b/packages/core/src/v3/runtime/manager.ts index ec7b5f1c182..40abd3b4574 100644 --- a/packages/core/src/v3/runtime/manager.ts +++ b/packages/core/src/v3/runtime/manager.ts @@ -5,6 +5,11 @@ import { WaitpointTokenResult, } from "../schemas/index.js"; +export type EventWaitResult = { + id: string; + results: Record; +}; + export interface RuntimeManager { disable(): void; waitForTask(params: { id: string; ctx: TaskRunContext }): Promise; @@ -17,4 +22,9 @@ export interface RuntimeManager { waitpointFriendlyId: string; finishDate?: Date; }): Promise; + waitForEvent(params: { + eventId: string; + runs: Array<{ friendlyId: string; taskSlug: string }>; + ctx: TaskRunContext; + }): Promise; } diff --git a/packages/core/src/v3/runtime/noopRuntimeManager.ts b/packages/core/src/v3/runtime/noopRuntimeManager.ts index 0650cf4e0d2..77711e4a856 100644 --- a/packages/core/src/v3/runtime/noopRuntimeManager.ts +++ b/packages/core/src/v3/runtime/noopRuntimeManager.ts @@ -5,7 +5,7 @@ import { TaskRunExecutionResult, WaitpointTokenResult, } from "../schemas/index.js"; -import { RuntimeManager } from "./manager.js"; +import { EventWaitResult, RuntimeManager } from "./manager.js"; export class NoopRuntimeManager implements RuntimeManager { disable(): void { @@ -42,4 +42,15 @@ export class NoopRuntimeManager implements RuntimeManager { items: [], }); } + + waitForEvent(params: { + eventId: string; + runs: Array<{ friendlyId: string; taskSlug: string }>; + ctx: TaskRunContext; + }): Promise { + return Promise.resolve({ + id: params.eventId, + results: {}, + }); + } } diff --git a/packages/core/src/v3/runtime/sharedRuntimeManager.ts b/packages/core/src/v3/runtime/sharedRuntimeManager.ts index 09c718c1f6c..435ac7a701e 100644 --- a/packages/core/src/v3/runtime/sharedRuntimeManager.ts +++ b/packages/core/src/v3/runtime/sharedRuntimeManager.ts @@ -13,7 +13,7 @@ import { } from "../schemas/index.js"; import { tryCatch } from "../tryCatch.js"; import { ExecutorToWorkerProcessConnection } from "../zodIpc.js"; -import { RuntimeManager } from "./manager.js"; +import { EventWaitResult, RuntimeManager } from "./manager.js"; import { preventMultipleWaits } from "./preventMultipleWaits.js"; /** A function that resolves a waitpoint */ @@ -167,6 +167,50 @@ export class SharedRuntimeManager implements RuntimeManager { }); } + async waitForEvent(params: { + eventId: string; + runs: Array<{ friendlyId: string; taskSlug: string }>; + ctx: TaskRunContext; + }): Promise { + return this._preventMultipleWaits(async () => { + if (!params.runs.length) { + return { id: params.eventId, results: {} }; + } + + // Create a resolver for each run, keyed by its friendly ID + const promises = params.runs.map((run) => { + return new Promise((resolve) => { + this.resolversById.set(run.friendlyId as ResolverId, resolve); + }); + }); + + // Resolve any waitpoints we received before the resolvers were created + this.resolvePendingWaitpoints(); + + await lifecycleHooks.callOnWaitHookListeners({ + type: "task", + runId: params.eventId, + }); + + const waitpoints = await this.suspendable(Promise.all(promises)); + + await lifecycleHooks.callOnResumeHookListeners({ + type: "task", + runId: params.eventId, + }); + + // Aggregate results by task slug + const results: Record = {}; + for (let i = 0; i < params.runs.length; i++) { + const run = params.runs[i]!; + const waitpoint = waitpoints[i]!; + results[run.taskSlug] = this.waitpointToTaskRunExecutionResult(waitpoint); + } + + return { id: params.eventId, results }; + }); + } + async resolveWaitpoints(waitpoints: CompletedWaitpoint[]): Promise { await Promise.all(waitpoints.map((waitpoint) => this.resolveWaitpoint(waitpoint))); } diff --git a/packages/core/src/v3/workers/index.ts b/packages/core/src/v3/workers/index.ts index 4ca301fcdc7..845b9718365 100644 --- a/packages/core/src/v3/workers/index.ts +++ b/packages/core/src/v3/workers/index.ts @@ -1,5 +1,5 @@ export { TaskExecutor, type TaskExecutorOptions } from "./taskExecutor.js"; -export type { RuntimeManager } from "../runtime/manager.js"; +export type { EventWaitResult, RuntimeManager } from "../runtime/manager.js"; export { PreciseWallClock as DurableClock } from "../clock/preciseWallClock.js"; export { getEnvVar, getNumberEnvVar } from "../utils/getEnv.js"; export { OtelTaskLogger, logLevels } from "../logger/taskLogger.js"; From c4bd534af6f4ef29821ebc580b3b18e39b3df535 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:35:33 -0800 Subject: [PATCH 31/65] =?UTF-8?q?feat(events):=20phase=206.2+6.3=20?= =?UTF-8?q?=E2=80=94=20publishAndWait=20endpoint=20+=20SDK=20method?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add PublishAndWaitEventRequestBody/ResponseBody schemas (core) - Add publishAndWait API route requiring parentRunId - Add publishAndWaitEvent() to API client - Export EventWaitResult from core/v3 - Add publishAndWait() method to EventDefinition in SDK - Validates payload, calls API with parentRunId from task context - Waits for all subscriber runs via runtime.waitForEvent() - Returns aggregated results keyed by task identifier - Pass parentRunId + resumeParentOnCompletion through PublishEventService Co-Authored-By: Claude Opus 4.6 --- .../api.v1.events.$eventId.publishAndWait.ts | 73 +++++++++++++++++++ .../v3/services/events/publishEvent.server.ts | 7 ++ packages/core/src/v3/apiClient/index.ts | 21 ++++++ packages/core/src/v3/index.ts | 1 + packages/core/src/v3/schemas/api.ts | 29 ++++++++ packages/trigger-sdk/src/v3/events.ts | 62 ++++++++++++++++ 6 files changed, 193 insertions(+) create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts b/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts new file mode 100644 index 00000000000..7c4f3f0f228 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts @@ -0,0 +1,73 @@ +import { json } from "@remix-run/server-runtime"; +import { PublishAndWaitEventRequestBody } from "@trigger.dev/core/v3"; +import { z } from "zod"; +import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; +import { PublishEventService } from "~/v3/services/events/publishEvent.server"; +import { writeEventLog } from "~/v3/services/events/eventLogWriter.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +const { action, loader } = createActionApiRoute( + { + params: ParamsSchema, + body: PublishAndWaitEventRequestBody, + corsStrategy: "all", + authorization: { + action: "trigger", + resource: (params) => ({ tasks: params.eventId }), + superScopes: ["write:tasks", "admin"], + }, + }, + async ({ body, params, authentication }) => { + const parentRunId = body.options?.parentRunId; + if (!parentRunId) { + return json( + { error: "parentRunId is required for publishAndWait" }, + { status: 400 } + ); + } + + const service = new PublishEventService(undefined, undefined, writeEventLog); + + try { + const result = await service.call( + params.eventId, + authentication.environment, + body.payload, + { + idempotencyKey: body.options?.idempotencyKey, + delay: body.options?.delay, + tags: body.options?.tags, + metadata: body.options?.metadata, + context: body.options?.context, + orderingKey: body.options?.orderingKey, + parentRunId, + } + ); + + return json( + { + eventId: result.eventId, + runs: result.runs.map((r) => ({ + taskIdentifier: r.taskIdentifier, + runId: r.runId, + })), + }, + { status: 200 } + ); + } catch (error) { + if (error instanceof ServiceValidationError) { + return json({ error: error.message }, { status: error.status ?? 422 }); + } else if (error instanceof Error) { + return json({ error: error.message }, { status: 500 }); + } + + return json({ error: "Something went wrong" }, { status: 500 }); + } + } +); + +export { action, loader }; diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 0cdc765339f..ace9f194746 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -19,6 +19,8 @@ export type PublishEventOptions = { metadata?: unknown; context?: unknown; orderingKey?: string; + /** When set, each triggered run will create a waitpoint that blocks this parent run */ + parentRunId?: string; }; export type PublishEventResult = { @@ -26,6 +28,8 @@ export type PublishEventResult = { runs: Array<{ taskIdentifier: string; runId: string; + /** Internal run ID, present when parentRunId is used */ + internalRunId?: string; }>; }; @@ -252,6 +256,8 @@ export class PublishEventService extends BaseService { concurrencyKey: options.orderingKey ? `evt:${eventSlug}:${options.orderingKey}` : undefined, + parentRunId: options.parentRunId, + resumeParentOnCompletion: options.parentRunId ? true : undefined, }, }; @@ -270,6 +276,7 @@ export class PublishEventService extends BaseService { runs.push({ taskIdentifier: subscription.taskSlug, runId: result.run.friendlyId, + internalRunId: options.parentRunId ? result.run.id : undefined, }); } } catch (error) { diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts index 6e2f864db4f..47f4767f6a0 100644 --- a/packages/core/src/v3/apiClient/index.ts +++ b/packages/core/src/v3/apiClient/index.ts @@ -21,6 +21,8 @@ import { ReplayEventsResponseBody, RetryAllDeadLetterEventsResponseBody, RetryDeadLetterEventResponseBody, + PublishAndWaitEventRequestBody, + PublishAndWaitEventResponseBody, PublishEventRequestBody, PublishEventResponseBody, BatchTriggerTaskV3RequestBody, @@ -1493,6 +1495,25 @@ export class ApiClient { ); } + publishAndWaitEvent( + eventId: string, + body: PublishAndWaitEventRequestBody, + requestOptions?: ZodFetchOptions + ) { + const encodedEventId = encodeURIComponent(eventId); + + return zodfetch( + PublishAndWaitEventResponseBody, + `${this.baseUrl}/api/v1/events/${encodedEventId}/publishAndWait`, + { + method: "POST", + headers: this.#getHeaders(false), + body: JSON.stringify(body), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + listEvents(requestOptions?: ZodFetchOptions) { return zodfetch( ListEventsResponseBody, diff --git a/packages/core/src/v3/index.ts b/packages/core/src/v3/index.ts index b7b989b12cb..75569fc0568 100644 --- a/packages/core/src/v3/index.ts +++ b/packages/core/src/v3/index.ts @@ -8,6 +8,7 @@ export * from "./errors.js"; export * from "./limits.js"; export * from "./logger-api.js"; export * from "./runtime-api.js"; +export type { EventWaitResult } from "./runtime/manager.js"; export * from "./task-context-api.js"; export * from "./trace-context-api.js"; export * from "./apiClientManager-api.js"; diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index f2108cd5ff3..6a4ea903512 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1655,6 +1655,35 @@ export const BatchPublishEventResponseBody = z.object({ export type BatchPublishEventResponseBody = z.infer; +export const PublishAndWaitEventRequestBody = z.object({ + payload: z.any(), + options: z + .object({ + idempotencyKey: z.string().optional(), + delay: z.string().or(z.coerce.date()).optional(), + tags: RunTags.optional(), + metadata: z.any().optional(), + context: z.any().optional(), + orderingKey: z.string().optional(), + parentRunId: z.string(), + }) + .optional(), +}); + +export type PublishAndWaitEventRequestBody = z.infer; + +export const PublishAndWaitEventResponseBody = z.object({ + eventId: z.string(), + runs: z.array( + z.object({ + taskIdentifier: z.string(), + runId: z.string(), + }) + ), +}); + +export type PublishAndWaitEventResponseBody = z.infer; + // ---- Event discovery schemas ---- export const EventListItem = z.object({ diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index 986a74f32af..f6ee1285c66 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -2,10 +2,14 @@ import { apiClientManager, getSchemaParseFn, resourceCatalog, + runtime, + taskContext, } from "@trigger.dev/core/v3"; import type { + EventWaitResult, inferSchemaIn, SchemaParseFn, + TaskRunExecutionResult, TaskSchema, } from "@trigger.dev/core/v3"; @@ -51,6 +55,14 @@ export interface PublishEventResult { }>; } +/** Result of publishAndWait — aggregated results from all subscriber runs */ +export interface PublishAndWaitResult { + /** Unique ID of the published event instance */ + id: string; + /** Results keyed by subscriber task identifier */ + results: Record; +} + /** An event definition that can be published and subscribed to */ export interface EventDefinition { /** The event identifier */ @@ -69,6 +81,12 @@ export interface EventDefinition { batchPublish( items: Array<{ payload: TPayload; options?: PublishEventOptions }> ): Promise>; + + /** + * Publish an event and wait for all subscriber runs to complete. + * Can only be called from inside a task.run(). + */ + publishAndWait(payload: TPayload, options?: PublishEventOptions): Promise; } /** Any event definition (for generic constraints) */ @@ -142,6 +160,50 @@ export function createEvent ({ + friendlyId: r.runId, + taskSlug: r.taskIdentifier, + })), + ctx, + }); + + return { + id: waitResult.id, + results: waitResult.results, + }; + }, + async batchPublish(items) { const apiClient = apiClientManager.clientOrThrow(); From a87bce4724729b1cd418548bae1b1c10521797d1 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:42:55 -0800 Subject: [PATCH 32/65] =?UTF-8?q?feat(events):=20phase=206=20tests=20?= =?UTF-8?q?=E2=80=94=20publishAndWait=20+=20event=20log=20writer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Test parentRunId creates waitpoints for each subscriber (resumeParentOnCompletion=true, internalRunId populated) - Test no subscribers returns empty runs with no waitpoints - Test event log writer receives correct fanOutCount and metadata All 22 integration tests pass, 470 core unit tests pass. Co-Authored-By: Claude Opus 4.6 --- apps/webapp/test/engine/publishEvent.test.ts | 173 +++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/apps/webapp/test/engine/publishEvent.test.ts b/apps/webapp/test/engine/publishEvent.test.ts index 26c32679f8e..cb2149a9d0c 100644 --- a/apps/webapp/test/engine/publishEvent.test.ts +++ b/apps/webapp/test/engine/publishEvent.test.ts @@ -1236,4 +1236,177 @@ describe("PublishEventService", () => { } } ); + + containerTest( + "publishAndWait: parentRunId creates waitpoints for each subscriber", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIds = ["parent-task", "handler-a", "handler-b"]; + const { worker } = await setupBackgroundWorker(engine, env, taskIds); + + // Create event definition + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "data.ready", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Subscribe both handlers + for (const taskSlug of ["handler-a", "handler-b"]) { + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug, + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + } + + // Trigger a parent run first so we have a valid parentRunId + const triggerSvc = createTriggerTaskService(prisma, engine); + const parentResult = await triggerSvc.call({ + taskId: "parent-task", + environment: env, + body: { payload: { setup: true } }, + options: {}, + }); + expect(parentResult).toBeDefined(); + const parentRunId = parentResult!.run.friendlyId; + + // Now publish with parentRunId + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + const result = await service.call("data.ready", env, { value: 42 }, { + parentRunId, + }); + + expect(result.runs).toHaveLength(2); + expect(result.runs.every((r) => r.internalRunId !== undefined)).toBe(true); + + // Verify the triggered runs have resumeParentOnCompletion set + for (const run of result.runs) { + const dbRun = await prisma.taskRun.findFirst({ + where: { friendlyId: run.runId }, + }); + expect(dbRun).toBeDefined(); + expect(dbRun!.resumeParentOnCompletion).toBe(true); + } + + const triggeredTasks = result.runs.map((r) => r.taskIdentifier).sort(); + expect(triggeredTasks).toEqual(["handler-a", "handler-b"]); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "publishAndWait: no subscribers returns empty runs with no waitpoints", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "parent-task"); + + // Create event definition with no subscribers + await prisma.eventDefinition.create({ + data: { + slug: "empty.event", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Create a parent run + const triggerSvc = createTriggerTaskService(prisma, engine); + const parentResult = await triggerSvc.call({ + taskId: "parent-task", + environment: env, + body: { payload: {} }, + options: {}, + }); + const parentRunId = parentResult!.run.friendlyId; + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + const result = await service.call("empty.event", env, { data: "test" }, { + parentRunId, + }); + + expect(result.runs).toHaveLength(0); + expect(result.eventId).toMatch(/^evt_/); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "event log writer receives correct fanOutCount and event metadata", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, ["task-a", "task-b"]); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "audit.event", + version: "1.0", + projectId: env.projectId, + }, + }); + + for (const taskSlug of ["task-a", "task-b"]) { + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug, + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + } + + const logEntries: any[] = []; + const mockLogWriter = (entry: any) => logEntries.push(entry); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn, mockLogWriter); + + const result = await service.call("audit.event", env, { action: "test" }, { + idempotencyKey: "audit-key-1", + metadata: { source: "test" }, + }); + + expect(result.runs).toHaveLength(2); + + // Verify event log writer was called + expect(logEntries).toHaveLength(1); + const entry = logEntries[0]; + expect(entry.eventType).toBe("audit.event"); + expect(entry.fanOutCount).toBe(2); + expect(entry.idempotencyKey).toBe("audit-key-1"); + expect(entry.metadata).toEqual({ source: "test" }); + expect(entry.environmentId).toBe(env.id); + expect(entry.projectId).toBe(env.projectId); + } finally { + await engine.quit(); + } + } + ); }); From cc4b888a38061caff3ba105e0198dfb81dec6c7a Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:43:43 -0800 Subject: [PATCH 33/65] chore: add phase 6 changeset for publishAndWait Co-Authored-By: Claude Opus 4.6 --- .changeset/event-publish-and-wait.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .changeset/event-publish-and-wait.md diff --git a/.changeset/event-publish-and-wait.md b/.changeset/event-publish-and-wait.md new file mode 100644 index 00000000000..c4a94ec8cf5 --- /dev/null +++ b/.changeset/event-publish-and-wait.md @@ -0,0 +1,11 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +"apps-webapp": patch +--- + +Add publishAndWait support to the event system. Events can now be published +with parentRunId to create waitpoints for each subscriber run, enabling +fan-out / fan-in patterns. The SDK exposes `event.publishAndWait()` which +publishes, blocks the parent run, and returns aggregated results from all +subscriber completions. From 6454ef3a6c584c60bcf69df9338396d1661306b9 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Fri, 27 Feb 2026 23:54:27 -0800 Subject: [PATCH 34/65] =?UTF-8?q?feat(events):=20phase=207=20=E2=80=94=20r?= =?UTF-8?q?ate=20limiting=20+=20backpressure?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add rateLimit JSON field to EventDefinition (Prisma migration) - Add EventRateLimitManifest to core schemas - Add rateLimit option to SDK event() definition - Flow rateLimit through deploy registration (EventManifest → DB) - Create EventRateLimitChecker interface + InMemoryEventRateLimitChecker - Integrate rate limiter into PublishEventService (checks after event lookup) - Add EventPublishRateLimitError with eventSlug, limit, remaining, retryAfterMs - All 3 publish routes return 429 with rate limit headers when exceeded - Add 11 unit tests for InMemoryEventRateLimitChecker + windowToMs + config parsing - Add 2 integration tests: rate limit blocks excess publishes, no limit = unlimited All 24 integration tests pass, 470 core unit tests pass. Co-Authored-By: Claude Opus 4.6 --- .../api.v1.events.$eventId.batchPublish.ts | 28 +++- .../routes/api.v1.events.$eventId.publish.ts | 27 +++- .../api.v1.events.$eventId.publishAndWait.ts | 27 +++- .../services/createBackgroundWorker.server.ts | 2 + .../events/eventRateLimiter.server.ts | 120 ++++++++++++++++++ .../events/eventRateLimiterGlobal.server.ts | 9 ++ .../v3/services/events/publishEvent.server.ts | 40 +++++- .../test/engine/eventRateLimiter.test.ts | 114 +++++++++++++++++ apps/webapp/test/engine/publishEvent.test.ts | 110 ++++++++++++++++ .../migration.sql | 2 + .../database/prisma/schema.prisma | 3 + .../core/src/v3/resource-catalog/catalog.ts | 2 + .../standardResourceCatalog.ts | 1 + packages/core/src/v3/schemas/schemas.ts | 11 ++ packages/trigger-sdk/src/v3/events.ts | 13 +- 15 files changed, 498 insertions(+), 11 deletions(-) create mode 100644 apps/webapp/app/v3/services/events/eventRateLimiter.server.ts create mode 100644 apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts create mode 100644 apps/webapp/test/engine/eventRateLimiter.test.ts create mode 100644 internal-packages/database/prisma/migrations/20260228074544_add_rate_limit_to_event_definition/migration.sql diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts index 5ec88d649a7..fc26760e890 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts @@ -3,8 +3,13 @@ import { BatchPublishEventRequestBody } from "@trigger.dev/core/v3"; import { z } from "zod"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; -import { PublishEventService, PublishEventResult } from "~/v3/services/events/publishEvent.server"; +import { + EventPublishRateLimitError, + PublishEventService, + PublishEventResult, +} from "~/v3/services/events/publishEvent.server"; import { writeEventLog } from "~/v3/services/events/eventLogWriter.server"; +import { eventPublishRateLimitChecker } from "~/v3/services/events/eventRateLimiterGlobal.server"; const ParamsSchema = z.object({ eventId: z.string(), @@ -22,7 +27,12 @@ const { action, loader } = createActionApiRoute( }, }, async ({ body, params, authentication }) => { - const service = new PublishEventService(undefined, undefined, writeEventLog); + const service = new PublishEventService( + undefined, + undefined, + writeEventLog, + eventPublishRateLimitChecker + ); try { const results: PublishEventResult[] = []; @@ -47,7 +57,19 @@ const { action, loader } = createActionApiRoute( return json({ results }, { status: 200 }); } catch (error) { - if (error instanceof ServiceValidationError) { + if (error instanceof EventPublishRateLimitError) { + return json( + { error: error.message }, + { + status: 429, + headers: { + "x-ratelimit-limit": String(error.limit), + "x-ratelimit-remaining": String(error.remaining), + "retry-after": String(Math.ceil(error.retryAfterMs / 1000)), + }, + } + ); + } else if (error instanceof ServiceValidationError) { return json({ error: error.message }, { status: error.status ?? 422 }); } else if (error instanceof Error) { return json({ error: error.message }, { status: 500 }); diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts index b7bc4bb6978..46435da7788 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.publish.ts @@ -3,8 +3,12 @@ import { PublishEventRequestBody } from "@trigger.dev/core/v3"; import { z } from "zod"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; -import { PublishEventService } from "~/v3/services/events/publishEvent.server"; +import { + EventPublishRateLimitError, + PublishEventService, +} from "~/v3/services/events/publishEvent.server"; import { writeEventLog } from "~/v3/services/events/eventLogWriter.server"; +import { eventPublishRateLimitChecker } from "~/v3/services/events/eventRateLimiterGlobal.server"; const ParamsSchema = z.object({ eventId: z.string(), @@ -22,7 +26,12 @@ const { action, loader } = createActionApiRoute( }, }, async ({ body, params, authentication }) => { - const service = new PublishEventService(undefined, undefined, writeEventLog); + const service = new PublishEventService( + undefined, + undefined, + writeEventLog, + eventPublishRateLimitChecker + ); try { const result = await service.call( @@ -41,7 +50,19 @@ const { action, loader } = createActionApiRoute( return json(result, { status: 200 }); } catch (error) { - if (error instanceof ServiceValidationError) { + if (error instanceof EventPublishRateLimitError) { + return json( + { error: error.message }, + { + status: 429, + headers: { + "x-ratelimit-limit": String(error.limit), + "x-ratelimit-remaining": String(error.remaining), + "retry-after": String(Math.ceil(error.retryAfterMs / 1000)), + }, + } + ); + } else if (error instanceof ServiceValidationError) { return json({ error: error.message }, { status: error.status ?? 422 }); } else if (error instanceof Error) { return json({ error: error.message }, { status: 500 }); diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts b/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts index 7c4f3f0f228..d7c73b4536d 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts @@ -3,8 +3,12 @@ import { PublishAndWaitEventRequestBody } from "@trigger.dev/core/v3"; import { z } from "zod"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { ServiceValidationError } from "~/v3/services/baseService.server"; -import { PublishEventService } from "~/v3/services/events/publishEvent.server"; +import { + EventPublishRateLimitError, + PublishEventService, +} from "~/v3/services/events/publishEvent.server"; import { writeEventLog } from "~/v3/services/events/eventLogWriter.server"; +import { eventPublishRateLimitChecker } from "~/v3/services/events/eventRateLimiterGlobal.server"; const ParamsSchema = z.object({ eventId: z.string(), @@ -30,7 +34,12 @@ const { action, loader } = createActionApiRoute( ); } - const service = new PublishEventService(undefined, undefined, writeEventLog); + const service = new PublishEventService( + undefined, + undefined, + writeEventLog, + eventPublishRateLimitChecker + ); try { const result = await service.call( @@ -59,7 +68,19 @@ const { action, loader } = createActionApiRoute( { status: 200 } ); } catch (error) { - if (error instanceof ServiceValidationError) { + if (error instanceof EventPublishRateLimitError) { + return json( + { error: error.message }, + { + status: 429, + headers: { + "x-ratelimit-limit": String(error.limit), + "x-ratelimit-remaining": String(error.remaining), + "retry-after": String(Math.ceil(error.retryAfterMs / 1000)), + }, + } + ); + } else if (error instanceof ServiceValidationError) { return json({ error: error.message }, { status: error.status ?? 422 }); } else if (error instanceof Error) { return json({ error: error.message }, { status: 500 }); diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index f93c6b8a8df..11aad019d69 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -351,11 +351,13 @@ async function syncWorkerEvents( version: event.version ?? "1.0", description: event.description, schema: event.schema as any ?? undefined, + rateLimit: event.rateLimit as any ?? undefined, projectId: worker.projectId, }, update: { description: event.description, schema: event.schema as any ?? undefined, + rateLimit: event.rateLimit as any ?? undefined, }, }); diff --git a/apps/webapp/app/v3/services/events/eventRateLimiter.server.ts b/apps/webapp/app/v3/services/events/eventRateLimiter.server.ts new file mode 100644 index 00000000000..731a7abd168 --- /dev/null +++ b/apps/webapp/app/v3/services/events/eventRateLimiter.server.ts @@ -0,0 +1,120 @@ +import { z } from "zod"; +import { logger } from "~/services/logger.server"; + +/** + * Schema for per-event rate limit configuration stored in EventDefinition.rateLimit. + * + * Example: { "limit": 100, "window": "1m" } + */ +export const EventRateLimitConfig = z.object({ + /** Maximum number of publishes allowed in the window */ + limit: z.number().int().positive(), + /** Time window — e.g. "1m", "10s", "1h" */ + window: z.string().regex(/^\d+[smh]$/, 'Must be a duration like "10s", "1m", "1h"'), +}); + +export type EventRateLimitConfig = z.infer; + +/** Result of a rate limit check */ +export interface EventRateLimitResult { + allowed: boolean; + limit?: number; + remaining?: number; + /** Milliseconds until the window resets */ + retryAfter?: number; +} + +/** Interface for pluggable rate limit backends */ +export interface EventRateLimitChecker { + check(key: string, config: EventRateLimitConfig): Promise; +} + +/** + * Parse the rateLimit JSON from the database. Returns undefined if not set or invalid. + */ +export function parseEventRateLimitConfig( + rawConfig: unknown +): EventRateLimitConfig | undefined { + if (!rawConfig) return undefined; + const result = EventRateLimitConfig.safeParse(rawConfig); + if (!result.success) { + logger.warn("Invalid event rate limit config", { + config: rawConfig, + error: result.error.message, + }); + return undefined; + } + return result.data; +} + +/** Convert window string (e.g. "1m", "30s", "2h") to milliseconds */ +export function windowToMs(window: string): number { + const match = window.match(/^(\d+)([smh])$/); + if (!match) throw new Error(`Invalid window format: ${window}`); + + const value = parseInt(match[1]!, 10); + const unit = match[2]!; + + switch (unit) { + case "s": + return value * 1000; + case "m": + return value * 60 * 1000; + case "h": + return value * 60 * 60 * 1000; + default: + throw new Error(`Unknown time unit: ${unit}`); + } +} + +/** + * In-memory sliding window rate limiter. + * Suitable for single-process use and testing. + * For production, use a Redis-backed implementation. + */ +export class InMemoryEventRateLimitChecker implements EventRateLimitChecker { + private windows = new Map(); + + async check(key: string, config: EventRateLimitConfig): Promise { + const now = Date.now(); + const windowMs = windowToMs(config.window); + + // Get or create the timestamp array for this key + let timestamps = this.windows.get(key); + if (!timestamps) { + timestamps = []; + this.windows.set(key, timestamps); + } + + // Remove expired entries + const cutoff = now - windowMs; + while (timestamps.length > 0 && timestamps[0]! < cutoff) { + timestamps.shift(); + } + + if (timestamps.length < config.limit) { + timestamps.push(now); + return { + allowed: true, + limit: config.limit, + remaining: config.limit - timestamps.length, + }; + } + + // Rate limited — calculate retry after + const oldestInWindow = timestamps[0]!; + const retryAfter = oldestInWindow + windowMs - now; + + return { + allowed: false, + limit: config.limit, + remaining: 0, + retryAfter: Math.max(0, retryAfter), + }; + } + + /** Reset all state (useful for testing) */ + reset() { + this.windows.clear(); + } +} diff --git a/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts b/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts new file mode 100644 index 00000000000..05795b0a280 --- /dev/null +++ b/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts @@ -0,0 +1,9 @@ +import { InMemoryEventRateLimitChecker } from "./eventRateLimiter.server"; + +/** + * Global singleton for the event publish rate limiter. + * + * Uses the in-memory sliding window implementation. + * For production at scale, this can be swapped for a Redis-backed checker. + */ +export const eventPublishRateLimitChecker = new InMemoryEventRateLimitChecker(); diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index ace9f194746..ab18fab0331 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -10,6 +10,10 @@ import { TriggerTaskServiceOptions, TriggerTaskServiceResult, } from "../triggerTask.server"; +import { + EventRateLimitChecker, + parseEventRateLimitConfig, +} from "./eventRateLimiter.server"; import { SchemaRegistryService } from "./schemaRegistry.server"; export type PublishEventOptions = { @@ -58,14 +62,29 @@ export type EventLogEntry = { fanOutCount: number; }; +/** Error thrown when a publish rate limit is exceeded */ +export class EventPublishRateLimitError extends Error { + constructor( + public readonly eventSlug: string, + public readonly limit: number, + public readonly remaining: number, + public readonly retryAfterMs: number + ) { + super(`Event "${eventSlug}" publish rate limit exceeded`); + this.name = "EventPublishRateLimitError"; + } +} + export class PublishEventService extends BaseService { private readonly _triggerFn: TriggerFn; private readonly _eventLogWriter?: EventLogWriter; + private readonly _rateLimitChecker?: EventRateLimitChecker; constructor( prisma?: PrismaClientOrTransaction, triggerFn?: TriggerFn, - eventLogWriter?: EventLogWriter + eventLogWriter?: EventLogWriter, + rateLimitChecker?: EventRateLimitChecker ) { super(prisma); this._triggerFn = @@ -75,6 +94,7 @@ export class PublishEventService extends BaseService { return svc.call(taskId, environment, body, options); }); this._eventLogWriter = eventLogWriter; + this._rateLimitChecker = rateLimitChecker; } public async call( @@ -106,6 +126,24 @@ export class PublishEventService extends BaseService { span.setAttribute("eventDefinitionId", eventDefinition.id); + // 1b. Check rate limit (if configured and checker is available) + if (this._rateLimitChecker && eventDefinition.rateLimit) { + const rateLimitConfig = parseEventRateLimitConfig(eventDefinition.rateLimit); + if (rateLimitConfig) { + const rateLimitKey = `${environment.projectId}:${eventSlug}`; + const result = await this._rateLimitChecker.check(rateLimitKey, rateLimitConfig); + if (!result.allowed) { + span.setAttribute("rateLimited", true); + throw new EventPublishRateLimitError( + eventSlug, + result.limit ?? rateLimitConfig.limit, + result.remaining ?? 0, + result.retryAfter ?? 0 + ); + } + } + } + // 2. Validate payload against stored schema (if exists) if (eventDefinition.schema) { const schemaRegistry = new SchemaRegistryService(this._prisma); diff --git a/apps/webapp/test/engine/eventRateLimiter.test.ts b/apps/webapp/test/engine/eventRateLimiter.test.ts new file mode 100644 index 00000000000..c175ee6db34 --- /dev/null +++ b/apps/webapp/test/engine/eventRateLimiter.test.ts @@ -0,0 +1,114 @@ +import { describe, expect, test } from "vitest"; +import { + InMemoryEventRateLimitChecker, + windowToMs, + parseEventRateLimitConfig, +} from "../../app/v3/services/events/eventRateLimiter.server"; + +describe("windowToMs", () => { + test("converts seconds", () => { + expect(windowToMs("10s")).toBe(10_000); + expect(windowToMs("1s")).toBe(1_000); + }); + + test("converts minutes", () => { + expect(windowToMs("1m")).toBe(60_000); + expect(windowToMs("5m")).toBe(300_000); + }); + + test("converts hours", () => { + expect(windowToMs("1h")).toBe(3_600_000); + expect(windowToMs("2h")).toBe(7_200_000); + }); + + test("throws on invalid format", () => { + expect(() => windowToMs("abc")).toThrow("Invalid window format"); + expect(() => windowToMs("10d")).toThrow("Invalid window format"); + expect(() => windowToMs("")).toThrow("Invalid window format"); + }); +}); + +describe("parseEventRateLimitConfig", () => { + test("parses valid config", () => { + const result = parseEventRateLimitConfig({ limit: 100, window: "1m" }); + expect(result).toEqual({ limit: 100, window: "1m" }); + }); + + test("returns undefined for null/undefined", () => { + expect(parseEventRateLimitConfig(null)).toBeUndefined(); + expect(parseEventRateLimitConfig(undefined)).toBeUndefined(); + }); + + test("returns undefined for invalid config", () => { + expect(parseEventRateLimitConfig({ limit: -1, window: "1m" })).toBeUndefined(); + expect(parseEventRateLimitConfig({ limit: 100 })).toBeUndefined(); + expect(parseEventRateLimitConfig("not an object")).toBeUndefined(); + }); +}); + +describe("InMemoryEventRateLimitChecker", () => { + test("allows requests within limit", async () => { + const checker = new InMemoryEventRateLimitChecker(); + const config = { limit: 3, window: "10s" }; + + const r1 = await checker.check("key1", config); + expect(r1.allowed).toBe(true); + expect(r1.remaining).toBe(2); + + const r2 = await checker.check("key1", config); + expect(r2.allowed).toBe(true); + expect(r2.remaining).toBe(1); + + const r3 = await checker.check("key1", config); + expect(r3.allowed).toBe(true); + expect(r3.remaining).toBe(0); + }); + + test("blocks requests exceeding limit", async () => { + const checker = new InMemoryEventRateLimitChecker(); + const config = { limit: 2, window: "10s" }; + + await checker.check("key1", config); + await checker.check("key1", config); + + const r3 = await checker.check("key1", config); + expect(r3.allowed).toBe(false); + expect(r3.remaining).toBe(0); + expect(r3.retryAfter).toBeDefined(); + expect(r3.retryAfter!).toBeGreaterThan(0); + }); + + test("different keys are independent", async () => { + const checker = new InMemoryEventRateLimitChecker(); + const config = { limit: 1, window: "10s" }; + + const r1 = await checker.check("key-a", config); + expect(r1.allowed).toBe(true); + + const r2 = await checker.check("key-b", config); + expect(r2.allowed).toBe(true); + + // key-a is now exhausted + const r3 = await checker.check("key-a", config); + expect(r3.allowed).toBe(false); + + // key-b is also exhausted + const r4 = await checker.check("key-b", config); + expect(r4.allowed).toBe(false); + }); + + test("reset clears all state", async () => { + const checker = new InMemoryEventRateLimitChecker(); + const config = { limit: 1, window: "10s" }; + + await checker.check("key1", config); + + const blocked = await checker.check("key1", config); + expect(blocked.allowed).toBe(false); + + checker.reset(); + + const afterReset = await checker.check("key1", config); + expect(afterReset.allowed).toBe(true); + }); +}); diff --git a/apps/webapp/test/engine/publishEvent.test.ts b/apps/webapp/test/engine/publishEvent.test.ts index cb2149a9d0c..5c281e816ce 100644 --- a/apps/webapp/test/engine/publishEvent.test.ts +++ b/apps/webapp/test/engine/publishEvent.test.ts @@ -39,8 +39,10 @@ import { import { RunEngineTriggerTaskService } from "../../app/runEngine/services/triggerTask.server"; import { PublishEventService, + EventPublishRateLimitError, type TriggerFn, } from "../../app/v3/services/events/publishEvent.server"; +import { InMemoryEventRateLimitChecker } from "../../app/v3/services/events/eventRateLimiter.server"; import { ServiceValidationError } from "../../app/v3/services/common.server"; vi.setConfig({ testTimeout: 120_000 }); @@ -1409,4 +1411,112 @@ describe("PublishEventService", () => { } } ); + + containerTest( + "rate limiter blocks publishes that exceed the configured limit", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "limited-handler"); + + // Create event definition with rate limit: 2 publishes per 10s + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "rate.limited.event", + version: "1.0", + projectId: env.projectId, + rateLimit: { limit: 2, window: "10s" }, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "limited-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const rateLimitChecker = new InMemoryEventRateLimitChecker(); + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn, undefined, rateLimitChecker); + + // First two publishes should succeed + const result1 = await service.call("rate.limited.event", env, { n: 1 }); + expect(result1.runs).toHaveLength(1); + + const result2 = await service.call("rate.limited.event", env, { n: 2 }); + expect(result2.runs).toHaveLength(1); + + // Third publish should be rate limited + await expect( + service.call("rate.limited.event", env, { n: 3 }) + ).rejects.toThrow(EventPublishRateLimitError); + + // Verify error properties + try { + await service.call("rate.limited.event", env, { n: 4 }); + expect.unreachable("Should have thrown"); + } catch (error) { + expect(error).toBeInstanceOf(EventPublishRateLimitError); + const rle = error as EventPublishRateLimitError; + expect(rle.eventSlug).toBe("rate.limited.event"); + expect(rle.limit).toBe(2); + expect(rle.remaining).toBe(0); + expect(rle.retryAfterMs).toBeGreaterThan(0); + } + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "rate limiter does not block when no rateLimit is configured", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "unlimited-handler"); + + // Create event definition WITHOUT rate limit + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "unlimited.event", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "unlimited-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const rateLimitChecker = new InMemoryEventRateLimitChecker(); + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn, undefined, rateLimitChecker); + + // Should be able to publish many times without limit + for (let i = 0; i < 5; i++) { + const result = await service.call("unlimited.event", env, { n: i }); + expect(result.runs).toHaveLength(1); + } + } finally { + await engine.quit(); + } + } + ); }); diff --git a/internal-packages/database/prisma/migrations/20260228074544_add_rate_limit_to_event_definition/migration.sql b/internal-packages/database/prisma/migrations/20260228074544_add_rate_limit_to_event_definition/migration.sql new file mode 100644 index 00000000000..b63e81d553e --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260228074544_add_rate_limit_to_event_definition/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "public"."EventDefinition" ADD COLUMN "rateLimit" JSONB; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 329addd2a68..7a081ee01a9 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -601,6 +601,9 @@ model EventDefinition { deprecatedAt DateTime? deprecatedMessage String? + // Rate limiting (Phase 7) — JSON config e.g. { "limit": 100, "window": "1m" } + rateLimit Json? + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) projectId String diff --git a/packages/core/src/v3/resource-catalog/catalog.ts b/packages/core/src/v3/resource-catalog/catalog.ts index 081c45d73ce..72998235bfa 100644 --- a/packages/core/src/v3/resource-catalog/catalog.ts +++ b/packages/core/src/v3/resource-catalog/catalog.ts @@ -7,6 +7,8 @@ export interface EventMetadata { description?: string; /** Raw schema (Zod, etc.) stored for later conversion to JSON Schema */ rawSchema?: unknown; + /** Rate limit configuration */ + rateLimit?: { limit: number; window: string }; } export interface ResourceCatalog { diff --git a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts index c3a10ed23a7..3ca4a567fe5 100644 --- a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts +++ b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts @@ -190,6 +190,7 @@ export class StandardResourceCatalog implements ResourceCatalog { id: event.id, version: event.version, description: event.description, + rateLimit: event.rateLimit, })); } diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index cf02ead9974..5d5dae34ead 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -174,6 +174,15 @@ export const QueueManifest = z.object({ export type QueueManifest = z.infer; +export const EventRateLimitManifest = z.object({ + /** Maximum number of publishes allowed in the window */ + limit: z.number().int().positive(), + /** Time window — e.g. "1m", "10s", "1h" */ + window: z.string(), +}); + +export type EventRateLimitManifest = z.infer; + export const EventManifest = z.object({ /** Unique event identifier (e.g. "order.created") */ id: z.string(), @@ -183,6 +192,8 @@ export const EventManifest = z.object({ description: z.string().optional(), /** JSON Schema of the event payload (Draft 7) */ schema: z.unknown().optional(), + /** Rate limit configuration */ + rateLimit: EventRateLimitManifest.optional(), }); export type EventManifest = z.infer; diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index f6ee1285c66..2cdf5f43a9f 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -18,6 +18,14 @@ type Schema = TaskSchema; // ---- Types ---- +/** Rate limit configuration for an event */ +export interface EventRateLimit { + /** Maximum number of publishes allowed in the window */ + limit: number; + /** Time window — e.g. "1m", "10s", "1h" */ + window: string; +} + /** Options for defining an event */ export interface EventOptions { /** Unique event identifier (e.g. "order.created") */ @@ -28,6 +36,8 @@ export interface EventOptions( export function createEvent( options: EventOptions ): EventDefinition { - const { id, schema, description, version = "1.0" } = options; + const { id, schema, description, version = "1.0", rateLimit } = options; // Build the parse function if a schema is provided let parseFn: SchemaParseFn | undefined; @@ -245,6 +255,7 @@ export function createEvent Date: Fri, 27 Feb 2026 23:54:38 -0800 Subject: [PATCH 35/65] chore: add phase 7 changeset for event rate limiting Co-Authored-By: Claude Opus 4.6 --- .changeset/event-rate-limiting.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 .changeset/event-rate-limiting.md diff --git a/.changeset/event-rate-limiting.md b/.changeset/event-rate-limiting.md new file mode 100644 index 00000000000..ede7a7b75ff --- /dev/null +++ b/.changeset/event-rate-limiting.md @@ -0,0 +1,11 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +"@trigger.dev/database": patch +"apps-webapp": patch +--- + +Add per-event rate limiting to the pub/sub system. Events can now be configured +with a `rateLimit: { limit, window }` option that limits how many times they can +be published within a sliding time window. When exceeded, the API returns 429 +with `x-ratelimit-limit`, `x-ratelimit-remaining`, and `retry-after` headers. From 42a3844e07f83efd60ae06050782875a26505fd7 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 00:04:01 -0800 Subject: [PATCH 36/65] =?UTF-8?q?feat(events):=20phase=208=20=E2=80=94=20o?= =?UTF-8?q?bservability=20+=20developer=20experience?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add event_counts_v1 query builder to ClickHouse client - Add GET /api/v1/events/{eventId}/stats endpoint - Queries ClickHouse materialized view for aggregated event counts - Supports periods: 1h, 6h, 24h, 7d, 30d - Returns time-bucketed event counts + fan-out totals - Add GetEventStatsResponseBody schema to core - Add getEventStats() to API client - Add validate() method to EventDefinition in SDK - Pre-flight payload validation against event schema - Returns validated payload or throws on failure All 24 integration tests pass, 470 core tests pass, 11 rate limiter unit tests pass. Co-Authored-By: Claude Opus 4.6 --- .../routes/api.v1.events.$eventId.stats.ts | 87 +++++++++++++++++++ .../clickhouse/src/eventCounts.ts | 28 ++++++ internal-packages/clickhouse/src/index.ts | 8 ++ packages/core/src/v3/apiClient/index.ts | 22 +++++ packages/core/src/v3/schemas/api.ts | 22 +++++ packages/trigger-sdk/src/v3/events.ts | 12 +++ 6 files changed, 179 insertions(+) create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.stats.ts create mode 100644 internal-packages/clickhouse/src/eventCounts.ts diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts b/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts new file mode 100644 index 00000000000..fe914fa0c96 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts @@ -0,0 +1,87 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { clickhouseClient } from "~/services/clickhouseInstance.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +export const loader = createLoaderApiRoute( + { + params: ParamsSchema, + corsStrategy: "all", + authorization: { + action: "read", + resource: (params) => ({ tasks: params.eventId }), + superScopes: ["read:runs", "read:all", "admin"], + }, + findResource: async () => 1 as const, + }, + async ({ params, authentication, request }) => { + const url = new URL(request.url); + const period = url.searchParams.get("period") ?? "24h"; + + // Parse period to a ClickHouse interval + const intervalMap: Record = { + "1h": "1 HOUR", + "6h": "6 HOUR", + "24h": "24 HOUR", + "7d": "7 DAY", + "30d": "30 DAY", + }; + + const interval = intervalMap[period]; + if (!interval) { + return json( + { error: `Invalid period "${period}". Use: 1h, 6h, 24h, 7d, 30d` }, + { status: 400 } + ); + } + + const queryBuilder = clickhouseClient.eventCounts.queryBuilder(); + + queryBuilder + .where("project_id = {projectId: String}", { + projectId: authentication.environment.projectId, + }) + .where("environment_id = {environmentId: String}", { + environmentId: authentication.environment.id, + }) + .where("event_type = {eventType: String}", { + eventType: params.eventId, + }) + .where(`bucket_start >= now() - INTERVAL ${interval}`) + .orderBy("bucket_start ASC"); + + const [queryError, result] = await queryBuilder.execute(); + + if (queryError) { + return json({ error: "Failed to query event stats" }, { status: 500 }); + } + + let totalEventCount = 0; + let totalFanOut = 0; + + const buckets = result.map((row) => { + totalEventCount += row.event_count; + totalFanOut += row.total_fan_out; + + return { + timestamp: row.bucket_start, + eventCount: row.event_count, + fanOutCount: row.total_fan_out, + }; + }); + + return json({ + eventType: params.eventId, + period, + buckets, + totals: { + eventCount: totalEventCount, + fanOutCount: totalFanOut, + }, + }); + } +); diff --git a/internal-packages/clickhouse/src/eventCounts.ts b/internal-packages/clickhouse/src/eventCounts.ts new file mode 100644 index 00000000000..3af7f0df72b --- /dev/null +++ b/internal-packages/clickhouse/src/eventCounts.ts @@ -0,0 +1,28 @@ +import { z } from "zod"; +import { ClickhouseReader } from "./client/types.js"; + +export const EventCountsV1Output = z.object({ + project_id: z.string(), + environment_id: z.string(), + event_type: z.string(), + bucket_start: z.string(), + event_count: z.number().int(), + total_fan_out: z.number().int(), +}); + +export type EventCountsV1Output = z.output; + +export function getEventCountsQueryBuilder(ch: ClickhouseReader) { + return ch.queryBuilder({ + name: "getEventCounts", + baseQuery: `SELECT + project_id, + environment_id, + event_type, + bucket_start, + event_count, + total_fan_out + FROM trigger_dev.event_counts_v1`, + schema: EventCountsV1Output, + }); +} diff --git a/internal-packages/clickhouse/src/index.ts b/internal-packages/clickhouse/src/index.ts index b5b183f2eca..9ca361b7aee 100644 --- a/internal-packages/clickhouse/src/index.ts +++ b/internal-packages/clickhouse/src/index.ts @@ -28,6 +28,7 @@ import { } from "./taskEvents.js"; import { insertMetrics } from "./metrics.js"; import { insertEventLog, getEventLogQueryBuilder } from "./eventLog.js"; +import { getEventCountsQueryBuilder } from "./eventCounts.js"; import { Logger, type LogLevel } from "@trigger.dev/core/logger"; import type { Agent as HttpAgent } from "http"; import type { Agent as HttpsAgent } from "https"; @@ -36,6 +37,7 @@ export type * from "./taskRuns.js"; export type * from "./taskEvents.js"; export type * from "./metrics.js"; export type * from "./eventLog.js"; +export type * from "./eventCounts.js"; export type * from "./client/queryBuilder.js"; // Re-export column constants, indices, and type-safe accessors @@ -238,4 +240,10 @@ export class ClickHouse { queryBuilder: getEventLogQueryBuilder(this.reader), }; } + + get eventCounts() { + return { + queryBuilder: getEventCountsQueryBuilder(this.reader), + }; + } } diff --git a/packages/core/src/v3/apiClient/index.ts b/packages/core/src/v3/apiClient/index.ts index 47f4767f6a0..ad37c2046df 100644 --- a/packages/core/src/v3/apiClient/index.ts +++ b/packages/core/src/v3/apiClient/index.ts @@ -15,6 +15,7 @@ import { GetEventHistoryResponseBody, GetEventResponseBody, GetEventSchemaResponseBody, + GetEventStatsResponseBody, ListDeadLetterEventsResponseBody, ListEventsResponseBody, ReplayEventsRequestBody, @@ -1554,6 +1555,27 @@ export class ApiClient { ); } + getEventStats( + eventId: string, + params?: { period?: string }, + requestOptions?: ZodFetchOptions + ) { + const encodedEventId = encodeURIComponent(eventId); + const searchParams = new URLSearchParams(); + if (params?.period) searchParams.set("period", params.period); + const qs = searchParams.toString(); + + return zodfetch( + GetEventStatsResponseBody, + `${this.baseUrl}/api/v1/events/${encodedEventId}/stats${qs ? `?${qs}` : ""}`, + { + method: "GET", + headers: this.#getHeaders(false), + }, + mergeRequestOptions(this.defaultRequestOptions, requestOptions) + ); + } + getEventHistory( eventId: string, params?: { diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 6a4ea903512..f0bef68448d 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1835,3 +1835,25 @@ export const RetryAllDeadLetterEventsResponseBody = z.object({ export type RetryAllDeadLetterEventsResponseBody = z.infer< typeof RetryAllDeadLetterEventsResponseBody >; + +// ---- Event Stats schemas ---- + +export const EventStatsTimeBucket = z.object({ + timestamp: z.string(), + eventCount: z.number().int(), + fanOutCount: z.number().int(), +}); + +export type EventStatsTimeBucket = z.infer; + +export const GetEventStatsResponseBody = z.object({ + eventType: z.string(), + period: z.string(), + buckets: z.array(EventStatsTimeBucket), + totals: z.object({ + eventCount: z.number().int(), + fanOutCount: z.number().int(), + }), +}); + +export type GetEventStatsResponseBody = z.infer; diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index 2cdf5f43a9f..4578386c0e2 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -97,6 +97,13 @@ export interface EventDefinition { * Can only be called from inside a task.run(). */ publishAndWait(payload: TPayload, options?: PublishEventOptions): Promise; + + /** + * Validate a payload against the event's schema (if one was provided). + * Returns the validated payload or throws if validation fails. + * Useful for pre-flight checks before publishing. + */ + validate(payload: TPayload): Promise; } /** Any event definition (for generic constraints) */ @@ -247,6 +254,11 @@ export function createEvent Date: Sat, 28 Feb 2026 00:04:12 -0800 Subject: [PATCH 37/65] chore: add phase 8 changeset for observability + DX Co-Authored-By: Claude Opus 4.6 --- .changeset/event-observability-dx.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .changeset/event-observability-dx.md diff --git a/.changeset/event-observability-dx.md b/.changeset/event-observability-dx.md new file mode 100644 index 00000000000..b2689f5ac5c --- /dev/null +++ b/.changeset/event-observability-dx.md @@ -0,0 +1,10 @@ +--- +"@trigger.dev/core": patch +"@trigger.dev/sdk": patch +"@internal/clickhouse": patch +"apps-webapp": patch +--- + +Add observability and developer experience improvements to the event system. +New stats API endpoint aggregates event publish counts and fan-out metrics from +ClickHouse. SDK gains a `validate()` method for pre-flight payload validation. From 81c09cda5fce4eff4bd0845df039f598a9eb84d8 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 17:07:33 -0800 Subject: [PATCH 38/65] =?UTF-8?q?feat(events):=20phase=209.1+9.2=20?= =?UTF-8?q?=E2=80=94=20Redis=20rate=20limiter=20+=20hash-based=20consumer?= =?UTF-8?q?=20groups?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 9.1: Add RedisEventRateLimitChecker using @upstash/ratelimit sliding window. Uses existing RATE_LIMIT_REDIS_HOST infrastructure. Falls back to InMemory when Redis is not configured. Caches Ratelimit instances per config. 9.2: Replace time-based consumer group selection (Date.now() / 1000 % N) with FNV-1a hash of eventId + groupName. This gives deterministic, evenly-distributed routing — same eventId always picks the same member, different eventIds distribute evenly across group members. Co-Authored-By: Claude Opus 4.6 --- .../events/eventRateLimiter.server.ts | 63 +++++++++++++++++++ .../events/eventRateLimiterGlobal.server.ts | 39 ++++++++++-- .../v3/services/events/publishEvent.server.ts | 41 ++++++++---- 3 files changed, 126 insertions(+), 17 deletions(-) diff --git a/apps/webapp/app/v3/services/events/eventRateLimiter.server.ts b/apps/webapp/app/v3/services/events/eventRateLimiter.server.ts index 731a7abd168..51e5fd55934 100644 --- a/apps/webapp/app/v3/services/events/eventRateLimiter.server.ts +++ b/apps/webapp/app/v3/services/events/eventRateLimiter.server.ts @@ -1,4 +1,6 @@ import { z } from "zod"; +import { Ratelimit } from "@upstash/ratelimit"; +import type { Duration, RateLimiterRedisClient } from "~/services/rateLimiter.server"; import { logger } from "~/services/logger.server"; /** @@ -118,3 +120,64 @@ export class InMemoryEventRateLimitChecker implements EventRateLimitChecker { this.windows.clear(); } } + +/** + * Convert an event rate limit window string (e.g. "30s", "1m", "2h") to an + * Upstash Duration string (e.g. "30 s", "1 m", "2 h"). + */ +function windowToUpstashDuration(window: string): Duration { + const match = window.match(/^(\d+)([smh])$/); + if (!match) throw new Error(`Invalid window format: ${window}`); + + const value = match[1]!; + const unit = match[2]!; + + const unitMap: Record = { s: "s", m: "m", h: "h" }; + return `${value} ${unitMap[unit]}` as Duration; +} + +/** + * Redis-backed sliding window rate limiter using @upstash/ratelimit. + * Survives process restarts and works across multiple instances. + */ +export class RedisEventRateLimitChecker implements EventRateLimitChecker { + private limiters = new Map(); + + constructor(private readonly redisClient: RateLimiterRedisClient) {} + + async check(key: string, config: EventRateLimitConfig): Promise { + // Get or create a limiter for this specific config (keyed by limit+window) + const configKey = `${config.limit}:${config.window}`; + let limiter = this.limiters.get(configKey); + + if (!limiter) { + limiter = new Ratelimit({ + redis: this.redisClient, + limiter: Ratelimit.slidingWindow(config.limit, windowToUpstashDuration(config.window)), + ephemeralCache: new Map(), + analytics: false, + prefix: "ratelimit:event-publish", + }); + this.limiters.set(configKey, limiter); + } + + const result = await limiter.limit(key); + + if (result.success) { + return { + allowed: true, + limit: result.limit, + remaining: result.remaining, + }; + } + + const retryAfter = result.reset - Date.now(); + + return { + allowed: false, + limit: result.limit, + remaining: 0, + retryAfter: Math.max(0, retryAfter), + }; + } +} diff --git a/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts b/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts index 05795b0a280..91f84dd9a9f 100644 --- a/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts +++ b/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts @@ -1,9 +1,40 @@ -import { InMemoryEventRateLimitChecker } from "./eventRateLimiter.server"; +import { env } from "~/env.server"; +import { createRedisRateLimitClient } from "~/services/rateLimiter.server"; +import { logger } from "~/services/logger.server"; +import { singleton } from "~/utils/singleton"; +import { + InMemoryEventRateLimitChecker, + RedisEventRateLimitChecker, +} from "./eventRateLimiter.server"; +import type { EventRateLimitChecker } from "./eventRateLimiter.server"; /** * Global singleton for the event publish rate limiter. * - * Uses the in-memory sliding window implementation. - * For production at scale, this can be swapped for a Redis-backed checker. + * Uses Redis when RATE_LIMIT_REDIS_HOST is configured (production), + * falls back to in-memory sliding window otherwise (dev/testing). */ -export const eventPublishRateLimitChecker = new InMemoryEventRateLimitChecker(); +export const eventPublishRateLimitChecker = singleton( + "eventPublishRateLimitChecker", + initializeRateLimitChecker +); + +function initializeRateLimitChecker(): EventRateLimitChecker { + if (env.RATE_LIMIT_REDIS_HOST) { + logger.info("Event rate limiter: using Redis-backed implementation"); + + const redisClient = createRedisRateLimitClient({ + port: env.RATE_LIMIT_REDIS_PORT, + host: env.RATE_LIMIT_REDIS_HOST, + username: env.RATE_LIMIT_REDIS_USERNAME, + password: env.RATE_LIMIT_REDIS_PASSWORD, + tlsDisabled: env.RATE_LIMIT_REDIS_TLS_DISABLED === "true", + clusterMode: env.RATE_LIMIT_REDIS_CLUSTER_MODE_ENABLED === "1", + }); + + return new RedisEventRateLimitChecker(redisClient); + } + + logger.info("Event rate limiter: using in-memory implementation (no RATE_LIMIT_REDIS_HOST)"); + return new InMemoryEventRateLimitChecker(); +} diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index ab18fab0331..9b4ba9bead7 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -1,4 +1,14 @@ import { TriggerTaskRequestBody, eventFilterMatches, matchesPattern } from "@trigger.dev/core/v3"; + +/** FNV-1a hash — fast, well-distributed hash for short strings */ +function fnv1aHash(str: string): number { + let hash = 0x811c9dc5; // FNV offset basis (32-bit) + for (let i = 0; i < str.length; i++) { + hash ^= str.charCodeAt(i); + hash = (hash * 0x01000193) >>> 0; // FNV prime, keep as uint32 + } + return hash; +} import type { EventFilter } from "@trigger.dev/core/v3"; import { PrismaClientOrTransaction } from "~/db.server"; import { AuthenticatedEnvironment } from "~/services/apiAuth.server"; @@ -245,8 +255,11 @@ export class PublishEventService extends BaseService { }; } - // 5. Apply consumer group selection — within a group, only one task receives each event - const subscriptionsToTrigger = this.applyConsumerGroups(matchingSubscriptions); + // 5. Generate event ID early so it can be used for deterministic consumer group selection + const eventId = generateFriendlyId("evt"); + + // 6. Apply consumer group selection — within a group, only one task receives each event + const subscriptionsToTrigger = this.applyConsumerGroups(matchingSubscriptions, eventId); if (subscriptionsToTrigger.length < matchingSubscriptions.length) { span.setAttribute( @@ -255,8 +268,7 @@ export class PublishEventService extends BaseService { ); } - // 6. Fan out: trigger each matching subscribed task - const eventId = generateFriendlyId("evt"); + // 7. Fan out: trigger each matching subscribed task const runs: PublishEventResult["runs"] = []; for (const subscription of subscriptionsToTrigger) { @@ -332,7 +344,7 @@ export class PublishEventService extends BaseService { } } - // 7. Persist to event log (async, non-blocking) + // 8. Persist to event log (async, non-blocking) if (this._eventLogWriter) { try { this._eventLogWriter({ @@ -368,10 +380,14 @@ export class PublishEventService extends BaseService { /** * Apply consumer group logic: within a consumer group, only one subscription receives each event. * Subscriptions without a consumer group are always included (normal fan-out). - * Selection is round-robin based on subscription count modulo (deterministic per group per call). + * + * Selection uses FNV-1a hash of the eventId for deterministic, evenly distributed routing. + * The same eventId always picks the same member, ensuring consistency for retries/replays. + * Different eventIds distribute evenly across group members. */ private applyConsumerGroups( - subscriptions: Array<{ id: string; consumerGroup: string | null; taskSlug: string }> + subscriptions: Array<{ id: string; consumerGroup: string | null; taskSlug: string }>, + eventId?: string ): typeof subscriptions { const ungrouped: typeof subscriptions = []; const groups = new Map(); @@ -389,14 +405,13 @@ export class PublishEventService extends BaseService { } } - // For each consumer group, pick one member using a simple hash-based selection const selected: typeof subscriptions = [...ungrouped]; - for (const [, members] of groups) { - // Sort by taskSlug for deterministic ordering, then pick using a rotating index - // The selection rotates based on the current timestamp (second-level granularity) - // so load is distributed over time + for (const [groupName, members] of groups) { + // Sort by taskSlug for deterministic ordering const sorted = members.sort((a, b) => a.taskSlug.localeCompare(b.taskSlug)); - const index = Math.floor(Date.now() / 1000) % sorted.length; + // Hash the eventId + group name for deterministic, distributed selection + const hashInput = eventId ? `${eventId}:${groupName}` : `${Date.now()}:${groupName}`; + const index = fnv1aHash(hashInput) % sorted.length; selected.push(sorted[index]!); } From c7e5877446870cf49e8ed867123cfedf38058b36 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 17:07:38 -0800 Subject: [PATCH 39/65] =?UTF-8?q?chore:=20update=20pubsub=20memory=20files?= =?UTF-8?q?=20=E2=80=94=20phases=20complete=20+=20pending=20items?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../memory/MEMORY.md | 22 +- .../memory/pubsub-pending.md | 70 +++ .../memory/pubsub-progress.md | 256 +++------ .../memory/pubsub-roadmap.md | 504 +++--------------- 4 files changed, 246 insertions(+), 606 deletions(-) create mode 100644 .claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md index d15fa701f49..0d2d1d991b3 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md @@ -1,14 +1,11 @@ # Memory Index -## Active Projects - -- **Pub/Sub Event System**: Full roadmap for implementing publish/subscribe in Trigger.dev - - Roadmap: [pubsub-roadmap.md](pubsub-roadmap.md) (in English) - - Repo conventions: [repo-conventions.md](repo-conventions.md) - - Progress: [pubsub-progress.md](pubsub-progress.md) - - Status: Phase 0 + Phase 1 + Phase 2 + Phase 3 complete - - Current phase: Phase 3 done → next is Phase 4 (Dead Letter Queue) - - Branch: `feat/pubsub-event-system` +## Pub/Sub Event System +- [Roadmap & Status](pubsub-roadmap.md) — phases 0-8 complete, pending items identified +- [Detailed Progress](pubsub-progress.md) — per-phase notes, commits, decisions +- [Pending Items](pubsub-pending.md) — Redis rate limiter, consumer groups, dashboard, etc. +- Repo conventions: [repo-conventions.md](repo-conventions.md) +- Branch: `feat/pubsub-event-system` ## Repo Quick Reference @@ -20,6 +17,13 @@ - Import `@trigger.dev/core` subpaths only, never root - Migrations: clean extraneous lines, indexes need CONCURRENTLY in separate files - Changesets required for `packages/*` changes (default: patch) +- Tags in integration tests: avoid `tags` option in trigger calls — `createTag` uses global prisma mock `{}` + +## Rate Limiting Patterns in Codebase +- `apps/webapp/app/services/rateLimiter.server.ts` — Upstash `@upstash/ratelimit` wrapper (sliding window, token bucket, fixed window) +- `apps/webapp/app/v3/GCRARateLimiter.server.ts` — Custom GCRA with Redis Lua scripts +- Both use dedicated Redis connection (`RATE_LIMIT_REDIS_HOST` env vars) +- Good reference implementations: `mfaRateLimiter.server.ts`, `magicLinkRateLimiter.server.ts` ## User Preferences diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md new file mode 100644 index 00000000000..3ef04c96898 --- /dev/null +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md @@ -0,0 +1,70 @@ +# Pub/Sub Event System — Pending Items + +## Phase 9: Production Hardening + +Items identified during post-implementation audit. Ordered by priority. + +### 9.1 — Redis-backed Rate Limiter (swap InMemory for Redis) + +**Status**: NOT STARTED +**Complexity**: LOW — interface already exists, just need new implementation +**Why**: InMemory limiter doesn't survive restarts and doesn't work multi-instance + +The codebase already has proven rate limiting patterns: +- `apps/webapp/app/services/rateLimiter.server.ts` — Upstash `@upstash/ratelimit` wrapper (sliding window, token bucket, fixed window) +- `apps/webapp/app/v3/GCRARateLimiter.server.ts` — Custom GCRA with Redis Lua scripts (used for alerts) +- Both use dedicated Redis connection (`RATE_LIMIT_REDIS_HOST` env vars) + +**Implementation plan**: +1. Create `RedisEventRateLimitChecker` implementing `EventRateLimitChecker` interface +2. Use existing `RateLimiter` wrapper from `rateLimiter.server.ts` with `Ratelimit.slidingWindow()` +3. Key format already defined: `{projectId}:{eventSlug}` +4. Swap singleton in `eventRateLimiterGlobal.server.ts` +5. Keep `InMemoryEventRateLimitChecker` for tests + +**Key files**: +- Interface: `apps/webapp/app/v3/services/events/eventRateLimiter.server.ts` +- Singleton: `apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts` +- Reference: `apps/webapp/app/services/rateLimiter.server.ts` +- Reference: `apps/webapp/app/services/mfa/mfaRateLimiter.server.ts` (good example of production usage) + +### 9.2 — Consumer Group Improvement + +**Status**: NOT STARTED +**Complexity**: MEDIUM — needs design decision +**Why**: Current round-robin by timestamp is too simplistic for production + +Options: +1. **Redis-based round-robin counter** — atomic increment, true rotation across events +2. **Hash-based selection** — hash(eventId) % members for consistent routing per event +3. **Weighted selection** — respect task queue concurrency limits +4. Keep current for MVP, document as "basic" consumer groups + +**Current implementation**: `PublishEventService.applyConsumerGroups()` in `publishEvent.server.ts` + +### 9.3 — Verify Integration Tests + +**Status**: DONE (2026-02-28) +- 24/24 integration tests PASS with Docker running +- 11/11 rate limiter unit tests PASS +- 470/470 core unit tests PASS + +### 9.4 — Dashboard UI, CLI Commands, Reference Project, Documentation + +**Status**: NOT STARTED (deferred from Phase 8) +**Complexity**: HIGH — significant amount of work +**Items from original roadmap**: +- Event list/detail views in webapp dashboard +- `trigger events list|publish|history|replay|dlq` CLI commands +- `references/event-system/` demo project +- SDK docs in `rules/` directory +- Update `.claude/skills/trigger-dev-tasks/SKILL.md` + +### 9.5 — Consumer-side Rate Limiting + Backpressure Monitor + +**Status**: NOT STARTED (deferred from Phase 7) +**Complexity**: MEDIUM +**Items from original roadmap**: +- Per-consumer rate limit on task subscription +- `backpressureMonitor.server.ts` — lag detection, metrics +- `GET /api/v1/events/:eventId/metrics` endpoint diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md index 494c056acd5..b3b9cc3c44a 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-progress.md @@ -6,213 +6,115 @@ All sub-steps 0.1–0.9 implemented and committed. See git log for details. ## Phase 1: Schema Registry + Validation — COMPLETE ### What was done -1. **1.1 — Schema versioning DB + SchemaRegistryService** - - Added `compatibleVersions`, `deprecatedAt`, `deprecatedMessage` to EventDefinition model - - Added `schema` field to EventManifest (Zod schema in core) - - Created migration `20260228054059_add_event_schema_versioning` - - Installed `ajv@8` in webapp for JSON Schema validation - - Created `SchemaRegistryService` with: registerSchema, getSchema, listSchemas, validatePayload, checkCompatibility - -2. **1.2 — Schema discovery API endpoints** - - Created `GET /api/v1/events` — lists all event definitions with subscriber counts - - Created `GET /api/v1/events/:eventId` — event detail with schema, subscribers, versioning info - - Created `GET /api/v1/events/:eventId/schema` — JSON schema only - - Added response schemas (`ListEventsResponseBody`, `GetEventResponseBody`, `GetEventSchemaResponseBody`) to core - - Added API client methods (`listEvents`, `getEvent`, `getEventSchema`) - -3. **1.3 — Store JSON schema during deploy + validate at publish** - - Extended `EventMetadata` with `rawSchema` field - - SDK `event()` now stores raw schema in resource catalog - - CLI indexers (dev + managed) convert event schemas to JSON Schema via `schemaToJsonSchema` - - `syncWorkerEvents` stores JSON schema in `EventDefinition.schema` field - - `PublishEventService` validates payloads against stored schemas using ajv - - Added `getEventSchema()` to ResourceCatalog interface + implementations - -4. **1.4 — Tests + verification** - - 12 unit tests for SchemaRegistryService (validation, compatibility) - - 3 new integration tests for publish with schema validation - - All 9 integration tests pass (6 existing + 3 new) - - Full build passes: core, sdk, cli, webapp - - Changeset added - -### Key decisions -- Used `ajv@8` (industry standard) for JSON Schema validation at publish time -- Schema conversion happens at CLI indexing time (same pattern as task payloadSchema) -- Malformed schemas don't block publishes (graceful degradation) -- Compatibility checking is heuristic (checks required fields, type changes) — not exhaustive -- Schema validation errors return 422 with descriptive messages +1. **1.1** — Schema versioning DB + `SchemaRegistryService` (ajv@8, registerSchema, getSchema, listSchemas, validatePayload, checkCompatibility) +2. **1.2** — Discovery API: `GET /api/v1/events`, `GET /api/v1/events/:id`, `GET /api/v1/events/:id/schema` + API client methods +3. **1.3** — SDK stores raw schema → CLI converts to JSON Schema → DB stores → PublishEventService validates +4. **1.4** — 12 unit + 3 integration tests ### Commits -- `e6249e407` — phase 1.1: schema versioning DB + SchemaRegistryService -- `49b2903d5` — phase 1.2: schema discovery API endpoints -- `2a06ef605` — phase 1.3: store JSON schema during deploy + validate at publish -- `cfa67d079` — phase 1.4: tests + changeset +`e6249e407` → `49b2903d5` → `2a06ef605` → `cfa67d079` ## Phase 2: Smart Routing — COMPLETE ### What was done -1. **2.1 — Filter evaluator + pattern matcher** - - Reused existing `eventFilterMatches` (30+ tests already exist) — wrapped with caching layer - - Created `packages/core/src/v3/events/filterEvaluator.ts`: `compileFilter`, `evaluateFilter`, cache management - - Created `packages/core/src/v3/events/patternMatcher.ts`: `compilePattern`, `matchesPattern` for `*` and `#` wildcards - - 28 unit tests for filter evaluator + 30 unit tests for pattern matcher - -2. **2.2 — Filters in the SDK** - - Added `onEventFilter` to `TaskMetadata` schema - - Added `filter?: EventFilter` to `TaskOptionsWithEvent` type - - SDK `shared.ts` extracts filter and passes to metadata - - `syncWorkerEvents` stores filter in `EventSubscription.filter` during deploy - -3. **2.3 — Filter evaluation during fan-out** - - `PublishEventService` evaluates `subscription.filter` against payload before triggering - - Non-matching subscribers are skipped (no run created) - - Malformed filters err on side of delivery (graceful degradation) - - Span attributes: `filteredOutCount`, `matchingSubscriberCount` - - 3 integration tests: filter skips, filter allows, complex multi-field filter - -4. **2.4 — Wildcard pattern subscriptions** - - Created `events.match(pattern)` SDK helper returning `EventPatternMatcher` - - Added `onEventPattern` to `TaskMetadata` schema - - `syncWorkerEvents` stores pattern in `EventSubscription.pattern` - - `PublishEventService` queries pattern subscriptions and evaluates them against event slug - - Deduplication: subscriptions that appear in both exact and pattern results are kept only once - - 4 integration tests: `*` matches, `*` rejects, `#` multi-level, pattern+filter combo - -### Key decisions -- Reused existing `eventFilterMatches` rather than rewriting — it already has 34 tests -- Filter caching via `compileFilter(filter, cacheKey)` — keyed by subscription ID -- Pattern matching uses recursive segment-matching (not regex) for correctness with `#` -- `#` matches zero or more segments (AMQP-style) — `order.#` matches `order`, `order.created`, `order.status.changed` -- Pattern subscriptions still need an EventDefinition (placeholder with `pattern:` prefix) due to the foreign key constraint -- Malformed filters/patterns don't block publishes — errors are logged but delivery continues +1. **2.1** — Filter evaluator (reused `eventFilterMatches`) + pattern matcher (`*` and `#` wildcards) — 58 unit tests +2. **2.2** — Filters in SDK, stored in `EventSubscription.filter` during deploy +3. **2.3** — Filter evaluation during fan-out in `PublishEventService` — 3 integration tests +4. **2.4** — `events.match(pattern)` SDK helper, pattern subscriptions — 4 integration tests ### Commits -- `cd426b366` — phase 2.1: filter evaluator + pattern matcher with tests -- `be7ca08fb` — phase 2.2: filters in SDK + stored during deploy -- `676d37eb0` — phase 2.3: filter evaluation during fan-out -- `846438cf9` — phase 2.4: wildcard pattern subscriptions +`cd426b366` → `be7ca08fb` → `676d37eb0` → `846438cf9` ## Phase 3: Event Persistence + Replay — COMPLETE ### What was done -1. **3.1 — ClickHouse event_log_v1 table + insert function** - - Created migration `021_event_log_v1.sql`: `event_log_v1` table (MergeTree engine) - - Partitioned by `toYYYYMM(published_at)`, ordered by `(project_id, environment_id, event_type, published_at, event_id)` - - 90-day TTL, bloom filter indexes on event_id/publisher_run_id/idempotency_key - - ZSTD compression on all string columns, Delta+ZSTD on timestamps - - Created migration `022_event_counts_mv_v1.sql`: `event_counts_v1` (SummingMergeTree) + `event_counts_mv_v1` materialized view - - Created `internal-packages/clickhouse/src/eventLog.ts`: `EventLogV1Input/Output` schemas, `insertEventLog`, `getEventLogQueryBuilder` - - Added `eventLog` getter on `ClickHouse` class (insert + queryBuilder) - -2. **3.2 — Write to event log on each publish** - - Added `EventLogWriter` callback type + `EventLogEntry` type to `PublishEventService` - - Constructor accepts optional `eventLogWriter` (injectable, like `triggerFn`) - - After fan-out, calls writer with event metadata — fire-and-forget, errors logged not thrown - - Created `eventLogWriter.server.ts`: `writeEventLog()` function using `clickhouseClient.eventLog.insert` - - Wired into `publish` and `batchPublish` routes - -3. **3.3 — Event history API endpoint** - - Created `GET /api/v1/events/:eventId/history` route - - Query params: `from`, `to`, `limit` (max 200), `cursor`, `publisherRunId` - - Uses ClickHouse queryBuilder pattern: `.where().orderBy().limit().execute()` - - Cursor-based pagination (by published_at) - - Added `EventHistoryItem`, `GetEventHistoryResponseBody` schemas to core - - Added `getEventHistory()` API client method - -4. **3.4 — Event Replay service + API endpoint** - - Created `ReplayEventsService` with `call(params)` method - - Queries ClickHouse for events in date range, applies optional EventFilter - - Re-publishes each event via `PublishEventService` with `replay:{eventId}` idempotency key - - Supports `dryRun` (count without executing), `tasks[]` filter, max 10k events - - Created `POST /api/v1/events/:eventId/replay` endpoint - - Added `ReplayEventsRequestBody`, `ReplayEventsResponseBody` schemas to core - - Added `replayEvents()` API client method +1. **3.1** — ClickHouse `event_log_v1` + `event_counts_mv_v1` materialized view +2. **3.2** — `EventLogWriter` callback in `PublishEventService` (fire-and-forget) +3. **3.3** — `GET /api/v1/events/:eventId/history` (paginated, from ClickHouse) +4. **3.4** — `ReplayEventsService` + `POST /api/v1/events/:eventId/replay` ### Key decisions -- Used MergeTree (not ReplacingMergeTree) — events are immutable, no need for dedup/soft-delete -- EventLogWriter is injectable callback (not direct import) — keeps PublishEventService testable without ClickHouse -- Fire-and-forget ClickHouse writes — async `.then()` pattern, errors logged but never block publish -- Replay uses `replay:{originalEventId}` as idempotency key prefix — per-consumer dedup via PublishEventService -- No dedicated tests for 3.3/3.4 since they require ClickHouse (testcontainers only has Postgres+Redis) -- All existing 16 integration tests + 58 unit tests still pass +- MergeTree engine (events immutable), EventLogWriter injectable, fire-and-forget writes +- No tests for 3.3/3.4 (require ClickHouse, testcontainers only has Postgres+Redis) ### Commits -- `c63c1e781` — phase 3.1: event_log_v1 ClickHouse table + insert function -- `8dfb002ee` — phase 3.2: write to ClickHouse event log on each publish -- `02369b128` — phase 3.3: event history API endpoint -- `3d9863512` — phase 3.4: event replay service + API endpoint +`c63c1e781` → `8dfb002ee` → `02369b128` → `3d9863512` ## Phase 4: Dead Letter Queue — COMPLETE ### What was done -1. **4.1 — DeadLetterEvent model + enum + migration** - - Created `DeadLetterStatus` enum: `PENDING`, `RETRIED`, `DISCARDED` - - Created `DeadLetterEvent` model with: eventType, payload, taskSlug, failedRunId (FK to TaskRun), error, attemptCount, status, sourceEventId - - Added reverse relations on TaskRun, Project, RuntimeEnvironment - - Migration `20260228065743_add_dead_letter_event` (cleaned of extraneous lines) - -2. **4.2 — Store event context on runs + DLQ detection** - - Modified `PublishEventService` to inject `$$event` metadata into triggered runs: `{ eventId, eventType, sourceEventId }` - - Created `DeadLetterService` with `handleFailedRun(run, error)` method - - Extracts `$$event` from run metadata to identify event-triggered runs - - Hooked into `FinalizeTaskRunService` after `isFailedRunStatus()` check (alongside alerts) - -3. **4.3 — DLQ management API endpoints** - - Created `DeadLetterManagementService` with: list, retry, discard, retryAll methods - - `GET /api/v1/events/dlq` — paginated list with eventType/status filters - - `POST /api/v1/events/dlq/:id/retry` — re-triggers the task with `dlq-retry:{id}` idempotency key - - `POST /api/v1/events/dlq/:id/discard` — marks as DISCARDED - - `POST /api/v1/events/dlq/retry-all` — batch retry up to 1000 PENDING items - - DLQ response schemas added to core - - API client methods added +1. **4.1** — `DeadLetterEvent` model + `DeadLetterStatus` enum + migration +2. **4.2** — `$$event` metadata on triggered runs + `DeadLetterService` in `FinalizeTaskRunService` +3. **4.3** — `DeadLetterManagementService` + 4 API endpoints (list, retry, discard, retry-all) ### Key decisions -- Used `$$event` metadata prefix (double-dollar convention) to avoid collisions with user metadata -- Hooked into `FinalizeTaskRunService` (not EventBus) — matches existing alert pattern, has full run data available -- Phase 4.4 (SDK DLQ config per event) deferred to Phase 8 (DX) — current implementation is sufficient -- Retry creates new run with `dlq-retry:{dleId}` idempotency key for dedup -- retryAll is capped at 1000 items per call +- `$$event` metadata prefix, hooked into `FinalizeTaskRunService` +- Phase 4.4 (SDK DLQ config per event) deferred ### Commits -- `ec4139642` — phase 4.1: DeadLetterEvent model + enum + migration -- `5ed48645e` — phase 4.2: store event context on runs + DLQ detection -- `89d0daba8` — phase 4.3: DLQ management API endpoints +`ec4139642` → `5ed48645e` → `89d0daba8` ## Phase 5: Ordering + Consumer Groups — COMPLETE ### What was done -1. **5.1 — Ordering keys** - - Added `orderingKey` to `PublishEventRequestBody` and `BatchPublishEventRequestBody` in core schemas - - Added `orderingKey` to SDK `PublishEventOptions` and pass-through in `publish()` / `batchPublish()` - - `PublishEventService` maps `orderingKey` to `concurrencyKey` on triggered runs: `evt:{eventSlug}:{orderingKey}` - - Updated publish + batchPublish routes to pass ordering key through - - Span attribute added for observability - -2. **5.2 — Consumer groups** - - Added `onEventConsumerGroup` to `TaskResource` and `TaskMetadata` schemas - - Added `consumerGroup` option to `TaskOptionsWithEvent` type - - SDK `shared.ts` extracts and registers `consumerGroup` from task params - - `syncWorkerEvents` stores `consumerGroup` in `EventSubscription` during deploy - - `PublishEventService.applyConsumerGroups()` groups subscriptions by `consumerGroup` - - Ungrouped subscriptions always receive events (normal fan-out) - - Within each group, one member is selected (round-robin by timestamp) - -3. **5.3 — Tests** - - 3 new integration tests: ordering key sets concurrencyKey, consumer group picks one, multiple groups + ungrouped - - All 19 integration tests pass, all 470 unit tests pass - - Full build passes: core, sdk, cli, webapp - - Changeset added +1. **5.1** — `orderingKey` mapped to `concurrencyKey` on triggered runs +2. **5.2** — `consumerGroup` option, `applyConsumerGroups()` picks one per group +3. **5.3** — 3 integration tests + +### Key decisions +- Ordering at publish time (dynamic per-payload) +- Consumer groups: `Math.floor(Date.now() / 1000) % members.length` (simplistic — see pending items) + +### Commits +`dcd3ea3c1` → `8c033b3dd` → `3b3abf47a` + +## Phase 6: Publish-and-Wait — COMPLETE + +### What was done +1. **6.1** — `waitForEvent` in `SharedRuntimeManager` (resolvers, suspendable, lifecycle hooks) +2. **6.2** — `POST /api/v1/events/:eventId/publishAndWait` endpoint + API client method +3. **6.3** — SDK `publishAndWait()` method (validates, calls API with `parentRunId`, waits via `runtime.waitForEvent()`) +4. **6.tests** — 3 integration tests (waitpoints, no-subscribers, event log writer) ### Key decisions -- Ordering at **publish time** (not event definition time) — ordering key values are dynamic per-payload -- Maps to existing `concurrencyKey` infrastructure — no new queue management needed -- Consumer group selection uses `Math.floor(Date.now() / 1000) % members.length` for time-based rotation -- `consumerGroup` field already existed in Prisma schema from Phase 0.4 — no migration needed +- Leverages existing waitpoint system (`parentRunId` + `resumeParentOnCompletion: true`) +- `publishAndWait` only works inside `task.run()` (needs task context) + +### Commits +`a522cb6af` → `c4bd534af` → `a87bce472` + +## Phase 7: Rate Limiting — COMPLETE + +### What was done +1. `EventRateLimitConfig` zod schema + `EventRateLimitChecker` interface +2. `InMemoryEventRateLimitChecker` (sliding window) +3. `EventPublishRateLimitError` class with retry info +4. Integration in `PublishEventService` + 429 responses on all publish routes +5. `rateLimit` option on SDK `event()`, stored in DB during deploy +6. 11 unit tests + 2 integration tests + +### Known limitation +- InMemory only — see pubsub-pending.md for Redis upgrade plan + +### Commits +`6454ef3a6` + +## Phase 8: Observability + DX — COMPLETE (partial) + +### What was done +1. ClickHouse `event_counts_v1` query builder +2. `GET /api/v1/events/:eventId/stats` endpoint (time-bucketed metrics) +3. `getEventStats()` API client method +4. SDK `validate()` method for pre-flight payload validation + +### Not done (deferred) +- Dashboard UI, CLI commands, reference project, documentation +- See pubsub-pending.md ### Commits -- `dcd3ea3c1` — phase 5.1: ordering keys via concurrencyKey -- `8c033b3dd` — phase 5.2: consumer groups for load-balanced fan-out -- `3b3abf47a` — phase 5.3: integration tests for ordering + consumer groups +`42a3844e0` -## Phase 6: Publish-and-Wait — NOT STARTED -Next phase. Fan-out/fan-in with waitpoints. +## Test Verification (2026-02-28) +- 24/24 integration tests PASS (Docker + testcontainers) +- 11/11 rate limiter unit tests PASS +- 470/470 core unit tests PASS diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md index bb2bd709607..deddf7dc9f9 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md @@ -10,422 +10,81 @@ First-class pub/sub event system within Trigger.dev that enables: - Delivery guarantees, ordering, replay, DLQ - Replacing the need for Kafka/RabbitMQ/EventBridge for most use cases -## Roadmap Structure +## Branch: feat/pubsub-event-system -9 incremental phases. Each phase delivers usable functionality. +## Implementation Status -- **Phase 0**: Core — `event()` primitive + basic fan-out -- **Phase 1**: Schema Registry — versioning and validation -- **Phase 2**: Smart Routing — content-based filtering + wildcards -- **Phase 3**: Persistence — event log in ClickHouse + replay -- **Phase 4**: Dead Letter Queue — failure handling -- **Phase 5**: Ordering + Consumer Groups -- **Phase 6**: Publish-and-Wait (fan-out/fan-in) -- **Phase 7**: Rate Limiting + Backpressure -- **Phase 8**: Observability + Developer Experience +### Phases 0–8: CORE IMPLEMENTATION COMPLETE + +| Phase | Description | Status | +|-------|-------------|--------| +| 0 | Core — `event()` primitive + basic fan-out | DONE | +| 1 | Schema Registry — versioning and validation | DONE | +| 2 | Smart Routing — content-based filtering + wildcards | DONE | +| 3 | Persistence — event log in ClickHouse + replay | DONE | +| 4 | Dead Letter Queue — failure handling | DONE | +| 5 | Ordering + Consumer Groups | DONE | +| 6 | Publish-and-Wait (fan-out/fan-in) | DONE | +| 7 | Rate Limiting + Backpressure | DONE (publish-side only) | +| 8 | Observability + Developer Experience | DONE (API only, no UI/CLI/docs) | + +### Phase 9: Production Hardening — PENDING + +See [pubsub-pending.md](pubsub-pending.md) for detailed items: +- 9.1 — Redis-backed rate limiter (LOW complexity) +- 9.2 — Consumer group improvement (MEDIUM complexity) +- 9.3 — Integration tests verified (DONE) +- 9.4 — Dashboard UI, CLI commands, reference project, documentation (HIGH complexity) +- 9.5 — Consumer-side rate limiting + backpressure monitor (MEDIUM complexity) --- -## Implementation Process & Guidelines +## Known Limitations -### Workflow per phase +### Rate Limiter: In-Memory Only +- `InMemoryEventRateLimitChecker` uses sliding window in memory +- Lost on process restart, doesn't work with multiple instances +- Interface `EventRateLimitChecker` ready for Redis swap-in +- Codebase has proven patterns: Upstash `@upstash/ratelimit` + GCRA with Redis Lua scripts -1. **Read roadmap** from memory to understand current phase tasks -2. **Research before coding**: use sub-agents to explore existing patterns for each step - - Before step 0.5 (worker registration), read how `createBackgroundWorker` currently works - - Before step 0.7 (fan-out service), read how `TriggerTaskService` works - - Follow existing code conventions (naming, file structure, error handling patterns) -3. **Read existing code** for every file being modified (never edit blind) -4. **Implement step by step** (0.1 → 0.2 → ... → 0.N) following dependency order -5. **Verify after each step**: - - `pnpm run build --filter ` — type check - - Run specific tests if they exist for the changed code - - Fix any issues before moving to next step -6. **Commit after each step** (each sub-step 0.1, 0.2, etc. gets its own commit) - - Never commit broken code (build failures, test failures) - - Commit message format: `feat(events): phase X.Y — ` - - Each commit should be atomic and self-contained -7. **Full verification at end of phase**: - - `pnpm run build --filter @trigger.dev/core --filter @trigger.dev/sdk --filter webapp` - - Run all event-related tests - - Run typecheck on affected packages -8. **Update roadmap** in memory: mark completed tasks with `[x]`, note any deviations -9. **Write phase summary** to memory: what was done, what decisions were made, any tech debt -10. **Proceed to next phase** without waiting for user input +### Consumer Groups: Simplistic Round-Robin +- Selection: `Math.floor(Date.now() / 1000) % members.length` +- Not true consumer groups (no persistent state, no rebalancing) +- Within same second, all events go to same consumer -### Git strategy +### Phase 7 Partial +- Only publish-side rate limiting implemented +- No consumer-side rate limiting +- No backpressure monitor service -- Work on a feature branch: `feat/pubsub-event-system` -- Commit after every sub-step (0.1, 0.2, etc.) — one commit per sub-step minimum -- If a sub-step is large, break it into multiple commits (e.g., 0.4 DB models → one for schema, one for migration) -- Never commit code that doesn't build or has failing tests -- Changeset required when modifying public packages (`packages/*`) — add once per phase -- If a commit breaks something, fix it immediately before any other work +### Phase 8 Partial +- Only API-level observability (stats endpoint, SDK validate()) +- No dashboard UI, no CLI commands, no reference project, no documentation -### Code conventions (match existing codebase) +--- + +## Implementation Guidelines (preserved for reference) -- Follow the patterns found in existing services (e.g., `TriggerTaskService` for the publish service) +### Code conventions (match existing codebase) - Services go in `apps/webapp/app/v3/services/` with `.server.ts` suffix - API routes follow Remix flat file convention in `apps/webapp/app/routes/` - Use `env` from `apps/webapp/app/env.server.ts`, never `process.env` - For testable code, pass config as options (never import env.server.ts in tests) -- Prisma operations follow existing patterns (transactions, error handling) -- Use `generateFriendlyId()` for user-facing IDs - Zod schemas go in `packages/core/src/v3/schemas/` +- Commit message format: `feat(events): phase X.Y — ` -### Sub-agents usage - -- **DO use sub-agents for**: researching patterns in the codebase before coding, exploring how similar features are implemented, finding reference implementations -- **DO NOT use sub-agents for**: writing code — all code is written in main context to maintain full awareness of accumulated changes -- **DO use parallel bash calls for**: running build + test + typecheck simultaneously at verification checkpoints - -### Error recovery - -- If a build fails after a step: fix it before committing, don't move on -- If tests fail: investigate root cause, fix, re-run. Don't skip tests -- If a step's design doesn't work with existing code: adapt the plan, note deviation in roadmap -- If stuck on something for more than 2 attempts: note the blocker in the roadmap, skip to next step if possible, come back later -- If context gets too large: summarize current state to memory, the conversation auto-compresses old messages - -### Context management - -- The conversation auto-compresses old messages as context grows -- The roadmap file in memory serves as persistent state across compression -- Git commits serve as checkpoints — if context is lost, the code is in git -- Each phase starts by reading the roadmap + recent git log to understand state -- After completing each phase, write a brief summary to `memory/pubsub-progress.md` - -### Quality gates (must ALL pass before moving to next phase) - -1. All affected packages build successfully (`pnpm run build --filter ...`) -2. All new tests pass (`pnpm run test --filter ... --run`) +### Quality gates +1. All affected packages build successfully +2. All new tests pass 3. All existing tests still pass (no regressions) 4. No TypeScript errors in affected packages 5. All changes are committed to the feature branch -6. Roadmap updated with completed tasks marked `[x]` - -### Database migration rules (from CLAUDE.md) +### Database migration rules - Clean generated migrations of extraneous lines (see CLAUDE.md for list) - Indexes MUST use CONCURRENTLY and be in their own separate migration file -- New tables don't need CONCURRENTLY - Run `pnpm run db:migrate:deploy && pnpm run generate` after each migration -### Autonomous execution - -The implementation runs end-to-end without user intervention: -- Phase 0 → Phase 1 → ... → Phase 8 -- No need to ask user for confirmation between phases -- If a decision needs to be made (e.g., two valid approaches), pick the one that matches existing codebase patterns and note it in the roadmap -- If something is genuinely ambiguous or risky, ask the user via AskUserQuestion -- User can review progress anytime via `git log --oneline feat/pubsub-event-system` or reading `memory/pubsub-progress.md` - ---- - -## Phase 0: Core Primitives + Basic Fan-out — COMPLETE - -> **Goal**: Define events, subscribe tasks, publish, and have fan-out work. -> **Deliverable**: `event()` + `task({ on: ... })` + `.publish()` working end-to-end. - -All sub-steps 0.1–0.9 implemented and committed. See `pubsub-progress.md` for details. - ---- - -## Phase 1: Schema Registry + Validation — COMPLETE - -> **Goal**: Versioned schemas, robust validation, event discovery. -> **Requires**: Phase 0 - -All sub-steps 1.1–1.4 implemented and committed. See `pubsub-progress.md` for details. - -Key deliverables: -- [x] DB migration: `compatibleVersions`, `deprecatedAt`, `deprecatedMessage` on EventDefinition -- [x] `SchemaRegistryService` with registerSchema, getSchema, listSchemas, validatePayload, checkCompatibility -- [x] Discovery API: GET /api/v1/events, GET /api/v1/events/:id, GET /api/v1/events/:id/schema -- [x] API client methods: listEvents, getEvent, getEventSchema -- [x] Schema pipeline: SDK stores raw schema → CLI converts to JSON Schema → DB stores it → PublishEventService validates -- [x] ajv@8 for JSON Schema validation at publish time -- [x] 12 unit tests + 3 integration tests for schema validation -- [x] Changeset added - ---- - -## Phase 2: Smart Routing — Content-based Filtering + Wildcards — COMPLETE - -> **Goal**: Subscribe with filters (`amount >= 1000`) and patterns (`order.*`). -> **Requires**: Phase 0 - -All sub-steps 2.1–2.4 implemented and committed. See `pubsub-progress.md` for details. - -Key deliverables: -- [x] Filter evaluator: `compileFilter`, `evaluateFilter` wrapping existing `eventFilterMatches` with caching -- [x] Pattern matcher: `compilePattern`, `matchesPattern` for `*` (single) and `#` (multi) wildcards -- [x] `filter` option on `TaskOptionsWithEvent`, stored in `EventSubscription.filter` during deploy -- [x] `events.match(pattern)` SDK helper for wildcard subscriptions -- [x] `PublishEventService` evaluates filters and patterns during fan-out -- [x] 58 unit tests (28 filter + 30 pattern) + 7 integration tests (3 filter + 4 pattern) -- [x] Changeset added - ---- - -## Phase 3: Event Persistence + Replay — COMPLETE - -> **Goal**: Store all published events, enable replay. -> **Requires**: Phase 0 - -All sub-steps 3.1–3.4 implemented and committed. See `pubsub-progress.md` for details. - -Key deliverables: -- [x] ClickHouse `event_log_v1` table (MergeTree, 90-day TTL, bloom filter indexes) -- [x] `event_counts_v1` + `event_counts_mv_v1` materialized view for per-type counts -- [x] `insertEventLog` function + `eventLog` getter on ClickHouse class -- [x] `EventLogWriter` callback in `PublishEventService` — fire-and-forget ClickHouse writes -- [x] `writeEventLog` singleton wired into publish + batchPublish routes -- [x] `GET /api/v1/events/:eventId/history` — paginated event history from ClickHouse -- [x] `ReplayEventsService` — replay events in date range with filter/tasks/dryRun -- [x] `POST /api/v1/events/:eventId/replay` endpoint -- [x] API client methods: `getEventHistory`, `replayEvents` -- [x] Response schemas: `EventHistoryItem`, `GetEventHistoryResponseBody`, `ReplayEventsRequestBody`, `ReplayEventsResponseBody` -- [x] Changeset added - ---- - -## Phase 4: Dead Letter Queue — COMPLETE - -> **Goal**: Events that fail after all retries go to a DLQ for inspection and reprocessing. -> **Requires**: Phase 0, Phase 3 (for persistence) - -All sub-steps 4.1–4.3 implemented and committed. See `pubsub-progress.md` for details. - -Key deliverables: -- [x] `DeadLetterEvent` model + `DeadLetterStatus` enum + migration -- [x] `$$event` metadata on event-triggered runs for identification -- [x] `DeadLetterService` hooks into `FinalizeTaskRunService` on run failure -- [x] `DeadLetterManagementService` with list, retry, discard, retryAll -- [x] `GET /api/v1/events/dlq` — list DLQ entries (paginated, filterable) -- [x] `POST /api/v1/events/dlq/:id/retry` — retry single entry -- [x] `POST /api/v1/events/dlq/:id/discard` — discard single entry -- [x] `POST /api/v1/events/dlq/retry-all` — batch retry -- [x] API client methods: `listDeadLetterEvents`, `retryDeadLetterEvent`, `discardDeadLetterEvent`, `retryAllDeadLetterEvents` -- [x] Response schemas added to core -- [x] Changeset added -- Note: Phase 4.4 (SDK event() DLQ config) deferred to Phase 8 (DX) - ---- - -## Phase 5: Ordering + Consumer Groups — COMPLETE - -> **Goal**: Order guarantees by partition key. Competing consumers for load balancing. -> **Requires**: Phase 0 - -All sub-steps 5.1–5.3 implemented and committed. See `pubsub-progress.md` for details. - -Key deliverables: -- [x] `orderingKey` in publish options, mapped to `concurrencyKey` on triggered runs -- [x] `consumerGroup` option on `TaskOptionsWithEvent`, stored in `EventSubscription.consumerGroup` during deploy -- [x] `PublishEventService.applyConsumerGroups()` — within a group, only one task receives each event -- [x] 3 integration tests for ordering + consumer groups -- [x] Changeset added - ---- - -## Phase 6: Publish-and-Wait (Fan-out / Fan-in) - -> **Goal**: Publish an event and wait for all consumers to finish. -> **Requires**: Phase 0 - -### 6.1 — publishAndWait in the SDK - -**File to modify**: `packages/trigger-sdk/src/v3/events.ts` - -Tasks: -- [ ] Implement `EventDefinition.publishAndWait()`: - ```typescript - const results = await orderCreated.publishAndWait(payload, { - timeout: "5m", // optional - }); - // results: Record - ``` -- [ ] Internally: - - Call special endpoint `POST /api/v1/events/:eventId/publishAndWait` - - The endpoint creates runs + waitpoints - - Returns when all waitpoints complete - -### 6.2 — Backend: publish with waitpoints - -**New file**: `apps/webapp/app/v3/services/events/publishAndWait.server.ts` - -Tasks: -- [ ] Reuse existing `WaitpointSystem`: - 1. Create a coordinator "event waitpoint" - 2. Fan-out: create a run per consumer - 3. For each run, create a child waitpoint linked to the coordinator - 4. The caller is blocked on the coordinator waitpoint - 5. When each consumer finishes → completes its waitpoint - 6. When all child waitpoints complete → completes the coordinator -- [ ] Timeout: if a consumer doesn't finish, complete with partial error -- [ ] Result: aggregate outputs from each consumer - -### 6.3 — Timeout and error handling - -Tasks: -- [ ] If a consumer fails definitively (exhausted retries) → its result is error -- [ ] If timeout is reached before all finish → partial result with status of each -- [ ] The caller decides what to do with partial results - ---- - -## Phase 7: Rate Limiting + Backpressure - -> **Goal**: Control publish and consume speed. Detect lag. -> **Requires**: Phase 0 - -### 7.1 — Publish rate limiting - -**New file**: `apps/webapp/app/v3/services/events/rateLimiter.server.ts` - -Tasks: -- [ ] Implement sliding window rate limiter (Redis): - - Key: `ratelimit:publish:{projectId}:{eventSlug}` - - Configurable per-event - - Default: 1000 events/minute per type -- [ ] Response header `X-RateLimit-Remaining` on publish endpoint -- [ ] When exceeded: HTTP 429 with `Retry-After` header - -**File to modify**: `packages/trigger-sdk/src/v3/events.ts` - -Tasks: -- [ ] Extend `event()`: - ```typescript - event({ - id: "order.created", - schema: orderSchema, - rateLimit: { - limit: 500, - window: "1m", - }, - }); - ``` - -### 7.2 — Consumer rate limiting - -**File to modify**: `packages/trigger-sdk/src/v3/shared.ts` - -Tasks: -- [ ] Extend task with per-event rate limit: - ```typescript - task({ - on: orderCreated, - rateLimit: { limit: 100, window: "1m" }, - run: async (payload) => { ... }, - }); - ``` -- [ ] Implement as queue with rate limit (reuse concurrency limits infra) -- [ ] Events that exceed the rate are enqueued (not lost), processed when capacity is available - -### 7.3 — Backpressure detection + metrics - -**New file**: `apps/webapp/app/v3/services/events/backpressureMonitor.server.ts` - -Tasks: -- [ ] Monitor lag per consumer: `pendingRuns = publishedEvents - processedEvents` -- [ ] Metrics in ClickHouse: - - `event_publish_rate` per type - - `event_consume_rate` per consumer - - `event_consumer_lag` (difference) -- [ ] Alerts when lag exceeds configurable threshold -- [ ] Expose metrics in API: `GET /api/v1/events/:eventId/metrics` - ---- - -## Phase 8: Observability + Developer Experience - -> **Goal**: Dashboard, CLI, full traceability, documentation. -> **Requires**: Phases 0-7 (gradual, can start earlier) - -### 8.1 — Trace propagation - -**File to modify**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` - -Tasks: -- [ ] Propagate `traceId` from publisher to all consumer runs -- [ ] Add span attribute `trigger.event.id` and `trigger.event.type` to each run -- [ ] Add `sourceEventId` to TaskRun metadata -- [ ] In run dashboard: show "Triggered by event: order.created" -- [ ] In event dashboard: show all runs it generated - -### 8.2 — Events dashboard (webapp) - -**New files in**: `apps/webapp/app/routes/` - -Tasks: -- [ ] Event list view: `/orgs/:orgSlug/projects/:projectSlug/events` - - List of EventDefinitions with stats (publish count, last published, subscriber count) -- [ ] Event detail view: `.../events/:eventSlug` - - Schema (formatted) - - List of subscribers (tasks) - - Recent publication history (from ClickHouse) - - Metrics: publish rate, consumer lag -- [ ] DLQ view: `.../events/dlq` - - List of dead letter events, filterable by type/status - - Actions: retry, discard, retry all -- [ ] Corresponding presenters in `apps/webapp/app/v3/presenters/` - -### 8.3 — CLI commands - -**File to modify**: `packages/cli-v3/src/commands/` - -Tasks: -- [ ] `trigger events list` — list project events -- [ ] `trigger events publish --payload '{...}'` — publish from CLI -- [ ] `trigger events history --from --to` — view history -- [ ] `trigger events replay --from --to` — replay -- [ ] `trigger events dlq list` — view dead letter queue -- [ ] `trigger events dlq retry ` — retry DLQ item - -### 8.4 — SDK helpers and DX - -**File to modify**: `packages/trigger-sdk/src/v3/events.ts` - -Tasks: -- [ ] Helper for local testing: - ```typescript - import { testEvent } from "@trigger.dev/sdk/testing"; - - // In tests - const result = await testEvent(orderCreated, { orderId: "123", amount: 50 }); - expect(result.runs).toHaveLength(2); - ``` -- [ ] Full type inference: consumer payload typed from event schema -- [ ] Descriptive error messages when schema validation fails -- [ ] Complete JSDoc on all public functions - -### 8.5 — Documentation - -**New files in**: `rules/` (next version) - -Tasks: -- [ ] Event system documentation for SDK rules: - - `events-basic.md` — define events, publish, subscribe - - `events-advanced.md` — filters, wildcards, ordering, consumer groups - - `events-reliability.md` — DLQ, replay, idempotency - - `events-patterns.md` — common patterns (saga, CQRS, event sourcing) -- [ ] Update `.claude/skills/trigger-dev-tasks/SKILL.md` with event examples -- [ ] Update `manifest.json` with new version - -### 8.6 — Reference project - -**New directory**: `references/event-system/` - -Tasks: -- [ ] Reference project demonstrating: - - Definition of multiple events - - Tasks subscribed with filters - - Publish from a task - - Publish-and-wait pattern - - DLQ handler -- [ ] Use as manual testing project (similar to hello-world) - --- ## Phase dependencies @@ -441,45 +100,50 @@ Phase 0 (Core) ─────┬── Phase 1 (Schema Registry) └── Phase 8 (DX) ← gradual, can start with Phase 0 ``` -Phases 1-7 are mostly independent of each other (all depend on Phase 0). -Phase 4 (DLQ) benefits from Phase 3 (persistence) but can work without it. -Phase 8 (DX) is built incrementally with each phase. - --- -## Key files to create/modify (summary) +## Key files created -### New files +### Services | File | Phase | |------|-------| -| `packages/trigger-sdk/src/v3/events.ts` | 0 | -| `packages/core/src/v3/events/schemaUtils.ts` | 1 | -| `packages/core/src/v3/events/filterEvaluator.ts` | 2 | +| `apps/webapp/app/v3/services/events/publishEvent.server.ts` | 0 | +| `apps/webapp/app/v3/services/events/schemaRegistry.server.ts` | 1 | +| `apps/webapp/app/v3/services/events/deadLetterService.server.ts` | 4 | +| `apps/webapp/app/v3/services/events/deadLetterManagement.server.ts` | 4 | +| `apps/webapp/app/v3/services/events/replayEvents.server.ts` | 3 | +| `apps/webapp/app/v3/services/events/eventRateLimiter.server.ts` | 7 | +| `apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts` | 7 | +| `apps/webapp/app/v3/services/events/eventLogWriter.server.ts` | 3 | + +### API Routes +| File | Phase | +|------|-------| +| `apps/webapp/app/routes/api.v1.events.ts` | 1 | +| `apps/webapp/app/routes/api.v1.events.$eventId.ts` | 1 | +| `apps/webapp/app/routes/api.v1.events.$eventId.schema.ts` | 1 | | `apps/webapp/app/routes/api.v1.events.$eventId.publish.ts` | 0 | | `apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts` | 0 | +| `apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts` | 6 | | `apps/webapp/app/routes/api.v1.events.$eventId.history.ts` | 3 | | `apps/webapp/app/routes/api.v1.events.$eventId.replay.ts` | 3 | -| `apps/webapp/app/routes/api.v1.events.ts` | 1 | +| `apps/webapp/app/routes/api.v1.events.$eventId.stats.ts` | 8 | | `apps/webapp/app/routes/api.v1.events.dlq.ts` | 4 | -| `apps/webapp/app/v3/services/events/publishEvent.server.ts` | 0 | -| `apps/webapp/app/v3/services/events/publishAndWait.server.ts` | 6 | -| `apps/webapp/app/v3/services/events/schemaRegistry.server.ts` | 1 | -| `apps/webapp/app/v3/services/events/deadLetterService.server.ts` | 4 | -| `apps/webapp/app/v3/services/events/replayEvents.server.ts` | 3 | -| `apps/webapp/app/v3/services/events/rateLimiter.server.ts` | 7 | -| `apps/webapp/app/v3/services/events/backpressureMonitor.server.ts` | 7 | -| `internal-packages/clickhouse/schema/XXX_event_log_v1.sql` | 3 | -| `internal-packages/run-engine/src/engine/tests/events.test.ts` | 0 | -| `references/event-system/` | 8 | +| `apps/webapp/app/routes/api.v1.events.dlq.$id.retry.ts` | 4 | +| `apps/webapp/app/routes/api.v1.events.dlq.$id.discard.ts` | 4 | +| `apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts` | 4 | -### Files to modify +### SDK / Core | File | Phase | |------|-------| -| `packages/trigger-sdk/src/v3/index.ts` | 0 | -| `packages/trigger-sdk/src/v3/shared.ts` | 0, 2, 5 | -| `packages/core/src/v3/schemas/resources.ts` | 0 | -| `packages/core/src/v3/resource-catalog/catalog.ts` | 0 | -| `packages/core/src/v3/resource-catalog/standardCatalog.ts` | 0 | -| `internal-packages/database/prisma/schema.prisma` | 0, 1, 4, 5 | -| `apps/webapp/app/v3/services/createBackgroundWorker.server.ts` | 0 | -| `apps/webapp/app/v3/services/createDeploymentBackgroundWorkerV4.server.ts` | 0 | +| `packages/trigger-sdk/src/v3/events.ts` | 0 | +| `packages/core/src/v3/events/filterEvaluator.ts` | 2 | +| `packages/core/src/v3/events/patternMatcher.ts` | 2 | +| `internal-packages/clickhouse/src/eventLog.ts` | 3 | +| `internal-packages/clickhouse/src/eventCounts.ts` | 8 | + +### Tests +| File | Tests | +|------|-------| +| `apps/webapp/test/engine/publishEvent.test.ts` | 24 integration tests | +| `apps/webapp/test/engine/eventRateLimiter.test.ts` | 11 unit tests | From ae94c445e1c8e8d6ef6381a643604061c18fc193 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 17:20:06 -0800 Subject: [PATCH 40/65] chore: restore full original roadmap with current status annotations Co-Authored-By: Claude Opus 4.6 --- .../memory/pubsub-pending.md | 44 +- .../memory/pubsub-roadmap.md | 497 ++++++++++++++++-- 2 files changed, 453 insertions(+), 88 deletions(-) diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md index 3ef04c96898..374ca192c54 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md @@ -4,43 +4,21 @@ Items identified during post-implementation audit. Ordered by priority. -### 9.1 — Redis-backed Rate Limiter (swap InMemory for Redis) +### 9.1 — Redis-backed Rate Limiter -**Status**: NOT STARTED -**Complexity**: LOW — interface already exists, just need new implementation -**Why**: InMemory limiter doesn't survive restarts and doesn't work multi-instance - -The codebase already has proven rate limiting patterns: -- `apps/webapp/app/services/rateLimiter.server.ts` — Upstash `@upstash/ratelimit` wrapper (sliding window, token bucket, fixed window) -- `apps/webapp/app/v3/GCRARateLimiter.server.ts` — Custom GCRA with Redis Lua scripts (used for alerts) -- Both use dedicated Redis connection (`RATE_LIMIT_REDIS_HOST` env vars) - -**Implementation plan**: -1. Create `RedisEventRateLimitChecker` implementing `EventRateLimitChecker` interface -2. Use existing `RateLimiter` wrapper from `rateLimiter.server.ts` with `Ratelimit.slidingWindow()` -3. Key format already defined: `{projectId}:{eventSlug}` -4. Swap singleton in `eventRateLimiterGlobal.server.ts` -5. Keep `InMemoryEventRateLimitChecker` for tests - -**Key files**: -- Interface: `apps/webapp/app/v3/services/events/eventRateLimiter.server.ts` -- Singleton: `apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts` -- Reference: `apps/webapp/app/services/rateLimiter.server.ts` -- Reference: `apps/webapp/app/services/mfa/mfaRateLimiter.server.ts` (good example of production usage) +**Status**: DONE (commit `81c09cda5`) +- Created `RedisEventRateLimitChecker` using `@upstash/ratelimit` sliding window +- Singleton auto-detects: uses Redis when `RATE_LIMIT_REDIS_HOST` is set, falls back to InMemory +- Caches `Ratelimit` instances per config (limit+window combo) +- Reuses existing `createRedisRateLimitClient` infrastructure ### 9.2 — Consumer Group Improvement -**Status**: NOT STARTED -**Complexity**: MEDIUM — needs design decision -**Why**: Current round-robin by timestamp is too simplistic for production - -Options: -1. **Redis-based round-robin counter** — atomic increment, true rotation across events -2. **Hash-based selection** — hash(eventId) % members for consistent routing per event -3. **Weighted selection** — respect task queue concurrency limits -4. Keep current for MVP, document as "basic" consumer groups - -**Current implementation**: `PublishEventService.applyConsumerGroups()` in `publishEvent.server.ts` +**Status**: DONE (commit `81c09cda5`) +- Replaced `Math.floor(Date.now() / 1000) % N` with FNV-1a hash of `eventId:groupName` +- Deterministic: same eventId always routes to the same group member +- Evenly distributed: different eventIds spread across members +- Consistent for retries/replays (same eventId = same routing) ### 9.3 — Verify Integration Tests diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md index deddf7dc9f9..aac2b270ce5 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md @@ -12,9 +12,7 @@ First-class pub/sub event system within Trigger.dev that enables: ## Branch: feat/pubsub-event-system -## Implementation Status - -### Phases 0–8: CORE IMPLEMENTATION COMPLETE +## Implementation Status Summary | Phase | Description | Status | |-------|-------------|--------| @@ -27,64 +25,434 @@ First-class pub/sub event system within Trigger.dev that enables: | 6 | Publish-and-Wait (fan-out/fan-in) | DONE | | 7 | Rate Limiting + Backpressure | DONE (publish-side only) | | 8 | Observability + Developer Experience | DONE (API only, no UI/CLI/docs) | +| 9.1 | Redis-backed rate limiter | DONE | +| 9.2 | Hash-based consumer groups | DONE | +| 9.3 | Integration tests verified | DONE | +| 9.4 | Dashboard UI, CLI, docs, reference project | NOT STARTED | +| 9.5 | Consumer-side rate limiting + backpressure | NOT STARTED | -### Phase 9: Production Hardening — PENDING +See [pubsub-pending.md](pubsub-pending.md) for details on remaining items. +See [pubsub-progress.md](pubsub-progress.md) for per-phase implementation notes. -See [pubsub-pending.md](pubsub-pending.md) for detailed items: -- 9.1 — Redis-backed rate limiter (LOW complexity) -- 9.2 — Consumer group improvement (MEDIUM complexity) -- 9.3 — Integration tests verified (DONE) -- 9.4 — Dashboard UI, CLI commands, reference project, documentation (HIGH complexity) -- 9.5 — Consumer-side rate limiting + backpressure monitor (MEDIUM complexity) +## Roadmap Structure ---- +9 incremental phases. Each phase delivers usable functionality. -## Known Limitations +- **Phase 0**: Core — `event()` primitive + basic fan-out +- **Phase 1**: Schema Registry — versioning and validation +- **Phase 2**: Smart Routing — content-based filtering + wildcards +- **Phase 3**: Persistence — event log in ClickHouse + replay +- **Phase 4**: Dead Letter Queue — failure handling +- **Phase 5**: Ordering + Consumer Groups +- **Phase 6**: Publish-and-Wait (fan-out/fan-in) +- **Phase 7**: Rate Limiting + Backpressure +- **Phase 8**: Observability + Developer Experience -### Rate Limiter: In-Memory Only -- `InMemoryEventRateLimitChecker` uses sliding window in memory -- Lost on process restart, doesn't work with multiple instances -- Interface `EventRateLimitChecker` ready for Redis swap-in -- Codebase has proven patterns: Upstash `@upstash/ratelimit` + GCRA with Redis Lua scripts +--- -### Consumer Groups: Simplistic Round-Robin -- Selection: `Math.floor(Date.now() / 1000) % members.length` -- Not true consumer groups (no persistent state, no rebalancing) -- Within same second, all events go to same consumer +## Implementation Process & Guidelines -### Phase 7 Partial -- Only publish-side rate limiting implemented -- No consumer-side rate limiting -- No backpressure monitor service +### Workflow per phase -### Phase 8 Partial -- Only API-level observability (stats endpoint, SDK validate()) -- No dashboard UI, no CLI commands, no reference project, no documentation +1. **Read roadmap** from memory to understand current phase tasks +2. **Research before coding**: use sub-agents to explore existing patterns for each step + - Before step 0.5 (worker registration), read how `createBackgroundWorker` currently works + - Before step 0.7 (fan-out service), read how `TriggerTaskService` works + - Follow existing code conventions (naming, file structure, error handling patterns) +3. **Read existing code** for every file being modified (never edit blind) +4. **Implement step by step** (0.1 → 0.2 → ... → 0.N) following dependency order +5. **Verify after each step**: + - `pnpm run build --filter ` — type check + - Run specific tests if they exist for the changed code + - Fix any issues before moving to next step +6. **Commit after each step** (each sub-step 0.1, 0.2, etc. gets its own commit) + - Never commit broken code (build failures, test failures) + - Commit message format: `feat(events): phase X.Y — ` + - Each commit should be atomic and self-contained +7. **Full verification at end of phase**: + - `pnpm run build --filter @trigger.dev/core --filter @trigger.dev/sdk --filter webapp` + - Run all event-related tests + - Run typecheck on affected packages +8. **Update roadmap** in memory: mark completed tasks with `[x]`, note any deviations +9. **Write phase summary** to memory: what was done, what decisions were made, any tech debt +10. **Proceed to next phase** without waiting for user input ---- +### Git strategy -## Implementation Guidelines (preserved for reference) +- Work on a feature branch: `feat/pubsub-event-system` +- Commit after every sub-step (0.1, 0.2, etc.) — one commit per sub-step minimum +- If a sub-step is large, break it into multiple commits (e.g., 0.4 DB models → one for schema, one for migration) +- Never commit code that doesn't build or has failing tests +- Changeset required when modifying public packages (`packages/*`) — add once per phase +- If a commit breaks something, fix it immediately before any other work ### Code conventions (match existing codebase) + +- Follow the patterns found in existing services (e.g., `TriggerTaskService` for the publish service) - Services go in `apps/webapp/app/v3/services/` with `.server.ts` suffix - API routes follow Remix flat file convention in `apps/webapp/app/routes/` - Use `env` from `apps/webapp/app/env.server.ts`, never `process.env` - For testable code, pass config as options (never import env.server.ts in tests) +- Prisma operations follow existing patterns (transactions, error handling) +- Use `generateFriendlyId()` for user-facing IDs - Zod schemas go in `packages/core/src/v3/schemas/` -- Commit message format: `feat(events): phase X.Y — ` -### Quality gates -1. All affected packages build successfully -2. All new tests pass +### Sub-agents usage + +- **DO use sub-agents for**: researching patterns in the codebase before coding, exploring how similar features are implemented, finding reference implementations +- **DO NOT use sub-agents for**: writing code — all code is written in main context to maintain full awareness of accumulated changes +- **DO use parallel bash calls for**: running build + test + typecheck simultaneously at verification checkpoints + +### Error recovery + +- If a build fails after a step: fix it before committing, don't move on +- If tests fail: investigate root cause, fix, re-run. Don't skip tests +- If a step's design doesn't work with existing code: adapt the plan, note deviation in roadmap +- If stuck on something for more than 2 attempts: note the blocker in the roadmap, skip to next step if possible, come back later +- If context gets too large: summarize current state to memory, the conversation auto-compresses old messages + +### Context management + +- The conversation auto-compresses old messages as context grows +- The roadmap file in memory serves as persistent state across compression +- Git commits serve as checkpoints — if context is lost, the code is in git +- Each phase starts by reading the roadmap + recent git log to understand state +- After completing each phase, write a brief summary to `memory/pubsub-progress.md` + +### Quality gates (must ALL pass before moving to next phase) + +1. All affected packages build successfully (`pnpm run build --filter ...`) +2. All new tests pass (`pnpm run test --filter ... --run`) 3. All existing tests still pass (no regressions) 4. No TypeScript errors in affected packages 5. All changes are committed to the feature branch +6. Roadmap updated with completed tasks marked `[x]` + +### Database migration rules (from CLAUDE.md) -### Database migration rules - Clean generated migrations of extraneous lines (see CLAUDE.md for list) - Indexes MUST use CONCURRENTLY and be in their own separate migration file +- New tables don't need CONCURRENTLY - Run `pnpm run db:migrate:deploy && pnpm run generate` after each migration +### Autonomous execution + +The implementation runs end-to-end without user intervention: +- Phase 0 → Phase 1 → ... → Phase 8 +- No need to ask user for confirmation between phases +- If a decision needs to be made (e.g., two valid approaches), pick the one that matches existing codebase patterns and note it in the roadmap +- If something is genuinely ambiguous or risky, ask the user via AskUserQuestion +- User can review progress anytime via `git log --oneline feat/pubsub-event-system` or reading `memory/pubsub-progress.md` + +--- + +## Phase 0: Core Primitives + Basic Fan-out — COMPLETE + +> **Goal**: Define events, subscribe tasks, publish, and have fan-out work. +> **Deliverable**: `event()` + `task({ on: ... })` + `.publish()` working end-to-end. + +All sub-steps 0.1–0.9 implemented and committed. See `pubsub-progress.md` for details. + +--- + +## Phase 1: Schema Registry + Validation — COMPLETE + +> **Goal**: Versioned schemas, robust validation, event discovery. +> **Requires**: Phase 0 + +All sub-steps 1.1–1.4 implemented and committed. See `pubsub-progress.md` for details. + +Key deliverables: +- [x] DB migration: `compatibleVersions`, `deprecatedAt`, `deprecatedMessage` on EventDefinition +- [x] `SchemaRegistryService` with registerSchema, getSchema, listSchemas, validatePayload, checkCompatibility +- [x] Discovery API: GET /api/v1/events, GET /api/v1/events/:id, GET /api/v1/events/:id/schema +- [x] API client methods: listEvents, getEvent, getEventSchema +- [x] Schema pipeline: SDK stores raw schema → CLI converts to JSON Schema → DB stores it → PublishEventService validates +- [x] ajv@8 for JSON Schema validation at publish time +- [x] 12 unit tests + 3 integration tests for schema validation +- [x] Changeset added + +--- + +## Phase 2: Smart Routing — Content-based Filtering + Wildcards — COMPLETE + +> **Goal**: Subscribe with filters (`amount >= 1000`) and patterns (`order.*`). +> **Requires**: Phase 0 + +All sub-steps 2.1–2.4 implemented and committed. See `pubsub-progress.md` for details. + +Key deliverables: +- [x] Filter evaluator: `compileFilter`, `evaluateFilter` wrapping existing `eventFilterMatches` with caching +- [x] Pattern matcher: `compilePattern`, `matchesPattern` for `*` (single) and `#` (multi) wildcards +- [x] `filter` option on `TaskOptionsWithEvent`, stored in `EventSubscription.filter` during deploy +- [x] `events.match(pattern)` SDK helper for wildcard subscriptions +- [x] `PublishEventService` evaluates filters and patterns during fan-out +- [x] 58 unit tests (28 filter + 30 pattern) + 7 integration tests (3 filter + 4 pattern) +- [x] Changeset added + +--- + +## Phase 3: Event Persistence + Replay — COMPLETE + +> **Goal**: Store all published events, enable replay. +> **Requires**: Phase 0 + +All sub-steps 3.1–3.4 implemented and committed. See `pubsub-progress.md` for details. + +Key deliverables: +- [x] ClickHouse `event_log_v1` table (MergeTree, 90-day TTL, bloom filter indexes) +- [x] `event_counts_v1` + `event_counts_mv_v1` materialized view for per-type counts +- [x] `insertEventLog` function + `eventLog` getter on ClickHouse class +- [x] `EventLogWriter` callback in `PublishEventService` — fire-and-forget ClickHouse writes +- [x] `writeEventLog` singleton wired into publish + batchPublish routes +- [x] `GET /api/v1/events/:eventId/history` — paginated event history from ClickHouse +- [x] `ReplayEventsService` — replay events in date range with filter/tasks/dryRun +- [x] `POST /api/v1/events/:eventId/replay` endpoint +- [x] API client methods: `getEventHistory`, `replayEvents` +- [x] Response schemas: `EventHistoryItem`, `GetEventHistoryResponseBody`, `ReplayEventsRequestBody`, `ReplayEventsResponseBody` +- [x] Changeset added + +--- + +## Phase 4: Dead Letter Queue — COMPLETE + +> **Goal**: Events that fail after all retries go to a DLQ for inspection and reprocessing. +> **Requires**: Phase 0, Phase 3 (for persistence) + +All sub-steps 4.1–4.3 implemented and committed. See `pubsub-progress.md` for details. + +Key deliverables: +- [x] `DeadLetterEvent` model + `DeadLetterStatus` enum + migration +- [x] `$$event` metadata on event-triggered runs for identification +- [x] `DeadLetterService` hooks into `FinalizeTaskRunService` on run failure +- [x] `DeadLetterManagementService` with list, retry, discard, retryAll +- [x] `GET /api/v1/events/dlq` — list DLQ entries (paginated, filterable) +- [x] `POST /api/v1/events/dlq/:id/retry` — retry single entry +- [x] `POST /api/v1/events/dlq/:id/discard` — discard single entry +- [x] `POST /api/v1/events/dlq/retry-all` — batch retry +- [x] API client methods: `listDeadLetterEvents`, `retryDeadLetterEvent`, `discardDeadLetterEvent`, `retryAllDeadLetterEvents` +- [x] Response schemas added to core +- [x] Changeset added +- Note: Phase 4.4 (SDK event() DLQ config) deferred to Phase 8 (DX) + +--- + +## Phase 5: Ordering + Consumer Groups — COMPLETE + +> **Goal**: Order guarantees by partition key. Competing consumers for load balancing. +> **Requires**: Phase 0 + +All sub-steps 5.1–5.3 implemented and committed. See `pubsub-progress.md` for details. + +Key deliverables: +- [x] `orderingKey` in publish options, mapped to `concurrencyKey` on triggered runs +- [x] `consumerGroup` option on `TaskOptionsWithEvent`, stored in `EventSubscription.consumerGroup` during deploy +- [x] `PublishEventService.applyConsumerGroups()` — within a group, only one task receives each event +- [x] 3 integration tests for ordering + consumer groups +- [x] Changeset added + +--- + +## Phase 6: Publish-and-Wait (Fan-out / Fan-in) — COMPLETE + +> **Goal**: Publish an event and wait for all consumers to finish. +> **Requires**: Phase 0 + +### 6.1 — Runtime waitForEvent — DONE + +**File modified**: `packages/core/src/v3/runtime/sharedRuntimeManager.ts` + +Tasks: +- [x] `waitForEvent` implemented in `SharedRuntimeManager` with resolvers, suspendable, lifecycle hooks +- [x] `NoopRuntimeManager` returns empty results as fallback +- [x] `RuntimeAPI` exposes `waitForEvent` as public method + +### 6.2 — Backend: publishAndWait endpoint — DONE + +Tasks: +- [x] `POST /api/v1/events/:eventId/publishAndWait` endpoint +- [x] Reuses `PublishEventService` with `parentRunId` option +- [x] Each triggered run gets `parentRunId` + `resumeParentOnCompletion: true` +- [x] Run engine creates waitpoints automatically via existing infrastructure +- [x] API client method `publishAndWaitEvent()` + +### 6.3 — SDK publishAndWait — DONE + +**File modified**: `packages/trigger-sdk/src/v3/events.ts` + +Tasks: +- [x] `EventDefinition.publishAndWait()` implemented +- [x] Validates payload, calls API with `parentRunId: ctx.run.id` +- [x] Waits via `runtime.waitForEvent()` which registers resolvers for all runs +- [x] Returns aggregated `PublishAndWaitResult` with results keyed by task slug +- [x] Throws if called outside `task.run()` (needs task context for waitpoints) + +### 6.tests — DONE +- [x] 3 integration tests: waitpoints per subscriber, no-subscribers empty, event log writer fanOutCount + +--- + +## Phase 7: Rate Limiting + Backpressure — PARTIAL + +> **Goal**: Control publish and consume speed. Detect lag. +> **Requires**: Phase 0 + +### 7.1 — Publish rate limiting — DONE + +**File created**: `apps/webapp/app/v3/services/events/eventRateLimiter.server.ts` + +Tasks: +- [x] Implement sliding window rate limiter: + - `InMemoryEventRateLimitChecker` for dev/testing + - `RedisEventRateLimitChecker` using `@upstash/ratelimit` for production (Phase 9.1) + - Key: `{projectId}:{eventSlug}` + - Configurable per-event via `EventDefinition.rateLimit` JSON field +- [x] Response headers `x-ratelimit-limit`, `x-ratelimit-remaining`, `retry-after` on publish endpoints +- [x] When exceeded: HTTP 429 with `Retry-After` header +- [x] 11 unit tests + 2 integration tests + +**File modified**: `packages/trigger-sdk/src/v3/events.ts` + +Tasks: +- [x] Extend `event()`: + ```typescript + event({ + id: "order.created", + schema: orderSchema, + rateLimit: { + limit: 500, + window: "1m", + }, + }); + ``` + +### 7.2 — Consumer rate limiting — NOT DONE (deferred) + +**File to modify**: `packages/trigger-sdk/src/v3/shared.ts` + +Tasks: +- [ ] Extend task with per-event rate limit: + ```typescript + task({ + on: orderCreated, + rateLimit: { limit: 100, window: "1m" }, + run: async (payload) => { ... }, + }); + ``` +- [ ] Implement as queue with rate limit (reuse concurrency limits infra) +- [ ] Events that exceed the rate are enqueued (not lost), processed when capacity is available + +### 7.3 — Backpressure detection + metrics — NOT DONE (deferred) + +**New file**: `apps/webapp/app/v3/services/events/backpressureMonitor.server.ts` + +Tasks: +- [ ] Monitor lag per consumer: `pendingRuns = publishedEvents - processedEvents` +- [ ] Metrics in ClickHouse: + - `event_publish_rate` per type + - `event_consume_rate` per consumer + - `event_consumer_lag` (difference) +- [ ] Alerts when lag exceeds configurable threshold +- [ ] Expose metrics in API: `GET /api/v1/events/:eventId/metrics` + +--- + +## Phase 8: Observability + Developer Experience — PARTIAL + +> **Goal**: Dashboard, CLI, full traceability, documentation. +> **Requires**: Phases 0-7 (gradual, can start earlier) + +### 8.1 — Event stats API + SDK validate — DONE + +Tasks: +- [x] ClickHouse `event_counts_v1` query builder +- [x] `GET /api/v1/events/:eventId/stats` endpoint (time-bucketed metrics, periods: 1h/6h/24h/7d/30d) +- [x] `getEventStats()` API client method +- [x] SDK `validate()` method for pre-flight payload validation + +### 8.2 — Trace propagation — NOT DONE (deferred) + +**File to modify**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` + +Tasks: +- [ ] Propagate `traceId` from publisher to all consumer runs +- [ ] Add span attribute `trigger.event.id` and `trigger.event.type` to each run +- [ ] Add `sourceEventId` to TaskRun metadata +- [ ] In run dashboard: show "Triggered by event: order.created" +- [ ] In event dashboard: show all runs it generated + +### 8.3 — Events dashboard (webapp) — NOT DONE (deferred) + +**New files in**: `apps/webapp/app/routes/` + +Tasks: +- [ ] Event list view: `/orgs/:orgSlug/projects/:projectSlug/events` + - List of EventDefinitions with stats (publish count, last published, subscriber count) +- [ ] Event detail view: `.../events/:eventSlug` + - Schema (formatted) + - List of subscribers (tasks) + - Recent publication history (from ClickHouse) + - Metrics: publish rate, consumer lag +- [ ] DLQ view: `.../events/dlq` + - List of dead letter events, filterable by type/status + - Actions: retry, discard, retry all +- [ ] Corresponding presenters in `apps/webapp/app/v3/presenters/` + +### 8.4 — CLI commands — NOT DONE (deferred) + +**File to modify**: `packages/cli-v3/src/commands/` + +Tasks: +- [ ] `trigger events list` — list project events +- [ ] `trigger events publish --payload '{...}'` — publish from CLI +- [ ] `trigger events history --from --to` — view history +- [ ] `trigger events replay --from --to` — replay +- [ ] `trigger events dlq list` — view dead letter queue +- [ ] `trigger events dlq retry ` — retry DLQ item + +### 8.5 — SDK helpers and DX — NOT DONE (deferred) + +**File to modify**: `packages/trigger-sdk/src/v3/events.ts` + +Tasks: +- [ ] Helper for local testing: + ```typescript + import { testEvent } from "@trigger.dev/sdk/testing"; + + // In tests + const result = await testEvent(orderCreated, { orderId: "123", amount: 50 }); + expect(result.runs).toHaveLength(2); + ``` +- [ ] Full type inference: consumer payload typed from event schema +- [ ] Descriptive error messages when schema validation fails +- [ ] Complete JSDoc on all public functions + +### 8.6 — Documentation — NOT DONE (deferred) + +**New files in**: `rules/` (next version) + +Tasks: +- [ ] Event system documentation for SDK rules: + - `events-basic.md` — define events, publish, subscribe + - `events-advanced.md` — filters, wildcards, ordering, consumer groups + - `events-reliability.md` — DLQ, replay, idempotency + - `events-patterns.md` — common patterns (saga, CQRS, event sourcing) +- [ ] Update `.claude/skills/trigger-dev-tasks/SKILL.md` with event examples +- [ ] Update `manifest.json` with new version + +### 8.7 — Reference project — NOT DONE (deferred) + +**New directory**: `references/event-system/` + +Tasks: +- [ ] Reference project demonstrating: + - Definition of multiple events + - Tasks subscribed with filters + - Publish from a task + - Publish-and-wait pattern + - DLQ handler +- [ ] Use as manual testing project (similar to hello-world) + --- ## Phase dependencies @@ -100,13 +468,26 @@ Phase 0 (Core) ─────┬── Phase 1 (Schema Registry) └── Phase 8 (DX) ← gradual, can start with Phase 0 ``` +Phases 1-7 are mostly independent of each other (all depend on Phase 0). +Phase 4 (DLQ) benefits from Phase 3 (persistence) but can work without it. +Phase 8 (DX) is built incrementally with each phase. + --- -## Key files created +## Key files to create/modify (summary) -### Services +### New files | File | Phase | |------|-------| +| `packages/trigger-sdk/src/v3/events.ts` | 0 | +| `packages/core/src/v3/events/schemaUtils.ts` | 1 | +| `packages/core/src/v3/events/filterEvaluator.ts` | 2 | +| `apps/webapp/app/routes/api.v1.events.$eventId.publish.ts` | 0 | +| `apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts` | 0 | +| `apps/webapp/app/routes/api.v1.events.$eventId.history.ts` | 3 | +| `apps/webapp/app/routes/api.v1.events.$eventId.replay.ts` | 3 | +| `apps/webapp/app/routes/api.v1.events.ts` | 1 | +| `apps/webapp/app/routes/api.v1.events.dlq.ts` | 4 | | `apps/webapp/app/v3/services/events/publishEvent.server.ts` | 0 | | `apps/webapp/app/v3/services/events/schemaRegistry.server.ts` | 1 | | `apps/webapp/app/v3/services/events/deadLetterService.server.ts` | 4 | @@ -115,35 +496,41 @@ Phase 0 (Core) ─────┬── Phase 1 (Schema Registry) | `apps/webapp/app/v3/services/events/eventRateLimiter.server.ts` | 7 | | `apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts` | 7 | | `apps/webapp/app/v3/services/events/eventLogWriter.server.ts` | 3 | - -### API Routes -| File | Phase | -|------|-------| -| `apps/webapp/app/routes/api.v1.events.ts` | 1 | -| `apps/webapp/app/routes/api.v1.events.$eventId.ts` | 1 | -| `apps/webapp/app/routes/api.v1.events.$eventId.schema.ts` | 1 | -| `apps/webapp/app/routes/api.v1.events.$eventId.publish.ts` | 0 | -| `apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts` | 0 | | `apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts` | 6 | -| `apps/webapp/app/routes/api.v1.events.$eventId.history.ts` | 3 | -| `apps/webapp/app/routes/api.v1.events.$eventId.replay.ts` | 3 | | `apps/webapp/app/routes/api.v1.events.$eventId.stats.ts` | 8 | -| `apps/webapp/app/routes/api.v1.events.dlq.ts` | 4 | | `apps/webapp/app/routes/api.v1.events.dlq.$id.retry.ts` | 4 | | `apps/webapp/app/routes/api.v1.events.dlq.$id.discard.ts` | 4 | | `apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts` | 4 | +| `internal-packages/clickhouse/schema/021_event_log_v1.sql` | 3 | +| `internal-packages/clickhouse/schema/022_event_counts_mv_v1.sql` | 3 | +| `internal-packages/clickhouse/src/eventLog.ts` | 3 | +| `internal-packages/clickhouse/src/eventCounts.ts` | 8 | +| `apps/webapp/app/v3/services/events/backpressureMonitor.server.ts` | 7 (not done) | +| `references/event-system/` | 8 (not done) | -### SDK / Core +### Files to modify | File | Phase | |------|-------| -| `packages/trigger-sdk/src/v3/events.ts` | 0 | -| `packages/core/src/v3/events/filterEvaluator.ts` | 2 | -| `packages/core/src/v3/events/patternMatcher.ts` | 2 | -| `internal-packages/clickhouse/src/eventLog.ts` | 3 | -| `internal-packages/clickhouse/src/eventCounts.ts` | 8 | +| `packages/trigger-sdk/src/v3/index.ts` | 0 | +| `packages/trigger-sdk/src/v3/shared.ts` | 0, 2, 5 | +| `packages/core/src/v3/schemas/resources.ts` | 0 | +| `packages/core/src/v3/schemas/schemas.ts` | 0, 7 | +| `packages/core/src/v3/schemas/api.ts` | 3, 4, 6, 8 | +| `packages/core/src/v3/resource-catalog/catalog.ts` | 0, 7 | +| `packages/core/src/v3/resource-catalog/standardResourceCatalog.ts` | 0, 7 | +| `packages/core/src/v3/apiClient/index.ts` | 0, 3, 4, 6, 8 | +| `packages/core/src/v3/runtime/sharedRuntimeManager.ts` | 6 | +| `packages/core/src/v3/runtime/noopRuntimeManager.ts` | 6 | +| `packages/core/src/v3/index.ts` | 6 | +| `internal-packages/database/prisma/schema.prisma` | 0, 1, 4, 7 | +| `internal-packages/clickhouse/src/index.ts` | 3, 8 | +| `apps/webapp/app/v3/services/createBackgroundWorker.server.ts` | 0, 7 | +| `apps/webapp/app/v3/services/finalizeTaskRun.server.ts` | 4 | ### Tests | File | Tests | |------|-------| | `apps/webapp/test/engine/publishEvent.test.ts` | 24 integration tests | | `apps/webapp/test/engine/eventRateLimiter.test.ts` | 11 unit tests | +| Core filter/pattern tests | 58 + 30 unit tests | +| Core SchemaRegistryService tests | 12 unit tests | From 9745d2965329e8ad9b43fba6c89de381dc74c887 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 17:49:25 -0800 Subject: [PATCH 41/65] chore: mark completed tasks with [x] in roadmap based on actual implementation Co-Authored-By: Claude Opus 4.6 --- .../memory/pubsub-roadmap.md | 32 ++++++++++--------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md index aac2b270ce5..399faed6012 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md @@ -24,10 +24,10 @@ First-class pub/sub event system within Trigger.dev that enables: | 5 | Ordering + Consumer Groups | DONE | | 6 | Publish-and-Wait (fan-out/fan-in) | DONE | | 7 | Rate Limiting + Backpressure | DONE (publish-side only) | -| 8 | Observability + Developer Experience | DONE (API only, no UI/CLI/docs) | -| 9.1 | Redis-backed rate limiter | DONE | -| 9.2 | Hash-based consumer groups | DONE | -| 9.3 | Integration tests verified | DONE | +| 8 | Observability + Developer Experience | PARTIAL (API + types done, no UI/CLI/docs) | +| 9.1 | Redis-backed rate limiter | DONE (`81c09cda5`) | +| 9.2 | Hash-based consumer groups | DONE (`81c09cda5`) | +| 9.3 | Integration tests verified | DONE (24/24 pass) | | 9.4 | Dashboard UI, CLI, docs, reference project | NOT STARTED | | 9.5 | Consumer-side rate limiting + backpressure | NOT STARTED | @@ -370,16 +370,17 @@ Tasks: - [x] `getEventStats()` API client method - [x] SDK `validate()` method for pre-flight payload validation -### 8.2 — Trace propagation — NOT DONE (deferred) +### 8.2 — Trace propagation — PARTIAL -**File to modify**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` +**File modified**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` Tasks: -- [ ] Propagate `traceId` from publisher to all consumer runs -- [ ] Add span attribute `trigger.event.id` and `trigger.event.type` to each run -- [ ] Add `sourceEventId` to TaskRun metadata -- [ ] In run dashboard: show "Triggered by event: order.created" -- [ ] In event dashboard: show all runs it generated +- [x] Span attributes on publish: `eventSlug`, `eventDefinitionId`, `subscriberCount`, `matchingSubscriberCount`, `filteredOutCount`, `consumerGroupSkipped`, `rateLimited`, `orderingKey` +- [x] `$$event` metadata on each triggered run: `{ eventId, eventType, sourceEventId }` — used by DLQ for identification +- [ ] Propagate `traceId` from publisher to all consumer runs (currently inherits from span context) +- [ ] Named span attributes `trigger.event.id` and `trigger.event.type` (currently uses `eventSlug`) +- [ ] In run dashboard: show "Triggered by event: order.created" (UI work) +- [ ] In event dashboard: show all runs it generated (UI work) ### 8.3 — Events dashboard (webapp) — NOT DONE (deferred) @@ -410,11 +411,14 @@ Tasks: - [ ] `trigger events dlq list` — view dead letter queue - [ ] `trigger events dlq retry ` — retry DLQ item -### 8.5 — SDK helpers and DX — NOT DONE (deferred) +### 8.5 — SDK helpers and DX — PARTIAL -**File to modify**: `packages/trigger-sdk/src/v3/events.ts` +**File modified**: `packages/trigger-sdk/src/v3/events.ts` Tasks: +- [x] SDK `validate()` method for pre-flight payload validation +- [x] Full type inference: consumer payload typed from event schema (`TaskOptionsWithEvent<..., TPayload>` flows from `EventSource`) +- [x] Descriptive error messages when schema validation fails (422 with field paths from ajv) - [ ] Helper for local testing: ```typescript import { testEvent } from "@trigger.dev/sdk/testing"; @@ -423,8 +427,6 @@ Tasks: const result = await testEvent(orderCreated, { orderId: "123", amount: 50 }); expect(result.runs).toHaveLength(2); ``` -- [ ] Full type inference: consumer payload typed from event schema -- [ ] Descriptive error messages when schema validation fails - [ ] Complete JSDoc on all public functions ### 8.6 — Documentation — NOT DONE (deferred) From 9bd85f699e14bade984be3e57ff4bbaa492751b6 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 21:39:17 -0800 Subject: [PATCH 42/65] =?UTF-8?q?feat(events):=20phase=208.6=20=E2=80=94?= =?UTF-8?q?=20event=20system=20documentation=20+=20skill=20update?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add rules/4.4.0/events.md covering: event definition, publish/subscribe, content-based filters, wildcard patterns, publishAndWait, ordering keys, consumer groups, batch publish, validation, DLQ, and replay. Update manifest.json to v4.4.0 with events option. Update SKILL.md with events section and reference. Co-Authored-By: Claude Opus 4.6 --- .claude/skills/trigger-dev-tasks/SKILL.md | 39 ++++ rules/4.4.0/events.md | 247 ++++++++++++++++++++++ rules/manifest.json | 58 ++++- 3 files changed, 343 insertions(+), 1 deletion(-) create mode 100644 rules/4.4.0/events.md diff --git a/.claude/skills/trigger-dev-tasks/SKILL.md b/.claude/skills/trigger-dev-tasks/SKILL.md index 791c22c27ed..3be21348e45 100644 --- a/.claude/skills/trigger-dev-tasks/SKILL.md +++ b/.claude/skills/trigger-dev-tasks/SKILL.md @@ -180,6 +180,44 @@ for (const result of results) { | large-1x | 4 | 8GB | | large-2x | 8 | 16GB | +## Events (Pub/Sub) + +Define typed events, subscribe tasks, publish with automatic fan-out: + +```ts +import { event, task } from "@trigger.dev/sdk"; +import { z } from "zod"; + +// Define event with typed schema +export const orderCreated = event({ + id: "order.created", + schema: z.object({ + orderId: z.string(), + amount: z.number(), + customerId: z.string(), + }), +}); + +// Subscribe task — payload typed from schema +export const sendEmail = task({ + id: "send-order-email", + on: orderCreated, + run: async (payload) => { + await sendEmail(payload.customerId, `Order ${payload.orderId}`); + }, +}); + +// Publish event — triggers all subscribers +await orderCreated.publish({ orderId: "123", amount: 500, customerId: "abc" }); + +// Publish and wait for all subscribers to finish +const result = await orderCreated.publishAndWait(payload); // inside task.run() only +``` + +**Features:** content-based filters (`filter: { amount: [{ $gte: 1000 }] }`), wildcard patterns (`events.match("order.*")`), ordering keys, consumer groups, batch publish, rate limiting, DLQ, replay. + +See `events.md` for full documentation. + ## Design Principles 1. **Break complex workflows into subtasks** that can be independently retried and made idempotent @@ -198,3 +236,4 @@ For detailed documentation on specific topics, read these files: - `scheduled-tasks.md` - Cron schedules, declarative and imperative - `realtime.md` - Real-time subscriptions, streams, React hooks - `config.md` - trigger.config.ts, build extensions (Prisma, Playwright, FFmpeg, etc.) +- `events.md` - Pub/sub events, fan-out, filters, patterns, ordering, consumer groups, DLQ diff --git a/rules/4.4.0/events.md b/rules/4.4.0/events.md new file mode 100644 index 00000000000..5d07d1e51ea --- /dev/null +++ b/rules/4.4.0/events.md @@ -0,0 +1,247 @@ +# Trigger.dev Events (v4) + +**Pub/sub event system for fan-out, event-driven workflows, and task coordination** + +## Defining Events + +```ts +import { event } from "@trigger.dev/sdk"; +import { z } from "zod"; + +// Event with typed schema +export const orderCreated = event({ + id: "order.created", + schema: z.object({ + orderId: z.string(), + amount: z.number(), + customerId: z.string(), + }), +}); + +// Event without schema (payload is `unknown`) +export const systemAlert = event({ + id: "system.alert", + description: "Generic system alert", +}); + +// Event with rate limiting +export const userActivity = event({ + id: "user.activity", + schema: z.object({ userId: z.string(), action: z.string() }), + rateLimit: { + limit: 500, + window: "1m", // "10s", "1m", "1h" + }, +}); +``` + +> Events MUST be exported from your task files. The schema supports Zod, Valibot, ArkType, and any schema library compatible with `@standard-schema`. + +## Subscribing Tasks to Events + +```ts +import { task } from "@trigger.dev/sdk"; +import { orderCreated } from "./events"; + +// Subscribe a task to an event — payload is typed from schema +export const sendOrderEmail = task({ + id: "send-order-email", + on: orderCreated, + run: async (payload) => { + // payload is typed: { orderId: string, amount: number, customerId: string } + await sendEmail(payload.customerId, `Order ${payload.orderId} confirmed!`); + }, +}); + +// Multiple tasks can subscribe to the same event (fan-out) +export const updateInventory = task({ + id: "update-inventory", + on: orderCreated, + run: async (payload) => { + await adjustStock(payload.orderId); + }, +}); +``` + +## Publishing Events + +```ts +import { orderCreated } from "./events"; + +// From inside a task +export const checkoutTask = task({ + id: "checkout", + run: async (payload: { orderId: string; amount: number; customerId: string }) => { + // Process checkout... + + // Publish event — triggers all subscribed tasks + const result = await orderCreated.publish({ + orderId: payload.orderId, + amount: payload.amount, + customerId: payload.customerId, + }); + + console.log(`Published ${result.id}, triggered ${result.runs.length} tasks`); + }, +}); +``` + +### Publish Options + +```ts +await orderCreated.publish(payload, { + idempotencyKey: `order-${orderId}`, // Prevent duplicate publishes + delay: "30s", // Delay before triggering subscribers + tags: ["priority", "vip"], // Tags on generated runs + metadata: { source: "checkout" }, // Metadata on generated runs + orderingKey: customerId, // Sequential processing per key +}); +``` + +### Batch Publish + +```ts +const results = await orderCreated.batchPublish([ + { payload: { orderId: "1", amount: 50, customerId: "a" } }, + { payload: { orderId: "2", amount: 100, customerId: "b" }, options: { tags: ["bulk"] } }, +]); +``` + +## Content-based Filtering + +Subscribe only to events that match a filter: + +```ts +export const highValueHandler = task({ + id: "high-value-order", + on: orderCreated, + filter: { + amount: [{ $gte: 1000 }], + }, + run: async (payload) => { + // Only receives orders with amount >= 1000 + await notifyVipTeam(payload); + }, +}); +``` + +## Wildcard Pattern Subscriptions + +Subscribe to multiple event types using wildcard patterns: + +```ts +import { events, task } from "@trigger.dev/sdk"; + +// * matches exactly one segment +export const orderHandler = task({ + id: "order-handler", + on: events.match("order.*"), // matches order.created, order.updated, etc. + run: async (payload) => { + // payload is `unknown` for pattern subscriptions + }, +}); + +// # matches zero or more segments +export const allHandler = task({ + id: "audit-logger", + on: events.match("order.#"), // matches order, order.created, order.status.changed + run: async (payload) => { + await logAuditEvent(payload); + }, +}); +``` + +## Publish and Wait (Fan-out / Fan-in) + +Publish an event and wait for all subscriber tasks to complete: + +```ts +export const orchestrator = task({ + id: "orchestrator", + run: async (payload) => { + const result = await orderCreated.publishAndWait({ + orderId: "123", + amount: 500, + customerId: "abc", + }); + + // result.results is Record + for (const [taskSlug, runResult] of Object.entries(result.results)) { + console.log(`${taskSlug}: ${runResult.ok ? "success" : "failed"}`); + } + }, +}); +``` + +> `publishAndWait` can only be called from inside a `task.run()`. It blocks until all subscribers finish. + +## Ordering Keys + +Ensure events with the same key are processed sequentially per consumer: + +```ts +await orderCreated.publish(payload, { + orderingKey: payload.customerId, // All events for same customer processed in order +}); +``` + +## Consumer Groups + +Within a consumer group, only one task receives each event (load balancing): + +```ts +export const workerA = task({ + id: "order-processor-a", + on: orderCreated, + consumerGroup: "order-processors", + run: async (payload) => { /* ... */ }, +}); + +export const workerB = task({ + id: "order-processor-b", + on: orderCreated, + consumerGroup: "order-processors", + run: async (payload) => { /* ... */ }, +}); + +// Each published event goes to either workerA OR workerB, not both +``` + +## Validation + +Pre-validate a payload before publishing: + +```ts +try { + const validated = await orderCreated.validate({ orderId: "123", amount: -1 }); +} catch (error) { + console.error("Invalid payload:", error); +} +``` + +## Dead Letter Queue + +Events that fail after all retries are captured in a DLQ. The DLQ is managed via API: + +- `GET /api/v1/events/dlq` — list failed events +- `POST /api/v1/events/dlq/:id/retry` — retry a failed event +- `POST /api/v1/events/dlq/:id/discard` — discard a failed event +- `POST /api/v1/events/dlq/retry-all` — retry all pending failures + +## Event History & Replay + +Published events are persisted and can be replayed: + +- `GET /api/v1/events/:eventId/history` — view event history +- `POST /api/v1/events/:eventId/replay` — replay events in a date range + +## Best Practices + +- **Schema everything**: Define schemas for type safety and validation at publish time +- **Idempotency keys**: Use for critical events to prevent duplicate processing +- **Ordering keys**: Use when event order matters per entity (e.g., per customer) +- **Consumer groups**: Use when you want load balancing instead of fan-out +- **Filters**: Use to reduce unnecessary task invocations +- **Rate limits**: Configure per-event to protect downstream systems +- **publishAndWait**: Use for orchestration patterns (saga, scatter-gather) +- **DLQ**: Monitor and retry failed events, don't let them accumulate diff --git a/rules/manifest.json b/rules/manifest.json index 64d9a86139e..8f7e4a003ae 100644 --- a/rules/manifest.json +++ b/rules/manifest.json @@ -1,7 +1,7 @@ { "name": "trigger.dev", "description": "Trigger.dev coding agent rules", - "currentVersion": "4.3.0", + "currentVersion": "4.4.0", "versions": { "4.0.0": { "options": [ @@ -149,6 +149,62 @@ "installStrategy": "claude-code-subagent" } ] + }, + "4.4.0": { + "options": [ + { + "name": "basic", + "title": "Basic tasks", + "label": "Only the most important rules for writing basic Trigger.dev tasks", + "path": "4.3.0/basic-tasks.md", + "tokens": 1400 + }, + { + "name": "advanced-tasks", + "title": "Advanced tasks", + "label": "Comprehensive rules to help you write advanced Trigger.dev tasks", + "path": "4.3.0/advanced-tasks.md", + "tokens": 3500 + }, + { + "name": "events", + "title": "Events (Pub/Sub)", + "label": "Define events, subscribe tasks, publish with fan-out, filtering, ordering, and DLQ", + "path": "4.4.0/events.md", + "tokens": 2500 + }, + { + "name": "config", + "title": "Configuring Trigger.dev", + "label": "Configure your Trigger.dev project with a trigger.config.ts file", + "path": "4.1.0/config.md", + "tokens": 1900, + "applyTo": "**/trigger.config.ts" + }, + { + "name": "scheduled-tasks", + "title": "Scheduled Tasks", + "label": "How to write and use scheduled Trigger.dev tasks", + "path": "4.0.0/scheduled-tasks.md", + "tokens": 780 + }, + { + "name": "realtime", + "title": "Realtime", + "label": "How to use realtime in your Trigger.dev tasks and your frontend", + "path": "4.1.0/realtime.md", + "tokens": 1700 + }, + { + "name": "claude-code-agent", + "title": "Claude Code Agent", + "label": "An expert Trigger.dev developer as a Claude Code subagent", + "path": "4.0.0/claude-code-agent.md", + "tokens": 2700, + "client": "claude-code", + "installStrategy": "claude-code-subagent" + } + ] } } } \ No newline at end of file From c7c9119796ea05dfe71bdae9869e9cae43846256 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 21:40:15 -0800 Subject: [PATCH 43/65] =?UTF-8?q?feat(events):=20phase=208.7=20=E2=80=94?= =?UTF-8?q?=20event-system=20reference=20project?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Demo project showcasing all event system features: - events.ts: event definitions with schemas, rate limits - basic-subscribers.ts: fan-out to multiple tasks - filtered-subscribers.ts: content-based filtering - pattern-subscribers.ts: wildcard patterns (*, #) - publish-and-wait.ts: scatter-gather orchestration - consumer-groups.ts: load-balanced event handling - ordering.ts: sequential processing per entity Co-Authored-By: Claude Opus 4.6 --- references/event-system/.gitignore | 2 + references/event-system/package.json | 17 +++++++ references/event-system/src/index.ts | 1 + .../src/trigger/basic-subscribers.ts | 51 +++++++++++++++++++ .../src/trigger/consumer-groups.ts | 44 ++++++++++++++++ references/event-system/src/trigger/events.ts | 38 ++++++++++++++ .../src/trigger/filtered-subscribers.ts | 35 +++++++++++++ .../event-system/src/trigger/ordering.ts | 39 ++++++++++++++ .../src/trigger/pattern-subscribers.ts | 24 +++++++++ .../src/trigger/publish-and-wait.ts | 45 ++++++++++++++++ references/event-system/trigger.config.ts | 18 +++++++ references/event-system/tsconfig.json | 14 +++++ 12 files changed, 328 insertions(+) create mode 100644 references/event-system/.gitignore create mode 100644 references/event-system/package.json create mode 100644 references/event-system/src/index.ts create mode 100644 references/event-system/src/trigger/basic-subscribers.ts create mode 100644 references/event-system/src/trigger/consumer-groups.ts create mode 100644 references/event-system/src/trigger/events.ts create mode 100644 references/event-system/src/trigger/filtered-subscribers.ts create mode 100644 references/event-system/src/trigger/ordering.ts create mode 100644 references/event-system/src/trigger/pattern-subscribers.ts create mode 100644 references/event-system/src/trigger/publish-and-wait.ts create mode 100644 references/event-system/trigger.config.ts create mode 100644 references/event-system/tsconfig.json diff --git a/references/event-system/.gitignore b/references/event-system/.gitignore new file mode 100644 index 00000000000..ac7321418ff --- /dev/null +++ b/references/event-system/.gitignore @@ -0,0 +1,2 @@ +node_modules +.trigger diff --git a/references/event-system/package.json b/references/event-system/package.json new file mode 100644 index 00000000000..c5ee954a744 --- /dev/null +++ b/references/event-system/package.json @@ -0,0 +1,17 @@ +{ + "name": "references-event-system", + "private": true, + "type": "module", + "scripts": { + "dev": "trigger dev", + "deploy": "trigger deploy" + }, + "devDependencies": { + "trigger.dev": "workspace:*" + }, + "dependencies": { + "@trigger.dev/build": "workspace:*", + "@trigger.dev/sdk": "workspace:*", + "zod": "3.25.76" + } +} diff --git a/references/event-system/src/index.ts b/references/event-system/src/index.ts new file mode 100644 index 00000000000..cb0ff5c3b54 --- /dev/null +++ b/references/event-system/src/index.ts @@ -0,0 +1 @@ +export {}; diff --git a/references/event-system/src/trigger/basic-subscribers.ts b/references/event-system/src/trigger/basic-subscribers.ts new file mode 100644 index 00000000000..87e4a1ccd9c --- /dev/null +++ b/references/event-system/src/trigger/basic-subscribers.ts @@ -0,0 +1,51 @@ +import { task, logger } from "@trigger.dev/sdk"; +import { orderCreated, orderShipped } from "./events"; + +// ---- Basic Fan-out: Multiple tasks subscribe to the same event ---- + +/** Send a confirmation email when an order is created */ +export const sendConfirmationEmail = task({ + id: "send-confirmation-email", + on: orderCreated, + run: async (payload) => { + logger.info("Sending confirmation email", { + orderId: payload.orderId, + customerId: payload.customerId, + }); + + // Simulate email sending + return { sent: true, to: payload.customerId }; + }, +}); + +/** Update inventory when an order is created */ +export const updateInventory = task({ + id: "update-inventory", + on: orderCreated, + run: async (payload) => { + logger.info("Updating inventory", { + orderId: payload.orderId, + itemCount: payload.items.length, + }); + + for (const item of payload.items) { + logger.info(`Adjusting stock: ${item.sku} -${item.qty}`); + } + + return { adjusted: payload.items.length }; + }, +}); + +/** Notify customer when order is shipped */ +export const notifyShipped = task({ + id: "notify-shipped", + on: orderShipped, + run: async (payload) => { + logger.info("Order shipped notification", { + orderId: payload.orderId, + tracking: payload.trackingNumber, + }); + + return { notified: true }; + }, +}); diff --git a/references/event-system/src/trigger/consumer-groups.ts b/references/event-system/src/trigger/consumer-groups.ts new file mode 100644 index 00000000000..49781a2bf70 --- /dev/null +++ b/references/event-system/src/trigger/consumer-groups.ts @@ -0,0 +1,44 @@ +import { task, logger } from "@trigger.dev/sdk"; +import { userActivity } from "./events"; + +// ---- Consumer Groups: Load-balanced event handling ---- +// Within a consumer group, only ONE task receives each event. + +export const activityProcessorA = task({ + id: "activity-processor-a", + on: userActivity, + consumerGroup: "activity-processors", + run: async (payload) => { + logger.info("Processor A handling activity", { + userId: payload.userId, + action: payload.action, + }); + return { processor: "A", userId: payload.userId }; + }, +}); + +export const activityProcessorB = task({ + id: "activity-processor-b", + on: userActivity, + consumerGroup: "activity-processors", + run: async (payload) => { + logger.info("Processor B handling activity", { + userId: payload.userId, + action: payload.action, + }); + return { processor: "B", userId: payload.userId }; + }, +}); + +// This task is NOT in the consumer group — it receives ALL events +export const activityAnalytics = task({ + id: "activity-analytics", + on: userActivity, + run: async (payload) => { + logger.info("Analytics: recording activity", { + userId: payload.userId, + action: payload.action, + }); + return { recorded: true }; + }, +}); diff --git a/references/event-system/src/trigger/events.ts b/references/event-system/src/trigger/events.ts new file mode 100644 index 00000000000..f6815111215 --- /dev/null +++ b/references/event-system/src/trigger/events.ts @@ -0,0 +1,38 @@ +import { event } from "@trigger.dev/sdk"; +import { z } from "zod"; + +// ---- Event Definitions ---- + +/** Published when an order is placed */ +export const orderCreated = event({ + id: "order.created", + schema: z.object({ + orderId: z.string(), + amount: z.number(), + customerId: z.string(), + items: z.array(z.object({ sku: z.string(), qty: z.number() })), + }), +}); + +/** Published when an order is shipped */ +export const orderShipped = event({ + id: "order.shipped", + schema: z.object({ + orderId: z.string(), + trackingNumber: z.string(), + }), +}); + +/** Published for any user action (rate-limited) */ +export const userActivity = event({ + id: "user.activity", + schema: z.object({ + userId: z.string(), + action: z.string(), + timestamp: z.string(), + }), + rateLimit: { + limit: 100, + window: "1m", + }, +}); diff --git a/references/event-system/src/trigger/filtered-subscribers.ts b/references/event-system/src/trigger/filtered-subscribers.ts new file mode 100644 index 00000000000..d0eac3ad517 --- /dev/null +++ b/references/event-system/src/trigger/filtered-subscribers.ts @@ -0,0 +1,35 @@ +import { task, logger } from "@trigger.dev/sdk"; +import { orderCreated } from "./events"; + +// ---- Content-based Filtering: Only receive events that match ---- + +/** Only handles high-value orders (amount >= 1000) */ +export const highValueOrderHandler = task({ + id: "high-value-order", + on: orderCreated, + filter: { + amount: [{ $gte: 1000 }], + }, + run: async (payload) => { + logger.info("High-value order detected!", { + orderId: payload.orderId, + amount: payload.amount, + }); + + // Alert VIP team, apply special handling, etc. + return { flagged: true, amount: payload.amount }; + }, +}); + +/** Only handles orders from a specific customer */ +export const vipCustomerHandler = task({ + id: "vip-customer-handler", + on: orderCreated, + filter: { + customerId: ["customer-vip-001", "customer-vip-002"], + }, + run: async (payload) => { + logger.info("VIP customer order", { customerId: payload.customerId }); + return { vip: true }; + }, +}); diff --git a/references/event-system/src/trigger/ordering.ts b/references/event-system/src/trigger/ordering.ts new file mode 100644 index 00000000000..8305621a89e --- /dev/null +++ b/references/event-system/src/trigger/ordering.ts @@ -0,0 +1,39 @@ +import { task, logger } from "@trigger.dev/sdk"; +import { orderCreated } from "./events"; + +// ---- Ordering Keys: Sequential processing per entity ---- + +/** + * This publisher uses ordering keys to ensure events for the same customer + * are processed sequentially (no concurrent runs per customer). + */ +export const placeOrder = task({ + id: "place-order", + run: async (payload: { + orderId: string; + amount: number; + customerId: string; + }) => { + logger.info("Publishing order with ordering key", { + orderId: payload.orderId, + customerId: payload.customerId, + }); + + const result = await orderCreated.publish( + { + orderId: payload.orderId, + amount: payload.amount, + customerId: payload.customerId, + items: [{ sku: "ITEM-001", qty: 1 }], + }, + { + // Events for the same customer are processed one at a time + orderingKey: payload.customerId, + // Prevent duplicate publishes + idempotencyKey: `order-${payload.orderId}`, + } + ); + + return { eventId: result.id, runs: result.runs.length }; + }, +}); diff --git a/references/event-system/src/trigger/pattern-subscribers.ts b/references/event-system/src/trigger/pattern-subscribers.ts new file mode 100644 index 00000000000..ada32778005 --- /dev/null +++ b/references/event-system/src/trigger/pattern-subscribers.ts @@ -0,0 +1,24 @@ +import { events, task, logger } from "@trigger.dev/sdk"; + +// ---- Wildcard Pattern Subscriptions ---- + +/** Catches all order.* events (order.created, order.shipped, etc.) */ +export const orderAuditLog = task({ + id: "order-audit-log", + on: events.match("order.*"), + run: async (payload) => { + // payload is `unknown` for pattern subscriptions + logger.info("Order event received", { payload }); + return { logged: true }; + }, +}); + +/** Catches all user.# events (user.activity, user.profile.updated, etc.) */ +export const userEventTracker = task({ + id: "user-event-tracker", + on: events.match("user.#"), + run: async (payload) => { + logger.info("User event tracked", { payload }); + return { tracked: true }; + }, +}); diff --git a/references/event-system/src/trigger/publish-and-wait.ts b/references/event-system/src/trigger/publish-and-wait.ts new file mode 100644 index 00000000000..3c3bb157703 --- /dev/null +++ b/references/event-system/src/trigger/publish-and-wait.ts @@ -0,0 +1,45 @@ +import { task, logger } from "@trigger.dev/sdk"; +import { orderCreated, orderShipped } from "./events"; + +// ---- Publish-and-Wait: Fan-out then collect results ---- + +/** + * Orchestrator task that publishes an event and waits for all subscribers + * to finish before proceeding (scatter-gather pattern). + */ +export const processOrder = task({ + id: "process-order", + run: async (payload: { orderId: string; amount: number; customerId: string }) => { + logger.info("Starting order processing", { orderId: payload.orderId }); + + // Publish and wait for ALL subscribers (sendConfirmationEmail, updateInventory, etc.) + const result = await orderCreated.publishAndWait({ + orderId: payload.orderId, + amount: payload.amount, + customerId: payload.customerId, + items: [{ sku: "WIDGET-001", qty: 2 }], + }); + + logger.info("All subscribers completed", { + eventId: result.id, + subscriberCount: Object.keys(result.results).length, + }); + + // Check results from each subscriber + for (const [taskSlug, runResult] of Object.entries(result.results)) { + if (runResult.ok) { + logger.info(`${taskSlug}: success`, { output: runResult.output }); + } else { + logger.error(`${taskSlug}: failed`, { error: runResult.error }); + } + } + + // Continue with next step: publish shipped event + await orderShipped.publish({ + orderId: payload.orderId, + trackingNumber: `TRK-${Date.now()}`, + }); + + return { orderId: payload.orderId, status: "completed" }; + }, +}); diff --git a/references/event-system/trigger.config.ts b/references/event-system/trigger.config.ts new file mode 100644 index 00000000000..eaf9227c4e3 --- /dev/null +++ b/references/event-system/trigger.config.ts @@ -0,0 +1,18 @@ +import { defineConfig } from "@trigger.dev/sdk/v3"; + +export default defineConfig({ + compatibilityFlags: ["run_engine_v2"], + project: "proj_event_system_ref", + logLevel: "debug", + maxDuration: 300, + retries: { + enabledInDev: true, + default: { + maxAttempts: 3, + minTimeoutInMs: 1000, + maxTimeoutInMs: 10000, + factor: 2, + randomize: true, + }, + }, +}); diff --git a/references/event-system/tsconfig.json b/references/event-system/tsconfig.json new file mode 100644 index 00000000000..3bb455e5d40 --- /dev/null +++ b/references/event-system/tsconfig.json @@ -0,0 +1,14 @@ +{ + "compilerOptions": { + "target": "ES2023", + "module": "Node16", + "moduleResolution": "Node16", + "esModuleInterop": true, + "strict": true, + "skipLibCheck": true, + "customConditions": ["@triggerdotdev/source"], + "lib": ["DOM", "DOM.Iterable"], + "noEmit": true + }, + "include": ["./src/**/*.ts", "trigger.config.ts"] +} From 8510dbcd715a37fca17643ba9a26427a3728a698 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 21:40:53 -0800 Subject: [PATCH 44/65] =?UTF-8?q?feat(events):=20phase=208.5=20=E2=80=94?= =?UTF-8?q?=20JSDoc=20on=20createEvent=20and=20event()=20with=20examples?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- packages/trigger-sdk/src/v3/events.ts | 45 ++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index 4578386c0e2..12651877b4e 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -119,17 +119,49 @@ export type EventDefinitionId = // ---- Implementation ---- -// Overload: with schema — payload type is inferred from schema +/** + * Define a typed event that can be published and subscribed to. + * + * When a schema is provided, the payload type is inferred from it and + * validated at both publish time (SDK-side) and server-side (JSON Schema). + * + * @example + * ```ts + * import { event } from "@trigger.dev/sdk"; + * import { z } from "zod"; + * + * // With schema — payload is typed as { orderId: string, amount: number } + * export const orderCreated = event({ + * id: "order.created", + * schema: z.object({ orderId: z.string(), amount: z.number() }), + * }); + * + * // Without schema — payload is `unknown` + * export const systemAlert = event({ id: "system.alert" }); + * + * // Publish + * await orderCreated.publish({ orderId: "123", amount: 500 }); + * + * // Subscribe + * export const handler = task({ + * id: "handle-order", + * on: orderCreated, + * run: async (payload) => { // payload is typed + * console.log(payload.orderId); + * }, + * }); + * ``` + */ export function createEvent( options: EventOptions & { schema: TSchema } ): EventDefinition>; -// Overload: without schema — payload type is unknown +/** @see {@link createEvent} */ export function createEvent( options: EventOptions ): EventDefinition; -// Overload: without schema (no schema field at all) +/** @see {@link createEvent} */ export function createEvent( options: Omit, "schema"> ): EventDefinition; @@ -277,7 +309,12 @@ export function createEvent Date: Sat, 28 Feb 2026 21:43:43 -0800 Subject: [PATCH 45/65] =?UTF-8?q?feat(events):=20phase=208.4=20=E2=80=94?= =?UTF-8?q?=20CLI=20commands=20for=20events?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `trigger events list` and `trigger events publish` commands: - events/index.ts: parent command registration - events/list.ts: list event definitions with subscriber count - events/publish.ts: publish event with JSON payload - apiClient.ts: listEvents() and publishEvent() methods Usage: trigger events list trigger events publish order.created --payload '{"orderId":"123"}' Co-Authored-By: Claude Opus 4.6 --- packages/cli-v3/src/apiClient.ts | 37 ++++++ packages/cli-v3/src/cli/index.ts | 2 + packages/cli-v3/src/commands/events/index.ts | 14 ++ packages/cli-v3/src/commands/events/list.ts | 105 +++++++++++++++ .../cli-v3/src/commands/events/publish.ts | 120 ++++++++++++++++++ 5 files changed, 278 insertions(+) create mode 100644 packages/cli-v3/src/commands/events/index.ts create mode 100644 packages/cli-v3/src/commands/events/list.ts create mode 100644 packages/cli-v3/src/commands/events/publish.ts diff --git a/packages/cli-v3/src/apiClient.ts b/packages/cli-v3/src/apiClient.ts index e099260203f..5b214457d8b 100644 --- a/packages/cli-v3/src/apiClient.ts +++ b/packages/cli-v3/src/apiClient.ts @@ -40,6 +40,8 @@ import { ApiBranchListResponseBody, GenerateRegistryCredentialsResponseBody, RemoteBuildProviderStatusResponseBody, + ListEventsResponseBody, + PublishEventResponseBody, } from "@trigger.dev/core/v3"; import { WorkloadDebugLogRequestBody, @@ -547,6 +549,41 @@ export class CliApiClient { }); } + async listEvents(projectRef: string) { + if (!this.accessToken) { + throw new Error("listEvents: No access token"); + } + + return wrapZodFetch(ListEventsResponseBody, `${this.apiURL}/api/v1/events`, { + method: "GET", + headers: { + ...this.getHeaders(), + "x-trigger-project-ref": projectRef, + }, + }); + } + + async publishEvent(projectRef: string, eventId: string, payload: unknown) { + if (!this.accessToken) { + throw new Error("publishEvent: No access token"); + } + + const encodedEventId = encodeURIComponent(eventId); + + return wrapZodFetch( + PublishEventResponseBody, + `${this.apiURL}/api/v1/events/${encodedEventId}/publish`, + { + method: "POST", + headers: { + ...this.getHeaders(), + "x-trigger-project-ref": projectRef, + }, + body: JSON.stringify({ payload }), + } + ); + } + get dev() { return { config: this.devConfig.bind(this), diff --git a/packages/cli-v3/src/cli/index.ts b/packages/cli-v3/src/cli/index.ts index fc482224e58..8fbd6c6badc 100644 --- a/packages/cli-v3/src/cli/index.ts +++ b/packages/cli-v3/src/cli/index.ts @@ -18,6 +18,7 @@ import { VERSION } from "../version.js"; import { installExitHandler } from "./common.js"; import { configureInstallMcpCommand } from "../commands/install-mcp.js"; import { configureInstallRulesCommand } from "../commands/install-rules.js"; +import { configureEventsCommand } from "../commands/events/index.js"; export const program = new Command(); @@ -42,5 +43,6 @@ configureAnalyzeCommand(program); configureMcpCommand(program); configureInstallMcpCommand(program); configureInstallRulesCommand(program); +configureEventsCommand(program); installExitHandler(); diff --git a/packages/cli-v3/src/commands/events/index.ts b/packages/cli-v3/src/commands/events/index.ts new file mode 100644 index 00000000000..fb55f6ed5b7 --- /dev/null +++ b/packages/cli-v3/src/commands/events/index.ts @@ -0,0 +1,14 @@ +import { Command } from "commander"; +import { configureEventsListCommand } from "./list.js"; +import { configureEventsPublishCommand } from "./publish.js"; + +export function configureEventsCommand(program: Command) { + const events = program + .command("events") + .description("Manage pub/sub events"); + + configureEventsListCommand(events); + configureEventsPublishCommand(events); + + return events; +} diff --git a/packages/cli-v3/src/commands/events/list.ts b/packages/cli-v3/src/commands/events/list.ts new file mode 100644 index 00000000000..8bb5f072a58 --- /dev/null +++ b/packages/cli-v3/src/commands/events/list.ts @@ -0,0 +1,105 @@ +import { Command } from "commander"; +import { z } from "zod"; +import { + CommonCommandOptions, + commonOptions, + handleTelemetry, + wrapCommandAction, +} from "../../cli/common.js"; +import { printInitialBanner } from "../../utilities/initialBanner.js"; +import { isLoggedIn } from "../../utilities/session.js"; +import { loadConfig } from "../../config.js"; +import { resolveLocalEnvVars } from "../../utilities/localEnvVars.js"; +import { CliApiClient } from "../../apiClient.js"; +import { intro, outro } from "@clack/prompts"; +import { spinner } from "../../utilities/windows.js"; +import { logger } from "../../utilities/logger.js"; +import { tryCatch } from "@trigger.dev/core"; + +const EventsListOptions = CommonCommandOptions.extend({ + config: z.string().optional(), + projectRef: z.string().optional(), + envFile: z.string().optional(), +}); + +type EventsListOptions = z.infer; + +export function configureEventsListCommand(program: Command) { + return commonOptions( + program + .command("list") + .description("List all event definitions in the project") + .option("-c, --config ", "The name of the config file") + .option("-p, --project-ref ", "The project ref") + .option("--env-file ", "Path to the .env file") + ).action(async (options) => { + await handleTelemetry(async () => { + await printInitialBanner(false, options.profile); + await eventsListCommand(options); + }); + }); +} + +async function eventsListCommand(options: unknown) { + return await wrapCommandAction( + "eventsListCommand", + EventsListOptions, + options, + async (opts) => { + return await _eventsListCommand(opts); + } + ); +} + +async function _eventsListCommand(options: EventsListOptions) { + intro("Listing event definitions"); + + const envVars = resolveLocalEnvVars(options.envFile); + + const authentication = await isLoggedIn(options.profile); + if (!authentication.ok) { + outro(`Not logged in. Use \`trigger login\` first.`); + return; + } + + const [configError, resolvedConfig] = await tryCatch( + loadConfig({ + overrides: { project: options.projectRef ?? envVars.TRIGGER_PROJECT_REF }, + configFile: options.config, + warn: false, + }) + ); + + if (configError || !resolvedConfig?.project) { + outro("Could not resolve project. Use --project-ref or configure trigger.config.ts."); + return; + } + + const loadingSpinner = spinner(); + loadingSpinner.start("Fetching events..."); + + const apiClient = new CliApiClient(authentication.auth.apiUrl, authentication.auth.accessToken); + const result = await apiClient.listEvents(resolvedConfig.project); + + if (!result.success) { + loadingSpinner.stop("Failed to fetch events"); + logger.error(result.error); + return; + } + + loadingSpinner.stop(`Found ${result.data.data.length} event(s)`); + + if (result.data.data.length === 0) { + outro("No events defined yet. Define events with `event()` in your task files."); + return; + } + + logger.table( + result.data.data.map((evt) => ({ + id: evt.slug, + version: evt.version, + subscribers: String(evt.subscriberCount), + schema: evt.hasSchema ? "yes" : "no", + })) + ); +} diff --git a/packages/cli-v3/src/commands/events/publish.ts b/packages/cli-v3/src/commands/events/publish.ts new file mode 100644 index 00000000000..0732506c318 --- /dev/null +++ b/packages/cli-v3/src/commands/events/publish.ts @@ -0,0 +1,120 @@ +import { Command } from "commander"; +import { z } from "zod"; +import { + CommonCommandOptions, + commonOptions, + handleTelemetry, + wrapCommandAction, +} from "../../cli/common.js"; +import { printInitialBanner } from "../../utilities/initialBanner.js"; +import { isLoggedIn } from "../../utilities/session.js"; +import { loadConfig } from "../../config.js"; +import { resolveLocalEnvVars } from "../../utilities/localEnvVars.js"; +import { CliApiClient } from "../../apiClient.js"; +import { intro, outro } from "@clack/prompts"; +import { spinner } from "../../utilities/windows.js"; +import { logger } from "../../utilities/logger.js"; +import { tryCatch } from "@trigger.dev/core"; + +const EventsPublishOptions = CommonCommandOptions.extend({ + config: z.string().optional(), + projectRef: z.string().optional(), + envFile: z.string().optional(), + payload: z.string(), +}); + +type EventsPublishOptions = z.infer; + +export function configureEventsPublishCommand(program: Command) { + return commonOptions( + program + .command("publish ") + .description("Publish an event with a JSON payload") + .requiredOption("--payload ", "JSON payload to publish") + .option("-c, --config ", "The name of the config file") + .option("-p, --project-ref ", "The project ref") + .option("--env-file ", "Path to the .env file") + ).action(async (eventId: string, options) => { + await handleTelemetry(async () => { + await printInitialBanner(false, options.profile); + await eventsPublishCommand({ ...options, eventId }); + }); + }); +} + +const EventsPublishCommandInput = EventsPublishOptions.extend({ + eventId: z.string(), +}); + +type EventsPublishCommandInput = z.infer; + +async function eventsPublishCommand(options: unknown) { + return await wrapCommandAction( + "eventsPublishCommand", + EventsPublishCommandInput, + options, + async (opts) => { + return await _eventsPublishCommand(opts); + } + ); +} + +async function _eventsPublishCommand(options: EventsPublishCommandInput) { + intro(`Publishing event "${options.eventId}"`); + + // Parse JSON payload + let payload: unknown; + try { + payload = JSON.parse(options.payload); + } catch { + outro("Invalid JSON payload. Provide valid JSON with --payload."); + return; + } + + const envVars = resolveLocalEnvVars(options.envFile); + + const authentication = await isLoggedIn(options.profile); + if (!authentication.ok) { + outro(`Not logged in. Use \`trigger login\` first.`); + return; + } + + const [configError, resolvedConfig] = await tryCatch( + loadConfig({ + overrides: { project: options.projectRef ?? envVars.TRIGGER_PROJECT_REF }, + configFile: options.config, + warn: false, + }) + ); + + if (configError || !resolvedConfig?.project) { + outro("Could not resolve project. Use --project-ref or configure trigger.config.ts."); + return; + } + + const loadingSpinner = spinner(); + loadingSpinner.start("Publishing event..."); + + const apiClient = new CliApiClient(authentication.auth.apiUrl, authentication.auth.accessToken); + const result = await apiClient.publishEvent(resolvedConfig.project, options.eventId, payload); + + if (!result.success) { + loadingSpinner.stop("Failed to publish event"); + logger.error(result.error); + return; + } + + loadingSpinner.stop("Event published"); + + logger.info(`Event ID: ${result.data.eventId}`); + logger.info(`Triggered ${result.data.runs.length} run(s)`); + + if (result.data.runs.length > 0) { + logger.table( + result.data.runs.map((run) => ({ + task: run.taskIdentifier, + runId: run.runId, + })) + ); + } +} From d661c7180f79cbcb4290582c4863fad8de456637 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 21:46:09 -0800 Subject: [PATCH 46/65] chore: add changeset for phase 8 DX improvements Co-Authored-By: Claude Opus 4.6 --- .changeset/event-docs-cli-dx.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 .changeset/event-docs-cli-dx.md diff --git a/.changeset/event-docs-cli-dx.md b/.changeset/event-docs-cli-dx.md new file mode 100644 index 00000000000..648f0217d7c --- /dev/null +++ b/.changeset/event-docs-cli-dx.md @@ -0,0 +1,10 @@ +--- +"@trigger.dev/sdk": patch +"trigger.dev": patch +--- + +Add event system documentation, CLI commands, and developer experience improvements. +New `rules/4.4.0/events.md` documentation covering all event features. +CLI `trigger events list` and `trigger events publish` commands. +JSDoc on `createEvent()` and `event()` with usage examples. +Reference project `references/event-system/` demonstrating all event patterns. From 826762d524483dfa1a2bf116b85dcdab6f7fb3e5 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 21:46:49 -0800 Subject: [PATCH 47/65] =?UTF-8?q?chore:=20update=20roadmap=20=E2=80=94=20m?= =?UTF-8?q?ark=208.4,=208.5,=208.6,=208.7=20as=20done?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../memory/pubsub-roadmap.md | 46 +++++++++---------- 1 file changed, 22 insertions(+), 24 deletions(-) diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md index 399faed6012..67906eded63 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md @@ -24,11 +24,11 @@ First-class pub/sub event system within Trigger.dev that enables: | 5 | Ordering + Consumer Groups | DONE | | 6 | Publish-and-Wait (fan-out/fan-in) | DONE | | 7 | Rate Limiting + Backpressure | DONE (publish-side only) | -| 8 | Observability + Developer Experience | PARTIAL (API + types done, no UI/CLI/docs) | +| 8 | Observability + Developer Experience | PARTIAL (API, types, docs, CLI, ref project done; no dashboard UI) | | 9.1 | Redis-backed rate limiter | DONE (`81c09cda5`) | | 9.2 | Hash-based consumer groups | DONE (`81c09cda5`) | | 9.3 | Integration tests verified | DONE (24/24 pass) | -| 9.4 | Dashboard UI, CLI, docs, reference project | NOT STARTED | +| 9.4 | Dashboard UI, CLI, docs, reference project | PARTIAL (CLI + docs + ref done, dashboard UI pending) | | 9.5 | Consumer-side rate limiting + backpressure | NOT STARTED | See [pubsub-pending.md](pubsub-pending.md) for details on remaining items. @@ -399,13 +399,13 @@ Tasks: - Actions: retry, discard, retry all - [ ] Corresponding presenters in `apps/webapp/app/v3/presenters/` -### 8.4 — CLI commands — NOT DONE (deferred) +### 8.4 — CLI commands — PARTIAL -**File to modify**: `packages/cli-v3/src/commands/` +**Files created**: `packages/cli-v3/src/commands/events/` Tasks: -- [ ] `trigger events list` — list project events -- [ ] `trigger events publish --payload '{...}'` — publish from CLI +- [x] `trigger events list` — list project events +- [x] `trigger events publish --payload '{...}'` — publish from CLI - [ ] `trigger events history --from --to` — view history - [ ] `trigger events replay --from --to` — replay - [ ] `trigger events dlq list` — view dead letter queue @@ -429,31 +429,29 @@ Tasks: ``` - [ ] Complete JSDoc on all public functions -### 8.6 — Documentation — NOT DONE (deferred) +### 8.6 — Documentation — DONE -**New files in**: `rules/` (next version) +**Files created**: `rules/4.4.0/events.md` Tasks: -- [ ] Event system documentation for SDK rules: - - `events-basic.md` — define events, publish, subscribe - - `events-advanced.md` — filters, wildcards, ordering, consumer groups - - `events-reliability.md` — DLQ, replay, idempotency - - `events-patterns.md` — common patterns (saga, CQRS, event sourcing) -- [ ] Update `.claude/skills/trigger-dev-tasks/SKILL.md` with event examples -- [ ] Update `manifest.json` with new version +- [x] Event system documentation: `rules/4.4.0/events.md` — single comprehensive file covering all features (define, publish, subscribe, filters, patterns, ordering, consumer groups, DLQ, replay) +- [x] Update `.claude/skills/trigger-dev-tasks/SKILL.md` with events section and reference +- [x] Update `manifest.json` with new version 4.4.0 -### 8.7 — Reference project — NOT DONE (deferred) +### 8.7 — Reference project — DONE -**New directory**: `references/event-system/` +**Directory created**: `references/event-system/` Tasks: -- [ ] Reference project demonstrating: - - Definition of multiple events - - Tasks subscribed with filters - - Publish from a task - - Publish-and-wait pattern - - DLQ handler -- [ ] Use as manual testing project (similar to hello-world) +- [x] Reference project demonstrating: + - Definition of multiple events with schemas and rate limits + - Basic fan-out (multiple subscribers) + - Content-based filtering + - Wildcard pattern subscriptions + - Publish-and-wait (scatter-gather) + - Consumer groups (load balancing) + - Ordering keys (sequential per entity) +- [x] Use as manual testing project (similar to hello-world) --- From d5a3299b1c06eb1fd1cbb9e9f77fbef281faea09 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 22:57:05 -0800 Subject: [PATCH 48/65] =?UTF-8?q?chore:=20document=20E2E=20test=20findings?= =?UTF-8?q?=20=E2=80=94=20ordering=20limitation,=20payload=20size=20bug,?= =?UTF-8?q?=20ClickHouse=20in=20dev?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Found during live E2E testing: - 9.5: orderingKey doesn't guarantee strict ordering (Trigger.dev concurrencyKey limitation) - 9.6: payloads >512KB cause silent fan-out failure (0 runs, HTTP 200) - 9.7: ClickHouse tables not created in dev (stats/history/replay return 500) Co-Authored-By: Claude Opus 4.6 --- .../memory/pubsub-pending.md | 82 ++++++++++++++++++- 1 file changed, 81 insertions(+), 1 deletion(-) diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md index 374ca192c54..b0fd5571397 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md @@ -38,7 +38,87 @@ Items identified during post-implementation audit. Ordered by priority. - SDK docs in `rules/` directory - Update `.claude/skills/trigger-dev-tasks/SKILL.md` -### 9.5 — Consumer-side Rate Limiting + Backpressure Monitor +### 9.5 — Ordering Key Does Not Guarantee Strict Ordering + +**Status**: NOT RESOLVED — needs design decision +**Priority**: HIGH — correctness issue +**Found during**: E2E testing (2026-03-01) + +**Problem**: `orderingKey` maps to Trigger.dev's `concurrencyKey`, which creates a **copy of the queue per key**, each with the same `concurrencyLimit`. This means: + +- If task has `concurrencyLimit: 1` → ordering works per key, BUT the limit is per-key, not global. All different keys run in parallel with no global cap (only bounded by environment concurrency limit). +- If task has `concurrencyLimit: 10` → 10 events with the SAME key can run in parallel, breaking ordering. +- There's no way to express "strict ordering per key + global concurrency limit N" with Trigger.dev's current queue model. + +**Expected behavior** (like Kafka/SQS FIFO): +- `orderingKey` = strict sequential per key (always 1 at a time per key) +- `concurrencyLimit` = total parallel runs across all keys (separate concept) + +``` +concurrencyLimit: 3, ordering keys A/B/C: + +Slot 1: A1 → A2 → A3 (key A in order) +Slot 2: B1 → B2 (key B in order) +Slot 3: C1 → C2 (key C in order) +Max 3 running at once, each key strictly ordered. +``` + +**Trigger.dev's actual behavior with concurrencyKey**: +- Creates 3 separate queues (A, B, C), EACH with concurrencyLimit 3 +- So 9 runs could execute simultaneously (3 per key × 3 keys) +- Not true ordering + +**Options to resolve**: +1. Build ordering on top of Trigger.dev's queue system with custom logic in PublishEventService +2. Contribute ordering support upstream to Trigger.dev's run engine +3. Document as limitation and recommend `concurrencyLimit: 1` for ordering use cases +4. Use a separate ordering mechanism (Redis-based FIFO per key) before triggering runs + +**Test results that confirmed this**: +- `concurrencyLimit: 1` + same key → sequential (correct) +- `concurrencyLimit: 1` + different keys → parallel (capped by env limit ~8, not by concurrencyLimit) +- `concurrencyLimit: 2` + same key → 2 at a time (breaks ordering) +- 10 different keys + `concurrencyLimit: 1` → only ~8 ran in parallel (env limit, not queue limit) + +### 9.6 — Large Payloads Cause Silent Fan-out Failure + +**Status**: NOT RESOLVED — needs fix +**Priority**: HIGH — data loss / silent failure +**Found during**: E2E testing (2026-03-01) + +**Problem**: Payloads >512KB cause `PublishEventService` to return `runs: []` (HTTP 200, no error) because Trigger.dev's task trigger silently fails for large payloads (>512KB need object storage offloading which our event publish path doesn't handle). + +**Test results**: +- 100KB payload: 4 runs (OK) +- 500KB payload: 4 runs (OK) +- 600KB payload: 0 runs (SILENT FAILURE) +- 2MB payload: 0 runs (SILENT FAILURE) + +**The trigger call fails silently** — `TriggerTaskService` returns `undefined` for each subscriber, and `PublishEventService` logs it as a partial failure but still returns HTTP 200 with empty runs. + +**Options to resolve**: +1. Validate payload size in PublishEventService before fan-out (reject >512KB with clear error) +2. Use Trigger.dev's payload offloading mechanism (payloads >512KB go to object storage) +3. Both: warn on large payloads + support offloading + +### 9.7 — ClickHouse Tables Not Created in Dev + +**Status**: KNOWN LIMITATION +**Priority**: LOW — only affects stats/history/replay in local dev + +**Problem**: ClickHouse migrations (`021_event_log_v1.sql`, `022_event_counts_mv_v1.sql`) are not automatically applied in local dev. This causes: +- `GET /api/v1/events/:id/stats` → 500 "Failed to query event stats" +- `GET /api/v1/events/:id/history` → 500 "Failed to query event history" +- `POST /api/v1/events/:id/replay` → 500 "Failed to query events for replay" + +The event log writer (fire-and-forget) also fails silently: +``` +Table trigger_dev.event_log_v1 does not exist. +``` + +**Resolution**: Apply ClickHouse migrations in local dev, or improve error messages to indicate ClickHouse is not configured. + +### 9.8 — Consumer-side Rate Limiting + Backpressure Monitor **Status**: NOT STARTED (deferred from Phase 7) **Complexity**: MEDIUM From 3476f76d0e35b36c12dab1c4a3f4f145ba020bee Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sat, 28 Feb 2026 23:03:26 -0800 Subject: [PATCH 49/65] =?UTF-8?q?chore:=20update=209.6=20=E2=80=94=20large?= =?UTF-8?q?=20payload=20issue=20is=20object=20store=20config,=20not=20our?= =?UTF-8?q?=20bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Root cause: TriggerTaskService tries S3 offload for >512KB payloads. Without object store credentials (local dev), all subscriber triggers fail silently. Same behavior as regular tasks.trigger() without object store. Co-Authored-By: Claude Opus 4.6 --- .../memory/pubsub-pending.md | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md index b0fd5571397..c74b194f0da 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md @@ -80,26 +80,24 @@ Max 3 running at once, each key strictly ordered. - `concurrencyLimit: 2` + same key → 2 at a time (breaks ordering) - 10 different keys + `concurrencyLimit: 1` → only ~8 ran in parallel (env limit, not queue limit) -### 9.6 — Large Payloads Cause Silent Fan-out Failure +### 9.6 — Large Payloads >512KB Return 0 Runs (Silent Partial Failure) -**Status**: NOT RESOLVED — needs fix -**Priority**: HIGH — data loss / silent failure +**Status**: NOT A BUG IN OUR CODE — infrastructure issue in dev +**Priority**: MEDIUM — only in dev without object store configured **Found during**: E2E testing (2026-03-01) -**Problem**: Payloads >512KB cause `PublishEventService` to return `runs: []` (HTTP 200, no error) because Trigger.dev's task trigger silently fails for large payloads (>512KB need object storage offloading which our event publish path doesn't handle). +**Root cause**: `TriggerTaskService` detects payload >512KB and tries to offload to S3/R2 object store. In local dev, object store credentials are not set → throws `ServiceValidationError: "Failed to upload large payload to object store"`. Our `PublishEventService` catches this per-subscriber (partial failure pattern) and continues, resulting in 0 runs. -**Test results**: -- 100KB payload: 4 runs (OK) -- 500KB payload: 4 runs (OK) -- 600KB payload: 0 runs (SILENT FAILURE) -- 2MB payload: 0 runs (SILENT FAILURE) +**This is NOT specific to events** — a regular `tasks.trigger()` with >512KB payload would fail the same way without object store. -**The trigger call fails silently** — `TriggerTaskService` returns `undefined` for each subscriber, and `PublishEventService` logs it as a partial failure but still returns HTTP 200 with empty runs. +**Test results**: +- 500KB payload: 4 runs (OK — under threshold) +- 600KB payload: 0 runs (object store not configured) +- In production with object store: would work fine -**Options to resolve**: -1. Validate payload size in PublishEventService before fan-out (reject >512KB with clear error) -2. Use Trigger.dev's payload offloading mechanism (payloads >512KB go to object storage) -3. Both: warn on large payloads + support offloading +**Improvement we could make**: +- Detect payload size BEFORE fan-out and return a clear error (413 Payload Too Large) instead of HTTP 200 with 0 runs +- Or: propagate the TriggerTaskService error instead of treating it as partial failure ### 9.7 — ClickHouse Tables Not Created in Dev From ad83f88d35b81ad825455ba271b79174a24b9454 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sun, 1 Mar 2026 00:53:45 -0800 Subject: [PATCH 50/65] feat(events): ordering key with per-key serialization + global concurrency limit Run engine changes: - Add globalConcurrencyLimit (gcl) and globalCurrentConcurrency (gcc) Redis keys - Modify dequeueMessagesFromQueue Lua: check global limit when gcl exists - Modify releaseConcurrency Lua: SREM from gcc set - Modify enqueueMessage/enqueueMessageWithTtl: SREM from gcc on re-enqueue - No impact on existing queues (gcl check only runs when key exists) Event system changes: - PublishEventService overrides queue to `evt-order:{eventSlug}` when orderingKey present - Deploy creates ordering queue with concurrencyLimit:1 (per-key) + global limit - SDK event() accepts `ordering: { concurrencyLimit: N }` config - EventManifest/EventMetadata include ordering field Behavior: orderingKey guarantees strict per-key ordering (1 at a time per key) while concurrencyLimit controls total parallel runs across all keys. Run-engine tests: 236 pass, 2 fail (pre-existing flaky, not caused by this change) Event integration tests: 24/24 pass Co-Authored-By: Claude Opus 4.6 --- apps/webapp/app/v3/runQueue.server.ts | 9 +++ .../services/createBackgroundWorker.server.ts | 21 ++++++ .../v3/services/events/publishEvent.server.ts | 5 ++ .../run-engine/src/run-queue/index.ts | 70 +++++++++++++++++-- .../run-engine/src/run-queue/keyProducer.ts | 16 +++++ .../run-engine/src/run-queue/types.ts | 3 + .../core/src/v3/resource-catalog/catalog.ts | 2 + .../standardResourceCatalog.ts | 1 + packages/core/src/v3/schemas/schemas.ts | 9 +++ packages/trigger-sdk/src/v3/events.ts | 22 +++++- 10 files changed, 151 insertions(+), 7 deletions(-) diff --git a/apps/webapp/app/v3/runQueue.server.ts b/apps/webapp/app/v3/runQueue.server.ts index e7aa13c5c54..68127a71c43 100644 --- a/apps/webapp/app/v3/runQueue.server.ts +++ b/apps/webapp/app/v3/runQueue.server.ts @@ -32,6 +32,15 @@ export async function updateQueueConcurrencyLimits( ]); } +/** Updates the global concurrency limit for a queue (across all concurrency keys) */ +export async function updateGlobalQueueConcurrencyLimits( + environment: AuthenticatedEnvironment, + queueName: string, + concurrency: number +) { + await engine.runQueue.updateGlobalQueueConcurrencyLimits(environment, queueName, concurrency); +} + /** Removes MARQS and the RunQueue limits for a queue */ export async function removeQueueConcurrencyLimits( environment: AuthenticatedEnvironment, diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index 11aad019d69..5daa49d16bb 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -18,6 +18,7 @@ import { removeQueueConcurrencyLimits, updateEnvConcurrencyLimits, updateQueueConcurrencyLimits, + updateGlobalQueueConcurrencyLimits, } from "../runQueue.server"; import { calculateNextBuildVersion } from "../utils/calculateNextBuildVersion"; import { clampMaxDuration } from "../utils/maxDuration"; @@ -362,6 +363,26 @@ async function syncWorkerEvents( }); eventDefinitions.set(event.id, eventDef.id); + + // Create ordering queue for events that have ordering config + if (event.ordering) { + const orderingQueueName = `evt-order:${event.id}`; + const globalLimit = event.ordering.concurrencyLimit ?? environment.maximumConcurrencyLimit; + + await upsertWorkerQueueRecord( + orderingQueueName, + 1, // per-key limit: always 1 for strict ordering + orderingQueueName, + "SHARED", + worker, + prisma + ); + + // Set per-key limit = 1 in Redis + await updateQueueConcurrencyLimits(environment, orderingQueueName, 1); + // Set global limit in Redis (total concurrent across all keys) + await updateGlobalQueueConcurrencyLimits(environment, orderingQueueName, globalLimit); + } } } diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 9b4ba9bead7..ac2b213c9f8 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -303,6 +303,11 @@ export class PublishEventService extends BaseService { : undefined, metadata: eventMetadata, delay: options.delay, + // When ordering key is present, route to dedicated ordering queue + // with concurrencyLimit:1 per key + global limit + queue: options.orderingKey + ? { name: `evt-order:${eventSlug}` } + : undefined, concurrencyKey: options.orderingKey ? `evt:${eventSlug}:${options.orderingKey}` : undefined, diff --git a/internal-packages/run-engine/src/run-queue/index.ts b/internal-packages/run-engine/src/run-queue/index.ts index 7ebfaf660d6..9796df15717 100644 --- a/internal-packages/run-engine/src/run-queue/index.ts +++ b/internal-packages/run-engine/src/run-queue/index.ts @@ -341,6 +341,21 @@ export class RunQueue { return this.redis.del(this.keys.queueConcurrencyLimitKey(env, queue)); } + public async updateGlobalQueueConcurrencyLimits( + env: MinimalAuthenticatedEnvironment, + queue: string, + concurrency: number + ) { + return this.redis.set(this.keys.queueGlobalConcurrencyLimitKey(env, queue), concurrency); + } + + public async removeGlobalQueueConcurrencyLimits( + env: MinimalAuthenticatedEnvironment, + queue: string + ) { + return this.redis.del(this.keys.queueGlobalConcurrencyLimitKey(env, queue)); + } + public async getQueueConcurrencyLimit(env: MinimalAuthenticatedEnvironment, queue: string) { const result = await this.redis.get(this.keys.queueConcurrencyLimitKey(env, queue)); @@ -906,6 +921,7 @@ export class RunQueue { this.keys.envCurrentConcurrencyKeyFromQueue(message.queue), this.keys.queueCurrentDequeuedKeyFromQueue(message.queue), this.keys.envCurrentDequeuedKeyFromQueue(message.queue), + this.keys.queueGlobalCurrentConcurrencyKeyFromQueue(message.queue), messageId ); }, @@ -1655,6 +1671,7 @@ export class RunQueue { const queueCurrentDequeuedKey = this.keys.queueCurrentDequeuedKeyFromQueue(message.queue); const envCurrentDequeuedKey = this.keys.envCurrentDequeuedKeyFromQueue(message.queue); const envQueueKey = this.keys.envQueueKeyFromQueue(message.queue); + const globalCurrentConcurrencyKey = this.keys.queueGlobalCurrentConcurrencyKeyFromQueue(message.queue); const masterQueueKey = this.keys.masterQueueKeyForEnvironment( message.environmentId, this.shardCount @@ -1694,6 +1711,7 @@ export class RunQueue { envCurrentDequeuedKey, envQueueKey, ttlInfo.ttlQueueKey, + globalCurrentConcurrencyKey, queueName, messageId, messageData, @@ -1711,6 +1729,7 @@ export class RunQueue { queueCurrentDequeuedKey, envCurrentDequeuedKey, envQueueKey, + globalCurrentConcurrencyKey, queueName, messageId, messageData, @@ -1745,6 +1764,10 @@ export class RunQueue { const messageKeyPrefix = this.keys.messageKeyPrefixFromQueue(messageQueue); const envQueueKey = this.keys.envQueueKeyFromQueue(messageQueue); const masterQueueKey = this.keys.masterQueueKeyForShard(shard); + const globalConcurrencyLimitKey = + this.keys.queueGlobalConcurrencyLimitKeyFromQueue(messageQueue); + const globalCurrentConcurrencyKey = + this.keys.queueGlobalCurrentConcurrencyKeyFromQueue(messageQueue); // Get TTL queue key if TTL system is enabled const ttlShardCount = this.options.ttlSystem?.shardCount ?? this.shardCount; @@ -1767,6 +1790,8 @@ export class RunQueue { envQueueKey, masterQueueKey, ttlQueueKey, + globalConcurrencyLimitKey, + globalCurrentConcurrencyKey, shard, maxCount, }); @@ -1783,6 +1808,8 @@ export class RunQueue { envQueueKey, masterQueueKey, ttlQueueKey, + globalConcurrencyLimitKey, + globalCurrentConcurrencyKey, //args messageQueue, String(Date.now()), @@ -2502,7 +2529,7 @@ end }); this.redis.defineCommand("enqueueMessage", { - numberOfKeys: 8, + numberOfKeys: 9, lua: ` local masterQueueKey = KEYS[1] local queueKey = KEYS[2] @@ -2512,6 +2539,7 @@ local envCurrentConcurrencyKey = KEYS[5] local queueCurrentDequeuedKey = KEYS[6] local envCurrentDequeuedKey = KEYS[7] local envQueueKey = KEYS[8] +local globalCurrentConcurrencyKey = KEYS[9] local queueName = ARGV[1] local messageId = ARGV[2] @@ -2541,12 +2569,13 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) redis.call('SREM', queueCurrentDequeuedKey, messageId) redis.call('SREM', envCurrentDequeuedKey, messageId) +redis.call('SREM', globalCurrentConcurrencyKey, messageId) `, }); // Enqueue with TTL tracking - atomically adds to both normal queue and TTL sorted set this.redis.defineCommand("enqueueMessageWithTtl", { - numberOfKeys: 9, + numberOfKeys: 10, lua: ` local masterQueueKey = KEYS[1] local queueKey = KEYS[2] @@ -2557,6 +2586,7 @@ local queueCurrentDequeuedKey = KEYS[6] local envCurrentDequeuedKey = KEYS[7] local envQueueKey = KEYS[8] local ttlQueueKey = KEYS[9] +local globalCurrentConcurrencyKey = KEYS[10] local queueName = ARGV[1] local messageId = ARGV[2] @@ -2591,6 +2621,7 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) redis.call('SREM', queueCurrentDequeuedKey, messageId) redis.call('SREM', envCurrentDequeuedKey, messageId) +redis.call('SREM', globalCurrentConcurrencyKey, messageId) `, }); @@ -2692,7 +2723,7 @@ return results }); this.redis.defineCommand("dequeueMessagesFromQueue", { - numberOfKeys: 10, + numberOfKeys: 12, lua: ` local queueKey = KEYS[1] local queueConcurrencyLimitKey = KEYS[2] @@ -2704,6 +2735,8 @@ local messageKeyPrefix = KEYS[7] local envQueueKey = KEYS[8] local masterQueueKey = KEYS[9] local ttlQueueKey = KEYS[10] -- Optional: TTL sorted set key (empty string if not used) +local globalConcurrencyLimitKey = KEYS[11] -- Global queue concurrency limit (without :ck:) +local globalCurrentConcurrencyKey = KEYS[12] -- Global queue concurrency tracking (without :ck:) local queueName = ARGV[1] local currentTime = tonumber(ARGV[2]) @@ -2722,7 +2755,7 @@ if envCurrentConcurrency >= envConcurrencyLimitWithBurstFactor then return nil end --- Check current queue concurrency against the limit +-- Check current queue concurrency against the limit (per-key when concurrencyKey is used) local queueCurrentConcurrency = tonumber(redis.call('SCARD', queueCurrentConcurrencyKey) or '0') local queueConcurrencyLimit = math.min(tonumber(redis.call('GET', queueConcurrencyLimitKey) or '1000000'), envConcurrencyLimit) local totalQueueConcurrencyLimit = queueConcurrencyLimit @@ -2732,10 +2765,23 @@ if queueCurrentConcurrency >= totalQueueConcurrencyLimit then return nil end +-- Check global queue concurrency limit (across all concurrency keys) +-- Only applies when globalConcurrencyLimitKey is set (e.g. for event ordering queues) +local globalAvailableCapacity = 1000000 +local globalConcurrencyLimitRaw = redis.call('GET', globalConcurrencyLimitKey) +if globalConcurrencyLimitRaw then + local globalConcurrencyLimit = tonumber(globalConcurrencyLimitRaw) + local globalCurrentConcurrency = tonumber(redis.call('SCARD', globalCurrentConcurrencyKey) or '0') + if globalCurrentConcurrency >= globalConcurrencyLimit then + return nil + end + globalAvailableCapacity = globalConcurrencyLimit - globalCurrentConcurrency +end + -- Calculate how many messages we can actually dequeue based on concurrency limits local envAvailableCapacity = envConcurrencyLimitWithBurstFactor - envCurrentConcurrency local queueAvailableCapacity = totalQueueConcurrencyLimit - queueCurrentConcurrency -local actualMaxCount = math.min(maxCount, envAvailableCapacity, queueAvailableCapacity) +local actualMaxCount = math.min(maxCount, envAvailableCapacity, queueAvailableCapacity, globalAvailableCapacity) if actualMaxCount <= 0 then return nil @@ -2779,6 +2825,11 @@ for i = 1, #messages, 2 do redis.call('SADD', queueCurrentConcurrencyKey, messageId) redis.call('SADD', envCurrentConcurrencyKey, messageId) + -- Track global queue concurrency (only if global limit is configured) + if globalConcurrencyLimitRaw then + redis.call('SADD', globalCurrentConcurrencyKey, messageId) + end + -- Remove from TTL set if provided (run is being executed, not expired) if ttlQueueKey and ttlQueueKey ~= '' and ttlExpiresAt then local ttlMember = queueName .. '|' .. messageId .. '|' .. (messageData.orgId or '') @@ -2999,13 +3050,14 @@ redis.call('SREM', envCurrentDequeuedKey, messageId) }); this.redis.defineCommand("releaseConcurrency", { - numberOfKeys: 4, + numberOfKeys: 5, lua: ` -- Keys: local queueCurrentConcurrencyKey = KEYS[1] local envCurrentConcurrencyKey = KEYS[2] local queueCurrentDequeuedKey = KEYS[3] local envCurrentDequeuedKey = KEYS[4] +local globalCurrentConcurrencyKey = KEYS[5] -- Args: local messageId = ARGV[1] @@ -3015,6 +3067,7 @@ redis.call('SREM', queueCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) redis.call('SREM', queueCurrentDequeuedKey, messageId) redis.call('SREM', envCurrentDequeuedKey, messageId) +redis.call('SREM', globalCurrentConcurrencyKey, messageId) `, }); @@ -3139,6 +3192,7 @@ declare module "@internal/redis" { queueCurrentDequeuedKey: string, envCurrentDequeuedKey: string, envQueueKey: string, + globalCurrentConcurrencyKey: string, //args queueName: string, messageId: string, @@ -3158,6 +3212,7 @@ declare module "@internal/redis" { envCurrentDequeuedKey: string, envQueueKey: string, ttlQueueKey: string, + globalCurrentConcurrencyKey: string, //args queueName: string, messageId: string, @@ -3194,6 +3249,8 @@ declare module "@internal/redis" { envQueueKey: string, masterQueueKey: string, ttlQueueKey: string, + globalConcurrencyLimitKey: string, + globalCurrentConcurrencyKey: string, //args childQueueName: string, currentTime: string, @@ -3288,6 +3345,7 @@ declare module "@internal/redis" { envCurrentConcurrencyKey: string, queueCurrentDequeuedKey: string, envCurrentDequeuedKey: string, + globalCurrentConcurrencyKey: string, // args messageId: string, callback?: Callback diff --git a/internal-packages/run-engine/src/run-queue/keyProducer.ts b/internal-packages/run-engine/src/run-queue/keyProducer.ts index f925f0e9579..f7658dba3ef 100644 --- a/internal-packages/run-engine/src/run-queue/keyProducer.ts +++ b/internal-packages/run-engine/src/run-queue/keyProducer.ts @@ -17,6 +17,8 @@ const constants = { DEAD_LETTER_QUEUE_PART: "deadLetter", MASTER_QUEUE_PART: "masterQueue", WORKER_QUEUE_PART: "workerQueue", + GLOBAL_CONCURRENCY_LIMIT_PART: "globalConcurrency", + GLOBAL_CURRENT_CONCURRENCY_PART: "globalCurrentConcurrency", } as const; export class RunQueueFullKeyProducer implements RunQueueKeyProducer { @@ -138,6 +140,20 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { return `${concurrencyQueueName}:${constants.CONCURRENCY_LIMIT_PART}`; } + queueGlobalConcurrencyLimitKeyFromQueue(queue: string) { + const globalQueueName = queue.replace(/:ck:.+$/, ""); + return `${globalQueueName}:${constants.GLOBAL_CONCURRENCY_LIMIT_PART}`; + } + + queueGlobalCurrentConcurrencyKeyFromQueue(queue: string) { + const globalQueueName = queue.replace(/:ck:.+$/, ""); + return `${globalQueueName}:${constants.GLOBAL_CURRENT_CONCURRENCY_PART}`; + } + + queueGlobalConcurrencyLimitKey(env: RunQueueKeyProducerEnvironment, queue: string) { + return [this.queueKey(env, queue), constants.GLOBAL_CONCURRENCY_LIMIT_PART].join(":"); + } + queueCurrentConcurrencyKeyFromQueue(queue: string) { return `${queue}:${constants.CURRENT_CONCURRENCY_PART}`; } diff --git a/internal-packages/run-engine/src/run-queue/types.ts b/internal-packages/run-engine/src/run-queue/types.ts index fd33e7e1925..2fcc3eeed71 100644 --- a/internal-packages/run-engine/src/run-queue/types.ts +++ b/internal-packages/run-engine/src/run-queue/types.ts @@ -75,6 +75,9 @@ export interface RunQueueKeyProducer { envQueueKeyFromQueue(queue: string): string; queueConcurrencyLimitKey(env: RunQueueKeyProducerEnvironment, queue: string): string; queueConcurrencyLimitKeyFromQueue(queue: string): string; + queueGlobalConcurrencyLimitKeyFromQueue(queue: string): string; + queueGlobalCurrentConcurrencyKeyFromQueue(queue: string): string; + queueGlobalConcurrencyLimitKey(env: RunQueueKeyProducerEnvironment, queue: string): string; queueCurrentConcurrencyKeyFromQueue(queue: string): string; queueCurrentConcurrencyKey( env: RunQueueKeyProducerEnvironment, diff --git a/packages/core/src/v3/resource-catalog/catalog.ts b/packages/core/src/v3/resource-catalog/catalog.ts index 72998235bfa..bfc309d6f4c 100644 --- a/packages/core/src/v3/resource-catalog/catalog.ts +++ b/packages/core/src/v3/resource-catalog/catalog.ts @@ -9,6 +9,8 @@ export interface EventMetadata { rawSchema?: unknown; /** Rate limit configuration */ rateLimit?: { limit: number; window: string }; + /** Ordering configuration — enables per-key serialization with global concurrency limit */ + ordering?: { concurrencyLimit?: number }; } export interface ResourceCatalog { diff --git a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts index 3ca4a567fe5..8fcef565673 100644 --- a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts +++ b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts @@ -191,6 +191,7 @@ export class StandardResourceCatalog implements ResourceCatalog { version: event.version, description: event.description, rateLimit: event.rateLimit, + ordering: event.ordering, })); } diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index 5d5dae34ead..183ca41b28e 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -183,6 +183,13 @@ export const EventRateLimitManifest = z.object({ export type EventRateLimitManifest = z.infer; +export const EventOrderingManifest = z.object({ + /** Maximum number of ordering keys processed in parallel */ + concurrencyLimit: z.number().int().positive().optional(), +}); + +export type EventOrderingManifest = z.infer; + export const EventManifest = z.object({ /** Unique event identifier (e.g. "order.created") */ id: z.string(), @@ -194,6 +201,8 @@ export const EventManifest = z.object({ schema: z.unknown().optional(), /** Rate limit configuration */ rateLimit: EventRateLimitManifest.optional(), + /** Ordering configuration — creates a dedicated queue with per-key serialization */ + ordering: EventOrderingManifest.optional(), }); export type EventManifest = z.infer; diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index 12651877b4e..4f2afef9494 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -38,6 +38,25 @@ export interface EventOptions( export function createEvent( options: EventOptions ): EventDefinition { - const { id, schema, description, version = "1.0", rateLimit } = options; + const { id, schema, description, version = "1.0", rateLimit, ordering } = options; // Build the parse function if a schema is provided let parseFn: SchemaParseFn | undefined; @@ -300,6 +319,7 @@ export function createEvent Date: Sun, 1 Mar 2026 00:54:23 -0800 Subject: [PATCH 51/65] chore: mark 9.5 ordering as done in pending items Co-Authored-By: Claude Opus 4.6 --- .../memory/pubsub-pending.md | 46 ++++--------------- 1 file changed, 10 insertions(+), 36 deletions(-) diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md index c74b194f0da..8106e738d2c 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-pending.md @@ -38,47 +38,21 @@ Items identified during post-implementation audit. Ordered by priority. - SDK docs in `rules/` directory - Update `.claude/skills/trigger-dev-tasks/SKILL.md` -### 9.5 — Ordering Key Does Not Guarantee Strict Ordering +### 9.5 — Ordering Key with Per-Key Serialization + Global Concurrency Limit -**Status**: NOT RESOLVED — needs design decision -**Priority**: HIGH — correctness issue -**Found during**: E2E testing (2026-03-01) - -**Problem**: `orderingKey` maps to Trigger.dev's `concurrencyKey`, which creates a **copy of the queue per key**, each with the same `concurrencyLimit`. This means: - -- If task has `concurrencyLimit: 1` → ordering works per key, BUT the limit is per-key, not global. All different keys run in parallel with no global cap (only bounded by environment concurrency limit). -- If task has `concurrencyLimit: 10` → 10 events with the SAME key can run in parallel, breaking ordering. -- There's no way to express "strict ordering per key + global concurrency limit N" with Trigger.dev's current queue model. +**Status**: DONE (commit `ad83f88d3`) -**Expected behavior** (like Kafka/SQS FIFO): -- `orderingKey` = strict sequential per key (always 1 at a time per key) -- `concurrencyLimit` = total parallel runs across all keys (separate concept) - -``` -concurrencyLimit: 3, ordering keys A/B/C: +**Solution**: Added `globalConcurrencyLimit` to the run engine (new Redis keys `gcl`/`gcc`). Modified 4 Lua scripts (dequeue, release, enqueue, enqueueWithTtl) to check global limit when set. PublishEventService overrides queue to `evt-order:{eventSlug}` with per-key limit=1 and global limit=N. -Slot 1: A1 → A2 → A3 (key A in order) -Slot 2: B1 → B2 (key B in order) -Slot 3: C1 → C2 (key C in order) -Max 3 running at once, each key strictly ordered. +SDK usage: +```typescript +event({ + id: "order.created", + ordering: { concurrencyLimit: 5 }, // max 5 keys in parallel, strict per-key ordering +}); ``` -**Trigger.dev's actual behavior with concurrencyKey**: -- Creates 3 separate queues (A, B, C), EACH with concurrencyLimit 3 -- So 9 runs could execute simultaneously (3 per key × 3 keys) -- Not true ordering - -**Options to resolve**: -1. Build ordering on top of Trigger.dev's queue system with custom logic in PublishEventService -2. Contribute ordering support upstream to Trigger.dev's run engine -3. Document as limitation and recommend `concurrencyLimit: 1` for ordering use cases -4. Use a separate ordering mechanism (Redis-based FIFO per key) before triggering runs - -**Test results that confirmed this**: -- `concurrencyLimit: 1` + same key → sequential (correct) -- `concurrencyLimit: 1` + different keys → parallel (capped by env limit ~8, not by concurrencyLimit) -- `concurrencyLimit: 2` + same key → 2 at a time (breaks ordering) -- 10 different keys + `concurrencyLimit: 1` → only ~8 ran in parallel (env limit, not queue limit) +**Needs E2E verification** with live hello-world project to confirm behavior. ### 9.6 — Large Payloads >512KB Return 0 Runs (Silent Partial Failure) From 193e26e9b6305a926a085a3c4f550d74a9545c1a Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Sun, 1 Mar 2026 01:25:09 -0800 Subject: [PATCH 52/65] =?UTF-8?q?fix(events):=20fix=20ordering=20=E2=80=94?= =?UTF-8?q?=20update=20all=20Lua=20scripts=20for=20global=20concurrency=20?= =?UTF-8?q?release?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous commit only updated dequeue/release/enqueue Lua scripts but missed: - acknowledgeMessage: runs completing weren't releasing global concurrency - nackMessage: nacked runs weren't releasing global concurrency - moveToDeadLetterQueue: DLQ'd runs weren't releasing - clearMessageFromConcurrencySets: cleanup wasn't releasing Also: set globalConcurrencyLimit on subscriber task's queue (not dedicated queue) since the dev worker only monitors task queues, not custom queues. Removed queue override from PublishEventService — runs stay in the task's own queue and ordering is enforced by concurrencyKey + concurrencyLimit:1 + globalConcurrencyLimit:N. E2E verified: globalConcurrencyLimit=2 with 3 keys correctly limits to max 2 concurrent runs while maintaining per-key ordering. Co-Authored-By: Claude Opus 4.6 --- .../services/createBackgroundWorker.server.ts | 16 +++++++++- .../v3/services/events/publishEvent.server.ts | 5 ---- .../run-engine/src/run-queue/index.ts | 24 ++++++++++++--- .../hello-world/src/trigger/events-test.ts | 29 +++++++++++++++++++ 4 files changed, 64 insertions(+), 10 deletions(-) create mode 100644 references/hello-world/src/trigger/events-test.ts diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index 5daa49d16bb..db99668107f 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -373,7 +373,7 @@ async function syncWorkerEvents( orderingQueueName, 1, // per-key limit: always 1 for strict ordering orderingQueueName, - "SHARED", + "NAMED", worker, prisma ); @@ -426,6 +426,20 @@ async function syncWorkerEvents( } } + // Look up event ordering config for this task's event + const eventManifest = metadata.events?.find((e) => e.id === task.onEvent); + const eventOrdering = eventManifest?.ordering; + + // If the event has ordering config, set globalConcurrencyLimit on this task's queue + if (eventOrdering?.concurrencyLimit) { + const taskQueueName = `task/${task.id}`; + await updateGlobalQueueConcurrencyLimits( + environment, + taskQueueName, + eventOrdering.concurrencyLimit + ); + } + const subscription = await prisma.eventSubscription.upsert({ where: { eventDefinitionId_taskSlug_environmentId: { diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index ac2b213c9f8..9b4ba9bead7 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -303,11 +303,6 @@ export class PublishEventService extends BaseService { : undefined, metadata: eventMetadata, delay: options.delay, - // When ordering key is present, route to dedicated ordering queue - // with concurrencyLimit:1 per key + global limit - queue: options.orderingKey - ? { name: `evt-order:${eventSlug}` } - : undefined, concurrencyKey: options.orderingKey ? `evt:${eventSlug}:${options.orderingKey}` : undefined, diff --git a/internal-packages/run-engine/src/run-queue/index.ts b/internal-packages/run-engine/src/run-queue/index.ts index 9796df15717..596a2b906e4 100644 --- a/internal-packages/run-engine/src/run-queue/index.ts +++ b/internal-packages/run-engine/src/run-queue/index.ts @@ -2063,6 +2063,7 @@ export class RunQueue { envCurrentDequeuedKey, envQueueKey, workerQueueKey, + this.keys.queueGlobalCurrentConcurrencyKeyFromQueue(message.queue), messageId, messageQueue, messageKeyValue, @@ -2105,6 +2106,7 @@ export class RunQueue { envCurrentConcurrencyKey, queueCurrentDequeuedKey, envCurrentDequeuedKey, + this.keys.queueGlobalCurrentConcurrencyKeyFromQueue(queue), messageId ); } @@ -2151,6 +2153,7 @@ export class RunQueue { queueCurrentDequeuedKey, envCurrentDequeuedKey, envQueueKey, + this.keys.queueGlobalCurrentConcurrencyKeyFromQueue(message.queue), //args messageId, messageQueue, @@ -2184,6 +2187,7 @@ export class RunQueue { envCurrentDequeuedKey, envQueueKey, deadLetterQueueKey, + this.keys.queueGlobalCurrentConcurrencyKeyFromQueue(message.queue), messageId, messageQueue ); @@ -2919,7 +2923,7 @@ return message }); this.redis.defineCommand("acknowledgeMessage", { - numberOfKeys: 9, + numberOfKeys: 10, lua: ` -- Keys: local masterQueueKey = KEYS[1] @@ -2931,6 +2935,7 @@ local queueCurrentDequeuedKey = KEYS[6] local envCurrentDequeuedKey = KEYS[7] local envQueueKey = KEYS[8] local workerQueueKey = KEYS[9] +local globalCurrentConcurrencyKey = KEYS[10] -- Args: local messageId = ARGV[1] @@ -2955,6 +2960,7 @@ end -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) +redis.call('SREM', globalCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) redis.call('SREM', queueCurrentDequeuedKey, messageId) redis.call('SREM', envCurrentDequeuedKey, messageId) @@ -2967,7 +2973,7 @@ end }); this.redis.defineCommand("nackMessage", { - numberOfKeys: 8, + numberOfKeys: 9, lua: ` -- Keys: local masterQueueKey = KEYS[1] @@ -2978,6 +2984,7 @@ local envCurrentConcurrencyKey = KEYS[5] local queueCurrentDequeuedKey = KEYS[6] local envCurrentDequeuedKey = KEYS[7] local envQueueKey = KEYS[8] +local globalCurrentConcurrencyKey = KEYS[9] -- Args: local messageId = ARGV[1] @@ -2990,6 +2997,7 @@ redis.call('SET', messageKey, messageData) -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) +redis.call('SREM', globalCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) redis.call('SREM', queueCurrentDequeuedKey, messageId) redis.call('SREM', envCurrentDequeuedKey, messageId) @@ -3009,7 +3017,7 @@ end }); this.redis.defineCommand("moveToDeadLetterQueue", { - numberOfKeys: 9, + numberOfKeys: 10, lua: ` -- Keys: local masterQueueKey = KEYS[1] @@ -3021,6 +3029,7 @@ local queueCurrentDequeuedKey = KEYS[6] local envCurrentDequeuedKey = KEYS[7] local envQueueKey = KEYS[8] local deadLetterQueueKey = KEYS[9] +local globalCurrentConcurrencyKey = KEYS[10] -- Args: local messageId = ARGV[1] @@ -3043,6 +3052,7 @@ redis.call('ZADD', deadLetterQueueKey, tonumber(redis.call('TIME')[1]), messageI -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) +redis.call('SREM', globalCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) redis.call('SREM', queueCurrentDequeuedKey, messageId) redis.call('SREM', envCurrentDequeuedKey, messageId) @@ -3151,19 +3161,21 @@ return results }); this.redis.defineCommand("clearMessageFromConcurrencySets", { - numberOfKeys: 4, + numberOfKeys: 5, lua: ` -- Keys: local queueCurrentConcurrencyKey = KEYS[1] local envCurrentConcurrencyKey = KEYS[2] local queueCurrentDequeuedKey = KEYS[3] local envCurrentDequeuedKey = KEYS[4] +local globalCurrentConcurrencyKey = KEYS[5] -- Args: local messageId = ARGV[1] -- Update the concurrency keys redis.call('SREM', queueCurrentConcurrencyKey, messageId) +redis.call('SREM', globalCurrentConcurrencyKey, messageId) redis.call('SREM', envCurrentConcurrencyKey, messageId) redis.call('SREM', queueCurrentDequeuedKey, messageId) redis.call('SREM', envCurrentDequeuedKey, messageId) @@ -3285,6 +3297,7 @@ declare module "@internal/redis" { envCurrentDequeuedKey: string, envQueueKey: string, workerQueueKey: string, + globalCurrentConcurrencyKey: string, // args messageId: string, messageQueueName: string, @@ -3299,6 +3312,7 @@ declare module "@internal/redis" { envCurrentConcurrencyKey: string, queueCurrentDequeuedKey: string, envCurrentDequeuedKey: string, + globalCurrentConcurrencyKey: string, // args messageId: string, callback?: Callback @@ -3314,6 +3328,7 @@ declare module "@internal/redis" { queueCurrentDequeuedKey: string, envCurrentDequeuedKey: string, envQueueKey: string, + globalCurrentConcurrencyKey: string, // args messageId: string, messageQueueName: string, @@ -3333,6 +3348,7 @@ declare module "@internal/redis" { envCurrentDequeuedKey: string, envQueueKey: string, deadLetterQueueKey: string, + globalCurrentConcurrencyKey: string, // args messageId: string, messageQueueName: string, diff --git a/references/hello-world/src/trigger/events-test.ts b/references/hello-world/src/trigger/events-test.ts new file mode 100644 index 00000000000..112d9f0d010 --- /dev/null +++ b/references/hello-world/src/trigger/events-test.ts @@ -0,0 +1,29 @@ +import { event, task, logger } from "@trigger.dev/sdk"; +import { z } from "zod"; + +// Define event with ordering config +export const testEvent = event({ + id: "test.greeting", + schema: z.object({ + name: z.string(), + message: z.string(), + }), + ordering: { + concurrencyLimit: 2, + }, +}); + +// Slow subscriber with concurrencyLimit: 1 for ordering +// The concurrencyLimit:1 ensures per-key ordering when used with orderingKey +export const slowWorker = task({ + id: "slow-greeting-worker", + on: testEvent, + queue: { concurrencyLimit: 1 }, + run: async (payload) => { + const start = Date.now(); + logger.info(`[slow-worker] START "${payload.name}" at ${new Date().toISOString()}`); + await new Promise((r) => setTimeout(r, 2000)); + logger.info(`[slow-worker] END "${payload.name}" after ${Date.now() - start}ms`); + return { name: payload.name, duration: Date.now() - start }; + }, +}); From c1764962527098b87c69b6b9c8bb570ebb2e7c12 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Mon, 2 Mar 2026 22:29:13 -0800 Subject: [PATCH 53/65] =?UTF-8?q?fix(events):=20phase=2010=20=E2=80=94=20C?= =?UTF-8?q?RITICAL=20+=20HIGH=20audit=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 10.1: Fix expireTtlRuns Lua global concurrency slot leak (CRITICAL) Add SREM for globalCurrentConcurrency in TTL expiration script - 10.2: Fix clearMessageFromConcurrencySets bare queue name (HIGH) Add queueGlobalCurrentConcurrencyKey(env, queue) to build correct key - 10.3: Add .max(100) to batch publish items array (HIGH) - 10.4: Fix publishAndWait schema — move parentRunId to top-level (HIGH) - 10.5: ClickHouse interval already safe (whitelist map, not interpolation) - 10.6: Add @@index([projectId, environmentId, enabled]) to EventSubscription - 10.7: Fix batch publish partial failure — per-item error handling with 207 Also tightens Zod schemas: z.any() → z.unknown(), idempotencyKey .max(256), metadata → z.record(z.unknown()) Co-Authored-By: Claude Opus 4.6 --- .../api.v1.events.$eventId.batchPublish.ts | 47 ++++++++--------- .../api.v1.events.$eventId.publishAndWait.ts | 8 +-- .../migration.sql | 2 + .../database/prisma/schema.prisma | 2 +- .../run-engine/src/run-queue/index.ts | 7 ++- .../run-engine/src/run-queue/keyProducer.ts | 4 ++ .../run-engine/src/run-queue/types.ts | 1 + packages/core/src/v3/schemas/api.ts | 50 ++++++++++--------- packages/trigger-sdk/src/v3/events.ts | 6 +-- 9 files changed, 64 insertions(+), 63 deletions(-) create mode 100644 internal-packages/database/prisma/migrations/20260303061123_add_event_subscription_pattern_idx/migration.sql diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts index fc26760e890..746884695d8 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts @@ -34,10 +34,13 @@ const { action, loader } = createActionApiRoute( eventPublishRateLimitChecker ); - try { - const results: PublishEventResult[] = []; + const results: Array< + | { ok: true; eventId: string; runs: PublishEventResult["runs"] } + | { ok: false; error: string } + > = []; - for (const item of body.items) { + for (const item of body.items) { + try { const result = await service.call( params.eventId, authentication.environment, @@ -52,31 +55,23 @@ const { action, loader } = createActionApiRoute( } ); - results.push(result); + results.push({ ok: true, eventId: result.eventId, runs: result.runs }); + } catch (error) { + if (error instanceof EventPublishRateLimitError) { + results.push({ ok: false, error: error.message }); + } else if (error instanceof ServiceValidationError) { + results.push({ ok: false, error: error.message }); + } else { + results.push({ + ok: false, + error: error instanceof Error ? error.message : "Unknown error", + }); + } } - - return json({ results }, { status: 200 }); - } catch (error) { - if (error instanceof EventPublishRateLimitError) { - return json( - { error: error.message }, - { - status: 429, - headers: { - "x-ratelimit-limit": String(error.limit), - "x-ratelimit-remaining": String(error.remaining), - "retry-after": String(Math.ceil(error.retryAfterMs / 1000)), - }, - } - ); - } else if (error instanceof ServiceValidationError) { - return json({ error: error.message }, { status: error.status ?? 422 }); - } else if (error instanceof Error) { - return json({ error: error.message }, { status: 500 }); - } - - return json({ error: "Something went wrong" }, { status: 500 }); } + + const hasErrors = results.some((r) => !r.ok); + return json({ results }, { status: hasErrors ? 207 : 200 }); } ); diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts b/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts index d7c73b4536d..11ce74ec64a 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.publishAndWait.ts @@ -26,13 +26,7 @@ const { action, loader } = createActionApiRoute( }, }, async ({ body, params, authentication }) => { - const parentRunId = body.options?.parentRunId; - if (!parentRunId) { - return json( - { error: "parentRunId is required for publishAndWait" }, - { status: 400 } - ); - } + const parentRunId = body.parentRunId; const service = new PublishEventService( undefined, diff --git a/internal-packages/database/prisma/migrations/20260303061123_add_event_subscription_pattern_idx/migration.sql b/internal-packages/database/prisma/migrations/20260303061123_add_event_subscription_pattern_idx/migration.sql new file mode 100644 index 00000000000..3a3b15665b3 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260303061123_add_event_subscription_pattern_idx/migration.sql @@ -0,0 +1,2 @@ +-- CreateIndex +CREATE INDEX CONCURRENTLY "EventSubscription_projectId_environmentId_enabled_idx" ON "public"."EventSubscription"("projectId", "environmentId", "enabled"); diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 7a081ee01a9..fdb0d2f779b 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -644,7 +644,7 @@ model EventSubscription { @@unique([eventDefinitionId, taskSlug, environmentId]) @@index([eventDefinitionId, environmentId, enabled]) - @@index([projectId, environmentId]) + @@index([projectId, environmentId, enabled]) } enum DeadLetterStatus { diff --git a/internal-packages/run-engine/src/run-queue/index.ts b/internal-packages/run-engine/src/run-queue/index.ts index 596a2b906e4..3b81d0e1543 100644 --- a/internal-packages/run-engine/src/run-queue/index.ts +++ b/internal-packages/run-engine/src/run-queue/index.ts @@ -2106,7 +2106,7 @@ export class RunQueue { envCurrentConcurrencyKey, queueCurrentDequeuedKey, envCurrentDequeuedKey, - this.keys.queueGlobalCurrentConcurrencyKeyFromQueue(queue), + this.keys.queueGlobalCurrentConcurrencyKey(env, queue), messageId ); } @@ -2698,6 +2698,11 @@ for i, member in ipairs(expiredMembers) do redis.call('SREM', concurrencyKey, runId) redis.call('SREM', dequeuedKey, runId) + -- Remove from global concurrency set (strip :ck:* suffix to get base queue key) + local globalQueueKey = string.gsub(rawQueueKey, ":ck:.+$", "") + local globalCurrentConcurrencyKey = keyPrefix .. globalQueueKey .. ":globalCurrentConcurrency" + redis.call('SREM', globalCurrentConcurrencyKey, runId) + -- Env concurrency (derive from rawQueueKey; must match RunQueueKeyProducer: org + proj + env) -- rawQueueKey format: {org:X}:proj:Y:env:Z:queue:Q[:ck:C] local projMatch = string.match(rawQueueKey, ":proj:([^:]+):env:") diff --git a/internal-packages/run-engine/src/run-queue/keyProducer.ts b/internal-packages/run-engine/src/run-queue/keyProducer.ts index f7658dba3ef..59f987258f0 100644 --- a/internal-packages/run-engine/src/run-queue/keyProducer.ts +++ b/internal-packages/run-engine/src/run-queue/keyProducer.ts @@ -154,6 +154,10 @@ export class RunQueueFullKeyProducer implements RunQueueKeyProducer { return [this.queueKey(env, queue), constants.GLOBAL_CONCURRENCY_LIMIT_PART].join(":"); } + queueGlobalCurrentConcurrencyKey(env: RunQueueKeyProducerEnvironment, queue: string) { + return [this.queueKey(env, queue), constants.GLOBAL_CURRENT_CONCURRENCY_PART].join(":"); + } + queueCurrentConcurrencyKeyFromQueue(queue: string) { return `${queue}:${constants.CURRENT_CONCURRENCY_PART}`; } diff --git a/internal-packages/run-engine/src/run-queue/types.ts b/internal-packages/run-engine/src/run-queue/types.ts index 2fcc3eeed71..7b6ef83f1a6 100644 --- a/internal-packages/run-engine/src/run-queue/types.ts +++ b/internal-packages/run-engine/src/run-queue/types.ts @@ -78,6 +78,7 @@ export interface RunQueueKeyProducer { queueGlobalConcurrencyLimitKeyFromQueue(queue: string): string; queueGlobalCurrentConcurrencyKeyFromQueue(queue: string): string; queueGlobalConcurrencyLimitKey(env: RunQueueKeyProducerEnvironment, queue: string): string; + queueGlobalCurrentConcurrencyKey(env: RunQueueKeyProducerEnvironment, queue: string): string; queueCurrentConcurrencyKeyFromQueue(queue: string): string; queueCurrentConcurrencyKey( env: RunQueueKeyProducerEnvironment, diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index f0bef68448d..65754d2f2ea 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1602,14 +1602,14 @@ export type AppendToStreamResponseBody = z.infer; export const BatchPublishEventRequestBody = z.object({ - items: z.array( - z.object({ - payload: z.any(), - options: z - .object({ - idempotencyKey: z.string().optional(), - delay: z.string().or(z.coerce.date()).optional(), - tags: RunTags.optional(), - metadata: z.any().optional(), - context: z.any().optional(), - orderingKey: z.string().optional(), - }) - .optional(), - }) - ), + items: z + .array( + z.object({ + payload: z.unknown(), + options: z + .object({ + idempotencyKey: z.string().max(256).optional(), + delay: z.string().or(z.coerce.date()).optional(), + tags: RunTags.optional(), + metadata: z.record(z.unknown()).optional(), + context: z.unknown().optional(), + orderingKey: z.string().optional(), + }) + .optional(), + }) + ) + .max(100), }); export type BatchPublishEventRequestBody = z.infer; @@ -1656,16 +1658,16 @@ export const BatchPublishEventResponseBody = z.object({ export type BatchPublishEventResponseBody = z.infer; export const PublishAndWaitEventRequestBody = z.object({ - payload: z.any(), + payload: z.unknown(), + parentRunId: z.string(), options: z .object({ - idempotencyKey: z.string().optional(), + idempotencyKey: z.string().max(256).optional(), delay: z.string().or(z.coerce.date()).optional(), tags: RunTags.optional(), - metadata: z.any().optional(), - context: z.any().optional(), + metadata: z.record(z.unknown()).optional(), + context: z.unknown().optional(), orderingKey: z.string().optional(), - parentRunId: z.string(), }) .optional(), }); diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index 4f2afef9494..7cb26c4af35 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -239,6 +239,7 @@ export function createEvent Date: Mon, 2 Mar 2026 22:29:24 -0800 Subject: [PATCH 54/65] =?UTF-8?q?fix(events):=20phase=2011=20=E2=80=94=20M?= =?UTF-8?q?EDIUM=20audit=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 11.1: Fix N+1 in DLQ retryAll — inline retry logic, share TriggerTaskService - 11.2: Add 512KB payload size check before fan-out (returns 413) - 11.3: Add try/catch with ServiceValidationError handling to events routes - 11.4: Add --delay, --tags, --idempotency-key, --ordering-key to CLI publish Co-Authored-By: Claude Opus 4.6 --- .../app/routes/api.v1.events.dlq.retry-all.ts | 25 ++++++++--- apps/webapp/app/routes/api.v1.events.ts | 45 ++++++++++++------- .../events/deadLetterManagement.server.ts | 19 +++++++- .../v3/services/events/publishEvent.server.ts | 10 +++++ packages/cli-v3/src/apiClient.ts | 24 +++++++++- .../cli-v3/src/commands/events/publish.ts | 22 ++++++++- 6 files changed, 117 insertions(+), 28 deletions(-) diff --git a/apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts b/apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts index bde3ba32f7e..c05ef94d1d4 100644 --- a/apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts +++ b/apps/webapp/app/routes/api.v1.events.dlq.retry-all.ts @@ -1,6 +1,7 @@ import { json } from "@remix-run/server-runtime"; import { z } from "zod"; import { createActionApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; import { DeadLetterManagementService } from "~/v3/services/events/deadLetterManagement.server"; const BodySchema = z @@ -22,14 +23,24 @@ const { action, loader } = createActionApiRoute( async ({ body, authentication }) => { const service = new DeadLetterManagementService(); - const result = await service.retryAll({ - projectId: authentication.environment.projectId, - environmentId: authentication.environment.id, - eventType: body?.eventType, - environment: authentication.environment, - }); + try { + const result = await service.retryAll({ + projectId: authentication.environment.projectId, + environmentId: authentication.environment.id, + eventType: body?.eventType, + environment: authentication.environment, + }); - return json(result, { status: 200 }); + return json(result, { status: 200 }); + } catch (error) { + if (error instanceof ServiceValidationError) { + return json({ error: error.message }, { status: error.status ?? 422 }); + } + return json( + { error: error instanceof Error ? error.message : "Something went wrong" }, + { status: 500 } + ); + } } ); diff --git a/apps/webapp/app/routes/api.v1.events.ts b/apps/webapp/app/routes/api.v1.events.ts index 36cace033f2..9dd6afe363b 100644 --- a/apps/webapp/app/routes/api.v1.events.ts +++ b/apps/webapp/app/routes/api.v1.events.ts @@ -1,5 +1,6 @@ import { json } from "@remix-run/server-runtime"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { ServiceValidationError } from "~/v3/services/baseService.server"; import { SchemaRegistryService } from "~/v3/services/events/schemaRegistry.server"; export const loader = createLoaderApiRoute( @@ -15,23 +16,33 @@ export const loader = createLoaderApiRoute( async ({ authentication }) => { const service = new SchemaRegistryService(); - const events = await service.listSchemas({ - projectId: authentication.environment.projectId, - environmentId: authentication.environment.id, - }); + try { + const events = await service.listSchemas({ + projectId: authentication.environment.projectId, + environmentId: authentication.environment.id, + }); - return json({ - data: events.map((e) => ({ - id: e.id, - slug: e.slug, - version: e.version, - description: e.description, - hasSchema: e.schema !== null, - deprecatedAt: e.deprecatedAt, - subscriberCount: e.subscriberCount, - createdAt: e.createdAt, - updatedAt: e.updatedAt, - })), - }); + return json({ + data: events.map((e) => ({ + id: e.id, + slug: e.slug, + version: e.version, + description: e.description, + hasSchema: e.schema !== null, + deprecatedAt: e.deprecatedAt, + subscriberCount: e.subscriberCount, + createdAt: e.createdAt, + updatedAt: e.updatedAt, + })), + }); + } catch (error) { + if (error instanceof ServiceValidationError) { + return json({ error: error.message }, { status: error.status ?? 422 }); + } + return json( + { error: error instanceof Error ? error.message : "Something went wrong" }, + { status: 500 } + ); + } } ); diff --git a/apps/webapp/app/v3/services/events/deadLetterManagement.server.ts b/apps/webapp/app/v3/services/events/deadLetterManagement.server.ts index 5cbadca533f..a074d284cfb 100644 --- a/apps/webapp/app/v3/services/events/deadLetterManagement.server.ts +++ b/apps/webapp/app/v3/services/events/deadLetterManagement.server.ts @@ -136,9 +136,26 @@ export class DeadLetterManagementService extends BaseService { let retriedCount = 0; let failedCount = 0; + const triggerService = new TriggerTaskService(); + for (const dle of pendingItems) { try { - await this.retry(dle.id, params.environment); + const body: TriggerTaskRequestBody = { + payload: dle.payload, + options: { + idempotencyKey: `dlq-retry:${dle.id}`, + }, + }; + + await triggerService.call(dle.taskSlug, params.environment, body, { + idempotencyKey: `dlq-retry:${dle.id}`, + }); + + await this._prisma.deadLetterEvent.update({ + where: { id: dle.id }, + data: { status: "RETRIED", processedAt: new Date() }, + }); + retriedCount++; } catch { failedCount++; diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 9b4ba9bead7..acfae8c9678 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -171,6 +171,16 @@ export class PublishEventService extends BaseService { } } + // 2b. Check payload size (512KB limit — larger payloads require object store) + const payloadBytes = Buffer.byteLength(JSON.stringify(payload), "utf-8"); + const MAX_PAYLOAD_BYTES = 512 * 1024; // 512KB + if (payloadBytes > MAX_PAYLOAD_BYTES) { + throw new ServiceValidationError( + `Payload size ${payloadBytes} bytes exceeds the 512KB limit. Use smaller payloads or configure the object store for large payloads.`, + 413 + ); + } + // 3. Find all active subscriptions: exact match + pattern-based const [exactSubscriptions, patternSubscriptions] = await Promise.all([ // Exact subscriptions: tied to this specific EventDefinition diff --git a/packages/cli-v3/src/apiClient.ts b/packages/cli-v3/src/apiClient.ts index 5b214457d8b..67921204b35 100644 --- a/packages/cli-v3/src/apiClient.ts +++ b/packages/cli-v3/src/apiClient.ts @@ -563,7 +563,17 @@ export class CliApiClient { }); } - async publishEvent(projectRef: string, eventId: string, payload: unknown) { + async publishEvent( + projectRef: string, + eventId: string, + payload: unknown, + options?: { + idempotencyKey?: string; + delay?: string; + tags?: string[]; + orderingKey?: string; + } + ) { if (!this.accessToken) { throw new Error("publishEvent: No access token"); } @@ -579,7 +589,17 @@ export class CliApiClient { ...this.getHeaders(), "x-trigger-project-ref": projectRef, }, - body: JSON.stringify({ payload }), + body: JSON.stringify({ + payload, + options: options + ? { + idempotencyKey: options.idempotencyKey, + delay: options.delay, + tags: options.tags, + orderingKey: options.orderingKey, + } + : undefined, + }), } ); } diff --git a/packages/cli-v3/src/commands/events/publish.ts b/packages/cli-v3/src/commands/events/publish.ts index 0732506c318..bec07281c49 100644 --- a/packages/cli-v3/src/commands/events/publish.ts +++ b/packages/cli-v3/src/commands/events/publish.ts @@ -21,6 +21,10 @@ const EventsPublishOptions = CommonCommandOptions.extend({ projectRef: z.string().optional(), envFile: z.string().optional(), payload: z.string(), + delay: z.string().optional(), + tags: z.string().optional(), + idempotencyKey: z.string().optional(), + orderingKey: z.string().optional(), }); type EventsPublishOptions = z.infer; @@ -34,6 +38,10 @@ export function configureEventsPublishCommand(program: Command) { .option("-c, --config ", "The name of the config file") .option("-p, --project-ref ", "The project ref") .option("--env-file ", "Path to the .env file") + .option("--delay ", "Delay before execution (e.g. '30s', '5m', ISO date)") + .option("--tags ", "Comma-separated tags to attach") + .option("--idempotency-key ", "Idempotency key for deduplication") + .option("--ordering-key ", "Ordering key for sequential processing") ).action(async (eventId: string, options) => { await handleTelemetry(async () => { await printInitialBanner(false, options.profile); @@ -96,7 +104,19 @@ async function _eventsPublishCommand(options: EventsPublishCommandInput) { loadingSpinner.start("Publishing event..."); const apiClient = new CliApiClient(authentication.auth.apiUrl, authentication.auth.accessToken); - const result = await apiClient.publishEvent(resolvedConfig.project, options.eventId, payload); + const publishOptions = { + idempotencyKey: options.idempotencyKey, + delay: options.delay, + tags: options.tags ? options.tags.split(",").map((t: string) => t.trim()) : undefined, + orderingKey: options.orderingKey, + }; + const hasOptions = Object.values(publishOptions).some((v) => v !== undefined); + const result = await apiClient.publishEvent( + resolvedConfig.project, + options.eventId, + payload, + hasOptions ? publishOptions : undefined + ); if (!result.success) { loadingSpinner.stop("Failed to publish event"); From c05c326cf9303d99b781246d8f172bd93fc908cc Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Mon, 2 Mar 2026 22:29:33 -0800 Subject: [PATCH 55/65] =?UTF-8?q?fix(events):=20phase=2013=20=E2=80=94=20L?= =?UTF-8?q?OW=20audit=20fixes=20(cache=20bounds)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 13.1: Add LRU eviction to validatorCache, filterCache, patternCache (max 1000) - 13.2: Zod schema tightening done in phase 10 commit Co-Authored-By: Claude Opus 4.6 --- .../app/v3/services/events/schemaRegistry.server.ts | 12 +++++++++++- packages/core/src/v3/events/filterEvaluator.ts | 9 +++++++++ packages/core/src/v3/events/patternMatcher.ts | 9 +++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/apps/webapp/app/v3/services/events/schemaRegistry.server.ts b/apps/webapp/app/v3/services/events/schemaRegistry.server.ts index 121903c0330..de69df1fef5 100644 --- a/apps/webapp/app/v3/services/events/schemaRegistry.server.ts +++ b/apps/webapp/app/v3/services/events/schemaRegistry.server.ts @@ -6,8 +6,9 @@ import { BaseService, ServiceValidationError } from "../baseService.server"; const ajv = new Ajv({ allErrors: true, strict: false }); -/** Cached compiled validators keyed by EventDefinition.id */ +/** Cached compiled validators keyed by EventDefinition.id (bounded to prevent memory leaks) */ const validatorCache = new Map(); +const VALIDATOR_CACHE_MAX = 1000; export type SchemaValidationResult = | { success: true } @@ -176,6 +177,15 @@ export class SchemaRegistryService extends BaseService { if (!validate) { validate = ajv.compile(schema as object); + if (validatorCache.size >= VALIDATOR_CACHE_MAX) { + // Evict oldest entries (Map iterates in insertion order) + const toDelete = Math.floor(VALIDATOR_CACHE_MAX / 2); + let i = 0; + for (const key of validatorCache.keys()) { + if (i++ >= toDelete) break; + validatorCache.delete(key); + } + } validatorCache.set(eventDefinitionId, validate); } diff --git a/packages/core/src/v3/events/filterEvaluator.ts b/packages/core/src/v3/events/filterEvaluator.ts index 849a4fc7e38..9795aa2af78 100644 --- a/packages/core/src/v3/events/filterEvaluator.ts +++ b/packages/core/src/v3/events/filterEvaluator.ts @@ -4,6 +4,7 @@ import { eventFilterMatches } from "../../eventFilterMatches.js"; type CompiledFilter = (payload: unknown) => boolean; const filterCache = new Map(); +const FILTER_CACHE_MAX = 1000; /** * Compile an EventFilter into a reusable predicate function. @@ -20,6 +21,14 @@ export function compileFilter(filter: EventFilter, cacheKey?: string): CompiledF const fn: CompiledFilter = (payload: unknown) => eventFilterMatches(payload, filter); if (cacheKey) { + if (filterCache.size >= FILTER_CACHE_MAX) { + const toDelete = Math.floor(FILTER_CACHE_MAX / 2); + let i = 0; + for (const key of filterCache.keys()) { + if (i++ >= toDelete) break; + filterCache.delete(key); + } + } filterCache.set(cacheKey, fn); } diff --git a/packages/core/src/v3/events/patternMatcher.ts b/packages/core/src/v3/events/patternMatcher.ts index fdef8c496b5..1e480815383 100644 --- a/packages/core/src/v3/events/patternMatcher.ts +++ b/packages/core/src/v3/events/patternMatcher.ts @@ -15,6 +15,7 @@ type PatternPredicate = (eventSlug: string) => boolean; const patternCache = new Map(); +const PATTERN_CACHE_MAX = 1000; /** * Compile a wildcard pattern into a reusable predicate. @@ -25,6 +26,14 @@ export function compilePattern(pattern: string): PatternPredicate { if (cached) return cached; const fn = buildPatternFn(pattern); + if (patternCache.size >= PATTERN_CACHE_MAX) { + const toDelete = Math.floor(PATTERN_CACHE_MAX / 2); + let i = 0; + for (const key of patternCache.keys()) { + if (i++ >= toDelete) break; + patternCache.delete(key); + } + } patternCache.set(pattern, fn); return fn; } From f269a5ecafb44e621a625c8fc8e50cb794e965b3 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Mon, 2 Mar 2026 22:29:42 -0800 Subject: [PATCH 56/65] =?UTF-8?q?chore:=20update=20memory=20files=20?= =?UTF-8?q?=E2=80=94=20audit=20fixes=20plan=20and=20roadmap=20status?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.6 --- .../memory/MEMORY.md | 1 + .../memory/pubsub-audit-fixes.md | 229 ++++++++++++++++++ .../memory/pubsub-roadmap.md | 5 + 3 files changed, 235 insertions(+) create mode 100644 .claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-audit-fixes.md diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md index 0d2d1d991b3..17d1e48b28c 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/MEMORY.md @@ -4,6 +4,7 @@ - [Roadmap & Status](pubsub-roadmap.md) — phases 0-8 complete, pending items identified - [Detailed Progress](pubsub-progress.md) — per-phase notes, commits, decisions - [Pending Items](pubsub-pending.md) — Redis rate limiter, consumer groups, dashboard, etc. +- [Audit Fix Plan](pubsub-audit-fixes.md) — Phases 10-13: CRITICAL/HIGH/MEDIUM/LOW fixes + test coverage - Repo conventions: [repo-conventions.md](repo-conventions.md) - Branch: `feat/pubsub-event-system` diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-audit-fixes.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-audit-fixes.md new file mode 100644 index 00000000000..964db32828f --- /dev/null +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-audit-fixes.md @@ -0,0 +1,229 @@ +# Pub/Sub Event System — Audit Fix Plan + +Findings from 5-agent parallel audit (2026-03-03). Organized by priority. +Follow [Implementation Process & Guidelines](pubsub-roadmap.md#implementation-process--guidelines) for each phase. + +--- + +## Phase 10: Audit Fixes — CRITICAL + HIGH + +### 10.1 — Fix `expireTtlRuns` Lua: global concurrency slot leak (CRITICAL) + +**Found by**: Redis auditor +**Severity**: CRITICAL — permanent slot leak +**File**: `internal-packages/run-engine/src/run-queue/index.ts:2633-2726` + +**Problem**: The `expireTtlRuns` Lua script removes from `queueCurrentConcurrency`, `queueCurrentDequeued`, `envCurrentConcurrency`, `envCurrentDequeued` but does NOT remove from `globalCurrentConcurrency`. When a run with an ordering key expires via TTL, the global concurrency slot is permanently leaked, eventually starving the entire queue. + +**Fix**: +1. Add `globalCurrentConcurrencyKey` as a new KEYS parameter to the `expireTtlRuns` Lua +2. Add `redis.call('SREM', globalCurrentConcurrencyKey, messageId)` alongside existing SREMs +3. Update `numberOfKeys`, type declaration, and caller to pass the key +4. Update the caller that invokes `expireTtlRuns` to compute and pass `queueGlobalCurrentConcurrencyKeyFromQueue` + +**Verify**: Run existing run-engine TTL tests + +### 10.2 — Fix `clearMessageFromConcurrencySets` bare queue name (HIGH) + +**Found by**: Redis auditor +**Severity**: HIGH — SREM to wrong key, slot never released +**File**: `internal-packages/run-engine/src/engine/index.ts:2240-2243` + +**Problem**: `clearMessageFromConcurrencySets` is called with `taskRun.queue` which is a bare queue name (e.g. `"my-task"`), not a full Redis key. `queueGlobalCurrentConcurrencyKeyFromQueue()` expects a full key like `{org:X}:proj:Y:env:Z:queue:my-task` and produces a nonsense key from a bare name. + +**Fix**: Trace how other callers of similar methods get the full queue key (likely from the `message.queue` field which includes the full path). Ensure `clearMessageFromConcurrencySets` either: +- Receives the full queue key, or +- Has access to the env/org/project context to construct it + +**Verify**: Check that the same issue exists for the existing per-key `queueCurrentConcurrencyKeyFromQueue` call (it probably does but SREM on a wrong key is a no-op, not a crash). + +### 10.3 — Add `.max()` to batch publish items array (HIGH) + +**Found by**: Security auditor +**Severity**: HIGH — potential DoS +**File**: `packages/core/src/v3/schemas/api.ts` — `BatchPublishEventRequestBody` + +**Fix**: Add `.max(100)` (or similar) to the `items` array in `BatchPublishEventRequestBody`. Matches the pattern of existing batch trigger which has limits. + +### 10.4 — Fix publishAndWait schema: parentRunId required but options optional (HIGH) + +**Found by**: API auditor +**Severity**: HIGH — schema mismatch causes runtime 400 instead of Zod validation error +**File**: `packages/core/src/v3/schemas/api.ts:1658-1671` + +**Fix**: Either: +- Make `options` required in `PublishAndWaitEventRequestBody`, or +- Move `parentRunId` to be a top-level required field outside of `options` + +### 10.5 — Fix ClickHouse interval string interpolation (HIGH) + +**Found by**: Security auditor +**Severity**: HIGH — fragile pattern +**File**: `apps/webapp/app/routes/api.v1.events.$eventId.stats.ts:54` + +**Fix**: Use parameterized query or keep the whitelist validation but use a safer pattern (map from allowed period to interval string rather than interpolating user input). + +### 10.6 — Add missing index for pattern subscription query (HIGH) + +**Found by**: DB auditor +**Severity**: HIGH — full table scan on every publish +**File**: `internal-packages/database/prisma/schema.prisma` — `EventSubscription` + +**Fix**: +1. Add `@@index([projectId, environmentId, enabled])` to EventSubscription model +2. Create migration with `CREATE INDEX CONCURRENTLY` in its own file +3. Run `pnpm run db:migrate:deploy && pnpm run generate` + +### 10.7 — Fix batch publish partial failure semantics (HIGH) + +**Found by**: API auditor +**Severity**: HIGH — client can't determine which items succeeded +**File**: `apps/webapp/app/routes/api.v1.events.$eventId.batchPublish.ts:40-57` + +**Fix**: Two options: +- **Option A**: Validate ALL items upfront before triggering any (current approach fails mid-batch) +- **Option B**: Return partial results with per-item status (more complex but more resilient) + +Recommend Option A — validate schema + rate limits for all items first, then trigger. + +--- + +## Phase 11: Audit Fixes — MEDIUM + +### 11.1 — Fix N+1 in DLQ retryAll + +**File**: `apps/webapp/app/v3/services/events/deadLetterManagement.server.ts:126-148` +**Fix**: Remove redundant re-fetch in `retry()` when called from `retryAll()`, or batch the operations. + +### 11.2 — Add payload size check before fan-out + +**File**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` +**Fix**: Check payload byte size before triggering subscribers. Return 413 if over limit and object store is not configured. + +### 11.3 — Fix inconsistent error handling in routes + +**Files**: `api.v1.events.dlq.retry-all.ts`, `api.v1.events.ts` +**Fix**: Add try/catch with ServiceValidationError handling, matching other routes. + +### 11.4 — Add CLI publish options support + +**File**: `packages/cli-v3/src/commands/events/publish.ts` +**Fix**: Add `--delay`, `--tags`, `--idempotency-key`, `--ordering-key` options. + +### 11.5 — Fix schema validation silent pass on compilation error + +**File**: `apps/webapp/app/v3/services/events/schemaRegistry.server.ts:198-201` +**Fix**: Log a warning when ajv compilation fails, and optionally reject the publish. + +### 11.6 — Add stale subscription cleanup + +**File**: `apps/webapp/app/v3/services/events/publishEvent.server.ts` +**Fix**: When a subscriber trigger fails consistently, log a warning and optionally disable the subscription after N consecutive failures. + +### 11.7 — Add data cleanup mechanism + +**Fix**: Add a periodic cleanup job (or TTL-based approach) for: +- Disabled EventSubscriptions older than 30 days +- Processed DeadLetterEvents (RETRIED/DISCARDED) older than 30 days +- Deprecated EventDefinitions with no active subscriptions + +--- + +## Phase 12: Test Coverage + +### 12.1 — Tests for ReplayEventsService + +**File**: `apps/webapp/test/engine/replayEvents.test.ts` (new) +**Tests**: +- Replay with date range filter +- Replay with task filter +- Replay dry run (count only) +- Replay with idempotency (no duplicate triggers) +- Replay when ClickHouse is unavailable (graceful error) + +Note: These require ClickHouse in testcontainers or mocking. + +### 12.2 — Tests for DeadLetterService + +**File**: `apps/webapp/test/engine/deadLetterService.test.ts` (new) +**Tests**: +- Failed event-triggered run creates DLQ entry +- Non-event run does NOT create DLQ entry +- DLQ entry has correct eventType, payload, error +- Multiple failures create separate DLQ entries + +### 12.3 — Tests for DeadLetterManagementService + +**File**: `apps/webapp/test/engine/deadLetterManagement.test.ts` (new) +**Tests**: +- List DLQ entries with pagination +- List with eventType filter +- List with status filter +- Retry creates new run with correct payload +- Retry marks DLQ entry as RETRIED +- Discard marks entry as DISCARDED +- RetryAll processes up to 1000 items +- Retry/discard nonexistent ID returns error + +### 12.4 — Tests for RedisEventRateLimitChecker + +**File**: `apps/webapp/test/engine/eventRateLimiter.test.ts` (extend) +**Tests**: +- Redis checker allows under limit +- Redis checker blocks over limit +- Redis checker returns correct remaining/retryAfter +- Different configs get separate Ratelimit instances + +Note: Requires Redis in testcontainers. + +### 12.5 — Tests for SchemaRegistryService.checkCompatibility + +**File**: extend existing SchemaRegistryService tests +**Tests**: +- Compatible schema change (add optional field) +- Incompatible change (remove required field) +- Incompatible change (change field type) + +--- + +## Phase 13: LOW Priority Fixes + +### 13.1 — Add LRU bounds to caches +- `validatorCache` in SchemaRegistryService: max 1000 entries +- `patternCache`/`filterCache` in core evaluators: max 1000 entries +- `InMemoryEventRateLimitChecker.windows`: evict entries older than 2x window + +### 13.2 — Tighten Zod schemas +- `payload: z.any()` → `payload: z.unknown()` +- `metadata: z.any()` → `metadata: z.record(z.unknown())` +- Add `.max(256)` to idempotencyKey +- Add DLQ status validation with Zod instead of `as` cast + +### 13.3 — Remove dead code +- Unused `compileFilter`/`evaluateFilter` exports from core filterEvaluator + +### 13.4 — Fix batchPublish URL naming +- Current: `/api/v1/events/:id/batchPublish` (camelCase) +- Consider: `/api/v1/events/:id/batch-publish` or keep for consistency + +--- + +## Execution Order + +``` +Phase 10 (CRITICAL+HIGH) → Phase 12 (Tests) → Phase 11 (MEDIUM) → Phase 13 (LOW) +``` + +Phase 10 first because it contains a CRITICAL bug (permanent slot leak). +Phase 12 second because tests validate the fixes and catch regressions. +Phase 11 and 13 are improvements, not blockers. + +## Verification per phase + +Same as roadmap guidelines: +1. `pnpm run build --filter @internal/run-engine --filter webapp --filter @trigger.dev/core --filter @trigger.dev/sdk` +2. `cd internal-packages/run-engine && pnpm run test --run` (run-engine: 236+ must pass) +3. `cd apps/webapp && pnpm run test ./test/engine/publishEvent.test.ts --run` (24+ must pass) +4. `cd apps/webapp && pnpm run test ./test/engine/eventRateLimiter.test.ts --run` (11+ must pass) +5. New test files must pass +6. Commit after each sub-step: `feat(events): phase X.Y — ` diff --git a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md index 67906eded63..b4be72bafc7 100644 --- a/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md +++ b/.claude/projects/-Users-terac-repos-trigger-dev/memory/pubsub-roadmap.md @@ -30,7 +30,12 @@ First-class pub/sub event system within Trigger.dev that enables: | 9.3 | Integration tests verified | DONE (24/24 pass) | | 9.4 | Dashboard UI, CLI, docs, reference project | PARTIAL (CLI + docs + ref done, dashboard UI pending) | | 9.5 | Consumer-side rate limiting + backpressure | NOT STARTED | +| 10 | Audit fixes — CRITICAL + HIGH (7 items) | NOT STARTED | +| 11 | Audit fixes — MEDIUM (7 items) | NOT STARTED | +| 12 | Test coverage gaps (5 test suites) | NOT STARTED | +| 13 | Audit fixes — LOW (4 items) | NOT STARTED | +See [pubsub-audit-fixes.md](pubsub-audit-fixes.md) for the full audit fix plan (Phases 10-13). See [pubsub-pending.md](pubsub-pending.md) for details on remaining items. See [pubsub-progress.md](pubsub-progress.md) for per-phase implementation notes. From d625a220ced4f247e17791c2fee3b3ed2006ac0a Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Mon, 2 Mar 2026 22:37:57 -0800 Subject: [PATCH 57/65] test(events): add DLQ service integration tests (Phase 12) Add testcontainer-based tests for DeadLetterService and DeadLetterManagementService covering DLQ entry creation, field correctness, pagination, filtering, and discard flow. Co-Authored-By: Claude Opus 4.6 --- .../test/engine/deadLetterManagement.test.ts | 270 ++++++++++++++++++ .../test/engine/deadLetterService.test.ts | 219 ++++++++++++++ 2 files changed, 489 insertions(+) create mode 100644 apps/webapp/test/engine/deadLetterManagement.test.ts create mode 100644 apps/webapp/test/engine/deadLetterService.test.ts diff --git a/apps/webapp/test/engine/deadLetterManagement.test.ts b/apps/webapp/test/engine/deadLetterManagement.test.ts new file mode 100644 index 00000000000..aafb3d3b97e --- /dev/null +++ b/apps/webapp/test/engine/deadLetterManagement.test.ts @@ -0,0 +1,270 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client (required for webapp service imports) +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +vi.mock("~/services/platform.v3.server", async (importOriginal) => { + const actual = (await importOriginal()) as Record; + return { + ...actual, + getEntitlement: vi.fn(), + }; +}); + +import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests"; +import { postgresTest } from "@internal/testcontainers"; +import { generateFriendlyId } from "@trigger.dev/core/v3/isomorphic"; +import { DeadLetterManagementService } from "../../app/v3/services/events/deadLetterManagement.server"; +import { ServiceValidationError } from "../../app/v3/services/common.server"; + +vi.setConfig({ testTimeout: 120_000 }); + +/** + * Helper: create a TaskRun in the database so DeadLetterEvent can reference it. + */ +async function createTaskRun( + prisma: any, + env: { id: string; projectId: string; organization: { id: string } }, + taskIdentifier: string +) { + const runId = generateFriendlyId("run"); + return prisma.taskRun.create({ + data: { + id: runId, + friendlyId: runId, + number: 1, + taskIdentifier, + payload: JSON.stringify({ test: true }), + payloadType: "application/json", + traceId: "trace_" + runId, + spanId: "span_" + runId, + queue: `task/${taskIdentifier}`, + status: "COMPLETED_WITH_ERRORS", + runtimeEnvironmentId: env.id, + projectId: env.projectId, + organizationId: env.organization.id, + engine: "V2", + }, + }); +} + +/** + * Helper: create a DeadLetterEvent directly in the database. + */ +async function createDeadLetterEvent( + prisma: any, + env: { id: string; projectId: string }, + run: { id: string }, + overrides: { + eventType?: string; + status?: "PENDING" | "RETRIED" | "DISCARDED"; + payload?: object; + createdAt?: Date; + } = {} +) { + const dleId = generateFriendlyId("dle"); + return prisma.deadLetterEvent.create({ + data: { + id: dleId, + friendlyId: dleId, + eventType: overrides.eventType ?? "test.event", + payload: overrides.payload ?? { key: "value" }, + taskSlug: "test-task", + failedRunId: run.id, + error: { message: "test error" }, + attemptCount: 1, + sourceEventId: "src_" + dleId, + projectId: env.projectId, + environmentId: env.id, + status: overrides.status ?? "PENDING", + ...(overrides.createdAt && { createdAt: overrides.createdAt }), + }, + }); +} + +describe("DeadLetterManagementService", () => { + postgresTest( + "List DLQ entries with pagination", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Create 5 DLQ entries, each needs its own TaskRun (foreign key) + const entries = []; + for (let i = 0; i < 5; i++) { + const run = await createTaskRun(prisma, env, `task-${i}`); + const dle = await createDeadLetterEvent(prisma, env, run, { + eventType: "paginated.event", + // Stagger createdAt so ordering is deterministic + createdAt: new Date(Date.now() - (4 - i) * 1000), + }); + entries.push(dle); + } + + const service = new DeadLetterManagementService(prisma); + + // Page 1: limit 2 + const page1 = await service.list({ + projectId: env.projectId, + environmentId: env.id, + limit: 2, + }); + + expect(page1.data).toHaveLength(2); + expect(page1.pagination.hasMore).toBe(true); + expect(page1.pagination.cursor).toBeDefined(); + expect(page1.pagination.cursor).not.toBeNull(); + + // Page 2: use cursor from page 1 + const page2 = await service.list({ + projectId: env.projectId, + environmentId: env.id, + limit: 2, + cursor: page1.pagination.cursor!, + }); + + expect(page2.data).toHaveLength(2); + expect(page2.pagination.hasMore).toBe(true); + + // Page 3: last item + const page3 = await service.list({ + projectId: env.projectId, + environmentId: env.id, + limit: 2, + cursor: page2.pagination.cursor!, + }); + + expect(page3.data).toHaveLength(1); + expect(page3.pagination.hasMore).toBe(false); + expect(page3.pagination.cursor).toBeNull(); + + // All 5 entries across all pages, no duplicates + const allIds = [ + ...page1.data.map((d: any) => d.id), + ...page2.data.map((d: any) => d.id), + ...page3.data.map((d: any) => d.id), + ]; + expect(new Set(allIds).size).toBe(5); + } + ); + + postgresTest( + "List with eventType filter", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Create entries with different event types + const run1 = await createTaskRun(prisma, env, "task-alpha"); + const run2 = await createTaskRun(prisma, env, "task-beta"); + const run3 = await createTaskRun(prisma, env, "task-gamma"); + + await createDeadLetterEvent(prisma, env, run1, { eventType: "order.created" }); + await createDeadLetterEvent(prisma, env, run2, { eventType: "user.signed_up" }); + await createDeadLetterEvent(prisma, env, run3, { eventType: "order.created" }); + + const service = new DeadLetterManagementService(prisma); + + const orderEvents = await service.list({ + projectId: env.projectId, + environmentId: env.id, + eventType: "order.created", + }); + + expect(orderEvents.data).toHaveLength(2); + expect(orderEvents.data.every((d: any) => d.eventType === "order.created")).toBe(true); + + const userEvents = await service.list({ + projectId: env.projectId, + environmentId: env.id, + eventType: "user.signed_up", + }); + + expect(userEvents.data).toHaveLength(1); + expect(userEvents.data[0].eventType).toBe("user.signed_up"); + } + ); + + postgresTest( + "List with status filter", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const run1 = await createTaskRun(prisma, env, "task-p1"); + const run2 = await createTaskRun(prisma, env, "task-p2"); + const run3 = await createTaskRun(prisma, env, "task-r1"); + + await createDeadLetterEvent(prisma, env, run1, { status: "PENDING" }); + await createDeadLetterEvent(prisma, env, run2, { status: "PENDING" }); + await createDeadLetterEvent(prisma, env, run3, { status: "RETRIED" }); + + const service = new DeadLetterManagementService(prisma); + + const pendingOnly = await service.list({ + projectId: env.projectId, + environmentId: env.id, + status: "PENDING", + }); + + expect(pendingOnly.data).toHaveLength(2); + expect(pendingOnly.data.every((d: any) => d.status === "PENDING")).toBe(true); + + const retriedOnly = await service.list({ + projectId: env.projectId, + environmentId: env.id, + status: "RETRIED", + }); + + expect(retriedOnly.data).toHaveLength(1); + expect(retriedOnly.data[0].status).toBe("RETRIED"); + } + ); + + postgresTest( + "Discard marks entry as DISCARDED", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const run = await createTaskRun(prisma, env, "discard-task"); + const dle = await createDeadLetterEvent(prisma, env, run, { + eventType: "invoice.failed", + status: "PENDING", + }); + + const service = new DeadLetterManagementService(prisma); + + const result = await service.discard(dle.id, env); + + expect(result.id).toBe(dle.id); + expect(result.status).toBe("DISCARDED"); + + // Verify in DB + const updated = await prisma.deadLetterEvent.findUnique({ + where: { id: dle.id }, + }); + + expect(updated).toBeDefined(); + expect(updated!.status).toBe("DISCARDED"); + expect(updated!.processedAt).toBeDefined(); + expect(updated!.processedAt).not.toBeNull(); + } + ); + + postgresTest( + "Discard nonexistent ID returns error", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new DeadLetterManagementService(prisma); + + await expect(service.discard("dle_nonexistent_fake_id", env)).rejects.toThrow( + ServiceValidationError + ); + + await expect(service.discard("dle_nonexistent_fake_id", env)).rejects.toThrow( + "Dead letter event not found or already processed" + ); + } + ); +}); diff --git a/apps/webapp/test/engine/deadLetterService.test.ts b/apps/webapp/test/engine/deadLetterService.test.ts new file mode 100644 index 00000000000..f9a6e2722fb --- /dev/null +++ b/apps/webapp/test/engine/deadLetterService.test.ts @@ -0,0 +1,219 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client (required for webapp service imports) +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +vi.mock("~/services/platform.v3.server", async (importOriginal) => { + const actual = (await importOriginal()) as Record; + return { + ...actual, + getEntitlement: vi.fn(), + }; +}); + +import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests"; +import { postgresTest } from "@internal/testcontainers"; +import { generateFriendlyId } from "@trigger.dev/core/v3/isomorphic"; +import { DeadLetterService } from "../../app/v3/services/events/deadLetterService.server"; + +vi.setConfig({ testTimeout: 120_000 }); + +/** + * Helper: create a TaskRun in the database with the given overrides. + * Returns the created TaskRun record. + */ +async function createTaskRun( + prisma: any, + env: { id: string; projectId: string; organization: { id: string } }, + overrides: { + taskIdentifier?: string; + payload?: string; + metadata?: string | null; + status?: string; + } = {} +) { + const runId = generateFriendlyId("run"); + return prisma.taskRun.create({ + data: { + id: runId, + friendlyId: runId, + number: 1, + taskIdentifier: overrides.taskIdentifier ?? "test-task", + payload: overrides.payload ?? JSON.stringify({ hello: "world" }), + payloadType: "application/json", + traceId: "trace_test_" + runId, + spanId: "span_test_" + runId, + queue: "task/test-task", + status: overrides.status ?? "COMPLETED_WITH_ERRORS", + runtimeEnvironmentId: env.id, + projectId: env.projectId, + organizationId: env.organization.id, + metadata: overrides.metadata ?? null, + engine: "V2", + }, + }); +} + +describe("DeadLetterService", () => { + postgresTest( + "Failed event-triggered run creates DLQ entry", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const eventMetadata = { + $$event: { + eventId: "evt_abc123", + eventType: "order.created", + sourceEventId: "src_evt_001", + }, + }; + + const run = await createTaskRun(prisma, env, { + taskIdentifier: "process-order", + payload: JSON.stringify({ orderId: "order_999" }), + metadata: JSON.stringify(eventMetadata), + status: "COMPLETED_WITH_ERRORS", + }); + + const service = new DeadLetterService(prisma); + await service.handleFailedRun(run, { message: "Task timed out" }); + + const dleEntries = await prisma.deadLetterEvent.findMany({ + where: { failedRunId: run.id }, + }); + + expect(dleEntries).toHaveLength(1); + expect(dleEntries[0].eventType).toBe("order.created"); + expect(dleEntries[0].taskSlug).toBe("process-order"); + } + ); + + postgresTest( + "Non-event run does NOT create DLQ entry", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // No $$event in metadata + const run = await createTaskRun(prisma, env, { + taskIdentifier: "plain-task", + metadata: JSON.stringify({ someKey: "someValue" }), + status: "COMPLETED_WITH_ERRORS", + }); + + const service = new DeadLetterService(prisma); + await service.handleFailedRun(run, { message: "Something went wrong" }); + + const dleEntries = await prisma.deadLetterEvent.findMany({ + where: { failedRunId: run.id }, + }); + + expect(dleEntries).toHaveLength(0); + } + ); + + postgresTest( + "DLQ entry has correct fields", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const eventMetadata = { + $$event: { + eventId: "evt_field_test", + eventType: "user.signed_up", + sourceEventId: "src_evt_field", + }, + }; + + const run = await createTaskRun(prisma, env, { + taskIdentifier: "welcome-email", + payload: JSON.stringify({ userId: "usr_42", email: "test@example.com" }), + metadata: JSON.stringify(eventMetadata), + status: "COMPLETED_WITH_ERRORS", + }); + + const errorObj = { message: "SMTP timeout", code: "ETIMEOUT" }; + + const service = new DeadLetterService(prisma); + await service.handleFailedRun(run, errorObj); + + const dle = await prisma.deadLetterEvent.findFirst({ + where: { failedRunId: run.id }, + }); + + expect(dle).toBeDefined(); + expect(dle!.eventType).toBe("user.signed_up"); + expect(dle!.payload).toEqual({ userId: "usr_42", email: "test@example.com" }); + expect(dle!.error).toEqual(errorObj); + expect(dle!.taskSlug).toBe("welcome-email"); + expect(dle!.failedRunId).toBe(run.id); + expect(dle!.sourceEventId).toBe("src_evt_field"); + expect(dle!.projectId).toBe(env.projectId); + expect(dle!.environmentId).toBe(env.id); + expect(dle!.status).toBe("PENDING"); + expect(dle!.friendlyId).toMatch(/^dle_/); + } + ); + + postgresTest( + "Multiple failures create separate DLQ entries", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new DeadLetterService(prisma); + + // First failed run + const run1 = await createTaskRun(prisma, env, { + taskIdentifier: "task-a", + payload: JSON.stringify({ item: 1 }), + metadata: JSON.stringify({ + $$event: { + eventId: "evt_multi_1", + eventType: "item.created", + sourceEventId: "src_multi_1", + }, + }), + status: "COMPLETED_WITH_ERRORS", + }); + + // Second failed run + const run2 = await createTaskRun(prisma, env, { + taskIdentifier: "task-b", + payload: JSON.stringify({ item: 2 }), + metadata: JSON.stringify({ + $$event: { + eventId: "evt_multi_2", + eventType: "item.created", + sourceEventId: "src_multi_2", + }, + }), + status: "COMPLETED_WITH_ERRORS", + }); + + await service.handleFailedRun(run1, { message: "Error 1" }); + await service.handleFailedRun(run2, { message: "Error 2" }); + + const allEntries = await prisma.deadLetterEvent.findMany({ + where: { + projectId: env.projectId, + environmentId: env.id, + }, + orderBy: { createdAt: "asc" }, + }); + + expect(allEntries).toHaveLength(2); + + const entry1 = allEntries.find((e: any) => e.failedRunId === run1.id); + const entry2 = allEntries.find((e: any) => e.failedRunId === run2.id); + + expect(entry1).toBeDefined(); + expect(entry2).toBeDefined(); + expect(entry1!.taskSlug).toBe("task-a"); + expect(entry2!.taskSlug).toBe("task-b"); + expect(entry1!.sourceEventId).toBe("src_multi_1"); + expect(entry2!.sourceEventId).toBe("src_multi_2"); + } + ); +}); From 86dc70fcd507d72c2cbf9d48bea932cb248a27f6 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Tue, 3 Mar 2026 22:20:16 -0800 Subject: [PATCH 58/65] fix(events): fix typecheck errors in event routes, DLQ service, and TaskResource schema - Fix resource callback signature in history/stats loader routes (first arg is resource, not params) - Add missing onEventFilter and onEventPattern to TaskResource schema in resources.ts - Fix JSON.parse return type narrowing in DeadLetterService.extractPayload Co-Authored-By: Claude Opus 4.6 --- apps/webapp/app/routes/api.v1.events.$eventId.history.ts | 2 +- apps/webapp/app/routes/api.v1.events.$eventId.stats.ts | 2 +- apps/webapp/app/v3/services/events/deadLetterService.server.ts | 3 ++- packages/core/src/v3/schemas/resources.ts | 2 ++ 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.history.ts b/apps/webapp/app/routes/api.v1.events.$eventId.history.ts index bf8ecbcc1b3..d1593941c42 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.history.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.history.ts @@ -13,7 +13,7 @@ export const loader = createLoaderApiRoute( corsStrategy: "all", authorization: { action: "read", - resource: (params) => ({ tasks: params.eventId }), + resource: (_resource, params) => ({ tasks: params.eventId }), superScopes: ["read:runs", "read:all", "admin"], }, findResource: async () => 1 as const, diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts b/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts index fe914fa0c96..96873fbd69a 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts @@ -13,7 +13,7 @@ export const loader = createLoaderApiRoute( corsStrategy: "all", authorization: { action: "read", - resource: (params) => ({ tasks: params.eventId }), + resource: (_resource, params) => ({ tasks: params.eventId }), superScopes: ["read:runs", "read:all", "admin"], }, findResource: async () => 1 as const, diff --git a/apps/webapp/app/v3/services/events/deadLetterService.server.ts b/apps/webapp/app/v3/services/events/deadLetterService.server.ts index fb699eb1dab..abd03c89bd7 100644 --- a/apps/webapp/app/v3/services/events/deadLetterService.server.ts +++ b/apps/webapp/app/v3/services/events/deadLetterService.server.ts @@ -75,7 +75,8 @@ export class DeadLetterService extends BaseService { private extractPayload(run: TaskRun): object { try { if (typeof run.payload === "string") { - return JSON.parse(run.payload); + const parsed: unknown = JSON.parse(run.payload); + return typeof parsed === "object" && parsed !== null ? (parsed as object) : { raw: parsed }; } return { raw: run.payload }; } catch { diff --git a/packages/core/src/v3/schemas/resources.ts b/packages/core/src/v3/schemas/resources.ts index e965ce99579..ea9cc3d047c 100644 --- a/packages/core/src/v3/schemas/resources.ts +++ b/packages/core/src/v3/schemas/resources.ts @@ -16,6 +16,8 @@ export const TaskResource = z.object({ // JSONSchema type - using z.unknown() for runtime validation to accept JSONSchema7 payloadSchema: z.unknown().optional(), onEvent: z.string().optional(), + onEventFilter: z.unknown().optional(), + onEventPattern: z.string().optional(), onEventConsumerGroup: z.string().optional(), }); From 63a4bd34527cbd794826a6de0bc6ad44fb5d26f5 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Tue, 3 Mar 2026 22:44:51 -0800 Subject: [PATCH 59/65] =?UTF-8?q?feat(events):=20phase=204.4=20=E2=80=94?= =?UTF-8?q?=20SDK=20DLQ=20config=20per=20event?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `dlq` option to `event()` allowing per-event DLQ configuration. When `dlq.enabled` is false, failed event-triggered runs are silently discarded instead of being stored in the dead letter queue. - Add EventDLQConfig type to SDK and EventDLQManifest schema to core - Add dlqConfig JSON column to EventDefinition model - Wire config through deploy (createBackgroundWorker) and resource catalog - DeadLetterService checks config before creating DLQ entries Co-Authored-By: Claude Opus 4.6 --- .../services/createBackgroundWorker.server.ts | 2 ++ .../events/deadLetterService.server.ts | 28 +++++++++++++++++++ .../migration.sql | 2 ++ .../database/prisma/schema.prisma | 3 ++ .../core/src/v3/resource-catalog/catalog.ts | 2 ++ .../standardResourceCatalog.ts | 1 + packages/core/src/v3/schemas/schemas.ts | 9 ++++++ packages/trigger-sdk/src/v3/events.ts | 26 ++++++++++++++++- 8 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 internal-packages/database/prisma/migrations/20260304064319_add_dlq_config_to_event_definition/migration.sql diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index db99668107f..767132bc303 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -353,12 +353,14 @@ async function syncWorkerEvents( description: event.description, schema: event.schema as any ?? undefined, rateLimit: event.rateLimit as any ?? undefined, + dlqConfig: event.dlq as any ?? undefined, projectId: worker.projectId, }, update: { description: event.description, schema: event.schema as any ?? undefined, rateLimit: event.rateLimit as any ?? undefined, + dlqConfig: event.dlq as any ?? undefined, }, }); diff --git a/apps/webapp/app/v3/services/events/deadLetterService.server.ts b/apps/webapp/app/v3/services/events/deadLetterService.server.ts index abd03c89bd7..09bfc03b885 100644 --- a/apps/webapp/app/v3/services/events/deadLetterService.server.ts +++ b/apps/webapp/app/v3/services/events/deadLetterService.server.ts @@ -21,6 +21,16 @@ export class DeadLetterService extends BaseService { return; // Not an event-triggered run } + // Check if DLQ is disabled for this event type + const dlqEnabled = await this.isDLQEnabled(eventContext.eventType, run.projectId); + if (!dlqEnabled) { + logger.debug("DLQ disabled for event type, skipping", { + runId: run.id, + eventType: eventContext.eventType, + }); + return; + } + try { await this._prisma.deadLetterEvent.create({ data: { @@ -52,6 +62,24 @@ export class DeadLetterService extends BaseService { } } + private async isDLQEnabled(eventType: string, projectId: string): Promise { + try { + const eventDef = await this._prisma.eventDefinition.findFirst({ + where: { slug: eventType, projectId }, + select: { dlqConfig: true }, + }); + + if (!eventDef?.dlqConfig) { + return true; // Default: DLQ enabled + } + + const config = eventDef.dlqConfig as Record; + return config.enabled !== false; + } catch { + return true; // On error, default to enabled + } + } + private extractEventContext(run: TaskRun): EventContext | null { if (!run.metadata) return null; diff --git a/internal-packages/database/prisma/migrations/20260304064319_add_dlq_config_to_event_definition/migration.sql b/internal-packages/database/prisma/migrations/20260304064319_add_dlq_config_to_event_definition/migration.sql new file mode 100644 index 00000000000..db4ad69bb86 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260304064319_add_dlq_config_to_event_definition/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "public"."EventDefinition" ADD COLUMN "dlqConfig" JSONB; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index fdb0d2f779b..71ab6c62604 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -604,6 +604,9 @@ model EventDefinition { // Rate limiting (Phase 7) — JSON config e.g. { "limit": 100, "window": "1m" } rateLimit Json? + // DLQ configuration (Phase 4.4) — JSON config e.g. { "enabled": false } + dlqConfig Json? + project Project @relation(fields: [projectId], references: [id], onDelete: Cascade, onUpdate: Cascade) projectId String diff --git a/packages/core/src/v3/resource-catalog/catalog.ts b/packages/core/src/v3/resource-catalog/catalog.ts index bfc309d6f4c..c04291a1654 100644 --- a/packages/core/src/v3/resource-catalog/catalog.ts +++ b/packages/core/src/v3/resource-catalog/catalog.ts @@ -11,6 +11,8 @@ export interface EventMetadata { rateLimit?: { limit: number; window: string }; /** Ordering configuration — enables per-key serialization with global concurrency limit */ ordering?: { concurrencyLimit?: number }; + /** Dead letter queue configuration */ + dlq?: { enabled?: boolean }; } export interface ResourceCatalog { diff --git a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts index 8fcef565673..aa976a44a9f 100644 --- a/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts +++ b/packages/core/src/v3/resource-catalog/standardResourceCatalog.ts @@ -192,6 +192,7 @@ export class StandardResourceCatalog implements ResourceCatalog { description: event.description, rateLimit: event.rateLimit, ordering: event.ordering, + dlq: event.dlq, })); } diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index 183ca41b28e..f03f4faed9b 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -190,6 +190,13 @@ export const EventOrderingManifest = z.object({ export type EventOrderingManifest = z.infer; +export const EventDLQManifest = z.object({ + /** Whether to store failed event-triggered runs in the DLQ (default: true) */ + enabled: z.boolean().optional(), +}); + +export type EventDLQManifest = z.infer; + export const EventManifest = z.object({ /** Unique event identifier (e.g. "order.created") */ id: z.string(), @@ -203,6 +210,8 @@ export const EventManifest = z.object({ rateLimit: EventRateLimitManifest.optional(), /** Ordering configuration — creates a dedicated queue with per-key serialization */ ordering: EventOrderingManifest.optional(), + /** Dead letter queue configuration */ + dlq: EventDLQManifest.optional(), }); export type EventManifest = z.infer; diff --git a/packages/trigger-sdk/src/v3/events.ts b/packages/trigger-sdk/src/v3/events.ts index 7cb26c4af35..965fb94e5f9 100644 --- a/packages/trigger-sdk/src/v3/events.ts +++ b/packages/trigger-sdk/src/v3/events.ts @@ -51,6 +51,19 @@ export interface EventOptions( export function createEvent( options: EventOptions ): EventDefinition { - const { id, schema, description, version = "1.0", rateLimit, ordering } = options; + const { id, schema, description, version = "1.0", rateLimit, ordering, dlq } = options; // Build the parse function if a schema is provided let parseFn: SchemaParseFn | undefined; @@ -318,6 +341,7 @@ export function createEvent Date: Tue, 3 Mar 2026 22:47:18 -0800 Subject: [PATCH 60/65] =?UTF-8?q?feat(events):=20stale=20subscription=20cl?= =?UTF-8?q?eanup=20=E2=80=94=20daily=20cron=20job=20in=20admin=20worker?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add CleanupStaleSubscriptionsService that finds disabled EventSubscriptions whose associated task no longer exists in any active worker, and deletes them. Runs daily at 3 AM UTC via the admin worker cron. Co-Authored-By: Claude Opus 4.6 --- .../app/v3/services/adminWorker.server.ts | 13 +++++ .../cleanupStaleSubscriptions.server.ts | 58 +++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 apps/webapp/app/v3/services/events/cleanupStaleSubscriptions.server.ts diff --git a/apps/webapp/app/v3/services/adminWorker.server.ts b/apps/webapp/app/v3/services/adminWorker.server.ts index 97c94b954f0..aa52c83fb93 100644 --- a/apps/webapp/app/v3/services/adminWorker.server.ts +++ b/apps/webapp/app/v3/services/adminWorker.server.ts @@ -8,6 +8,8 @@ import { singleton } from "~/utils/singleton"; import { tracer } from "../tracer.server"; import { $replica } from "~/db.server"; import { RunsBackfillerService } from "../../services/runsBackfiller.server"; +import { CleanupStaleSubscriptionsService } from "./events/cleanupStaleSubscriptions.server"; +import { prisma } from "~/db.server"; function initializeWorker() { const redisOptions = { @@ -26,6 +28,13 @@ function initializeWorker() { name: "admin-worker", redisOptions, catalog: { + "admin.cleanupStaleSubscriptions": { + schema: z.object({}), + visibilityTimeoutMs: 60_000 * 5, // 5 minutes + retry: { maxAttempts: 3 }, + cron: "0 3 * * *", // Daily at 3 AM UTC + jitterInMs: 60_000, // 1 minute jitter + }, "admin.backfillRunsToReplication": { schema: z.object({ from: z.coerce.date(), @@ -50,6 +59,10 @@ function initializeWorker() { shutdownTimeoutMs: env.ADMIN_WORKER_SHUTDOWN_TIMEOUT_MS, logger: new Logger("AdminWorker", env.ADMIN_WORKER_LOG_LEVEL), jobs: { + "admin.cleanupStaleSubscriptions": async () => { + const service = new CleanupStaleSubscriptionsService(prisma); + await service.call(); + }, "admin.backfillRunsToReplication": async ({ payload, id }) => { if (!runsReplicationInstance) { logger.error("Runs replication instance not found"); diff --git a/apps/webapp/app/v3/services/events/cleanupStaleSubscriptions.server.ts b/apps/webapp/app/v3/services/events/cleanupStaleSubscriptions.server.ts new file mode 100644 index 00000000000..f9e973a9259 --- /dev/null +++ b/apps/webapp/app/v3/services/events/cleanupStaleSubscriptions.server.ts @@ -0,0 +1,58 @@ +import { PrismaClientOrTransaction } from "~/db.server"; +import { logger } from "~/services/logger.server"; + +/** + * Cleans up stale EventSubscriptions — disabled subscriptions whose associated + * task no longer exists in any active worker for that environment. + */ +export class CleanupStaleSubscriptionsService { + constructor(private readonly _prisma: PrismaClientOrTransaction) {} + + async call(): Promise<{ deletedCount: number; scannedCount: number }> { + // Find all disabled subscriptions + const disabledSubscriptions = await this._prisma.eventSubscription.findMany({ + where: { enabled: false }, + select: { + id: true, + taskSlug: true, + projectId: true, + environmentId: true, + }, + }); + + if (disabledSubscriptions.length === 0) { + return { deletedCount: 0, scannedCount: 0 }; + } + + // For each disabled subscription, check if ANY active worker still has that task + const idsToDelete: string[] = []; + + for (const sub of disabledSubscriptions) { + const taskExists = await this._prisma.backgroundWorkerTask.findFirst({ + where: { + slug: sub.taskSlug, + projectId: sub.projectId, + runtimeEnvironmentId: sub.environmentId, + }, + select: { id: true }, + }); + + if (!taskExists) { + idsToDelete.push(sub.id); + } + } + + if (idsToDelete.length > 0) { + await this._prisma.eventSubscription.deleteMany({ + where: { id: { in: idsToDelete } }, + }); + } + + logger.info("Cleaned up stale event subscriptions", { + deletedCount: idsToDelete.length, + scannedCount: disabledSubscriptions.length, + }); + + return { deletedCount: idsToDelete.length, scannedCount: disabledSubscriptions.length }; + } +} From d53ec0bedd3e4d0d43dbcb098f21f7d27dfe255e Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Tue, 3 Mar 2026 22:53:56 -0800 Subject: [PATCH 61/65] =?UTF-8?q?feat(events):=20CLI=20commands=20?= =?UTF-8?q?=E2=80=94=20history,=20replay,=20dlq=20list,=20dlq=20retry?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add remaining event CLI commands: - `trigger events history ` — paginated event publish history - `trigger events replay ` — replay historical events to subscribers - `trigger events dlq list` — list dead letter queue entries - `trigger events dlq retry ` — retry a specific DLQ entry Adds corresponding CliApiClient methods: getEventHistory, replayEvents, listDeadLetterEvents, retryDeadLetterEvent. Co-Authored-By: Claude Opus 4.6 --- packages/cli-v3/src/apiClient.ts | 123 ++++++++++ packages/cli-v3/src/commands/events/dlq.ts | 220 ++++++++++++++++++ .../cli-v3/src/commands/events/history.ts | 130 +++++++++++ packages/cli-v3/src/commands/events/index.ts | 6 + packages/cli-v3/src/commands/events/replay.ts | 123 ++++++++++ 5 files changed, 602 insertions(+) create mode 100644 packages/cli-v3/src/commands/events/dlq.ts create mode 100644 packages/cli-v3/src/commands/events/history.ts create mode 100644 packages/cli-v3/src/commands/events/replay.ts diff --git a/packages/cli-v3/src/apiClient.ts b/packages/cli-v3/src/apiClient.ts index 67921204b35..ce7a4a776d8 100644 --- a/packages/cli-v3/src/apiClient.ts +++ b/packages/cli-v3/src/apiClient.ts @@ -42,6 +42,10 @@ import { RemoteBuildProviderStatusResponseBody, ListEventsResponseBody, PublishEventResponseBody, + GetEventHistoryResponseBody, + ReplayEventsResponseBody, + ListDeadLetterEventsResponseBody, + RetryDeadLetterEventResponseBody, } from "@trigger.dev/core/v3"; import { WorkloadDebugLogRequestBody, @@ -604,6 +608,125 @@ export class CliApiClient { ); } + async getEventHistory( + projectRef: string, + eventId: string, + options?: { + from?: string; + to?: string; + limit?: number; + cursor?: string; + } + ) { + if (!this.accessToken) { + throw new Error("getEventHistory: No access token"); + } + + const encodedEventId = encodeURIComponent(eventId); + const params = new URLSearchParams(); + if (options?.from) params.set("from", options.from); + if (options?.to) params.set("to", options.to); + if (options?.limit) params.set("limit", String(options.limit)); + if (options?.cursor) params.set("cursor", options.cursor); + const qs = params.toString(); + + return wrapZodFetch( + GetEventHistoryResponseBody, + `${this.apiURL}/api/v1/events/${encodedEventId}/history${qs ? `?${qs}` : ""}`, + { + method: "GET", + headers: { + ...this.getHeaders(), + "x-trigger-project-ref": projectRef, + }, + } + ); + } + + async replayEvents( + projectRef: string, + eventId: string, + body: { + from: string; + to: string; + filter?: unknown; + tasks?: string[]; + dryRun?: boolean; + } + ) { + if (!this.accessToken) { + throw new Error("replayEvents: No access token"); + } + + const encodedEventId = encodeURIComponent(eventId); + + return wrapZodFetch( + ReplayEventsResponseBody, + `${this.apiURL}/api/v1/events/${encodedEventId}/replay`, + { + method: "POST", + headers: { + ...this.getHeaders(), + "x-trigger-project-ref": projectRef, + }, + body: JSON.stringify(body), + } + ); + } + + async listDeadLetterEvents( + projectRef: string, + options?: { + eventType?: string; + status?: string; + limit?: number; + cursor?: string; + } + ) { + if (!this.accessToken) { + throw new Error("listDeadLetterEvents: No access token"); + } + + const params = new URLSearchParams(); + if (options?.eventType) params.set("eventType", options.eventType); + if (options?.status) params.set("status", options.status); + if (options?.limit) params.set("limit", String(options.limit)); + if (options?.cursor) params.set("cursor", options.cursor); + const qs = params.toString(); + + return wrapZodFetch( + ListDeadLetterEventsResponseBody, + `${this.apiURL}/api/v1/events/dlq${qs ? `?${qs}` : ""}`, + { + method: "GET", + headers: { + ...this.getHeaders(), + "x-trigger-project-ref": projectRef, + }, + } + ); + } + + async retryDeadLetterEvent(projectRef: string, id: string) { + if (!this.accessToken) { + throw new Error("retryDeadLetterEvent: No access token"); + } + + const encodedId = encodeURIComponent(id); + + return wrapZodFetch( + RetryDeadLetterEventResponseBody, + `${this.apiURL}/api/v1/events/dlq/${encodedId}/retry`, + { + method: "POST", + headers: { + ...this.getHeaders(), + "x-trigger-project-ref": projectRef, + }, + } + ); + } + get dev() { return { config: this.devConfig.bind(this), diff --git a/packages/cli-v3/src/commands/events/dlq.ts b/packages/cli-v3/src/commands/events/dlq.ts new file mode 100644 index 00000000000..50532931211 --- /dev/null +++ b/packages/cli-v3/src/commands/events/dlq.ts @@ -0,0 +1,220 @@ +import { Command } from "commander"; +import { z } from "zod"; +import { + CommonCommandOptions, + commonOptions, + handleTelemetry, + wrapCommandAction, +} from "../../cli/common.js"; +import { printInitialBanner } from "../../utilities/initialBanner.js"; +import { isLoggedIn } from "../../utilities/session.js"; +import { loadConfig } from "../../config.js"; +import { resolveLocalEnvVars } from "../../utilities/localEnvVars.js"; +import { CliApiClient } from "../../apiClient.js"; +import { intro, outro } from "@clack/prompts"; +import { spinner } from "../../utilities/windows.js"; +import { logger } from "../../utilities/logger.js"; +import { tryCatch } from "@trigger.dev/core"; + +// --- dlq list --- + +const DlqListOptions = CommonCommandOptions.extend({ + config: z.string().optional(), + projectRef: z.string().optional(), + envFile: z.string().optional(), + eventType: z.string().optional(), + status: z.string().optional(), + limit: z.coerce.number().int().optional(), + cursor: z.string().optional(), +}); + +type DlqListOptions = z.infer; + +function configureDlqListCommand(program: Command) { + return commonOptions( + program + .command("list") + .description("List dead letter queue entries") + .option("-c, --config ", "The name of the config file") + .option("-p, --project-ref ", "The project ref") + .option("--env-file ", "Path to the .env file") + .option("--event-type ", "Filter by event type") + .option("--status ", "Filter by status (PENDING, RETRIED, DISCARDED)") + .option("--limit ", "Max results (default 50, max 200)") + .option("--cursor ", "Pagination cursor from previous response") + ).action(async (options) => { + await handleTelemetry(async () => { + await printInitialBanner(false, options.profile); + await dlqListCommand(options); + }); + }); +} + +async function dlqListCommand(options: unknown) { + return await wrapCommandAction("dlqListCommand", DlqListOptions, options, async (opts) => { + return await _dlqListCommand(opts); + }); +} + +async function _dlqListCommand(options: DlqListOptions) { + intro("Dead letter queue"); + + const envVars = resolveLocalEnvVars(options.envFile); + + const authentication = await isLoggedIn(options.profile); + if (!authentication.ok) { + outro(`Not logged in. Use \`trigger login\` first.`); + return; + } + + const [configError, resolvedConfig] = await tryCatch( + loadConfig({ + overrides: { project: options.projectRef ?? envVars.TRIGGER_PROJECT_REF }, + configFile: options.config, + warn: false, + }) + ); + + if (configError || !resolvedConfig?.project) { + outro("Could not resolve project. Use --project-ref or configure trigger.config.ts."); + return; + } + + const loadingSpinner = spinner(); + loadingSpinner.start("Fetching DLQ entries..."); + + const apiClient = new CliApiClient(authentication.auth.apiUrl, authentication.auth.accessToken); + const result = await apiClient.listDeadLetterEvents(resolvedConfig.project, { + eventType: options.eventType, + status: options.status, + limit: options.limit, + cursor: options.cursor, + }); + + if (!result.success) { + loadingSpinner.stop("Failed to fetch DLQ entries"); + logger.error(result.error); + return; + } + + const { data, pagination } = result.data; + loadingSpinner.stop(`Found ${data.length} DLQ entry/entries`); + + if (data.length === 0) { + outro("No dead letter entries found."); + return; + } + + logger.table( + data.map((entry) => ({ + id: entry.friendlyId, + eventType: entry.eventType, + task: entry.taskSlug, + status: entry.status, + attempts: String(entry.attemptCount), + created: entry.createdAt, + })) + ); + + if (pagination.hasMore && pagination.cursor) { + logger.info(`\nMore results available. Use --cursor ${pagination.cursor} to see next page.`); + } +} + +// --- dlq retry --- + +const DlqRetryOptions = CommonCommandOptions.extend({ + config: z.string().optional(), + projectRef: z.string().optional(), + envFile: z.string().optional(), +}); + +type DlqRetryOptions = z.infer; + +function configureDlqRetryCommand(program: Command) { + return commonOptions( + program + .command("retry ") + .description("Retry a dead letter queue entry") + .option("-c, --config ", "The name of the config file") + .option("-p, --project-ref ", "The project ref") + .option("--env-file ", "Path to the .env file") + ).action(async (id: string, options) => { + await handleTelemetry(async () => { + await printInitialBanner(false, options.profile); + await dlqRetryCommand({ ...options, id }); + }); + }); +} + +const DlqRetryCommandInput = DlqRetryOptions.extend({ + id: z.string(), +}); + +type DlqRetryCommandInput = z.infer; + +async function dlqRetryCommand(options: unknown) { + return await wrapCommandAction( + "dlqRetryCommand", + DlqRetryCommandInput, + options, + async (opts) => { + return await _dlqRetryCommand(opts); + } + ); +} + +async function _dlqRetryCommand(options: DlqRetryCommandInput) { + intro(`Retrying DLQ entry "${options.id}"`); + + const envVars = resolveLocalEnvVars(options.envFile); + + const authentication = await isLoggedIn(options.profile); + if (!authentication.ok) { + outro(`Not logged in. Use \`trigger login\` first.`); + return; + } + + const [configError, resolvedConfig] = await tryCatch( + loadConfig({ + overrides: { project: options.projectRef ?? envVars.TRIGGER_PROJECT_REF }, + configFile: options.config, + warn: false, + }) + ); + + if (configError || !resolvedConfig?.project) { + outro("Could not resolve project. Use --project-ref or configure trigger.config.ts."); + return; + } + + const loadingSpinner = spinner(); + loadingSpinner.start("Retrying dead letter event..."); + + const apiClient = new CliApiClient(authentication.auth.apiUrl, authentication.auth.accessToken); + const result = await apiClient.retryDeadLetterEvent(resolvedConfig.project, options.id); + + if (!result.success) { + loadingSpinner.stop("Failed to retry DLQ entry"); + logger.error(result.error); + return; + } + + loadingSpinner.stop("DLQ entry retried successfully"); + + logger.info(`Status: ${result.data.status}`); + if (result.data.runId) { + logger.info(`New run ID: ${result.data.runId}`); + } +} + +// --- Main export --- + +export function configureEventsDlqCommand(program: Command) { + const dlq = program.command("dlq").description("Manage the dead letter queue"); + + configureDlqListCommand(dlq); + configureDlqRetryCommand(dlq); + + return dlq; +} diff --git a/packages/cli-v3/src/commands/events/history.ts b/packages/cli-v3/src/commands/events/history.ts new file mode 100644 index 00000000000..a6d0044660f --- /dev/null +++ b/packages/cli-v3/src/commands/events/history.ts @@ -0,0 +1,130 @@ +import { Command } from "commander"; +import { z } from "zod"; +import { + CommonCommandOptions, + commonOptions, + handleTelemetry, + wrapCommandAction, +} from "../../cli/common.js"; +import { printInitialBanner } from "../../utilities/initialBanner.js"; +import { isLoggedIn } from "../../utilities/session.js"; +import { loadConfig } from "../../config.js"; +import { resolveLocalEnvVars } from "../../utilities/localEnvVars.js"; +import { CliApiClient } from "../../apiClient.js"; +import { intro, outro } from "@clack/prompts"; +import { spinner } from "../../utilities/windows.js"; +import { logger } from "../../utilities/logger.js"; +import { tryCatch } from "@trigger.dev/core"; + +const EventsHistoryOptions = CommonCommandOptions.extend({ + config: z.string().optional(), + projectRef: z.string().optional(), + envFile: z.string().optional(), + from: z.string().optional(), + to: z.string().optional(), + limit: z.coerce.number().int().optional(), + cursor: z.string().optional(), +}); + +type EventsHistoryOptions = z.infer; + +export function configureEventsHistoryCommand(program: Command) { + return commonOptions( + program + .command("history ") + .description("Show publish history for an event type") + .option("-c, --config ", "The name of the config file") + .option("-p, --project-ref ", "The project ref") + .option("--env-file ", "Path to the .env file") + .option("--from ", "Start date (ISO 8601)") + .option("--to ", "End date (ISO 8601)") + .option("--limit ", "Max results (default 50, max 200)") + .option("--cursor ", "Pagination cursor from previous response") + ).action(async (eventId: string, options) => { + await handleTelemetry(async () => { + await printInitialBanner(false, options.profile); + await eventsHistoryCommand({ ...options, eventId }); + }); + }); +} + +const EventsHistoryCommandInput = EventsHistoryOptions.extend({ + eventId: z.string(), +}); + +type EventsHistoryCommandInput = z.infer; + +async function eventsHistoryCommand(options: unknown) { + return await wrapCommandAction( + "eventsHistoryCommand", + EventsHistoryCommandInput, + options, + async (opts) => { + return await _eventsHistoryCommand(opts); + } + ); +} + +async function _eventsHistoryCommand(options: EventsHistoryCommandInput) { + intro(`Event history for "${options.eventId}"`); + + const envVars = resolveLocalEnvVars(options.envFile); + + const authentication = await isLoggedIn(options.profile); + if (!authentication.ok) { + outro(`Not logged in. Use \`trigger login\` first.`); + return; + } + + const [configError, resolvedConfig] = await tryCatch( + loadConfig({ + overrides: { project: options.projectRef ?? envVars.TRIGGER_PROJECT_REF }, + configFile: options.config, + warn: false, + }) + ); + + if (configError || !resolvedConfig?.project) { + outro("Could not resolve project. Use --project-ref or configure trigger.config.ts."); + return; + } + + const loadingSpinner = spinner(); + loadingSpinner.start("Fetching event history..."); + + const apiClient = new CliApiClient(authentication.auth.apiUrl, authentication.auth.accessToken); + const result = await apiClient.getEventHistory(resolvedConfig.project, options.eventId, { + from: options.from, + to: options.to, + limit: options.limit, + cursor: options.cursor, + }); + + if (!result.success) { + loadingSpinner.stop("Failed to fetch event history"); + logger.error(result.error); + return; + } + + const { data, pagination } = result.data; + loadingSpinner.stop(`Found ${data.length} event(s)`); + + if (data.length === 0) { + outro("No events found for the given criteria."); + return; + } + + logger.table( + data.map((evt) => ({ + published: evt.publishedAt, + eventId: evt.eventId, + fanOut: String(evt.fanOutCount), + idempotencyKey: evt.idempotencyKey ?? "-", + tags: evt.tags?.join(", ") ?? "-", + })) + ); + + if (pagination.hasMore && pagination.cursor) { + logger.info(`\nMore results available. Use --cursor ${pagination.cursor} to see next page.`); + } +} diff --git a/packages/cli-v3/src/commands/events/index.ts b/packages/cli-v3/src/commands/events/index.ts index fb55f6ed5b7..9d5d3b8fdbe 100644 --- a/packages/cli-v3/src/commands/events/index.ts +++ b/packages/cli-v3/src/commands/events/index.ts @@ -1,6 +1,9 @@ import { Command } from "commander"; +import { configureEventsDlqCommand } from "./dlq.js"; +import { configureEventsHistoryCommand } from "./history.js"; import { configureEventsListCommand } from "./list.js"; import { configureEventsPublishCommand } from "./publish.js"; +import { configureEventsReplayCommand } from "./replay.js"; export function configureEventsCommand(program: Command) { const events = program @@ -9,6 +12,9 @@ export function configureEventsCommand(program: Command) { configureEventsListCommand(events); configureEventsPublishCommand(events); + configureEventsHistoryCommand(events); + configureEventsReplayCommand(events); + configureEventsDlqCommand(events); return events; } diff --git a/packages/cli-v3/src/commands/events/replay.ts b/packages/cli-v3/src/commands/events/replay.ts new file mode 100644 index 00000000000..66423198f7f --- /dev/null +++ b/packages/cli-v3/src/commands/events/replay.ts @@ -0,0 +1,123 @@ +import { Command } from "commander"; +import { z } from "zod"; +import { + CommonCommandOptions, + commonOptions, + handleTelemetry, + wrapCommandAction, +} from "../../cli/common.js"; +import { printInitialBanner } from "../../utilities/initialBanner.js"; +import { isLoggedIn } from "../../utilities/session.js"; +import { loadConfig } from "../../config.js"; +import { resolveLocalEnvVars } from "../../utilities/localEnvVars.js"; +import { CliApiClient } from "../../apiClient.js"; +import { intro, outro } from "@clack/prompts"; +import { spinner } from "../../utilities/windows.js"; +import { logger } from "../../utilities/logger.js"; +import { tryCatch } from "@trigger.dev/core"; + +const EventsReplayOptions = CommonCommandOptions.extend({ + config: z.string().optional(), + projectRef: z.string().optional(), + envFile: z.string().optional(), + from: z.string(), + to: z.string(), + tasks: z.string().optional(), + dryRun: z.boolean().optional(), +}); + +type EventsReplayOptions = z.infer; + +export function configureEventsReplayCommand(program: Command) { + return commonOptions( + program + .command("replay ") + .description("Replay historical events to re-trigger subscriber runs") + .requiredOption("--from ", "Start date (ISO 8601)") + .requiredOption("--to ", "End date (ISO 8601)") + .option("-c, --config ", "The name of the config file") + .option("-p, --project-ref ", "The project ref") + .option("--env-file ", "Path to the .env file") + .option("--tasks ", "Comma-separated task slugs to replay to (default: all)") + .option("--dry-run", "Preview replay without triggering runs") + ).action(async (eventId: string, options) => { + await handleTelemetry(async () => { + await printInitialBanner(false, options.profile); + await eventsReplayCommand({ ...options, eventId }); + }); + }); +} + +const EventsReplayCommandInput = EventsReplayOptions.extend({ + eventId: z.string(), +}); + +type EventsReplayCommandInput = z.infer; + +async function eventsReplayCommand(options: unknown) { + return await wrapCommandAction( + "eventsReplayCommand", + EventsReplayCommandInput, + options, + async (opts) => { + return await _eventsReplayCommand(opts); + } + ); +} + +async function _eventsReplayCommand(options: EventsReplayCommandInput) { + intro(`Replaying events for "${options.eventId}"`); + + const envVars = resolveLocalEnvVars(options.envFile); + + const authentication = await isLoggedIn(options.profile); + if (!authentication.ok) { + outro(`Not logged in. Use \`trigger login\` first.`); + return; + } + + const [configError, resolvedConfig] = await tryCatch( + loadConfig({ + overrides: { project: options.projectRef ?? envVars.TRIGGER_PROJECT_REF }, + configFile: options.config, + warn: false, + }) + ); + + if (configError || !resolvedConfig?.project) { + outro("Could not resolve project. Use --project-ref or configure trigger.config.ts."); + return; + } + + const loadingSpinner = spinner(); + loadingSpinner.start(options.dryRun ? "Running dry-run replay..." : "Replaying events..."); + + const apiClient = new CliApiClient(authentication.auth.apiUrl, authentication.auth.accessToken); + const result = await apiClient.replayEvents(resolvedConfig.project, options.eventId, { + from: options.from, + to: options.to, + tasks: options.tasks ? options.tasks.split(",").map((t: string) => t.trim()) : undefined, + dryRun: options.dryRun, + }); + + if (!result.success) { + loadingSpinner.stop("Failed to replay events"); + logger.error(result.error); + return; + } + + const { replayedCount, skippedCount, dryRun, runs } = result.data; + loadingSpinner.stop(dryRun ? "Dry run complete" : "Replay complete"); + + logger.info(`Replayed: ${replayedCount}, Skipped: ${skippedCount}${dryRun ? " (dry run)" : ""}`); + + if (runs && runs.length > 0) { + logger.table( + runs.map((run) => ({ + task: run.taskIdentifier, + runId: run.runId, + sourceEvent: run.sourceEventId, + })) + ); + } +} From bd74a50bb736a96ebbd353856c386c94542d6dd7 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Tue, 3 Mar 2026 22:57:23 -0800 Subject: [PATCH 62/65] test(events): add Redis-backed rate limiter integration tests Add 3 redisTest integration tests for RedisEventRateLimitChecker: - allows requests within limit - blocks requests exceeding limit - isolates keys from each other Uses @internal/testcontainers redisTest fixture with ioredis adapter. Total: 14 tests (11 existing + 3 new Redis tests), all passing. Co-Authored-By: Claude Opus 4.6 --- .../test/engine/eventRateLimiter.test.ts | 104 ++++++++++++++++++ 1 file changed, 104 insertions(+) diff --git a/apps/webapp/test/engine/eventRateLimiter.test.ts b/apps/webapp/test/engine/eventRateLimiter.test.ts index c175ee6db34..985ca5da744 100644 --- a/apps/webapp/test/engine/eventRateLimiter.test.ts +++ b/apps/webapp/test/engine/eventRateLimiter.test.ts @@ -1,6 +1,9 @@ import { describe, expect, test } from "vitest"; +import { redisTest } from "@internal/testcontainers"; +import { createRedisClient } from "@internal/redis"; import { InMemoryEventRateLimitChecker, + RedisEventRateLimitChecker, windowToMs, parseEventRateLimitConfig, } from "../../app/v3/services/events/eventRateLimiter.server"; @@ -112,3 +115,104 @@ describe("InMemoryEventRateLimitChecker", () => { expect(afterReset.allowed).toBe(true); }); }); + +// ─── RedisEventRateLimitChecker (needs Redis container) ─── + +function createUpstashAdapter(redis: ReturnType) { + return { + sadd: async (key: string, ...members: TData[]): Promise => { + return redis.sadd(key, members as (string | number | Buffer)[]); + }, + hset: ( + key: string, + obj: { + [key: string]: TValue; + } + ): Promise => { + return redis.hset(key, obj); + }, + eval: ( + ...args: [script: string, keys: string[], args: TArgs] + ): Promise => { + const script = args[0]; + const keys = args[1]; + const argsArray = args[2]; + return redis.eval( + script, + keys.length, + ...keys, + ...(argsArray as (string | Buffer | number)[]) + ) as Promise; + }, + }; +} + +redisTest( + "RedisEventRateLimitChecker allows requests within limit", + { timeout: 30_000 }, + async ({ redisOptions }) => { + const redis = createRedisClient("test:rateLimiter", redisOptions); + + try { + const checker = new RedisEventRateLimitChecker(createUpstashAdapter(redis)); + const config = { limit: 3, window: "10s" }; + + const r1 = await checker.check("redis-key-1", config); + expect(r1.allowed).toBe(true); + + const r2 = await checker.check("redis-key-1", config); + expect(r2.allowed).toBe(true); + + const r3 = await checker.check("redis-key-1", config); + expect(r3.allowed).toBe(true); + } finally { + redis.disconnect(); + } + } +); + +redisTest( + "RedisEventRateLimitChecker blocks requests exceeding limit", + { timeout: 30_000 }, + async ({ redisOptions }) => { + const redis = createRedisClient("test:rateLimiter", redisOptions); + + try { + const checker = new RedisEventRateLimitChecker(createUpstashAdapter(redis)); + const config = { limit: 2, window: "10s" }; + + await checker.check("redis-key-2", config); + await checker.check("redis-key-2", config); + + const result = await checker.check("redis-key-2", config); + expect(result.allowed).toBe(false); + expect(result.remaining).toBe(0); + } finally { + redis.disconnect(); + } + } +); + +redisTest( + "RedisEventRateLimitChecker isolates keys", + { timeout: 30_000 }, + async ({ redisOptions }) => { + const redis = createRedisClient("test:rateLimiter", redisOptions); + + try { + const checker = new RedisEventRateLimitChecker(createUpstashAdapter(redis)); + const config = { limit: 1, window: "10s" }; + + const r1 = await checker.check("redis-key-a", config); + expect(r1.allowed).toBe(true); + + const r2 = await checker.check("redis-key-b", config); + expect(r2.allowed).toBe(true); + + const r3 = await checker.check("redis-key-a", config); + expect(r3.allowed).toBe(false); + } finally { + redis.disconnect(); + } + } +); From ef2ec8678d4005283392dc268d9fd74026deef84 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Tue, 3 Mar 2026 23:07:53 -0800 Subject: [PATCH 63/65] feat(events): consumer-side rate limiting + metrics endpoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Consumer-side rate limiting: - Add `rateLimit` JSON field to EventSubscription model - Add `consumerRateLimit` option to TaskOptionsWithEvent in SDK - Propagate through manifest pipeline → createBackgroundWorker - PublishEventService checks per-subscriber rate limits during fan-out - Rate-limited subscribers are skipped with warning log Metrics endpoint: - Add GET /api/v1/events/:eventId/metrics for backpressure monitoring - Returns subscriber health (active/disabled/filters/rate limits) - Returns DLQ depth (pending/retried/discarded counts) - Returns event-level rate limit configuration - Add GetEventMetricsResponseBody schema Co-Authored-By: Claude Opus 4.6 --- .../routes/api.v1.events.$eventId.metrics.ts | 111 ++++++++++++++++++ .../services/createBackgroundWorker.server.ts | 2 + .../v3/services/events/publishEvent.server.ts | 27 ++++- .../migration.sql | 2 + .../database/prisma/schema.prisma | 1 + packages/core/src/v3/schemas/api.ts | 35 ++++++ packages/core/src/v3/schemas/resources.ts | 4 + packages/core/src/v3/schemas/schemas.ts | 5 + packages/core/src/v3/types/tasks.ts | 2 + packages/trigger-sdk/src/v3/shared.ts | 2 + 10 files changed, 189 insertions(+), 2 deletions(-) create mode 100644 apps/webapp/app/routes/api.v1.events.$eventId.metrics.ts create mode 100644 internal-packages/database/prisma/migrations/20260304065828_add_rate_limit_to_event_subscription/migration.sql diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.metrics.ts b/apps/webapp/app/routes/api.v1.events.$eventId.metrics.ts new file mode 100644 index 00000000000..71ec976ed04 --- /dev/null +++ b/apps/webapp/app/routes/api.v1.events.$eventId.metrics.ts @@ -0,0 +1,111 @@ +import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; +import { prisma } from "~/db.server"; +import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; +import { parseEventRateLimitConfig } from "~/v3/services/events/eventRateLimiter.server"; + +const ParamsSchema = z.object({ + eventId: z.string(), +}); + +export const loader = createLoaderApiRoute( + { + params: ParamsSchema, + corsStrategy: "all", + authorization: { + action: "read", + resource: (_resource, params) => ({ tasks: params.eventId }), + superScopes: ["read:runs", "read:all", "admin"], + }, + findResource: async () => 1 as const, + }, + async ({ params, authentication }) => { + const environment = authentication.environment; + + // Find event definition + const eventDef = await prisma.eventDefinition.findFirst({ + where: { + slug: params.eventId, + projectId: environment.projectId, + }, + orderBy: { createdAt: "desc" }, + }); + + if (!eventDef) { + return json({ error: `Event "${params.eventId}" not found` }, { status: 404 }); + } + + // Get subscribers + const subscriptions = await prisma.eventSubscription.findMany({ + where: { + eventDefinitionId: eventDef.id, + environmentId: environment.id, + }, + select: { + taskSlug: true, + enabled: true, + rateLimit: true, + filter: true, + consumerGroup: true, + }, + }); + + const activeCount = subscriptions.filter((s) => s.enabled).length; + const disabledCount = subscriptions.length - activeCount; + + // Get DLQ counts + const [pendingCount, retriedCount, discardedCount] = await Promise.all([ + prisma.deadLetterEvent.count({ + where: { + eventType: params.eventId, + projectId: environment.projectId, + environmentId: environment.id, + status: "PENDING", + }, + }), + prisma.deadLetterEvent.count({ + where: { + eventType: params.eventId, + projectId: environment.projectId, + environmentId: environment.id, + status: "RETRIED", + }, + }), + prisma.deadLetterEvent.count({ + where: { + eventType: params.eventId, + projectId: environment.projectId, + environmentId: environment.id, + status: "DISCARDED", + }, + }), + ]); + + // Parse rate limit config + const rateLimitConfig = parseEventRateLimitConfig(eventDef.rateLimit); + + return json({ + eventType: params.eventId, + subscribers: { + total: subscriptions.length, + active: activeCount, + disabled: disabledCount, + list: subscriptions.map((s) => ({ + taskSlug: s.taskSlug, + enabled: s.enabled, + hasRateLimit: !!s.rateLimit, + hasFilter: !!s.filter, + consumerGroup: s.consumerGroup, + })), + }, + dlq: { + pending: pendingCount, + retried: retriedCount, + discarded: discardedCount, + }, + rateLimit: rateLimitConfig + ? { limit: rateLimitConfig.limit, window: rateLimitConfig.window } + : null, + }); + } +); diff --git a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts index 767132bc303..edd0ced9e47 100644 --- a/apps/webapp/app/v3/services/createBackgroundWorker.server.ts +++ b/apps/webapp/app/v3/services/createBackgroundWorker.server.ts @@ -460,6 +460,7 @@ async function syncWorkerEvents( filter: (task.onEventFilter as any) ?? undefined, pattern: task.onEventPattern ?? undefined, consumerGroup: task.onEventConsumerGroup ?? undefined, + rateLimit: (task.onEventConsumerRateLimit as any) ?? undefined, }, update: { workerId: worker.id, @@ -467,6 +468,7 @@ async function syncWorkerEvents( filter: (task.onEventFilter as any) ?? undefined, pattern: task.onEventPattern ?? undefined, consumerGroup: task.onEventConsumerGroup ?? undefined, + rateLimit: (task.onEventConsumerRateLimit as any) ?? undefined, }, }); diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index acfae8c9678..5836c66013a 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -278,11 +278,34 @@ export class PublishEventService extends BaseService { ); } - // 7. Fan out: trigger each matching subscribed task + // 7. Check per-subscriber rate limits and fan out const runs: PublishEventResult["runs"] = []; for (const subscription of subscriptionsToTrigger) { try { + // Check per-subscriber rate limit (if configured) + if (this._rateLimitChecker && subscription.rateLimit) { + const subRateLimitConfig = parseEventRateLimitConfig(subscription.rateLimit); + if (subRateLimitConfig) { + const subRateLimitKey = `consumer:${subscription.taskSlug}:${eventSlug}`; + const subRateLimitResult = await this._rateLimitChecker.check( + subRateLimitKey, + subRateLimitConfig + ); + if (!subRateLimitResult.allowed) { + logger.warn("Subscriber rate limit exceeded, skipping", { + eventSlug, + eventId, + taskSlug: subscription.taskSlug, + limit: subRateLimitResult.limit, + retryAfter: subRateLimitResult.retryAfter, + }); + span.setAttribute("consumerRateLimited", true); + continue; + } + } + } + // Derive per-consumer idempotency key if a global one was provided const consumerIdempotencyKey = options.idempotencyKey ? `${options.idempotencyKey}:${subscription.taskSlug}` @@ -396,7 +419,7 @@ export class PublishEventService extends BaseService { * Different eventIds distribute evenly across group members. */ private applyConsumerGroups( - subscriptions: Array<{ id: string; consumerGroup: string | null; taskSlug: string }>, + subscriptions: Array<{ id: string; consumerGroup: string | null; taskSlug: string; rateLimit: unknown }>, eventId?: string ): typeof subscriptions { const ungrouped: typeof subscriptions = []; diff --git a/internal-packages/database/prisma/migrations/20260304065828_add_rate_limit_to_event_subscription/migration.sql b/internal-packages/database/prisma/migrations/20260304065828_add_rate_limit_to_event_subscription/migration.sql new file mode 100644 index 00000000000..685af9af1b1 --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260304065828_add_rate_limit_to_event_subscription/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "public"."EventSubscription" ADD COLUMN "rateLimit" JSONB; diff --git a/internal-packages/database/prisma/schema.prisma b/internal-packages/database/prisma/schema.prisma index 71ab6c62604..d119f2ea64d 100644 --- a/internal-packages/database/prisma/schema.prisma +++ b/internal-packages/database/prisma/schema.prisma @@ -639,6 +639,7 @@ model EventSubscription { filter Json? // EventFilter (Phase 2) pattern String? // Wildcard pattern (Phase 2) consumerGroup String? // Consumer group name (Phase 5) + rateLimit Json? // Per-subscriber rate limit { limit, window } enabled Boolean @default(true) priority Int @default(0) diff --git a/packages/core/src/v3/schemas/api.ts b/packages/core/src/v3/schemas/api.ts index 65754d2f2ea..8d71f46e8dc 100644 --- a/packages/core/src/v3/schemas/api.ts +++ b/packages/core/src/v3/schemas/api.ts @@ -1859,3 +1859,38 @@ export const GetEventStatsResponseBody = z.object({ }); export type GetEventStatsResponseBody = z.infer; + +// ---- Event Metrics schemas ---- + +export const EventMetricsSubscriber = z.object({ + taskSlug: z.string(), + enabled: z.boolean(), + hasRateLimit: z.boolean(), + hasFilter: z.boolean(), + consumerGroup: z.string().nullable(), +}); + +export type EventMetricsSubscriber = z.infer; + +export const GetEventMetricsResponseBody = z.object({ + eventType: z.string(), + subscribers: z.object({ + total: z.number().int(), + active: z.number().int(), + disabled: z.number().int(), + list: z.array(EventMetricsSubscriber), + }), + dlq: z.object({ + pending: z.number().int(), + retried: z.number().int(), + discarded: z.number().int(), + }), + rateLimit: z + .object({ + limit: z.number().int(), + window: z.string(), + }) + .nullable(), +}); + +export type GetEventMetricsResponseBody = z.infer; diff --git a/packages/core/src/v3/schemas/resources.ts b/packages/core/src/v3/schemas/resources.ts index ea9cc3d047c..59fe9d0a5cd 100644 --- a/packages/core/src/v3/schemas/resources.ts +++ b/packages/core/src/v3/schemas/resources.ts @@ -19,6 +19,10 @@ export const TaskResource = z.object({ onEventFilter: z.unknown().optional(), onEventPattern: z.string().optional(), onEventConsumerGroup: z.string().optional(), + onEventConsumerRateLimit: z.object({ + limit: z.number().int().positive(), + window: z.string(), + }).optional(), }); export type TaskResource = z.infer; diff --git a/packages/core/src/v3/schemas/schemas.ts b/packages/core/src/v3/schemas/schemas.ts index f03f4faed9b..105742aa007 100644 --- a/packages/core/src/v3/schemas/schemas.ts +++ b/packages/core/src/v3/schemas/schemas.ts @@ -240,6 +240,11 @@ const taskMetadata = { onEventPattern: z.string().optional(), /** Consumer group name — within a group, only one task receives each event */ onEventConsumerGroup: z.string().optional(), + /** Per-subscriber rate limit — controls how fast this task receives events */ + onEventConsumerRateLimit: z.object({ + limit: z.number().int().positive(), + window: z.string(), + }).optional(), }; export const TaskMetadata = z.object(taskMetadata); diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index f36aa832fd8..3f5d005e655 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -424,6 +424,8 @@ export type TaskOptionsWithEvent< filter?: import("../schemas/eventFilter.js").EventFilter; /** Consumer group — within a group, only one task receives each event */ consumerGroup?: string; + /** Per-subscriber rate limit — controls how fast this task receives events */ + consumerRateLimit?: { limit: number; window: string }; }; declare const __output: unique symbol; diff --git a/packages/trigger-sdk/src/v3/shared.ts b/packages/trigger-sdk/src/v3/shared.ts index bc59134aac1..f6dc6c63f3d 100644 --- a/packages/trigger-sdk/src/v3/shared.ts +++ b/packages/trigger-sdk/src/v3/shared.ts @@ -250,6 +250,7 @@ export function createTask< const onEventFilter = "filter" in params && params.filter ? params.filter : undefined; const onEventPattern = eventSource && "pattern" in eventSource ? eventSource.pattern : undefined; const onEventConsumerGroup = "consumerGroup" in params && params.consumerGroup ? params.consumerGroup as string : undefined; + const onEventConsumerRateLimit = "consumerRateLimit" in params && params.consumerRateLimit ? params.consumerRateLimit as { limit: number; window: string } : undefined; resourceCatalog.registerTaskMetadata({ id: params.id, @@ -263,6 +264,7 @@ export function createTask< onEventFilter, onEventPattern, onEventConsumerGroup, + onEventConsumerRateLimit, fns: { run: params.run, }, From 6d5533bc9b4a8c32f4fadb454c4e7a1b5b19f079 Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Tue, 3 Mar 2026 23:43:15 -0800 Subject: [PATCH 64/65] fix(events): ClickHouse DateTime64 trailing 'Z' bug + ReplayEventsService tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fix ClickHouse DateTime64(3) parameter parsing — strip trailing 'Z' from ISO strings since DateTime64 without timezone specifier rejects it. Applied to ReplayEventsService and history endpoint. Add 6 integration tests for ReplayEventsService using containerTest (Postgres + Redis + ClickHouse testcontainers): - No events in range returns 0 - Dry run returns count without publishing - Replays events and triggers subscriber runs - EventFilter narrows replayed events - Malformed payloads are skipped gracefully - Tags from original events are preserved Co-Authored-By: Claude Opus 4.6 --- .../routes/api.v1.events.$eventId.history.ts | 10 +- .../v3/services/events/replayEvents.server.ts | 4 +- apps/webapp/test/engine/replayEvents.test.ts | 616 ++++++++++++++++++ 3 files changed, 625 insertions(+), 5 deletions(-) create mode 100644 apps/webapp/test/engine/replayEvents.test.ts diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.history.ts b/apps/webapp/app/routes/api.v1.events.$eventId.history.ts index d1593941c42..cef55becc3a 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.history.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.history.ts @@ -40,11 +40,13 @@ export const loader = createLoaderApiRoute( }); if (from) { - queryBuilder.where("published_at >= {from: DateTime64(3)}", { from }); + queryBuilder.where("published_at >= {from: DateTime64(3)}", { + from: from.replace("Z", ""), + }); } if (to) { - queryBuilder.where("published_at <= {to: DateTime64(3)}", { to }); + queryBuilder.where("published_at <= {to: DateTime64(3)}", { to: to.replace("Z", "") }); } if (publisherRunId) { @@ -52,7 +54,9 @@ export const loader = createLoaderApiRoute( } if (cursor) { - queryBuilder.where("published_at < {cursor: DateTime64(3)}", { cursor }); + queryBuilder.where("published_at < {cursor: DateTime64(3)}", { + cursor: cursor.replace("Z", ""), + }); } queryBuilder.orderBy("published_at DESC, event_id DESC").limit(limit + 1); diff --git a/apps/webapp/app/v3/services/events/replayEvents.server.ts b/apps/webapp/app/v3/services/events/replayEvents.server.ts index 1f6f978baf1..4a54990f972 100644 --- a/apps/webapp/app/v3/services/events/replayEvents.server.ts +++ b/apps/webapp/app/v3/services/events/replayEvents.server.ts @@ -66,10 +66,10 @@ export class ReplayEventsService extends BaseService { eventType: params.eventSlug, }) .where("published_at >= {from: DateTime64(3)}", { - from: params.from.toISOString(), + from: params.from.toISOString().replace("Z", ""), }) .where("published_at <= {to: DateTime64(3)}", { - to: params.to.toISOString(), + to: params.to.toISOString().replace("Z", ""), }) .orderBy("published_at ASC, event_id ASC") .limit(MAX_REPLAY_EVENTS); diff --git a/apps/webapp/test/engine/replayEvents.test.ts b/apps/webapp/test/engine/replayEvents.test.ts new file mode 100644 index 00000000000..bb66d4e3bfb --- /dev/null +++ b/apps/webapp/test/engine/replayEvents.test.ts @@ -0,0 +1,616 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client (required for webapp service imports) +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +vi.mock("~/services/platform.v3.server", async (importOriginal) => { + const actual = (await importOriginal()) as Record; + return { + ...actual, + getEntitlement: vi.fn(), + }; +}); + +import { RunEngine } from "@internal/run-engine"; +import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/run-engine/tests"; +import { containerTest } from "@internal/testcontainers"; +import { trace } from "@opentelemetry/api"; +import { IOPacket } from "@trigger.dev/core/v3"; +import { TaskRun } from "@trigger.dev/database"; +import { ClickHouse } from "@internal/clickhouse"; +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import { DefaultQueueManager } from "~/runEngine/concerns/queues.server"; +import { + EntitlementValidationParams, + MaxAttemptsValidationParams, + ParentRunValidationParams, + PayloadProcessor, + TagValidationParams, + TracedEventSpan, + TraceEventConcern, + TriggerRacepoints, + TriggerRacepointSystem, + TriggerTaskRequest, + TriggerTaskValidator, + ValidationResult, +} from "~/runEngine/types"; +import { RunEngineTriggerTaskService } from "../../app/runEngine/services/triggerTask.server"; +import { type TriggerFn } from "../../app/v3/services/events/publishEvent.server"; +import { ReplayEventsService } from "../../app/v3/services/events/replayEvents.server"; + +vi.setConfig({ testTimeout: 120_000 }); + +class MockPayloadProcessor implements PayloadProcessor { + async process(request: TriggerTaskRequest): Promise { + return { + data: JSON.stringify(request.body.payload), + dataType: "application/json", + }; + } +} + +class MockTriggerTaskValidator implements TriggerTaskValidator { + validateTags(params: TagValidationParams): ValidationResult { + return { ok: true }; + } + validateEntitlement(params: EntitlementValidationParams): Promise { + return Promise.resolve({ ok: true }); + } + validateMaxAttempts(params: MaxAttemptsValidationParams): ValidationResult { + return { ok: true }; + } + validateParentRun(params: ParentRunValidationParams): ValidationResult { + return { ok: true }; + } +} + +class MockTraceEventConcern implements TraceEventConcern { + async traceRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceIdempotentRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + idempotencyKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceDebouncedRun( + request: TriggerTaskRequest, + parentStore: string | undefined, + options: { + existingRun: TaskRun; + debounceKey: string; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } +} + +function createEngine(prisma: any, redisOptions: any) { + return new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); +} + +function createTriggerTaskService(prisma: any, engine: RunEngine) { + const traceEventConcern = new MockTraceEventConcern(); + return new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern(prisma, engine, traceEventConcern), + validator: new MockTriggerTaskValidator(), + traceEventConcern, + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024, + }); +} + +function buildTriggerFn(prisma: any, engine: RunEngine): TriggerFn { + const svc = createTriggerTaskService(prisma, engine); + return async (taskId, environment, body, options) => { + return svc.call({ + taskId, + environment, + body, + options, + }); + }; +} + +/** Format a Date for ClickHouse DateTime64(3) — strip trailing 'Z' from ISO string */ +function toClickHouseDateTime(date: Date): string { + return date.toISOString().replace("Z", ""); +} + +/** Insert test events directly into ClickHouse event_log_v1 */ +async function insertTestEvents( + clickhouse: ClickHouse, + events: Array<{ + event_id: string; + event_type: string; + payload: unknown; + published_at: Date; + environment_id: string; + project_id: string; + organization_id: string; + tags?: string[]; + }> +) { + const insert = clickhouse.eventLog.insert; + for (const event of events) { + const [err] = await insert({ + event_id: event.event_id, + event_type: event.event_type, + payload: JSON.stringify(event.payload), + payload_type: "application/json", + published_at: toClickHouseDateTime(event.published_at), + environment_id: event.environment_id, + project_id: event.project_id, + organization_id: event.organization_id, + publisher_run_id: "", + idempotency_key: "", + tags: event.tags ?? [], + metadata: "{}", + fan_out_count: 1, + }); + if (err) { + throw new Error(`Failed to insert test event: ${err.message}`); + } + } +} + +describe("ReplayEventsService", () => { + containerTest( + "replay returns 0 when no events exist in date range", + async ({ prisma, redisOptions, clickhouseContainer }) => { + const engine = createEngine(prisma, redisOptions); + const clickhouse = new ClickHouse({ url: clickhouseContainer.getConnectionUrl() }); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new ReplayEventsService(clickhouse, prisma); + + const result = await service.call({ + eventSlug: "order.created", + environment: env, + from: new Date("2026-01-01"), + to: new Date("2026-01-02"), + }); + + expect(result.replayedCount).toBe(0); + expect(result.skippedCount).toBe(0); + expect(result.dryRun).toBe(false); + } finally { + await engine.quit(); + await clickhouse.close(); + } + } + ); + + containerTest( + "dry run returns count without actually publishing", + async ({ prisma, redisOptions, clickhouseContainer }) => { + const engine = createEngine(prisma, redisOptions); + const clickhouse = new ClickHouse({ url: clickhouseContainer.getConnectionUrl() }); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + // Insert test events into ClickHouse + await insertTestEvents(clickhouse, [ + { + event_id: "evt_dry_1", + event_type: "order.created", + payload: { orderId: "o1" }, + published_at: new Date("2026-01-15T10:00:00Z"), + environment_id: env.id, + project_id: env.projectId, + organization_id: env.organizationId, + }, + { + event_id: "evt_dry_2", + event_type: "order.created", + payload: { orderId: "o2" }, + published_at: new Date("2026-01-15T11:00:00Z"), + environment_id: env.id, + project_id: env.projectId, + organization_id: env.organizationId, + }, + ]); + + const service = new ReplayEventsService(clickhouse, prisma); + + const result = await service.call({ + eventSlug: "order.created", + environment: env, + from: new Date("2026-01-01"), + to: new Date("2026-02-01"), + dryRun: true, + }); + + expect(result.dryRun).toBe(true); + expect(result.replayedCount).toBe(2); + expect(result.skippedCount).toBe(0); + expect(result.runs).toBeUndefined(); + } finally { + await engine.quit(); + await clickhouse.close(); + } + } + ); + + containerTest( + "replay re-publishes events and triggers subscriber runs", + async ({ prisma, redisOptions, clickhouseContainer }) => { + const engine = createEngine(prisma, redisOptions); + const clickhouse = new ClickHouse({ url: clickhouseContainer.getConnectionUrl() }); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "order-handler"); + + // Create event definition and subscription + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.created", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "order-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + // Insert test events into ClickHouse + await insertTestEvents(clickhouse, [ + { + event_id: "evt_replay_1", + event_type: "order.created", + payload: { orderId: "o1", amount: 100 }, + published_at: new Date("2026-01-15T10:00:00Z"), + environment_id: env.id, + project_id: env.projectId, + organization_id: env.organizationId, + }, + ]); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new ReplayEventsService(clickhouse, prisma, triggerFn); + + const result = await service.call({ + eventSlug: "order.created", + environment: env, + from: new Date("2026-01-01"), + to: new Date("2026-02-01"), + }); + + expect(result.replayedCount).toBe(1); + expect(result.skippedCount).toBe(0); + expect(result.dryRun).toBe(false); + expect(result.runs).toHaveLength(1); + expect(result.runs![0]!.taskIdentifier).toBe("order-handler"); + expect(result.runs![0]!.sourceEventId).toBe("evt_replay_1"); + } finally { + await engine.quit(); + await clickhouse.close(); + } + } + ); + + containerTest( + "replay applies EventFilter to narrow events", + async ({ prisma, redisOptions, clickhouseContainer }) => { + const engine = createEngine(prisma, redisOptions); + const clickhouse = new ClickHouse({ url: clickhouseContainer.getConnectionUrl() }); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "filtered-handler"); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "order.created", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "filtered-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + // Insert events: one matches filter, one does not + await insertTestEvents(clickhouse, [ + { + event_id: "evt_f1", + event_type: "order.created", + payload: { status: "paid", amount: 500 }, + published_at: new Date("2026-01-15T10:00:00Z"), + environment_id: env.id, + project_id: env.projectId, + organization_id: env.organizationId, + }, + { + event_id: "evt_f2", + event_type: "order.created", + payload: { status: "pending", amount: 50 }, + published_at: new Date("2026-01-15T11:00:00Z"), + environment_id: env.id, + project_id: env.projectId, + organization_id: env.organizationId, + }, + ]); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new ReplayEventsService(clickhouse, prisma, triggerFn); + + const result = await service.call({ + eventSlug: "order.created", + environment: env, + from: new Date("2026-01-01"), + to: new Date("2026-02-01"), + filter: { status: ["paid"] }, + }); + + expect(result.replayedCount).toBe(1); + expect(result.skippedCount).toBe(1); + expect(result.runs).toHaveLength(1); + expect(result.runs![0]!.sourceEventId).toBe("evt_f1"); + } finally { + await engine.quit(); + await clickhouse.close(); + } + } + ); + + containerTest( + "replay skips events with malformed payloads", + async ({ prisma, redisOptions, clickhouseContainer }) => { + const engine = createEngine(prisma, redisOptions); + const clickhouse = new ClickHouse({ url: clickhouseContainer.getConnectionUrl() }); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "bad-payload-handler"); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "malformed.event", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "bad-payload-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + // Insert one good and one malformed event directly via raw ClickHouse client + const insert = clickhouse.eventLog.insert; + await insert({ + event_id: "evt_good", + event_type: "malformed.event", + payload: JSON.stringify({ valid: true }), + payload_type: "application/json", + published_at: toClickHouseDateTime(new Date("2026-01-15T10:00:00Z")), + environment_id: env.id, + project_id: env.projectId, + organization_id: env.organizationId, + publisher_run_id: "", + idempotency_key: "", + tags: [], + metadata: "{}", + fan_out_count: 1, + }); + await insert({ + event_id: "evt_bad", + event_type: "malformed.event", + payload: "NOT_VALID_JSON{{{", + payload_type: "application/json", + published_at: toClickHouseDateTime(new Date("2026-01-15T11:00:00Z")), + environment_id: env.id, + project_id: env.projectId, + organization_id: env.organizationId, + publisher_run_id: "", + idempotency_key: "", + tags: [], + metadata: "{}", + fan_out_count: 1, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new ReplayEventsService(clickhouse, prisma, triggerFn); + + const result = await service.call({ + eventSlug: "malformed.event", + environment: env, + from: new Date("2026-01-01"), + to: new Date("2026-02-01"), + }); + + // The good event should replay, the bad one should be caught by the try/catch + expect(result.replayedCount).toBe(1); + expect(result.runs).toHaveLength(1); + expect(result.runs![0]!.sourceEventId).toBe("evt_good"); + } finally { + await engine.quit(); + await clickhouse.close(); + } + } + ); + + containerTest( + "replay preserves tags from original events", + async ({ prisma, redisOptions, clickhouseContainer }) => { + const engine = createEngine(prisma, redisOptions); + const clickhouse = new ClickHouse({ url: clickhouseContainer.getConnectionUrl() }); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, "tagged-handler"); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "tagged.event", + version: "1.0", + projectId: env.projectId, + }, + }); + + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "tagged-handler", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + await insertTestEvents(clickhouse, [ + { + event_id: "evt_tagged_1", + event_type: "tagged.event", + payload: { data: "hello" }, + published_at: new Date("2026-01-15T10:00:00Z"), + environment_id: env.id, + project_id: env.projectId, + organization_id: env.organizationId, + tags: ["region:us", "priority:high"], + }, + ]); + + // Use a mock triggerFn to capture what gets passed through + let capturedTags: string[] | undefined; + const mockTriggerFn: TriggerFn = async (taskId, environment, body, options) => { + capturedTags = body.options?.tags as string[] | undefined; + return { + run: { + id: "run_internal_1", + friendlyId: "run_mock_1", + }, + }; + }; + + const service = new ReplayEventsService(clickhouse, prisma, mockTriggerFn); + + const result = await service.call({ + eventSlug: "tagged.event", + environment: env, + from: new Date("2026-01-01"), + to: new Date("2026-02-01"), + }); + + expect(result.replayedCount).toBe(1); + expect(result.runs).toHaveLength(1); + expect(result.runs![0]!.sourceEventId).toBe("evt_tagged_1"); + // Verify tags were passed through to the trigger function + expect(capturedTags).toEqual(["region:us", "priority:high"]); + } finally { + await engine.quit(); + await clickhouse.close(); + } + } + ); +}); From 7493b863e88e0f6b8d4937f2ae7057209b6f454b Mon Sep 17 00:00:00 2001 From: Giovanni Borgogno Date: Wed, 4 Mar 2026 20:27:35 -0800 Subject: [PATCH 65/65] =?UTF-8?q?fix(events):=20address=20all=20audit=20fi?= =?UTF-8?q?ndings=20=E2=80=94=20security,=20prod-readiness,=20tests,=20doc?= =?UTF-8?q?s?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit HIGH fixes: - Change changeset bump levels from patch to minor (new feature) - EventLogWriter: use logger.error instead of logger.warn for ClickHouse failures - Log warning when InMemory rate limiter used in production (no RATE_LIMIT_REDIS_HOST) - Fix sendEmail naming collision in SKILL.md docs MEDIUM fixes: - Document consumerRateLimit, metrics endpoint, and DLQ config in events.md - Add z.enum() validation on DLQ status query param (was unvalidated cast) - Add limit param validation (Math.max/min) on DLQ list route - Add fail-open try/catch around rate limiter checks (Redis down → allow publish) - Add defensive safety comment on ClickHouse interval interpolation in stats route Tests added: - DLQ retry: nonexistent ID, already-discarded, retryAll empty - SchemaRegistry DB: registerSchema, upsert, getSchema latest/versioned/null, listSchemas - PublishEvent: payload size limit (512KB → 413), per-subscriber rate limit skipping LOW fixes: - Remove dead EventPatternSource type from core - Make CleanupStaleSubscriptionsService extend BaseService - Drop redundant 2-col EventSubscription index (3-col supersedes it) - Parallelize fan-out with Promise.allSettled for better throughput Co-Authored-By: Claude Opus 4.6 --- .changeset/event-dead-letter-queue.md | 4 +- .changeset/event-docs-cli-dx.md | 4 +- .changeset/event-observability-dx.md | 8 +- .changeset/event-ordering-consumer-groups.md | 6 +- .changeset/event-persistence-replay.md | 4 +- .changeset/event-publish-and-wait.md | 6 +- .changeset/event-rate-limiting.md | 8 +- .changeset/event-schema-registry.md | 6 +- .changeset/event-smart-routing.md | 6 +- .claude/skills/trigger-dev-tasks/SKILL.md | 2 +- .../routes/api.v1.events.$eventId.stats.ts | 4 +- apps/webapp/app/routes/api.v1.events.dlq.ts | 22 +- .../cleanupStaleSubscriptions.server.ts | 5 +- .../services/events/eventLogWriter.server.ts | 4 +- .../events/eventRateLimiterGlobal.server.ts | 8 +- .../v3/services/events/publishEvent.server.ts | 157 +++++++------ .../test/engine/deadLetterManagement.test.ts | 59 +++++ apps/webapp/test/engine/publishEvent.test.ts | 98 ++++++++ .../test/engine/schemaRegistryDb.test.ts | 212 ++++++++++++++++++ .../migration.sql | 2 + packages/core/src/v3/types/tasks.ts | 6 - rules/4.4.0/events.md | 38 ++++ 22 files changed, 558 insertions(+), 111 deletions(-) create mode 100644 apps/webapp/test/engine/schemaRegistryDb.test.ts create mode 100644 internal-packages/database/prisma/migrations/20260305000000_drop_redundant_event_subscription_idx/migration.sql diff --git a/.changeset/event-dead-letter-queue.md b/.changeset/event-dead-letter-queue.md index a6b718dfcc9..11e7abc918d 100644 --- a/.changeset/event-dead-letter-queue.md +++ b/.changeset/event-dead-letter-queue.md @@ -1,6 +1,6 @@ --- -"@trigger.dev/core": patch -"trigger.dev": patch +"@trigger.dev/core": minor +"trigger.dev": minor --- Add dead letter queue for failed event-triggered task runs diff --git a/.changeset/event-docs-cli-dx.md b/.changeset/event-docs-cli-dx.md index 648f0217d7c..3c7f7d71474 100644 --- a/.changeset/event-docs-cli-dx.md +++ b/.changeset/event-docs-cli-dx.md @@ -1,6 +1,6 @@ --- -"@trigger.dev/sdk": patch -"trigger.dev": patch +"@trigger.dev/sdk": minor +"trigger.dev": minor --- Add event system documentation, CLI commands, and developer experience improvements. diff --git a/.changeset/event-observability-dx.md b/.changeset/event-observability-dx.md index b2689f5ac5c..8083686d69d 100644 --- a/.changeset/event-observability-dx.md +++ b/.changeset/event-observability-dx.md @@ -1,8 +1,8 @@ --- -"@trigger.dev/core": patch -"@trigger.dev/sdk": patch -"@internal/clickhouse": patch -"apps-webapp": patch +"@trigger.dev/core": minor +"@trigger.dev/sdk": minor +"@internal/clickhouse": minor +"apps-webapp": minor --- Add observability and developer experience improvements to the event system. diff --git a/.changeset/event-ordering-consumer-groups.md b/.changeset/event-ordering-consumer-groups.md index 586b2347ba5..0d81c2b7a1c 100644 --- a/.changeset/event-ordering-consumer-groups.md +++ b/.changeset/event-ordering-consumer-groups.md @@ -1,7 +1,7 @@ --- -"@trigger.dev/core": patch -"@trigger.dev/sdk": patch -"trigger.dev": patch +"@trigger.dev/core": minor +"@trigger.dev/sdk": minor +"trigger.dev": minor --- Add ordering keys and consumer groups for event subscriptions diff --git a/.changeset/event-persistence-replay.md b/.changeset/event-persistence-replay.md index 80c455dcce4..bcee0ce320f 100644 --- a/.changeset/event-persistence-replay.md +++ b/.changeset/event-persistence-replay.md @@ -1,6 +1,6 @@ --- -"@trigger.dev/core": patch -"trigger.dev": patch +"@trigger.dev/core": minor +"trigger.dev": minor --- Add event persistence in ClickHouse and replay API for pub/sub events diff --git a/.changeset/event-publish-and-wait.md b/.changeset/event-publish-and-wait.md index c4a94ec8cf5..21a410ad3dd 100644 --- a/.changeset/event-publish-and-wait.md +++ b/.changeset/event-publish-and-wait.md @@ -1,7 +1,7 @@ --- -"@trigger.dev/core": patch -"@trigger.dev/sdk": patch -"apps-webapp": patch +"@trigger.dev/core": minor +"@trigger.dev/sdk": minor +"apps-webapp": minor --- Add publishAndWait support to the event system. Events can now be published diff --git a/.changeset/event-rate-limiting.md b/.changeset/event-rate-limiting.md index ede7a7b75ff..54b9ddf58ce 100644 --- a/.changeset/event-rate-limiting.md +++ b/.changeset/event-rate-limiting.md @@ -1,8 +1,8 @@ --- -"@trigger.dev/core": patch -"@trigger.dev/sdk": patch -"@trigger.dev/database": patch -"apps-webapp": patch +"@trigger.dev/core": minor +"@trigger.dev/sdk": minor +"@trigger.dev/database": minor +"apps-webapp": minor --- Add per-event rate limiting to the pub/sub system. Events can now be configured diff --git a/.changeset/event-schema-registry.md b/.changeset/event-schema-registry.md index 4ae5318e773..d431e98e6db 100644 --- a/.changeset/event-schema-registry.md +++ b/.changeset/event-schema-registry.md @@ -1,7 +1,7 @@ --- -"@trigger.dev/core": patch -"@trigger.dev/sdk": patch -"trigger.dev": patch +"@trigger.dev/core": minor +"@trigger.dev/sdk": minor +"trigger.dev": minor --- Add event schema registry with versioning, validation, and discovery API endpoints diff --git a/.changeset/event-smart-routing.md b/.changeset/event-smart-routing.md index 366d4ac06a5..ae27e2025ad 100644 --- a/.changeset/event-smart-routing.md +++ b/.changeset/event-smart-routing.md @@ -1,7 +1,7 @@ --- -"@trigger.dev/core": patch -"@trigger.dev/sdk": patch -"trigger.dev": patch +"@trigger.dev/core": minor +"@trigger.dev/sdk": minor +"trigger.dev": minor --- Add smart routing for events: content-based filters and wildcard pattern subscriptions diff --git a/.claude/skills/trigger-dev-tasks/SKILL.md b/.claude/skills/trigger-dev-tasks/SKILL.md index 3be21348e45..eab4f771d6f 100644 --- a/.claude/skills/trigger-dev-tasks/SKILL.md +++ b/.claude/skills/trigger-dev-tasks/SKILL.md @@ -199,7 +199,7 @@ export const orderCreated = event({ }); // Subscribe task — payload typed from schema -export const sendEmail = task({ +export const sendOrderEmail = task({ id: "send-order-email", on: orderCreated, run: async (payload) => { diff --git a/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts b/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts index 96873fbd69a..e07bdf4b04f 100644 --- a/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts +++ b/apps/webapp/app/routes/api.v1.events.$eventId.stats.ts @@ -22,7 +22,9 @@ export const loader = createLoaderApiRoute( const url = new URL(request.url); const period = url.searchParams.get("period") ?? "24h"; - // Parse period to a ClickHouse interval + // SAFETY: interval is NOT user input — it comes from a closed allowlist below. + // Invalid periods are rejected with 400 before the value is used in the query. + // This is safe from SQL injection because only hardcoded strings can reach the query. const intervalMap: Record = { "1h": "1 HOUR", "6h": "6 HOUR", diff --git a/apps/webapp/app/routes/api.v1.events.dlq.ts b/apps/webapp/app/routes/api.v1.events.dlq.ts index 695f63d5569..fc5f1c0c12b 100644 --- a/apps/webapp/app/routes/api.v1.events.dlq.ts +++ b/apps/webapp/app/routes/api.v1.events.dlq.ts @@ -1,7 +1,10 @@ import { json } from "@remix-run/server-runtime"; +import { z } from "zod"; import { createLoaderApiRoute } from "~/services/routeBuilders/apiBuilder.server"; import { DeadLetterManagementService } from "~/v3/services/events/deadLetterManagement.server"; +const DeadLetterStatusEnum = z.enum(["PENDING", "RETRIED", "DISCARDED"]); + export const loader = createLoaderApiRoute( { corsStrategy: "all", @@ -15,14 +18,17 @@ export const loader = createLoaderApiRoute( async ({ authentication, request }) => { const url = new URL(request.url); const eventType = url.searchParams.get("eventType") ?? undefined; - const status = url.searchParams.get("status") as - | "PENDING" - | "RETRIED" - | "DISCARDED" - | undefined; - const limit = url.searchParams.get("limit") - ? parseInt(url.searchParams.get("limit")!, 10) - : undefined; + const rawStatus = url.searchParams.get("status"); + const statusParse = rawStatus ? DeadLetterStatusEnum.safeParse(rawStatus) : undefined; + if (rawStatus && (!statusParse || !statusParse.success)) { + return json( + { error: `Invalid status "${rawStatus}". Use: PENDING, RETRIED, DISCARDED` }, + { status: 400 } + ); + } + const status = statusParse?.success ? statusParse.data : undefined; + const rawLimit = url.searchParams.get("limit"); + const limit = rawLimit ? Math.max(1, Math.min(parseInt(rawLimit, 10) || 20, 200)) : undefined; const cursor = url.searchParams.get("cursor") ?? undefined; const service = new DeadLetterManagementService(); diff --git a/apps/webapp/app/v3/services/events/cleanupStaleSubscriptions.server.ts b/apps/webapp/app/v3/services/events/cleanupStaleSubscriptions.server.ts index f9e973a9259..856171005ef 100644 --- a/apps/webapp/app/v3/services/events/cleanupStaleSubscriptions.server.ts +++ b/apps/webapp/app/v3/services/events/cleanupStaleSubscriptions.server.ts @@ -1,12 +1,11 @@ -import { PrismaClientOrTransaction } from "~/db.server"; import { logger } from "~/services/logger.server"; +import { BaseService } from "../baseService.server"; /** * Cleans up stale EventSubscriptions — disabled subscriptions whose associated * task no longer exists in any active worker for that environment. */ -export class CleanupStaleSubscriptionsService { - constructor(private readonly _prisma: PrismaClientOrTransaction) {} +export class CleanupStaleSubscriptionsService extends BaseService { async call(): Promise<{ deletedCount: number; scannedCount: number }> { // Find all disabled subscriptions diff --git a/apps/webapp/app/v3/services/events/eventLogWriter.server.ts b/apps/webapp/app/v3/services/events/eventLogWriter.server.ts index b118384b791..cd26ad39153 100644 --- a/apps/webapp/app/v3/services/events/eventLogWriter.server.ts +++ b/apps/webapp/app/v3/services/events/eventLogWriter.server.ts @@ -27,7 +27,7 @@ export function writeEventLog(entry: EventLogEntry): void { insertFn(row).then( ([error]) => { if (error) { - logger.warn("Failed to insert event into ClickHouse event log", { + logger.error("Failed to insert event into ClickHouse event log", { eventId: entry.eventId, eventType: entry.eventType, error: error.message, @@ -35,7 +35,7 @@ export function writeEventLog(entry: EventLogEntry): void { } }, (err) => { - logger.warn("Failed to insert event into ClickHouse event log", { + logger.error("Failed to insert event into ClickHouse event log", { eventId: entry.eventId, eventType: entry.eventType, error: err instanceof Error ? err.message : String(err), diff --git a/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts b/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts index 91f84dd9a9f..6e5f433199f 100644 --- a/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts +++ b/apps/webapp/app/v3/services/events/eventRateLimiterGlobal.server.ts @@ -35,6 +35,12 @@ function initializeRateLimitChecker(): EventRateLimitChecker { return new RedisEventRateLimitChecker(redisClient); } - logger.info("Event rate limiter: using in-memory implementation (no RATE_LIMIT_REDIS_HOST)"); + if (env.NODE_ENV === "production") { + logger.warn( + "Event rate limiter: using in-memory implementation in production (no RATE_LIMIT_REDIS_HOST). Rate limits will be per-process and ineffective across multiple instances." + ); + } else { + logger.info("Event rate limiter: using in-memory implementation (no RATE_LIMIT_REDIS_HOST)"); + } return new InMemoryEventRateLimitChecker(); } diff --git a/apps/webapp/app/v3/services/events/publishEvent.server.ts b/apps/webapp/app/v3/services/events/publishEvent.server.ts index 5836c66013a..2cc274c45c0 100644 --- a/apps/webapp/app/v3/services/events/publishEvent.server.ts +++ b/apps/webapp/app/v3/services/events/publishEvent.server.ts @@ -141,15 +141,25 @@ export class PublishEventService extends BaseService { const rateLimitConfig = parseEventRateLimitConfig(eventDefinition.rateLimit); if (rateLimitConfig) { const rateLimitKey = `${environment.projectId}:${eventSlug}`; - const result = await this._rateLimitChecker.check(rateLimitKey, rateLimitConfig); - if (!result.allowed) { - span.setAttribute("rateLimited", true); - throw new EventPublishRateLimitError( + try { + const result = await this._rateLimitChecker.check(rateLimitKey, rateLimitConfig); + if (!result.allowed) { + span.setAttribute("rateLimited", true); + throw new EventPublishRateLimitError( + eventSlug, + result.limit ?? rateLimitConfig.limit, + result.remaining ?? 0, + result.retryAfter ?? 0 + ); + } + } catch (error) { + if (error instanceof EventPublishRateLimitError) throw error; + // Fail open: if rate limiter backend is down, allow the publish + logger.error("Rate limiter check failed, allowing publish (fail-open)", { eventSlug, - result.limit ?? rateLimitConfig.limit, - result.remaining ?? 0, - result.retryAfter ?? 0 - ); + rateLimitKey, + error: error instanceof Error ? error.message : String(error), + }); } } } @@ -278,16 +288,14 @@ export class PublishEventService extends BaseService { ); } - // 7. Check per-subscriber rate limits and fan out - const runs: PublishEventResult["runs"] = []; - - for (const subscription of subscriptionsToTrigger) { - try { - // Check per-subscriber rate limit (if configured) - if (this._rateLimitChecker && subscription.rateLimit) { - const subRateLimitConfig = parseEventRateLimitConfig(subscription.rateLimit); - if (subRateLimitConfig) { - const subRateLimitKey = `consumer:${subscription.taskSlug}:${eventSlug}`; + // 7. Check per-subscriber rate limits and fan out (parallel) + const triggerPromises = subscriptionsToTrigger.map(async (subscription) => { + // Check per-subscriber rate limit (if configured) + if (this._rateLimitChecker && subscription.rateLimit) { + const subRateLimitConfig = parseEventRateLimitConfig(subscription.rateLimit); + if (subRateLimitConfig) { + const subRateLimitKey = `consumer:${subscription.taskSlug}:${eventSlug}`; + try { const subRateLimitResult = await this._rateLimitChecker.check( subRateLimitKey, subRateLimitConfig @@ -301,53 +309,66 @@ export class PublishEventService extends BaseService { retryAfter: subRateLimitResult.retryAfter, }); span.setAttribute("consumerRateLimited", true); - continue; + return null; } + } catch (rateLimitError) { + // Fail open: if rate limiter backend is down, deliver the event + logger.error("Consumer rate limiter check failed, delivering event (fail-open)", { + eventSlug, + eventId, + taskSlug: subscription.taskSlug, + error: + rateLimitError instanceof Error + ? rateLimitError.message + : String(rateLimitError), + }); } } + } - // Derive per-consumer idempotency key if a global one was provided - const consumerIdempotencyKey = options.idempotencyKey - ? `${options.idempotencyKey}:${subscription.taskSlug}` - : undefined; - - // Merge event context into metadata so DLQ can identify event-triggered runs - const eventMetadata = { - ...(typeof options.metadata === "object" && options.metadata !== null - ? (options.metadata as Record) - : {}), - $$event: { - eventId, - eventType: eventSlug, - sourceEventId: options.idempotencyKey - ? `${options.idempotencyKey}` - : undefined, - }, - }; - - const body: TriggerTaskRequestBody = { - payload, - context: options.context, - options: { - tags: options.tags - ? Array.isArray(options.tags) - ? options.tags - : [options.tags] - : undefined, - metadata: eventMetadata, - delay: options.delay, - concurrencyKey: options.orderingKey - ? `evt:${eventSlug}:${options.orderingKey}` - : undefined, - parentRunId: options.parentRunId, - resumeParentOnCompletion: options.parentRunId ? true : undefined, - }, - }; - - const triggerOptions: TriggerTaskServiceOptions = { - idempotencyKey: consumerIdempotencyKey, - }; + // Derive per-consumer idempotency key if a global one was provided + const consumerIdempotencyKey = options.idempotencyKey + ? `${options.idempotencyKey}:${subscription.taskSlug}` + : undefined; + + // Merge event context into metadata so DLQ can identify event-triggered runs + const eventMetadata = { + ...(typeof options.metadata === "object" && options.metadata !== null + ? (options.metadata as Record) + : {}), + $$event: { + eventId, + eventType: eventSlug, + sourceEventId: options.idempotencyKey + ? `${options.idempotencyKey}` + : undefined, + }, + }; + + const body: TriggerTaskRequestBody = { + payload, + context: options.context, + options: { + tags: options.tags + ? Array.isArray(options.tags) + ? options.tags + : [options.tags] + : undefined, + metadata: eventMetadata, + delay: options.delay, + concurrencyKey: options.orderingKey + ? `evt:${eventSlug}:${options.orderingKey}` + : undefined, + parentRunId: options.parentRunId, + resumeParentOnCompletion: options.parentRunId ? true : undefined, + }, + }; + const triggerOptions: TriggerTaskServiceOptions = { + idempotencyKey: consumerIdempotencyKey, + }; + + try { const result = await this._triggerFn( subscription.taskSlug, environment, @@ -356,14 +377,15 @@ export class PublishEventService extends BaseService { ); if (result) { - runs.push({ + return { taskIdentifier: subscription.taskSlug, runId: result.run.friendlyId, internalRunId: options.parentRunId ? result.run.id : undefined, - }); + }; } + return null; } catch (error) { - // Partial failure: log the error but continue with other subscribers + // Partial failure: log the error but don't block other subscribers logger.error("Failed to trigger task for event subscription", { eventSlug, eventId, @@ -374,6 +396,15 @@ export class PublishEventService extends BaseService { ? { name: error.name, message: error.message, stack: error.stack } : String(error), }); + return null; + } + }); + + const results = await Promise.allSettled(triggerPromises); + const runs: PublishEventResult["runs"] = []; + for (const r of results) { + if (r.status === "fulfilled" && r.value !== null) { + runs.push(r.value); } } diff --git a/apps/webapp/test/engine/deadLetterManagement.test.ts b/apps/webapp/test/engine/deadLetterManagement.test.ts index aafb3d3b97e..6355ac999ee 100644 --- a/apps/webapp/test/engine/deadLetterManagement.test.ts +++ b/apps/webapp/test/engine/deadLetterManagement.test.ts @@ -267,4 +267,63 @@ describe("DeadLetterManagementService", () => { ); } ); + + postgresTest( + "Retry nonexistent ID throws ServiceValidationError", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new DeadLetterManagementService(prisma); + + await expect(service.retry("dle_nonexistent_fake_id", env)).rejects.toThrow( + ServiceValidationError + ); + + await expect(service.retry("dle_nonexistent_fake_id", env)).rejects.toThrow( + "Dead letter event not found or already processed" + ); + } + ); + + postgresTest( + "Retry already-discarded entry throws ServiceValidationError", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const run = await createTaskRun(prisma, env, "retry-discard-task"); + await createDeadLetterEvent(prisma, env, run, { + eventType: "retry.discarded", + status: "DISCARDED", + }); + + const dle = await prisma.deadLetterEvent.findFirst({ + where: { failedRunId: run.id }, + }); + + const service = new DeadLetterManagementService(prisma); + + // Discarded entries should not be retryable + await expect(service.retry(dle!.id, env)).rejects.toThrow( + "Dead letter event not found or already processed" + ); + } + ); + + postgresTest( + "RetryAll with no pending items returns zero counts", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new DeadLetterManagementService(prisma); + + const result = await service.retryAll({ + projectId: env.projectId, + environmentId: env.id, + environment: env, + }); + + expect(result.retriedCount).toBe(0); + expect(result.failedCount).toBe(0); + } + ); }); diff --git a/apps/webapp/test/engine/publishEvent.test.ts b/apps/webapp/test/engine/publishEvent.test.ts index 5c281e816ce..5999b095f84 100644 --- a/apps/webapp/test/engine/publishEvent.test.ts +++ b/apps/webapp/test/engine/publishEvent.test.ts @@ -1519,4 +1519,102 @@ describe("PublishEventService", () => { } } ); + + containerTest( + "payload exceeding 512KB throws 413 error", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + await prisma.eventDefinition.create({ + data: { + slug: "large.payload", + version: "1.0", + projectId: env.projectId, + }, + }); + + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn); + + // Create a payload that exceeds 512KB + const largePayload = { data: "x".repeat(512 * 1024 + 1) }; + + await expect( + service.call("large.payload", env, largePayload) + ).rejects.toThrow(ServiceValidationError); + + await expect( + service.call("large.payload", env, largePayload) + ).rejects.toThrow("exceeds the 512KB limit"); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "per-subscriber rate limit skips rate-limited subscriber but delivers to others", + async ({ prisma, redisOptions }) => { + const engine = createEngine(prisma, redisOptions); + + try { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const { worker } = await setupBackgroundWorker(engine, env, [ + "limited-consumer", + "unlimited-consumer", + ]); + + const eventDef = await prisma.eventDefinition.create({ + data: { + slug: "consumer.limited", + version: "1.0", + projectId: env.projectId, + }, + }); + + // Subscriber with a very tight rate limit (1 per minute) + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "limited-consumer", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + rateLimit: { limit: 1, window: "1m" }, + }, + }); + + // Subscriber without rate limit + await prisma.eventSubscription.create({ + data: { + eventDefinition: { connect: { id: eventDef.id } }, + taskSlug: "unlimited-consumer", + project: { connect: { id: env.projectId } }, + environment: { connect: { id: env.id } }, + worker: { connect: { id: worker.id } }, + enabled: true, + }, + }); + + const rateLimitChecker = new InMemoryEventRateLimitChecker(); + const triggerFn = buildTriggerFn(prisma, engine); + const service = new PublishEventService(prisma, triggerFn, undefined, rateLimitChecker); + + // First publish: both subscribers should receive + const result1 = await service.call("consumer.limited", env, { n: 1 }); + expect(result1.runs).toHaveLength(2); + + // Second publish: limited-consumer should be skipped (rate limited) + const result2 = await service.call("consumer.limited", env, { n: 2 }); + expect(result2.runs).toHaveLength(1); + expect(result2.runs[0].taskIdentifier).toBe("unlimited-consumer"); + } finally { + await engine.quit(); + } + } + ); }); diff --git a/apps/webapp/test/engine/schemaRegistryDb.test.ts b/apps/webapp/test/engine/schemaRegistryDb.test.ts new file mode 100644 index 00000000000..081afa3a09c --- /dev/null +++ b/apps/webapp/test/engine/schemaRegistryDb.test.ts @@ -0,0 +1,212 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client (required for webapp service imports) +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +vi.mock("~/services/platform.v3.server", async (importOriginal) => { + const actual = (await importOriginal()) as Record; + return { + ...actual, + getEntitlement: vi.fn(), + }; +}); + +import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests"; +import { postgresTest } from "@internal/testcontainers"; +import { SchemaRegistryService } from "../../app/v3/services/events/schemaRegistry.server"; + +vi.setConfig({ testTimeout: 120_000 }); + +describe("SchemaRegistryService DB methods", () => { + postgresTest( + "registerSchema creates a new EventDefinition", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new SchemaRegistryService(prisma); + + const result = await service.registerSchema({ + projectId: env.projectId, + eventSlug: "order.created", + version: "1.0", + schema: { + type: "object", + properties: { orderId: { type: "string" } }, + required: ["orderId"], + }, + description: "Order created event", + }); + + expect(result.eventDefinitionId).toBeDefined(); + + // Verify in DB + const dbRecord = await prisma.eventDefinition.findUnique({ + where: { id: result.eventDefinitionId }, + }); + + expect(dbRecord).toBeDefined(); + expect(dbRecord!.slug).toBe("order.created"); + expect(dbRecord!.version).toBe("1.0"); + expect(dbRecord!.description).toBe("Order created event"); + expect(dbRecord!.schema).toBeDefined(); + } + ); + + postgresTest( + "registerSchema upserts on same slug+version+project", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new SchemaRegistryService(prisma); + + // First registration + const result1 = await service.registerSchema({ + projectId: env.projectId, + eventSlug: "user.updated", + version: "1.0", + schema: { type: "object", properties: { name: { type: "string" } } }, + description: "v1 description", + }); + + // Update same slug+version+project + const result2 = await service.registerSchema({ + projectId: env.projectId, + eventSlug: "user.updated", + version: "1.0", + schema: { + type: "object", + properties: { name: { type: "string" }, age: { type: "number" } }, + }, + description: "v1 updated description", + }); + + // Same record was updated (not duplicated) + expect(result2.eventDefinitionId).toBe(result1.eventDefinitionId); + + const dbRecord = await prisma.eventDefinition.findUnique({ + where: { id: result2.eventDefinitionId }, + }); + expect(dbRecord!.description).toBe("v1 updated description"); + } + ); + + postgresTest( + "getSchema returns latest version by default", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new SchemaRegistryService(prisma); + + await service.registerSchema({ + projectId: env.projectId, + eventSlug: "order.shipped", + version: "1.0", + schema: { type: "object" }, + }); + + // Create v2 slightly later + await new Promise((r) => setTimeout(r, 10)); + await service.registerSchema({ + projectId: env.projectId, + eventSlug: "order.shipped", + version: "2.0", + schema: { type: "object", properties: { trackingId: { type: "string" } } }, + }); + + const result = await service.getSchema({ + projectId: env.projectId, + eventSlug: "order.shipped", + }); + + expect(result).toBeDefined(); + expect(result!.version).toBe("2.0"); + } + ); + + postgresTest( + "getSchema returns specific version when requested", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new SchemaRegistryService(prisma); + + await service.registerSchema({ + projectId: env.projectId, + eventSlug: "test.versioned", + version: "1.0", + schema: { type: "object" }, + description: "Version 1", + }); + + await service.registerSchema({ + projectId: env.projectId, + eventSlug: "test.versioned", + version: "2.0", + schema: { type: "object" }, + description: "Version 2", + }); + + const v1 = await service.getSchema({ + projectId: env.projectId, + eventSlug: "test.versioned", + version: "1.0", + }); + + expect(v1).toBeDefined(); + expect(v1!.version).toBe("1.0"); + expect(v1!.description).toBe("Version 1"); + } + ); + + postgresTest( + "getSchema returns null for nonexistent event", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new SchemaRegistryService(prisma); + + const result = await service.getSchema({ + projectId: env.projectId, + eventSlug: "nonexistent.event", + }); + + expect(result).toBeNull(); + } + ); + + postgresTest( + "listSchemas returns all events with subscriber counts", + async ({ prisma }) => { + const env = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new SchemaRegistryService(prisma); + + // Register two events + await service.registerSchema({ + projectId: env.projectId, + eventSlug: "event.a", + version: "1.0", + schema: { type: "object" }, + }); + + await service.registerSchema({ + projectId: env.projectId, + eventSlug: "event.b", + version: "1.0", + schema: { type: "object" }, + }); + + const result = await service.listSchemas({ + projectId: env.projectId, + }); + + expect(result).toHaveLength(2); + expect(result.map((r) => r.slug).sort()).toEqual(["event.a", "event.b"]); + // No subscriptions yet, so subscriber counts should be 0 + expect(result.every((r) => r.subscriberCount === 0)).toBe(true); + } + ); +}); diff --git a/internal-packages/database/prisma/migrations/20260305000000_drop_redundant_event_subscription_idx/migration.sql b/internal-packages/database/prisma/migrations/20260305000000_drop_redundant_event_subscription_idx/migration.sql new file mode 100644 index 00000000000..09abd0ee4ca --- /dev/null +++ b/internal-packages/database/prisma/migrations/20260305000000_drop_redundant_event_subscription_idx/migration.sql @@ -0,0 +1,2 @@ +-- DropIndex +DROP INDEX CONCURRENTLY IF EXISTS "EventSubscription_projectId_environmentId_idx"; diff --git a/packages/core/src/v3/types/tasks.ts b/packages/core/src/v3/types/tasks.ts index 3f5d005e655..39eea70e4a4 100644 --- a/packages/core/src/v3/types/tasks.ts +++ b/packages/core/src/v3/types/tasks.ts @@ -41,12 +41,6 @@ export interface EventSource { readonly version: string; } -/** A pattern-based event source (e.g., "order.*") for wildcard subscriptions */ -export interface EventPatternSource extends EventSource { - /** The wildcard pattern (e.g., "order.*", "order.#") */ - readonly pattern: string; -} - export class SubtaskUnwrapError extends Error { public readonly taskId: string; public readonly runId: string; diff --git a/rules/4.4.0/events.md b/rules/4.4.0/events.md index 5d07d1e51ea..56a39d3d779 100644 --- a/rules/4.4.0/events.md +++ b/rules/4.4.0/events.md @@ -219,6 +219,38 @@ try { } ``` +## DLQ Configuration Per Event + +Disable or configure the dead letter queue per event: + +```ts +export const ephemeralEvent = event({ + id: "ephemeral.notification", + schema: z.object({ message: z.string() }), + dlq: { enabled: false }, // Don't store failures in DLQ +}); +``` + +## Consumer Rate Limiting + +Limit how fast individual subscriber tasks consume events: + +```ts +export const rateLimitedConsumer = task({ + id: "limited-processor", + on: orderCreated, + consumerRateLimit: { + limit: 100, + window: "1m", // Max 100 events per minute for this subscriber + }, + run: async (payload) => { + // Events exceeding the rate are skipped with a warning log + }, +}); +``` + +> Consumer rate limits are per-subscriber. Events that exceed the limit are skipped (not queued). Use publish-level rate limiting to queue events instead. + ## Dead Letter Queue Events that fail after all retries are captured in a DLQ. The DLQ is managed via API: @@ -228,6 +260,12 @@ Events that fail after all retries are captured in a DLQ. The DLQ is managed via - `POST /api/v1/events/dlq/:id/discard` — discard a failed event - `POST /api/v1/events/dlq/retry-all` — retry all pending failures +## Event Metrics + +Get subscriber health, DLQ depth, and rate limit config for an event: + +- `GET /api/v1/events/:eventId/metrics` — returns subscriber counts (total/active/disabled, per-subscriber details), DLQ depth (pending/retried/discarded), and rate limit configuration + ## Event History & Replay Published events are persisted and can be replayed: