From 4508552bc63974112f35047163d3516f5d80259e Mon Sep 17 00:00:00 2001 From: waleed Date: Sat, 27 Jun 2026 17:47:44 -0700 Subject: [PATCH] fix(webhooks): run inactive deployment-version cleanup inline on deploy When a deploy activates a new version, superseded versions' webhooks are removed by a separate, best-effort CLEANUP_INACTIVE outbox event. When that event is lost or dead-lettered, old-version webhooks linger as is_active orphans that fetchActiveWebhooks skips (version mismatch), so they silently stop being polled (~515 webhooks across ~130 workflows in prod). Run the existing cleanupInactiveDeploymentVersions synchronously in the SYNC_ACTIVE handler, right after the active version's webhooks/schedules are registered, falling back to the deferred outbox event only if the inline pass throws. This reuses the existing guarded cleanup, which re-checks that each version is still inactive before tearing anything down (so it never touches the active version) and runs strictly after registration (so a teardown failure can't block registration). 
--- apps/sim/lib/workflows/deployment-outbox.ts | 37 ++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/apps/sim/lib/workflows/deployment-outbox.ts b/apps/sim/lib/workflows/deployment-outbox.ts index 6dcb0b9fe4a..3c4ce5e9952 100644 --- a/apps/sim/lib/workflows/deployment-outbox.ts +++ b/apps/sim/lib/workflows/deployment-outbox.ts @@ -207,9 +207,10 @@ const syncActiveSideEffects = async (rawPayload: unknown): Promise => { return } - await enqueueWorkflowInactiveDeploymentCleanup(db, { + await syncInactiveDeploymentCleanup({ workflowId: payload.workflowId, activeDeploymentVersionId: payload.deploymentVersionId, + workflow: workflowData, userId: payload.userId, requestId, }) @@ -278,6 +279,40 @@ const cleanupUndeployedSideEffects = async (rawPayload: unknown): Promise await removeMcpToolsIfStillUndeployed(payload.workflowId, requestId) } +/** + * Run inactive-version cleanup synchronously as part of the active-version sync, right + * after the active version's webhooks/schedules are registered. + * + * {@link cleanupInactiveDeploymentVersions} re-checks that each version is still inactive + * before tearing anything down, so it can never touch the now-active version. Running it + * inline — rather than only enqueueing it — closes the window where a lost + * `CLEANUP_INACTIVE` outbox event leaves superseded webhooks behind as live-but-never-polled + * `is_active` orphans. The deferred event is kept as a fallback so cleanup still retries if + * the inline pass throws, without failing the already-succeeded registration. 
+ */ +async function syncInactiveDeploymentCleanup(params: { + workflowId: string + activeDeploymentVersionId: string + workflow: Record + userId: string + requestId: string +}): Promise { + try { + await cleanupInactiveDeploymentVersions(params) + } catch (cleanupError) { + logger.warn( + `[${params.requestId}] Inline inactive-deployment cleanup failed; deferring to outbox retry`, + cleanupError + ) + await enqueueWorkflowInactiveDeploymentCleanup(db, { + workflowId: params.workflowId, + activeDeploymentVersionId: params.activeDeploymentVersionId, + userId: params.userId, + requestId: params.requestId, + }) + } +} + async function cleanupInactiveDeploymentVersions(params: { workflowId: string activeDeploymentVersionId: string