From 468d89458abebc7a8daba26e4c37540a32f35667 Mon Sep 17 00:00:00 2001 From: Mariano Fuentes Date: Wed, 27 May 2026 13:51:59 -0400 Subject: [PATCH] fix(evidence-export): stream runs to prevent OOM on bulk export fix(evidence-export): stream runs to prevent OOM on bulk export --- .../evidence-export/evidence-data-loader.ts | 55 ++-- .../evidence-export.service.spec.ts | 12 + .../evidence-export.service.ts | 82 +++-- .../evidence-export/evidence-json-builder.ts | 53 ++++ .../evidence-export/evidence-pdf-generator.ts | 293 +++++++++--------- 5 files changed, 298 insertions(+), 197 deletions(-) diff --git a/apps/api/src/tasks/evidence-export/evidence-data-loader.ts b/apps/api/src/tasks/evidence-export/evidence-data-loader.ts index 6d0da40c56..45fa235eac 100644 --- a/apps/api/src/tasks/evidence-export/evidence-data-loader.ts +++ b/apps/api/src/tasks/evidence-export/evidence-data-loader.ts @@ -197,22 +197,38 @@ export async function loadFullAutomation({ taskId: string; header: NormalizedAutomation; }): Promise { + const runs: NormalizedEvidenceRun[] = []; + for await (const batch of streamAutomationRuns({ taskId, header })) { + runs.push(...batch); + } + return { ...header, runs }; +} + +// Yields runs in batches so callers can process incrementally without +// accumulating all runs in memory. Each batch is GC-eligible after processing. +export async function* streamAutomationRuns({ + taskId, + header, +}: { + taskId: string; + header: NormalizedAutomation; +}): AsyncGenerator { if (header.type === 'app_automation' && header.checkId) { - return loadAppAutomationRuns(taskId, header); + yield* streamAppRuns(taskId, header.checkId); + } else { + yield* streamCustomRuns(taskId, header.id); } - return loadCustomAutomationRuns(taskId, header); } -async function loadAppAutomationRuns( +async function* streamAppRuns( taskId: string, - header: NormalizedAutomation, -): Promise { - const runs: NormalizedEvidenceRun[] = []; + checkId: string, +): AsyncGenerator { let cursor: { id: string } | undefined; for (;;) { const batch = await db.integrationCheckRun.findMany({ - where: { taskId, checkId: header.checkId }, + where: { taskId, checkId }, include: { results: true, connection: { include: { provider: true } }, @@ -224,28 +240,25 @@ async function loadAppAutomationRuns( if (batch.length === 0) break; - for (const run of batch) { - runs.push(normalizeAppAutomationRun(toAppAutomationRun(run))); - } + yield batch.map((run) => + normalizeAppAutomationRun(toAppAutomationRun(run)), + ); if (batch.length < RUN_BATCH_SIZE) break; cursor = { id: batch[batch.length - 1].id }; } - - return { ...header, runs }; } -async function loadCustomAutomationRuns( +async function* streamCustomRuns( taskId: string, - header: NormalizedAutomation, -): Promise { - const runs: NormalizedEvidenceRun[] = []; + automationId: string, +): AsyncGenerator { let cursor: { id: string } | undefined; for (;;) { const batch = await db.evidenceAutomationRun.findMany({ where: { - evidenceAutomation: { id: header.id, taskId }, + evidenceAutomation: { id: automationId, taskId }, version: { not: null }, }, include: { @@ -258,15 +271,13 @@ async function loadCustomAutomationRuns( if (batch.length === 0) break; - for (const run of batch) { - runs.push(normalizeCustomAutomationRun(toCustomAutomationRun(run))); - } + yield batch.map((run) => + normalizeCustomAutomationRun(toCustomAutomationRun(run)), + ); if (batch.length < RUN_BATCH_SIZE) break; cursor = { id: batch[batch.length - 1].id }; } - - return { ...header, runs }; } // Prisma result → normalizer interface mappers (single source of truth). diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts index ab6de156d8..8b47882611 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts @@ -87,6 +87,18 @@ jest.mock('@db', () => ({ jest.mock('./evidence-pdf-generator', () => ({ generateTaskSummaryPDF: jest.fn(() => Buffer.from('SUMMARY-PDF')), generateAutomationPDF: jest.fn(() => Buffer.from('AUTOMATION-PDF')), + generateAutomationPDFFromStream: jest.fn( + async ( + _header: unknown, + _context: unknown, + runBatches: AsyncIterable, + ) => { + for await (const _batch of runBatches) { + /* drain so underlying DB queries execute */ + } + return Buffer.from('AUTOMATION-PDF'); + }, + ), sanitizeFilename: (name: string) => name .toLowerCase() diff --git a/apps/api/src/tasks/evidence-export/evidence-export.service.ts b/apps/api/src/tasks/evidence-export/evidence-export.service.ts index cc77678d57..9781f06296 100644 --- a/apps/api/src/tasks/evidence-export/evidence-export.service.ts +++ b/apps/api/src/tasks/evidence-export/evidence-export.service.ts @@ -11,10 +11,11 @@ import type { } from './evidence-export.types'; import { generateAutomationPDF, + generateAutomationPDFFromStream, generateTaskSummaryPDF, sanitizeFilename, } from './evidence-pdf-generator'; -import { buildAutomationJson } from './evidence-json-builder'; +import { buildAutomationJson, buildAutomationJsonStream } from './evidence-json-builder'; import { appendAttachmentToArchive, createFilenameTracker, @@ -24,6 +25,7 @@ import { import { getAutomationHeaders, loadFullAutomation, + streamAutomationRuns, findTasksWithEvidence, } from './evidence-data-loader'; @@ -221,7 +223,8 @@ export class EvidenceExportService { }); } - // Loads each automation's runs individually so peak memory ≈ one automation, not all combined. + // Streams each automation's runs through PDF/JSON generation so peak memory + // is bounded by one batch of runs (~50) instead of the full automation. private async appendTaskContents(params: { archive: Archiver; headers: TaskEvidenceSummary; @@ -257,15 +260,10 @@ export class EvidenceExportService { } for (const automationHeader of headers.automations) { - const automation = await loadFullAutomation({ - taskId: headers.taskId, - header: automationHeader, - }); - - this.appendAutomationToArchive({ + await this.appendAutomationStreaming({ archive, headers, - automation, + automationHeader, folderName, options, perAutomationSubfolders, @@ -273,50 +271,70 @@ export class EvidenceExportService { } } - private appendAutomationToArchive(params: { + private async appendAutomationStreaming(params: { archive: Archiver; headers: TaskEvidenceSummary; - automation: NormalizedAutomation; + automationHeader: NormalizedAutomation; folderName: string; options: { includeRawJson?: boolean }; perAutomationSubfolders: boolean; - }): void { + }): Promise { const { archive, headers, - automation, + automationHeader, folderName, options, perAutomationSubfolders, } = params; const typePrefix = - automation.type === 'app_automation' ? 'app' : 'custom'; - const automationName = sanitizeFilename(automation.name); - const idSuffix = automation.id.slice(-8); - - const pdfBuffer = generateAutomationPDF(automation, { - organizationName: headers.organizationName, - taskTitle: headers.taskTitle, - }); - + automationHeader.type === 'app_automation' ? 'app' : 'custom'; + const automationName = sanitizeFilename(automationHeader.name); + const idSuffix = automationHeader.id.slice(-8); const basePath = perAutomationSubfolders ? `${folderName}/${typePrefix}-${automationName}-${idSuffix}` : folderName; - const pdfName = perAutomationSubfolders - ? `${basePath}/evidence.pdf` - : `${basePath}/${typePrefix}-${automationName}-${idSuffix}.pdf`; + const filePrefix = perAutomationSubfolders + ? `${basePath}/evidence` + : `${basePath}/${typePrefix}-${automationName}-${idSuffix}`; - archive.append(pdfBuffer, { name: pdfName }); + const context = { + organizationName: headers.organizationName, + taskTitle: headers.taskTitle, + }; if (options.includeRawJson) { - const jsonName = perAutomationSubfolders - ? `${basePath}/evidence.json` - : `${basePath}/${typePrefix}-${automationName}-${idSuffix}.json`; - archive.append( - Buffer.from(buildAutomationJson(headers, automation), 'utf-8'), - { name: jsonName }, + // Two independent DB cursors so neither PDF nor JSON buffers the full run set. + const pdfBuffer = await generateAutomationPDFFromStream( + automationHeader, + context, + streamAutomationRuns({ + taskId: headers.taskId, + header: automationHeader, + }), + ); + archive.append(pdfBuffer, { name: `${filePrefix}.pdf` }); + + const jsonStream = buildAutomationJsonStream({ + summary: headers, + header: automationHeader, + runBatches: streamAutomationRuns({ + taskId: headers.taskId, + header: automationHeader, + }), + }); + archive.append(jsonStream, { name: `${filePrefix}.json` }); + } else { + const pdfBuffer = await generateAutomationPDFFromStream( + automationHeader, + context, + streamAutomationRuns({ + taskId: headers.taskId, + header: automationHeader, + }), ); + archive.append(pdfBuffer, { name: `${filePrefix}.pdf` }); } } diff --git a/apps/api/src/tasks/evidence-export/evidence-json-builder.ts b/apps/api/src/tasks/evidence-export/evidence-json-builder.ts index 1f3c317d5f..e0d2914abf 100644 --- a/apps/api/src/tasks/evidence-export/evidence-json-builder.ts +++ b/apps/api/src/tasks/evidence-export/evidence-json-builder.ts @@ -1,8 +1,10 @@ +import { Readable } from 'node:stream'; import { configure as configureStringify } from 'safe-stable-stringify'; import { redactSensitiveData } from './evidence-redaction'; import type { TaskEvidenceSummary, NormalizedAutomation, + NormalizedEvidenceRun, } from './evidence-export.types'; const safeStringify = configureStringify({ @@ -11,6 +13,30 @@ const safeStringify = configureStringify({ deterministic: false, }); +function stringifyAutomationMeta(header: NormalizedAutomation): string { + return ( + safeStringify( + redactSensitiveData({ + id: header.id, + name: header.name, + type: header.type, + integrationName: header.integrationName, + totalRuns: header.totalRuns, + successfulRuns: header.successfulRuns, + failedRuns: header.failedRuns, + latestRunAt: header.latestRunAt, + }), + null, + 2, + ) ?? '{}' + ); +} + +function stringifyRun(run: NormalizedEvidenceRun): string { + const { type, automationName, automationId, ...rest } = run; + return safeStringify(redactSensitiveData(rest), null, 2) ?? '{}'; +} + export function buildAutomationJson( summary: TaskEvidenceSummary, automation: NormalizedAutomation, @@ -38,3 +64,30 @@ export function buildAutomationJson( ) ?? '{}' ); } + +export function buildAutomationJsonStream({ + summary, + header, + runBatches, +}: { + summary: TaskEvidenceSummary; + header: NormalizedAutomation; + runBatches: AsyncIterable; +}): Readable { + async function* chunks(): AsyncGenerator { + yield `{\n "automation": ${stringifyAutomationMeta(header)},\n "runs": [\n`; + + let first = true; + for await (const batch of runBatches) { + for (const run of batch) { + if (!first) yield ',\n'; + first = false; + yield ` ${stringifyRun(run)}`; + } + } + + yield `\n ],\n "exportedAt": ${JSON.stringify(summary.exportedAt.toISOString())}\n}\n`; + } + + return Readable.from(chunks(), { encoding: 'utf-8' }); +} diff --git a/apps/api/src/tasks/evidence-export/evidence-pdf-generator.ts b/apps/api/src/tasks/evidence-export/evidence-pdf-generator.ts index cde5bb2122..4196ed9e29 100644 --- a/apps/api/src/tasks/evidence-export/evidence-pdf-generator.ts +++ b/apps/api/src/tasks/evidence-export/evidence-pdf-generator.ts @@ -10,6 +10,7 @@ import stringify from 'safe-stable-stringify'; import { redactSensitiveData } from './evidence-redaction'; import type { NormalizedAutomation, + NormalizedEvidenceRun, TaskEvidenceSummary, } from './evidence-export.types'; @@ -233,18 +234,8 @@ function addPageNumbers(config: PDFConfig): void { } } -/** - * Generate PDF for a single automation - */ -export function generateAutomationPDF( - automation: NormalizedAutomation, - context: { - organizationName: string; - taskTitle: string; - }, -): Buffer { - const doc = new jsPDF(); - const config: PDFConfig = { +function createPDFConfig(doc: jsPDF): PDFConfig { + return { doc, pageWidth: doc.internal.pageSize.getWidth(), pageHeight: doc.internal.pageSize.getHeight(), @@ -254,25 +245,27 @@ export function generateAutomationPDF( defaultFontSize: 10, yPosition: 20, }; +} - // Header +function renderAutomationHeader( + config: PDFConfig, + header: NormalizedAutomation, + context: { organizationName: string; taskTitle: string }, +): void { addText(config, context.organizationName, { fontSize: 14, bold: true }); config.yPosition += config.lineHeight * 0.5; addText(config, `Task: ${context.taskTitle}`, { fontSize: 11 }); config.yPosition += config.lineHeight; - // Automation title const typeLabel = - automation.type === 'app_automation' - ? 'App Automation' - : 'Custom Automation'; - addText(config, `${typeLabel}: ${automation.name}`, { + header.type === 'app_automation' ? 'App Automation' : 'Custom Automation'; + addText(config, `${typeLabel}: ${header.name}`, { fontSize: 13, bold: true, }); - if (automation.integrationName) { - addText(config, `Integration: ${automation.integrationName}`, { + if (header.integrationName) { + addText(config, `Integration: ${header.integrationName}`, { fontSize: 10, color: [100, 100, 100], }); @@ -280,7 +273,6 @@ export function generateAutomationPDF( config.yPosition += config.lineHeight; - // Export timestamp addText(config, `Exported: ${format(new Date(), 'PPpp')}`, { fontSize: 9, color: [128, 128, 128], @@ -288,157 +280,172 @@ export function generateAutomationPDF( addSeparator(config); - // Summary section addSectionHeader(config, 'Summary'); - addText(config, `Total Runs: ${automation.totalRuns}`); - addText(config, `Successful: ${automation.successfulRuns}`); - addText(config, `Failed: ${automation.failedRuns}`); + addText(config, `Total Runs: ${header.totalRuns}`); + addText(config, `Successful: ${header.successfulRuns}`); + addText(config, `Failed: ${header.failedRuns}`); - if (automation.latestRunAt) { - addText(config, `Latest Run: ${format(automation.latestRunAt, 'PPpp')}`); + if (header.latestRunAt) { + addText(config, `Latest Run: ${format(header.latestRunAt, 'PPpp')}`); } addSeparator(config); - - // Runs section addSectionHeader(config, 'Run History'); +} - for (const run of automation.runs) { - checkPageBreak(config, config.lineHeight * 8); - - // Run header - const statusColor = getStatusColor(run.status, run.failedCount > 0); - addText(config, `Run: ${format(run.createdAt, 'PPpp')}`, { - fontSize: 10, - bold: true, - }); - addText(config, `Status: ${run.status.toUpperCase()}`, { - fontSize: 9, - color: statusColor, - }); +function renderRunToPDF(config: PDFConfig, run: NormalizedEvidenceRun): void { + checkPageBreak(config, config.lineHeight * 8); - if (run.durationMs) { - addText(config, `Duration: ${run.durationMs}ms`, { fontSize: 9 }); - } + const statusColor = getStatusColor(run.status, run.failedCount > 0); + addText(config, `Run: ${format(run.createdAt, 'PPpp')}`, { + fontSize: 10, + bold: true, + }); + addText(config, `Status: ${run.status.toUpperCase()}`, { + fontSize: 9, + color: statusColor, + }); - // Metrics - if (run.type === 'app_automation') { - addText( - config, - `Checked: ${run.totalChecked} | Passed: ${run.passedCount} | Failed: ${run.failedCount}`, - { fontSize: 9 }, - ); - } else if (run.evaluationStatus) { - addText(config, `Evaluation: ${run.evaluationStatus.toUpperCase()}`, { - fontSize: 9, - color: run.evaluationStatus === 'pass' ? [0, 128, 0] : [200, 0, 0], - }); - if (run.evaluationReason) { - addText(config, `Reason: ${run.evaluationReason}`, { - fontSize: 9, - indent: 10, - }); - } - } + if (run.durationMs) { + addText(config, `Duration: ${run.durationMs}ms`, { fontSize: 9 }); + } - // Error - if (run.error) { - config.yPosition += config.lineHeight * 0.5; - addText(config, 'Error:', { + if (run.type === 'app_automation') { + addText( + config, + `Checked: ${run.totalChecked} | Passed: ${run.passedCount} | Failed: ${run.failedCount}`, + { fontSize: 9 }, + ); + } else if (run.evaluationStatus) { + addText(config, `Evaluation: ${run.evaluationStatus.toUpperCase()}`, { + fontSize: 9, + color: run.evaluationStatus === 'pass' ? [0, 128, 0] : [200, 0, 0], + }); + if (run.evaluationReason) { + addText(config, `Reason: ${run.evaluationReason}`, { fontSize: 9, - bold: true, - color: [200, 0, 0], - }); - addText(config, run.error, { - fontSize: 8, - color: [150, 0, 0], indent: 10, }); } + } - // Output (for custom automations) - if (run.output) { - config.yPosition += config.lineHeight * 0.5; - addText(config, 'Output:', { fontSize: 9, bold: true }); - const outputText = formatJsonForPDF(run.output); - addMonospaceText(config, outputText); - } + if (run.error) { + config.yPosition += config.lineHeight * 0.5; + addText(config, 'Error:', { + fontSize: 9, + bold: true, + color: [200, 0, 0], + }); + addText(config, run.error, { + fontSize: 8, + color: [150, 0, 0], + indent: 10, + }); + } - // Logs - if (run.logs) { - config.yPosition += config.lineHeight * 0.5; - addText(config, 'Logs:', { fontSize: 9, bold: true }); - const logsText = formatJsonForPDF(run.logs); - addMonospaceText(config, logsText); - } + if (run.output) { + config.yPosition += config.lineHeight * 0.5; + addText(config, 'Output:', { fontSize: 9, bold: true }); + addMonospaceText(config, formatJsonForPDF(run.output)); + } - // Results (for app automations) - if (run.results.length > 0) { - config.yPosition += config.lineHeight * 0.5; - addText(config, `Results (${run.results.length}):`, { + if (run.logs) { + config.yPosition += config.lineHeight * 0.5; + addText(config, 'Logs:', { fontSize: 9, bold: true }); + addMonospaceText(config, formatJsonForPDF(run.logs)); + } + + if (run.results.length > 0) { + config.yPosition += config.lineHeight * 0.5; + addText(config, `Results (${run.results.length}):`, { + fontSize: 9, + bold: true, + }); + + for (const result of run.results) { + checkPageBreak(config, config.lineHeight * 5); + + const resultIcon = result.passed ? '[PASS]' : '[FAIL]'; + const resultColor: [number, number, number] = result.passed + ? [0, 100, 0] + : [180, 0, 0]; + + addText(config, `${resultIcon} ${result.title}`, { fontSize: 9, bold: true, + color: resultColor, + indent: 10, }); - for (const result of run.results) { - checkPageBreak(config, config.lineHeight * 5); - - const resultIcon = result.passed ? '[PASS]' : '[FAIL]'; - const resultColor: [number, number, number] = result.passed - ? [0, 100, 0] - : [180, 0, 0]; + addText( + config, + `Resource: ${result.resourceType}/${result.resourceId}`, + { fontSize: 8, indent: 15 }, + ); - addText(config, `${resultIcon} ${result.title}`, { - fontSize: 9, - bold: true, - color: resultColor, - indent: 10, + if (result.description) { + addText(config, result.description, { fontSize: 8, indent: 15 }); + } + if (result.severity) { + addText(config, `Severity: ${result.severity}`, { + fontSize: 8, + indent: 15, }); - - addText( - config, - `Resource: ${result.resourceType}/${result.resourceId}`, - { - fontSize: 8, - indent: 15, - }, - ); - - if (result.description) { - addText(config, result.description, { fontSize: 8, indent: 15 }); - } - - if (result.severity) { - addText(config, `Severity: ${result.severity}`, { - fontSize: 8, - indent: 15, - }); - } - - if (result.remediation) { - addText(config, `Remediation: ${result.remediation}`, { - fontSize: 8, - indent: 15, - }); - } - - if (result.evidence) { - addText(config, 'Evidence:', { fontSize: 8, bold: true, indent: 15 }); - const evidenceText = formatJsonForPDF(result.evidence); - addMonospaceText(config, evidenceText, 20); - } - - config.yPosition += config.lineHeight * 0.5; } - } + if (result.remediation) { + addText(config, `Remediation: ${result.remediation}`, { + fontSize: 8, + indent: 15, + }); + } + if (result.evidence) { + addText(config, 'Evidence:', { fontSize: 8, bold: true, indent: 15 }); + addMonospaceText(config, formatJsonForPDF(result.evidence), 20); + } - config.yPosition += config.lineHeight; - addSeparator(config); + config.yPosition += config.lineHeight * 0.5; + } } + config.yPosition += config.lineHeight; + addSeparator(config); +} + +/** + * Generate PDF for a single automation (loads all runs into memory). + * Use generateAutomationPDFFromStream for large automations. + */ +export function generateAutomationPDF( + automation: NormalizedAutomation, + context: { organizationName: string; taskTitle: string }, +): Buffer { + const config = createPDFConfig(new jsPDF()); + renderAutomationHeader(config, automation, context); + for (const run of automation.runs) { + renderRunToPDF(config, run); + } addPageNumbers(config); + return Buffer.from(config.doc.output('arraybuffer')); +} - return Buffer.from(doc.output('arraybuffer')); +/** + * Build a PDF incrementally from an async stream of run batches. + * Peak memory = one batch of runs + the jsPDF document (lightweight page objects). + */ +export async function generateAutomationPDFFromStream( + header: NormalizedAutomation, + context: { organizationName: string; taskTitle: string }, + runBatches: AsyncIterable, +): Promise { + const config = createPDFConfig(new jsPDF()); + renderAutomationHeader(config, header, context); + for await (const batch of runBatches) { + for (const run of batch) { + renderRunToPDF(config, run); + } + } + addPageNumbers(config); + return Buffer.from(config.doc.output('arraybuffer')); } /**