Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 33 additions & 22 deletions apps/api/src/tasks/evidence-export/evidence-data-loader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -197,22 +197,38 @@ export async function loadFullAutomation({
taskId: string;
header: NormalizedAutomation;
}): Promise<NormalizedAutomation> {
const runs: NormalizedEvidenceRun[] = [];
for await (const batch of streamAutomationRuns({ taskId, header })) {
runs.push(...batch);
}
return { ...header, runs };
}

// Yields runs in batches so callers can process incrementally without
// accumulating all runs in memory. Each batch is GC-eligible after processing.
export async function* streamAutomationRuns({
taskId,
header,
}: {
taskId: string;
header: NormalizedAutomation;
}): AsyncGenerator<NormalizedEvidenceRun[]> {
if (header.type === 'app_automation' && header.checkId) {
return loadAppAutomationRuns(taskId, header);
yield* streamAppRuns(taskId, header.checkId);
} else {
yield* streamCustomRuns(taskId, header.id);
}
return loadCustomAutomationRuns(taskId, header);
}

async function loadAppAutomationRuns(
async function* streamAppRuns(
taskId: string,
header: NormalizedAutomation,
): Promise<NormalizedAutomation> {
const runs: NormalizedEvidenceRun[] = [];
checkId: string,
): AsyncGenerator<NormalizedEvidenceRun[]> {
let cursor: { id: string } | undefined;

for (;;) {
const batch = await db.integrationCheckRun.findMany({
where: { taskId, checkId: header.checkId },
where: { taskId, checkId },
include: {
results: true,
connection: { include: { provider: true } },
Expand All @@ -224,28 +240,25 @@ async function loadAppAutomationRuns(

if (batch.length === 0) break;

for (const run of batch) {
runs.push(normalizeAppAutomationRun(toAppAutomationRun(run)));
}
yield batch.map((run) =>
normalizeAppAutomationRun(toAppAutomationRun(run)),
);

if (batch.length < RUN_BATCH_SIZE) break;
cursor = { id: batch[batch.length - 1].id };
}

return { ...header, runs };
}

async function loadCustomAutomationRuns(
async function* streamCustomRuns(
taskId: string,
header: NormalizedAutomation,
): Promise<NormalizedAutomation> {
const runs: NormalizedEvidenceRun[] = [];
automationId: string,
): AsyncGenerator<NormalizedEvidenceRun[]> {
let cursor: { id: string } | undefined;

for (;;) {
const batch = await db.evidenceAutomationRun.findMany({
where: {
evidenceAutomation: { id: header.id, taskId },
evidenceAutomation: { id: automationId, taskId },
version: { not: null },
},
include: {
Expand All @@ -258,15 +271,13 @@ async function loadCustomAutomationRuns(

if (batch.length === 0) break;

for (const run of batch) {
runs.push(normalizeCustomAutomationRun(toCustomAutomationRun(run)));
}
yield batch.map((run) =>
normalizeCustomAutomationRun(toCustomAutomationRun(run)),
);

if (batch.length < RUN_BATCH_SIZE) break;
cursor = { id: batch[batch.length - 1].id };
}

return { ...header, runs };
}

// Prisma result → normalizer interface mappers (single source of truth).
Expand Down
12 changes: 12 additions & 0 deletions apps/api/src/tasks/evidence-export/evidence-export.service.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,18 @@ jest.mock('@db', () => ({
jest.mock('./evidence-pdf-generator', () => ({
generateTaskSummaryPDF: jest.fn(() => Buffer.from('SUMMARY-PDF')),
generateAutomationPDF: jest.fn(() => Buffer.from('AUTOMATION-PDF')),
generateAutomationPDFFromStream: jest.fn(
async (
_header: unknown,
_context: unknown,
runBatches: AsyncIterable<unknown>,
) => {
for await (const _batch of runBatches) {
/* drain so underlying DB queries execute */
}
return Buffer.from('AUTOMATION-PDF');
},
),
sanitizeFilename: (name: string) =>
name
.toLowerCase()
Expand Down
82 changes: 50 additions & 32 deletions apps/api/src/tasks/evidence-export/evidence-export.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,11 @@ import type {
} from './evidence-export.types';
import {
generateAutomationPDF,
generateAutomationPDFFromStream,
generateTaskSummaryPDF,
sanitizeFilename,
} from './evidence-pdf-generator';
import { buildAutomationJson } from './evidence-json-builder';
import { buildAutomationJson, buildAutomationJsonStream } from './evidence-json-builder';
import {
appendAttachmentToArchive,
createFilenameTracker,
Expand All @@ -24,6 +25,7 @@ import {
import {
getAutomationHeaders,
loadFullAutomation,
streamAutomationRuns,
findTasksWithEvidence,
} from './evidence-data-loader';

Expand Down Expand Up @@ -221,7 +223,8 @@ export class EvidenceExportService {
});
}

// Loads each automation's runs individually so peak memory ≈ one automation, not all combined.
// Streams each automation's runs through PDF/JSON generation so peak memory
// is bounded by one batch of runs (~50) instead of the full automation.
private async appendTaskContents(params: {
archive: Archiver;
headers: TaskEvidenceSummary;
Expand Down Expand Up @@ -257,66 +260,81 @@ export class EvidenceExportService {
}

for (const automationHeader of headers.automations) {
const automation = await loadFullAutomation({
taskId: headers.taskId,
header: automationHeader,
});

this.appendAutomationToArchive({
await this.appendAutomationStreaming({
archive,
headers,
automation,
automationHeader,
folderName,
options,
perAutomationSubfolders,
});
}
}

private appendAutomationToArchive(params: {
private async appendAutomationStreaming(params: {
archive: Archiver;
headers: TaskEvidenceSummary;
automation: NormalizedAutomation;
automationHeader: NormalizedAutomation;
folderName: string;
options: { includeRawJson?: boolean };
perAutomationSubfolders: boolean;
}): void {
}): Promise<void> {
const {
archive,
headers,
automation,
automationHeader,
folderName,
options,
perAutomationSubfolders,
} = params;

const typePrefix =
automation.type === 'app_automation' ? 'app' : 'custom';
const automationName = sanitizeFilename(automation.name);
const idSuffix = automation.id.slice(-8);

const pdfBuffer = generateAutomationPDF(automation, {
organizationName: headers.organizationName,
taskTitle: headers.taskTitle,
});

automationHeader.type === 'app_automation' ? 'app' : 'custom';
const automationName = sanitizeFilename(automationHeader.name);
const idSuffix = automationHeader.id.slice(-8);
const basePath = perAutomationSubfolders
? `${folderName}/${typePrefix}-${automationName}-${idSuffix}`
: folderName;
const pdfName = perAutomationSubfolders
? `${basePath}/evidence.pdf`
: `${basePath}/${typePrefix}-${automationName}-${idSuffix}.pdf`;
const filePrefix = perAutomationSubfolders
? `${basePath}/evidence`
: `${basePath}/${typePrefix}-${automationName}-${idSuffix}`;

archive.append(pdfBuffer, { name: pdfName });
const context = {
organizationName: headers.organizationName,
taskTitle: headers.taskTitle,
};

if (options.includeRawJson) {
const jsonName = perAutomationSubfolders
? `${basePath}/evidence.json`
: `${basePath}/${typePrefix}-${automationName}-${idSuffix}.json`;
archive.append(
Buffer.from(buildAutomationJson(headers, automation), 'utf-8'),
{ name: jsonName },
// Two independent DB cursors so neither PDF nor JSON buffers the full run set.
const pdfBuffer = await generateAutomationPDFFromStream(
automationHeader,
context,
streamAutomationRuns({
taskId: headers.taskId,
header: automationHeader,
}),
);
archive.append(pdfBuffer, { name: `${filePrefix}.pdf` });

const jsonStream = buildAutomationJsonStream({
summary: headers,
header: automationHeader,
runBatches: streamAutomationRuns({
taskId: headers.taskId,
header: automationHeader,
}),
});
archive.append(jsonStream, { name: `${filePrefix}.json` });
} else {
const pdfBuffer = await generateAutomationPDFFromStream(
automationHeader,
context,
streamAutomationRuns({
taskId: headers.taskId,
header: automationHeader,
}),
);
archive.append(pdfBuffer, { name: `${filePrefix}.pdf` });
}
}

Expand Down
53 changes: 53 additions & 0 deletions apps/api/src/tasks/evidence-export/evidence-json-builder.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import { Readable } from 'node:stream';
import { configure as configureStringify } from 'safe-stable-stringify';
import { redactSensitiveData } from './evidence-redaction';
import type {
TaskEvidenceSummary,
NormalizedAutomation,
NormalizedEvidenceRun,
} from './evidence-export.types';

const safeStringify = configureStringify({
Expand All @@ -11,6 +13,30 @@ const safeStringify = configureStringify({
deterministic: false,
});

function stringifyAutomationMeta(header: NormalizedAutomation): string {
return (
safeStringify(
redactSensitiveData({
id: header.id,
name: header.name,
type: header.type,
integrationName: header.integrationName,
totalRuns: header.totalRuns,
successfulRuns: header.successfulRuns,
failedRuns: header.failedRuns,
latestRunAt: header.latestRunAt,
}),
null,
2,
) ?? '{}'
);
}

function stringifyRun(run: NormalizedEvidenceRun): string {
const { type, automationName, automationId, ...rest } = run;
return safeStringify(redactSensitiveData(rest), null, 2) ?? '{}';
}

export function buildAutomationJson(
summary: TaskEvidenceSummary,
automation: NormalizedAutomation,
Expand Down Expand Up @@ -38,3 +64,30 @@ export function buildAutomationJson(
) ?? '{}'
);
}

export function buildAutomationJsonStream({
summary,
header,
runBatches,
}: {
summary: TaskEvidenceSummary;
header: NormalizedAutomation;
runBatches: AsyncIterable<NormalizedEvidenceRun[]>;
}): Readable {
async function* chunks(): AsyncGenerator<string> {
yield `{\n "automation": ${stringifyAutomationMeta(header)},\n "runs": [\n`;

let first = true;
for await (const batch of runBatches) {
for (const run of batch) {
if (!first) yield ',\n';
first = false;
yield ` ${stringifyRun(run)}`;
}
}

yield `\n ],\n "exportedAt": ${JSON.stringify(summary.exportedAt.toISOString())}\n}\n`;
}

return Readable.from(chunks(), { encoding: 'utf-8' });
}
Loading
Loading