Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions scripts/profile-load-save.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
* CPU profiling script for load-modify-save workflow.
*
* Usage: bun --cpu-prof-md scripts/profile-load-save.ts
*
* Runs the load → modify → save cycle multiple times to get
* a representative CPU profile showing where time is spent.
*/

import { readFileSync } from "node:fs";

import { PDF } from "../src/index.ts";

const HEAVY_PDF = "fixtures/benchmarks/cc-journalists-guide.pdf";
const ITERATIONS = 20;

/**
 * Run a single load → modify → save cycle against the given PDF bytes.
 *
 * Shared by the warm-up pass and the timed loop so both exercise exactly
 * the same code path.
 *
 * @param bytes - Raw bytes of the PDF to load.
 * @throws Error if the loaded document has no first page to draw on.
 */
async function runCycle(bytes: Uint8Array): Promise<void> {
  const pdf = await PDF.load(bytes);
  const page = pdf.getPage(0);

  // Fail with a clear message rather than a TypeError on a page-less document.
  if (!page) {
    throw new Error(`${HEAVY_PDF} has no pages to modify`);
  }

  page.drawRectangle({ x: 50, y: 50, width: 100, height: 100 });
  await pdf.save();
}

// Read the fixture once up front so file I/O is not part of the timed region.
const pdfBytes = new Uint8Array(readFileSync(HEAVY_PDF));
console.log(`PDF size: ${(pdfBytes.length / 1024 / 1024).toFixed(1)}MB`);

// Warm up: one untimed cycle before measurement starts.
await runCycle(pdfBytes);

console.log(`Running ${ITERATIONS} iterations of load → modify → save...`);

const start = performance.now();

// Cycles run sequentially; each one starts from the same original bytes.
for (let i = 0; i < ITERATIONS; i++) {
  await runCycle(pdfBytes);
}

const elapsed = performance.now() - start;
console.log(`Total: ${elapsed.toFixed(0)}ms`);
console.log(`Average: ${(elapsed / ITERATIONS).toFixed(1)}ms per iteration`);
10 changes: 7 additions & 3 deletions src/api/pdf.ts
Original file line number Diff line number Diff line change
Expand Up @@ -429,8 +429,10 @@ export class PDF {

const pdfCatalog = new PDFCatalog(catalogDict, registry);
const pagesRef = catalogDict.getRef("Pages");
// Use registry.resolve so page tree objects are tracked for
// modification detection and reachability analysis during save.
const pages = pagesRef
? PDFPageTree.load(pagesRef, parsed.getObject.bind(parsed))
? PDFPageTree.load(pagesRef, registry.resolve.bind(registry))
: PDFPageTree.empty();

// Load Info dictionary if present (for metadata access)
Expand Down Expand Up @@ -526,7 +528,7 @@ export class PDF {
const pdfCatalog = new PDFCatalog(catalogDict, registry);
const pagesRef = catalogDict.getRef("Pages");
const pages = pagesRef
? PDFPageTree.load(pagesRef, parsed.getObject.bind(parsed))
? PDFPageTree.load(pagesRef, registry.resolve.bind(registry))
: PDFPageTree.empty();

// Load Info dictionary if present (for metadata change tracking)
Expand Down Expand Up @@ -3171,7 +3173,7 @@ export class PDF {
return result;
}

// Full save (collectReachableRefs in writeComplete will load all reachable objects)
// Full save — write all objects in a single pass.
const result = writeComplete(this.ctx.registry, {
version: this.ctx.info.version,
root,
Expand All @@ -3182,6 +3184,8 @@ export class PDF {
securityHandler,
compressStreams: options.compressStreams,
compressionThreshold: options.compressionThreshold,
// Pre-size output buffer to avoid repeated doubling for large PDFs.
sizeHint: this.originalBytes.length > 0 ? this.originalBytes.length : undefined,
});

// Reset pending security state after successful save
Expand Down
4 changes: 3 additions & 1 deletion src/filters/ascii-hex-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ export class ASCIIHexFilter implements Filter {
private static readonly NIBBLE_MASK = 0x0f;

decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
const output = new ByteWriter();
const output = new ByteWriter(undefined, {
initialSize: Math.ceil(data.length / 2), // Hex is 2 chars per byte
});

let high: number | null = null;

Expand Down
8 changes: 6 additions & 2 deletions src/filters/ascii85-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ export class ASCII85Filter implements Filter {
private static readonly ZERO_SHORTCUT = 0x7a;

decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
const output = new ByteWriter();
const output = new ByteWriter(undefined, {
initialSize: Math.ceil((data.length * 4) / 5), // Estimate output size
});

let buffer = 0;
let count = 0;
Expand Down Expand Up @@ -102,7 +104,9 @@ export class ASCII85Filter implements Filter {
}

encode(data: Uint8Array, _params?: PdfDict): Uint8Array {
const output = new ByteWriter();
const output = new ByteWriter(undefined, {
initialSize: Math.ceil((data.length * 5) / 4) + 2, // Estimate output size + end marker
});

// Process 4 bytes at a time
let i = 0;
Expand Down
4 changes: 3 additions & 1 deletion src/filters/lzw-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,9 @@ export class LZWFilter implements Filter {
}

private lzwDecode(data: Uint8Array, earlyChange: number): Uint8Array {
const output = new ByteWriter();
const output = new ByteWriter(undefined, {
initialSize: data.length * 4, // Estimate output size (LZW can expand up to 4x)
});

// LZW constants
// Bit reading state
Expand Down
9 changes: 7 additions & 2 deletions src/filters/run-length-filter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ export class RunLengthFilter implements Filter {
readonly name = "RunLengthDecode";

decode(data: Uint8Array, _params?: PdfDict): Uint8Array {
const output = new ByteWriter();
const output = new ByteWriter(undefined, {
initialSize: data.length * 4, // Estimate output size (RLE can expand up to 4x)
});
let i = 0;

while (i < data.length) {
Expand Down Expand Up @@ -52,7 +54,10 @@ export class RunLengthFilter implements Filter {
}

encode(data: Uint8Array, _params?: PdfDict): Uint8Array {
const output = new ByteWriter();
const output = new ByteWriter(undefined, {
initialSize: data.length * 2, // Worst case (no runs)
});

let i = 0;

while (i < data.length) {
Expand Down
4 changes: 3 additions & 1 deletion src/helpers/format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,8 @@ export function parsePdfDate(str: string): Date | undefined {
// Number Formatting
// ─────────────────────────────────────────────────────────────────────────────

const TRAILING_ZERO_REGEX = /\.?0+$/;

/**
* Format a number for PDF output.
*
Expand All @@ -112,7 +114,7 @@ export function formatPdfNumber(value: number): string {
let str = value.toFixed(5);

// Remove trailing zeros and unnecessary decimal point
str = str.replace(/\.?0+$/, "");
str = str.replace(TRAILING_ZERO_REGEX, "");

// Handle edge case where we stripped everything after decimal
if (str === "" || str === "-") {
Expand Down
23 changes: 16 additions & 7 deletions src/io/byte-writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,17 +25,19 @@ export class ByteWriter {
* @param options - Configuration options
*/
constructor(existingBytes?: Uint8Array, options: ByteWriterOptions = {}) {
const initialSize = options.initialSize ?? 65536;
this.maxSize = options.maxSize ?? Number.MAX_SAFE_INTEGER;

if (existingBytes) {
// Start with existing bytes, leave room to grow
const size = Math.max(existingBytes.length * 2, initialSize);
this.buffer = new Uint8Array(size);
// When initialSize is provided, use it directly — the caller knows the
// expected final size. Otherwise default to 2x the existing bytes.
const size = options.initialSize ?? existingBytes.length * 2;

this.buffer = new Uint8Array(Math.max(size, existingBytes.length));
this.buffer.set(existingBytes);

this.offset = existingBytes.length;
} else {
this.buffer = new Uint8Array(initialSize);
this.buffer = new Uint8Array(options.initialSize ?? 65536);
}
}

Expand Down Expand Up @@ -105,11 +107,18 @@ export class ByteWriter {

/**
* Get final bytes.
* Returns a copy (slice) so the internal buffer can be garbage collected.
*
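* If the internal buffer is exactly the right size, returns it directly
* (zero-copy). Otherwise returns a trimmed view (`subarray`) over the same
* underlying buffer — no bytes are copied, so the oversized allocation
* stays alive for as long as the returned array is referenced.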
*
* Note: ByteWriter is single-use. Do not write after calling toBytes().
*/
toBytes(): Uint8Array {
return this.buffer.slice(0, this.offset);
if (this.offset === this.buffer.length) {
return this.buffer;
}

return this.buffer.subarray(0, this.offset);
}
}
7 changes: 5 additions & 2 deletions src/parser/indirect-object-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,12 @@ export class IndirectObjectParser {
// Get the stream length
const length = this.resolveLength(dict);

// Read exactly `length` bytes
// Read exactly `length` bytes.
// Use subarray (zero-copy view) since the underlying PDF bytes
// are kept alive by the PDF object for the document's lifetime.
const startPos = this.scanner.position;
const data = this.scanner.bytes.slice(startPos, startPos + length);
const data = this.scanner.bytes.subarray(startPos, startPos + length);

this.scanner.moveTo(startPos + length);

// Skip optional EOL before "endstream"
Expand Down
13 changes: 12 additions & 1 deletion src/writer/pdf-writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,14 @@ export interface WriteOptions {
* The encrypt dictionary reference must also be provided.
*/
securityHandler?: StandardSecurityHandler;

/**
* Hint for the final PDF size in bytes.
*
* When provided, the ByteWriter will pre-allocate a buffer of this size,
* reducing the need for reallocations during writing.
*/
sizeHint?: number;
}

/**
Expand Down Expand Up @@ -341,7 +349,10 @@ function collectReachableRefs(
* ```
*/
export function writeComplete(registry: ObjectRegistry, options: WriteOptions): WriteResult {
const writer = new ByteWriter();
const writer = new ByteWriter(undefined, {
initialSize: options.sizeHint,
});

const compress = options.compressStreams ?? true;
const threshold = options.compressionThreshold ?? DEFAULT_COMPRESSION_THRESHOLD;

Expand Down
8 changes: 6 additions & 2 deletions src/writer/serializer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@ import type { PdfRef } from "#src/objects/pdf-ref";
* @returns The PDF byte representation
*/
export function serializeObject(obj: PdfObject): Uint8Array {
const writer = new ByteWriter();
const writer = new ByteWriter(undefined, {
initialSize: 256, // Start with a reasonable buffer size
});

// All PdfObject types implement PdfPrimitive
obj.toBytes(writer);
Expand All @@ -36,7 +38,9 @@ export function serializeObject(obj: PdfObject): Uint8Array {
* @returns The complete indirect object definition
*/
export function serializeIndirectObject(ref: PdfRef, obj: PdfObject): Uint8Array {
const writer = new ByteWriter();
const writer = new ByteWriter(undefined, {
initialSize: 256, // Start with a reasonable buffer size
});

writer.writeAscii(`${ref.objectNumber} ${ref.generation} obj\n`);
obj.toBytes(writer);
Expand Down