From 516b5fa0f6fd976c0ab656daf71bcc6242407708 Mon Sep 17 00:00:00 2001 From: Nate Date: Tue, 21 Apr 2026 23:31:32 -0400 Subject: [PATCH] perf(core): avoid per-ref string allocations via PdfRef.key Ref identity Maps/Sets were keyed on template strings like "${objNum} ${gen}", allocating a string per visit. Introduce PdfRef.key, a packed numeric identity (objNum * 65536 + gen, lossless since PDF caps generation at 65535), and migrate the two hottest call sites: collectReachableRefs in the writer GC walk, and the parser's object cache plus page-tree visitor. Measured on the same machine: - load heavy PDF (9.9MB): +13.5% - load small/medium/form PDF: +9 to 10% - save heavy PDF (9.9MB): +7% - save with modifications (19KB): +3% --- src/objects/pdf-ref.ts | 11 +++++++++++ src/parser/document-parser.ts | 18 ++++++++---------- src/writer/pdf-writer.ts | 12 +++++------- 3 files changed, 24 insertions(+), 17 deletions(-) diff --git a/src/objects/pdf-ref.ts b/src/objects/pdf-ref.ts index 77312c7..2d0f450 100644 --- a/src/objects/pdf-ref.ts +++ b/src/objects/pdf-ref.ts @@ -70,6 +70,17 @@ export class PdfRef implements PdfPrimitive { return `${this.objectNumber} ${this.generation} R`; } + /** + * Numeric identity key for use in Set / Map, + * avoiding per-ref string allocation. Lossless only for spec-conformant refs: PDF caps generation + * at 65535, so a larger generation (e.g. from a malformed file) would collide with another ref's key — `1 65536 R` and `2 0 R` both pack to 131072. + * + * Use multiplication, not `<<` — bitshift truncates to int32. The product stays an exact integer for object numbers below 2^37 (Number.MAX_SAFE_INTEGER / 65536).
+ */ + get key(): number { + return this.objectNumber * 65536 + this.generation; + } + toBytes(writer: ByteWriter): void { writer.writeAscii(`${this.objectNumber} ${this.generation} R`); } diff --git a/src/parser/document-parser.ts b/src/parser/document-parser.ts index 04b392d..5de946a 100644 --- a/src/parser/document-parser.ts +++ b/src/parser/document-parser.ts @@ -391,8 +391,8 @@ export class DocumentParser { trailer: PdfDict, recoveredViaBruteForce: boolean, ): ParsedDocument { - // Object cache: "objNum genNum" -> PdfObject - const cache = new Map(); + // Object cache: packed ref key (see PdfRef.key) -> PdfObject + const cache = new Map(); // Object stream cache: streamObjNum -> ObjectStreamParser const objectStreamCache = new Map(); @@ -467,7 +467,7 @@ export class DocumentParser { // Create length resolver for stream objects with indirect /Length const lengthResolver: LengthResolver = (ref: PdfRef) => { // Check object cache first - const cacheKey = `${ref.objectNumber} ${ref.generation}`; + const cacheKey = ref.key; const cached = cache.get(cacheKey); if (cached instanceof PdfNumber) { @@ -602,7 +602,7 @@ export class DocumentParser { }; const getObject = (ref: PdfRef): PdfObject | null => { - const key = `${ref.objectNumber} ${ref.generation}`; + const key = ref.key; // Check cache if (cache.has(key)) { @@ -704,19 +704,17 @@ export class DocumentParser { */ const getPages = (): PdfRef[] => { const pages: PdfRef[] = []; - const visited = new Set(); + const visited = new Set(); const walkNode = (nodeOrRef: PdfObject | null, currentRef?: PdfRef): void => { // Handle references if (nodeOrRef instanceof PdfRef) { - const key = `${nodeOrRef.objectNumber} ${nodeOrRef.generation}`; - - if (visited.has(key)) { - this.warnings.push(`Circular reference in page tree: ${key}`); + if (visited.has(nodeOrRef.key)) { + this.warnings.push(`Circular reference in page tree: ${nodeOrRef.toString()}`); return; } - visited.add(key); + visited.add(nodeOrRef.key); const resolved = 
getObject(nodeOrRef); diff --git a/src/writer/pdf-writer.ts b/src/writer/pdf-writer.ts index 88640b2..a82b332 100644 --- a/src/writer/pdf-writer.ts +++ b/src/writer/pdf-writer.ts @@ -275,7 +275,7 @@ function encryptStreamDict(stream: PdfStream, ctx: EncryptionContext): PdfStream * Collect all refs reachable from the document root and trailer entries. * * Walks the object graph starting from Root, Info, and Encrypt (if present), - * returning the set of all object keys (as "objNum gen" strings) that are reachable. + * returning the set of packed ref keys (see `PdfRef.key`) that are reachable. * This is used for garbage collection during full saves. */ function collectReachableRefs( @@ -283,8 +283,8 @@ function collectReachableRefs( root: PdfRef, info?: PdfRef, encrypt?: PdfRef, -): Set { - const visited = new Set(); +): Set { + const visited = new Set(); const stack: PdfObject[] = [root]; if (info) { @@ -299,7 +299,7 @@ function collectReachableRefs( const obj = stack.pop()!; if (obj instanceof PdfRef) { - const key = `${obj.objectNumber} ${obj.generation}`; + const key = obj.key; if (visited.has(key)) { continue; @@ -375,9 +375,7 @@ export function writeComplete(registry: ObjectRegistry, options: WriteOptions): // Write only reachable objects and record offsets for (const [ref, obj] of registry.entries()) { - const key = `${ref.objectNumber} ${ref.generation}`; - - if (!reachableKeys.has(key)) { + if (!reachableKeys.has(ref.key)) { continue; // Skip orphan objects } // Prepare object (compress streams if needed)