Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/objects/pdf-ref.ts
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,17 @@ export class PdfRef implements PdfPrimitive {
return `${this.objectNumber} ${this.generation} R`;
}

/**
 * Packs this reference into a single number so it can be stored in a
 * `Set<number>` or used as a `Map<number, ...>` key without allocating a
 * string per reference.
 *
 * Layout: `objectNumber * 65536 + generation`. PDF caps the generation at
 * 65535, so the generation always fits below the object-number part and two
 * different references never share a key.
 *
 * The scaling is done with multiplication on purpose: `<<` would coerce the
 * operand to a 32-bit integer and silently truncate large object numbers.
 * The result remains an exact integer while `objectNumber` is below 2^37.
 */
get key(): number {
  const objectPart = this.objectNumber * 0x10000;

  return objectPart + this.generation;
}

/**
 * Serializes this reference as `<objectNumber> <generation> R` and hands
 * the resulting ASCII text to the given writer.
 */
toBytes(writer: ByteWriter): void {
  const serialized = `${this.objectNumber} ${this.generation} R`;

  writer.writeAscii(serialized);
}
Expand Down
18 changes: 8 additions & 10 deletions src/parser/document-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -391,8 +391,8 @@ export class DocumentParser {
trailer: PdfDict,
recoveredViaBruteForce: boolean,
): ParsedDocument {
// Object cache: "objNum genNum" -> PdfObject
const cache = new Map<string, PdfObject>();
// Object cache: packed ref key (see PdfRef.key) -> PdfObject
const cache = new Map<number, PdfObject>();

// Object stream cache: streamObjNum -> ObjectStreamParser
const objectStreamCache = new Map<number, ObjectStreamParser>();
Expand Down Expand Up @@ -467,7 +467,7 @@ export class DocumentParser {
// Create length resolver for stream objects with indirect /Length
const lengthResolver: LengthResolver = (ref: PdfRef) => {
// Check object cache first
const cacheKey = `${ref.objectNumber} ${ref.generation}`;
const cacheKey = ref.key;
const cached = cache.get(cacheKey);

if (cached instanceof PdfNumber) {
Expand Down Expand Up @@ -602,7 +602,7 @@ export class DocumentParser {
};

const getObject = (ref: PdfRef): PdfObject | null => {
const key = `${ref.objectNumber} ${ref.generation}`;
const key = ref.key;

// Check cache
if (cache.has(key)) {
Expand Down Expand Up @@ -704,19 +704,17 @@ export class DocumentParser {
*/
const getPages = (): PdfRef[] => {
const pages: PdfRef[] = [];
const visited = new Set<string>();
const visited = new Set<number>();

const walkNode = (nodeOrRef: PdfObject | null, currentRef?: PdfRef): void => {
// Handle references
if (nodeOrRef instanceof PdfRef) {
const key = `${nodeOrRef.objectNumber} ${nodeOrRef.generation}`;

if (visited.has(key)) {
this.warnings.push(`Circular reference in page tree: ${key}`);
if (visited.has(nodeOrRef.key)) {
this.warnings.push(`Circular reference in page tree: ${nodeOrRef.toString()}`);
return;
}

visited.add(key);
visited.add(nodeOrRef.key);

const resolved = getObject(nodeOrRef);

Expand Down
12 changes: 5 additions & 7 deletions src/writer/pdf-writer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -275,16 +275,16 @@ function encryptStreamDict(stream: PdfStream, ctx: EncryptionContext): PdfStream
* Collect all refs reachable from the document root and trailer entries.
*
* Walks the object graph starting from Root, Info, and Encrypt (if present),
* returning the set of all object keys (as "objNum gen" strings) that are reachable.
* returning the set of packed ref keys (see `PdfRef.key`) that are reachable.
* This is used for garbage collection during full saves.
*/
function collectReachableRefs(
registry: ObjectRegistry,
root: PdfRef,
info?: PdfRef,
encrypt?: PdfRef,
): Set<string> {
const visited = new Set<string>();
): Set<number> {
const visited = new Set<number>();
const stack: PdfObject[] = [root];

if (info) {
Expand All @@ -299,7 +299,7 @@ function collectReachableRefs(
const obj = stack.pop()!;

if (obj instanceof PdfRef) {
const key = `${obj.objectNumber} ${obj.generation}`;
const key = obj.key;

if (visited.has(key)) {
continue;
Expand Down Expand Up @@ -375,9 +375,7 @@ export function writeComplete(registry: ObjectRegistry, options: WriteOptions):

// Write only reachable objects and record offsets
for (const [ref, obj] of registry.entries()) {
const key = `${ref.objectNumber} ${ref.generation}`;

if (!reachableKeys.has(key)) {
if (!reachableKeys.has(ref.key)) {
continue; // Skip orphan objects
}
// Prepare object (compress streams if needed)
Expand Down
Loading