Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added fixtures/malformed/xref-off-by-one.pdf
Binary file not shown.
17 changes: 17 additions & 0 deletions src/api/pdf.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -702,6 +702,23 @@ describe("PDF", () => {

expect(extracted.getPageCount()).toBe(0);
});

it("handles PDF with off-by-one xref subsection start", async () => {
  // Regression fixture: the xref subsection header says "1 N" where it
  // should say "0 N", so every object number is shifted by one. Objects
  // then resolved to the wrong offsets (e.g., the Pages root resolving as
  // a Page whose Parent points back at itself), which produced a wrong
  // page count and an infinite loop in extractPages.
  const fixtureBytes = await loadFixture("malformed", "xref-off-by-one.pdf");
  const doc = await PDF.load(fixtureBytes);

  // The page tree must be read correctly despite the shifted header.
  expect(doc.getPageCount()).toBe(3);

  // Spot-check the last page's dimensions.
  expect(doc.getPages()[2].width).toBe(300);
  expect(doc.getPages()[2].height).toBe(400);

  // Extracting every page must terminate and keep all three pages.
  const copy = await doc.extractPages([0, 1, 2]);
  expect(copy.getPageCount()).toBe(3);
});
});

describe("embedPage and drawPage", () => {
Expand Down
9 changes: 9 additions & 0 deletions src/document/object-copier.ts
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,7 @@ export class ObjectCopier {
*/
private getInheritedAttribute(page: PdfDict, key: string): PdfObject | null {
let current: PdfDict | null = page;
const visited = new Set<string>();

while (current) {
const value = current.get(key);
Expand All @@ -483,6 +484,14 @@ export class ObjectCopier {
break;
}

const refKey = `${parentRef.objectNumber}:${parentRef.generation}`;

if (visited.has(refKey)) {
break;
}

visited.add(refKey);

const parent = this.source.getObject(parentRef);
current = parent instanceof PdfDict ? parent : null;
}
Expand Down
41 changes: 41 additions & 0 deletions src/parser/xref-parser.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,47 @@ trailer

expect(result.entries.size).toBe(2);
});

it("corrects off-by-one subsection start when free list head is at wrong position", () => {
  // Some malformed PDFs declare firstObjNum=1 even though the entries
  // really begin at object 0. The free list head (type f, gen 65535) is
  // always object 0, so the table below must be re-based onto 0.
  const table = parser(`xref
1 4
0000000000 65535 f
0000000015 00000 n
0000000074 00000 n
0000000120 00000 n
trailer
<< /Size 4 /Root 1 0 R >>
`).parseTable();

  expect(table.entries.size).toBe(4);

  // The free list head must land on object 0 (not object 1).
  const freeHead = table.entries.get(0);
  expect(freeHead).toBeDefined();
  expect(freeHead!.type).toBe("free");
  if (freeHead!.type === "free") {
    expect(freeHead!.generation).toBe(65535);
  }

  // After correction, object 1 sits at offset 15 and object 3 at offset 120.
  for (const [objNum, expectedOffset] of [
    [1, 15],
    [3, 120],
  ] as const) {
    const inUse = table.entries.get(objNum);
    expect(inUse).toBeDefined();
    expect(inUse!.type).toBe("uncompressed");
    if (inUse!.type === "uncompressed") {
      expect(inUse!.offset).toBe(expectedOffset);
    }
  }
});
});

describe("trailer parsing", () => {
Expand Down
28 changes: 25 additions & 3 deletions src/parser/xref-parser.ts
Original file line number Diff line number Diff line change
Expand Up @@ -353,10 +353,32 @@ export class XRefParser {
this.skipWhitespaceFromCurrent();

// Read entries
const parsedEntries: XRefEntry[] = [];

for (let i = 0; i < count; i++) {
const objNum = firstObjNum + i;
const entry = this.parseEntry();
entries.set(objNum, entry);
parsedEntries.push(this.parseEntry());
}

// Detect off-by-one in subsection start: some malformed PDFs report
// firstObjNum=1 when the entries actually start at object 0.
// The free list head (generation 65535, type free) is always object 0,
// so if we see it at position 1, correct it. (Same fix as pdf.js #3248/#7229)
let correctedFirstObjNum = firstObjNum;

if (
firstObjNum === 1 &&
parsedEntries.length > 0 &&
parsedEntries[0].type === "free" &&
parsedEntries[0].generation === 65535
) {
correctedFirstObjNum = 0;
console.warn(
"XRef: corrected subsection start from 1 to 0 (free list head at wrong position)",
);
}

for (let i = 0; i < parsedEntries.length; i++) {
entries.set(correctedFirstObjNum + i, parsedEntries[i]);
}
}

Expand Down
Loading