Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions modules/key-card/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"@bitgo/sdk-core": "^36.39.0",
"@bitgo/statics": "^58.34.0",
"jspdf": ">=4.2.0",
"pdfjs-dist": "^5.6.205",
"qrcode": "^1.5.1"
},
"devDependencies": {
Expand Down
1 change: 1 addition & 0 deletions modules/key-card/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export * from './faq';
export * from './generateQrData';
export * from './utils';
export * from './types';
export * from './parseKeycard';

export async function generateKeycard(params: GenerateKeycardParams): Promise<void> {
if ('coin' in params) {
Expand Down
170 changes: 170 additions & 0 deletions modules/key-card/src/parseKeycard.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import type { KeycardEntry, PDFTextNode } from './types';

// pdfjs-dist is loaded lazily inside extractKeycardEntriesFromPDF to avoid
// loading browser-only globals at module evaluation time, which would crash
// in Node.js test environments.
//
// pdfjs-dist/webpack.mjs is Mozilla's official webpack entry point. It sets
// GlobalWorkerOptions.workerPort via webpack's native new Worker(new URL(...))
// pattern, so no manual worker configuration is needed in webpack builds.

// --- Regexes ---
// Matches a section header line such as "A: User Key": a single letter A-D
// (case-insensitive), optional whitespace, one separator character
// (":", ".", ")" or "-"), then a non-empty title. Group 1 is the letter and
// group 2 is the title with surrounding whitespace excluded.
const sectionHeaderRegex = /^([A-D])\s*[:.)-]\s*(.+?)\s*$/i;
// Matches a line that starts with "data:" (case-insensitive). Group 1 is the
// remainder of the line after the colon and any whitespace following it.
const dataLineRegex = /^data\s*:\s*(.*)$/i;
// Matches a line consisting solely of the "BitGo KeyCard FAQ" heading
// (case-insensitive); section parsing stops when this line is reached.
const faqHeaderRegex = /^BitGo\s+KeyCard\s+FAQ$/i;

// --- Line reconstruction from PDF text nodes ---

/**
 * Groups PDF text nodes into visual lines and renders each line as a string.
 *
 * Nodes are first put into an exact total order (page ascending, y descending,
 * x ascending). The vertical tolerance is deliberately NOT part of the sort
 * comparator: a comparator that treats "within tolerance" as equal is not
 * transitive, and `Array.prototype.sort` gives implementation-defined results
 * for inconsistent comparators. The tolerance is applied only while grouping.
 *
 * A line is anchored at the first node seen for it, which after sorting is
 * the node with the largest y on that line. Every following node on the same
 * page whose y is within `yTolerance` of that anchor joins the line.
 *
 * @param nodes - Text nodes from any number of pages, in any order. Not mutated.
 * @param yTolerance - Maximum absolute y distance from the line's anchor node
 *   for a node to be considered part of the same line. Defaults to 2.
 * @returns One string per visual line, in page order and then descending y.
 */
function buildLinesFromPDFNodes(nodes: PDFTextNode[], yTolerance = 2): string[] {
  // Strict, transitive ordering; buildLineText re-sorts each line by x anyway.
  const sorted = [...nodes].sort((a, b) => a.page - b.page || b.y - a.y || a.x - b.x);

  const lines: string[] = [];
  let currentLine: PDFTextNode[] = [];
  let currentY = -Infinity;
  let currentPage = -1;

  for (const node of sorted) {
    const startsNewLine = node.page !== currentPage || Math.abs(node.y - currentY) > yTolerance;
    if (startsNewLine) {
      if (currentLine.length > 0) {
        lines.push(buildLineText(currentLine));
      }
      currentLine = [node];
      // The anchor stays fixed for the whole line so the line cannot drift
      // downwards by chaining many small y offsets.
      currentY = node.y;
      currentPage = node.page;
    } else {
      currentLine.push(node);
    }
  }
  if (currentLine.length > 0) {
    lines.push(buildLineText(currentLine));
  }
  return lines;
}

/**
 * Joins the text of the nodes on one visual line, left to right.
 *
 * The nodes are ordered by x on a copy of the input. A single space is
 * inserted before a node when the horizontal gap between its x and the
 * previous node's right edge (previous x + previous width) exceeds 2;
 * otherwise the texts are concatenated directly.
 *
 * @param nodes - Nodes belonging to the same line, in any order. Not mutated.
 * @returns The concatenated line text, or an empty string for no nodes.
 */
function buildLineText(nodes: PDFTextNode[]): string {
  const leftToRight = nodes.slice().sort((first, second) => first.x - second.x);

  // -Infinity marks "no previous node yet", so the first node never gets a
  // leading space.
  let previousX = Number.NEGATIVE_INFINITY;
  let previousWidth = 0;

  return leftToRight.reduce((text, node) => {
    const hasPrevious = previousX !== Number.NEGATIVE_INFINITY;
    const gap = node.x - (previousX + previousWidth);
    const separator = hasPrevious && gap > 2 ? ' ' : '';
    previousX = node.x;
    previousWidth = node.width;
    return text + separator + node.text;
  }, '');
}

// --- Section parsing ---

/**
 * Parses reconstructed keycard lines into label/value entries.
 *
 * The parser walks the lines in order:
 * - A line matching the FAQ heading ends parsing.
 * - A line matching a section header (A-D) closes any open entry and opens a
 *   new one whose label is the trimmed header line.
 * - Inside a section, lines are ignored until a `data:` line is found. The
 *   text after `data:` starts the value and following lines are appended to
 *   it without any separator.
 * - When the value starts with JSON (a `{` on the `data:` line, or a line
 *   beginning with `{` while the value is still empty), braces are counted
 *   and the entry is closed as soon as they balance. Later lines are then
 *   ignored until the next section header.
 *
 * Header, FAQ and `data:` detection ignore leading whitespace (and, for the
 * header and FAQ heading, trailing whitespace), so indented or padded lines
 * are still recognised. Lines appended to a value are kept verbatim; the
 * final value is trimmed when the entry is stored.
 *
 * @param lines - Text lines in reading order.
 * @returns The entries found, in order of appearance. An entry whose section
 *   has no `data:` line has an empty value.
 */
function parseKeycardFromLines(lines: string[]): KeycardEntry[] {
  const entries: KeycardEntry[] = [];
  let currentLabel: string | null = null;
  let currentValue = '';
  let capturingData = false;
  let braceDepth = 0;
  let isJsonSection = false;

  // Net change in JSON nesting contributed by a piece of text.
  const braceDelta = (text: string): number =>
    (text.match(/\{/g) ?? []).length - (text.match(/\}/g) ?? []).length;

  // Stores the open entry (if any) and resets all per-section state.
  const flushEntry = (): void => {
    if (currentLabel !== null) {
      entries.push({ label: currentLabel, value: currentValue.trim() });
      currentLabel = null;
      currentValue = '';
      capturingData = false;
      braceDepth = 0;
      isJsonSection = false;
    }
  };

  for (const line of lines) {
    // The regexes are anchored with ^/$, so match against the trimmed line;
    // PDF text fragments can carry leading or trailing spaces.
    const trimmed = line.trim();

    if (faqHeaderRegex.test(trimmed)) {
      flushEntry();
      break;
    }

    if (sectionHeaderRegex.test(trimmed)) {
      flushEntry();
      currentLabel = trimmed;
      continue;
    }

    if (currentLabel === null) continue;

    if (!capturingData) {
      // Only leading whitespace is dropped so the captured chunk keeps the
      // exact text that followed "data:".
      const dataMatch = dataLineRegex.exec(line.trimStart());
      if (!dataMatch) continue;

      capturingData = true;
      currentValue = dataMatch[1] ?? '';
      if (currentValue.includes('{')) {
        isJsonSection = true;
        braceDepth = braceDelta(currentValue);
        if (braceDepth <= 0) flushEntry();
      }
      continue;
    }

    // "data:" may sit on its own line with the JSON starting on the next one;
    // switch to brace counting so the entry still ends when the JSON closes.
    if (!isJsonSection && currentValue.trim() === '' && line.trimStart().startsWith('{')) {
      isJsonSection = true;
    }

    currentValue += line;
    if (isJsonSection) {
      braceDepth += braceDelta(line);
      if (braceDepth <= 0) flushEntry();
    }
  }
  flushEntry();
  return entries;
}

// --- Public API ---

/**
 * Extracts structured keycard entries from a BitGo KeyCard PDF file.
 *
 * Loads `pdfjs-dist/webpack.mjs` lazily, reads every page's text content,
 * converts each non-blank text item into a positioned node, reconstructs
 * visual lines, then identifies labelled sections (A:, B:, C:, D:) and their
 * associated data values. Section parsing stops at the FAQ section header.
 *
 * The pdfjs loading task is always destroyed before returning or throwing, so
 * the document and its worker-side resources are released even when a page
 * fails to load.
 *
 * @param file - A browser `File` object representing the KeyCard PDF.
 * @returns An object containing:
 * - `lines`: The reconstructed text lines from all PDF pages (useful for debugging).
 * - `entries`: The parsed `KeycardEntry` array (label + value pairs).
 * @throws Rejects with whatever error the dynamic import, `file.arrayBuffer()`
 *   or pdfjs-dist raises (for example when the file is not a readable PDF).
 */
export async function extractKeycardEntriesFromPDF(file: File): Promise<{
  lines: string[];
  entries: KeycardEntry[];
}> {
  const pdfjsLib = await import('pdfjs-dist/webpack.mjs');
  const arrayBuffer = await file.arrayBuffer();
  const loadingTask = pdfjsLib.getDocument({ data: arrayBuffer });
  const nodes: PDFTextNode[] = [];

  try {
    const pdf = await loadingTask.promise;

    // pdfjs pages are 1-based.
    for (let pageNum = 1; pageNum <= pdf.numPages; pageNum++) {
      const page = await pdf.getPage(pageNum);
      const textContent = await page.getTextContent();
      for (const item of textContent.items) {
        // Only items carrying a `str` are text; skip whitespace-only ones.
        if ('str' in item && item.str.trim()) {
          const transform = item.transform as number[];
          nodes.push({
            text: item.str,
            // Elements 4 and 5 of the transform are used as the item's position.
            x: transform[4],
            y: transform[5],
            page: pageNum,
            width: item.width,
          });
        }
      }
    }
  } finally {
    // Awaited so a subsequent call never races a half-destroyed worker.
    await loadingTask.destroy();
  }

  const lines = buildLinesFromPDFNodes(nodes);
  const entries = parseKeycardFromLines(lines);
  return { lines, entries };
}
3 changes: 3 additions & 0 deletions modules/key-card/src/pdf-declarations.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Ambient declaration for the 'pdfjs-dist/webpack.mjs' entry point: it is
// typed as re-exporting everything that 'pdfjs-dist' exports, so imports of
// the webpack entry get the same types as the main package.
declare module 'pdfjs-dist/webpack.mjs' {
  export * from 'pdfjs-dist';
}
26 changes: 26 additions & 0 deletions modules/key-card/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,29 @@ export interface QrData {
user: QrDataEntry;
userMasterPublicKey?: MasterPublicKeyQrDataEntry;
}

/**
 * @internal
 * A single text node extracted from a PDF page via pdfjs-dist's getTextContent().
 * Not part of the public API — used only within parseKeycard.ts.
 */
export interface PDFTextNode {
  /** The text item's string (`item.str`), stored untrimmed. */
  text: string;
  /** Horizontal position, taken from element 4 of the item's transform. */
  x: number;
  /**
   * Vertical position, taken from element 5 of the item's transform.
   * Nodes with a larger y are ordered earlier when lines are rebuilt.
   */
  y: number;
  /** 1-based number of the page the item was read from. */
  page: number;
  /** The item's width as reported by pdfjs-dist (`item.width`). */
  width: number;
}

/**
 * A label/value pair extracted from a BitGo KeyCard section.
 *
 * `label` is the full section header line (e.g. "A: User Key").
 * `value` is the content of the `data:` field for that section.
 * For JSON sections (e.g. encrypted key objects), `value` is the
 * concatenated multi-line JSON string.
 */
export interface KeycardEntry {
  /** The section header line with surrounding whitespace trimmed. */
  label: string;
  /**
   * The text following `data:`, with continuation lines appended without a
   * separator and the result trimmed. Empty when the section has no `data:` line.
   */
  value: string;
}
Loading
Loading