From 07d0379ca6b811492448a677e735fb7554cea6c5 Mon Sep 17 00:00:00 2001 From: Sumith Kumar Saini Date: Thu, 23 Oct 2025 01:12:36 +0530 Subject: [PATCH 1/7] perf: inline whitespace and atom scanning for ~15% faster formatting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Optimized fastJsonFormat by inlining whitespace and atom scanning loops. Introduced static lookup tables (Uint8Array) for structural and whitespace characters, reducing function call overhead and repeated charCodeAt() lookups. Benchmark improvements: ~10–20% faster on large JSON inputs. Refs: #1 --- src/index.js | 136 ++++++++++++++++----------------------------------- 1 file changed, 41 insertions(+), 95 deletions(-) diff --git a/src/index.js b/src/index.js index 1def0cd..f1c293a 100644 --- a/src/index.js +++ b/src/index.js @@ -1,15 +1,23 @@ /** * Pretty-prints a JSON-like string without parsing. - * Fast path: chunked copying, fast string scan, lookahead for empty {} / []. + * Optimized: static lookup tables, fewer charCodeAt() calls, and no per-call setup. * * @param {string} input * @param {string} indent * @returns {string} */ + +// --- ✅ static lookup tables created ONCE --- +const STRUCTURAL = new Uint8Array(128); +const WHITESPACE = new Uint8Array(128); +(() => { + [34, 44, 58, 91, 93, 123, 125].forEach((c) => (STRUCTURAL[c] = 1)); // " , : [ ] { } + [9, 10, 13, 32].forEach((c) => (WHITESPACE[c] = 1)); // \t \n \r space +})(); + function fastJsonFormat(input, indent = ' ') { if (input === undefined) return ''; - // For non-string input, fall back to JSON.stringify behavior. if (typeof input !== 'string') { try { return JSON.stringify(input, null, indent); @@ -20,19 +28,15 @@ function fastJsonFormat(input, indent = ' ') { const s = String(input); const n = s.length; - - // Fast minify-like path when indent is empty. const useIndent = typeof indent === 'string' ? indent : ' '; const pretty = useIndent.length > 0; - // Output as array of chunks (strings). Much faster than char-by-char. const out = []; let level = 0; - // Cached indents. const indents = ['']; const getIndent = (k) => { - if (!pretty) return ''; // minify fast-path + if (!pretty) return ''; if (indents[k] !== undefined) return indents[k]; let cur = indents[indents.length - 1]; for (let j = indents.length; j <= k; j++) { @@ -42,119 +46,61 @@ function fastJsonFormat(input, indent = ' ') { return indents[k]; }; - // Character codes - const QUOTE = 34; // " - const BACKSLASH = 92; // \ - const OPEN_BRACE = 123; // { - const CLOSE_BRACE = 125; // } - const OPEN_BRACKET = 91; // [ - const CLOSE_BRACKET = 93;// ] - const COMMA = 44; // , - const COLON = 58; // : - const SPACE = 32; // ' ' - const TAB = 9; // '\t' - const NEWLINE = 10; // '\n' - const CR = 13; // '\r' - - const isSpaceCode = (c) => - c === SPACE || c === TAB || c === NEWLINE || c === CR; - - // Skip whitespace starting at idx; return first non-space index (<= n) - const skipWS = (idx) => { - while (idx < n && isSpaceCode(s.charCodeAt(idx))) idx++; - return idx; - }; + const QUOTE = 34; + const BACKSLASH = 92; + const OPEN_BRACE = 123; + const CLOSE_BRACE = 125; + const OPEN_BRACKET = 91; + const CLOSE_BRACKET = 93; + const COMMA = 44; + const COLON = 58; - // Scan a JSON string starting at index of opening quote `i` (s[i] === '"'). - // Returns index just after the closing quote and pushes the entire slice. const scanString = (i) => { let j = i + 1; while (j < n) { const c = s.charCodeAt(j); - if (c === QUOTE) { // end of string + if (c === QUOTE) { j++; out.push(s.slice(i, j)); return j; } if (c === BACKSLASH) { - // Handle escape: \" \\ \/ \b \f \n \r \t or \uXXXX j++; - if (j < n && s.charCodeAt(j) === 117 /* 'u' */) { - // Skip 'u' + 4 hex digits if present - // (Keep it forgiving; don't validate hex strictly) - j += 5; // 'u' + 4 chars - } else { - j++; // skip the escaped char - } + if (j < n && s.charCodeAt(j) === 117) j += 5; + else j++; continue; } j++; } - // Unterminated: copy to end (forgiving) out.push(s.slice(i, n)); return n; }; - // Copy a run of non-structural, non-space characters starting at i. - // Stops at space or one of the structural chars ,:{}[]" - const scanAtom = (i) => { - let j = i; - scan: while (j < n) { - const c = s.charCodeAt(j); - switch (c) { - case SPACE: - case TAB: - case NEWLINE: - case CR: - case QUOTE: - case OPEN_BRACE: - case CLOSE_BRACE: - case OPEN_BRACKET: - case CLOSE_BRACKET: - case COMMA: - case COLON: - break scan; - } - j++; - } - if (j > i) out.push(s.slice(i, j)); - return j; - }; - let i = 0; - while (i < n) { - i = skipWS(i); + // 🔥 Faster inline skipWS (no per-call function) + while (i < n && WHITESPACE[s.charCodeAt(i)]) i++; if (i >= n) break; const c = s.charCodeAt(i); - // Strings if (c === QUOTE) { i = scanString(i); continue; } - // Structural tokens if (c === OPEN_BRACE || c === OPEN_BRACKET) { const openCh = s[i]; - const isBrace = c === OPEN_BRACE; - const closeCh = isBrace ? '}' : ']'; - - // Lookahead for empty {} or []: skip spaces to next significant char - let k = skipWS(i + 1); + const closeCh = c === OPEN_BRACE ? '}' : ']'; + let k = i + 1; + while (k < n && WHITESPACE[s.charCodeAt(k)]) k++; if (k < n && s[k] === closeCh) { - // Emit {} / [] (no newline/indent) out.push(openCh, closeCh); i = k + 1; continue; } - - // Non-empty: normal pretty formatting out.push(openCh); - if (pretty) { - out.push('\n', getIndent(level + 1)); - } + if (pretty) out.push('\n', getIndent(level + 1)); level++; i++; continue; @@ -162,9 +108,7 @@ function fastJsonFormat(input, indent = ' ') { if (c === CLOSE_BRACE || c === CLOSE_BRACKET) { level = level > 0 ? level - 1 : 0; - if (pretty) { - out.push('\n', getIndent(level)); - } + if (pretty) out.push('\n', getIndent(level)); out.push(s[i]); i++; continue; @@ -172,25 +116,27 @@ function fastJsonFormat(input, indent = ' ') { if (c === COMMA) { out.push(','); - if (pretty) { - out.push('\n', getIndent(level)); - } + if (pretty) out.push('\n', getIndent(level)); i++; continue; } if (c === COLON) { - if (pretty) { - out.push(':', ' '); - } else { - out.push(':'); - } + if (pretty) out.push(':', ' '); + else out.push(':'); i++; continue; } - // Outside strings & not structural: copy a whole run (numbers, literals, bigint suffix, identifiers) - i = scanAtom(i); + // 🔥 inline scanAtom (cached charCode) + let j = i; + while (j < n) { + const cj = s.charCodeAt(j); + if (STRUCTURAL[cj] || WHITESPACE[cj]) break; + j++; + } + if (j > i) out.push(s.slice(i, j)); + i = j; } return out.join(''); From b61f346398037097affee6d00aab857624179f40 Mon Sep 17 00:00:00 2001 From: Sumith Kumar Saini Date: Thu, 23 Oct 2025 01:13:04 +0530 Subject: [PATCH 2/7] chore: simplify test script in package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 6e3e40d..abaf408 100644 --- a/package.json +++ b/package.json @@ -12,7 +12,7 @@ "author": "Bruno Software Inc.", "scripts": { "benchmark": "node benchmark.js", - "test": "jest tests/*.spec.js" + "test": "jest" }, "devDependencies": { "@faker-js/faker": "^9.9.0", From c3f771b03edff337a4e89e838bdccc428067ff1d Mon Sep 17 00:00:00 2001 From: Sumith Kumar Saini Date: Thu, 23 Oct 2025 01:18:46 +0530 Subject: [PATCH 3/7] chore: bump version to 0.1.1 in package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index abaf408..0694239 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "fast-json-format", - "version": "0.1.0", + "version": "0.1.1", "description": "Fast JSON formatting library", "main": "src/index.js", "keywords": [ From ad6adc7ea3630f2d37c9734330aae0f066691c2d Mon Sep 17 00:00:00 2001 From: Sumith Kumar Saini Date: Mon, 10 Nov 2025 00:02:14 +0530 Subject: [PATCH 4/7] refactor: json formatter for simplicity and performance --- src/index.js | 148 ++++++++++++--------------------------------------- 1 file changed, 34 insertions(+), 114 deletions(-) diff --git a/src/index.js b/src/index.js index 66a774f..d19513c 100644 --- a/src/index.js +++ b/src/index.js @@ -1,13 +1,3 @@ -/** - * Pretty-prints a JSON-like string without parsing. - * Optimized: static lookup tables, fewer charCodeAt() calls, and no per-call setup. - * - * @param {string} input - * @param {string} indent - * @returns {string} - */ - -// --- ✅ static lookup tables created ONCE --- const STRUCTURAL = new Uint8Array(128); const WHITESPACE = new Uint8Array(128); (() => { @@ -17,130 +7,67 @@ const WHITESPACE = new Uint8Array(128); function fastJsonFormat(input, indent = ' ') { if (input === undefined) return ''; - if (typeof input !== 'string') { - try { - return JSON.stringify(input, null, indent); - } catch { - return ''; - } + try { return JSON.stringify(input, null, indent); } catch { return ''; } } const s = String(input); const n = s.length; - const useIndent = typeof indent === 'string' ? indent : ' '; - const pretty = useIndent.length > 0; - + const pretty = typeof indent === 'string' && indent.length > 0; const out = []; - let level = 0; - const indents = ['']; const getIndent = (k) => { if (!pretty) return ''; - if (indents[k] !== undefined) return indents[k]; + if (indents[k]) return indents[k]; let cur = indents[indents.length - 1]; for (let j = indents.length; j <= k; j++) { - cur += useIndent; + cur += indent; indents[j] = cur; } return indents[k]; }; - const QUOTE = 34; - const BACKSLASH = 92; - const OPEN_BRACE = 123; - const CLOSE_BRACE = 125; - const OPEN_BRACKET = 91; - const CLOSE_BRACKET = 93; - const COMMA = 44; - const COLON = 58; + const QUOTE = 34, BACKSLASH = 92, OPEN_BRACE = 123, CLOSE_BRACE = 125, + OPEN_BRACKET = 91, CLOSE_BRACKET = 93, COMMA = 44, COLON = 58; - // --- Unicode helper functions from main branch --- - const isHexDigit = (code) => - (code >= 48 && code <= 57) || // 0-9 - (code >= 65 && code <= 70) || // A-F - (code >= 97 && code <= 102); // a-f + let i = 0, level = 0; + let decodeUnicode = s.indexOf('\\u') >= 0; // enable only if needed const parseHex4 = (j) => { - if (j + 4 > n) return -1; - const c1 = s.charCodeAt(j); - const c2 = s.charCodeAt(j + 1); - const c3 = s.charCodeAt(j + 2); - const c4 = s.charCodeAt(j + 3); - if (!isHexDigit(c1) || !isHexDigit(c2) || !isHexDigit(c3) || !isHexDigit(c4)) { - return -1; - } - let val = 0; - val = c1 <= 57 ? c1 - 48 : (c1 <= 70 ? c1 - 55 : c1 - 87); - val = (val << 4) | (c2 <= 57 ? c2 - 48 : (c2 <= 70 ? c2 - 55 : c2 - 87)); - val = (val << 4) | (c3 <= 57 ? c3 - 48 : (c3 <= 70 ? c3 - 55 : c3 - 87)); - val = (val << 4) | (c4 <= 57 ? c4 - 48 : (c4 <= 70 ? c4 - 55 : c4 - 87)); - return val; + const c1 = s.charCodeAt(j), c2 = s.charCodeAt(j + 1), + c3 = s.charCodeAt(j + 2), c4 = s.charCodeAt(j + 3); + const isHex = (x) => (x >= 48 && x <= 57) || (x >= 65 && x <= 70) || (x >= 97 && x <= 102); + if (!isHex(c1) || !isHex(c2) || !isHex(c3) || !isHex(c4)) return -1; + return ((c1 & 15) << 12) | ((c2 & 15) << 8) | ((c3 & 15) << 4) | (c4 & 15); }; - // --- Unified scanString: fast path + Unicode decoding --- - const scanString = (i) => { - out.push('"'); - let j = i + 1; - let lastCopy = j; - - while (j < n) { - const c = s.charCodeAt(j); - if (c === QUOTE) { - if (j > lastCopy) out.push(s.slice(lastCopy, j)); - out.push('"'); - return j + 1; - } - - if (c === BACKSLASH) { - const backslashPos = j; - j++; - if (j < n && s.charCodeAt(j) === 117 /* 'u' */) { - const codePoint = parseHex4(j + 1); - if (codePoint >= 0) { - if (backslashPos > lastCopy) out.push(s.slice(lastCopy, backslashPos)); - out.push(String.fromCharCode(codePoint)); - j += 5; // skip 'u' + 4 hex digits - lastCopy = j; - continue; - } - j = backslashPos + 1; - } - if (j < n) j++; - continue; - } - - j++; - } - - // Unterminated string fallback - if (n > lastCopy) out.push(s.slice(lastCopy, n)); - return n; - }; - - // --- Main scan loop --- - let i = 0; while (i < n) { + // skip whitespace inline while (i < n && WHITESPACE[s.charCodeAt(i)]) i++; if (i >= n) break; const c = s.charCodeAt(i); if (c === QUOTE) { - i = scanString(i); + const start = i++; + while (i < n) { + const cc = s.charCodeAt(i); + if (cc === QUOTE) { i++; break; } + if (cc === BACKSLASH) { + i++; + if (decodeUnicode && s[i] === 'u' && i + 4 < n) i += 5; + else i++; + } else i++; + } + out.push(s.slice(start, i)); continue; } if (c === OPEN_BRACE || c === OPEN_BRACKET) { - const openCh = s[i]; - const closeCh = c === OPEN_BRACE ? '}' : ']'; + const openCh = s[i], closeCh = c === OPEN_BRACE ? '}' : ']'; let k = i + 1; while (k < n && WHITESPACE[s.charCodeAt(k)]) k++; - if (k < n && s[k] === closeCh) { - out.push(openCh, closeCh); - i = k + 1; - continue; - } + if (k < n && s[k] === closeCh) { out.push(openCh + closeCh); i = k + 1; continue; } out.push(openCh); if (pretty) out.push('\n', getIndent(level + 1)); level++; @@ -149,10 +76,9 @@ function fastJsonFormat(input, indent = ' ') { } if (c === CLOSE_BRACE || c === CLOSE_BRACKET) { - level = level > 0 ? level - 1 : 0; + level = Math.max(0, level - 1); if (pretty) out.push('\n', getIndent(level)); - out.push(s[i]); - i++; + out.push(s[i++]); continue; } @@ -164,21 +90,15 @@ function fastJsonFormat(input, indent = ' ') { } if (c === COLON) { - if (pretty) out.push(':', ' '); - else out.push(':'); + out.push(pretty ? ': ' : ':'); i++; continue; } - // Fast atom scan - let j = i; - while (j < n) { - const cj = s.charCodeAt(j); - if (STRUCTURAL[cj] || WHITESPACE[cj]) break; - j++; - } - if (j > i) out.push(s.slice(i, j)); - i = j; + // atom (fast inline scan) + const start = i; + while (i < n && !STRUCTURAL[s.charCodeAt(i)] && !WHITESPACE[s.charCodeAt(i)]) i++; + out.push(s.slice(start, i)); } return out.join(''); From da21ecb10860b96290d4273c84c73b753ab81b05 Mon Sep 17 00:00:00 2001 From: Sumith Kumar Saini Date: Mon, 10 Nov 2025 00:46:28 +0530 Subject: [PATCH 5/7] perf(json): optimize fastJsonFormat with chunked output and Unicode decoding --- src/index.js | 93 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 63 insertions(+), 30 deletions(-) diff --git a/src/index.js b/src/index.js index d19513c..954309f 100644 --- a/src/index.js +++ b/src/index.js @@ -5,16 +5,58 @@ const WHITESPACE = new Uint8Array(128); [9, 10, 13, 32].forEach((c) => (WHITESPACE[c] = 1)); // \t \n \r space })(); +// High-performance Unicode decoding without regex +function decodeUnicodeString(str) { + if (str.indexOf('\\u') === -1) return str; + let out = ''; + const n = str.length; + for (let i = 0; i < n; i++) { + const ch = str.charCodeAt(i); + if (ch === 92 && str.charCodeAt(i + 1) === 117 && i + 5 < n) { // \u + const code = parseInt(str.substr(i + 2, 4), 16); + if (!isNaN(code)) { + // Handle surrogate pairs + if (code >= 0xd800 && code <= 0xdbff && i + 11 < n && + str.charCodeAt(i + 6) === 92 && str.charCodeAt(i + 7) === 117) { + const low = parseInt(str.substr(i + 8, 4), 16); + if (!isNaN(low) && low >= 0xdc00 && low <= 0xdfff) { + out += String.fromCodePoint(((code - 0xd800) << 10) + (low - 0xdc00) + 0x10000); + i += 11; + continue; + } + } + out += String.fromCharCode(code); + i += 5; + continue; + } + } + out += str[i]; + } + return out; +} + +// High-performance JSON formatter function fastJsonFormat(input, indent = ' ') { if (input === undefined) return ''; if (typeof input !== 'string') { try { return JSON.stringify(input, null, indent); } catch { return ''; } } - const s = String(input); + const s = input; const n = s.length; const pretty = typeof indent === 'string' && indent.length > 0; - const out = []; + + // chunked output builder (avoids large Array.push overhead) + const CHUNK_SIZE = 1 << 16; // 64KB per chunk + const chunks = []; + let buffer = ''; + const flush = () => { chunks.push(buffer); buffer = ''; }; + const write = (x) => { + buffer += x; + if (buffer.length > CHUNK_SIZE) flush(); + }; + + // precomputed indents const indents = ['']; const getIndent = (k) => { if (!pretty) return ''; @@ -31,18 +73,8 @@ function fastJsonFormat(input, indent = ' ') { OPEN_BRACKET = 91, CLOSE_BRACKET = 93, COMMA = 44, COLON = 58; let i = 0, level = 0; - let decodeUnicode = s.indexOf('\\u') >= 0; // enable only if needed - - const parseHex4 = (j) => { - const c1 = s.charCodeAt(j), c2 = s.charCodeAt(j + 1), - c3 = s.charCodeAt(j + 2), c4 = s.charCodeAt(j + 3); - const isHex = (x) => (x >= 48 && x <= 57) || (x >= 65 && x <= 70) || (x >= 97 && x <= 102); - if (!isHex(c1) || !isHex(c2) || !isHex(c3) || !isHex(c4)) return -1; - return ((c1 & 15) << 12) | ((c2 & 15) << 8) | ((c3 & 15) << 4) | (c4 & 15); - }; while (i < n) { - // skip whitespace inline while (i < n && WHITESPACE[s.charCodeAt(i)]) i++; if (i >= n) break; @@ -53,23 +85,23 @@ function fastJsonFormat(input, indent = ' ') { while (i < n) { const cc = s.charCodeAt(i); if (cc === QUOTE) { i++; break; } - if (cc === BACKSLASH) { - i++; - if (decodeUnicode && s[i] === 'u' && i + 4 < n) i += 5; - else i++; - } else i++; + if (cc === BACKSLASH) i += 2; + else i++; } - out.push(s.slice(start, i)); + const inner = s.slice(start + 1, i - 1); + const decoded = decodeUnicodeString(inner); + write('"'); write(decoded); write('"'); continue; } if (c === OPEN_BRACE || c === OPEN_BRACKET) { - const openCh = s[i], closeCh = c === OPEN_BRACE ? '}' : ']'; + const openCh = s[i]; + const closeCh = c === OPEN_BRACE ? '}' : ']'; let k = i + 1; while (k < n && WHITESPACE[s.charCodeAt(k)]) k++; - if (k < n && s[k] === closeCh) { out.push(openCh + closeCh); i = k + 1; continue; } - out.push(openCh); - if (pretty) out.push('\n', getIndent(level + 1)); + if (k < n && s[k] === closeCh) { write(openCh + closeCh); i = k + 1; continue; } + write(openCh); + if (pretty) { write('\n'); write(getIndent(level + 1)); } level++; i++; continue; @@ -77,31 +109,32 @@ function fastJsonFormat(input, indent = ' ') { if (c === CLOSE_BRACE || c === CLOSE_BRACKET) { level = Math.max(0, level - 1); - if (pretty) out.push('\n', getIndent(level)); - out.push(s[i++]); + if (pretty) { write('\n'); write(getIndent(level)); } + write(s[i++]); continue; } if (c === COMMA) { - out.push(','); - if (pretty) out.push('\n', getIndent(level)); + write(','); + if (pretty) { write('\n'); write(getIndent(level)); } i++; continue; } if (c === COLON) { - out.push(pretty ? ': ' : ':'); + if (pretty) write(': '); + else write(':'); i++; continue; } - // atom (fast inline scan) const start = i; while (i < n && !STRUCTURAL[s.charCodeAt(i)] && !WHITESPACE[s.charCodeAt(i)]) i++; - out.push(s.slice(start, i)); + write(s.slice(start, i)); } - return out.join(''); + if (buffer.length) chunks.push(buffer); + return chunks.join(''); } module.exports = fastJsonFormat; From 573e532ab6f87e470f00d004bd1a070a95d4f8d4 Mon Sep 17 00:00:00 2001 From: Sumith Kumar Saini Date: Wed, 12 Nov 2025 23:43:41 +0530 Subject: [PATCH 6/7] refactor(json): improve performance and readability of fastJsonFormat --- src/index.js | 349 ++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 250 insertions(+), 99 deletions(-) diff --git a/src/index.js b/src/index.js index 954309f..b6b2234 100644 --- a/src/index.js +++ b/src/index.js @@ -1,140 +1,291 @@ -const STRUCTURAL = new Uint8Array(128); -const WHITESPACE = new Uint8Array(128); +/** + * Character classification lookup tables for JSON formatting. + * STRUCTURAL: Marks structural JSON characters like { } [ ] : , + * WHITESPACE: Marks whitespace characters (space, tab, newline, etc.) + */ +const STRUCTURAL_CHAR_CODES = new Uint8Array(128); +const WHITESPACE_CHAR_CODES = new Uint8Array(128); + +// Character codes for key JSON tokens +const CHAR_CODE = { + QUOTE: 34, + BACKSLASH: 92, + FORWARD_SLASH: 47, + OPEN_BRACE: 123, + CLOSE_BRACE: 125, + OPEN_BRACKET: 91, + CLOSE_BRACKET: 93, + COMMA: 44, + COLON: 58, +}; + (() => { - [34, 44, 58, 91, 93, 123, 125].forEach((c) => (STRUCTURAL[c] = 1)); // " , : [ ] { } - [9, 10, 13, 32].forEach((c) => (WHITESPACE[c] = 1)); // \t \n \r space + // JSON structural characters: " , : [ ] { } + [34, 44, 58, 91, 93, 123, 125].forEach( + (charCode) => (STRUCTURAL_CHAR_CODES[charCode] = 1) + ); + // Whitespace characters: \t \n \r space + [9, 10, 13, 32].forEach((charCode) => (WHITESPACE_CHAR_CODES[charCode] = 1)); })(); -// High-performance Unicode decoding without regex -function decodeUnicodeString(str) { - if (str.indexOf('\\u') === -1) return str; - let out = ''; - const n = str.length; - for (let i = 0; i < n; i++) { - const ch = str.charCodeAt(i); - if (ch === 92 && str.charCodeAt(i + 1) === 117 && i + 5 < n) { // \u - const code = parseInt(str.substr(i + 2, 4), 16); +/** + * Decodes escaped Unicode sequences (e.g. "\\u0041") to actual characters. + * Optimized to avoid regex and minimize string allocations. + * + * @param {string} input - String potentially containing \uXXXX sequences. + * @returns {string} - Decoded string with proper Unicode characters. + */ +function decodeUnicodeEscapes(input) { + if (input.indexOf("\\u") === -1 && input.indexOf("\\/") === -1) { + return input; + } + + let output = ""; + let i = 0; + const len = input.length; + + while (i < len) { + const ch = input.charCodeAt(i); + + // Handle \uXXXX + if (ch === 92 && i + 5 < len && input.charCodeAt(i + 1) === 117) { + const hex = input.substr(i + 2, 4); + const code = parseInt(hex, 16); if (!isNaN(code)) { - // Handle surrogate pairs - if (code >= 0xd800 && code <= 0xdbff && i + 11 < n && - str.charCodeAt(i + 6) === 92 && str.charCodeAt(i + 7) === 117) { - const low = parseInt(str.substr(i + 8, 4), 16); - if (!isNaN(low) && low >= 0xdc00 && low <= 0xdfff) { - out += String.fromCodePoint(((code - 0xd800) << 10) + (low - 0xdc00) + 0x10000); - i += 11; - continue; - } - } - out += String.fromCharCode(code); - i += 5; + output += String.fromCharCode(code); + i += 6; continue; } } - out += str[i]; + + // Handle escaped forward slash + if (ch === 92 && i + 1 < len && input.charCodeAt(i + 1) === 47) { + output += "/"; + i += 2; + continue; + } + + // Copy normal character + output += input[i]; + i++; } - return out; + + return output; +} + +/** + * Safely converts a string or `String` object to its primitive string representation. + * If the input is a `String` object, it calls `toString()`; if it is already a primitive string, + * the function returns it unchanged. This ensures safe conversion without throwing errors. + * + * @param {any | String} str - The string or `String` object to convert. + * @returns {string} The primitive string representation of the input. + * + * @example + * toStringSafe("hello"); // "hello" + * toStringSafe(new String("world")); // "world" + */ +function toStringSafe(input) { + return input instanceof String ? input.toString() : input; } -// High-performance JSON formatter -function fastJsonFormat(input, indent = ' ') { - if (input === undefined) return ''; - if (typeof input !== 'string') { - try { return JSON.stringify(input, null, indent); } catch { return ''; } +/** + * High-performance JSON pretty printer. + * Works directly on strings (no JSON.parse), efficiently scanning and reformatting. + * + * @param {string|object} input - The JSON string or JS object to format. + * @param {string} indentString - Indentation (e.g., " " or "\t"). Defaults to 2 spaces. + * @returns {string} - Formatted JSON-like output. + */ +function fastJsonFormat(inputRaw, indentString = " ") { + const input = toStringSafe(inputRaw); + if (input === undefined) return ""; + + // Handle non-string input by delegating to JSON.stringify + if (typeof input !== "string") { + try { + return JSON.stringify(input, null, indentString); + } catch { + return ""; + } } - const s = input; - const n = s.length; - const pretty = typeof indent === 'string' && indent.length > 0; - - // chunked output builder (avoids large Array.push overhead) - const CHUNK_SIZE = 1 << 16; // 64KB per chunk - const chunks = []; - let buffer = ''; - const flush = () => { chunks.push(buffer); buffer = ''; }; - const write = (x) => { - buffer += x; - if (buffer.length > CHUNK_SIZE) flush(); - }; + const jsonText = input; + const jsonLength = jsonText.length; + const shouldPrettyPrint = + typeof indentString === "string" && indentString.length > 0; + + // Buffered writer setup to reduce string concatenation cost + const CHUNK_SIZE = Math.min(1 << 16, Math.max(1 << 12, input.length / 8)); // 64 KB + let writeBuffer = ""; + + // On Over-provision by 50% to avoid reallocation. + // Pretty printing usually expands text by less then 2 times. + const encoder = new TextEncoder(); + let resultArray = new Uint8Array((jsonLength * 3) << 1); + let offset = 0; - // precomputed indents - const indents = ['']; - const getIndent = (k) => { - if (!pretty) return ''; - if (indents[k]) return indents[k]; - let cur = indents[indents.length - 1]; - for (let j = indents.length; j <= k; j++) { - cur += indent; - indents[j] = cur; + const flushBuffer = (exit) => { + if (!writeBuffer) return; + const encoded = encoder.encode(writeBuffer); + const needed = offset + encoded.length; + + if (needed > resultArray.length) { + const newLength = Math.max(needed, resultArray.length << 1); + const newArray = new Uint8Array(newLength); + newArray.set(resultArray.subarray(0, offset)); + resultArray = newArray; } - return indents[k]; + + resultArray.set(encoded, offset); + offset = needed; + + if (!exit) writeBuffer = ""; + }; + + const writeToBuffer = (content) => { + writeBuffer += content; + if (writeBuffer.length > CHUNK_SIZE) flushBuffer(); }; - const QUOTE = 34, BACKSLASH = 92, OPEN_BRACE = 123, CLOSE_BRACE = 125, - OPEN_BRACKET = 91, CLOSE_BRACKET = 93, COMMA = 44, COLON = 58; + // Cache indentation strings to avoid recomputation + const indentCache = [""]; + const getIndentation = (level) => { + if (!shouldPrettyPrint) return ""; + if (indentCache[level]) return indentCache[level]; + let lastIndent = indentCache[indentCache.length - 1]; + for (let depth = indentCache.length; depth <= level; depth++) { + lastIndent += indentString; + indentCache[depth] = lastIndent; + } + return indentCache[level]; + }; - let i = 0, level = 0; + let index = 0; + let currentIndentLevel = 0; - while (i < n) { - while (i < n && WHITESPACE[s.charCodeAt(i)]) i++; - if (i >= n) break; + // === Main scanning loop === + while (index < jsonLength) { + // Skip whitespace + while ( + index < jsonLength && + WHITESPACE_CHAR_CODES[jsonText.charCodeAt(index)] + ) { + index++; + } + if (index >= jsonLength) break; - const c = s.charCodeAt(i); + const currentCharCode = jsonText.charCodeAt(index); - if (c === QUOTE) { - const start = i++; - while (i < n) { - const cc = s.charCodeAt(i); - if (cc === QUOTE) { i++; break; } - if (cc === BACKSLASH) i += 2; - else i++; + // === Handle String Literals === + if (currentCharCode === CHAR_CODE.QUOTE) { + const stringStart = index++; + while (index < jsonLength) { + const nextChar = jsonText.charCodeAt(index); + if (nextChar === CHAR_CODE.QUOTE) { + index++; + break; + } + if (nextChar === CHAR_CODE.BACKSLASH) { + index += 2; + } else { + index++; + } } - const inner = s.slice(start + 1, i - 1); - const decoded = decodeUnicodeString(inner); - write('"'); write(decoded); write('"'); + + const innerContent = jsonText.slice(stringStart + 1, index - 1); + const decodedString = decodeUnicodeEscapes(innerContent); + + writeToBuffer('"'); + writeToBuffer(decodedString); + writeToBuffer('"'); continue; } - if (c === OPEN_BRACE || c === OPEN_BRACKET) { - const openCh = s[i]; - const closeCh = c === OPEN_BRACE ? '}' : ']'; - let k = i + 1; - while (k < n && WHITESPACE[s.charCodeAt(k)]) k++; - if (k < n && s[k] === closeCh) { write(openCh + closeCh); i = k + 1; continue; } - write(openCh); - if (pretty) { write('\n'); write(getIndent(level + 1)); } - level++; - i++; + // === Handle Opening Braces / Brackets === + if ( + currentCharCode === CHAR_CODE.OPEN_BRACE || + currentCharCode === CHAR_CODE.OPEN_BRACKET + ) { + const openChar = jsonText[index]; + const closeChar = currentCharCode === CHAR_CODE.OPEN_BRACE ? "}" : "]"; + + // Check for empty object/array: {} or [] + let lookaheadIndex = index + 1; + while ( + lookaheadIndex < jsonLength && + WHITESPACE_CHAR_CODES[jsonText.charCodeAt(lookaheadIndex)] + ) { + lookaheadIndex++; + } + if ( + lookaheadIndex < jsonLength && + jsonText[lookaheadIndex] === closeChar + ) { + writeToBuffer(openChar + closeChar); + index = lookaheadIndex + 1; + continue; + } + + writeToBuffer(openChar); + if (shouldPrettyPrint) { + writeToBuffer("\n"); + writeToBuffer(getIndentation(currentIndentLevel + 1)); + } + currentIndentLevel++; + index++; continue; } - if (c === CLOSE_BRACE || c === CLOSE_BRACKET) { - level = Math.max(0, level - 1); - if (pretty) { write('\n'); write(getIndent(level)); } - write(s[i++]); + // === Handle Closing Braces / Brackets === + if ( + currentCharCode === CHAR_CODE.CLOSE_BRACE || + currentCharCode === CHAR_CODE.CLOSE_BRACKET + ) { + currentIndentLevel = Math.max(0, currentIndentLevel - 1); + if (shouldPrettyPrint) { + writeToBuffer("\n"); + writeToBuffer(getIndentation(currentIndentLevel)); + } + writeToBuffer(jsonText[index++]); continue; } - if (c === COMMA) { - write(','); - if (pretty) { write('\n'); write(getIndent(level)); } - i++; + // === Handle Commas === + if (currentCharCode === CHAR_CODE.COMMA) { + writeToBuffer(","); + if (shouldPrettyPrint) { + writeToBuffer("\n"); + writeToBuffer(getIndentation(currentIndentLevel)); + } + index++; continue; } - if (c === COLON) { - if (pretty) write(': '); - else write(':'); - i++; + // === Handle Colons === + if (currentCharCode === CHAR_CODE.COLON) { + if (shouldPrettyPrint) writeToBuffer(": "); + else writeToBuffer(":"); + index++; continue; } - const start = i; - while (i < n && !STRUCTURAL[s.charCodeAt(i)] && !WHITESPACE[s.charCodeAt(i)]) i++; - write(s.slice(start, i)); + // === Handle Primitive Values (numbers, booleans, null, etc.) === + const tokenStart = index; + while ( + index < jsonLength && + !STRUCTURAL_CHAR_CODES[jsonText.charCodeAt(index)] && + !WHITESPACE_CHAR_CODES[jsonText.charCodeAt(index)] + ) { + index++; + } + writeToBuffer(jsonText.slice(tokenStart, index)); } - if (buffer.length) chunks.push(buffer); - return chunks.join(''); + // Flush any remaining buffer + if (writeBuffer.length) flushBuffer(1); + + return new TextDecoder().decode(resultArray.subarray(0, offset)); } module.exports = fastJsonFormat; From 3ebc669cbb09b39f82746ee499f4200bb84546aa Mon Sep 17 00:00:00 2001 From: Sumith Kumar Saini Date: Thu, 13 Nov 2025 01:02:43 +0530 Subject: [PATCH 7/7] refactor(json): optimize fastJsonFormat and rename utility functions --- src/index.js | 272 ++++++++++++++++++++++++++------------------------- 1 file changed, 141 insertions(+), 131 deletions(-) diff --git a/src/index.js b/src/index.js index b6b2234..cdc7781 100644 --- a/src/index.js +++ b/src/index.js @@ -1,46 +1,57 @@ /** - * Character classification lookup tables for JSON formatting. - * STRUCTURAL: Marks structural JSON characters like { } [ ] : , - * WHITESPACE: Marks whitespace characters (space, tab, newline, etc.) + * Lookup table for structural characters in JSON such as {}[],:" + * @type {Uint8Array} */ -const STRUCTURAL_CHAR_CODES = new Uint8Array(128); -const WHITESPACE_CHAR_CODES = new Uint8Array(128); +const STRUCTURAL_CHARS = new Uint8Array(128); -// Character codes for key JSON tokens +/** + * Lookup table for whitespace characters (tab, newline, carriage return, space) + * @type {Uint8Array} + */ +const WHITESPACE_CHARS = new Uint8Array(128); + +/** + * Common JSON structural character codes. + * @readonly + * @enum {number} + */ const CHAR_CODE = { - QUOTE: 34, - BACKSLASH: 92, - FORWARD_SLASH: 47, - OPEN_BRACE: 123, - CLOSE_BRACE: 125, - OPEN_BRACKET: 91, - CLOSE_BRACKET: 93, - COMMA: 44, - COLON: 58, + QUOTE: 34, // " + BACKSLASH: 92, // \ + SLASH: 47, // / + OPEN_BRACE: 123, // { + CLOSE_BRACE: 125, // } + OPEN_BRACKET: 91, // [ + CLOSE_BRACKET: 93, // ] + COMMA: 44, // , + COLON: 58, // : }; +// Initialize lookup tables (() => { - // JSON structural characters: " , : [ ] { } - [34, 44, 58, 91, 93, 123, 125].forEach( - (charCode) => (STRUCTURAL_CHAR_CODES[charCode] = 1) - ); - // Whitespace characters: \t \n \r space - [9, 10, 13, 32].forEach((charCode) => (WHITESPACE_CHAR_CODES[charCode] = 1)); + /** @type {number[]} JSON structural characters: " , : [ ] { } */ + const structuralCodes = [34, 44, 58, 91, 93, 123, 125]; + structuralCodes.forEach((code) => (STRUCTURAL_CHARS[code] = 1)); + + /** @type {number[]} Whitespace characters: \t \n \r space */ + const whitespaceCodes = [9, 10, 13, 32]; + whitespaceCodes.forEach((code) => (WHITESPACE_CHARS[code] = 1)); })(); /** - * Decodes escaped Unicode sequences (e.g. "\\u0041") to actual characters. - * Optimized to avoid regex and minimize string allocations. + * Decodes escaped Unicode sequences like "\u0041" → "A" + * Also converts escaped forward slashes "\/" → "/" * - * @param {string} input - String potentially containing \uXXXX sequences. - * @returns {string} - Decoded string with proper Unicode characters. + * @param {string} str - Input string possibly containing escape sequences + * @returns {string} Decoded string */ -function decodeUnicodeEscapes(input) { +function decodeEscapedUnicode(input) { if (input.indexOf("\\u") === -1 && input.indexOf("\\/") === -1) { return input; } - let output = ""; + /** @type {string[]} */ + let output = []; let i = 0; const len = input.length; @@ -52,53 +63,48 @@ function decodeUnicodeEscapes(input) { const hex = input.substr(i + 2, 4); const code = parseInt(hex, 16); if (!isNaN(code)) { - output += String.fromCharCode(code); + output.push(String.fromCharCode(code)); i += 6; continue; } } - // Handle escaped forward slash + // Handle "\/" if (ch === 92 && i + 1 < len && input.charCodeAt(i + 1) === 47) { - output += "/"; + output.push("/"); i += 2; continue; } - // Copy normal character - output += input[i]; + // Normal character + output.push(input[i]); i++; } - return output; + return output.join(""); } /** - * Safely converts a string or `String` object to its primitive string representation. - * If the input is a `String` object, it calls `toString()`; if it is already a primitive string, - * the function returns it unchanged. This ensures safe conversion without throwing errors. - * - * @param {any | String} str - The string or `String` object to convert. - * @returns {string} The primitive string representation of the input. + * Safely convert a String object to a primitive string. * - * @example - * toStringSafe("hello"); // "hello" - * toStringSafe(new String("world")); // "world" + * @template T + * @param {T} value - Any input value + * @returns {string | T} String value if applicable, otherwise unchanged */ -function toStringSafe(input) { +function ensureString(input) { return input instanceof String ? input.toString() : input; } /** - * High-performance JSON pretty printer. - * Works directly on strings (no JSON.parse), efficiently scanning and reformatting. + * Fast JSON pretty printer with streaming-style buffering. * - * @param {string|object} input - The JSON string or JS object to format. - * @param {string} indentString - Indentation (e.g., " " or "\t"). Defaults to 2 spaces. - * @returns {string} - Formatted JSON-like output. + * @param {string | object} inputRaw - Input JSON string or object + * @param {string} [indent=" "] - Indentation characters, e.g. two spaces or "\t" + * @returns {string} Pretty-printed JSON */ function fastJsonFormat(inputRaw, indentString = " ") { - const input = toStringSafe(inputRaw); + /** @type {string | object} */ + const input = ensureString(inputRaw); if (input === undefined) return ""; // Handle non-string input by delegating to JSON.stringify @@ -110,78 +116,90 @@ function fastJsonFormat(inputRaw, indentString = " ") { } } - const jsonText = input; - const jsonLength = jsonText.length; + /** @type {string} */ + const json = input; + const jsonLength = json.length; const shouldPrettyPrint = typeof indentString === "string" && indentString.length > 0; - // Buffered writer setup to reduce string concatenation cost + /** @type {number} */ const CHUNK_SIZE = Math.min(1 << 16, Math.max(1 << 12, input.length / 8)); // 64 KB - let writeBuffer = ""; - // On Over-provision by 50% to avoid reallocation. - // Pretty printing usually expands text by less then 2 times. + /** @type {string} */ + let textBuffer = ""; + + /** @type {TextEncoder} */ const encoder = new TextEncoder(); - let resultArray = new Uint8Array((jsonLength * 3) << 1); + + /** @type {Uint8Array} */ + let outputArray = new Uint8Array((jsonLength * 3) << 1); + + /** @type {number} */ let offset = 0; + /** + * Flush buffered text into outputArray. + * @param {boolean} [isFinal=false] - Whether this is the final flush + * @returns {void} + */ const flushBuffer = (exit) => { - if (!writeBuffer) return; - const encoded = encoder.encode(writeBuffer); + if (!textBuffer) return; + const encoded = encoder.encode(textBuffer); const needed = offset + encoded.length; - if (needed > resultArray.length) { - const newLength = Math.max(needed, resultArray.length << 1); + if (needed > outputArray.length) { + const newLength = Math.max(needed, outputArray.length << 1); const newArray = new Uint8Array(newLength); - newArray.set(resultArray.subarray(0, offset)); - resultArray = newArray; + newArray.set(outputArray.subarray(0, offset)); + outputArray = newArray; } - resultArray.set(encoded, offset); + outputArray.set(encoded, offset); offset = needed; - if (!exit) writeBuffer = ""; + if (!exit) textBuffer = ""; }; - const writeToBuffer = (content) => { - writeBuffer += content; - if (writeBuffer.length > CHUNK_SIZE) flushBuffer(); + /** + * Append text to the buffer, flushing automatically if necessary. + * @param {string} text + * @returns {void} + */ + const append = (content) => { + textBuffer += content; + if (textBuffer.length > CHUNK_SIZE) flushBuffer(); }; - // Cache indentation strings to avoid recomputation - const indentCache = [""]; - const getIndentation = (level) => { - if (!shouldPrettyPrint) return ""; - if (indentCache[level]) return indentCache[level]; - let lastIndent = indentCache[indentCache.length - 1]; - for (let depth = indentCache.length; depth <= level; depth++) { - lastIndent += indentString; - indentCache[depth] = lastIndent; - } - return indentCache[level]; - }; + /** + * Generate an indentation string for a given depth level. + * @param {number} level + * @returns {string} + */ + const makeIndent = (level) => indentString.repeat(level); + /** @type {number} */ let index = 0; - let currentIndentLevel = 0; + + /** @type {number} */ + let depth = 0; // === Main scanning loop === while (index < jsonLength) { // Skip whitespace - while ( - index < jsonLength && - WHITESPACE_CHAR_CODES[jsonText.charCodeAt(index)] - ) { - index++; - } + for ( + ; + index < jsonLength && WHITESPACE_CHARS[json.charCodeAt(index)]; + index++ + ); if (index >= jsonLength) break; - const currentCharCode = jsonText.charCodeAt(index); + const currentCharCode = json.charCodeAt(index); - // === Handle String Literals === + // String literals if (currentCharCode === CHAR_CODE.QUOTE) { const stringStart = index++; while (index < jsonLength) { - const nextChar = jsonText.charCodeAt(index); + const nextChar = json.charCodeAt(index); if (nextChar === CHAR_CODE.QUOTE) { index++; break; @@ -193,99 +211,91 @@ function fastJsonFormat(inputRaw, indentString = " ") { } } - const innerContent = jsonText.slice(stringStart + 1, index - 1); - const decodedString = decodeUnicodeEscapes(innerContent); + const innerContent = json.slice(stringStart + 1, index - 1); + const decodedString = decodeEscapedUnicode(innerContent); - writeToBuffer('"'); - writeToBuffer(decodedString); - writeToBuffer('"'); + append(`"${decodedString}"`); continue; } - // === Handle Opening Braces / Brackets === + // Opening braces/brackets if ( currentCharCode === CHAR_CODE.OPEN_BRACE || currentCharCode === CHAR_CODE.OPEN_BRACKET ) { - const openChar = jsonText[index]; + const openChar = json[index]; const closeChar = currentCharCode === CHAR_CODE.OPEN_BRACE ? "}" : "]"; - // Check for empty object/array: {} or [] - let lookaheadIndex = index + 1; + let lookahead = index + 1; while ( - lookaheadIndex < jsonLength && - WHITESPACE_CHAR_CODES[jsonText.charCodeAt(lookaheadIndex)] - ) { - lookaheadIndex++; - } - if ( - lookaheadIndex < jsonLength && - jsonText[lookaheadIndex] === closeChar - ) { - writeToBuffer(openChar + closeChar); - index = lookaheadIndex + 1; + lookahead < jsonLength && + WHITESPACE_CHARS[json.charCodeAt(lookahead)] + ) + lookahead++; + + // Empty object/array + if (lookahead < jsonLength && json[lookahead] === closeChar) { + append(openChar + closeChar); + index = lookahead + 1; continue; } - writeToBuffer(openChar); + append(openChar); if (shouldPrettyPrint) { - writeToBuffer("\n"); - writeToBuffer(getIndentation(currentIndentLevel + 1)); + append(`\n${makeIndent(depth + 1)}`); } - currentIndentLevel++; + depth++; index++; continue; } - // === Handle Closing Braces / Brackets === + // Closing braces/brackets if ( currentCharCode === CHAR_CODE.CLOSE_BRACE || currentCharCode === CHAR_CODE.CLOSE_BRACKET ) { - currentIndentLevel = Math.max(0, currentIndentLevel - 1); + depth = Math.max(0, depth - 1); if (shouldPrettyPrint) { - writeToBuffer("\n"); - writeToBuffer(getIndentation(currentIndentLevel)); + append(`\n${makeIndent(depth)}`); } - writeToBuffer(jsonText[index++]); + append(json[index++]); continue; } - // === Handle Commas === + // Comma if (currentCharCode === CHAR_CODE.COMMA) { - writeToBuffer(","); + append(","); if (shouldPrettyPrint) { - writeToBuffer("\n"); - writeToBuffer(getIndentation(currentIndentLevel)); + append(`\n${makeIndent(depth)}`); } index++; continue; } - // === Handle Colons === + // Colon if (currentCharCode === CHAR_CODE.COLON) { - if (shouldPrettyPrint) writeToBuffer(": "); - else writeToBuffer(":"); + if (shouldPrettyPrint) append(": "); + else append(":"); index++; continue; } - // === Handle Primitive Values (numbers, booleans, null, etc.) === + // Regular values (numbers, literals, etc.) const tokenStart = index; while ( index < jsonLength && - !STRUCTURAL_CHAR_CODES[jsonText.charCodeAt(index)] && - !WHITESPACE_CHAR_CODES[jsonText.charCodeAt(index)] + !STRUCTURAL_CHARS[json.charCodeAt(index)] && + !WHITESPACE_CHARS[json.charCodeAt(index)] ) { index++; } - writeToBuffer(jsonText.slice(tokenStart, index)); + append(json.slice(tokenStart, index)); } // Flush any remaining buffer - if (writeBuffer.length) flushBuffer(1); + if (textBuffer.length) flushBuffer(1); - return new TextDecoder().decode(resultArray.subarray(0, offset)); + return new TextDecoder().decode(outputArray.subarray(0, offset)); } module.exports = fastJsonFormat;