/**
 * Lookup table flagging the JSON structural characters " , : [ ] { }.
 * Indexed by UTF-16 code unit; code units >= 128 read as `undefined` (falsy).
 * @type {Uint8Array}
 */
const STRUCTURAL_CHARS = new Uint8Array(128);

/**
 * Lookup table flagging JSON whitespace (tab, newline, carriage return, space).
 * Indexed by UTF-16 code unit; code units >= 128 read as `undefined` (falsy).
 * @type {Uint8Array}
 */
const WHITESPACE_CHARS = new Uint8Array(128);

/**
 * Character codes the scanner compares against.
 * @readonly
 * @enum {number}
 */
const CHAR_CODE = Object.freeze({
  QUOTE: 34, // "
  BACKSLASH: 92, // \
  SLASH: 47, // /
  OPEN_BRACE: 123, // {
  CLOSE_BRACE: 125, // }
  OPEN_BRACKET: 91, // [
  CLOSE_BRACKET: 93, // ]
  COMMA: 44, // ,
  COLON: 58, // :
  LOWER_U: 117, // u
});

// Populate the lookup tables once at module load.
for (const code of [34, 44, 58, 91, 93, 123, 125]) {
  STRUCTURAL_CHARS[code] = 1;
}
for (const code of [9, 10, 13, 32]) {
  WHITESPACE_CHARS[code] = 1;
}

/**
 * Numeric value of a single hexadecimal digit.
 *
 * @param {number} code - UTF-16 code unit (NaN when read past the end of a string)
 * @returns {number} 0-15, or -1 when `code` is not a hex digit
 */
function hexDigitValue(code) {
  if (code >= 48 && code <= 57) return code - 48; // 0-9
  if (code >= 65 && code <= 70) return code - 55; // A-F
  if (code >= 97 && code <= 102) return code - 87; // a-f
  return -1;
}

/**
 * Parses exactly four hex digits. Unlike `parseInt(hex, 16)`, a partially
 * valid run such as "00zz" is rejected instead of being read as 0.
 *
 * @param {string} text - Text containing the digits
 * @param {number} start - Index of the first digit
 * @returns {number} The 16-bit value, or -1 when any of the four digits is missing or invalid
 */
function parseHex4(text, start) {
  let value = 0;
  for (let k = 0; k < 4; k++) {
    // charCodeAt past the end yields NaN, which hexDigitValue maps to -1.
    const digit = hexDigitValue(text.charCodeAt(start + k));
    if (digit < 0) return -1;
    value = (value << 4) | digit;
  }
  return value;
}

/**
 * Decides whether a \uXXXX escape can be replaced by the raw character while
 * keeping the surrounding JSON string literal valid and unambiguous.
 *
 * @param {number} unit - Decoded code unit, or -1 for an invalid escape
 * @returns {boolean} False for invalid escapes, control characters, `"`, `\`
 *   and surrogate halves (surrogate pairs are handled by the caller)
 */
function isSafeToDecode(unit) {
  if (unit < 0x20) return false; // invalid (-1) or control char: must stay escaped
  if (unit === CHAR_CODE.QUOTE || unit === CHAR_CODE.BACKSLASH) return false;
  return unit < 0xd800 || unit > 0xdfff;
}

/**
 * Decodes escaped Unicode sequences like "\u0041" → "A" and escaped forward
 * slashes "\/" → "/" inside the body of a JSON string literal.
 *
 * Escapes are left untouched when decoding would change the meaning or
 * validity of the literal: an escaped backslash followed by "u0041", escapes
 * without four hex digits, `\u0022`, `\u005c`, control characters, and
 * unpaired surrogates. A high surrogate escape immediately followed by a low
 * surrogate escape is decoded as one pair.
 *
 * @param {string} input - Input string possibly containing escape sequences
 * @returns {string} Decoded string (the same string when nothing was decoded)
 */
function decodeEscapedUnicode(input) {
  if (!input.includes("\\u") && !input.includes("\\/")) {
    return input;
  }

  /** @type {string[]} */
  const parts = [];
  const len = input.length;
  let copiedUpTo = 0;
  let i = 0;

  while (i < len) {
    if (input.charCodeAt(i) !== CHAR_CODE.BACKSLASH) {
      i++;
      continue;
    }

    let replacement = null;
    // Default: step over the whole escape pair so that the second character
    // of e.g. `\\` is never re-read as the start of another escape.
    let consumed = 2;

    const next = input.charCodeAt(i + 1);
    if (next === CHAR_CODE.SLASH) {
      replacement = "/";
    } else if (next === CHAR_CODE.LOWER_U) {
      const unit = parseHex4(input, i + 2);
      if (unit >= 0xd800 && unit <= 0xdbff) {
        // High surrogate: only decode together with a following low surrogate.
        const hasLowEscape =
          input.charCodeAt(i + 6) === CHAR_CODE.BACKSLASH &&
          input.charCodeAt(i + 7) === CHAR_CODE.LOWER_U;
        const low = hasLowEscape ? parseHex4(input, i + 8) : -1;
        if (low >= 0xdc00 && low <= 0xdfff) {
          replacement = String.fromCharCode(unit, low);
          consumed = 12;
        }
      } else if (isSafeToDecode(unit)) {
        replacement = String.fromCharCode(unit);
        consumed = 6;
      }
    }

    if (replacement !== null) {
      parts.push(input.slice(copiedUpTo, i), replacement);
      copiedUpTo = i + consumed;
    }
    i += consumed;
  }

  if (copiedUpTo === 0) return input; // nothing was decoded
  parts.push(input.slice(copiedUpTo));
  return parts.join("");
}

/**
 * Converts a `String` wrapper object to a primitive string.
 *
 * @template T
 * @param {T} input - Any input value
 * @returns {string | T} Primitive string for `String` objects, otherwise the value unchanged
 */
function ensureString(input) {
  return input instanceof String ? input.toString() : input;
}

/**
 * Finds the quote that closes a string literal, skipping escape pairs.
 *
 * @param {string} json - Text being scanned
 * @param {number} openIndex - Index of the opening quote
 * @returns {number} Index of the closing quote, or -1 when the literal is unterminated
 */
function findClosingQuote(json, openIndex) {
  const len = json.length;
  let i = openIndex + 1;
  while (i < len) {
    const code = json.charCodeAt(i);
    if (code === CHAR_CODE.QUOTE) return i;
    i += code === CHAR_CODE.BACKSLASH ? 2 : 1;
  }
  return -1;
}

/**
 * Pretty-prints a JSON-like string without parsing it.
 *
 * The text is scanned once: whitespace outside strings is dropped, string
 * literals are copied (with `\uXXXX` and `\/` decoded where safe), structural
 * characters drive line breaks and indentation, and everything else (numbers,
 * literals, other tokens) is copied through as-is. Output is collected as
 * string chunks and joined once, so no character data is re-encoded.
 *
 * @param {string | object} inputRaw - JSON text, or a value handed to JSON.stringify
 * @param {string} [indentString=" "] - Indentation unit, e.g. two spaces or "\t".
 *   An empty string (or any non-string, for string input) produces compact output.
 * @returns {string} Formatted JSON; "" for `undefined` input or when
 *   JSON.stringify throws or yields no string
 */
function fastJsonFormat(inputRaw, indentString = " ") {
  const input = ensureString(inputRaw);
  if (input === undefined) return "";

  // Non-string input: delegate to JSON.stringify.
  if (typeof input !== "string") {
    try {
      const text = JSON.stringify(input, null, indentString);
      // JSON.stringify returns undefined for e.g. functions and symbols.
      return typeof text === "string" ? text : "";
    } catch {
      // Deliberate best-effort: cyclic structures, BigInt, etc. yield "".
      return "";
    }
  }

  const json = input;
  const jsonLength = json.length;
  const shouldPrettyPrint =
    typeof indentString === "string" && indentString.length > 0;

  /** @type {string[]} Output chunks, joined once at the end. */
  const chunks = [];

  /** @type {string[]} "\n" + indentation, cached per depth. */
  const lineBreakCache = [];

  /**
   * Newline followed by the indentation for a depth level.
   * Only called when pretty-printing, so `indentString` is a string here.
   * @param {number} level
   * @returns {string}
   */
  const lineBreak = (level) => {
    let cached = lineBreakCache[level];
    if (cached === undefined) {
      cached = `\n${indentString.repeat(level)}`;
      lineBreakCache[level] = cached;
    }
    return cached;
  };

  let index = 0;
  let depth = 0;

  while (index < jsonLength) {
    const code = json.charCodeAt(index);

    // Whitespace between tokens is dropped.
    if (WHITESPACE_CHARS[code]) {
      index++;
      continue;
    }

    // String literals
    if (code === CHAR_CODE.QUOTE) {
      const closeIndex = findClosingQuote(json, index);
      if (closeIndex === -1) {
        // Unterminated literal: stay forgiving and copy the remainder without
        // inventing a closing quote or dropping the last character.
        chunks.push('"', decodeEscapedUnicode(json.slice(index + 1)));
        break;
      }
      chunks.push(
        '"',
        decodeEscapedUnicode(json.slice(index + 1, closeIndex)),
        '"'
      );
      index = closeIndex + 1;
      continue;
    }

    // Opening braces/brackets
    if (code === CHAR_CODE.OPEN_BRACE || code === CHAR_CODE.OPEN_BRACKET) {
      const isBrace = code === CHAR_CODE.OPEN_BRACE;
      const closeCode = isBrace
        ? CHAR_CODE.CLOSE_BRACE
        : CHAR_CODE.CLOSE_BRACKET;

      let lookahead = index + 1;
      while (
        lookahead < jsonLength &&
        WHITESPACE_CHARS[json.charCodeAt(lookahead)]
      ) {
        lookahead++;
      }

      // Empty object/array stays on one line (charCodeAt is NaN past the end).
      if (json.charCodeAt(lookahead) === closeCode) {
        chunks.push(isBrace ? "{}" : "[]");
        index = lookahead + 1;
        continue;
      }

      depth++;
      chunks.push(json[index]);
      if (shouldPrettyPrint) chunks.push(lineBreak(depth));
      index++;
      continue;
    }

    // Closing braces/brackets
    if (code === CHAR_CODE.CLOSE_BRACE || code === CHAR_CODE.CLOSE_BRACKET) {
      depth = Math.max(0, depth - 1); // tolerate unbalanced closers
      if (shouldPrettyPrint) chunks.push(lineBreak(depth));
      chunks.push(json[index]);
      index++;
      continue;
    }

    // Comma
    if (code === CHAR_CODE.COMMA) {
      chunks.push(",");
      if (shouldPrettyPrint) chunks.push(lineBreak(depth));
      index++;
      continue;
    }

    // Colon
    if (code === CHAR_CODE.COLON) {
      chunks.push(shouldPrettyPrint ? ": " : ":");
      index++;
      continue;
    }

    // Any other run (numbers, literals, ...) is copied through unchanged.
    // The current character is neither whitespace nor structural, so this
    // always consumes at least one character.
    const tokenStart = index;
    do {
      index++;
    } while (
      index < jsonLength &&
      !STRUCTURAL_CHARS[json.charCodeAt(index)] &&
      !WHITESPACE_CHARS[json.charCodeAt(index)]
    );
    chunks.push(json.slice(tokenStart, index));
  }

  return chunks.join("");
}

// CommonJS export; guarded so the file can also be evaluated where `module`
// is not defined.
if (typeof module !== "undefined" && module.exports) {
  module.exports = fastJsonFormat;
}