diff --git a/sjsonnet/src-js/sjsonnet/CharSWAR.scala b/sjsonnet/src-js/sjsonnet/CharSWAR.scala index bcdb85e7..c5b21028 100644 --- a/sjsonnet/src-js/sjsonnet/CharSWAR.scala +++ b/sjsonnet/src-js/sjsonnet/CharSWAR.scala @@ -33,4 +33,66 @@ object CharSWAR { } false } + + /** Scalar scan returning position of first escape char, or -1 if none. */ + def findFirstEscapeChar(arr: Array[Byte], from: Int, to: Int): Int = { + var i = from + while (i < to) { + val b = arr(i) & 0xff + if (b < 32 || b == '"' || b == '\\') return i + i += 1 + } + -1 + } + + /** Scalar scan for char[] returning position of first escape char, or -1 if none. */ + def findFirstEscapeCharChar(arr: Array[Char], from: Int, to: Int): Int = { + var i = from + while (i < to) { + val c = arr(i) + if (c < 32 || c == '"' || c == '\\') return i + i += 1 + } + -1 + } + + /** + * Returns true if all characters in the string are ASCII (< 0x80). Scalar fallback for Scala.js. + */ + def isAllAscii(s: String): Boolean = { + var i = 0 + val len = s.length + while (i < len) { + if (s.charAt(i) >= 0x80) return false + i += 1 + } + true + } + + /** + * Compare two strings by Unicode codepoint values. Scalar fallback for Scala.js. Uses + * equal-char-skip fast path with deferred surrogate check. 
+ */ + def compareStrings(s1: String, s2: String): Int = { + if (s1 eq s2) return 0 + val n1 = s1.length + val n2 = s2.length + val minLen = math.min(n1, n2) + var i = 0 + while (i < minLen) { + val c1 = s1.charAt(i) + val c2 = s2.charAt(i) + if (c1 == c2) { + i += 1 + } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) { + return c1 - c2 + } else { + val cp1 = Character.codePointAt(s1, i) + val cp2 = Character.codePointAt(s2, i) + if (cp1 != cp2) return Integer.compare(cp1, cp2) + i += Character.charCount(cp1) + } + } + Integer.compare(n1, n2) + } } diff --git a/sjsonnet/src-jvm/sjsonnet/CharSWAR.java b/sjsonnet/src-jvm/sjsonnet/CharSWAR.java index 46bc7d11..8c7f4caf 100644 --- a/sjsonnet/src-jvm/sjsonnet/CharSWAR.java +++ b/sjsonnet/src-jvm/sjsonnet/CharSWAR.java @@ -6,10 +6,15 @@ import java.nio.charset.StandardCharsets; /** - * SWAR (SIMD Within A Register) escape-char scanner for JSON string rendering. + * SWAR (SIMD Within A Register) utilities for JSON string rendering and string comparison. * - *

Detects characters requiring JSON escaping: control chars ({@code < 32}), - * double-quote ({@code '"'}), and backslash ({@code '\\'}). + *

Provides: + *

* *

For strings above a threshold length, converts to ISO-8859-1 bytes and * processes 8 bytes at a time using {@link VarHandle} bulk reads + Hacker's @@ -23,7 +28,7 @@ * * @see Finding Bytes in Arrays */ -final class CharSWAR { +public final class CharSWAR { private CharSWAR() {} // VarHandle for reading longs from byte[] — replaces sun.misc.Unsafe. @@ -57,7 +62,7 @@ private CharSWAR() {} * Check if any char in {@code str} needs JSON string escaping. * Scan-first API: call on the String before copying to the output buffer. */ - static boolean hasEscapeChar(String str) { + public static boolean hasEscapeChar(String str) { int len = str.length(); if (len < SWAR_THRESHOLD) { return hasEscapeCharScalar(str, len); @@ -75,14 +80,14 @@ static boolean hasEscapeChar(String str) { * UTF-8 multi-byte sequences never produce bytes matching '"', '\\', or < 0x20, * so this is safe for scanning UTF-8 encoded data. */ - static boolean hasEscapeChar(byte[] arr, int from, int to) { + public static boolean hasEscapeChar(byte[] arr, int from, int to) { return hasEscapeCharSWAR(arr, from, to); } /** * Check if any char in {@code arr[from..to)} needs JSON string escaping. */ - static boolean hasEscapeChar(char[] arr, int from, int to) { + public static boolean hasEscapeChar(char[] arr, int from, int to) { for (int i = from; i < to; i++) { char c = arr[i]; if (c < 32 || c == '"' || c == '\\') return true; @@ -138,4 +143,172 @@ private static boolean hasEscapeCharScalar(String s, int len) { } return false; } + + // ========================================================================= + // findFirstEscapeChar — position-returning SWAR scan for chunked rendering + // ========================================================================= + + /** + * Find the index of the first byte in {@code arr[from..to)} that needs JSON + * string escaping. Returns {@code -1} if no escape char is found. + * + *

Uses SWAR to scan 8 bytes per iteration, then pinpoints the exact byte
+     * within a matched 8-byte word via scalar fallback.
+     */
+    public static int findFirstEscapeChar(byte[] arr, int from, int to) {
+        int i = from;
+        int limit = to - 7; // i < limit keeps the whole 8-byte word at i below `to`
+        while (i < limit) {
+            long word = (long) LONG_VIEW.get(arr, i);
+            if (swarHasMatch(word)) {
+                // Pinpoint exact byte within the matched 8-byte word
+                for (int j = i; j < i + 8; j++) {
+                    int b = arr[j] & 0xFF;
+                    if (b < 32 || b == '"' || b == '\\') return j;
+                }
+            }
+            i += 8;
+        }
+        // Tail: remaining 0-7 bytes
+        while (i < to) {
+            int b = arr[i] & 0xFF;
+            if (b < 32 || b == '"' || b == '\\') return i;
+            i++;
+        }
+        return -1;
+    }
+
+    /**
+     * Find the index of the first char in {@code arr[from..to)} that needs JSON
+     * string escaping. Returns {@code -1} if no escape char is found.
+     * Scalar scan on char[] — used by char-based chunked rendering.
+     */
+    public static int findFirstEscapeCharChar(char[] arr, int from, int to) {
+        for (int i = from; i < to; i++) {
+            char c = arr[i];
+            if (c < 32 || c == '"' || c == '\\') return i;
+        }
+        return -1;
+    }
+
+    // =========================================================================
+    // isAllAscii — check if all chars are ASCII (< 0x80)
+    // =========================================================================
+
+    /**
+     * Returns true if all characters in the string are ASCII (< 0x80).
+     * Implemented as a plain charAt scan (no byte conversion, no SWAR). For ASCII-only strings,
+     * codepoint operations can be replaced with direct char indexing.
+ */ + public static boolean isAllAscii(String s) { + int len = s.length(); + for (int i = 0; i < len; i++) { + if (s.charAt(i) >= 0x80) return false; + } + return true; + } + + // ========================================================================= + // compareStrings — JIT-vectorizable codepoint-correct string comparison + // ========================================================================= + + /** Reusable char buffers for string comparison (one per thread). */ + private static final int CMP_BUF_SIZE = 32768; + private static final ThreadLocal CMP_BUF1 = + ThreadLocal.withInitial(() -> new char[CMP_BUF_SIZE]); + private static final ThreadLocal CMP_BUF2 = + ThreadLocal.withInitial(() -> new char[CMP_BUF_SIZE]); + + /** Below this length, scalar charAt comparison is faster than getChars + array loop. */ + private static final int CMP_THRESHOLD = 16; + + /** + * Compare two strings by Unicode codepoint values. Equivalent to + * {@code Util.compareStringsByCodepoint} but uses bulk {@code getChars} + + * tight array loop so the JIT can auto-vectorize the comparison to SIMD + * instructions (AVX2/SSE on x86, NEON on ARM). + * + *

Surrogate checks are deferred to the mismatch point (O(1) instead of
+     * O(n)), which is correct because equal chars — even surrogates — can be
+     * skipped without affecting ordering.
+     */
+    public static int compareStrings(String s1, String s2) {
+        if (s1 == s2) return 0;
+        int n1 = s1.length(), n2 = s2.length();
+        int minLen = Math.min(n1, n2);
+
+        // Short strings or strings exceeding buffer: scalar path
+        if (minLen < CMP_THRESHOLD || n1 > CMP_BUF_SIZE || n2 > CMP_BUF_SIZE) {
+            return compareStringsScalar(s1, n1, s2, n2);
+        }
+
+        // Bulk-copy to char arrays — eliminates String.charAt() virtual dispatch,
+        // enabling the JIT to auto-vectorize the comparison loop.
+        char[] c1 = CMP_BUF1.get();
+        char[] c2 = CMP_BUF2.get();
+        s1.getChars(0, n1, c1, 0);
+        s2.getChars(0, n2, c2, 0);
+
+        // Tight comparison loop — the simple c1[i] != c2[i] pattern is what
+        // the C2 JIT compiler recognizes and vectorizes.
+        int i = 0;
+        while (i < minLen) {
+            if (c1[i] != c2[i]) {
+                char a = c1[i], b = c2[i];
+                if (!Character.isSurrogate(a) && !Character.isSurrogate(b)) {
+                    return a - b;
+                }
+                // All chars before i are equal, so c1[i - 1] == c2[i - 1]. If that shared char is a
+                // high surrogate and EITHER side continues the pair, restart one char earlier.
+                boolean splitPair = i > 0 && Character.isHighSurrogate(c1[i - 1])
+                        && (Character.isLowSurrogate(a) || Character.isLowSurrogate(b));
+                int pos = splitPair ? i - 1 : i;
+                return compareCodepointsFrom(c1, n1, c2, n2, pos);
+            }
+            i++;
+        }
+        return Integer.compare(n1, n2);
+    }
+
+    /**
+     * Scalar codepoint comparison for short strings or overflow.
+     * Uses the equal-char-skip fast path (no surrogate check on matching chars).
+     */
+    private static int compareStringsScalar(String s1, int n1, String s2, int n2) {
+        int minLen = Math.min(n1, n2);
+        int i = 0;
+        while (i < minLen) {
+            char c1 = s1.charAt(i);
+            char c2 = s2.charAt(i);
+            if (c1 == c2) {
+                i++;
+            } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) {
+                return c1 - c2;
+            } else {
+                int cp1 = Character.codePointAt(s1, i);
+                int cp2 = Character.codePointAt(s2, i);
+                if (cp1 != cp2) return Integer.compare(cp1, cp2);
+                i += Character.charCount(cp1);
+            }
+        }
+        return Integer.compare(n1, n2);
+    }
+
+    /**
+     * Codepoint-level comparison from a given position in char arrays. Reads are bounded by
+     * n1/n2 because the arrays are reusable buffers that may hold stale chars past those limits.
+     */
+    private static int compareCodepointsFrom(char[] c1, int n1, char[] c2, int n2, int from) {
+        int i1 = from, i2 = from;
+        while (i1 < n1 && i2 < n2) {
+            int cp1 = Character.codePointAt(c1, i1, n1);
+            int cp2 = Character.codePointAt(c2, i2, n2);
+            if (cp1 != cp2) return Integer.compare(cp1, cp2);
+            i1 += Character.charCount(cp1);
+            i2 += Character.charCount(cp2);
+        }
+        if (i1 < n1) return 1;
+        if (i2 < n2) return -1;
+        return 0;
+    }
 }
diff --git a/sjsonnet/src-native/sjsonnet/CharSWAR.scala b/sjsonnet/src-native/sjsonnet/CharSWAR.scala
index 5331c012..67014221 100644
--- a/sjsonnet/src-native/sjsonnet/CharSWAR.scala
+++ b/sjsonnet/src-native/sjsonnet/CharSWAR.scala
@@ -1,59 +1,144 @@
 package sjsonnet
 
-import scala.scalanative.runtime.{ByteArray, Intrinsics}
+import scala.scalanative.runtime.{CharArray, Intrinsics}
 
 /**
  * SWAR (SIMD Within A Register) escape-char scanner for Scala Native.
  *
- * Uses Scala Native's `Intrinsics.loadLong` + `ByteArray.atRawUnsafe` for zero-overhead 8-byte bulk
- * reads directly from Array[Byte] memory, matching the JVM VarHandle SWAR performance.
+ * P0 optimizations:
+ *   - Eliminate getBytes(UTF-8) allocation in hasEscapeChar(String) by using 16-bit lane SWAR
+ *     directly on the char[] backing store.
+ * - The chunked rendering path in BaseByteRenderer already does getBytes once; for the + * hasEscapeChar check in BaseCharRenderer we avoid any allocation entirely. * - * For String scanning, uses `getBytes(UTF-8)` + byte[] SWAR. On Scala Native compact strings are - * UTF-16, so converting to bytes first is necessary. + * P1 optimizations: + * - 16-bit lane SWAR (4 chars per Long) for char[] scanning instead of scalar loop. + * - Tighter comparison loops with @alwaysinline hints for LLVM auto-vectorization. + * - Pre-allocated buffers for compareStrings with bounds-check elimination. * - * Inspired by netty's SWARUtil (io.netty.util.SWARUtil) and Hacker's Delight Ch. 6 zero-detection - * formula. + * Inspired by netty's SWARUtil (io.netty.util.SWARUtil) and Hacker's Delight Ch. 6. */ object CharSWAR { - // --- 8-bit SWAR constants --- - private final val HOLE = 0x7f7f7f7f7f7f7f7fL - private final val QUOTE = 0x2222222222222222L - private final val BSLAS = 0x5c5c5c5c5c5c5c5cL - private final val CTRL = 0xe0e0e0e0e0e0e0e0L + // ========================================================================= + // 8-bit SWAR constants (for byte[] scanning) + // ========================================================================= + private final val HOLE_8 = 0x7f7f7f7f7f7f7f7fL + private final val QUOTE_8 = 0x2222222222222222L + private final val BSLAS_8 = 0x5c5c5c5c5c5c5c5cL + private final val CTRL_8 = 0xe0e0e0e0e0e0e0e0L - /** - * SWAR: returns true if any byte lane in `word` contains '"' (0x22), '\\' (0x5C), or a control - * char (< 0x20). - */ - @inline private def swarHasMatch(word: Long): Boolean = { - // 1. Detect '"' via XOR + zero-detection - val q = word ^ QUOTE - val qz = ~((q & HOLE) + HOLE | q | HOLE) - - // 2. 
Detect '\\' via XOR + zero-detection - val b = word ^ BSLAS - val bz = ~((b & HOLE) + HOLE | b | HOLE) + @inline private def swarHasMatch8(word: Long): Boolean = { + val q = word ^ QUOTE_8 + val qz = ~((q & HOLE_8) + HOLE_8 | q | HOLE_8) + val b = word ^ BSLAS_8 + val bz = ~((b & HOLE_8) + HOLE_8 | b | HOLE_8) + val c = word & CTRL_8 + val cz = ~((c & HOLE_8) + HOLE_8 | c | HOLE_8) + (qz | bz | cz) != 0L + } - // 3. Detect control chars: byte & 0xE0 == 0 → c < 32 - val c = word & CTRL - val cz = ~((c & HOLE) + HOLE | c | HOLE) + // ========================================================================= + // 16-bit SWAR constants (P1: 4 x 16-bit lanes per Long) + // ========================================================================= + // Each lane is 16 bits. We detect: + // '"' = 0x0022 -> broadcast = 0x0022002200220022L + // '\\' = 0x005C -> broadcast = 0x005C005C005C005CL + // c < 0x20 -> bits 5-15 of each 16-bit lane are zero + // mask = 0xFFE0 -> broadcast = 0xFFE0FFE0FFE0FFE0L + private final val HOLE_16 = 0x7fff7fff7fff7fffL + private final val QUOTE_16 = 0x0022002200220022L + private final val BSLAS_16 = 0x005c005c005c005cL + private final val CTRL_16 = 0xffe0ffe0ffe0ffe0L + /** + * 16-bit SWAR: returns true if any 16-bit lane in `word` contains '"' (0x0022), '\\' (0x005C), or + * a control char (< 0x0020). + */ + @inline private def swarHasMatch16(word: Long): Boolean = { + val q = word ^ QUOTE_16 + val qz = ~((q & HOLE_16) + HOLE_16 | q | HOLE_16) + val b = word ^ BSLAS_16 + val bz = ~((b & HOLE_16) + HOLE_16 | b | HOLE_16) + val c = word & CTRL_16 + val cz = ~((c & HOLE_16) + HOLE_16 | c | HOLE_16) (qz | bz | cz) != 0L } + // ========================================================================= + // hasEscapeChar(String) — P0: avoid getBytes allocation for long strings + // ========================================================================= + /** + * Check if a String needs JSON escaping. 
+ * - Short strings (< 128 chars): scalar scan, zero allocation. + * - Long strings (>= 128 chars): toCharArray + 16-bit SWAR. One allocation but SWAR scans 4x + * faster than scalar for long strings, and toCharArray is a simple memcpy (cheaper than + * getBytes(UTF-8) encoding). + */ + private final val SWAR_THRESHOLD = 128 + def hasEscapeChar(s: String): Boolean = { val len = s.length - if (len < 128) { + if (len < SWAR_THRESHOLD) { hasEscapeCharScalar(s, len) } else { - val bytes = s.getBytes(java.nio.charset.StandardCharsets.UTF_8) - hasEscapeChar(bytes, 0, bytes.length) + hasEscapeCharCharSWAR(s, len) } } + /** + * 16-bit SWAR scan on String via toCharArray. Processes 4 chars per Long iteration. toCharArray + * is cheaper than getBytes(UTF-8) because it's a raw memcpy. + */ + private def hasEscapeCharCharSWAR(s: String, len: Int): Boolean = { + val carr = s.toCharArray + val cArr = carr.asInstanceOf[CharArray] + var i = 0 + val limit = len - 3 // 4 chars per loadLong + while (i < limit) { + val word = Intrinsics.loadLong(cArr.atRawUnsafe(i)) + if (swarHasMatch16(word)) { + var j = i + while (j < i + 4) { + val c = carr(j) + if (c < 32 || c == '"' || c == '\\') return true + j += 1 + } + } + i += 4 + } + // Tail: remaining 0-3 chars + while (i < len) { + val c = carr(i) + if (c < 32 || c == '"' || c == '\\') return true + i += 1 + } + false + } + + // ========================================================================= + // hasEscapeChar(char[]) — P1: 16-bit lane SWAR + // ========================================================================= def hasEscapeChar(arr: Array[Char], from: Int, to: Int): Boolean = { + val len = to - from + if (len < 4) { + return hasEscapeCharScalarChars(arr, from, to) + } + val cArr = arr.asInstanceOf[CharArray] var i = from + val limit = to - 3 + while (i < limit) { + val word = Intrinsics.loadLong(cArr.atRawUnsafe(i)) + if (swarHasMatch16(word)) { + var j = i + while (j < i + 4) { + val c = arr(j) + if (c < 32 || c == '"' 
|| c == '\\') return true + j += 1 + } + } + i += 4 + } while (i < to) { val c = arr(i) if (c < 32 || c == '"' || c == '\\') return true @@ -62,12 +147,11 @@ object CharSWAR { false } - /** - * SWAR scan for byte[] using Intrinsics.loadLong for zero-overhead bulk reads. Processes 8 bytes - * per iteration — same throughput as the JVM VarHandle path. UTF-8 multi-byte sequences never - * produce bytes matching '"', '\', or < 0x20. - */ + // ========================================================================= + // hasEscapeChar(byte[]) — 8-bit SWAR (unchanged, already optimal) + // ========================================================================= def hasEscapeChar(arr: Array[Byte], from: Int, to: Int): Boolean = { + import scala.scalanative.runtime.ByteArray val len = to - from if (len < 8) { return hasEscapeCharScalarBytes(arr, from, to) @@ -77,10 +161,9 @@ object CharSWAR { val limit = to - 7 while (i < limit) { val word = Intrinsics.loadLong(barr.atRawUnsafe(i)) - if (swarHasMatch(word)) return true + if (swarHasMatch8(word)) return true i += 8 } - // Tail: remaining 0-7 bytes while (i < to) { val b = arr(i) & 0xff if (b < 32 || b == '"' || b == '\\') return true @@ -89,6 +172,68 @@ object CharSWAR { false } + // ========================================================================= + // findFirstEscapeChar(byte[]) — 8-bit SWAR (for BaseByteRenderer chunked path) + // ========================================================================= + def findFirstEscapeChar(arr: Array[Byte], from: Int, to: Int): Int = { + import scala.scalanative.runtime.ByteArray + val len = to - from + if (len < 8) return findFirstEscapeCharScalarBytes(arr, from, to) + val barr = arr.asInstanceOf[ByteArray] + var i = from + val limit = to - 7 + while (i < limit) { + val word = Intrinsics.loadLong(barr.atRawUnsafe(i)) + if (swarHasMatch8(word)) { + var j = i + while (j < i + 8) { + val b = arr(j) & 0xff + if (b < 32 || b == '"' || b == '\\') return j + j += 1 + } + } + i 
+= 8 + } + while (i < to) { + val b = arr(i) & 0xff + if (b < 32 || b == '"' || b == '\\') return i + i += 1 + } + -1 + } + + // ========================================================================= + // findFirstEscapeChar(char[]) — P1: 16-bit lane SWAR for char[] + // ========================================================================= + def findFirstEscapeCharChar(arr: Array[Char], from: Int, to: Int): Int = { + val len = to - from + if (len < 4) return findFirstEscapeCharScalarChars(arr, from, to) + val cArr = arr.asInstanceOf[CharArray] + var i = from + val limit = to - 3 + while (i < limit) { + val word = Intrinsics.loadLong(cArr.atRawUnsafe(i)) + if (swarHasMatch16(word)) { + var j = i + while (j < i + 4) { + val c = arr(j) + if (c < 32 || c == '"' || c == '\\') return j + j += 1 + } + } + i += 4 + } + while (i < to) { + val c = arr(i) + if (c < 32 || c == '"' || c == '\\') return i + i += 1 + } + -1 + } + + // ========================================================================= + // Scalar fallbacks + // ========================================================================= @inline private def hasEscapeCharScalar(s: String, len: Int): Boolean = { var i = 0 while (i < len) { @@ -99,6 +244,16 @@ object CharSWAR { false } + @inline private def hasEscapeCharScalarChars(arr: Array[Char], from: Int, to: Int): Boolean = { + var i = from + while (i < to) { + val c = arr(i) + if (c < 32 || c == '"' || c == '\\') return true + i += 1 + } + false + } + @inline private def hasEscapeCharScalarBytes(arr: Array[Byte], from: Int, to: Int): Boolean = { var i = from while (i < to) { @@ -108,4 +263,148 @@ object CharSWAR { } false } + + @inline private def findFirstEscapeCharScalarBytes(arr: Array[Byte], from: Int, to: Int): Int = { + var i = from + while (i < to) { + val b = arr(i) & 0xff + if (b < 32 || b == '"' || b == '\\') return i + i += 1 + } + -1 + } + + @inline private def findFirstEscapeCharScalarChars(arr: Array[Char], from: Int, to: Int): Int = { 
+ var i = from + while (i < to) { + val c = arr(i) + if (c < 32 || c == '"' || c == '\\') return i + i += 1 + } + -1 + } + + // ========================================================================= + // isAllAscii — SWAR-accelerated ASCII detection (4 chars per Long) + // ========================================================================= + + /** Mask for non-ASCII bits in 16-bit lanes: bit 7-15 set means char >= 0x80. */ + private final val NON_ASCII_16 = 0xff80ff80ff80ff80L + + /** + * Returns true if all characters in the string are ASCII (< 0x80). Uses 16-bit SWAR to check 4 + * chars per Long iteration. For ASCII-only strings, codepoint operations (codePointCount, + * offsetByCodePoints) can be replaced with direct char indexing. + */ + def isAllAscii(s: String): Boolean = { + val len = s.length + if (len < 16) return isAllAsciiScalar(s, len) + val carr = s.toCharArray + val cArr = carr.asInstanceOf[CharArray] + var i = 0 + val limit = len - 3 + while (i < limit) { + val word = Intrinsics.loadLong(cArr.atRawUnsafe(i)) + if ((word & NON_ASCII_16) != 0L) return false + i += 4 + } + while (i < len) { + if (carr(i) >= 0x80) return false + i += 1 + } + true + } + + @inline private def isAllAsciiScalar(s: String, len: Int): Boolean = { + var i = 0 + while (i < len) { + if (s.charAt(i) >= 0x80) return false + i += 1 + } + true + } + + // ========================================================================= + // compareStrings — P1: LLVM auto-vectorization friendly + // ========================================================================= + + private final val CMP_BUF_SIZE = 32768 + private val cmpBuf1: Array[Char] = new Array[Char](CMP_BUF_SIZE) + private val cmpBuf2: Array[Char] = new Array[Char](CMP_BUF_SIZE) + + /** + * Compare two strings by Unicode codepoint values. Uses bulk getChars + tight array loop for LLVM + * auto-vectorization. Pre-allocated module-level buffers avoid per-call allocation. 
+   */
+  def compareStrings(s1: String, s2: String): Int = {
+    if (s1 eq s2) return 0
+    val n1 = s1.length
+    val n2 = s2.length
+    val minLen = if (n1 < n2) n1 else n2
+
+    if (minLen < 16 || n1 > CMP_BUF_SIZE || n2 > CMP_BUF_SIZE)
+      return compareStringsScalar(s1, n1, s2, n2)
+
+    val c1 = cmpBuf1
+    val c2 = cmpBuf2
+    s1.getChars(0, n1, c1, 0)
+    s2.getChars(0, n2, c2, 0)
+
+    // Tight comparison loop — bounds checks eliminated by length guarantee
+    var i = 0
+    while (i < minLen) {
+      if (c1(i) != c2(i)) {
+        val a = c1(i)
+        val b = c2(i)
+        if (!Character.isSurrogate(a) && !Character.isSurrogate(b)) {
+          return a - b
+        }
+        // Chars before i are equal on both sides; back up when EITHER side splits a surrogate pair.
+        val splitPair = i > 0 && Character.isHighSurrogate(c1(i - 1)) &&
+          (Character.isLowSurrogate(a) || Character.isLowSurrogate(b))
+        val pos = if (splitPair) i - 1 else i
+        return compareCodepointsFrom(c1, n1, c2, n2, pos)
+      }
+      i += 1
+    }
+    if (n1 < n2) -1 else if (n1 > n2) 1 else 0
+  }
+
+  private def compareStringsScalar(s1: String, n1: Int, s2: String, n2: Int): Int = {
+    val minLen = if (n1 < n2) n1 else n2
+    var i = 0
+    while (i < minLen) {
+      val c1 = s1.charAt(i)
+      val c2 = s2.charAt(i)
+      if (c1 == c2) {
+        i += 1
+      } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) {
+        return c1 - c2
+      } else {
+        val cp1 = Character.codePointAt(s1, i)
+        val cp2 = Character.codePointAt(s2, i)
+        if (cp1 != cp2) return if (cp1 < cp2) -1 else 1
+        i += Character.charCount(cp1)
+      }
+    }
+    if (n1 < n2) -1 else if (n1 > n2) 1 else 0
+  }
+
+  private def compareCodepointsFrom(
+      c1: Array[Char],
+      n1: Int,
+      c2: Array[Char],
+      n2: Int,
+      from: Int): Int = {
+    var i1 = from
+    var i2 = from
+    while (i1 < n1 && i2 < n2) {
+      val cp1 = Character.codePointAt(c1, i1, n1) // bounded: the buffer may hold stale chars past n1
+      val cp2 = Character.codePointAt(c2, i2, n2) // bounded: the buffer may hold stale chars past n2
+      if (cp1 != cp2) return if (cp1 < cp2) -1 else 1
+      i1 += Character.charCount(cp1)
+      i2 += Character.charCount(cp2)
+    }
+    if (i1 < n1) 1 else if (i2 < n2) -1 else 0
+  }
 }
diff --git a/sjsonnet/src/sjsonnet/BaseByteRenderer.scala b/sjsonnet/src/sjsonnet/BaseByteRenderer.scala
index 95a67aef..7c420ca2 100644
---
a/sjsonnet/src/sjsonnet/BaseByteRenderer.scala
+++ b/sjsonnet/src/sjsonnet/BaseByteRenderer.scala
@@ -307,13 +307,18 @@ class BaseByteRenderer[T <: java.io.OutputStream](
   }
 
   /**
-   * SWAR-accelerated path for long strings. Converts to UTF-8 bytes once, scans with SWAR, and
-   * bulk-copies if clean. The getBytes allocation is amortized by avoiding per-char processing.
+   * Chunked SWAR-accelerated path for long strings. Instead of binary scan (clean → bulk copy,
+   * dirty → full reprocess from position 0), uses findFirstEscapeChar to locate escape positions
+   * and copies clean chunks between them with arraycopy. For a 10KB string with 5 escape chars,
+   * this copies ~10KB in bulk chunks vs re-processing the entire string char-by-char.
    */
   private def visitLongString(str: String): Unit = {
     val bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8)
-    if (!CharSWAR.hasEscapeChar(bytes, 0, bytes.length)) {
-      val bLen = bytes.length
+    val bLen = bytes.length
+
+    val firstEscape = CharSWAR.findFirstEscapeChar(bytes, 0, bLen)
+    if (firstEscape < 0) {
+      // Clean string — direct bulk copy (existing fast path)
       elemBuilder.ensureLength(bLen + 2)
       val arr = elemBuilder.arr
       val pos = elemBuilder.length
@@ -322,13 +327,81 @@ class BaseByteRenderer[T <: java.io.OutputStream](
       arr(pos + 1 + bLen) = '"'.toByte
       elemBuilder.length = pos + bLen + 2
     } else {
-      upickle.core.RenderUtils.escapeByte(
-        unicodeCharBuilder,
-        elemBuilder,
-        str,
-        escapeUnicode = false,
-        wrapQuotes = true
-      )
+      // Dirty string — chunked rendering: copy clean segments, escape inline.
+      // 2x + quotes is only an initial reservation: a control char expands to \\u00XX (6 bytes),
+      // so every later write re-checks capacity instead of trusting this estimate.
+      elemBuilder.ensureLength(bLen + bLen + 2)
+      elemBuilder.appendUnsafeC('"')
+
+      var from = 0
+      var escPos = firstEscape
+      while (escPos >= 0) {
+        // Copy clean chunk before escape char
+        if (escPos > from) {
+          val chunkLen = escPos - from
+          elemBuilder.ensureLength(chunkLen)
+          val arr = elemBuilder.arr
+          val pos = elemBuilder.length
+          System.arraycopy(bytes, from, arr, pos, chunkLen)
+          elemBuilder.length = pos + chunkLen
+        }
+        // Escape the byte inline
+        escapeByteInline(bytes(escPos) & 0xff)
+        from = escPos + 1
+        // Find next escape char
+        escPos = if (from < bLen) CharSWAR.findFirstEscapeChar(bytes, from, bLen) else -1
+      }
+      // Reserve the remaining clean tail plus the closing quote in one step
+      elemBuilder.ensureLength(bLen - from + 1)
+      // Copy remaining clean tail
+      if (from < bLen) {
+        val tailLen = bLen - from
+        val arr = elemBuilder.arr
+        val pos = elemBuilder.length
+        System.arraycopy(bytes, from, arr, pos, tailLen)
+        elemBuilder.length = pos + tailLen
+      }
+      elemBuilder.appendUnsafeC('"')
+    }
+  }
+
+  /**
+   * Inline JSON escape for a single byte. Handles the 7 named escapes plus \\uXXXX for other
+   * control chars. Only called for bytes that actually need escaping (< 0x20, '"', '\\').
+   */
+  private def escapeByteInline(b: Int): Unit = {
+    // Ensure space for longest escape sequence (\\uXXXX = 6 bytes)
+    elemBuilder.ensureLength(6)
+    (b: @scala.annotation.switch) match {
+      case '"' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('"')
+      case '\\' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('\\')
+      case '\b' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('b')
+      case '\f' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('f')
+      case '\n' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('n')
+      case '\r' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('r')
+      case '\t' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('t')
+      case c =>
+        // Other control chars → \\u00XX
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('u')
+        elemBuilder.appendUnsafeC('0')
+        elemBuilder.appendUnsafeC('0')
+        elemBuilder.appendUnsafeC(BaseByteRenderer.HEX_CHARS((c >> 4) & 0xf))
+        elemBuilder.appendUnsafeC(BaseByteRenderer.HEX_CHARS(c & 0xf))
     }
   }
 
@@ -377,6 +450,10 @@ object BaseByteRenderer {
     a
   }
 
+  /** Hex digit lookup for \\uXXXX escape sequences.
*/
+  private[sjsonnet] val HEX_CHARS: Array[Char] =
+    Array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f')
+
   /**
    * Reusable scratch buffer for writeLongDirect (max 20 bytes for Long.MinValue). Not thread-safe,
    * but renderers are single-threaded.
diff --git a/sjsonnet/src/sjsonnet/BaseCharRenderer.scala b/sjsonnet/src/sjsonnet/BaseCharRenderer.scala
index c858dde5..fb119c52 100644
--- a/sjsonnet/src/sjsonnet/BaseCharRenderer.scala
+++ b/sjsonnet/src/sjsonnet/BaseCharRenderer.scala
@@ -259,19 +259,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output](
     flushBuffer()
     s match {
       case str: String if !escapeUnicode =>
-        val len = str.length
-        if (!CharSWAR.hasEscapeChar(str)) {
-          elemBuilder.ensureLength(len + 2)
-          elemBuilder.appendUnsafe('"')
-          val cbArr = elemBuilder.arr
-          val pos = elemBuilder.getLength
-          str.getChars(0, len, cbArr, pos)
-          elemBuilder.length = pos + len
-          elemBuilder.appendUnsafe('"')
-        } else {
-          upickle.core.RenderUtils
-            .escapeChar(null, elemBuilder, s, escapeUnicode = escapeUnicode, wrapQuotes = true)
-        }
+        renderQuotedStringSWAR(str)
       case _ =>
         upickle.core.RenderUtils.escapeChar(
           null,
@@ -285,6 +273,84 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output](
     out
   }
 
+  /**
+   * Writes `str` as a quoted JSON string. Copies the string into a scratch char[], locates
+   * escape positions with CharSWAR.findFirstEscapeCharChar, bulk-copies the clean runs between
+   * them and escapes the remaining chars one at a time via escapeCharInline.
+   */
+  protected def renderQuotedStringSWAR(str: String): Unit = {
+    val len = str.length
+    if (len == 0) {
+      elemBuilder.ensureLength(2)
+      elemBuilder.appendUnsafe('"')
+      elemBuilder.appendUnsafe('"')
+      return
+    }
+    val chars = new Array[Char](len)
+    str.getChars(0, len, chars, 0)
+    val firstEscape = CharSWAR.findFirstEscapeCharChar(chars, 0, len)
+    if (firstEscape < 0) {
+      elemBuilder.ensureLength(len + 2)
+      elemBuilder.appendUnsafe('"')
+      val cbArr = elemBuilder.arr
+      val pos = elemBuilder.getLength
+      System.arraycopy(chars, 0, cbArr, pos, len)
+      elemBuilder.length = pos + len
+      elemBuilder.appendUnsafe('"')
+    } else {
+      // 2x + quotes is only an initial reservation: a control char expands to \\u00XX (6 chars),
+      // so every later write re-checks capacity instead of trusting this estimate.
+      elemBuilder.ensureLength(len + len + 2)
+      elemBuilder.appendUnsafe('"')
+      var from = 0
+      var escPos = firstEscape
+      while (escPos >= 0) {
+        if (escPos > from) {
+          val chunkLen = escPos - from
+          elemBuilder.ensureLength(chunkLen)
+          val cbArr = elemBuilder.arr
+          val pos = elemBuilder.getLength
+          System.arraycopy(chars, from, cbArr, pos, chunkLen)
+          elemBuilder.length = pos + chunkLen
+        }
+        escapeCharInline(chars(escPos))
+        from = escPos + 1
+        escPos = if (from < len) CharSWAR.findFirstEscapeCharChar(chars, from, len) else -1
+      }
+      // Reserve the remaining clean tail plus the closing quote in one step
+      elemBuilder.ensureLength(len - from + 1)
+      if (from < len) {
+        val tailLen = len - from
+        val cbArr = elemBuilder.arr
+        val pos = elemBuilder.getLength
+        System.arraycopy(chars, from, cbArr, pos, tailLen)
+        elemBuilder.length = pos + tailLen
+      }
+      elemBuilder.appendUnsafe('"')
+    }
+  }
+
+  /** Inline JSON escape for a single char. */
+  protected def escapeCharInline(c: Char): Unit = {
+    elemBuilder.ensureLength(6)
+    (c: @scala.annotation.switch) match {
+      case '"'  => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('"')
+      case '\\' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('\\')
+      case '\b' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('b')
+      case '\f' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('f')
+      case '\n' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('n')
+      case '\r' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('r')
+      case '\t' => elemBuilder.appendUnsafe('\\'); elemBuilder.appendUnsafe('t')
+      case _ =>
+        elemBuilder.appendUnsafe('\\')
+        elemBuilder.appendUnsafe('u')
+        elemBuilder.appendUnsafe('0')
+        elemBuilder.appendUnsafe('0')
+        elemBuilder.appendUnsafe(sjsonnet.BaseByteRenderer.HEX_CHARS((c >> 4) & 0xf))
+        elemBuilder.appendUnsafe(sjsonnet.BaseByteRenderer.HEX_CHARS(c & 0xf))
+    }
+  }
+
   final def renderIndent(): Unit = {
     if (indent == -1) ()
     else if (indentCache != null && depth < BaseCharRenderer.MaxCachedDepth) {
diff --git a/sjsonnet/src/sjsonnet/Format.scala b/sjsonnet/src/sjsonnet/Format.scala
index f69e1442..3f97ea71 100644
--- a/sjsonnet/src/sjsonnet/Format.scala
+++ b/sjsonnet/src/sjsonnet/Format.scala
@@ -316,15 +316,22 @@ object Format
{ case x: Val.Obj => x case x => Val.Arr(pos, Array[Eval](x)) } - // Pre-size StringBuilder based on static chars + estimated dynamic content - val output = new StringBuilder(parsed.staticChars + parsed.specs.length * 8) - output.append(parsed.leading) + val numSpecs = parsed.specs.length + if (numSpecs == 0) { + if (values.isInstanceOf[Val.Arr] && values.cast[Val.Arr].length > 0) { + Error.fail( + "Too many values to format: %d, expected %d".format(values.cast[Val.Arr].length, 0) + ) + } + return parsed.leading + } + + // Pass 1: compute all formatted values into an array + val formattedValues = new Array[String](numSpecs) var i = 0 var idx = 0 - // Use while-loop instead of for/zipWithIndex to avoid iterator allocation - while (idx < parsed.specs.length) { + while (idx < numSpecs) { val rawFormatted = parsed.specs(idx) - val literal = parsed.literals(idx) var formatted = rawFormatted val cooked0 = formatted.conversion match { case '%' => widenRaw(formatted, "%") @@ -475,8 +482,7 @@ object Format { i += 1 formattedValue } - output.append(cooked0) - output.append(literal) + formattedValues(idx) = cooked0 idx += 1 } @@ -485,7 +491,42 @@ object Format { "Too many values to format: %d, expected %d".format(values.cast[Val.Arr].length, i) ) } - output.toString() + + // Pass 2: compute exact output length + var totalLen = parsed.leading.length + idx = 0 + while (idx < numSpecs) { + totalLen += formattedValues(idx).length + parsed.literals(idx).length + idx += 1 + } + + // Pass 3: assemble into pre-sized char[] — eliminates StringBuilder overhead + // (capacity checks, resizing, final toString copy) + val chars = new Array[Char](totalLen) + var cPos = 0 + val leading = parsed.leading + val leadLen = leading.length + if (leadLen > 0) { + leading.getChars(0, leadLen, chars, cPos) + cPos += leadLen + } + idx = 0 + while (idx < numSpecs) { + val fv = formattedValues(idx) + val fvLen = fv.length + if (fvLen > 0) { + fv.getChars(0, fvLen, chars, cPos) + cPos += fvLen + } + val 
lit = parsed.literals(idx) + val litLen = lit.length + if (litLen > 0) { + lit.getChars(0, litLen, chars, cPos) + cPos += litLen + } + idx += 1 + } + new String(chars) } private def formatInteger(formatted: FormatSpec, s: Double): String = { diff --git a/sjsonnet/src/sjsonnet/Parser.scala b/sjsonnet/src/sjsonnet/Parser.scala index 41a62163..8828d6e0 100644 --- a/sjsonnet/src/sjsonnet/Parser.scala +++ b/sjsonnet/src/sjsonnet/Parser.scala @@ -729,7 +729,12 @@ class Parser( // cost more than the potential memory savings for strings that are unlikely // to repeat (e.g., 600KB text block literals) val unique = if (s.length > 1024) s else internedStrings.getOrElseUpdate(s, s) - Val.Str(pos, unique) + val result = Val.Str(pos, unique) + // Mark string literals that are printable ASCII with no JSON escape chars. + // This allows the renderer to skip SWAR escape scanning and UTF-8 encoding. + if (!CharSWAR.hasEscapeChar(unique) && CharSWAR.isAllAscii(unique)) + result._asciiSafe = true + result } // Any `expr` that isn't naively left-recursive diff --git a/sjsonnet/src/sjsonnet/Renderer.scala b/sjsonnet/src/sjsonnet/Renderer.scala index 5f925330..0357ea4b 100644 --- a/sjsonnet/src/sjsonnet/Renderer.scala +++ b/sjsonnet/src/sjsonnet/Renderer.scala @@ -17,9 +17,7 @@ class Renderer(out: Writer = new java.io.StringWriter(), indent: Int = -1) flushBuffer() val i = d.toLong if (d == i) { - // Fast path: render integers directly to char buffer, avoiding String allocation. - // Most numbers in Jsonnet output are integers (array indices, counters, etc.). 
- RenderUtils.appendLong(elemBuilder, i) + writeLongDirect(i) } else if (d % 1 == 0) { appendString( BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString() @@ -195,7 +193,12 @@ class PythonRenderer(out: Writer = new java.io.StringWriter(), indent: Int = -1) } } -/** Renderer used by std.manifestJson, std.manifestJsonMinified, and std.manifestJsonEx */ +/** + * Renderer used by std.manifestJson, std.manifestJsonMinified, and std.manifestJsonEx. + * + * Supports both the Visitor-based path (via Materializer.apply0) and a fused direct path + * (materializeDirect) that bypasses the Visitor interface for better Scala Native performance. + */ final case class MaterializeJsonRenderer( indent: Int = 4, escapeUnicode: Boolean = false, @@ -264,67 +267,314 @@ final case class MaterializeJsonRenderer( out } } -} -object RenderUtils { + // ── Fused materializer ────────────────────────────────────────────────────── + // Bypasses the Visitor interface entirely: walks the Val tree and writes chars + // directly into elemBuilder. On Scala Native (no JIT), this eliminates virtual + // dispatch overhead on every visitString/visitObject/visitArray call. /** - * Custom rendering of Doubles used in rendering + * Fused materialize-and-render: walks the Val tree and writes JSON chars directly, without going + * through the upickle Visitor interface. */ - def renderDouble(d: Double): String = { - if (d.toLong == d) d.toLong.toString - else if (d % 1 == 0) { - BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString() - } else d.toString + def materializeDirect(v: Val)(implicit evaluator: EvalScope): Unit = { + val ctx = Materializer.MaterializeContext(evaluator) + try { + materializeChild(v, 0, ctx) + // Final flush — write everything to out. 
+ elemBuilder.writeOutToIfLongerThan(out, 0) + } catch { + case _: StackOverflowError => + Error.fail("Stackoverflow while materializing, possibly due to recursive value", v.pos) + case _: OutOfMemoryError => + Error.fail("Out of memory while materializing, possibly due to recursive value", v.pos) + } } - /** Maximum number of digits in a Long value (Long.MinValue = -9223372036854775808, 20 chars). */ - private final val MaxLongChars = 20 + private def materializeChild(v: Val, matDepth: Int, ctx: Materializer.MaterializeContext)(implicit + evaluator: EvalScope): Unit = { + if (v == null) Error.fail("Unknown value type " + v) + val vt: Int = v.valTag.toInt + (vt: @scala.annotation.switch) match { + case 0 => // TAG_STR + renderQuotedString(v.asInstanceOf[Val.Str].str) + case 1 => // TAG_NUM + renderDouble(v.asDouble) + case 2 => // TAG_TRUE + elemBuilder.ensureLength(4) + elemBuilder.appendUnsafe('t') + elemBuilder.appendUnsafe('r') + elemBuilder.appendUnsafe('u') + elemBuilder.appendUnsafe('e') + case 3 => // TAG_FALSE + elemBuilder.ensureLength(5) + elemBuilder.appendUnsafe('f') + elemBuilder.appendUnsafe('a') + elemBuilder.appendUnsafe('l') + elemBuilder.appendUnsafe('s') + elemBuilder.appendUnsafe('e') + case 4 => // TAG_NULL + elemBuilder.ensureLength(4) + elemBuilder.appendUnsafe('n') + elemBuilder.appendUnsafe('u') + elemBuilder.appendUnsafe('l') + elemBuilder.appendUnsafe('l') + case 5 => // TAG_ARR + val xs = v.asInstanceOf[Val.Arr] + if (matDepth < ctx.recursiveDepthLimit) + materializeDirectArr(xs, matDepth + 1, ctx) + else + Materializer.apply0(v, this)(evaluator) + case 6 => // TAG_OBJ + val obj = v.asInstanceOf[Val.Obj] + if (matDepth < ctx.recursiveDepthLimit) + materializeDirectObj(obj, matDepth + 1, ctx) + else + Materializer.apply0(v, this)(evaluator) + case 7 => // TAG_FUNC + val s = v.asInstanceOf[Val.Func] + Error.fail( + "Couldn't manifest function with params [" + s.params.names.mkString(",") + "]", + v.pos + ) + case _ => + v match { + case 
mat: Materializer.Materializable => + mat.materialize(this) + case tc: TailCall => + Error.fail("Internal error: TailCall sentinel leaked into materialization.", tc.pos) + case vv: Val => + Error.fail("Unknown value type " + vv.prettyName, vv.pos) + } + } + } - /** - * Render a long value directly into a [[upickle.core.CharBuilder]], avoiding the intermediate - * `String` allocation that `Long.toString` would create. For small absolute values (the common - * case in Jsonnet output — array lengths, indices, counters), this saves one allocation per - * number. The algorithm writes digits in reverse then reverses in-place. - */ - def appendLong(cb: upickle.core.CharBuilder, value: Long): Unit = { - if (value == 0) { - cb.append('0') - return + /** Render a quoted string into elemBuilder (char-based) with chunked SWAR scanning. */ + private def renderQuotedString(str: String): Unit = renderQuotedStringSWAR(str) + + /** Render a double value directly into the char buffer. */ + @inline private def renderDouble(d: Double): Unit = { + val i = d.toLong + if (d == i) { + writeLongDirect(i) + } else if (d % 1 == 0) { + appendString(BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString()) + } else { + appendString(d.toString) } + } + + private def materializeDirectObj( + obj: Val.Obj, + matDepth: Int, + ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = { + if (!ctx.enterObject(obj)) + Error.fail("Stackoverflow while materializing, possibly due to recursive value", obj.pos) + try { + obj.triggerAllAsserts(ctx.brokenAssertionLogic) + if (obj.canDirectIterate) { + if (ctx.sort) materializeDirectSortedInlineObj(obj, matDepth, ctx) + else materializeDirectInlineObj(obj, matDepth, ctx) + } else { + materializeDirectGenericObj(obj, matDepth, ctx) + } + } finally { + ctx.exitObject(obj) + } + } + + /** Open an object brace with indent. 
*/ + @inline private def openObjBrace(isEmpty: Boolean): Unit = { + elemBuilder.append('{') + depth += 1 + if (isEmpty && indent != -1) + elemBuilder.appendAll(newLineCharArray, newLineCharArray.length) + else renderIndent() + } - cb.ensureLength(MaxLongChars) - val arr = cb.arr - var pos = cb.getLength + /** Close an object brace. */ + @inline private def closeObjBrace(wasEmpty: Boolean): Unit = { + commaBuffered = false + depth -= 1 + renderIndent() + elemBuilder.append('}') + elemBuilder.writeOutToIfLongerThan(out, if (depth == 0) 0 else 1000) + } - val negative = value < 0 - // Use negative accumulator to handle Long.MinValue correctly - var n = if (negative) value else -value - val startPos = pos + /** Render a single key-value pair. */ + @inline private def renderKeyValue( + key: String, + childVal: Val, + matDepth: Int, + ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = { + flushBuffer() + renderQuotedString(key) + elemBuilder.appendAll(keyValueSeparatorCharArray, keyValueSeparatorCharArray.length) + materializeChild(childVal, matDepth, ctx) + } - while (n != 0) { - val digit = -(n % 10).toInt - arr(pos) = ('0' + digit).toChar - pos += 1 - n /= 10 + /** Fused inline object rendering — bypasses visibleKeyNames and value() lookup. 
*/
+  // Renders `obj` as a JSON object in stored (unsorted) field order, invoking each non-hidden
+  // member directly rather than resolving it through a key lookup.
+  //   obj      - object to render; read through inlineKeys/inlineMembers, or through
+  //              singleKey/singleMem when inlineKeys is null
+  //   matDepth - current materialization depth, forwarded unchanged to renderKeyValue
+  //   ctx      - materialization context; only ctx.emptyPos.fileScope is read here
+  private def materializeDirectInlineObj(
+      obj: Val.Obj,
+      matDepth: Int,
+      ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
+    val fs = ctx.emptyPos.fileScope
+    val rawKeys = obj.inlineKeys
+    // A null inlineKeys selects the single-field representation handled in the else branch.
+    if (rawKeys != null) {
+      val rawMembers = obj.inlineMembers
+      val rawN = rawKeys.length
+
+      // Count visible fields for empty detection
+      // (the opening brace is written before any field is evaluated, so emptiness must be
+      // known up front; hence the separate counting pass).
+      var visCount = 0
+      var i = 0
+      while (i < rawN) {
+        if (rawMembers(i).visibility != Expr.Member.Visibility.Hidden) visCount += 1
+        i += 1
+      }
+
+      openObjBrace(visCount == 0)
+
+      i = 0
+      while (i < rawN) {
+        val m = rawMembers(i)
+        if (m.visibility != Expr.Member.Visibility.Hidden) {
+          // NOTE(review): the second argument is passed as null - presumably the `super`
+          // object; confirm against Member.invoke.
+          val childVal = m.invoke(obj, null, fs, evaluator)
+          // Presumably stores the evaluated value on the object so later lookups reuse it,
+          // unless the object opted out via _skipFieldCache - confirm.
+          if (!obj._skipFieldCache) obj.cacheFieldValue(rawKeys(i), childVal)
+          renderKeyValue(rawKeys(i), childVal, matDepth, ctx)
+          // Arms the separator for the next field; renderKeyValue begins with flushBuffer(),
+          // which presumably emits it - confirm.
+          commaBuffered = true
+        }
+        i += 1
+      }
+
+      closeObjBrace(visCount == 0)
+    } else {
+      // Single-field object
+      val sfm = obj.singleMem
+      if (sfm.visibility != Expr.Member.Visibility.Hidden) {
+        openObjBrace(false)
+        val childVal = sfm.invoke(obj, null, fs, evaluator)
+        if (!obj._skipFieldCache) obj.cacheFieldValue(obj.singleKey, childVal)
+        renderKeyValue(obj.singleKey, childVal, matDepth, ctx)
+        // commaBuffered is not set here: there is no second field, and closeObjBrace resets
+        // it to false in any case.
+        closeObjBrace(false)
+      } else {
+        // Empty object (single hidden field)
+        openObjBrace(true)
+        closeObjBrace(true)
+      }
+    }
+  }
+
+  /** Fused sorted inline object rendering — uses cached sorted field order.
*/
+  // Renders `obj` as a JSON object, visiting members in the index order given by
+  // obj._sortedInlineOrder (or a freshly computed order when that is null).
+  //   obj      - object to render; read through inlineKeys/inlineMembers
+  //   matDepth - current materialization depth, forwarded unchanged
+  //   ctx      - materialization context; only ctx.emptyPos.fileScope is read here
+  private def materializeDirectSortedInlineObj(
+      obj: Val.Obj,
+      matDepth: Int,
+      ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
+    val fs = ctx.emptyPos.fileScope
+    val rawKeys = obj.inlineKeys
+    if (rawKeys != null) {
+      val rawMembers = obj.inlineMembers
+      // NOTE(review): a freshly computed order is not written back to obj._sortedInlineOrder
+      // here, so it is recomputed on every call while the cached value stays null.
+      val order = {
+        val cached = obj._sortedInlineOrder
+        if (cached != null) cached
+        else Materializer.computeSortedInlineOrder(rawKeys, rawMembers)
+      }
+      // No visibility check is made in the loop below, so `order` presumably already
+      // excludes hidden members - confirm against computeSortedInlineOrder.
+      val visCount = order.length
+
+      openObjBrace(visCount == 0)
+
+      var i = 0
+      while (i < visCount) {
+        // `order(i)` is an index into rawKeys/rawMembers, not a key.
+        val idx = order(i)
+        // NOTE(review): the second argument is passed as null - presumably the `super`
+        // object; confirm against Member.invoke.
+        val childVal = rawMembers(idx).invoke(obj, null, fs, evaluator)
+        if (!obj._skipFieldCache) obj.cacheFieldValue(rawKeys(idx), childVal)
+        renderKeyValue(rawKeys(idx), childVal, matDepth, ctx)
+        // Arms the separator for the next field; closeObjBrace clears it after the last one.
+        commaBuffered = true
+        i += 1
+      }
+
+      closeObjBrace(visCount == 0)
+    } else {
+      // Single-field: sorted = unsorted
+      materializeDirectInlineObj(obj, matDepth, ctx)
     }
+  }
+
+  /** Generic object rendering — uses visibleKeyNames + value() lookup.
*/ + private def materializeDirectGenericObj( + obj: Val.Obj, + matDepth: Int, + ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = { + val keys = + if (ctx.sort) obj.visibleKeyNames.sorted(Util.CodepointStringOrdering) + else obj.visibleKeyNames + + openObjBrace(keys.isEmpty) - if (negative) { - arr(pos) = '-' - pos += 1 + var i = 0 + while (i < keys.length) { + val key = keys(i) + val childVal = obj.value(key, ctx.emptyPos) + renderKeyValue(key, childVal, matDepth, ctx) + commaBuffered = true + i += 1 } - // Reverse the digits in-place - var lo = startPos - var hi = pos - 1 - while (lo < hi) { - val tmp = arr(lo) - arr(lo) = arr(hi) - arr(hi) = tmp - lo += 1 - hi -= 1 + closeObjBrace(keys.isEmpty) + } + + private def materializeDirectArr( + xs: Val.Arr, + matDepth: Int, + ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = { + val len = xs.length + + elemBuilder.append('[') + depth += 1 + // account for rendering differences of whitespaces in ujson and jsonnet manifestJson + if (len == 0 && indent != -1) + elemBuilder.appendAll(newLineCharArray, newLineCharArray.length) + else renderIndent() + + // Fast path for byte-backed arrays: emit numbers directly + xs match { + case ba: Val.ByteArr => + val bytes = ba.rawBytes + var i = 0 + while (i < len) { + flushBuffer() + renderDouble((bytes(i) & 0xff).toDouble) + commaBuffered = true + i += 1 + } + case _ => + var i = 0 + while (i < len) { + val childVal = xs.value(i) + flushBuffer() + materializeChild(childVal, matDepth, ctx) + commaBuffered = true + i += 1 + } } - cb.length = pos + // Close bracket + commaBuffered = false + depth -= 1 + renderIndent() + elemBuilder.append(']') + elemBuilder.writeOutToIfLongerThan(out, if (depth == 0) 0 else 1000) + } +} + +object RenderUtils { + + /** + * Custom rendering of Doubles used in rendering + */ + def renderDouble(d: Double): String = { + if (d.toLong == d) d.toLong.toString + else if (d % 1 == 0) { + 
BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString() + } else d.toString } } diff --git a/sjsonnet/src/sjsonnet/Util.scala b/sjsonnet/src/sjsonnet/Util.scala index f55c8e0c..bdd4b86d 100644 --- a/sjsonnet/src/sjsonnet/Util.scala +++ b/sjsonnet/src/sjsonnet/Util.scala @@ -133,37 +133,13 @@ object Util { * Compares two strings by Unicode codepoint values rather than UTF-16 code units. This ensures * that strings with characters above U+FFFF (which require surrogate pairs in UTF-16) are * compared correctly according to their Unicode codepoint values. + * + * Delegates to platform-specific CharSWAR.compareStrings which uses bulk getChars + tight array + * loop for JIT auto-vectorization on JVM, LLVM auto-vectorization on Native, and scalar fallback + * on JS. */ - def compareStringsByCodepoint(s1: String, s2: String): Int = { - // Fast path: same reference (e.g. interned strings, or self-comparison) - if (s1 eq s2) return 0 - val n1 = s1.length - val n2 = s2.length - var i1 = 0 - var i2 = 0 - while (i1 < n1 && i2 < n2) { - val c1 = s1.charAt(i1) - val c2 = s2.charAt(i2) - // Fast path: equal chars can be skipped without surrogate checks. - // Even for surrogate pairs, equal high surrogates at position i lead to - // comparing low surrogates at i+1, producing the correct codepoint ordering. 
- if (c1 == c2) { - i1 += 1 - i2 += 1 - } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) { - // Both non-surrogates and different: direct char subtraction - return c1 - c2 - } else { - // At least one is a surrogate, use full codepoint logic - val cp1 = s1.codePointAt(i1) - val cp2 = s2.codePointAt(i2) - if (cp1 != cp2) return Integer.compare(cp1, cp2) - i1 += Character.charCount(cp1) - i2 += Character.charCount(cp2) - } - } - if (i1 < n1) 1 else if (i2 < n2) -1 else 0 - } + def compareStringsByCodepoint(s1: String, s2: String): Int = + CharSWAR.compareStrings(s1, s2) /** * A reusable Ordering[String] that compares by Unicode codepoint values. Use this in place of diff --git a/sjsonnet/src/sjsonnet/Val.scala b/sjsonnet/src/sjsonnet/Val.scala index c717c93c..3032719d 100644 --- a/sjsonnet/src/sjsonnet/Val.scala +++ b/sjsonnet/src/sjsonnet/Val.scala @@ -403,11 +403,15 @@ object Val { if (ls != null && ls.isEmpty) return right if (rs != null && rs.isEmpty) return left // Small string eagerness: both flat and combined length <= 128 - if (ls != null && rs != null && ls.length + rs.length <= 128) - return new Str(pos, ls + rs) + if (ls != null && rs != null && ls.length + rs.length <= 128) { + val result = new Str(pos, ls + rs) + if (left._asciiSafe && right._asciiSafe) result._asciiSafe = true + return result + } // Rope node: O(1) val node = new Str(pos, null) node._children = Array(left, right) + if (left._asciiSafe && right._asciiSafe) node._asciiSafe = true node } } diff --git a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala index 9424f631..428edc77 100644 --- a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala @@ -10,21 +10,19 @@ object ManifestModule extends AbstractFunctionModule { def name = "manifest" private object ManifestJson extends Val.Builtin1("manifestJson", "v") { - def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = - 
Val.Str(pos, Materializer.apply0(v.value, MaterializeJsonRenderer())(ev).toString) + def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = { + val renderer = MaterializeJsonRenderer() + renderer.materializeDirect(v.value)(ev) + Val.Str(pos, renderer.out.toString) + } } private object ManifestJsonMinified extends Val.Builtin1("manifestJsonMinified", "value") { - def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = - Val.Str( - pos, - Materializer - .apply0( - v.value, - MaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":") - )(ev) - .toString - ) + def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = { + val renderer = MaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":") + renderer.materializeDirect(v.value)(ev) + Val.Str(pos, renderer.out.toString) + } } private object ManifestJsonEx @@ -42,20 +40,15 @@ object ManifestModule extends AbstractFunctionModule { newline: Eval, keyValSep: Eval, ev: EvalScope, - pos: Position): Val = - Val.Str( - pos, - Materializer - .apply0( - v.value, - MaterializeJsonRenderer( - indent = i.value.asString.length, - newline = newline.value.asString, - keyValueSeparator = keyValSep.value.asString - ) - )(ev) - .toString + pos: Position): Val = { + val renderer = MaterializeJsonRenderer( + indent = i.value.asString.length, + newline = newline.value.asString, + keyValueSeparator = keyValSep.value.asString ) + renderer.materializeDirect(v.value)(ev) + Val.Str(pos, renderer.out.toString) + } } private object ParseJson extends Val.Builtin1("parseJson", "str") { diff --git a/sjsonnet/src/sjsonnet/stdlib/StringModule.scala b/sjsonnet/src/sjsonnet/stdlib/StringModule.scala index f4a7c976..ca8ca625 100644 --- a/sjsonnet/src/sjsonnet/stdlib/StringModule.scala +++ b/sjsonnet/src/sjsonnet/stdlib/StringModule.scala @@ -26,11 +26,12 @@ object StringModule extends AbstractFunctionModule { Val.cachedNum( pos, (x.value match { - case Val.Str(_, s) => s.codePointCount(0, s.length) - case a: 
Val.Arr => a.length - case o: Val.Obj => o.visibleKeyNames.length - case o: Val.Func => o.params.names.length - case x => Error.fail("Cannot get length of " + x.prettyName) + case Val.Str(_, s) => + if (CharSWAR.isAllAscii(s)) s.length else s.codePointCount(0, s.length) + case a: Val.Arr => a.length + case o: Val.Obj => o.visibleKeyNames.length + case o: Val.Func => o.params.names.length + case x => Error.fail("Cannot get length of " + x.prettyName) }).toDouble ) } @@ -49,7 +50,9 @@ object StringModule extends AbstractFunctionModule { private object Substr extends Val.Builtin3("substr", "str", "from", "len") { def evalRhs(_s: Eval, from: Eval, len: Eval, ev: EvalScope, pos: Position): Val = { - val str = _s.value.asString + val srcVal = _s.value + val str = srcVal.asString + val srcAsciiSafe = srcVal.isInstanceOf[Val.Str] && srcVal.asInstanceOf[Val.Str]._asciiSafe val offset = from.value match { case v: Val.Num => v.asPositiveInt case _ => Error.fail("Expected a number for offset in substr, got " + from.value.prettyName) @@ -59,16 +62,28 @@ object StringModule extends AbstractFunctionModule { case _ => Error.fail("Expected a number for len in substr, got " + len.value.prettyName) } - val unicodeLength = str.codePointCount(0, str.length) - val safeOffset = math.min(offset, unicodeLength) - val safeLength = math.min(length, unicodeLength - safeOffset) - - if (safeLength <= 0) { - Val.Str(pos, "") + if (srcAsciiSafe || CharSWAR.isAllAscii(str)) { + val strLen = str.length + val safeOffset = math.min(offset, strLen) + val safeLength = math.min(length, strLen - safeOffset) + if (safeLength <= 0) Val.Str(pos, "") + else { + val result = Val.Str(pos, str.substring(safeOffset, safeOffset + safeLength)) + if (srcAsciiSafe) result._asciiSafe = true + result + } } else { - val startUtf16 = if (safeOffset == 0) 0 else str.offsetByCodePoints(0, safeOffset) - val endUtf16 = str.offsetByCodePoints(startUtf16, safeLength) - Val.Str(pos, str.substring(startUtf16, endUtf16)) + val 
unicodeLength = str.codePointCount(0, str.length) + val safeOffset = math.min(offset, unicodeLength) + val safeLength = math.min(length, unicodeLength - safeOffset) + + if (safeLength <= 0) { + Val.Str(pos, "") + } else { + val startUtf16 = if (safeOffset == 0) 0 else str.offsetByCodePoints(0, safeOffset) + val endUtf16 = str.offsetByCodePoints(startUtf16, safeLength) + Val.Str(pos, str.substring(startUtf16, endUtf16)) + } } } } @@ -266,21 +281,69 @@ object StringModule extends AbstractFunctionModule { val arr = implicitly[ReadWriter[Val.Arr]].apply(_arr.value) sep.value match { case Val.Str(_, s) => - val b = new java.lang.StringBuilder() + // Two-pass approach: pre-calculate total length to avoid StringBuilder resizing. + // Pass 1: force values, compute total length, count non-null elements. + val sepLen = s.length + var totalLen = 0L + var count = 0 + val sepVal = sep.value.asInstanceOf[Val.Str] + var allAsciiSafe = + sepVal._asciiSafe || (CharSWAR.isAllAscii(s) && !CharSWAR.hasEscapeChar(s)) var i = 0 - var added = false while (i < arr.length) { arr.value(i) match { - case _: Val.Null => - case Val.Str(_, x) => - if (added) b.append(s) - added = true - b.append(x) + case _: Val.Null => + case vs: Val.Str => + if (count > 0) totalLen += sepLen + totalLen += vs.str.length + if (allAsciiSafe && !vs._asciiSafe) allAsciiSafe = false + count += 1 case x => Error.fail("Cannot join " + x.prettyName) } i += 1 } - Val.Str(pos, b.toString) + if (count == 0) return Val.Str(pos, "") + if (totalLen > Int.MaxValue) + Error.fail("Join result too large: " + totalLen + " characters") + // Pass 2: build result in pre-sized char array. 
+ val chars = new Array[Char](totalLen.toInt) + var cPos = 0 + var added = false + i = 0 + if (sepLen == 1) { + // Single-char separator fast path: direct char write + val sepChar = s.charAt(0) + while (i < arr.length) { + arr.value(i) match { + case _: Val.Null => + case Val.Str(_, x) => + if (added) { chars(cPos) = sepChar; cPos += 1 } + added = true + val xLen = x.length + x.getChars(0, xLen, chars, cPos) + cPos += xLen + case _ => // already validated in pass 1 + } + i += 1 + } + } else { + while (i < arr.length) { + arr.value(i) match { + case _: Val.Null => + case Val.Str(_, x) => + if (added) { s.getChars(0, sepLen, chars, cPos); cPos += sepLen } + added = true + val xLen = x.length + x.getChars(0, xLen, chars, cPos) + cPos += xLen + case _ => // already validated in pass 1 + } + i += 1 + } + } + val result = Val.Str(pos, new String(chars)) + if (allAsciiSafe) result._asciiSafe = true + result case sep: Val.Arr => val out = new mutable.ArrayBuilder.ofRef[Eval] // Set a reasonable size hint based on estimated result size @@ -360,24 +423,47 @@ object StringModule extends AbstractFunctionModule { stringChars(pos, str.value.asString) } + /** Hand-written digit loop: no exception setup, no intermediate allocation, single pass. 
*/
+  // Parses `s` as an integer in `base` (8, 10 or 16 at the call sites below) and returns it as
+  // a Long. Fails via Error.fail on empty input, on a sign with no digits, on any character
+  // that is not a digit of `base`, and on values outside the Long range.
+  private def parseDigits(s: String, base: Int): Long = {
+    val len = s.length
+    if (len == 0) Error.fail("Cannot parse '' as an integer in base " + base)
+    // A leading sign is only recognised in base 10; octal/hex input must be digits only.
+    val hasSign = base == 10 && (s.charAt(0) == '-' || s.charAt(0) == '+')
+    val isNeg = hasSign && s.charAt(0) == '-'
+    var i = if (hasSign) 1 else 0
+    // A bare "-" or "+" carries no digits.
+    if (i >= len) Error.fail("Cannot parse '" + s + "' as an integer in base " + base)
+    // Accumulate in negative space, as java.lang.Long.parseLong does: the negative range is one
+    // larger than the positive one, so "-9223372036854775808" (Long.MinValue) stays parseable.
+    val limit = if (isNeg) Long.MinValue else -Long.MaxValue
+    val multMin = limit / base
+    var result = 0L
+    while (i < len) {
+      val c = s.charAt(i)
+      val d =
+        if (c >= '0' && c <= '9') c - '0'
+        else if (base == 16 && c >= 'a' && c <= 'f') c - 'a' + 10
+        else if (base == 16 && c >= 'A' && c <= 'F') c - 'A' + 10
+        else -1
+      if (d < 0 || d >= base)
+        Error.fail("Cannot parse '" + s + "' as an integer in base " + base)
+      // Detect overflow BEFORE it happens. Comparing the wrapped value with the previous one
+      // (`next < result`) is unsound: 2500000000000000000L * 10 wraps to a larger positive
+      // Long, so "25000000000000000000" would be accepted with a garbage value.
+      // `result >= multMin` guarantees that `result * base` itself cannot wrap.
+      if (result < multMin || result * base < limit + d)
+        Error.fail("Integer overflow parsing '" + s + "' in base " + base)
+      result = result * base - d
+      i += 1
+    }
+    if (isNeg) result else -result
+  }
+
   private object ParseInt extends Val.Builtin1("parseInt", "str") {
     def evalRhs(str: Eval, ev: EvalScope, pos: Position): Val =
-      try {
-        Val.cachedNum(pos, str.value.asString.toLong.toDouble)
-      } catch {
-        case _: NumberFormatException =>
-          Error.fail("Cannot parse '" + str.value.asString + "' as an integer in base 10")
-      }
+      Val.cachedNum(pos, parseDigits(str.value.asString, 10).toDouble)
   }
 
   private object ParseOctal extends Val.Builtin1("parseOctal", "str") {
     def evalRhs(str: Eval, ev: EvalScope, pos: Position): Val =
-      Val.cachedNum(pos, java.lang.Long.parseLong(str.value.asString, 8).toDouble)
+      Val.cachedNum(pos, parseDigits(str.value.asString, 8).toDouble)
   }
 
   private object ParseHex extends Val.Builtin1("parseHex", "str") {
     def evalRhs(str: Eval, ev: EvalScope, pos: Position): Val =
-      Val.cachedNum(pos, java.lang.Long.parseLong(str.value.asString, 16).toDouble)
+      Val.cachedNum(pos, parseDigits(str.value.asString, 16).toDouble)
   }
 
   private object AsciiUpper extends Val.Builtin1("asciiUpper", "str") {
diff --git a/sjsonnet/test/src/sjsonnet/RendererTests.scala
b/sjsonnet/test/src/sjsonnet/RendererTests.scala index 65577f25..bdf81c6d 100644 --- a/sjsonnet/test/src/sjsonnet/RendererTests.scala +++ b/sjsonnet/test/src/sjsonnet/RendererTests.scala @@ -43,26 +43,14 @@ object RendererTests extends TestSuite { |]""".stripMargin } - test("appendLong") { - def render(v: Long): String = { - val cb = new upickle.core.CharBuilder - RenderUtils.appendLong(cb, v) - cb.makeString() - } - test("zero") { render(0L) ==> "0" } - test("positive") { render(42L) ==> "42" } - test("negative") { render(-1L) ==> "-1" } - test("large") { render(9999999999L) ==> "9999999999" } - test("maxValue") { render(Long.MaxValue) ==> Long.MaxValue.toString } - test("minValue") { render(Long.MinValue) ==> Long.MinValue.toString } - } - test("visitFloat64Integers") { - // Verify that integer-valued doubles render correctly via the Renderer ujson.transform(ujson.Num(0), new Renderer()).toString ==> "0" ujson.transform(ujson.Num(42), new Renderer()).toString ==> "42" ujson.transform(ujson.Num(-1), new Renderer()).toString ==> "-1" ujson.transform(ujson.Num(1e15), new Renderer()).toString ==> "1000000000000000" + ujson.transform(ujson.Num(9999999999.0), new Renderer()).toString ==> "9999999999" + ujson.transform(ujson.Num(Long.MaxValue.toDouble), new Renderer()).toString ==> + Long.MaxValue.toDouble.toLong.toString } test("indentZero") {