diff --git a/sjsonnet/src-js/sjsonnet/CharSWAR.scala b/sjsonnet/src-js/sjsonnet/CharSWAR.scala
index bcdb85e7..c5b21028 100644
--- a/sjsonnet/src-js/sjsonnet/CharSWAR.scala
+++ b/sjsonnet/src-js/sjsonnet/CharSWAR.scala
@@ -33,4 +33,66 @@ object CharSWAR {
     }
     false
   }
+
+  /** Byte-wise scan of `arr[from, to)`: index of the first byte needing a JSON escape, or -1. */
+  def findFirstEscapeChar(arr: Array[Byte], from: Int, to: Int): Int = {
+    var idx = from
+    while (idx < to) {
+      val unsigned = arr(idx) & 0xff
+      if (unsigned == '\\' || unsigned == '"' || unsigned < 32) return idx
+      idx += 1
+    }
+    -1
+  }
+
+  /** Char-wise scan of `arr[from, to)`: index of the first char needing a JSON escape, or -1. */
+  def findFirstEscapeCharChar(arr: Array[Char], from: Int, to: Int): Int = {
+    var idx = from
+    while (idx < to) {
+      val ch = arr(idx)
+      if (ch == '\\' || ch == '"' || ch < 32) return idx
+      idx += 1
+    }
+    -1
+  }
+
+  /**
+   * Scalar ASCII check for Scala.js: true when no UTF-16 code unit of `s` is 0x80 or above.
+   */
+  def isAllAscii(s: String): Boolean = {
+    val n = s.length
+    var pos = 0
+    while (pos < n) {
+      if (s.charAt(pos) > 0x7f) return false
+      pos += 1
+    }
+    true
+  }
+
+  /**
+   * Orders `s1` and `s2` by Unicode code point: negative, zero or positive like `compareTo`.
+   * Scalar Scala.js fallback; surrogates are decoded only at the first differing position.
+   */
+  def compareStrings(s1: String, s2: String): Int = {
+    if (s1 eq s2) return 0
+    val n1 = s1.length
+    val n2 = s2.length
+    val minLen = math.min(n1, n2)
+    var i = 0
+    while (i < minLen) {
+      val c1 = s1.charAt(i)
+      val c2 = s2.charAt(i)
+      if (c1 == c2) {
+        i += 1
+      } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) {
+        return c1 - c2
+      } else {
+        val cp1 = Character.codePointAt(s1, i)
+        val cp2 = Character.codePointAt(s2, i)
+        if (cp1 != cp2) return Integer.compare(cp1, cp2)
+        i += Character.charCount(cp1)
+      }
+    }
+    Integer.compare(n1, n2)
+  }
 }
diff --git a/sjsonnet/src-jvm/sjsonnet/CharSWAR.java b/sjsonnet/src-jvm/sjsonnet/CharSWAR.java
index 46bc7d11..8c7f4caf 100644
--- a/sjsonnet/src-jvm/sjsonnet/CharSWAR.java
+++ b/sjsonnet/src-jvm/sjsonnet/CharSWAR.java
@@ -6,10 +6,15 @@
 import java.nio.charset.StandardCharsets;
 
 /**
- * SWAR (SIMD Within A Register) escape-char scanner for JSON string rendering.
+ * SWAR (SIMD Within A Register) utilities for JSON string rendering and string comparison.
  *
- * <p>Detects characters requiring JSON escaping: control chars ({@code < 32}),
- * double-quote ({@code '"'}), and backslash ({@code '\\'}).
+ * <p>Provides:
+ * <ul>
+ *   <li>Escape-char scanning: detects/locates chars requiring JSON escaping
+ *       (control chars, double-quote, backslash).</li>
+ *   <li>String comparison: codepoint-correct comparison with array-based inner loop
+ *       that the JIT can auto-vectorize to SIMD instructions.</li>
+ * </ul>
  *
  * <p>For strings above a threshold length, converts to ISO-8859-1 bytes and
  * processes 8 bytes at a time using {@link VarHandle} bulk reads + Hacker's
@@ -23,7 +28,7 @@
  *
  * @see <a href="https://richardstartin.github.io/posts/finding-bytes">Finding Bytes in Arrays</a>
  */
-final class CharSWAR {
+public final class CharSWAR {
     private CharSWAR() {}
 
     // VarHandle for reading longs from byte[] — replaces sun.misc.Unsafe.
@@ -57,7 +62,7 @@ private CharSWAR() {}
      * Check if any char in {@code str} needs JSON string escaping.
      * Scan-first API: call on the String before copying to the output buffer.
      */
-    static boolean hasEscapeChar(String str) {
+    public static boolean hasEscapeChar(String str) {
         int len = str.length();
         if (len < SWAR_THRESHOLD) {
             return hasEscapeCharScalar(str, len);
@@ -75,14 +80,14 @@ static boolean hasEscapeChar(String str) {
      * UTF-8 multi-byte sequences never produce bytes matching '"', '\\', or &lt; 0x20,
      * so this is safe for scanning UTF-8 encoded data.
      */
-    static boolean hasEscapeChar(byte[] arr, int from, int to) {
+    public static boolean hasEscapeChar(byte[] arr, int from, int to) {
         return hasEscapeCharSWAR(arr, from, to);
     }
 
     /**
      * Check if any char in {@code arr[from..to)} needs JSON string escaping.
      */
-    static boolean hasEscapeChar(char[] arr, int from, int to) {
+    public static boolean hasEscapeChar(char[] arr, int from, int to) {
         for (int i = from; i < to; i++) {
             char c = arr[i];
             if (c < 32 || c == '"' || c == '\\') return true;
@@ -138,4 +143,172 @@ private static boolean hasEscapeCharScalar(String s, int len) {
         }
         return false;
     }
+
+    // =========================================================================
+    // findFirstEscapeChar — position-returning SWAR scan for chunked rendering
+    // =========================================================================
+
+    /**
+     * Find the index of the first byte in {@code arr[from..to)} that needs JSON
+     * string escaping (below 0x20, {@code '"'} or {@code '\\'}); {@code -1} if none.
+     *
+     * <p>Reads 8 bytes per iteration through {@code LONG_VIEW}; when a word
+     * matches, a scalar pass over those 8 bytes pinpoints the exact index.
+     */
+    public static int findFirstEscapeChar(byte[] arr, int from, int to) {
+        int i = from;
+        int limit = to - 7; // i < limit guarantees 8 readable bytes starting at i
+        while (i < limit) {
+            long word = (long) LONG_VIEW.get(arr, i);
+            if (swarHasMatch(word)) {
+                // Pinpoint exact byte within the matched 8-byte word
+                for (int j = i; j < i + 8; j++) {
+                    int b = arr[j] & 0xFF;
+                    if (b < 32 || b == '"' || b == '\\') return j;
+                }
+            }
+            i += 8;
+        }
+        // Tail: remaining 0-7 bytes
+        while (i < to) {
+            int b = arr[i] & 0xFF;
+            if (b < 32 || b == '"' || b == '\\') return i;
+            i++;
+        }
+        return -1;
+    }
+
+    /**
+     * Scalar scan of {@code arr[from..to)} for the char-based chunked renderer:
+     * yields the index of the first char that needs JSON string escaping, or
+     * {@code -1} when every char in the range can be copied verbatim.
+     */
+    public static int findFirstEscapeCharChar(char[] arr, int from, int to) {
+        for (int idx = from; idx < to; idx++) {
+            char ch = arr[idx];
+            if (ch == '\\' || ch == '"' || ch < 32) return idx;
+        }
+        return -1;
+    }
+
+    // =========================================================================
+    // isAllAscii — check if all chars are ASCII (< 0x80)
+    // =========================================================================
+
+    /**
+     * Returns true if all characters in the string are ASCII (&lt; 0x80).
+     * Plain scalar {@code charAt} scan that exits on the first non-ASCII char.
+     * For ASCII-only strings, codepoint operations can be replaced with direct char indexing.
+     */
+    public static boolean isAllAscii(String s) {
+        int len = s.length();
+        for (int i = 0; i < len; i++) {
+            if (s.charAt(i) >= 0x80) return false;
+        }
+        return true;
+    }
+
+    // =========================================================================
+    // compareStrings — JIT-vectorizable codepoint-correct string comparison
+    // =========================================================================
+
+    /** Capacity of each reusable comparison buffer (CMP_BUF1/CMP_BUF2 hold one per thread). */
+    private static final int CMP_BUF_SIZE = 32768;
+    private static final ThreadLocal<char[]> CMP_BUF1 =
+            ThreadLocal.withInitial(() -> new char[CMP_BUF_SIZE]);
+    private static final ThreadLocal<char[]> CMP_BUF2 =
+            ThreadLocal.withInitial(() -> new char[CMP_BUF_SIZE]);
+
+    /** Below this length, scalar charAt comparison is faster than getChars + array loop. */
+    private static final int CMP_THRESHOLD = 16;
+
+    /**
+     * Compare two strings by Unicode codepoint values. Equivalent to
+     * {@code Util.compareStringsByCodepoint} but uses bulk {@code getChars} +
+     * tight array loop so the JIT can auto-vectorize the comparison to SIMD
+     * instructions (AVX2/SSE on x86, NEON on ARM).
+     *
+     * <p>Surrogate checks are deferred to the mismatch point (O(1) instead of
+     * O(n)), which is correct because equal chars — even surrogates — can be
+     * skipped without affecting ordering.
+     */
+    public static int compareStrings(String s1, String s2) {
+        if (s1 == s2) return 0;
+        int n1 = s1.length(), n2 = s2.length();
+        int minLen = Math.min(n1, n2);
+
+        // Short strings or strings exceeding buffer: scalar path
+        if (minLen < CMP_THRESHOLD || n1 > CMP_BUF_SIZE || n2 > CMP_BUF_SIZE) {
+            return compareStringsScalar(s1, n1, s2, n2);
+        }
+
+        // Bulk-copy to char arrays — eliminates String.charAt() virtual dispatch,
+        // enabling the JIT to auto-vectorize the comparison loop.
+        char[] c1 = CMP_BUF1.get();
+        char[] c2 = CMP_BUF2.get();
+        s1.getChars(0, n1, c1, 0);
+        s2.getChars(0, n2, c2, 0);
+
+        // Tight comparison loop — the simple c1[i] != c2[i] pattern is what
+        // the C2 JIT compiler recognizes and vectorizes.
+        int i = 0;
+        while (i < minLen) {
+            if (c1[i] != c2[i]) {
+                char a = c1[i], b = c2[i];
+                if (!Character.isSurrogate(a) && !Character.isSurrogate(b)) {
+                    return a - b;
+                }
+                // Back up when either side sits on the low half of a pair whose high half
+                // is the shared char at i - 1 (all chars before i are equal). Testing only
+                // `a` made compare(x, y) and compare(y, x) disagree on ill-formed input.
+                boolean lowHalf = Character.isLowSurrogate(a) || Character.isLowSurrogate(b);
+                int pos = (i > 0 && lowHalf && Character.isHighSurrogate(c1[i - 1])) ? i - 1 : i;
+                return compareCodepointsFrom(c1, n1, c2, n2, pos);
+            }
+            i++;
+        }
+        return Integer.compare(n1, n2);
+    }
+
+    /**
+     * Scalar codepoint comparison, used by {@code compareStrings} for short strings or
+     * strings longer than the buffers. Equal chars are skipped without any surrogate check.
+     */
+    private static int compareStringsScalar(String s1, int n1, String s2, int n2) {
+        int minLen = Math.min(n1, n2);
+        int i = 0;
+        while (i < minLen) {
+            char c1 = s1.charAt(i);
+            char c2 = s2.charAt(i);
+            if (c1 == c2) {
+                i++;
+            } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) {
+                return c1 - c2;
+            } else {
+                int cp1 = Character.codePointAt(s1, i);
+                int cp2 = Character.codePointAt(s2, i);
+                if (cp1 != cp2) return Integer.compare(cp1, cp2);
+                i += Character.charCount(cp1);
+            }
+        }
+        return Integer.compare(n1, n2);
+    }
+
+    /**
+     * Codepoint-level comparison from a given position in char arrays, where only
+     * {@code c1[0..n1)} and {@code c2[0..n2)} are valid (the rest may be stale buffer data).
+     */
+    private static int compareCodepointsFrom(char[] c1, int n1, char[] c2, int n2, int from) {
+        int i1 = from, i2 = from;
+        while (i1 < n1 && i2 < n2) {
+            int cp1 = Character.codePointAt(c1, i1, n1); // limit: never pair with stale chars
+            int cp2 = Character.codePointAt(c2, i2, n2);
+            if (cp1 != cp2) return Integer.compare(cp1, cp2);
+            i1 += Character.charCount(cp1);
+            i2 += Character.charCount(cp2);
+        }
+        if (i1 < n1) return 1;
+        if (i2 < n2) return -1;
+        return 0;
+    }
 }
diff --git a/sjsonnet/src-native/sjsonnet/CharSWAR.scala b/sjsonnet/src-native/sjsonnet/CharSWAR.scala
index 5331c012..67014221 100644
--- a/sjsonnet/src-native/sjsonnet/CharSWAR.scala
+++ b/sjsonnet/src-native/sjsonnet/CharSWAR.scala
@@ -1,59 +1,144 @@
 package sjsonnet
 
-import scala.scalanative.runtime.{ByteArray, Intrinsics}
+import scala.scalanative.runtime.{CharArray, Intrinsics}
 
 /**
  * SWAR (SIMD Within A Register) escape-char scanner for Scala Native.
  *
- * Uses Scala Native's `Intrinsics.loadLong` + `ByteArray.atRawUnsafe` for zero-overhead 8-byte bulk
- * reads directly from Array[Byte] memory, matching the JVM VarHandle SWAR performance.
+ * P0 optimizations:
+ *   - Replace the getBytes(UTF-8) encode in hasEscapeChar(String) with a toCharArray copy that
+ *     is scanned by 16-bit lane SWAR; short strings use charAt and allocate nothing.
+ *   - The chunked rendering path in BaseByteRenderer already does getBytes once; the
+ *     hasEscapeChar(String) check itself never encodes to UTF-8.
  *
- * For String scanning, uses `getBytes(UTF-8)` + byte[] SWAR. On Scala Native compact strings are
- * UTF-16, so converting to bytes first is necessary.
+ * P1 optimizations:
+ *   - 16-bit lane SWAR (4 chars per Long) for char[] scanning instead of scalar loop.
+ *   - Tight comparison loops (helpers marked @inline) meant to be LLVM auto-vectorization friendly.
+ *   - Pre-allocated module-level buffers for compareStrings (shared state, not per-thread).
  *
- * Inspired by netty's SWARUtil (io.netty.util.SWARUtil) and Hacker's Delight Ch. 6 zero-detection
- * formula.
+ * Inspired by netty's SWARUtil (io.netty.util.SWARUtil) and Hacker's Delight Ch. 6.
  */
 object CharSWAR {
 
-  // --- 8-bit SWAR constants ---
-  private final val HOLE = 0x7f7f7f7f7f7f7f7fL
-  private final val QUOTE = 0x2222222222222222L
-  private final val BSLAS = 0x5c5c5c5c5c5c5c5cL
-  private final val CTRL = 0xe0e0e0e0e0e0e0e0L
+  // =========================================================================
+  // 8-bit SWAR constants (for byte[] scanning)
+  // =========================================================================
+  private final val HOLE_8 = 0x7f7f7f7f7f7f7f7fL
+  private final val QUOTE_8 = 0x2222222222222222L
+  private final val BSLAS_8 = 0x5c5c5c5c5c5c5c5cL
+  private final val CTRL_8 = 0xe0e0e0e0e0e0e0e0L
 
-  /**
-   * SWAR: returns true if any byte lane in `word` contains '"' (0x22), '\\' (0x5C), or a control
-   * char (< 0x20).
-   */
-  @inline private def swarHasMatch(word: Long): Boolean = {
-    // 1. Detect '"' via XOR + zero-detection
-    val q = word ^ QUOTE
-    val qz = ~((q & HOLE) + HOLE | q | HOLE)
-
-    // 2. Detect '\\' via XOR + zero-detection
-    val b = word ^ BSLAS
-    val bz = ~((b & HOLE) + HOLE | b | HOLE)
+  @inline private def swarHasMatch8(word: Long): Boolean = { // true if any byte lane needs escaping
+    val q = word ^ QUOTE_8 // lanes equal to '"' (0x22) become zero
+    val qz = ~((q & HOLE_8) + HOLE_8 | q | HOLE_8) // non-zero iff some lane of q is zero
+    val b = word ^ BSLAS_8 // lanes equal to '\\' (0x5C) become zero
+    val bz = ~((b & HOLE_8) + HOLE_8 | b | HOLE_8)
+    val c = word & CTRL_8 // keep the top 3 bits of each lane; zero means the byte is < 0x20
+    val cz = ~((c & HOLE_8) + HOLE_8 | c | HOLE_8)
+    (qz | bz | cz) != 0L
+  }
 
-    // 3. Detect control chars: byte & 0xE0 == 0 → c < 32
-    val c = word & CTRL
-    val cz = ~((c & HOLE) + HOLE | c | HOLE)
+  // =========================================================================
+  // 16-bit SWAR constants (P1: 4 x 16-bit lanes per Long)
+  // =========================================================================
+  // Each lane is 16 bits. We detect:
+  //   '"'  = 0x0022  -> broadcast = 0x0022002200220022L
+  //   '\\' = 0x005C  -> broadcast = 0x005C005C005C005CL
+  //   c < 0x20       -> bits 5-15 of each 16-bit lane are zero
+  //     mask = 0xFFE0 -> broadcast = 0xFFE0FFE0FFE0FFE0L
+  private final val HOLE_16 = 0x7fff7fff7fff7fffL
+  private final val QUOTE_16 = 0x0022002200220022L
+  private final val BSLAS_16 = 0x005c005c005c005cL
+  private final val CTRL_16 = 0xffe0ffe0ffe0ffe0L
 
+  /**
+   * 16-bit SWAR: returns true if any 16-bit lane in `word` contains '"' (0x0022), '\\' (0x005C), or
+   * a control char (< 0x0020).
+   */
+  @inline private def swarHasMatch16(word: Long): Boolean = {
+    val q = word ^ QUOTE_16 // lanes equal to '"' become zero
+    val qz = ~((q & HOLE_16) + HOLE_16 | q | HOLE_16) // non-zero iff some lane of q is zero
+    val b = word ^ BSLAS_16 // lanes equal to '\\' become zero
+    val bz = ~((b & HOLE_16) + HOLE_16 | b | HOLE_16)
+    val c = word & CTRL_16 // keep bits 5-15 of each lane; zero means the char is < 0x20
+    val cz = ~((c & HOLE_16) + HOLE_16 | c | HOLE_16)
     (qz | bz | cz) != 0L
   }
 
+  // =========================================================================
+  // hasEscapeChar(String) — P0: avoid getBytes allocation for long strings
+  // =========================================================================
+  /** Length (in chars) at which hasEscapeChar(String) switches from the scalar scan to SWAR. */
+  private final val SWAR_THRESHOLD = 128
+
+  /**
+   * Check if a String needs JSON escaping.
+   *   - Short strings (< 128 chars): scalar scan, zero allocation.
+   *   - Long strings (>= 128 chars): one toCharArray copy, scanned 4 chars at a time with 16-bit
+   *     SWAR; no getBytes(UTF-8) encoding pass is needed.
+   */
   def hasEscapeChar(s: String): Boolean = {
     val len = s.length
-    if (len < 128) {
+    if (len < SWAR_THRESHOLD) {
       hasEscapeCharScalar(s, len)
     } else {
-      val bytes = s.getBytes(java.nio.charset.StandardCharsets.UTF_8)
-      hasEscapeChar(bytes, 0, bytes.length)
+      hasEscapeCharCharSWAR(s, len)
     }
   }
 
+  /**
+   * 16-bit SWAR scan of a copy of the string's chars (`toCharArray`), 4 chars per `loadLong`. A
+   * SWAR hit is confirmed by a scalar pass over those 4 chars; the 0-3 char tail is scalar too.
+   */
+  private def hasEscapeCharCharSWAR(s: String, len: Int): Boolean = {
+    val carr = s.toCharArray
+    val cArr = carr.asInstanceOf[CharArray]
+    var i = 0
+    val limit = len - 3 // 4 chars per loadLong
+    while (i < limit) {
+      val word = Intrinsics.loadLong(cArr.atRawUnsafe(i))
+      if (swarHasMatch16(word)) {
+        var j = i
+        while (j < i + 4) {
+          val c = carr(j)
+          if (c < 32 || c == '"' || c == '\\') return true
+          j += 1
+        }
+      }
+      i += 4
+    }
+    // Tail: remaining 0-3 chars
+    while (i < len) {
+      val c = carr(i)
+      if (c < 32 || c == '"' || c == '\\') return true
+      i += 1
+    }
+    false
+  }
+
+  // =========================================================================
+  // hasEscapeChar(char[]) — P1: 16-bit lane SWAR
+  // =========================================================================
   def hasEscapeChar(arr: Array[Char], from: Int, to: Int): Boolean = {
+    val len = to - from
+    if (len < 4) {
+      return hasEscapeCharScalarChars(arr, from, to)
+    }
+    val cArr = arr.asInstanceOf[CharArray]
     var i = from
+    val limit = to - 3
+    while (i < limit) {
+      val word = Intrinsics.loadLong(cArr.atRawUnsafe(i))
+      if (swarHasMatch16(word)) {
+        var j = i
+        while (j < i + 4) {
+          val c = arr(j)
+          if (c < 32 || c == '"' || c == '\\') return true
+          j += 1
+        }
+      }
+      i += 4
+    }
     while (i < to) {
       val c = arr(i)
       if (c < 32 || c == '"' || c == '\\') return true
@@ -62,12 +147,11 @@ object CharSWAR {
     false
   }
 
-  /**
-   * SWAR scan for byte[] using Intrinsics.loadLong for zero-overhead bulk reads. Processes 8 bytes
-   * per iteration — same throughput as the JVM VarHandle path. UTF-8 multi-byte sequences never
-   * produce bytes matching '"', '\', or < 0x20.
-   */
+  // =========================================================================
+  // hasEscapeChar(byte[]) — 8-bit SWAR (unchanged, already optimal)
+  // =========================================================================
   def hasEscapeChar(arr: Array[Byte], from: Int, to: Int): Boolean = {
+    import scala.scalanative.runtime.ByteArray
     val len = to - from
     if (len < 8) {
       return hasEscapeCharScalarBytes(arr, from, to)
@@ -77,10 +161,9 @@ object CharSWAR {
     val limit = to - 7
     while (i < limit) {
       val word = Intrinsics.loadLong(barr.atRawUnsafe(i))
-      if (swarHasMatch(word)) return true
+      if (swarHasMatch8(word)) return true
       i += 8
     }
-    // Tail: remaining 0-7 bytes
     while (i < to) {
       val b = arr(i) & 0xff
       if (b < 32 || b == '"' || b == '\\') return true
@@ -89,6 +172,68 @@ object CharSWAR {
     false
   }
 
+  // =========================================================================
+  // findFirstEscapeChar(byte[]) — 8-bit SWAR; index of first escape byte in [from, to), or -1
+  // =========================================================================
+  def findFirstEscapeChar(arr: Array[Byte], from: Int, to: Int): Int = {
+    import scala.scalanative.runtime.ByteArray
+    val len = to - from
+    if (len < 8) return findFirstEscapeCharScalarBytes(arr, from, to) // too short for one word
+    val barr = arr.asInstanceOf[ByteArray]
+    var i = from
+    val limit = to - 7 // i < limit guarantees 8 readable bytes starting at i
+    while (i < limit) {
+      val word = Intrinsics.loadLong(barr.atRawUnsafe(i))
+      if (swarHasMatch8(word)) {
+        var j = i // scalar pass over the matched word to pinpoint the byte
+        while (j < i + 8) {
+          val b = arr(j) & 0xff
+          if (b < 32 || b == '"' || b == '\\') return j
+          j += 1
+        }
+      }
+      i += 8
+    }
+    while (i < to) { // tail: remaining 0-7 bytes
+      val b = arr(i) & 0xff
+      if (b < 32 || b == '"' || b == '\\') return i
+      i += 1
+    }
+    -1
+  }
+
+  // =========================================================================
+  // findFirstEscapeCharChar(char[]) — 16-bit SWAR; index of first escape char in [from, to), or -1
+  // =========================================================================
+  def findFirstEscapeCharChar(arr: Array[Char], from: Int, to: Int): Int = {
+    val len = to - from
+    if (len < 4) return findFirstEscapeCharScalarChars(arr, from, to) // too short for one word
+    val cArr = arr.asInstanceOf[CharArray]
+    var i = from
+    val limit = to - 3 // i < limit guarantees 4 readable chars starting at i
+    while (i < limit) {
+      val word = Intrinsics.loadLong(cArr.atRawUnsafe(i))
+      if (swarHasMatch16(word)) {
+        var j = i // scalar pass over the matched word to pinpoint the char
+        while (j < i + 4) {
+          val c = arr(j)
+          if (c < 32 || c == '"' || c == '\\') return j
+          j += 1
+        }
+      }
+      i += 4
+    }
+    while (i < to) { // tail: remaining 0-3 chars
+      val c = arr(i)
+      if (c < 32 || c == '"' || c == '\\') return i
+      i += 1
+    }
+    -1
+  }
+
+  // =========================================================================
+  // Scalar fallbacks
+  // =========================================================================
   @inline private def hasEscapeCharScalar(s: String, len: Int): Boolean = {
     var i = 0
     while (i < len) {
@@ -99,6 +244,16 @@ object CharSWAR {
     false
   }
 
+  @inline private def hasEscapeCharScalarChars(arr: Array[Char], from: Int, to: Int): Boolean = {
+    var idx = from
+    while (idx < to) {
+      val ch = arr(idx)
+      if (ch == '\\' || ch == '"' || ch < 32) return true
+      idx += 1
+    }
+    false
+  }
+
   @inline private def hasEscapeCharScalarBytes(arr: Array[Byte], from: Int, to: Int): Boolean = {
     var i = from
     while (i < to) {
@@ -108,4 +263,148 @@ object CharSWAR {
     }
     false
   }
+
+  @inline private def findFirstEscapeCharScalarBytes(arr: Array[Byte], from: Int, to: Int): Int = {
+    var idx = from
+    while (idx < to) {
+      val unsigned = arr(idx) & 0xff
+      if (unsigned == '\\' || unsigned == '"' || unsigned < 32) return idx
+      idx += 1
+    }
+    -1
+  }
+
+  @inline private def findFirstEscapeCharScalarChars(arr: Array[Char], from: Int, to: Int): Int = {
+    var idx = from
+    while (idx < to) {
+      val ch = arr(idx)
+      if (ch == '\\' || ch == '"' || ch < 32) return idx
+      idx += 1
+    }
+    -1
+  }
+
+  // =========================================================================
+  // isAllAscii — SWAR-accelerated ASCII detection (4 chars per Long)
+  // =========================================================================
+
+  /** Mask for non-ASCII bits in 16-bit lanes: any of bits 7-15 set means char >= 0x80. */
+  private final val NON_ASCII_16 = 0xff80ff80ff80ff80L
+
+  /**
+   * Returns true if all characters in the string are ASCII (< 0x80). Strings shorter than 16 chars
+   * are scanned with `charAt`; longer ones are copied with `toCharArray` and checked 4 chars per
+   * Long. For ASCII-only strings, codepoint operations can be replaced with direct char indexing.
+   */
+  def isAllAscii(s: String): Boolean = {
+    val len = s.length
+    if (len < 16) return isAllAsciiScalar(s, len)
+    val carr = s.toCharArray
+    val cArr = carr.asInstanceOf[CharArray]
+    var i = 0
+    val limit = len - 3 // i < limit guarantees 4 readable chars starting at i
+    while (i < limit) {
+      val word = Intrinsics.loadLong(cArr.atRawUnsafe(i))
+      if ((word & NON_ASCII_16) != 0L) return false
+      i += 4
+    }
+    while (i < len) { // tail: remaining 0-3 chars
+      if (carr(i) >= 0x80) return false
+      i += 1
+    }
+    true
+  }
+
+  @inline private def isAllAsciiScalar(s: String, len: Int): Boolean = {
+    var pos = 0
+    while (pos < len) {
+      if (s.charAt(pos) > 0x7f) return false
+      pos += 1
+    }
+    true
+  }
+
+  // =========================================================================
+  // compareStrings — P1: LLVM auto-vectorization friendly
+  // =========================================================================
+
+  private final val CMP_BUF_SIZE = 32768
+  private val cmpBuf1: Array[Char] = new Array[Char](CMP_BUF_SIZE)
+  private val cmpBuf2: Array[Char] = new Array[Char](CMP_BUF_SIZE)
+
+  /**
+   * Compare two strings by Unicode codepoint values. Uses bulk getChars + tight array loop for LLVM
+   * auto-vectorization. The module-level buffers avoid per-call allocation but are shared state.
+   */
+  def compareStrings(s1: String, s2: String): Int = {
+    if (s1 eq s2) return 0
+    val n1 = s1.length
+    val n2 = s2.length
+    val minLen = if (n1 < n2) n1 else n2
+
+    if (minLen < 16 || n1 > CMP_BUF_SIZE || n2 > CMP_BUF_SIZE)
+      return compareStringsScalar(s1, n1, s2, n2)
+
+    val c1 = cmpBuf1
+    val c2 = cmpBuf2
+    s1.getChars(0, n1, c1, 0)
+    s2.getChars(0, n2, c2, 0)
+
+    // Tight comparison loop: i < minLen, and both lengths fit in the buffers (checked above)
+    var i = 0
+    while (i < minLen) {
+      if (c1(i) != c2(i)) {
+        val a = c1(i)
+        val b = c2(i)
+        if (!Character.isSurrogate(a) && !Character.isSurrogate(b)) {
+          return a - b
+        }
+        // Back up when either side sits on the low half of a pair whose high half is the shared
+        // char at i - 1; testing only `a` made compare(x, y) and compare(y, x) disagree.
+        val lowHalf = Character.isLowSurrogate(a) || Character.isLowSurrogate(b)
+        val pos = if (i > 0 && lowHalf && Character.isHighSurrogate(c1(i - 1))) i - 1 else i
+        return compareCodepointsFrom(c1, n1, c2, n2, pos)
+      }
+      i += 1
+    }
+    if (n1 < n2) -1 else if (n1 > n2) 1 else 0
+  }
+
+  private def compareStringsScalar(s1: String, n1: Int, s2: String, n2: Int): Int = {
+    val shared = math.min(n1, n2)
+    var idx = 0
+    while (idx < shared) {
+      val left = s1.charAt(idx)
+      val right = s2.charAt(idx)
+      if (left != right) {
+        // Plain BMP chars order by code unit; anything involving a surrogate is decoded first.
+        if (!(Character.isSurrogate(left) || Character.isSurrogate(right))) return left - right
+        val cpLeft = Character.codePointAt(s1, idx)
+        val cpRight = Character.codePointAt(s2, idx)
+        if (cpLeft != cpRight) return if (cpLeft < cpRight) -1 else 1
+        idx += Character.charCount(cpLeft)
+      } else {
+        idx += 1
+      }
+    }
+    if (n1 < n2) -1 else if (n1 > n2) 1 else 0
+  }
+
+  private def compareCodepointsFrom(
+      c1: Array[Char],
+      n1: Int,
+      c2: Array[Char],
+      n2: Int,
+      from: Int): Int = {
+    var i1 = from
+    var i2 = from
+    while (i1 < n1 && i2 < n2) {
+      val cp1 = Character.codePointAt(c1, i1, n1) // limit: chars past n1 are stale buffer data
+      val cp2 = Character.codePointAt(c2, i2, n2) // limit: chars past n2 are stale buffer data
+      if (cp1 != cp2) return if (cp1 < cp2) -1 else 1
+      i1 += Character.charCount(cp1)
+      i2 += Character.charCount(cp2)
+    }
+    if (i1 < n1) 1 else if (i2 < n2) -1 else 0
+  }
 }
diff --git a/sjsonnet/src/sjsonnet/BaseByteRenderer.scala b/sjsonnet/src/sjsonnet/BaseByteRenderer.scala
index 95a67aef..7c420ca2 100644
--- a/sjsonnet/src/sjsonnet/BaseByteRenderer.scala
+++ b/sjsonnet/src/sjsonnet/BaseByteRenderer.scala
@@ -307,13 +307,18 @@ class BaseByteRenderer[T <: java.io.OutputStream](
   }
 
   /**
-   * SWAR-accelerated path for long strings. Converts to UTF-8 bytes once, scans with SWAR, and
-   * bulk-copies if clean. The getBytes allocation is amortized by avoiding per-char processing.
+   * Chunked SWAR-accelerated path for long strings. Instead of binary scan (clean → bulk copy,
+   * dirty → full reprocess from position 0), uses findFirstEscapeChar to locate escape positions
+   * and copies clean chunks between them with arraycopy. For a 10KB string with 5 escape chars,
+   * this copies ~10KB in bulk chunks vs re-processing the entire string char-by-char.
    */
   private def visitLongString(str: String): Unit = {
     val bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8)
-    if (!CharSWAR.hasEscapeChar(bytes, 0, bytes.length)) {
-      val bLen = bytes.length
+    val bLen = bytes.length
+
+    val firstEscape = CharSWAR.findFirstEscapeChar(bytes, 0, bLen)
+    if (firstEscape < 0) {
+      // Clean string — direct bulk copy (existing fast path)
       elemBuilder.ensureLength(bLen + 2)
       val arr = elemBuilder.arr
       val pos = elemBuilder.length
@@ -322,13 +327,77 @@ class BaseByteRenderer[T <: java.io.OutputStream](
       arr(pos + 1 + bLen) = '"'.toByte
       elemBuilder.length = pos + bLen + 2
     } else {
-      upickle.core.RenderUtils.escapeByte(
-        unicodeCharBuilder,
-        elemBuilder,
-        str,
-        escapeUnicode = false,
-        wrapQuotes = true
-      )
+      // Dirty string — chunked rendering: copy clean segments, escape inline.
+      // 2x is only a sizing hint: escapes expand up to 6x, so every write below reserves its room.
+      elemBuilder.ensureLength(bLen + bLen + 2)
+      elemBuilder.appendUnsafeC('"')
+
+      var from = 0
+      var escPos = firstEscape
+      while (escPos >= 0) {
+        // Copy clean chunk before escape char
+        if (escPos > from) {
+          val chunkLen = escPos - from
+          elemBuilder.ensureLength(chunkLen) // earlier escapes may have used up the headroom
+          val pos = elemBuilder.length
+          System.arraycopy(bytes, from, elemBuilder.arr, pos, chunkLen)
+          elemBuilder.length = pos + chunkLen
+        }
+        // Escape the byte inline
+        escapeByteInline(bytes(escPos) & 0xff)
+        from = escPos + 1
+        // Find next escape char
+        escPos = if (from < bLen) CharSWAR.findFirstEscapeChar(bytes, from, bLen) else -1
+      }
+      // Copy the remaining clean tail (possibly empty), reserving room for the closing quote too
+      val tailLen = bLen - from
+      elemBuilder.ensureLength(tailLen + 1)
+      if (tailLen > 0) {
+        val pos = elemBuilder.length
+        System.arraycopy(bytes, from, elemBuilder.arr, pos, tailLen)
+        elemBuilder.length = pos + tailLen
+      }
+      elemBuilder.appendUnsafeC('"')
+    }
+  }
+
+  /**
+   * Inline JSON escape for a single byte: a two-byte escape for the 7 named cases, \\u00XX for
+   * anything else. Callers pass only bytes that need escaping (< 0x20, '"', '\\').
+   */
+  private def escapeByteInline(b: Int): Unit = {
+    // Reserve room for the longest escape sequence (\\u00XX = 6 bytes) before any unchecked append
+    elemBuilder.ensureLength(6)
+    (b: @scala.annotation.switch) match {
+      case '"' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('"')
+      case '\\' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('\\')
+      case '\b' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('b')
+      case '\f' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('f')
+      case '\n' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('n')
+      case '\r' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('r')
+      case '\t' =>
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('t')
+      case c =>
+        // Any other value: \\u00 followed by the high and low nibble as lowercase hex digits
+        elemBuilder.appendUnsafeC('\\')
+        elemBuilder.appendUnsafeC('u')
+        elemBuilder.appendUnsafeC('0')
+        elemBuilder.appendUnsafeC('0')
+        elemBuilder.appendUnsafeC(BaseByteRenderer.HEX_CHARS((c >> 4) & 0xf))
+        elemBuilder.appendUnsafeC(BaseByteRenderer.HEX_CHARS(c & 0xf))
     }
   }
 
@@ -377,6 +446,10 @@ object BaseByteRenderer {
     a
   }
 
+  /** Lowercase hex digits indexed by nibble value (0-15), used to build \\u00XX escapes. */
+  private[sjsonnet] val HEX_CHARS: Array[Char] =
+    "0123456789abcdef".toCharArray
+
   /**
    * Reusable scratch buffer for writeLongDirect (max 20 bytes for Long.MinValue). Not thread-safe,
    * but renderers are single-threaded.
diff --git a/sjsonnet/src/sjsonnet/BaseCharRenderer.scala b/sjsonnet/src/sjsonnet/BaseCharRenderer.scala
index c858dde5..fb119c52 100644
--- a/sjsonnet/src/sjsonnet/BaseCharRenderer.scala
+++ b/sjsonnet/src/sjsonnet/BaseCharRenderer.scala
@@ -259,19 +259,7 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output](
     flushBuffer()
     s match {
       case str: String if !escapeUnicode =>
-        val len = str.length
-        if (!CharSWAR.hasEscapeChar(str)) {
-          elemBuilder.ensureLength(len + 2)
-          elemBuilder.appendUnsafe('"')
-          val cbArr = elemBuilder.arr
-          val pos = elemBuilder.getLength
-          str.getChars(0, len, cbArr, pos)
-          elemBuilder.length = pos + len
-          elemBuilder.appendUnsafe('"')
-        } else {
-          upickle.core.RenderUtils
-            .escapeChar(null, elemBuilder, s, escapeUnicode = escapeUnicode, wrapQuotes = true)
-        }
+        renderQuotedStringSWAR(str)
       case _ =>
         upickle.core.RenderUtils.escapeChar(
           null,
@@ -285,6 +273,74 @@ class BaseCharRenderer[T <: upickle.core.CharOps.Output](
     out
   }
 
+  /**
+   * Appends `str` to elemBuilder as a quoted JSON string. Runs of chars that need no escaping
+   * are bulk-copied; each escape char is expanded by escapeCharInline.
+   *
+   * Every unsafe write reserves its own room first: a control char expands to 6 chars
+   * (\\u00XX), so the 2x up-front reservation alone can be too small for the runs and the
+   * closing quote that follow.
+   */
+  protected def renderQuotedStringSWAR(str: String): Unit = {
+    val len = str.length
+    if (len == 0) {
+      elemBuilder.ensureLength(2)
+      elemBuilder.appendUnsafe('"')
+      elemBuilder.appendUnsafe('"')
+      return
+    }
+    val chars = new Array[Char](len)
+    str.getChars(0, len, chars, 0)
+    val firstEscape = CharSWAR.findFirstEscapeCharChar(chars, 0, len)
+    // Escape-free strings need exactly len + 2; otherwise 2x covers the two-char escapes.
+    elemBuilder.ensureLength(if (firstEscape < 0) len + 2 else len + len + 2)
+    elemBuilder.appendUnsafe('"')
+    var from = 0
+    var escPos = firstEscape
+    while (from < len) {
+      // The clean run ends at the next escape char, or at the end of the string if none is left.
+      val runEnd = if (escPos < 0) len else escPos
+      val runLen = runEnd - from
+      if (runLen > 0) {
+        elemBuilder.ensureLength(runLen)
+        // Read arr and getLength only after ensureLength, which may reallocate the buffer.
+        val pos = elemBuilder.getLength
+        System.arraycopy(chars, from, elemBuilder.arr, pos, runLen)
+        elemBuilder.length = pos + runLen
+      }
+      from = runEnd
+      if (escPos >= 0) {
+        escapeCharInline(chars(escPos))
+        from = escPos + 1
+        escPos = if (from < len) CharSWAR.findFirstEscapeCharChar(chars, from, len) else -1
+      }
+    }
+    // escapeCharInline reserves room for the escape only, so the closing quote needs its own.
+    elemBuilder.ensureLength(1)
+    elemBuilder.appendUnsafe('"')
+  }
+
+  /** Appends the JSON escape for `c`; reserves 6 chars, the longest sequence (\\u00XX). */
+  protected def escapeCharInline(c: Char): Unit = {
+    elemBuilder.ensureLength(6)
+    elemBuilder.appendUnsafe('\\') // every escape sequence opens with a backslash
+    (c: @scala.annotation.switch) match {
+      case '"'  => elemBuilder.appendUnsafe('"')
+      case '\\' => elemBuilder.appendUnsafe('\\')
+      case '\b' => elemBuilder.appendUnsafe('b')
+      case '\f' => elemBuilder.appendUnsafe('f')
+      case '\n' => elemBuilder.appendUnsafe('n')
+      case '\r' => elemBuilder.appendUnsafe('r')
+      case '\t' => elemBuilder.appendUnsafe('t')
+      case _    =>
+        elemBuilder.appendUnsafe('u')
+        elemBuilder.appendUnsafe('0')
+        elemBuilder.appendUnsafe('0')
+        elemBuilder.appendUnsafe(sjsonnet.BaseByteRenderer.HEX_CHARS((c >> 4) & 0xf))
+        elemBuilder.appendUnsafe(sjsonnet.BaseByteRenderer.HEX_CHARS(c & 0xf))
+    }
+  }
+
   final def renderIndent(): Unit = {
     if (indent == -1) ()
     else if (indentCache != null && depth < BaseCharRenderer.MaxCachedDepth) {
diff --git a/sjsonnet/src/sjsonnet/Format.scala b/sjsonnet/src/sjsonnet/Format.scala
index f69e1442..3f97ea71 100644
--- a/sjsonnet/src/sjsonnet/Format.scala
+++ b/sjsonnet/src/sjsonnet/Format.scala
@@ -316,15 +316,22 @@ object Format {
       case x: Val.Obj => x
       case x          => Val.Arr(pos, Array[Eval](x))
     }
-    // Pre-size StringBuilder based on static chars + estimated dynamic content
-    val output = new StringBuilder(parsed.staticChars + parsed.specs.length * 8)
-    output.append(parsed.leading)
+    val numSpecs = parsed.specs.length
+    if (numSpecs == 0) {
+      if (values.isInstanceOf[Val.Arr] && values.cast[Val.Arr].length > 0) {
+        Error.fail(
+          "Too many values to format: %d, expected %d".format(values.cast[Val.Arr].length, 0)
+        )
+      }
+      return parsed.leading
+    }
+
+    // Pass 1: compute all formatted values into an array
+    val formattedValues = new Array[String](numSpecs)
     var i = 0
     var idx = 0
-    // Use while-loop instead of for/zipWithIndex to avoid iterator allocation
-    while (idx < parsed.specs.length) {
+    while (idx < numSpecs) {
       val rawFormatted = parsed.specs(idx)
-      val literal = parsed.literals(idx)
       var formatted = rawFormatted
       val cooked0 = formatted.conversion match {
         case '%' => widenRaw(formatted, "%")
@@ -475,8 +482,7 @@ object Format {
           i += 1
           formattedValue
       }
-      output.append(cooked0)
-      output.append(literal)
+      formattedValues(idx) = cooked0
       idx += 1
     }
 
@@ -485,7 +491,42 @@ object Format {
         "Too many values to format: %d, expected %d".format(values.cast[Val.Arr].length, i)
       )
     }
-    output.toString()
+
+    // Pass 2: compute exact output length
+    var totalLen = parsed.leading.length
+    idx = 0
+    while (idx < numSpecs) {
+      totalLen += formattedValues(idx).length + parsed.literals(idx).length
+      idx += 1
+    }
+
+    // Pass 3: assemble into pre-sized char[] — eliminates StringBuilder overhead
+    // (capacity checks, resizing, final toString copy)
+    val chars = new Array[Char](totalLen)
+    var cPos = 0
+    val leading = parsed.leading
+    val leadLen = leading.length
+    if (leadLen > 0) {
+      leading.getChars(0, leadLen, chars, cPos)
+      cPos += leadLen
+    }
+    idx = 0
+    while (idx < numSpecs) {
+      val fv = formattedValues(idx)
+      val fvLen = fv.length
+      if (fvLen > 0) {
+        fv.getChars(0, fvLen, chars, cPos)
+        cPos += fvLen
+      }
+      val lit = parsed.literals(idx)
+      val litLen = lit.length
+      if (litLen > 0) {
+        lit.getChars(0, litLen, chars, cPos)
+        cPos += litLen
+      }
+      idx += 1
+    }
+    new String(chars)
   }
 
   private def formatInteger(formatted: FormatSpec, s: Double): String = {
diff --git a/sjsonnet/src/sjsonnet/Parser.scala b/sjsonnet/src/sjsonnet/Parser.scala
index 41a62163..8828d6e0 100644
--- a/sjsonnet/src/sjsonnet/Parser.scala
+++ b/sjsonnet/src/sjsonnet/Parser.scala
@@ -729,7 +729,12 @@ class Parser(
     // cost more than the potential memory savings for strings that are unlikely
     // to repeat (e.g., 600KB text block literals)
     val unique = if (s.length > 1024) s else internedStrings.getOrElseUpdate(s, s)
-    Val.Str(pos, unique)
+    val result = Val.Str(pos, unique)
+    // Mark string literals that are printable ASCII with no JSON escape chars.
+    // This allows the renderer to skip SWAR escape scanning and UTF-8 encoding.
+    if (!CharSWAR.hasEscapeChar(unique) && CharSWAR.isAllAscii(unique))
+      result._asciiSafe = true
+    result
   }
 
   // Any `expr` that isn't naively left-recursive
diff --git a/sjsonnet/src/sjsonnet/Renderer.scala b/sjsonnet/src/sjsonnet/Renderer.scala
index 5f925330..0357ea4b 100644
--- a/sjsonnet/src/sjsonnet/Renderer.scala
+++ b/sjsonnet/src/sjsonnet/Renderer.scala
@@ -17,9 +17,7 @@ class Renderer(out: Writer = new java.io.StringWriter(), indent: Int = -1)
     flushBuffer()
     val i = d.toLong
     if (d == i) {
-      // Fast path: render integers directly to char buffer, avoiding String allocation.
-      // Most numbers in Jsonnet output are integers (array indices, counters, etc.).
-      RenderUtils.appendLong(elemBuilder, i)
+      writeLongDirect(i)
     } else if (d % 1 == 0) {
       appendString(
         BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString()
@@ -195,7 +193,12 @@ class PythonRenderer(out: Writer = new java.io.StringWriter(), indent: Int = -1)
   }
 }
 
-/** Renderer used by std.manifestJson, std.manifestJsonMinified, and std.manifestJsonEx */
+/**
+ * Renderer used by std.manifestJson, std.manifestJsonMinified, and std.manifestJsonEx.
+ *
+ * Supports both the Visitor-based path (via Materializer.apply0) and a fused direct path
+ * (materializeDirect) that bypasses the Visitor interface for better Scala Native performance.
+ */
 final case class MaterializeJsonRenderer(
     indent: Int = 4,
     escapeUnicode: Boolean = false,
@@ -264,67 +267,314 @@ final case class MaterializeJsonRenderer(
       out
     }
   }
-}
 
-object RenderUtils {
+  // ── Fused materializer ──────────────────────────────────────────────────────
+  // Bypasses the Visitor interface entirely: walks the Val tree and writes chars
+  // directly into elemBuilder. On Scala Native (no JIT), this eliminates virtual
+  // dispatch overhead on every visitString/visitObject/visitArray call.
 
   /**
-   * Custom rendering of Doubles used in rendering
+   * Walks `v` and writes its JSON straight into elemBuilder, bypassing the upickle Visitor
+   * interface, then flushes to `out`. Stack overflow / OOM are reported via Error.fail at v.pos.
    */
-  def renderDouble(d: Double): String = {
-    if (d.toLong == d) d.toLong.toString
-    else if (d % 1 == 0) {
-      BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString()
-    } else d.toString
+  def materializeDirect(v: Val)(implicit evaluator: EvalScope): Unit = {
+    val ctx = Materializer.MaterializeContext(evaluator)
+    try {
+      materializeChild(v, 0, ctx)
+      // Threshold 0: push whatever is still buffered in elemBuilder through to out.
+      elemBuilder.writeOutToIfLongerThan(out, 0)
+    } catch {
+      case _: StackOverflowError =>
+        Error.fail("Stackoverflow while materializing, possibly due to recursive value", v.pos)
+      case _: OutOfMemoryError =>
+        Error.fail("Out of memory while materializing, possibly due to recursive value", v.pos)
+    }
   }
 
-  /** Maximum number of digits in a Long value (Long.MinValue = -9223372036854775808, 20 chars). */
-  private final val MaxLongChars = 20
+  /**
+   * Renders a single value into elemBuilder, dispatching on its valTag. Strings, numbers,
+   * booleans and null are written inline. Arrays and objects recurse through the direct path
+   * while matDepth is below ctx.recursiveDepthLimit and go to Materializer.apply0 once the limit
+   * is reached. Functions fail; any other tag is resolved by matching on the runtime type.
+   */
+  private def materializeChild(v: Val, matDepth: Int, ctx: Materializer.MaterializeContext)(implicit
+      evaluator: EvalScope): Unit = {
+    if (v == null) Error.fail("Unknown value type " + v)
+    (v.valTag.toInt: @scala.annotation.switch) match {
+      case 0 => // TAG_STR
+        renderQuotedString(v.asInstanceOf[Val.Str].str)
+      case 1 => // TAG_NUM
+        renderDouble(v.asDouble)
+      case 2 => // TAG_TRUE
+        elemBuilder.ensureLength(4)
+        elemBuilder.appendUnsafe('t')
+        elemBuilder.appendUnsafe('r')
+        elemBuilder.appendUnsafe('u')
+        elemBuilder.appendUnsafe('e')
+      case 3 => // TAG_FALSE
+        elemBuilder.ensureLength(5)
+        elemBuilder.appendUnsafe('f')
+        elemBuilder.appendUnsafe('a')
+        elemBuilder.appendUnsafe('l')
+        elemBuilder.appendUnsafe('s')
+        elemBuilder.appendUnsafe('e')
+      case 4 => // TAG_NULL
+        elemBuilder.ensureLength(4)
+        elemBuilder.appendUnsafe('n')
+        elemBuilder.appendUnsafe('u')
+        elemBuilder.appendUnsafe('l')
+        elemBuilder.appendUnsafe('l')
+      // Containers: stay on the direct path while under the depth limit, otherwise hand the
+      // subtree to the Visitor-based Materializer.apply0 with this renderer as the visitor.
+      case 5 => // TAG_ARR
+        if (matDepth < ctx.recursiveDepthLimit)
+          materializeDirectArr(v.asInstanceOf[Val.Arr], matDepth + 1, ctx)
+        else Materializer.apply0(v, this)(evaluator)
+      case 6 => // TAG_OBJ
+        if (matDepth < ctx.recursiveDepthLimit)
+          materializeDirectObj(v.asInstanceOf[Val.Obj], matDepth + 1, ctx)
+        else Materializer.apply0(v, this)(evaluator)
+      case 7 => // TAG_FUNC
+        val paramNames = v.asInstanceOf[Val.Func].params.names.mkString(",")
+        Error.fail("Couldn't manifest function with params [" + paramNames + "]", v.pos)
+      case _ =>
+        v match {
+          case mat: Materializer.Materializable =>
+            mat.materialize(this)
+          case tc: TailCall =>
+            Error.fail("Internal error: TailCall sentinel leaked into materialization.", tc.pos)
+          case other =>
+            Error.fail("Unknown value type " + other.prettyName, other.pos)
+        }
+    }
+  }
 
-  /**
-   * Render a long value directly into a [[upickle.core.CharBuilder]], avoiding the intermediate
-   * `String` allocation that `Long.toString` would create. For small absolute values (the common
-   * case in Jsonnet output — array lengths, indices, counters), this saves one allocation per
-   * number. The algorithm writes digits in reverse then reverses in-place.
-   */
-  def appendLong(cb: upickle.core.CharBuilder, value: Long): Unit = {
-    if (value == 0) {
-      cb.append('0')
-      return
+  /** Appends `str` to elemBuilder as a quoted JSON string; delegates to renderQuotedStringSWAR. */
+  private def renderQuotedString(str: String): Unit = renderQuotedStringSWAR(str)
+
+  /** Appends `d`: as a long when it equals one exactly, else as a BigInt if integral, else d.toString. */
+  @inline private def renderDouble(d: Double): Unit = {
+    val i = d.toLong
+    if (d == i) {
+      writeLongDirect(i)
+    } else if (d % 1 == 0) {
+      appendString(BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString())
+    } else {
+      appendString(d.toString)
     }
+  }
+
+  /** Renders `obj` after triggering its asserts, bracketed by ctx.enterObject / exitObject. */
+  private def materializeDirectObj(
+      obj: Val.Obj,
+      matDepth: Int,
+      ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
+    if (!ctx.enterObject(obj))
+      Error.fail("Stackoverflow while materializing, possibly due to recursive value", obj.pos)
+    try {
+      obj.triggerAllAsserts(ctx.brokenAssertionLogic)
+      // Directly iterable objects render from their inline fields (sorted on request); every
+      // other object goes through the generic visibleKeyNames path.
+      if (!obj.canDirectIterate) materializeDirectGenericObj(obj, matDepth, ctx)
+      else if (ctx.sort) materializeDirectSortedInlineObj(obj, matDepth, ctx)
+      else materializeDirectInlineObj(obj, matDepth, ctx)
+    } finally {
+      ctx.exitObject(obj)
+    }
+  }
+
+  /** Writes '{' and increments depth; an empty object gets a bare newline unless indent is -1. */
+  @inline private def openObjBrace(isEmpty: Boolean): Unit = {
+    elemBuilder.append('{')
+    depth += 1
+    if (isEmpty && indent != -1)
+      elemBuilder.appendAll(newLineCharArray, newLineCharArray.length)
+    else renderIndent()
+  }
 
-    cb.ensureLength(MaxLongChars)
-    val arr = cb.arr
-    var pos = cb.getLength
+  /** Clears any pending comma, dedents and writes '}'; note that `wasEmpty` is never read. */
+  @inline private def closeObjBrace(wasEmpty: Boolean): Unit = {
+    commaBuffered = false
+    depth -= 1
+    renderIndent()
+    elemBuilder.append('}')
+    elemBuilder.writeOutToIfLongerThan(out, if (depth == 0) 0 else 1000)
+  }
 
-    val negative = value < 0
-    // Use negative accumulator to handle Long.MinValue correctly
-    var n = if (negative) value else -value
-    val startPos = pos
+  /** Calls flushBuffer, then writes the quoted key, the key/value separator and the child value. */
+  @inline private def renderKeyValue(
+      key: String,
+      childVal: Val,
+      matDepth: Int,
+      ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
+    flushBuffer()
+    renderQuotedString(key)
+    elemBuilder.appendAll(keyValueSeparatorCharArray, keyValueSeparatorCharArray.length)
+    materializeChild(childVal, matDepth, ctx)
+  }
 
-    while (n != 0) {
-      val digit = -(n % 10).toInt
-      arr(pos) = ('0' + digit).toChar
-      pos += 1
-      n /= 10
+  /** Renders an inline-field object in inlineKeys order by invoking each non-hidden member. */
+  private def materializeDirectInlineObj(
+      obj: Val.Obj,
+      matDepth: Int,
+      ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
+    val fs = ctx.emptyPos.fileScope
+    val rawKeys = obj.inlineKeys
+    if (rawKeys != null) {
+      val rawMembers = obj.inlineMembers
+      val rawN = rawKeys.length
+
+      // Count non-hidden fields first: the opening brace is written differently when none exist
+      var visCount = 0
+      var i = 0
+      while (i < rawN) {
+        if (rawMembers(i).visibility != Expr.Member.Visibility.Hidden) visCount += 1
+        i += 1
+      }
+
+      openObjBrace(visCount == 0)
+
+      i = 0
+      while (i < rawN) {
+        val m = rawMembers(i)
+        if (m.visibility != Expr.Member.Visibility.Hidden) {
+          val childVal = m.invoke(obj, null, fs, evaluator)
+          if (!obj._skipFieldCache) obj.cacheFieldValue(rawKeys(i), childVal)
+          renderKeyValue(rawKeys(i), childVal, matDepth, ctx)
+          commaBuffered = true
+        }
+        i += 1
+      }
+
+      closeObjBrace(visCount == 0)
+    } else {
+      // inlineKeys is null: the object's one field is read from singleKey / singleMem
+      val sfm = obj.singleMem
+      if (sfm.visibility != Expr.Member.Visibility.Hidden) {
+        openObjBrace(false)
+        val childVal = sfm.invoke(obj, null, fs, evaluator)
+        if (!obj._skipFieldCache) obj.cacheFieldValue(obj.singleKey, childVal)
+        renderKeyValue(obj.singleKey, childVal, matDepth, ctx)
+        closeObjBrace(false)
+      } else {
+        // The only field is hidden, so the object renders as empty
+        openObjBrace(true)
+        closeObjBrace(true)
+      }
+    }
+  }
+
+  /** Like materializeDirectInlineObj, but visits fields in the cached or computed sorted order. */
+  private def materializeDirectSortedInlineObj(
+      obj: Val.Obj,
+      matDepth: Int,
+      ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
+    val fs = ctx.emptyPos.fileScope
+    val rawKeys = obj.inlineKeys
+    if (rawKeys != null) {
+      val rawMembers = obj.inlineMembers
+      val order = {
+        val cached = obj._sortedInlineOrder
+        if (cached != null) cached
+        else Materializer.computeSortedInlineOrder(rawKeys, rawMembers)
+      }
+      val visCount = order.length
+
+      openObjBrace(visCount == 0)
+
+      var i = 0
+      while (i < visCount) {
+        val idx = order(i)
+        val childVal = rawMembers(idx).invoke(obj, null, fs, evaluator)
+        if (!obj._skipFieldCache) obj.cacheFieldValue(rawKeys(idx), childVal)
+        renderKeyValue(rawKeys(idx), childVal, matDepth, ctx)
+        commaBuffered = true
+        i += 1
+      }
+
+      closeObjBrace(visCount == 0)
+    } else {
+      // A single-field object has only one possible order, so reuse the unsorted path
+      materializeDirectInlineObj(obj, matDepth, ctx)
     }
+  }
+
+  /** Renders any object via visibleKeyNames (codepoint-sorted when ctx.sort) and obj.value. */
+  private def materializeDirectGenericObj(
+      obj: Val.Obj,
+      matDepth: Int,
+      ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
+    val keys =
+      if (ctx.sort) obj.visibleKeyNames.sorted(Util.CodepointStringOrdering)
+      else obj.visibleKeyNames
+
+    openObjBrace(keys.isEmpty)
 
-    if (negative) {
-      arr(pos) = '-'
-      pos += 1
+    var i = 0
+    while (i < keys.length) {
+      val key = keys(i)
+      val childVal = obj.value(key, ctx.emptyPos)
+      renderKeyValue(key, childVal, matDepth, ctx)
+      commaBuffered = true
+      i += 1
     }
 
-    // Reverse the digits in-place
-    var lo = startPos
-    var hi = pos - 1
-    while (lo < hi) {
-      val tmp = arr(lo)
-      arr(lo) = arr(hi)
-      arr(hi) = tmp
-      lo += 1
-      hi -= 1
+    closeObjBrace(keys.isEmpty)
+  }
+
+  private def materializeDirectArr(
+      xs: Val.Arr,
+      matDepth: Int,
+      ctx: Materializer.MaterializeContext)(implicit evaluator: EvalScope): Unit = {
+    val len = xs.length
+
+    elemBuilder.append('[')
+    depth += 1
+    // account for rendering differences of whitespaces in ujson and jsonnet manifestJson
+    if (len == 0 && indent != -1)
+      elemBuilder.appendAll(newLineCharArray, newLineCharArray.length)
+    else renderIndent()
+
+    // Byte-backed arrays skip the Val lookup: each byte is rendered as its unsigned value (0-255)
+    xs match {
+      case ba: Val.ByteArr =>
+        val bytes = ba.rawBytes
+        var i = 0
+        while (i < len) {
+          flushBuffer()
+          renderDouble((bytes(i) & 0xff).toDouble)
+          commaBuffered = true
+          i += 1
+        }
+      case _ =>
+        var i = 0
+        while (i < len) {
+          val childVal = xs.value(i)
+          flushBuffer()
+          materializeChild(childVal, matDepth, ctx)
+          commaBuffered = true
+          i += 1
+        }
     }
 
-    cb.length = pos
+    // Drop the pending comma so none is emitted before the closing bracket
+    commaBuffered = false
+    depth -= 1
+    renderIndent()
+    elemBuilder.append(']')
+    elemBuilder.writeOutToIfLongerThan(out, if (depth == 0) 0 else 1000)
+  }
+}
+
+object RenderUtils {
+
+  /**
+   * Renders `d` without a fractional part when it is integral, otherwise via Double.toString.
+   */
+  def renderDouble(d: Double): String = {
+    if (d.toLong == d) d.toLong.toString
+    else if (d % 1 == 0) {
+      BigDecimal(d).setScale(0, BigDecimal.RoundingMode.HALF_EVEN).toBigInt.toString()
+    } else d.toString
   }
 }
diff --git a/sjsonnet/src/sjsonnet/Util.scala b/sjsonnet/src/sjsonnet/Util.scala
index f55c8e0c..bdd4b86d 100644
--- a/sjsonnet/src/sjsonnet/Util.scala
+++ b/sjsonnet/src/sjsonnet/Util.scala
@@ -133,37 +133,13 @@ object Util {
    * Compares two strings by Unicode codepoint values rather than UTF-16 code units. This ensures
    * that strings with characters above U+FFFF (which require surrogate pairs in UTF-16) are
    * compared correctly according to their Unicode codepoint values.
+   *
+   * Delegates to platform-specific CharSWAR.compareStrings which uses bulk getChars + tight array
+   * loop for JIT auto-vectorization on JVM, LLVM auto-vectorization on Native, and scalar fallback
+   * on JS.
    */
-  def compareStringsByCodepoint(s1: String, s2: String): Int = {
-    // Fast path: same reference (e.g. interned strings, or self-comparison)
-    if (s1 eq s2) return 0
-    val n1 = s1.length
-    val n2 = s2.length
-    var i1 = 0
-    var i2 = 0
-    while (i1 < n1 && i2 < n2) {
-      val c1 = s1.charAt(i1)
-      val c2 = s2.charAt(i2)
-      // Fast path: equal chars can be skipped without surrogate checks.
-      // Even for surrogate pairs, equal high surrogates at position i lead to
-      // comparing low surrogates at i+1, producing the correct codepoint ordering.
-      if (c1 == c2) {
-        i1 += 1
-        i2 += 1
-      } else if (!Character.isSurrogate(c1) && !Character.isSurrogate(c2)) {
-        // Both non-surrogates and different: direct char subtraction
-        return c1 - c2
-      } else {
-        // At least one is a surrogate, use full codepoint logic
-        val cp1 = s1.codePointAt(i1)
-        val cp2 = s2.codePointAt(i2)
-        if (cp1 != cp2) return Integer.compare(cp1, cp2)
-        i1 += Character.charCount(cp1)
-        i2 += Character.charCount(cp2)
-      }
-    }
-    if (i1 < n1) 1 else if (i2 < n2) -1 else 0
-  }
+  def compareStringsByCodepoint(s1: String, s2: String): Int =
+    CharSWAR.compareStrings(s1, s2)
 
   /**
    * A reusable Ordering[String] that compares by Unicode codepoint values. Use this in place of
diff --git a/sjsonnet/src/sjsonnet/Val.scala b/sjsonnet/src/sjsonnet/Val.scala
index c717c93c..3032719d 100644
--- a/sjsonnet/src/sjsonnet/Val.scala
+++ b/sjsonnet/src/sjsonnet/Val.scala
@@ -403,11 +403,15 @@ object Val {
       if (ls != null && ls.isEmpty) return right
       if (rs != null && rs.isEmpty) return left
       // Small string eagerness: both flat and combined length <= 128
-      if (ls != null && rs != null && ls.length + rs.length <= 128)
-        return new Str(pos, ls + rs)
+      if (ls != null && rs != null && ls.length + rs.length <= 128) {
+        val result = new Str(pos, ls + rs)
+        if (left._asciiSafe && right._asciiSafe) result._asciiSafe = true
+        return result
+      }
       // Rope node: O(1)
       val node = new Str(pos, null)
       node._children = Array(left, right)
+      if (left._asciiSafe && right._asciiSafe) node._asciiSafe = true
       node
     }
   }
diff --git a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala
index 9424f631..428edc77 100644
--- a/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala
+++ b/sjsonnet/src/sjsonnet/stdlib/ManifestModule.scala
@@ -10,21 +10,19 @@ object ManifestModule extends AbstractFunctionModule {
   def name = "manifest"
 
   private object ManifestJson extends Val.Builtin1("manifestJson", "v") {
-    def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val =
-      Val.Str(pos, Materializer.apply0(v.value, MaterializeJsonRenderer())(ev).toString)
+    def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = {
+      val json = MaterializeJsonRenderer() // default formatting options
+      json.materializeDirect(v.value)(ev) // fills json.out with the rendered JSON
+      Val.Str(pos, json.out.toString)
+    }
   }
 
   private object ManifestJsonMinified extends Val.Builtin1("manifestJsonMinified", "value") {
-    def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val =
-      Val.Str(
-        pos,
-        Materializer
-          .apply0(
-            v.value,
-            MaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":")
-          )(ev)
-          .toString
-      )
+    def evalRhs(v: Eval, ev: EvalScope, pos: Position): Val = {
+      val minified = MaterializeJsonRenderer(indent = -1, newline = "", keyValueSeparator = ":")
+      minified.materializeDirect(v.value)(ev) // fills minified.out with the rendered JSON
+      Val.Str(pos, minified.out.toString)
+    }
   }
 
   private object ManifestJsonEx
@@ -42,20 +40,15 @@ object ManifestModule extends AbstractFunctionModule {
         newline: Eval,
         keyValSep: Eval,
         ev: EvalScope,
-        pos: Position): Val =
-      Val.Str(
-        pos,
-        Materializer
-          .apply0(
-            v.value,
-            MaterializeJsonRenderer(
-              indent = i.value.asString.length,
-              newline = newline.value.asString,
-              keyValueSeparator = keyValSep.value.asString
-            )
-          )(ev)
-          .toString
+        pos: Position): Val = {
+      val renderer = MaterializeJsonRenderer(
+        indent = i.value.asString.length,
+        newline = newline.value.asString,
+        keyValueSeparator = keyValSep.value.asString
       )
+      renderer.materializeDirect(v.value)(ev)
+      Val.Str(pos, renderer.out.toString)
+    }
   }
 
   private object ParseJson extends Val.Builtin1("parseJson", "str") {
diff --git a/sjsonnet/src/sjsonnet/stdlib/StringModule.scala b/sjsonnet/src/sjsonnet/stdlib/StringModule.scala
index f4a7c976..ca8ca625 100644
--- a/sjsonnet/src/sjsonnet/stdlib/StringModule.scala
+++ b/sjsonnet/src/sjsonnet/stdlib/StringModule.scala
@@ -26,11 +26,12 @@ object StringModule extends AbstractFunctionModule {
       Val.cachedNum(
         pos,
         (x.value match {
-          case Val.Str(_, s) => s.codePointCount(0, s.length)
-          case a: Val.Arr    => a.length
-          case o: Val.Obj    => o.visibleKeyNames.length
-          case o: Val.Func   => o.params.names.length
-          case x             => Error.fail("Cannot get length of " + x.prettyName)
+          case Val.Str(_, s) =>
+            if (CharSWAR.isAllAscii(s)) s.length else s.codePointCount(0, s.length)
+          case a: Val.Arr  => a.length
+          case o: Val.Obj  => o.visibleKeyNames.length
+          case o: Val.Func => o.params.names.length
+          case x           => Error.fail("Cannot get length of " + x.prettyName)
         }).toDouble
       )
   }
@@ -49,7 +50,9 @@ object StringModule extends AbstractFunctionModule {
 
   private object Substr extends Val.Builtin3("substr", "str", "from", "len") {
     def evalRhs(_s: Eval, from: Eval, len: Eval, ev: EvalScope, pos: Position): Val = {
-      val str = _s.value.asString
+      val srcVal = _s.value
+      val str = srcVal.asString
+      val srcAsciiSafe = srcVal.isInstanceOf[Val.Str] && srcVal.asInstanceOf[Val.Str]._asciiSafe
       val offset = from.value match {
         case v: Val.Num => v.asPositiveInt
         case _ => Error.fail("Expected a number for offset in substr, got " + from.value.prettyName)
@@ -59,16 +62,28 @@ object StringModule extends AbstractFunctionModule {
         case _ => Error.fail("Expected a number for len in substr, got " + len.value.prettyName)
       }
 
-      val unicodeLength = str.codePointCount(0, str.length)
-      val safeOffset = math.min(offset, unicodeLength)
-      val safeLength = math.min(length, unicodeLength - safeOffset)
-
-      if (safeLength <= 0) {
-        Val.Str(pos, "")
+      if (srcAsciiSafe || CharSWAR.isAllAscii(str)) {
+        val strLen = str.length
+        val safeOffset = math.min(offset, strLen)
+        val safeLength = math.min(length, strLen - safeOffset)
+        if (safeLength <= 0) Val.Str(pos, "")
+        else {
+          val result = Val.Str(pos, str.substring(safeOffset, safeOffset + safeLength))
+          if (srcAsciiSafe) result._asciiSafe = true
+          result
+        }
       } else {
-        val startUtf16 = if (safeOffset == 0) 0 else str.offsetByCodePoints(0, safeOffset)
-        val endUtf16 = str.offsetByCodePoints(startUtf16, safeLength)
-        Val.Str(pos, str.substring(startUtf16, endUtf16))
+        val unicodeLength = str.codePointCount(0, str.length)
+        val safeOffset = math.min(offset, unicodeLength)
+        val safeLength = math.min(length, unicodeLength - safeOffset)
+
+        if (safeLength <= 0) {
+          Val.Str(pos, "")
+        } else {
+          val startUtf16 = if (safeOffset == 0) 0 else str.offsetByCodePoints(0, safeOffset)
+          val endUtf16 = str.offsetByCodePoints(startUtf16, safeLength)
+          Val.Str(pos, str.substring(startUtf16, endUtf16))
+        }
       }
     }
   }
@@ -266,21 +281,69 @@ object StringModule extends AbstractFunctionModule {
       val arr = implicitly[ReadWriter[Val.Arr]].apply(_arr.value)
       sep.value match {
         case Val.Str(_, s) =>
-          val b = new java.lang.StringBuilder()
+          // Two-pass approach: pre-calculate total length to avoid StringBuilder resizing.
+          // Pass 1: force values, compute total length, count non-null elements.
+          val sepLen = s.length
+          var totalLen = 0L
+          var count = 0
+          val sepVal = sep.value.asInstanceOf[Val.Str]
+          var allAsciiSafe =
+            sepVal._asciiSafe || (CharSWAR.isAllAscii(s) && !CharSWAR.hasEscapeChar(s))
           var i = 0
-          var added = false
           while (i < arr.length) {
             arr.value(i) match {
-              case _: Val.Null   =>
-              case Val.Str(_, x) =>
-                if (added) b.append(s)
-                added = true
-                b.append(x)
+              case _: Val.Null =>
+              case vs: Val.Str =>
+                if (count > 0) totalLen += sepLen
+                totalLen += vs.str.length
+                if (allAsciiSafe && !vs._asciiSafe) allAsciiSafe = false
+                count += 1
               case x => Error.fail("Cannot join " + x.prettyName)
             }
             i += 1
           }
-          Val.Str(pos, b.toString)
+          if (count == 0) return Val.Str(pos, "")
+          if (totalLen > Int.MaxValue)
+            Error.fail("Join result too large: " + totalLen + " characters")
+          // Pass 2: build result in pre-sized char array.
+          val chars = new Array[Char](totalLen.toInt)
+          var cPos = 0
+          var added = false
+          i = 0
+          if (sepLen == 1) {
+            // Single-char separator fast path: direct char write
+            val sepChar = s.charAt(0)
+            while (i < arr.length) {
+              arr.value(i) match {
+                case _: Val.Null   =>
+                case Val.Str(_, x) =>
+                  if (added) { chars(cPos) = sepChar; cPos += 1 }
+                  added = true
+                  val xLen = x.length
+                  x.getChars(0, xLen, chars, cPos)
+                  cPos += xLen
+                case _ => // already validated in pass 1
+              }
+              i += 1
+            }
+          } else {
+            while (i < arr.length) {
+              arr.value(i) match {
+                case _: Val.Null   =>
+                case Val.Str(_, x) =>
+                  if (added) { s.getChars(0, sepLen, chars, cPos); cPos += sepLen }
+                  added = true
+                  val xLen = x.length
+                  x.getChars(0, xLen, chars, cPos)
+                  cPos += xLen
+                case _ => // already validated in pass 1
+              }
+              i += 1
+            }
+          }
+          val result = Val.Str(pos, new String(chars))
+          if (allAsciiSafe) result._asciiSafe = true
+          result
         case sep: Val.Arr =>
           val out = new mutable.ArrayBuilder.ofRef[Eval]
           // Set a reasonable size hint based on estimated result size
@@ -360,24 +423,47 @@ object StringModule extends AbstractFunctionModule {
       stringChars(pos, str.value.asString)
   }
 
+  /**
+   * Hand-written digit loop: no exception setup, no intermediate allocation, single pass. Only
+   * base 10 accepts a leading '+' or '-'; bad digits and Long overflow go through Error.fail.
+   */
+  private def parseDigits(s: String, base: Int): Long = {
+    val len = s.length
+    if (len == 0) Error.fail("Cannot parse '' as an integer in base " + base)
+    val signed = base == 10 && (s.charAt(0) == '-' || s.charAt(0) == '+')
+    var i = if (signed) 1 else 0
+    if (i >= len) Error.fail("Cannot parse '" + s + "' as an integer in base " + base)
+    var result = 0L
+    while (i < len) {
+      val c = s.charAt(i)
+      val d =
+        if (c >= '0' && c <= '9') c - '0'
+        else if (base == 16 && c >= 'a' && c <= 'f') c - 'a' + 10
+        else if (base == 16 && c >= 'A' && c <= 'F') c - 'A' + 10
+        else -1
+      if (d < 0 || d >= base) Error.fail("Cannot parse '" + s + "' as an integer in base " + base)
+      // Pre-check the bound: a wrapped product can still exceed `result`, hiding the overflow.
+      if (result > (Long.MaxValue - d) / base)
+        Error.fail("Integer overflow parsing '" + s + "' in base " + base)
+      result = result * base + d
+      i += 1
+    }
+    if (signed && s.charAt(0) == '-') -result else result
+  }
+
   private object ParseInt extends Val.Builtin1("parseInt", "str") {
     def evalRhs(str: Eval, ev: EvalScope, pos: Position): Val =
-      try {
-        Val.cachedNum(pos, str.value.asString.toLong.toDouble)
-      } catch {
-        case _: NumberFormatException =>
-          Error.fail("Cannot parse '" + str.value.asString + "' as an integer in base 10")
-      }
+      Val.cachedNum(pos, parseDigits(str.value.asString, 10).toDouble)
   }
 
   private object ParseOctal extends Val.Builtin1("parseOctal", "str") {
     def evalRhs(str: Eval, ev: EvalScope, pos: Position): Val =
-      Val.cachedNum(pos, java.lang.Long.parseLong(str.value.asString, 8).toDouble)
+      Val.cachedNum(pos, parseDigits(str.value.asString, 8).toDouble)
   }
 
   private object ParseHex extends Val.Builtin1("parseHex", "str") {
     def evalRhs(str: Eval, ev: EvalScope, pos: Position): Val =
-      Val.cachedNum(pos, java.lang.Long.parseLong(str.value.asString, 16).toDouble)
+      Val.cachedNum(pos, parseDigits(str.value.asString, 16).toDouble)
   }
 
   private object AsciiUpper extends Val.Builtin1("asciiUpper", "str") {
diff --git a/sjsonnet/test/src/sjsonnet/RendererTests.scala b/sjsonnet/test/src/sjsonnet/RendererTests.scala
index 65577f25..bdf81c6d 100644
--- a/sjsonnet/test/src/sjsonnet/RendererTests.scala
+++ b/sjsonnet/test/src/sjsonnet/RendererTests.scala
@@ -43,26 +43,14 @@ object RendererTests extends TestSuite {
         |]""".stripMargin
     }
 
-    test("appendLong") {
-      def render(v: Long): String = {
-        val cb = new upickle.core.CharBuilder
-        RenderUtils.appendLong(cb, v)
-        cb.makeString()
-      }
-      test("zero") { render(0L) ==> "0" }
-      test("positive") { render(42L) ==> "42" }
-      test("negative") { render(-1L) ==> "-1" }
-      test("large") { render(9999999999L) ==> "9999999999" }
-      test("maxValue") { render(Long.MaxValue) ==> Long.MaxValue.toString }
-      test("minValue") { render(Long.MinValue) ==> Long.MinValue.toString }
-    }
-
     test("visitFloat64Integers") {
-      // Verify that integer-valued doubles render correctly via the Renderer
       ujson.transform(ujson.Num(0), new Renderer()).toString ==> "0"
       ujson.transform(ujson.Num(42), new Renderer()).toString ==> "42"
       ujson.transform(ujson.Num(-1), new Renderer()).toString ==> "-1"
       ujson.transform(ujson.Num(1e15), new Renderer()).toString ==> "1000000000000000"
+      ujson.transform(ujson.Num(9999999999.0), new Renderer()).toString ==> "9999999999"
+      ujson.transform(ujson.Num(Long.MaxValue.toDouble), new Renderer()).toString ==>
+        Long.MaxValue.toDouble.toLong.toString
     }
 
     test("indentZero") {