anonrig · spokodev · Jun 24, 2026
diff --git a/lib/internals/decode.js b/lib/internals/decode.js
@@ -0,0 +1,55 @@
+"use strict";
+
+/**
+ * Fallback percent-decoder that tolerates byte sequences which are not
+ * valid UTF-8. `lib/parse.js` calls it only after
+ * `fast-decode-uri-component` has returned `null` for a key or value.
+ *
+ * The goal is parity with `node:querystring`. If the token holds at least
+ * one well-formed `%XX` escape, each escape yields its byte, every other
+ * UTF-16 code unit is truncated to its low 8 bits, and the bytes are then
+ * read back as UTF-8, so invalid sequences surface as U+FFFD rather than
+ * as literal "%XX" text. If no escape is well-formed (e.g. only "%zz"),
+ * the input string is handed back as-is.
+ *
+ * @param {string} str - Raw, still percent-encoded token.
+ * @returns {string} The leniently decoded token.
+ */
+function decodeString(str) {
+  const length = str.length;
+  const bytes = Buffer.allocUnsafe(length);
+  let written = 0;
+  let decodedAny = false;
+  let pos = 0;
+
+  while (pos < length) {
+    const unit = str.charCodeAt(pos);
+    let byte = unit & 0xff;
+    let step = 1;
+
+    if (unit === 37 /* % */ && pos + 2 < length) {
+      const high = hexValue(str.charCodeAt(pos + 1));
+      const low = hexValue(str.charCodeAt(pos + 2));
+      if (high >= 0 && low >= 0) {
+        byte = (high << 4) | low;
+        step = 3;
+        decodedAny = true;
+      }
+    }
+
+    bytes[written++] = byte;
+    pos += step;
+  }
+
+  // Nothing decodable: keep the token untouched, as node:querystring does.
+  return decodedAny ? bytes.toString("utf8", 0, written) : str;
+}
+
+function hexValue(c) {
+  // Maps a hex-digit char code to 0..15; any other input yields -1.
+  if (c >= 97) return c <= 102 ? c - 87 : -1; // a-f
+  if (c >= 65) return c <= 70 ? c - 55 : -1; // A-F
+  return c >= 48 && c <= 57 ? c - 48 : -1; // 0-9
+}
+
+module.exports = { decodeString };
diff --git a/lib/parse.js b/lib/parse.js
@@ -1,6 +1,7 @@
 "use strict";
 
 const fastDecode = require("fast-decode-uri-component");
+const { decodeString } = require("./internals/decode");
 
 const plusRegex = /\+/g;
 const Empty = function () {};
@@ -55,7 +56,11 @@ function parse(input) {
 
         // Optimization: Do not decode if it's not necessary.
         if (shouldDecodeKey) {
-          key = fastDecode(key) || key;
+          const decodedKey = fastDecode(key);
+          // fastDecode returns null on invalid UTF-8; fall back to a lenient
+          // decoder so the result matches node:querystring (U+FFFD) instead of
+          // keeping the raw "%XX" text.
+          key = decodedKey === null ? decodeString(key) : decodedKey;
         }
 
         if (hasBothKeyValuePair) {
@@ -66,7 +71,8 @@ function parse(input) {
           }
 
           if (shouldDecodeValue) {
-            value = fastDecode(value) || value;
+            const decodedValue = fastDecode(value);
+            value = decodedValue === null ? decodeString(value) : decodedValue;
           }
         }
         const currentValue = result[key];

diff --git a/test/parse.test.ts b/test/parse.test.ts
@@ -60,3 +60,26 @@ test("should parse large numbers", () => {
     "918854443121279438895193",
   );
 });
+
+test("matches node:querystring on invalid UTF-8 escapes in values", () => {
+  // Valid escapes, invalid UTF-8: node:querystring emits U+FFFD, not "%XX".
+  const inputs = [
+    "a=%C3",
+    "a=%ff",
+    "a=%80",
+    "a=%E4%B8",
+    "a=%F0%9F%98",
+    "a=%ED%A0%80",
+    "a=%C0%80",
+    "a=%C3%A9%E4%B8", // valid then invalid in the same value
+  ];
+  for (const input of inputs) {
+    assert.deepEqual(qs.parse(input), querystring.parse(input), input);
+  }
+});
+
+test("matches node:querystring on invalid UTF-8 escapes in keys", () => {
+  ["%C3=x", "%ff=x", "%E4%B8=x"].forEach((input) => {
+    assert.deepEqual(qs.parse(input), querystring.parse(input), input);
+  });
+});