Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions lib/internals/decode.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"use strict";

/**
 * Lenient percent-decoder, intended as the fallback for tokens on which the
 * fast path (`fast-decode-uri-component`) gives up and returns `null`.
 *
 * The token is turned into a byte sequence and then decoded as UTF-8:
 *
 * - a `%` followed by two hex digits contributes the byte those digits spell;
 * - every other UTF-16 code unit (including a `%` that does not start a
 *   well-formed escape) contributes its low 8 bits;
 * - if at least one escape was decoded, the bytes are decoded as UTF-8, so
 *   invalid byte sequences surface as U+FFFD rather than as raw "%XX" text;
 * - if no escape was decoded (e.g. only malformed ones such as "%zz"), the
 *   input string is returned unchanged.
 *
 * @param {string} str Token to decode.
 * @returns {string} Decoded token, or `str` itself when nothing was decodable.
 */
function decodeString(str) {
  const total = str.length;
  // The output can never be longer than the input: an escape shrinks three
  // characters to one byte, anything else maps one character to one byte.
  const bytes = Buffer.allocUnsafe(total);
  let written = 0;
  let decodedAny = false;
  let pos = 0;

  while (pos < total) {
    const unit = str.charCodeAt(pos);
    // Default: pass the code unit through, truncated to a single byte.
    let byte = unit & 0xff;
    let step = 1;

    // A full escape needs two more characters after the '%' (code 37).
    if (unit === 37 && pos + 2 < total) {
      const high = hexValue(str.charCodeAt(pos + 1));
      const low = hexValue(str.charCodeAt(pos + 2));
      const wellFormed = !(high === -1 || low === -1);

      if (wellFormed) {
        byte = high * 16 + low;
        step = 3;
        decodedAny = true;
      }
    }

    bytes[written] = byte;
    written += 1;
    pos += step;
  }

  // Without a single decoded escape the caller gets its input back untouched.
  return decodedAny ? bytes.toString("utf8", 0, written) : str;
}

/**
 * Numeric value of a hexadecimal digit given as a character code.
 *
 * @param {number} c Character code to inspect.
 * @returns {number} 0-15 for `0-9`, `A-F` or `a-f`; -1 for anything else.
 */
function hexValue(c) {
  // Outside '0' (48) .. 'f' (102): cannot be a hex digit.
  if (c < 48 || c > 102) return -1;
  // '0'..'9'
  if (c <= 57) return c - 48;
  // 'a'..'f'
  if (c >= 97) return c - 87;
  // 'A'..'F'; the remaining codes between the ranges are not hex digits.
  return c >= 65 && c <= 70 ? c - 55 : -1;
}

module.exports = { decodeString };
10 changes: 8 additions & 2 deletions lib/parse.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"use strict";

const fastDecode = require("fast-decode-uri-component");
const { decodeString } = require("./internals/decode");

const plusRegex = /\+/g;
const Empty = function () {};
Expand Down Expand Up @@ -55,7 +56,11 @@ function parse(input) {

// Optimization: Do not decode if it's not necessary.
if (shouldDecodeKey) {
key = fastDecode(key) || key;
const decodedKey = fastDecode(key);
// fastDecode returns null on invalid UTF-8; fall back to a lenient
// decoder so the result matches node:querystring (U+FFFD) instead of
// keeping the raw "%XX" text.
key = decodedKey === null ? decodeString(key) : decodedKey;
}

if (hasBothKeyValuePair) {
Expand All @@ -66,7 +71,8 @@ function parse(input) {
}

if (shouldDecodeValue) {
value = fastDecode(value) || value;
const decodedValue = fastDecode(value);
value = decodedValue === null ? decodeString(value) : decodedValue;
}
}
const currentValue = result[key];
Expand Down
23 changes: 23 additions & 0 deletions test/parse.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,26 @@ test("should parse large numbers", () => {
"918854443121279438895193",
);
});

test("matches node:querystring on invalid UTF-8 escapes in values", () => {
  // Every value below is built from well-formed "%XX" escapes whose bytes do
  // not form valid UTF-8. The reference result is whatever node:querystring
  // produces for the same input (U+FFFD substitution, never the raw "%XX").
  const inputs = [
    "a=%C3", // lead byte with no continuation
    "a=%ff", // byte that never occurs in UTF-8
    "a=%80", // stray continuation byte
    "a=%E4%B8", // truncated 3-byte sequence
    "a=%F0%9F%98", // truncated 4-byte sequence
    "a=%ED%A0%80", // UTF-8-encoded surrogate
    "a=%C0%80", // overlong encoding
    "a=%C3%A9%E4%B8", // valid then invalid in the same value
  ];

  inputs.forEach((input) => {
    const expected = querystring.parse(input);
    const actual = qs.parse(input);
    assert.deepEqual(actual, expected, input);
  });
});

test("matches node:querystring on invalid UTF-8 escapes in keys", () => {
  // Same comparison as for values, with the invalid escapes on the key side.
  const inputs = ["%C3=x", "%ff=x", "%E4%B8=x"];

  inputs.forEach((input) => {
    const expected = querystring.parse(input);
    const actual = qs.parse(input);
    assert.deepEqual(actual, expected, input);
  });
});