Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,17 @@ To be released.
[#826]: https://github.com/fedify-dev/fedify/issues/826
[#850]: https://github.com/fedify-dev/fedify/pull/850

### @fedify/vocab-runtime

- Changed `getDocumentLoader()` to reject HTML and XHTML responses that do
not advertise an ActivityPub alternate document with a `FetchError`
instead of attempting to parse the HTML as JSON. This makes remote HTML
error pages surface as document loading failures with the response URL and
content type, rather than generic JSON parser crashes. [[#912], [#913]]

[#912]: https://github.com/fedify-dev/fedify/issues/912
[#913]: https://github.com/fedify-dev/fedify/pull/913


Version 2.3.1
-------------
Expand Down
108 changes: 102 additions & 6 deletions packages/vocab-runtime/src/docloader.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import fetchMock from "fetch-mock";
import { deepStrictEqual, ok, rejects } from "node:assert";
import { test } from "node:test";
import preloadedContexts from "./contexts.ts";
import { getDocumentLoader } from "./docloader.ts";
import { getDocumentLoader, getRemoteDocument } from "./docloader.ts";
import { FetchError } from "./request.ts";
import { UrlError } from "./url.ts";

Expand Down Expand Up @@ -90,7 +90,7 @@ test("getDocumentLoader()", async (t) => {
type: "Object",
},
headers: {
"Content-Type": "text/html; charset=utf-8",
"Content-Type": "application/activity+json",
Link: '<https://example.com/object>; rel="alternate"; ' +
'type="application/ld+json; profile="https://www.w3.org/ns/activitystreams""',
},
Expand Down Expand Up @@ -247,6 +247,44 @@ test("getDocumentLoader()", async (t) => {
});
});

fetchMock.get("https://example.com/html-no-alternate", {
body: `<!DOCTYPE html>
<html>
<head>
<title>Not an ActivityPub document</title>
</head>
<body>Not found</body>
</html>`,
headers: { "Content-Type": "text/html; charset=utf-8" },
});

await t.test("HTML without ActivityPub alternate link", async () => {
await rejects(
() => fetchDocumentLoader("https://example.com/html-no-alternate"),
(error) => {
ok(error instanceof FetchError);
ok(
error.message.includes(
"HTML document has no ActivityPub alternate link",
),
);
ok(
error.message.includes("Content-Type: text/html; charset=utf-8"),
);
deepStrictEqual(
error.url,
new URL("https://example.com/html-no-alternate"),
);
ok(error.response != null);
deepStrictEqual(
error.response.headers.get("Content-Type"),
"text/html; charset=utf-8",
);
return true;
},
);
});

fetchMock.get("https://example.com/wrong-content-type", {
body: {
"@context": "https://www.w3.org/ns/activitystreams",
Expand All @@ -257,7 +295,7 @@ test("getDocumentLoader()", async (t) => {
headers: { "Content-Type": "text/html; charset=utf-8" },
});

await t.test("Wrong Content-Type", async () => {
await t.test("wrong Content-Type with JSON body", async () => {
deepStrictEqual(
await fetchDocumentLoader("https://example.com/wrong-content-type"),
{
Expand All @@ -273,6 +311,64 @@ test("getDocumentLoader()", async (t) => {
);
});

fetchMock.get("https://example.com/large-html", {
body: "<!DOCTYPE html>",
headers: {
"Content-Length": String(1024 * 1024 + 1),
"Content-Type": "text/html; charset=utf-8",
},
});

await t.test("HTML Content-Length over limit", async () => {
await rejects(
() => fetchDocumentLoader("https://example.com/large-html"),
(error) => {
ok(error instanceof FetchError);
ok(
error.message.includes(
"HTML document is too large to scan for an ActivityPub alternate link",
),
);
ok(error.response != null);
deepStrictEqual(error.response.status, 200);
deepStrictEqual(
error.response.headers.get("Content-Type"),
"text/html; charset=utf-8",
);
return true;
},
);
});

await t.test("HTML Content-Length over limit cancels body", async () => {
let canceled = false;
const response = new Response("<!DOCTYPE html>", {
headers: {
"Content-Length": String(1024 * 1024 + 1),
"Content-Type": "text/html; charset=utf-8",
},
});
Object.defineProperty(response, "body", {
value: {
cancel: () => {
canceled = true;
},
},
});
await rejects(
() =>
getRemoteDocument(
"https://example.com/large-html-cancel",
response,
() => {
throw new Error("unexpected alternate fetch");
},
),
FetchError,
);
deepStrictEqual(canceled, true);
});

fetchMock.get("https://example.com/404", { status: 404 });

await t.test("not ok", async () => {
Expand Down Expand Up @@ -459,11 +555,11 @@ test("getDocumentLoader()", async (t) => {

await t.test("ReDoS resistance (CVE-2025-68475)", async () => {
const start = performance.now();
// The malicious HTML will fail JSON parsing, but the important thing is
// that it should complete quickly (not hang due to ReDoS)
// The malicious HTML will fail alternate discovery, but the important
// thing is that it should complete quickly (not hang due to ReDoS).
await rejects(
() => fetchDocumentLoader("https://example.com/redos"),
SyntaxError,
FetchError,
);
const elapsed = performance.now() - start;

Expand Down
84 changes: 76 additions & 8 deletions packages/vocab-runtime/src/docloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import { UrlError, validatePublicUrl } from "./url.ts";

const logger = getLogger(["fedify", "runtime", "docloader"]);
const DEFAULT_MAX_REDIRECTION = 20;
const MAX_HTML_SIZE = 1024 * 1024; // 1MB

/**
* A remote JSON-LD document and its context fetched by
Expand Down Expand Up @@ -112,6 +113,59 @@ export type AuthenticatedDocumentLoaderFactory = (
options?: DocumentLoaderFactoryOptions,
) => DocumentLoader;

function createResponseMetadata(response: Response): Response {
return new Response(null, {
headers: response.headers,
status: response.status,
statusText: response.statusText,
});
}

async function cancelResponseBody(response: Response): Promise<void> {
if (response.body != null) {
await response.body.cancel();
}
}

async function readBoundedText(
response: Response,
maxBytes: number,
): Promise<{ text: string; size: number; tooLarge: boolean }> {
const contentLength = response.headers.get("Content-Length");
if (contentLength != null) {
const size = Number(contentLength);
if (size > maxBytes) {
await cancelResponseBody(response);
return { text: "", size, tooLarge: true };
Comment thread
dahlia marked this conversation as resolved.
}
}

if (response.body == null) return { text: "", size: 0, tooLarge: false };

const reader = response.body.getReader();
const decoder = new TextDecoder();
let text = "";
let size = 0;
try {
while (true) {
const result = await reader.read();
if (result.done) break;
const chunkSize = result.value.byteLength;
if (size + chunkSize > maxBytes) {
size += chunkSize;
await reader.cancel();
return { text: "", size, tooLarge: true };
}
size += chunkSize;
text += decoder.decode(result.value, { stream: true });
}
text += decoder.decode();
return { text, size, tooLarge: false };
} finally {
reader.releaseLock();
}
}

/**
* Gets a {@link RemoteDocument} from the given response.
* @param url The URL of the document to load.
Expand Down Expand Up @@ -200,15 +254,19 @@ export async function getRemoteDocument(
contentType === "application/xhtml+xml" ||
contentType?.startsWith("application/xhtml+xml;"))
) {
// Security: Limit HTML response size to mitigate ReDoS attacks
const MAX_HTML_SIZE = 1024 * 1024; // 1MB
const html = await response.text();
if (html.length > MAX_HTML_SIZE) {
const errorResponse = createResponseMetadata(response);
const html = await readBoundedText(response, MAX_HTML_SIZE);
if (html.tooLarge) {
logger.warn(
"HTML response too large, skipping alternate link discovery: {url}",
{ url: documentUrl, size: html.length },
{ url: documentUrl, size: html.size },
);
throw new FetchError(
documentUrl,
`HTML document is too large to scan for an ActivityPub alternate link ` +
`(Content-Type: ${contentType})`,
errorResponse,
);
document = JSON.parse(html);
} else {
// Safe regex patterns without nested quantifiers to prevent ReDoS
// (CVE-2025-68475)
Expand All @@ -219,7 +277,7 @@ export async function getRemoteDocument(
/([a-z][a-z:_-]*)=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi;

let tagMatch: RegExpExecArray | null;
while ((tagMatch = tagPattern.exec(html)) !== null) {
while ((tagMatch = tagPattern.exec(html.text)) !== null) {
const tagContent = tagMatch[2];
let attrMatch: RegExpExecArray | null;
const attribs: Record<string, string> = {};
Expand Down Expand Up @@ -247,7 +305,17 @@ export async function getRemoteDocument(
return await fetch(new URL(attribs.href, docUrl).href);
}
}
document = JSON.parse(html);
try {
document = JSON.parse(html.text);
} catch (error) {
if (!(error instanceof SyntaxError)) throw error;
throw new FetchError(
documentUrl,
`HTML document has no ActivityPub alternate link ` +
`(Content-Type: ${contentType})`,
errorResponse,
);
}
}
} else {
document = await response.json();
Expand Down
Loading