diff --git a/CHANGES.md b/CHANGES.md index 8bd936766..e6314c274 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -20,6 +20,17 @@ To be released. [#826]: https://github.com/fedify-dev/fedify/issues/826 [#850]: https://github.com/fedify-dev/fedify/pull/850 +### @fedify/vocab-runtime + + - Changed `getDocumentLoader()` to reject HTML and XHTML responses that do + not advertise an ActivityPub alternate document with a `FetchError` + instead of attempting to parse the HTML as JSON. This makes remote HTML + error pages surface as document loading failures with the response URL and + content type, rather than generic JSON parser crashes. [[#912], [#913]] + +[#912]: https://github.com/fedify-dev/fedify/issues/912 +[#913]: https://github.com/fedify-dev/fedify/pull/913 + Version 2.3.1 ------------- diff --git a/packages/vocab-runtime/src/docloader.test.ts b/packages/vocab-runtime/src/docloader.test.ts index e96d8c9ab..8ac8d719b 100644 --- a/packages/vocab-runtime/src/docloader.test.ts +++ b/packages/vocab-runtime/src/docloader.test.ts @@ -2,7 +2,7 @@ import fetchMock from "fetch-mock"; import { deepStrictEqual, ok, rejects } from "node:assert"; import { test } from "node:test"; import preloadedContexts from "./contexts.ts"; -import { getDocumentLoader } from "./docloader.ts"; +import { getDocumentLoader, getRemoteDocument } from "./docloader.ts"; import { FetchError } from "./request.ts"; import { UrlError } from "./url.ts"; @@ -90,7 +90,7 @@ test("getDocumentLoader()", async (t) => { type: "Object", }, headers: { - "Content-Type": "text/html; charset=utf-8", + "Content-Type": "application/activity+json", Link: '; rel="alternate"; ' + 'type="application/ld+json; profile="https://www.w3.org/ns/activitystreams""', }, @@ -247,6 +247,44 @@ test("getDocumentLoader()", async (t) => { }); }); + fetchMock.get("https://example.com/html-no-alternate", { + body: ` + + + Not an ActivityPub document + + Not found + `, + headers: { "Content-Type": "text/html; charset=utf-8" }, + }); + + await t.test("HTML without ActivityPub alternate link", async () => { + await rejects( + () => fetchDocumentLoader("https://example.com/html-no-alternate"), + (error) => { + ok(error instanceof FetchError); + ok( + error.message.includes( + "HTML document has no ActivityPub alternate link", + ), + ); + ok( + error.message.includes("Content-Type: text/html; charset=utf-8"), + ); + deepStrictEqual( + error.url, + new URL("https://example.com/html-no-alternate"), + ); + ok(error.response != null); + deepStrictEqual( + error.response.headers.get("Content-Type"), + "text/html; charset=utf-8", + ); + return true; + }, + ); + }); + fetchMock.get("https://example.com/wrong-content-type", { body: { "@context": "https://www.w3.org/ns/activitystreams", @@ -257,7 +295,7 @@ test("getDocumentLoader()", async (t) => { headers: { "Content-Type": "text/html; charset=utf-8" }, }); - await t.test("Wrong Content-Type", async () => { + await t.test("wrong Content-Type with JSON body", async () => { deepStrictEqual( await fetchDocumentLoader("https://example.com/wrong-content-type"), { @@ -273,6 +311,64 @@ test("getDocumentLoader()", async (t) => { ); }); + fetchMock.get("https://example.com/large-html", { + body: "", + headers: { + "Content-Length": String(1024 * 1024 + 1), + "Content-Type": "text/html; charset=utf-8", + }, + }); + + await t.test("HTML Content-Length over limit", async () => { + await rejects( + () => fetchDocumentLoader("https://example.com/large-html"), + (error) => { + ok(error instanceof FetchError); + ok( + error.message.includes( + "HTML document is too large to scan for an ActivityPub alternate link", + ), + ); + ok(error.response != null); + deepStrictEqual(error.response.status, 200); + deepStrictEqual( + error.response.headers.get("Content-Type"), + "text/html; charset=utf-8", + ); + return true; + }, + ); + }); + + await t.test("HTML Content-Length over limit cancels body", async () => { + let canceled = false; + const response = new Response("", { + headers: { + "Content-Length": String(1024 * 1024 + 1), + "Content-Type": "text/html; charset=utf-8", + }, + }); + Object.defineProperty(response, "body", { + value: { + cancel: () => { + canceled = true; + }, + }, + }); + await rejects( + () => + getRemoteDocument( + "https://example.com/large-html-cancel", + response, + () => { + throw new Error("unexpected alternate fetch"); + }, + ), + FetchError, + ); + deepStrictEqual(canceled, true); + }); + fetchMock.get("https://example.com/404", { status: 404 }); await t.test("not ok", async () => { @@ -459,11 +555,11 @@ test("getDocumentLoader()", async (t) => { await t.test("ReDoS resistance (CVE-2025-68475)", async () => { const start = performance.now(); - // The malicious HTML will fail JSON parsing, but the important thing is - // that it should complete quickly (not hang due to ReDoS) + // The malicious HTML will fail alternate discovery, but the important + // thing is that it should complete quickly (not hang due to ReDoS). await rejects( () => fetchDocumentLoader("https://example.com/redos"), - SyntaxError, + FetchError, ); const elapsed = performance.now() - start; diff --git a/packages/vocab-runtime/src/docloader.ts b/packages/vocab-runtime/src/docloader.ts index 0cca8d6bf..f8a6966d1 100644 --- a/packages/vocab-runtime/src/docloader.ts +++ b/packages/vocab-runtime/src/docloader.ts @@ -13,6 +13,7 @@ import { UrlError, validatePublicUrl } from "./url.ts"; const logger = getLogger(["fedify", "runtime", "docloader"]); const DEFAULT_MAX_REDIRECTION = 20; +const MAX_HTML_SIZE = 1024 * 1024; // 1MB /** * A remote JSON-LD document and its context fetched by @@ -112,6 +113,59 @@ export type AuthenticatedDocumentLoaderFactory = ( options?: DocumentLoaderFactoryOptions, ) => DocumentLoader; +function createResponseMetadata(response: Response): Response { + return new Response(null, { + headers: response.headers, + status: response.status, + statusText: response.statusText, + }); +} + +async function cancelResponseBody(response: Response): Promise { + if (response.body != null) { + await response.body.cancel(); + } +} + +async function readBoundedText( + response: Response, + maxBytes: number, +): Promise<{ text: string; size: number; tooLarge: boolean }> { + const contentLength = response.headers.get("Content-Length"); + if (contentLength != null) { + const size = Number(contentLength); + if (size > maxBytes) { + await cancelResponseBody(response); + return { text: "", size, tooLarge: true }; + } + } + + if (response.body == null) return { text: "", size: 0, tooLarge: false }; + + const reader = response.body.getReader(); + const decoder = new TextDecoder(); + let text = ""; + let size = 0; + try { + while (true) { + const result = await reader.read(); + if (result.done) break; + const chunkSize = result.value.byteLength; + if (size + chunkSize > maxBytes) { + size += chunkSize; + await reader.cancel(); + return { text: "", size, tooLarge: true }; + } + size += chunkSize; + text += decoder.decode(result.value, { stream: true }); + } + text += decoder.decode(); + return { text, size, tooLarge: false }; + } finally { + reader.releaseLock(); + } +} + /** * Gets a {@link RemoteDocument} from the given response. * @param url The URL of the document to load. @@ -200,15 +254,19 @@ export async function getRemoteDocument( contentType === "application/xhtml+xml" || contentType?.startsWith("application/xhtml+xml;")) ) { - // Security: Limit HTML response size to mitigate ReDoS attacks - const MAX_HTML_SIZE = 1024 * 1024; // 1MB - const html = await response.text(); - if (html.length > MAX_HTML_SIZE) { + const errorResponse = createResponseMetadata(response); + const html = await readBoundedText(response, MAX_HTML_SIZE); + if (html.tooLarge) { logger.warn( "HTML response too large, skipping alternate link discovery: {url}", - { url: documentUrl, size: html.length }, + { url: documentUrl, size: html.size }, + ); + throw new FetchError( + documentUrl, + `HTML document is too large to scan for an ActivityPub alternate link ` + + `(Content-Type: ${contentType})`, + errorResponse, ); - document = JSON.parse(html); } else { // Safe regex patterns without nested quantifiers to prevent ReDoS // (CVE-2025-68475) @@ -219,7 +277,7 @@ export async function getRemoteDocument( /([a-z][a-z:_-]*)=(?:"([^"]*)"|'([^']*)'|([^\s>]+))/gi; let tagMatch: RegExpExecArray | null; - while ((tagMatch = tagPattern.exec(html)) !== null) { + while ((tagMatch = tagPattern.exec(html.text)) !== null) { const tagContent = tagMatch[2]; let attrMatch: RegExpExecArray | null; const attribs: Record = {}; @@ -247,7 +305,17 @@ export async function getRemoteDocument( return await fetch(new URL(attribs.href, docUrl).href); } } - document = JSON.parse(html); + try { + document = JSON.parse(html.text); + } catch (error) { + if (!(error instanceof SyntaxError)) throw error; + throw new FetchError( + documentUrl, + `HTML document has no ActivityPub alternate link ` + + `(Content-Type: ${contentType})`, + errorResponse, + ); + } } } else { document = await response.json();