From c3db5add0678971dea0d15575724e5766aaba47d Mon Sep 17 00:00:00 2001
From: Kiko Beats
Date: Wed, 1 Jul 2026 14:03:51 +0200
Subject: [PATCH] fix: treat microlink.io content hosts as valid target URLs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A URL like https://cdn.microlink.io/file-examples/sample.docx was having
its host stripped as if it were an API endpoint, leaving an invalid bare
path (file-examples/sample.docx → EINVALURLCLIENT).

Host stripping now only runs for API-shaped input, which always carries a
`url=` query. A bare target URL on a microlink.io content host (cdn, etc.)
has none, so it is left untouched.

Co-Authored-By: Claude Opus 4.8
---
 src/api.js              | 27 ++++++++++++++++----------
 test/normalize-input.js | 17 +++++++++++++++++
 2 files changed, 33 insertions(+), 11 deletions(-)

diff --git a/src/api.js b/src/api.js
index 04dfb6f..c91ec5d 100644
--- a/src/api.js
+++ b/src/api.js
@@ -48,17 +48,22 @@ const normalizeInput = (input, endpoint) => {
   // query string) so a bare target URL is left untouched.
   let isApiInput = /^\??url=/.test(input) || input.startsWith('?')
 
-  // Always strip a canonical `*.microlink.io` host (users paste these into any
-  // binary), plus the binary's own endpoint host when it isn't on microlink.io.
-  const sanitizers = [microlinkUrl()]
-  const endpointRegex = endpointUrl(endpoint)
-  if (endpointRegex) sanitizers.push(endpointRegex)
-
-  for (const regex of sanitizers) {
-    const next = normalized.replace(regex, '')
-    if (next !== normalized) {
-      isApiInput = true
-      normalized = next
+  // Host stripping only applies to an actual API request, which always carries a
+  // `url=` query. A bare target URL — even on a microlink.io content host such as
+  // cdn.microlink.io — has none, so its host must survive untouched.
+  if (/(?:^|[?&])url=/.test(input)) {
+    // Strip a canonical `*.microlink.io` host (users paste these into any
+    // binary), plus the binary's own endpoint host when it isn't on microlink.io.
+    const sanitizers = [microlinkUrl()]
+    const endpointRegex = endpointUrl(endpoint)
+    if (endpointRegex) sanitizers.push(endpointRegex)
+
+    for (const regex of sanitizers) {
+      const next = normalized.replace(regex, '')
+      if (next !== normalized) {
+        isApiInput = true
+        normalized = next
+      }
     }
   }
 
diff --git a/test/normalize-input.js b/test/normalize-input.js
index 3f33fb3..a195dd3 100644
--- a/test/normalize-input.js
+++ b/test/normalize-input.js
@@ -52,6 +52,23 @@ test('bare target URL is left untouched', t => {
   )
 })
 
+test('a microlink.io content host is a valid target, not an API host', t => {
+  // cdn.microlink.io (and other content hosts) carry no `url=` param, so the
+  // host must not be stripped as if it were an API endpoint
+  t.is(
+    normalizeInput('https://cdn.microlink.io/file-examples/sample.docx'),
+    'https://cdn.microlink.io/file-examples/sample.docx'
+  )
+  // even given a dev endpoint, and with appended flags
+  t.is(
+    normalizeInput(
+      'https://cdn.microlink.io/file-examples/sample.docx&pdf=true&meta=false',
+      'http://localhost:3000'
+    ),
+    'https://cdn.microlink.io/file-examples/sample.docx&pdf=true&meta=false'
+  )
+})
+
 test('endpoint host is stripped when passed (microlink-dev / microlink-next)', t => {
   t.is(
     normalizeInput(