Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 16 additions & 11 deletions src/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,17 +48,22 @@ const normalizeInput = (input, endpoint) => {
// query string) so a bare target URL is left untouched.
let isApiInput = /^\??url=/.test(input) || input.startsWith('?')

// Always strip a canonical `*.microlink.io` host (users paste these into any
// binary), plus the binary's own endpoint host when it isn't on microlink.io.
const sanitizers = [microlinkUrl()]
const endpointRegex = endpointUrl(endpoint)
if (endpointRegex) sanitizers.push(endpointRegex)

for (const regex of sanitizers) {
const next = normalized.replace(regex, '')
if (next !== normalized) {
isApiInput = true
normalized = next
// Host stripping only applies to an actual API request, which always carries a
// `url=` query. A bare target URL — even on a microlink.io content host such as
// cdn.microlink.io — has none, so its host must survive untouched.
if (/(?:^|[?&])url=/.test(input)) {
// Strip a canonical `*.microlink.io` host (users paste these into any
// binary), plus the binary's own endpoint host when it isn't on microlink.io.
const sanitizers = [microlinkUrl()]
const endpointRegex = endpointUrl(endpoint)
if (endpointRegex) sanitizers.push(endpointRegex)

for (const regex of sanitizers) {
const next = normalized.replace(regex, '')
if (next !== normalized) {
isApiInput = true
normalized = next
}
}
}

Expand Down
17 changes: 17 additions & 0 deletions test/normalize-input.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,23 @@ test('bare target URL is left untouched', t => {
)
})

// Regression guard: content hosts under microlink.io (cdn.microlink.io and the
// like) are legitimate targets. Such input carries no `url=` param, so its host
// must come back unchanged instead of being stripped like an API endpoint host.
test('a microlink.io content host is a valid target, not an API host', t => {
  const bareTarget = 'https://cdn.microlink.io/file-examples/sample.docx'
  const flaggedTarget =
    'https://cdn.microlink.io/file-examples/sample.docx&pdf=true&meta=false'
  const devEndpoint = 'http://localhost:3000'

  // no endpoint supplied: the input must round-trip as-is
  const withoutEndpoint = normalizeInput(bareTarget)
  t.is(withoutEndpoint, bareTarget)

  // even given a dev endpoint, and with appended flags
  const withDevEndpoint = normalizeInput(flaggedTarget, devEndpoint)
  t.is(withDevEndpoint, flaggedTarget)
})

test('endpoint host is stripped when passed (microlink-dev / microlink-next)', t => {
t.is(
normalizeInput(
Expand Down