Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 56 additions & 11 deletions src/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -21,20 +21,63 @@ const exit = require('./exit')
/**
 * Build the matcher for a leading `*.microlink.io` origin.
 *
 * The pattern is anchored at the start of the string and accepts `http` or
 * `https`, followed by one or more dot-terminated host labels and then
 * `microlink.io`. A label is rejected when it begins with `fonts` or is
 * exactly `geolocation`. Nothing after the host is consumed.
 *
 * @returns {RegExp} a fresh regular expression on every call
 */
const microlinkUrl = () => {
  const pattern = /^https?:\/\/((?!fonts|geolocation\.)[a-z0-9-]+\.)+microlink\.io/
  return pattern
}

const normalizeInput = input => {
/**
 * Leading-host matcher for the binary's own endpoint (e.g. `microlink-dev` →
 * `http://localhost:3000`, `microlink-next` → `https://next.microlink.io`).
 * Each binary sets its own `cli.flags.endpoint`, so the host we strip is driven
 * by that flag rather than a blanket list shared across every executable.
 *
 * @param {string} [endpoint] absolute endpoint URL
 * @returns {RegExp|null} a regex matching `http(s)://<endpoint host>` at the
 *   start of a string, or `null` when `endpoint` is missing, cannot be parsed
 *   as a URL, or has no host
 */
const endpointUrl = endpoint => {
  if (!endpoint) return null

  let host
  try {
    host = new URL(endpoint).host
  } catch {
    return null
  }

  // A scheme-less value such as `localhost:3000` parses without throwing (the
  // part before the colon is read as the scheme) but has an empty host. With
  // no host there is nothing to strip, and the resulting pattern would match a
  // bare `http://`, so give up instead.
  if (!host) return null

  // Require a host boundary (path/query/fragment or end-of-string) after the
  // host so a longer host that merely *starts with* it isn't matched (e.g.
  // endpoint `localhost:3000` must not strip a prefix of `localhost:30001`).
  const escapedHost = host.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')
  return new RegExp(`^https?://${escapedHost}(?=[/?#]|$)`)
}

/**
 * Reduce CLI input to the string that follows `url=` in a query string.
 *
 * API-shaped input (a full endpoint URL, or a bare `?url=…` / `url=…` query
 * string) has its host and separators removed and its `url` param moved to
 * the front, so a caller can prefix the result with `url=` and keep every
 * other param. Any other input (a bare target URL) is returned untouched.
 *
 * @param {string} input raw CLI input
 * @param {string} [endpoint] the binary's endpoint URL; its host is stripped
 *   in addition to any `*.microlink.io` host
 * @returns {string} the normalized input; falsy input is returned as-is
 */
const normalizeInput = (input, endpoint) => {
  if (!input) return input
  let normalized = input

  // Recognize API-shaped input (full endpoint URL, or a bare `?url=`/`url=`
  // query string) so a bare target URL is left untouched.
  let isApiInput = /^\??url=/.test(input) || input.startsWith('?')

  // Always strip a canonical `*.microlink.io` host (users paste these into any
  // binary), plus the binary's own endpoint host when it isn't on microlink.io.
  const sanitizers = [microlinkUrl()]
  const endpointRegex = endpointUrl(endpoint)
  if (endpointRegex) sanitizers.push(endpointRegex)

  for (const regex of sanitizers) {
    const next = normalized.replace(regex, '')
    if (next !== normalized) {
      // A stripped host means the input was an endpoint URL.
      isApiInput = true
      normalized = next
    }
  }

  if (!isApiInput) return normalized

  // Drop the leftover path/query separators after the host (e.g. `/?url=…`).
  normalized = normalized.replace(/^\/+/, '').replace(/^\?/, '')

  // Lift the `url=` param to the front so the caller's `url=${…}`
  // reconstruction keeps every other param intact regardless of their order
  // (e.g. `data.markdown.attr=markdown&embed=markdown&url=…`).
  const params = normalized.split('&')
  const urlIndex = params.findIndex(p => p === 'url' || p.startsWith('url='))
  if (urlIndex === -1) return normalized

  const urlValue = params[urlIndex].replace(/^url=?/, '')
  const otherParams = params.filter((_, index) => index !== urlIndex)
  return [urlValue, ...otherParams].join('&')
}

const getInput = input => {
Expand All @@ -55,7 +98,9 @@ const fetch = async (cli, gotOpts) => {
} = cli.flags
const isJson = json || jsonFull
const input = getInput(cli.input, endpoint)
const { url, ...queryParams } = toPlainObject(`url=${normalizeInput(input)}`)
const { url, ...queryParams } = toPlainObject(
`url=${normalizeInput(input, endpoint)}`
)
const mqlOpts = { endpoint, ...queryParams, ...flags }
const spinner = printText.spinner()
const shouldSpin = !isJson && pretty
Expand Down
74 changes: 74 additions & 0 deletions test/normalize-input.js
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,77 @@ test('clean API endpoint', t => {
'https://example.com&force&ping.url=false'
)
})

// The `/` between the API host and the query string is removed with the host.
test('endpoint with trailing slash', t => {
  const pasted =
    'https://api.microlink.io/?url=https://example.com&embed=markdown'
  const expected = 'https://example.com&embed=markdown'

  t.is(normalizeInput(pasted), expected)
})

// Params that precede `url=` must survive; `url` is moved to the front.
test('params before url= are preserved (url lifted to front)', t => {
  const pasted =
    'https://api.microlink.io/?data.markdown.attr=markdown&embed=markdown&url=https://example.com'
  const expected =
    'https://example.com&data.markdown.attr=markdown&embed=markdown'

  t.is(normalizeInput(pasted), expected)
})

// Input that is not API-shaped comes back exactly as given.
test('bare target URL is left untouched', t => {
  const target = 'https://www.msn.com/en-gb/news/ar-AA23ijAE'
  const actual = normalizeInput('https://www.msn.com/en-gb/news/ar-AA23ijAE')

  t.is(actual, target)
})

// The host of the endpoint passed as second argument is stripped from input.
test('endpoint host is stripped when passed (microlink-dev / microlink-next)', t => {
  // localhost endpoint
  const devEndpoint = 'http://localhost:3000'
  const devInput =
    'http://localhost:3000/?data.markdown.attr=markdown&url=https://example.com'
  t.is(
    normalizeInput(devInput, devEndpoint),
    'https://example.com&data.markdown.attr=markdown'
  )

  // next.microlink.io endpoint
  const nextEndpoint = 'https://next.microlink.io'
  const nextInput =
    'https://next.microlink.io/?url=https://example.com&embed=markdown'
  t.is(
    normalizeInput(nextInput, nextEndpoint),
    'https://example.com&embed=markdown'
  )
})

// A `*.microlink.io` host is stripped even when the endpoint is another host.
test('a pasted *.microlink.io URL is normalized by any binary', t => {
  // e.g. pasting a prod api URL into microlink-dev (endpoint = localhost)
  const endpoint = 'http://localhost:3000'
  const pasted =
    'https://api.microlink.io/?url=https://example.com&embed=markdown'

  t.is(normalizeInput(pasted, endpoint), 'https://example.com&embed=markdown')
})

// Host matching stops at a host boundary, not at a shared prefix.
test('endpoint host is not stripped from a host that merely starts with it', t => {
  // endpoint `localhost:3000` must not strip the prefix of `localhost:30001`
  // (a different port), which would leave corrupted `1/?url=…` remainder text
  const endpoint = 'http://localhost:3000'
  const otherPort = 'http://localhost:30001/?url=https://example.com'

  t.is(
    normalizeInput(otherPort, endpoint),
    'http://localhost:30001/?url=https://example.com'
  )
})

// With no endpoint argument, a localhost URL is treated as a plain target.
test('endpoint host is NOT stripped by an unrelated binary', t => {
  // a localhost input given to the prod binary (no endpoint) stays as a bare
  // target URL — localhost stripping is scoped to the binary that targets it
  const localInput = 'http://localhost:3000/?url=https://example.com'
  const actual = normalizeInput(localInput)

  t.is(actual, 'http://localhost:3000/?url=https://example.com')
})