/**
 * Largest accepted timeout: 2^31 - 1 ms (about 24.8 days).
 *
 * Node.js timers clamp any delay above this value to 1 ms, so a larger
 * timeout would expire almost immediately instead of "never".
 * NOTE(review): assumes the SDK enforces `timeoutMs` with Node timers — confirm.
 */
const MAX_TIMEOUT_MS = 2 ** 31 - 1;

/**
 * Commander argument parser for the global `--timeout` option.
 *
 * Accepts a whole number of milliseconds written in plain decimal digits
 * (surrounding whitespace and a leading `+` are tolerated) and returns it
 * as a number.
 *
 * @param value - Raw option argument as typed on the command line.
 * @returns The timeout in milliseconds, between 1 and {@link MAX_TIMEOUT_MS}.
 * @throws CliUsageError when the value is not a positive integer or is too
 *   large to be honoured.
 */
export function parseTimeout(value: string): number {
  const text = value.trim();

  // Validate the whole string rather than relying on Number.parseInt, which
  // stops at the first non-digit and would silently turn "30s" into 30 ms,
  // "1.5" into 1 ms and "5e3" into 5 ms.
  if (!/^\+?\d+$/.test(text)) {
    throw new CliUsageError(
      "--timeout must be a positive integer (milliseconds)."
    );
  }

  const parsed = Number(text);
  if (parsed < 1) {
    throw new CliUsageError(
      "--timeout must be a positive integer (milliseconds)."
    );
  }

  // Also catches digit strings long enough to overflow to Infinity.
  if (parsed > MAX_TIMEOUT_MS) {
    throw new CliUsageError(
      `--timeout must be at most ${MAX_TIMEOUT_MS} milliseconds.`
    );
  }

  return parsed;
}
// Unit tests for the `--timeout` option parser.
describe("parseTimeout", () => {
  // Happy path: a plain decimal string comes back as the same number.
  it("parses a positive integer of milliseconds", () => {
    const result = parseTimeout("5000");
    expect(result).toBe(5000);
  });

  // Zero, negatives, non-numeric text and the empty string must all be
  // reported as usage errors.
  const invalidInputs = ["0", "-1", "abc", ""];

  it.each(invalidInputs)("rejects %p as a usage error", (value) => {
    const attempt = (): number => parseTimeout(value);
    expect(attempt).toThrow(CliUsageError);
  });
});
+ .option("--timeout ", "", (value: string) => + Number.parseInt(value, 10) + ) + .addCommand(googleSearch); + + await program.parseAsync( + ["google-search", "coffee", "--token", "test-token", "--timeout", "5000"], + { from: "user" } + ); + + expect(createDecodoClient).toHaveBeenCalledWith( + "test-token", + BundledSchema.shared, + 5000 + ); + }); + it("prints verbose logs to stderr when --verbose is set", async () => { const scrape = vi.fn().mockResolvedValue({ results: [{ content: { ok: true } }],