From 4f94d59cdaeb936b97a3527b033ef53ccf183a3c Mon Sep 17 00:00:00 2001
From: Bob
Date: Sun, 8 Feb 2026 04:24:45 +0300
Subject: [PATCH 1/2] feat(crawl): add --max-age option for crawl requests

---
Review notes (ignored by git am, not part of the commit message):
- `--max-age` declares a `<milliseconds>` value placeholder. Without a
  placeholder commander treats the flag as a boolean switch, so `parseInt`
  would never receive the value.
- NOTE(review): this copy of the series arrived with line breaks collapsed.
  Indentation inside the hunks was reconstructed; if a hunk is rejected, retry
  with `git apply --ignore-whitespace`.
- NOTE(review): the `-k, --api-key` context line carries no value placeholder
  in this copy and the author line has no e-mail address; both look stripped
  in transit. Confirm against the tree and the original commit.

 src/__tests__/commands/crawl.test.ts | 22 ++++++++++++++++++++++
 src/commands/crawl.ts                |  6 ++++++
 src/index.ts                         |  6 ++++++
 src/types/crawl.ts                   |  2 ++
 4 files changed, 36 insertions(+)

diff --git a/src/__tests__/commands/crawl.test.ts b/src/__tests__/commands/crawl.test.ts
index 956bc1a..d15b5d3 100644
--- a/src/__tests__/commands/crawl.test.ts
+++ b/src/__tests__/commands/crawl.test.ts
@@ -247,6 +247,28 @@ describe('executeCrawl', () => {
         }
       );
     });
+
+    it('should include scrapeOptions.maxAge when provided', async () => {
+      const mockResponse = {
+        id: '550e8400-e29b-41d4-a716-446655440000',
+        url: 'https://example.com',
+      };
+      mockClient.startCrawl.mockResolvedValue(mockResponse);
+
+      await executeCrawl({
+        urlOrJobId: 'https://example.com',
+        maxAge: 172800000,
+      });
+
+      expect(mockClient.startCrawl).toHaveBeenCalledWith(
+        'https://example.com',
+        {
+          scrapeOptions: {
+            maxAge: 172800000,
+          },
+        }
+      );
+    });
   });
 
   describe('Check crawl status', () => {
diff --git a/src/commands/crawl.ts b/src/commands/crawl.ts
index 54daaf6..b265953 100644
--- a/src/commands/crawl.ts
+++ b/src/commands/crawl.ts
@@ -92,6 +92,12 @@ export async function executeCrawl(
     if (options.maxConcurrency !== undefined) {
       crawlOptions.maxConcurrency = options.maxConcurrency;
     }
+    if (options.maxAge !== undefined) {
+      crawlOptions.scrapeOptions = {
+        ...(crawlOptions.scrapeOptions ?? {}),
+        maxAge: options.maxAge,
+      };
+    }
 
     // If wait mode, use the convenience crawl method with polling
     if (wait) {
diff --git a/src/index.ts b/src/index.ts
index f3a571c..0810079 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -214,6 +214,11 @@ function createCrawlCommand(): Command {
       'Maximum concurrent requests',
       parseInt
     )
+    .option(
+      '--max-age <milliseconds>',
+      'Maximum age of cached content in milliseconds',
+      parseInt
+    )
     .option(
       '-k, --api-key ',
       'Firecrawl API key (overrides global --api-key)'
@@ -260,6 +265,7 @@ function createCrawlCommand(): Command {
         allowSubdomains: options.allowSubdomains,
         delay: options.delay,
         maxConcurrency: options.maxConcurrency,
+        maxAge: options.maxAge,
       };
 
       await handleCrawlCommand(crawlOptions);
diff --git a/src/types/crawl.ts b/src/types/crawl.ts
index 5fc15e8..66e4645 100644
--- a/src/types/crawl.ts
+++ b/src/types/crawl.ts
@@ -45,6 +45,8 @@ export interface CrawlOptions {
   delay?: number;
   /** Maximum concurrency */
   maxConcurrency?: number;
+  /** Maximum age of cached content in milliseconds (API-level caching) */
+  maxAge?: number;
 }
 
 export interface CrawlResult {

From 96bc291794f4df8ced05637e3aa500ae76ff6776 Mon Sep 17 00:00:00 2001
From: Bob
Date: Sun, 8 Feb 2026 17:07:48 +0300
Subject: [PATCH 2/2] feat(crawl): add --only-main-content option for crawl requests

---
Review notes (ignored by git am, not part of the commit message):
- `--only-main-content` is registered without a default value. With a default
  of `false`, `options.onlyMainContent` is never `undefined`, so the
  `!== undefined` guard in executeCrawl would attach
  `scrapeOptions.onlyMainContent: false` to every crawl request, including
  requests that never passed the flag.
- NOTE(review): indentation inside the hunks was reconstructed from a copy
  with collapsed line breaks; if a hunk is rejected, retry with
  `git apply --ignore-whitespace`.

 src/__tests__/commands/crawl.test.ts | 22 ++++++++++++++++++++++
 src/commands/crawl.ts                |  6 ++++++
 src/index.ts                         |  2 ++
 src/types/crawl.ts                   |  2 ++
 4 files changed, 32 insertions(+)

diff --git a/src/__tests__/commands/crawl.test.ts b/src/__tests__/commands/crawl.test.ts
index d15b5d3..c8c2751 100644
--- a/src/__tests__/commands/crawl.test.ts
+++ b/src/__tests__/commands/crawl.test.ts
@@ -269,6 +269,28 @@ describe('executeCrawl', () => {
         }
       );
     });
+
+    it('should include scrapeOptions.onlyMainContent when provided', async () => {
+      const mockResponse = {
+        id: '550e8400-e29b-41d4-a716-446655440000',
+        url: 'https://example.com',
+      };
+      mockClient.startCrawl.mockResolvedValue(mockResponse);
+
+      await executeCrawl({
+        urlOrJobId: 'https://example.com',
+        onlyMainContent: true,
+      });
+
+      expect(mockClient.startCrawl).toHaveBeenCalledWith(
+        'https://example.com',
+        {
+          scrapeOptions: {
+            onlyMainContent: true,
+          },
+        }
+      );
+    });
   });
 
   describe('Check crawl status', () => {
diff --git a/src/commands/crawl.ts b/src/commands/crawl.ts
index b265953..de4b199 100644
--- a/src/commands/crawl.ts
+++ b/src/commands/crawl.ts
@@ -98,6 +98,12 @@ export async function executeCrawl(
         maxAge: options.maxAge,
       };
     }
+    if (options.onlyMainContent !== undefined) {
+      crawlOptions.scrapeOptions = {
+        ...(crawlOptions.scrapeOptions ?? {}),
+        onlyMainContent: options.onlyMainContent,
+      };
+    }
 
     // If wait mode, use the convenience crawl method with polling
     if (wait) {
diff --git a/src/index.ts b/src/index.ts
index 0810079..ed9f41e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -219,6 +219,7 @@ function createCrawlCommand(): Command {
       'Maximum age of cached content in milliseconds',
       parseInt
     )
+    .option('--only-main-content', 'Include only main content')
     .option(
       '-k, --api-key ',
       'Firecrawl API key (overrides global --api-key)'
@@ -266,6 +267,7 @@ function createCrawlCommand(): Command {
         delay: options.delay,
         maxConcurrency: options.maxConcurrency,
         maxAge: options.maxAge,
+        onlyMainContent: options.onlyMainContent,
       };
 
       await handleCrawlCommand(crawlOptions);
diff --git a/src/types/crawl.ts b/src/types/crawl.ts
index 66e4645..073f195 100644
--- a/src/types/crawl.ts
+++ b/src/types/crawl.ts
@@ -47,6 +47,8 @@ export interface CrawlOptions {
   maxConcurrency?: number;
   /** Maximum age of cached content in milliseconds (API-level caching) */
   maxAge?: number;
+  /** Include only main content */
+  onlyMainContent?: boolean;
 }
 
 export interface CrawlResult {