diff --git a/src/__tests__/commands/crawl.test.ts b/src/__tests__/commands/crawl.test.ts
index 956bc1a..c8c2751 100644
--- a/src/__tests__/commands/crawl.test.ts
+++ b/src/__tests__/commands/crawl.test.ts
@@ -247,6 +247,50 @@ describe('executeCrawl', () => {
       }
     );
   });
+
+  it('should include scrapeOptions.maxAge when provided', async () => {
+    const mockResponse = {
+      id: '550e8400-e29b-41d4-a716-446655440000',
+      url: 'https://example.com',
+    };
+    mockClient.startCrawl.mockResolvedValue(mockResponse);
+
+    await executeCrawl({
+      urlOrJobId: 'https://example.com',
+      maxAge: 172800000,
+    });
+
+    expect(mockClient.startCrawl).toHaveBeenCalledWith(
+      'https://example.com',
+      {
+        scrapeOptions: {
+          maxAge: 172800000,
+        },
+      }
+    );
+  });
+
+  it('should include scrapeOptions.onlyMainContent when provided', async () => {
+    const mockResponse = {
+      id: '550e8400-e29b-41d4-a716-446655440000',
+      url: 'https://example.com',
+    };
+    mockClient.startCrawl.mockResolvedValue(mockResponse);
+
+    await executeCrawl({
+      urlOrJobId: 'https://example.com',
+      onlyMainContent: true,
+    });
+
+    expect(mockClient.startCrawl).toHaveBeenCalledWith(
+      'https://example.com',
+      {
+        scrapeOptions: {
+          onlyMainContent: true,
+        },
+      }
+    );
+  });
 });
 
 describe('Check crawl status', () => {
diff --git a/src/commands/crawl.ts b/src/commands/crawl.ts
index 54daaf6..de4b199 100644
--- a/src/commands/crawl.ts
+++ b/src/commands/crawl.ts
@@ -92,6 +92,18 @@ export async function executeCrawl(
   if (options.maxConcurrency !== undefined) {
     crawlOptions.maxConcurrency = options.maxConcurrency;
   }
+  if (options.maxAge !== undefined) {
+    crawlOptions.scrapeOptions = {
+      ...(crawlOptions.scrapeOptions ?? {}),
+      maxAge: options.maxAge,
+    };
+  }
+  if (options.onlyMainContent !== undefined) {
+    crawlOptions.scrapeOptions = {
+      ...(crawlOptions.scrapeOptions ?? {}),
+      onlyMainContent: options.onlyMainContent,
+    };
+  }
 
   // If wait mode, use the convenience crawl method with polling
   if (wait) {
diff --git a/src/index.ts b/src/index.ts
index f3a571c..ed9f41e 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -214,6 +214,12 @@ function createCrawlCommand(): Command {
       'Maximum concurrent requests',
       parseInt
     )
+    .option(
+      '--max-age <ms>',
+      'Maximum age of cached content in milliseconds',
+      parseInt
+    )
+    .option('--only-main-content', 'Include only main content', false)
     .option(
       '-k, --api-key <key>',
       'Firecrawl API key (overrides global --api-key)'
     )
@@ -260,6 +266,8 @@
         allowSubdomains: options.allowSubdomains,
         delay: options.delay,
         maxConcurrency: options.maxConcurrency,
+        maxAge: options.maxAge,
+        onlyMainContent: options.onlyMainContent,
       };
 
       await handleCrawlCommand(crawlOptions);
diff --git a/src/types/crawl.ts b/src/types/crawl.ts
index 5fc15e8..073f195 100644
--- a/src/types/crawl.ts
+++ b/src/types/crawl.ts
@@ -45,6 +45,10 @@ export interface CrawlOptions {
   delay?: number;
   /** Maximum concurrency */
   maxConcurrency?: number;
+  /** Maximum age of cached content in milliseconds (API-level caching) */
+  maxAge?: number;
+  /** Include only main content */
+  onlyMainContent?: boolean;
 }
 
 export interface CrawlResult {
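
A minimal usage sketch follows (not part of the patch; the import path and top-level await are assumptions). Because each new flag spreads the existing crawlOptions.scrapeOptions before setting its own key, passing both options yields a single merged scrapeOptions object in the startCrawl call:

import { executeCrawl } from './commands/crawl';

// 172800000 ms = 48 hours of API-level caching; onlyMainContent asks the
// scraper to return only the main page content (no navigation, footers, etc.).
await executeCrawl({
  urlOrJobId: 'https://example.com',
  maxAge: 172800000,
  onlyMainContent: true,
});
// Expected underlying call, combining both options:
// startCrawl('https://example.com', {
//   scrapeOptions: { maxAge: 172800000, onlyMainContent: true },
// });

The equivalent CLI invocation passes --max-age 172800000 --only-main-content to the crawl command wired up in src/index.ts.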