diff --git a/cli-manifest.json b/cli-manifest.json index bfdb71e..0551c19 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -17096,6 +17096,204 @@ "sourceFile": "indeed/search.js", "navigateBefore": false }, + { + "site": "instacart", + "name": "categories", + "description": "Visible Instacart collection links for a retailer", + "access": "read", + "domain": "www.instacart.com", + "strategy": "ui", + "browser": true, + "args": [ + { + "name": "retailer", + "type": "str", + "required": true, + "positional": true, + "help": "Retailer slug, for example sprouts or costco" + }, + { + "name": "limit", + "type": "int", + "default": 10, + "required": false, + "help": "Maximum collections to return (1-30)" + } + ], + "columns": [ + "rank", + "slug", + "name", + "url" + ], + "type": "js", + "modulePath": "instacart/categories.js", + "sourceFile": "instacart/categories.js", + "navigateBefore": true, + "siteSession": "persistent" + }, + { + "site": "instacart", + "name": "collection", + "description": "Visible Instacart product cards from a retailer collection", + "access": "read", + "domain": "www.instacart.com", + "strategy": "ui", + "browser": true, + "args": [ + { + "name": "retailer", + "type": "str", + "required": true, + "positional": true, + "help": "Retailer slug, for example sprouts or costco" + }, + { + "name": "collection", + "type": "str", + "required": true, + "positional": true, + "help": "Collection slug, for example produce or fresh-fruits" + }, + { + "name": "limit", + "type": "int", + "default": 10, + "required": false, + "help": "Maximum products to return (1-30)" + } + ], + "columns": [ + "rank", + "productId", + "title", + "priceText", + "originalPriceText", + "discount", + "size", + "stock", + "url" + ], + "type": "js", + "modulePath": "instacart/collection.js", + "sourceFile": "instacart/collection.js", + "navigateBefore": true, + "siteSession": "persistent" + }, + { + "site": "instacart", + "name": "product", + "description": "Visible Instacart 
product detail by product URL or id", + "access": "read", + "domain": "www.instacart.com", + "strategy": "ui", + "browser": true, + "args": [ + { + "name": "product", + "type": "str", + "required": true, + "positional": true, + "help": "Instacart product URL or numeric product id" + }, + { + "name": "retailer", + "type": "string", + "default": "", + "required": false, + "help": "Retailer slug required when product is a numeric id, for example sprouts" + } + ], + "columns": [ + "productId", + "title", + "priceText", + "originalPriceText", + "discount", + "size", + "stock", + "retailer", + "url" + ], + "type": "js", + "modulePath": "instacart/product.js", + "sourceFile": "instacart/product.js", + "navigateBefore": true, + "siteSession": "persistent" + }, + { + "site": "instacart", + "name": "storefront", + "description": "Visible Instacart product cards from a retailer storefront", + "access": "read", + "domain": "www.instacart.com", + "strategy": "ui", + "browser": true, + "args": [ + { + "name": "retailer", + "type": "str", + "required": true, + "positional": true, + "help": "Retailer slug, for example sprouts or costco" + }, + { + "name": "limit", + "type": "int", + "default": 10, + "required": false, + "help": "Maximum products to return (1-30)" + } + ], + "columns": [ + "rank", + "productId", + "title", + "priceText", + "originalPriceText", + "discount", + "size", + "stock", + "url" + ], + "type": "js", + "modulePath": "instacart/storefront.js", + "sourceFile": "instacart/storefront.js", + "navigateBefore": true, + "siteSession": "persistent" + }, + { + "site": "instacart", + "name": "stores", + "description": "Visible Instacart nearby stores from the public marketplace page", + "access": "read", + "domain": "www.instacart.com", + "strategy": "ui", + "browser": true, + "args": [ + { + "name": "limit", + "type": "int", + "default": 10, + "required": false, + "help": "Maximum stores to return (1-30)" + } + ], + "columns": [ + "rank", + "slug", + "name", + 
"delivery", + "pickup", + "tags", + "url" + ], + "type": "js", + "modulePath": "instacart/stores.js", + "sourceFile": "instacart/stores.js", + "navigateBefore": true, + "siteSession": "persistent" + }, { "site": "instagram", "name": "collection-create", diff --git a/clis/instacart/categories.js b/clis/instacart/categories.js new file mode 100644 index 0000000..64dd252 --- /dev/null +++ b/clis/instacart/categories.js @@ -0,0 +1,38 @@ +import { AuthRequiredError, CommandExecutionError, EmptyResultError } from '@agentrhq/webcmd/errors'; +import { cli, Strategy } from '@agentrhq/webcmd/registry'; +import { BASE_URL, HOST, buildExtractCollectionsScript, gotoInstacartPage, normalizeRetailer, parseLimit } from './utils.js'; + +cli({ + site: 'instacart', + name: 'categories', + access: 'read', + description: 'Visible Instacart collection links for a retailer', + domain: HOST, + strategy: Strategy.UI, + siteSession: 'persistent', + args: [ + { name: 'retailer', positional: true, required: true, help: 'Retailer slug, for example sprouts or costco' }, + { name: 'limit', type: 'int', default: 10, help: 'Maximum collections to return (1-30)' }, + ], + columns: ['rank', 'slug', 'name', 'url'], + func: async (page, kwargs) => { + const retailer = normalizeRetailer(kwargs.retailer); + const limit = parseLimit(kwargs.limit); + await gotoInstacartPage(page, `${BASE_URL}/store/${retailer}/storefront`, 2500); + await page.wait({ selector: `a[href*="/store/${retailer}/collections/"]`, timeout: 8 }).catch(async () => { + await page.wait(3); + }); + const rows = await page.evaluate(buildExtractCollectionsScript(retailer, limit)); + if (!Array.isArray(rows)) { + throw new CommandExecutionError('Instacart categories extraction returned an unreadable response'); + } + const pageText = await page.evaluate('(() => String(document.body?.innerText || document.body?.textContent || "").slice(0, 2000))()'); + if (/log in to continue|sign up to continue|verify you are 
/**
 * Make sure the browser is on the requested collection page.
 *
 * Returns immediately when the current URL already contains the collection
 * path. Otherwise it opens the retailer storefront, clicks the link whose
 * pathname equals the collection path, and checks the URL again.
 *
 * @param {object} page - Browser page handle; `evaluate` and `wait` are used.
 * @param {string} retailer - Retailer slug, interpolated into the path as given.
 * @param {string} collection - Collection slug, interpolated into the path as given.
 * @returns {Promise<void>}
 * @throws {CommandExecutionError} When the URL still lacks the collection path afterwards.
 */
async function ensureCollectionRoute(page, retailer, collection) {
  const targetPath = `/store/${retailer}/collections/${collection}`;
  // A failed URL read is treated as "unknown location" (empty string).
  const readHref = () => page.evaluate('window.location.href').catch(() => '');
  const isOnCollection = (href) => String(href).includes(targetPath);

  if (isOnCollection(await readHref())) {
    return;
  }

  await gotoInstacartPage(page, `${BASE_URL}/store/${retailer}/storefront`, 2500);
  await page.wait({ selector: `a[href*="${targetPath}"]`, timeout: 8 }).catch(() => page.wait(3));

  // In-page click on the link whose resolved pathname is exactly the target.
  const clickScript = `(() => {
    const path = ${JSON.stringify(targetPath)};
    const link = Array.from(document.querySelectorAll('a[href*="/collections/"]')).find((node) => {
      try { return new URL(node.getAttribute('href') || '', location.href).pathname === path; } catch { return false; }
    });
    if (!link) return false;
    link.click();
    return true;
  })()`;
  const didClick = await page.evaluate(clickScript);
  if (didClick) {
    await page.wait(3);
  }

  if (isOnCollection(await readHref())) {
    return;
  }
  throw new CommandExecutionError(`Instacart did not navigate to collection "${retailer}/${collection}"`);
}
"${retailer}/${collection}"`); + } +} + +cli({ + site: 'instacart', + name: 'collection', + access: 'read', + description: 'Visible Instacart product cards from a retailer collection', + domain: HOST, + strategy: Strategy.UI, + siteSession: 'persistent', + args: [ + { name: 'retailer', positional: true, required: true, help: 'Retailer slug, for example sprouts or costco' }, + { name: 'collection', positional: true, required: true, help: 'Collection slug, for example produce or fresh-fruits' }, + { name: 'limit', type: 'int', default: 10, help: 'Maximum products to return (1-30)' }, + ], + columns: ['rank', 'productId', 'title', 'priceText', 'originalPriceText', 'discount', 'size', 'stock', 'url'], + func: async (page, kwargs) => { + const retailer = normalizeRetailer(kwargs.retailer); + const collection = normalizeCollection(kwargs.collection); + const limit = parseLimit(kwargs.limit); + await gotoInstacartPage(page, `${BASE_URL}/store/${retailer}/collections/${collection}`, 2500); + await ensureCollectionRoute(page, retailer, collection); + await page.wait({ selector: 'a[href*="/products/"]', timeout: 8 }).catch(async () => { + await page.wait(3); + }); + const rows = await page.evaluate(buildExtractProductsScript(limit)); + if (!Array.isArray(rows)) { + throw new CommandExecutionError('Instacart collection extraction returned an unreadable response'); + } + const pageText = await page.evaluate('(() => String(document.body?.innerText || document.body?.textContent || "").slice(0, 2000))()'); + if (/log in to continue|sign up to continue|verify you are human|captcha/i.test(String(pageText))) { + throw new AuthRequiredError(HOST, 'Instacart requires browser access. Open Instacart in CloakBrowser, clear any prompt, then rerun.'); + } + if (!rows.length) { + throw new EmptyResultError('instacart collection', `No visible product cards were found for "${retailer}/${collection}". 
Try a collection from \`webcmd instacart categories ${retailer}\`.`); + } + return rows; + }, +}); diff --git a/clis/instacart/instacart.test.js b/clis/instacart/instacart.test.js new file mode 100644 index 0000000..38ac594 --- /dev/null +++ b/clis/instacart/instacart.test.js @@ -0,0 +1,269 @@ +import { JSDOM } from 'jsdom'; +import { describe, expect, it, vi } from 'vitest'; +import { AuthRequiredError, EmptyResultError } from '@agentrhq/webcmd/errors'; +import { getRegistry } from '@agentrhq/webcmd/registry'; +import './categories.js'; +import './collection.js'; +import './product.js'; +import './stores.js'; +import './storefront.js'; +import { + buildProductUrl, + extractCollectionLinks, + extractProductCards, + extractProductDetail, + extractStoreCards, + normalizeCollection, + normalizeRetailer, + parseLimit, +} from './utils.js'; + +const categoriesCommand = getRegistry().get('instacart/categories'); +const collectionCommand = getRegistry().get('instacart/collection'); +const productCommand = getRegistry().get('instacart/product'); +const storesCommand = getRegistry().get('instacart/stores'); +const storefrontCommand = getRegistry().get('instacart/storefront'); + +function createPage(evaluateResults) { + const results = [...evaluateResults]; + return { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + evaluate: vi.fn().mockImplementation(async () => results.shift()), + }; +} + +describe('instacart command metadata', () => { + it('registers all Instacart commands as persistent browser commands', () => { + for (const command of [categoriesCommand, collectionCommand, productCommand, storesCommand, storefrontCommand]) { + expect(command).toMatchObject({ + site: 'instacart', + access: 'read', + browser: true, + strategy: 'ui', + siteSession: 'persistent', + }); + } + }); +}); + +describe('instacart helper validation', () => { + it('validates limit and retailer slugs without silent clamp', () => { + 
expect(parseLimit(undefined)).toBe(10); + expect(parseLimit('30')).toBe(30); + expect(() => parseLimit(0)).toThrow('--limit must be between 1 and 30'); + expect(() => parseLimit(31)).toThrow('--limit must be between 1 and 30'); + expect(() => parseLimit('many')).toThrow('--limit must be an integer'); + + expect(normalizeRetailer(' Sprouts ')).toBe('sprouts'); + expect(() => normalizeRetailer('sprouts/storefront')).toThrow('retailer must be an Instacart retailer slug'); + + expect(normalizeCollection(' Fresh-Fruits ')).toBe('fresh-fruits'); + expect(() => normalizeCollection('fresh/fruits')).toThrow('collection must be an Instacart collection slug'); + }); + + it('builds product URLs from ids and validates full product URLs', () => { + expect(buildProductUrl('16616932', 'sprouts')).toBe('https://www.instacart.com/products/16616932?retailerSlug=sprouts'); + expect(buildProductUrl('https://www.instacart.com/products/16616932-organic-asparagus-each?retailerSlug=safeway', 'sprouts')) + .toBe('https://www.instacart.com/products/16616932-organic-asparagus-each?retailerSlug=sprouts'); + expect(() => buildProductUrl('16616932', '')).toThrow('retailer is required'); + expect(() => buildProductUrl('https://example.com/products/16616932', '')).toThrow('product URL must be an Instacart'); + }); +}); + +describe('instacart DOM extraction', () => { + it('extracts visible store cards from linked storefronts', () => { + const dom = new JSDOM(` + + Safeway +
Delivery by 5:30am
+
Pickup available
+ EBT + Lots of deals +
+ Safeway duplicate + + Sprouts Farmers Market +
Delivery by 8:45am
+ $15 off + No markups +
+ `, { url: 'https://www.instacart.com/' }); + + expect(extractStoreCards(dom.window.document, 10)).toEqual([ + { + rank: 1, + slug: 'safeway', + name: 'Safeway', + delivery: 'Delivery by 5:30am', + pickup: 'Pickup available', + tags: 'EBT, Lots of deals', + url: 'https://www.instacart.com/store/safeway/storefront', + }, + { + rank: 2, + slug: 'sprouts', + name: 'Sprouts Farmers Market', + delivery: 'Delivery by 8:45am', + pickup: null, + tags: '$15 off, No markups', + url: 'https://www.instacart.com/store/sprouts/storefront', + }, + ]); + }); + + it('extracts product cards from visible storefront links', () => { + const dom = new JSDOM(` + + Organic +
Current price: $3.86 each (estimated)
+ $386 +
Original Price: $5.81 each (estimated)
+ $5.81 + 34% off + Organic Asparagus +
Organic Asparagus
+ $3.98 / lb + About 0.97 lb each + Many in stock + +
+ `, { url: 'https://www.instacart.com/store/sprouts/storefront' }); + + expect(extractProductCards(dom.window.document, 10)).toEqual([{ + rank: 1, + productId: '16616932', + title: 'Organic Asparagus', + priceText: '$3.86', + originalPriceText: '$5.81', + discount: '34% off', + size: null, + stock: 'Many in stock', + url: 'https://www.instacart.com/products/16616932-organic-asparagus-each?retailerSlug=sprouts', + }]); + }); + + it('extracts visible collection links for a retailer', () => { + const dom = new JSDOM(` + Produce + Fresh Fruits + Produce duplicate + Wrong retailer + `, { url: 'https://www.instacart.com/store/sprouts/storefront' }); + + expect(extractCollectionLinks(dom.window.document, 'sprouts', 10)).toEqual([ + { + rank: 1, + slug: 'produce', + name: 'Produce', + url: 'https://www.instacart.com/store/sprouts/collections/produce', + }, + { + rank: 2, + slug: 'fresh-fruits', + name: 'Fresh Fruits', + url: 'https://www.instacart.com/store/sprouts/collections/fresh-fruits', + }, + ]); + }); + + it('extracts a visible product detail page', () => { + const dom = new JSDOM(` +

Organic Asparagus

+
+
Current price: $3.86 each (estimated)
+
Original Price: $5.81 each (estimated)
+ 34% off + About 0.97 lb each + Many in stock +
+ `, { url: 'https://www.instacart.com/products/16616932-organic-asparagus-each?retailerSlug=sprouts' }); + + expect(extractProductDetail(dom.window.document)).toEqual({ + productId: '16616932', + title: 'Organic Asparagus', + priceText: '$3.86', + originalPriceText: '$5.81', + discount: '34% off', + size: null, + stock: 'Many in stock', + retailer: 'sprouts', + url: 'https://www.instacart.com/products/16616932-organic-asparagus-each?retailerSlug=sprouts', + }); + }); +}); + +describe('instacart command execution', () => { + it('stores returns extracted rows', async () => { + const page = createPage([ + [{ rank: 1, slug: 'safeway', name: 'Safeway', url: 'https://www.instacart.com/store/safeway/storefront' }], + 'All stores in San Francisco Bay Area', + ]); + + await expect(storesCommand.func(page, { limit: 1 })).resolves.toEqual([ + { rank: 1, slug: 'safeway', name: 'Safeway', url: 'https://www.instacart.com/store/safeway/storefront' }, + ]); + }); + + it('storefront returns extracted product rows', async () => { + const page = createPage([ + [{ rank: 1, productId: '16616932', title: 'Organic Asparagus', priceText: '$3.86' }], + 'Sprouts Farmers Market Organic Asparagus', + ]); + + await expect(storefrontCommand.func(page, { retailer: 'sprouts', limit: 1 })).resolves.toEqual([ + { rank: 1, productId: '16616932', title: 'Organic Asparagus', priceText: '$3.86' }, + ]); + expect(page.goto).toHaveBeenCalledWith('https://www.instacart.com/store/sprouts/storefront', { waitUntil: 'load', settleMs: 2500 }); + }); + + it('categories returns extracted collection rows', async () => { + const page = createPage([ + [{ rank: 1, slug: 'produce', name: 'Produce', url: 'https://www.instacart.com/store/sprouts/collections/produce' }], + 'Sprouts Farmers Market Produce Fresh Fruits', + ]); + + await expect(categoriesCommand.func(page, { retailer: 'sprouts', limit: 1 })).resolves.toEqual([ + { rank: 1, slug: 'produce', name: 'Produce', url: 
'https://www.instacart.com/store/sprouts/collections/produce' }, + ]); + expect(page.goto).toHaveBeenCalledWith('https://www.instacart.com/store/sprouts/storefront', { waitUntil: 'load', settleMs: 2500 }); + }); + + it('collection returns extracted product rows', async () => { + const page = createPage([ + 'https://www.instacart.com/store/sprouts/collections/produce', + [{ rank: 1, productId: '16616932', title: 'Organic Asparagus', priceText: '$3.86' }], + 'Sprouts Farmers Market Organic Asparagus', + ]); + + await expect(collectionCommand.func(page, { retailer: 'sprouts', collection: 'produce', limit: 1 })).resolves.toEqual([ + { rank: 1, productId: '16616932', title: 'Organic Asparagus', priceText: '$3.86' }, + ]); + expect(page.goto).toHaveBeenCalledWith('https://www.instacart.com/store/sprouts/collections/produce', { waitUntil: 'load', settleMs: 2500 }); + }); + + it('product returns extracted product detail rows', async () => { + const page = createPage([ + { productId: '16616932', title: 'Organic Asparagus', priceText: '$3.86', retailer: 'sprouts', url: 'https://www.instacart.com/products/16616932?retailerSlug=sprouts' }, + 'Organic Asparagus Current price: $3.86', + ]); + + await expect(productCommand.func(page, { product: '16616932', retailer: 'sprouts' })).resolves.toEqual([ + { productId: '16616932', title: 'Organic Asparagus', priceText: '$3.86', retailer: 'sprouts', url: 'https://www.instacart.com/products/16616932?retailerSlug=sprouts' }, + ]); + expect(page.goto).toHaveBeenCalledWith('https://www.instacart.com/products/16616932?retailerSlug=sprouts', { waitUntil: 'load', settleMs: 3000 }); + }); + + it('throws typed auth and empty errors', async () => { + await expect(storesCommand.func(createPage([[], 'verify you are human']), { limit: 1 })) + .rejects.toBeInstanceOf(AuthRequiredError); + await expect(storefrontCommand.func(createPage([[], 'Sprouts Farmers Market']), { retailer: 'sprouts', limit: 1 })) + .rejects.toBeInstanceOf(EmptyResultError); + 
/** Page-side expression returning the first 2000 characters of the visible body text. */
const PAGE_TEXT_SNIPPET = '(() => String(document.body?.innerText || document.body?.textContent || "").slice(0, 2000))()';

/** Phrases that show Instacart rendered a login prompt or bot challenge instead of content. */
const ACCESS_WALL = /log in to continue|sign up to continue|verify you are human|captcha/i;

/**
 * Handler for `instacart product`: opens one product page and returns its
 * extracted detail as a single-row result.
 *
 * @param {object} page - Browser page handle; `goto`, `wait` and `evaluate` are used.
 * @param {{ product: string, retailer?: string }} kwargs - Parsed CLI arguments.
 * @returns {Promise<object[]>} One-element array holding the extracted detail row.
 * @throws {CommandExecutionError} When the extraction result is not an object.
 * @throws {AuthRequiredError} When the page text matches a login/challenge phrase.
 * @throws {EmptyResultError} When the row lacks a product id, title or price.
 */
async function showProduct(page, kwargs) {
  const url = buildProductUrl(kwargs.product, kwargs.retailer);

  await gotoInstacartPage(page, url, 3000);
  await page.wait({ selector: 'h1', timeout: 8 }).catch(() => page.wait(3));

  const row = await page.evaluate(buildExtractProductDetailScript());
  const isReadable = Boolean(row) && typeof row === 'object';
  if (!isReadable) {
    throw new CommandExecutionError('Instacart product extraction returned an unreadable response');
  }

  const visibleText = String(await page.evaluate(PAGE_TEXT_SNIPPET));
  if (ACCESS_WALL.test(visibleText)) {
    throw new AuthRequiredError(HOST, 'Instacart requires browser access. Open Instacart in CloakBrowser, clear any prompt, then rerun.');
  }

  // A usable row needs all three of id, title and price.
  const hasCoreFields = Boolean(row.productId && row.title && row.priceText);
  if (!hasCoreFields) {
    throw new EmptyResultError('instacart product', `No visible product detail was found at ${url}. Try a product URL from \`webcmd instacart storefront\` or \`webcmd instacart collection\`.`);
  }
  return [row];
}

cli({
  site: 'instacart',
  name: 'product',
  access: 'read',
  description: 'Visible Instacart product detail by product URL or id',
  domain: HOST,
  strategy: Strategy.UI,
  siteSession: 'persistent',
  args: [
    { name: 'product', positional: true, required: true, help: 'Instacart product URL or numeric product id' },
    // NOTE(review): sibling string args omit `type` and appear as "str" in the
    // manifest; confirm the registry accepts the spelling 'string' here.
    { name: 'retailer', type: 'string', default: '', help: 'Retailer slug required when product is a numeric id, for example sprouts' },
  ],
  columns: ['productId', 'title', 'priceText', 'originalPriceText', 'discount', 'size', 'stock', 'retailer', 'url'],
  func: showProduct,
});
/**
 * `instacart storefront` — product cards visible on a retailer's storefront.
 *
 * The handler validates its arguments, opens the storefront, waits for product
 * links, then returns the rows reported by the in-page extraction script.
 * It throws CommandExecutionError when navigation fails or the extraction
 * result is not an array, AuthRequiredError when the page text matches a
 * login/challenge phrase, and EmptyResultError when no rows were extracted.
 */
cli({
  site: 'instacart',
  name: 'storefront',
  access: 'read',
  description: 'Visible Instacart product cards from a retailer storefront',
  domain: HOST,
  strategy: Strategy.UI,
  siteSession: 'persistent',
  args: [
    { name: 'retailer', positional: true, required: true, help: 'Retailer slug, for example sprouts or costco' },
    { name: 'limit', type: 'int', default: 10, help: 'Maximum products to return (1-30)' },
  ],
  columns: ['rank', 'productId', 'title', 'priceText', 'originalPriceText', 'discount', 'size', 'stock', 'url'],
  func: async (page, kwargs) => {
    const retailer = normalizeRetailer(kwargs.retailer);
    const limit = parseLimit(kwargs.limit);

    // Navigate through the shared helper, like the categories/collection/product
    // commands: it ignores ERR_ABORTED navigation errors and rethrows any other
    // failure as CommandExecutionError. Resolved with a function-scope import so
    // this change does not depend on the file's static import list.
    const { gotoInstacartPage } = await import('./utils.js');
    await gotoInstacartPage(page, `${BASE_URL}/store/${retailer}/storefront`, 2500);

    // Best effort: if the selector wait rejects, fall back to a fixed wait.
    await page.wait({ selector: 'a[href*="/products/"]', timeout: 8 }).catch(async () => {
      await page.wait(3);
    });

    const rows = await page.evaluate(buildExtractProductsScript(limit));
    if (!Array.isArray(rows)) {
      throw new CommandExecutionError('Instacart storefront extraction returned an unreadable response');
    }

    // The wall check runs before the empty check so a blocked page is reported
    // as an access problem rather than as "no results".
    const pageText = await page.evaluate('(() => String(document.body?.innerText || document.body?.textContent || "").slice(0, 2000))()');
    if (/log in to continue|sign up to continue|verify you are human|captcha/i.test(String(pageText))) {
      throw new AuthRequiredError(HOST, 'Instacart requires browser access. Open Instacart in CloakBrowser, clear any prompt, then rerun.');
    }
    if (!rows.length) {
      throw new EmptyResultError('instacart storefront', `No visible product cards were found for retailer "${retailer}". Try a retailer from \`webcmd instacart stores\`.`);
    }
    return rows;
  },
});
/**
 * `instacart stores` — store cards visible on the Instacart marketplace page.
 *
 * The handler validates `limit`, opens the site root, waits briefly, then
 * returns the rows reported by the in-page extraction script.
 * It throws CommandExecutionError when navigation fails or the extraction
 * result is not an array, AuthRequiredError when the page text matches a
 * login/challenge phrase, and EmptyResultError when no rows were extracted.
 */
cli({
  site: 'instacart',
  name: 'stores',
  access: 'read',
  description: 'Visible Instacart nearby stores from the public marketplace page',
  domain: HOST,
  strategy: Strategy.UI,
  siteSession: 'persistent',
  args: [
    { name: 'limit', type: 'int', default: 10, help: 'Maximum stores to return (1-30)' },
  ],
  columns: ['rank', 'slug', 'name', 'delivery', 'pickup', 'tags', 'url'],
  func: async (page, kwargs) => {
    const limit = parseLimit(kwargs.limit);

    // Navigate through the shared helper, like the categories/collection/product
    // commands: it ignores ERR_ABORTED navigation errors and rethrows any other
    // failure as CommandExecutionError. Resolved with a function-scope import so
    // this change does not depend on the file's static import list.
    const { gotoInstacartPage } = await import('./utils.js');
    await gotoInstacartPage(page, BASE_URL, 2000);
    await page.wait(2);

    const rows = await page.evaluate(buildExtractStoresScript(limit));
    if (!Array.isArray(rows)) {
      throw new CommandExecutionError('Instacart stores extraction returned an unreadable response');
    }

    // The wall check runs before the empty check so a blocked page is reported
    // as an access problem rather than as "no results".
    const pageText = await page.evaluate('(() => String(document.body?.innerText || document.body?.textContent || "").slice(0, 2000))()');
    if (/log in to continue|sign up to continue|verify you are human|captcha/i.test(String(pageText))) {
      throw new AuthRequiredError(HOST, 'Instacart requires browser access. Open Instacart in CloakBrowser, clear any prompt, then rerun.');
    }
    if (!rows.length) {
      throw new EmptyResultError('instacart stores', 'No visible store cards were found. Instacart may need a location or changed its layout.');
    }
    return rows;
  },
});
/** Hostname every Instacart command targets. */
export const HOST = 'www.instacart.com';
/** Origin used to build Instacart URLs. */
export const BASE_URL = `https://${HOST}`;
/** Largest value accepted for `--limit`. */
export const MAX_LIMIT = 30;

/**
 * Collapse every whitespace run to a single space and trim the ends.
 *
 * @param {*} value - Any value; `null`/`undefined` become the empty string.
 * @returns {string}
 */
export function normalizeText(value) {
  return String(value ?? '').replace(/\s+/g, ' ').trim();
}

/**
 * Validate the `--limit` argument.
 *
 * @param {*} raw - Raw argument; `undefined`, `null` and `''` select `fallback`.
 * @param {number} [fallback=10] - Value returned when `raw` is absent.
 * @returns {number} An integer between 1 and MAX_LIMIT (or `fallback`, unchecked).
 * @throws {ArgumentError} When `raw` is not an integer or is out of range.
 */
export function parseLimit(raw, fallback = 10) {
  if (raw === undefined || raw === null || raw === '') return fallback;
  const value = Number(raw);
  if (!Number.isInteger(value)) {
    throw new ArgumentError(`--limit must be an integer between 1 and ${MAX_LIMIT}, got ${JSON.stringify(raw)}`);
  }
  if (value < 1 || value > MAX_LIMIT) {
    throw new ArgumentError(`--limit must be between 1 and ${MAX_LIMIT}, got ${value}`);
  }
  return value;
}

/**
 * Trim and lower-case a retailer slug, rejecting anything outside `[a-z0-9-]`.
 *
 * @param {*} raw - Raw retailer argument.
 * @returns {string} The normalized slug.
 * @throws {ArgumentError} When the value is empty or not slug-shaped.
 */
export function normalizeRetailer(raw) {
  const value = String(raw ?? '').trim().toLowerCase();
  if (!value) throw new ArgumentError('retailer is required');
  if (!/^[a-z0-9-]+$/.test(value)) {
    throw new ArgumentError('retailer must be an Instacart retailer slug like "sprouts" or "costco"');
  }
  return value;
}

/**
 * Trim and lower-case a collection slug, rejecting anything outside `[a-z0-9-]`.
 *
 * @param {*} raw - Raw collection argument.
 * @returns {string} The normalized slug.
 * @throws {ArgumentError} When the value is empty or not slug-shaped.
 */
export function normalizeCollection(raw) {
  const value = String(raw ?? '').trim().toLowerCase();
  if (!value) throw new ArgumentError('collection is required');
  if (!/^[a-z0-9-]+$/.test(value)) {
    throw new ArgumentError('collection must be an Instacart collection slug like "produce" or "fresh-fruits"');
  }
  return value;
}

/**
 * Resolve `href` against `baseUrl`.
 *
 * The default base is spelled out as a literal rather than taken from the
 * module constant: this function's source is injected into the page by the
 * script builders below, and module bindings do not exist there.
 *
 * @param {*} href - Relative or absolute URL.
 * @param {string} [baseUrl='https://www.instacart.com'] - Base for relative hrefs.
 * @returns {string|null} Absolute URL, or `null` for an empty or unparsable href.
 */
export function absoluteUrl(href, baseUrl = 'https://www.instacart.com') {
  const value = String(href ?? '').trim();
  if (!value) return null;
  try {
    return new URL(value, baseUrl).href;
  } catch {
    return null;
  }
}

/**
 * Normalize each value, drop empties, and de-duplicate case-insensitively,
 * keeping the first spelling seen.
 *
 * @param {Array<*>} values
 * @returns {string[]}
 */
export function unique(values) {
  const seen = new Set();
  const result = [];
  for (const value of values.map(normalizeText).filter(Boolean)) {
    const key = value.toLowerCase();
    if (seen.has(key)) continue;
    seen.add(key);
    result.push(value);
  }
  return result;
}

/**
 * Unique texts of the `span`, `div` and `p` descendants of `root` that have no
 * child elements.
 *
 * @param {Element|Document} root
 * @returns {string[]}
 */
export function leafTexts(root) {
  return unique(Array.from(root.querySelectorAll('span, div, p')).filter((node) => node.children.length === 0).map((node) => node.textContent));
}

/**
 * Extract one row per distinct retailer from links ending in `/storefront`.
 *
 * The name is the first leaf text that does not look like a delivery, pickup
 * or promotional badge; links with no such text are skipped. Remaining leaf
 * texts are joined into `tags`.
 *
 * @param {Document} doc
 * @param {number} [limit=10] - Maximum number of rows.
 * @returns {Array<{rank: number, slug: string, name: string, delivery: string|null, pickup: string|null, tags: string|null, url: string|null}>}
 */
export function extractStoreCards(doc, limit = 10) {
  const pageUrl = doc?.location?.href || doc?.URL || 'https://www.instacart.com';
  const links = Array.from(doc.querySelectorAll('a[href*="/store/"][href$="/storefront"]'));
  const seen = new Set();
  const rows = [];
  for (const link of links) {
    if (rows.length >= limit) break;
    const href = link.getAttribute('href') || '';
    const match = href.match(/\/store\/([^/?#]+)\/storefront/i);
    if (!match) continue;
    const slug = match[1];
    if (seen.has(slug)) continue;
    const bits = leafTexts(link);
    const name = bits.find((bit) => !/^(Delivery by|Pickup available|No markups|\$\d+\s+off|EBT|Lots of deals|Low prices|Bulk pricing|Loyalty savings|By\s|\d+\s*(hr|min)|\d+\.\d+\s*mi)/i.test(bit));
    if (!name) continue;
    const delivery = bits.find((bit) => /^(Delivery by|By\s|\d+\s*(hr|min)$)/i.test(bit)) || null;
    const pickup = bits.find((bit) => /^Pickup/i.test(bit)) || null;
    const tags = bits.filter((bit) => ![name, delivery, pickup].includes(bit)).join(', ') || null;
    seen.add(slug);
    rows.push({
      rank: rows.length + 1,
      slug,
      name,
      delivery,
      pickup,
      tags,
      url: absoluteUrl(href, pageUrl),
    });
  }
  return rows;
}

/**
 * Pull the numeric id that follows `/products/` in a URL or path.
 *
 * @param {*} url
 * @returns {string|null} The digit run, or `null` when absent.
 */
export function productIdFromUrl(url) {
  const match = String(url || '').match(/\/products\/(\d+)/);
  return match ? match[1] : null;
}

/**
 * Read the `retailerSlug` query parameter from an absolute URL.
 *
 * @param {*} url
 * @returns {string|null} The parameter value, or `null` when missing, empty or unparsable.
 */
export function retailerFromProductUrl(url) {
  try {
    const parsed = new URL(String(url));
    return parsed.searchParams.get('retailerSlug') || null;
  } catch {
    return null;
  }
}

/**
 * Turn the `product` argument into a product page URL.
 *
 * Accepts an absolute Instacart product URL (an explicit retailer overrides
 * the URL's own `retailerSlug`) or a bare numeric id, which requires a retailer.
 *
 * @param {*} raw - Product URL or numeric id.
 * @param {*} retailerRaw - Optional retailer slug.
 * @returns {string} Absolute product URL.
 * @throws {ArgumentError} When the product or retailer value is missing or invalid.
 */
export function buildProductUrl(raw, retailerRaw) {
  const value = String(raw ?? '').trim();
  if (!value) throw new ArgumentError('product is required');
  if (/^https?:\/\//i.test(value)) {
    let parsed;
    try {
      parsed = new URL(value);
    } catch {
      throw new ArgumentError('product must be an Instacart product URL or numeric product id');
    }
    // Check the path only: matching the whole href would accept a
    // "/products/<id>" that merely appears in the query string or fragment.
    if (parsed.hostname !== HOST || !productIdFromUrl(parsed.pathname)) {
      throw new ArgumentError('product URL must be an Instacart /products/ URL');
    }
    const retailer = retailerRaw ? normalizeRetailer(retailerRaw) : retailerFromProductUrl(parsed.href);
    if (retailer) parsed.searchParams.set('retailerSlug', retailer);
    return parsed.href;
  }
  if (!/^\d+$/.test(value)) {
    throw new ArgumentError('product must be an Instacart product URL or numeric product id');
  }
  const retailer = normalizeRetailer(retailerRaw);
  return `${BASE_URL}/products/${value}?retailerSlug=${retailer}`;
}

/**
 * Extract the current price: the amount after "Current price:", otherwise the
 * first dollar amount in the text. Thousands separators are kept
 * ("$1,299.99"), so four-digit prices are not truncated at the comma.
 *
 * @param {*} raw
 * @returns {string|null}
 */
function compactPrice(raw) {
  const value = normalizeText(raw);
  const match = value.match(/Current price:\s*(\$\d+(?:,\d{3})*(?:\.\d{2})?)/i);
  if (match) return match[1];
  const fallback = value.match(/\$\d+(?:,\d{3})*(?:\.\d{2})?\b/);
  return fallback ? fallback[0] : null;
}

/**
 * Extract the amount after "Original Price:", keeping thousands separators.
 *
 * @param {*} raw
 * @returns {string|null}
 */
function compactOriginalPrice(raw) {
  const value = normalizeText(raw);
  const match = value.match(/Original Price:\s*(\$\d+(?:,\d{3})*(?:\.\d{2})?)/i);
  return match ? match[1] : null;
}

/**
 * First value matching `pattern`, or `null`.
 *
 * @param {string[]} values
 * @param {RegExp} pattern
 * @returns {string|null}
 */
function firstMatching(values, pattern) {
  return values.find((value) => pattern.test(value)) || null;
}

/**
 * Extract one row per distinct product id from `/products/` links.
 *
 * A link is skipped unless it yields a product id, a title (the
 * `[role="heading"]` text, else the first plausible leaf text) and a price.
 *
 * @param {Document} doc
 * @param {number} [limit=10] - Maximum number of rows.
 * @returns {Array<{rank: number, productId: string, title: string, priceText: string, originalPriceText: string|null, discount: string|null, size: string|null, stock: string|null, url: string}>}
 */
export function extractProductCards(doc, limit = 10) {
  const pageUrl = doc?.location?.href || doc?.URL || 'https://www.instacart.com';
  const links = Array.from(doc.querySelectorAll('a[href*="/products/"]'));
  const seen = new Set();
  const rows = [];
  for (const link of links) {
    if (rows.length >= limit) break;
    const url = absoluteUrl(link.getAttribute('href'), pageUrl);
    const productId = productIdFromUrl(url);
    if (!url || !productId || seen.has(productId)) continue;
    const spanTexts = leafTexts(link);
    const allText = normalizeText(link.textContent || '');
    const priceText = compactPrice(allText);
    const originalPriceText = compactOriginalPrice(allText);
    const discount = firstMatching(spanTexts, /^(\d+%\s+off|buy\s+\d+)/i);
    const stock = firstMatching(spanTexts, /^((many|few)\s+in stock|in stock|out of stock)$/i);
    const size = firstMatching(spanTexts, /^\d+(?:\.\d+)?\s*(oz|lb|ct|fl oz|g|kg|ml|l|pack|x\b)/i);
    const headingTitle = normalizeText(link.querySelector('[role="heading"]')?.textContent);
    const fallbackTitle = spanTexts.find((value) => {
      if (!value || value.length > 120) return false;
      if (/^(add|current price|original price|\$|\d+$|each|\/|organic$|non gmo$|in season$)/i.test(value)) return false;
      if (value === priceText || value === originalPriceText || value === discount || value === stock || value === size) return false;
      return /[a-z]/i.test(value);
    });
    const title = headingTitle || fallbackTitle;
    if (!title || !priceText) continue;
    seen.add(productId);
    rows.push({
      rank: rows.length + 1,
      productId,
      title,
      priceText,
      originalPriceText,
      discount,
      size,
      stock,
      url,
    });
  }
  return rows;
}

/**
 * Extract one row per distinct collection slug from a retailer's collection links.
 *
 * The page-URL fallback is written as a literal so the function has no module
 * dependency (presumably it is injected into the page like the other
 * extractors; its builder is not in this part of the file).
 *
 * @param {Document} doc
 * @param {string} retailer - Retailer slug; callers pass a normalized slug.
 * @param {number} [limit=10] - Maximum number of rows.
 * @returns {Array<{rank: number, slug: string, name: string, url: string|null}>}
 */
export function extractCollectionLinks(doc, retailer, limit = 10) {
  const pageUrl = doc?.location?.href || doc?.URL || `https://www.instacart.com/store/${retailer}/storefront`;
  const pattern = new RegExp(`/store/${retailer}/collections/([^/?#]+)`, 'i');
  const links = Array.from(doc.querySelectorAll(`a[href*="/store/${retailer}/collections/"]`));
  const seen = new Set();
  const rows = [];
  for (const link of links) {
    if (rows.length >= limit) break;
    const href = link.getAttribute('href') || '';
    const match = href.match(pattern);
    if (!match) continue;
    const slug = match[1].toLowerCase();
    if (seen.has(slug)) continue;
    const name = normalizeText(link.getAttribute('aria-label') || link.textContent || slug);
    if (!name || name.length > 120) continue;
    seen.add(slug);
    rows.push({
      rank: rows.length + 1,
      slug,
      name,
      url: absoluteUrl(href, pageUrl),
    });
  }
  return rows;
}

/**
 * Extract the product detail row from a product page.
 *
 * The title is the `h1` text, else the document title with Instacart's
 * suffixes removed. Id and retailer come from the page URL. The page-URL
 * fallback is a literal for the same reason as in `extractCollectionLinks`.
 *
 * @param {Document} doc
 * @returns {{productId: string|null, title: string|null, priceText: string|null, originalPriceText: string|null, discount: string|null, size: string|null, stock: string|null, retailer: string|null, url: string}}
 */
export function extractProductDetail(doc) {
  const pageUrl = doc?.location?.href || doc?.URL || 'https://www.instacart.com';
  const bodyText = normalizeText(doc.body?.innerText || doc.body?.textContent || '');
  const leaf = leafTexts(doc.body || doc);
  const heading = normalizeText(doc.querySelector('h1')?.textContent);
  const docTitle = normalizeText(doc.title || '').replace(/\s+Same-Day Delivery.*$/i, '').replace(/\s+\|\s+Instacart$/i, '');
  const title = heading || docTitle || null;
  const priceText = compactPrice(bodyText);
  const originalPriceText = compactOriginalPrice(bodyText);
  const discount = firstMatching(leaf, /^(\d+%\s+off|buy\s+\d+)/i);
  const stock = firstMatching(leaf, /^((many|few)\s+in stock|in stock|out of stock)$/i);
  const size = firstMatching(leaf, /^\d+(?:\.\d+)?\s*(oz|lb|ct|fl oz|g|kg|ml|l|pack|x\b)/i);
  return {
    productId: productIdFromUrl(pageUrl),
    title,
    priceText,
    originalPriceText,
    discount,
    size,
    stock,
    retailer: retailerFromProductUrl(pageUrl),
    url: pageUrl,
  };
}

/**
 * Navigate to `url`, ignoring ERR_ABORTED navigation errors.
 *
 * @param {object} page - Browser page handle exposing `goto(url, options)`.
 * @param {string} url
 * @param {number} [settleMs=2500] - Forwarded to `page.goto` as `settleMs`.
 * @returns {Promise<void>}
 * @throws {CommandExecutionError} For any navigation error other than ERR_ABORTED.
 */
export async function gotoInstacartPage(page, url, settleMs = 2500) {
  try {
    await page.goto(url, { waitUntil: 'load', settleMs });
  } catch (error) {
    if (!/ERR_ABORTED/i.test(String(error?.message || error))) {
      throw new CommandExecutionError(`Instacart navigation failed: ${error?.message || error}`);
    }
  }
}

/**
 * Build the page-side script that runs `extractStoreCards(document, limit)`.
 *
 * Helper sources are inlined with `Function.prototype.toString()`, so every
 * inlined function must be self-contained apart from the helpers listed here.
 *
 * @param {number} limit - Interpolated into the script as given.
 * @returns {string} Source of an immediately-invoked arrow function.
 */
export function buildExtractStoresScript(limit) {
  return `(() => {
    const normalizeText = ${normalizeText.toString()};
    const absoluteUrl = ${absoluteUrl.toString()};
    const unique = ${unique.toString()};
    const leafTexts = ${leafTexts.toString()};
    const extractStoreCards = ${extractStoreCards.toString()};
    return extractStoreCards(document, ${limit});
  })()`;
}

/**
 * Build the page-side script that runs `extractProductCards(document, limit)`.
 *
 * Helper sources are inlined with `Function.prototype.toString()`, so every
 * inlined function must be self-contained apart from the helpers listed here.
 *
 * @param {number} limit - Interpolated into the script as given.
 * @returns {string} Source of an immediately-invoked arrow function.
 */
export function buildExtractProductsScript(limit) {
  return `(() => {
    const normalizeText = ${normalizeText.toString()};
    const absoluteUrl = ${absoluteUrl.toString()};
    const unique = ${unique.toString()};
    const leafTexts = ${leafTexts.toString()};
    const productIdFromUrl = ${productIdFromUrl.toString()};
    const compactPrice = ${compactPrice.toString()};
    const compactOriginalPrice = ${compactOriginalPrice.toString()};
    const firstMatching = ${firstMatching.toString()};
    const extractProductCards = ${extractProductCards.toString()};
    return extractProductCards(document, ${limit});
  })()`;
}
/**
 * Build the source text of a self-contained script that extracts the
 * collection links of one retailer from the current page.
 *
 * Each helper is embedded through `Function.prototype.toString()`, so the
 * generated IIFE carries its own copies and relies only on a global
 * `document` being available where the script is evaluated.
 *
 * @param {string} retailer - Retailer slug; JSON-encoded before embedding.
 * @param {number} [limit] - Maximum number of rows. Numbers are emitted
 *   unchanged; `undefined` is emitted as `undefined`, which lets the default
 *   of `extractCollectionLinks` apply.
 * @returns {string} Source of an IIFE whose value is
 *   `extractCollectionLinks(document, retailer, limit)`.
 */
export function buildExtractCollectionsScript(retailer, limit) {
    // `limit` ends up inside source code that is evaluated later, so it must
    // never be pasted in raw: a string such as `1); doSomething(` would turn
    // into executable code. Numeric values keep their original spelling
    // (including NaN/Infinity); everything else is JSON-encoded, exactly like
    // `retailer`. String() covers JSON.stringify(undefined) === undefined.
    const isNumeric = typeof limit === 'number' || typeof limit === 'bigint';
    const limitLiteral = isNumeric ? String(limit) : String(JSON.stringify(limit));
    return `(() => {
    const normalizeText = ${normalizeText.toString()};
    const absoluteUrl = ${absoluteUrl.toString()};
    const unique = ${unique.toString()};
    const leafTexts = ${leafTexts.toString()};
    const extractCollectionLinks = ${extractCollectionLinks.toString()};
    return extractCollectionLinks(document, ${JSON.stringify(retailer)}, ${limitLiteral});
  })()`;
}

/**
 * Build the source text of a self-contained script that extracts the product
 * detail record from the current page.
 *
 * Every helper the script declares is embedded through
 * `Function.prototype.toString()`; the only free identifier left in the
 * generated code is the global `document`.
 *
 * @returns {string} Source of an IIFE whose value is
 *   `extractProductDetail(document)`.
 */
export function buildExtractProductDetailScript() {
    return `(() => {
    const normalizeText = ${normalizeText.toString()};
    const absoluteUrl = ${absoluteUrl.toString()};
    const unique = ${unique.toString()};
    const leafTexts = ${leafTexts.toString()};
    const productIdFromUrl = ${productIdFromUrl.toString()};
    const retailerFromProductUrl = ${retailerFromProductUrl.toString()};
    const compactPrice = ${compactPrice.toString()};
    const compactOriginalPrice = ${compactOriginalPrice.toString()};
    const firstMatching = ${firstMatching.toString()};
    const extractProductDetail = ${extractProductDetail.toString()};
    return extractProductDetail(document);
  })()`;
}