diff --git a/cli-manifest.json b/cli-manifest.json
index b708dd7..098bbe5 100644
--- a/cli-manifest.json
+++ b/cli-manifest.json
@@ -1,4 +1,40 @@
 [
+  {
+    "site": "allrecipes",
+    "name": "recipe",
+    "description": "Extract an Allrecipes recipe from a recipe URL",
+    "access": "read",
+    "domain": "www.allrecipes.com",
+    "strategy": "ui",
+    "browser": true,
+    "args": [
+      {
+        "name": "url",
+        "type": "str",
+        "required": true,
+        "positional": true,
+        "help": "Allrecipes recipe URL"
+      }
+    ],
+    "columns": [
+      "title",
+      "author",
+      "rating",
+      "ratingCount",
+      "prepTime",
+      "cookTime",
+      "totalTime",
+      "servings",
+      "calories",
+      "ingredients",
+      "instructions",
+      "url"
+    ],
+    "type": "js",
+    "modulePath": "allrecipes/recipe.js",
+    "sourceFile": "allrecipes/recipe.js",
+    "navigateBefore": true
+  },
   {
     "site": "amazon",
     "name": "bestsellers",
diff --git a/clis/allrecipes/allrecipes.test.js b/clis/allrecipes/allrecipes.test.js
new file mode 100644
index 0000000..f748cc0
--- /dev/null
+++ b/clis/allrecipes/allrecipes.test.js
@@ -0,0 +1,92 @@
+import { describe, expect, it, vi } from 'vitest';
+import { ArgumentError, CommandExecutionError, EmptyResultError } from '@agentrhq/webcmd/errors';
+import { getRegistry } from '@agentrhq/webcmd/registry';
+import { buildRecipeScript, mapRecipe } from './recipe.js';
+
+function page(result) {
+    return {
+        goto: vi.fn().mockResolvedValue(undefined),
+        wait: vi.fn().mockResolvedValue(undefined),
+        evaluate: vi.fn().mockResolvedValue(result),
+    };
+}
+
+describe('allrecipes recipe', () => {
+    const cmd = getRegistry().get('allrecipes/recipe');
+
+    it('maps JSON-LD recipe data into one row', async () => {
+        const p = page({
+            ok: true,
+            href: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/',
+            payload: {
+                name: "World's Best Lasagna",
+                author: { name: 'John Chandler' },
+                aggregateRating: { ratingValue: '4.8', ratingCount: '20500' },
+                prepTime: 'PT30M',
+                cookTime: 'PT2H30M',
+                totalTime: 'PT3H15M',
+                recipeYield: ['12 servings'],
+                nutrition: { calories: '448 calories' },
+                recipeIngredient: ['1 pound sweet Italian sausage', '12 lasagna noodles'],
+                recipeInstructions: [{ text: 'Cook sausage.' }, { text: 'Layer noodles.' }],
+            },
+        });
+
+        const rows = await cmd.func(p, { url: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/' });
+        expect(p.goto).toHaveBeenCalledWith('https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/');
+        expect(rows).toEqual([{
+            title: "World's Best Lasagna",
+            author: 'John Chandler',
+            rating: 4.8,
+            ratingCount: 20500,
+            prepTime: 'PT30M',
+            cookTime: 'PT2H30M',
+            totalTime: 'PT3H15M',
+            servings: '12 servings',
+            calories: '448 calories',
+            ingredients: '1 pound sweet Italian sausage\n12 lasagna noodles',
+            instructions: '1. Cook sausage.\n2. Layer noodles.',
+            url: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/',
+        }]);
+    });
+
+    it('rejects non-Allrecipes URLs', async () => {
+        await expect(cmd.func(page({}), { url: 'https://example.com/recipe' })).rejects.toBeInstanceOf(ArgumentError);
+    });
+
+    it('reports browser verification challenges', async () => {
+        await expect(cmd.func(page({ ok: false, challenge: true }), {
+            url: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/',
+        })).rejects.toBeInstanceOf(CommandExecutionError);
+    });
+
+    it('throws EmptyResultError when no title is extracted', async () => {
+        await expect(cmd.func(page({ ok: true, payload: {}, href: 'https://www.allrecipes.com/recipe/x/' }), {
+            url: 'https://www.allrecipes.com/recipe/x/',
+        })).rejects.toBeInstanceOf(EmptyResultError);
+    });
+});
+
+describe('allrecipes helpers', () => {
+    it('builds a JSON-LD extraction script', () => {
+        expect(buildRecipeScript()).toContain('application/ld+json');
+        expect(buildRecipeScript()).toContain("'@type'");
+    });
+
+    it('maps array authors and raw instruction strings', () => {
+        expect(mapRecipe({
+            name: 'A &amp; B Pie',
+            author: [{ name: 'A' }, { name: 'B' }],
+            recipeInstructions: ['Bake it &#39;til done.'],
+        }, 'https://www.allrecipes.com/recipe/1/pie/')).toMatchObject({
+            title: 'A & B Pie',
+            author: 'A, B',
+            instructions: "1. Bake it 'til done.",
+        });
+    });
+
+    it('leaves out-of-range numeric entities undecoded instead of throwing', () => {
+        expect(mapRecipe({ name: 'Pie &#99999999; &#x110000;' }, 'https://www.allrecipes.com/recipe/1/pie/').title)
+            .toBe('Pie &#99999999; &#x110000;');
+    });
+});
diff --git a/clis/allrecipes/recipe.js b/clis/allrecipes/recipe.js
new file mode 100644
index 0000000..c8f1d59
--- /dev/null
+++ b/clis/allrecipes/recipe.js
@@ -0,0 +1,170 @@
+import { ArgumentError, CommandExecutionError, EmptyResultError } from '@agentrhq/webcmd/errors';
+import { cli, Strategy } from '@agentrhq/webcmd/registry';
+
+// Matches http(s) URLs whose host is allrecipes.com or www.allrecipes.com (a path slash is required).
+const HOST = /^https?:\/\/(?:www\.)?allrecipes\.com\//i;
+// Largest valid Unicode code point; String.fromCodePoint throws RangeError above it.
+const MAX_CODE_POINT = 0x10ffff;
+
+/**
+ * Validate the user-supplied recipe URL.
+ * @param {unknown} value Raw `url` argument.
+ * @returns {string} The trimmed URL.
+ * @throws {ArgumentError} If the URL is empty or does not match HOST.
+ */
+function requireUrl(value) {
+    const url = String(value ?? '').trim();
+    if (!url) throw new ArgumentError('allrecipes recipe URL is required');
+    if (!HOST.test(url)) throw new ArgumentError(`URL must be on allrecipes.com, got ${url}`);
+    return url;
+}
+
+/**
+ * Flatten a JSON-LD value (string, object with name/text, or array of those) into text.
+ * @param {unknown} value JSON-LD value.
+ * @returns {string} Entity-decoded text; arrays are joined with ", " and nullish becomes ''.
+ */
+function asText(value) {
+    if (value == null) return '';
+    if (Array.isArray(value)) return value.map(asText).filter(Boolean).join(', ');
+    if (typeof value === 'object') return asText(value.name || value.text);
+    return decodeHtml(String(value).trim());
+}
+
+/**
+ * Convert a parsed numeric character reference to its character.
+ * @param {string} entity The matched entity text, returned unchanged when out of range.
+ * @param {number} codePoint Parsed code point.
+ * @returns {string} The decoded character, or `entity` if the code point exceeds MAX_CODE_POINT.
+ */
+function decodeCodePoint(entity, codePoint) {
+    return codePoint <= MAX_CODE_POINT ? String.fromCodePoint(codePoint) : entity;
+}
+
+/**
+ * Decode numeric and common named HTML entities found in JSON-LD strings.
+ * @param {unknown} value Text to decode.
+ * @returns {string} Decoded text.
+ */
+function decodeHtml(value) {
+    return String(value ?? '')
+        .replace(/&#(\d+);/g, (entity, n) => decodeCodePoint(entity, Number(n)))
+        .replace(/&#x([0-9a-f]+);/gi, (entity, n) => decodeCodePoint(entity, Number.parseInt(n, 16)))
+        .replace(/&quot;/g, '"')
+        .replace(/&#39;|&apos;/g, "'")
+        .replace(/&lt;/g, '<')
+        .replace(/&gt;/g, '>')
+        // &amp; goes last so "&amp;lt;" decodes to the literal text "&lt;", not "<".
+        .replace(/&amp;/g, '&');
+}
+
+/**
+ * Render recipeInstructions as numbered steps, one per line.
+ * @param {unknown} value JSON-LD recipeInstructions (array of steps/strings, or a single value).
+ * @returns {string} "1. ...\n2. ..." for arrays (empty steps dropped); plain text otherwise.
+ */
+function instructionText(value) {
+    if (!Array.isArray(value)) return asText(value);
+    return value
+        .map((step) => asText(step?.text || step?.name || step))
+        .filter(Boolean)
+        .map((step, i) => `${i + 1}. ${step}`)
+        .join('\n');
+}
+
+/**
+ * Map a JSON-LD Recipe object onto the command's output columns.
+ * @param {object} recipe JSON-LD Recipe, or the minimal { name, url } fallback payload.
+ * @param {string} fallbackUrl URL used when the recipe carries no `url` of its own.
+ * @returns {object} One row; rating and ratingCount are numbers or null, the rest are strings.
+ */
+export function mapRecipe(recipe, fallbackUrl) {
+    const rating = recipe?.aggregateRating || {};
+    const ratingCount = rating.ratingCount ?? rating.reviewCount;
+    return {
+        title: asText(recipe?.name),
+        author: asText(recipe?.author),
+        rating: rating.ratingValue == null ? null : Number(rating.ratingValue),
+        ratingCount: ratingCount == null ? null : Number(ratingCount),
+        prepTime: asText(recipe?.prepTime),
+        cookTime: asText(recipe?.cookTime),
+        totalTime: asText(recipe?.totalTime),
+        servings: asText(recipe?.recipeYield),
+        calories: asText(recipe?.nutrition?.calories),
+        ingredients: Array.isArray(recipe?.recipeIngredient) ? recipe.recipeIngredient.map(asText).join('\n') : '',
+        instructions: instructionText(recipe?.recipeInstructions),
+        url: asText(recipe?.url) || fallbackUrl,
+    };
+}
+
+/**
+ * Build the script evaluated in the page to locate the recipe.
+ * The script returns { ok: false, challenge: true } on a verification page,
+ * { ok: true, payload, href } when a JSON-LD Recipe (or, failing that, an <h1> title) is found,
+ * and { ok: false } otherwise. JSON-LD blocks that fail to parse are skipped.
+ * @returns {string} Source of a self-invoking arrow function expression.
+ */
+export function buildRecipeScript() {
+    return `(() => {
+    const bodyText = document.body?.innerText || '';
+    if (/Just a moment|Enable JavaScript and cookies to continue/i.test(document.title + '\\n' + bodyText)) {
+        return { ok: false, challenge: true };
+    }
+    const isRecipe = (item) => {
+        const type = item && item['@type'];
+        return type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe'));
+    };
+    const visit = (item) => {
+        if (!item || typeof item !== 'object') return null;
+        if (isRecipe(item)) return item;
+        if (Array.isArray(item)) {
+            for (const child of item) {
+                const found = visit(child);
+                if (found) return found;
+            }
+        }
+        const graph = item['@graph'];
+        return Array.isArray(graph) ? visit(graph) : null;
+    };
+    for (const script of document.querySelectorAll('script[type="application/ld+json"]')) {
+        try {
+            const found = visit(JSON.parse(script.textContent || 'null'));
+            if (found) return { ok: true, payload: found, href: location.href };
+        } catch {}
+    }
+    const title = document.querySelector('h1')?.textContent?.trim() || '';
+    return title ? { ok: true, payload: { name: title, url: location.href }, href: location.href } : { ok: false };
+})()`;
+}
+
+// Registers `allrecipes recipe`: navigate to the URL, run the extraction script, return one row.
+cli({
+    site: 'allrecipes',
+    name: 'recipe',
+    access: 'read',
+    description: 'Extract an Allrecipes recipe from a recipe URL',
+    domain: 'www.allrecipes.com',
+    strategy: Strategy.UI,
+    browser: true,
+    args: [
+        { name: 'url', positional: true, required: true, help: 'Allrecipes recipe URL' },
+    ],
+    columns: ['title', 'author', 'rating', 'ratingCount', 'prepTime', 'cookTime', 'totalTime', 'servings', 'calories', 'ingredients', 'instructions', 'url'],
+    func: async (page, args) => {
+        const url = requireUrl(args.url);
+        await page.goto(url);
+        await page.wait(2);
+        const result = await page.evaluate(buildRecipeScript());
+        if (result?.challenge) {
+            throw new CommandExecutionError('Allrecipes showed a browser verification challenge', 'Retry with a logged-in/persistent browser session after the page finishes loading.');
+        }
+        if (!result?.ok) {
+            throw new CommandExecutionError('allrecipes recipe extraction failed', 'Check that the URL points to an Allrecipes recipe page.');
+        }
+        const row = mapRecipe(result.payload, result.href || url);
+        if (!row.title) {
+            throw new EmptyResultError('allrecipes recipe', 'The page loaded but no recipe title was found.');
+        }
+        return [row];
+    },
+});