From 71e50befa6366c38025fcc03fd61ae306297e074 Mon Sep 17 00:00:00 2001 From: Ankit Ranjan Date: Fri, 3 Jul 2026 13:47:21 +0530 Subject: [PATCH 1/2] Add Allrecipes adapter --- cli-manifest.json | 37 ++++++++++ clis/allrecipes/allrecipes.test.js | 88 +++++++++++++++++++++++ clis/allrecipes/recipe.js | 111 +++++++++++++++++++++++++++++ 3 files changed, 236 insertions(+) create mode 100644 clis/allrecipes/allrecipes.test.js create mode 100644 clis/allrecipes/recipe.js diff --git a/cli-manifest.json b/cli-manifest.json index bfdb71e..db93ac4 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -1357,6 +1357,43 @@ "modulePath": "aibase/news.js", "sourceFile": "aibase/news.js" }, + { + "site": "allrecipes", + "name": "recipe", + "description": "Extract an Allrecipes recipe from a recipe URL", + "access": "read", + "domain": "www.allrecipes.com", + "strategy": "ui", + "browser": true, + "args": [ + { + "name": "url", + "type": "str", + "required": true, + "positional": true, + "help": "Allrecipes recipe URL" + } + ], + "columns": [ + "title", + "author", + "rating", + "ratingCount", + "prepTime", + "cookTime", + "totalTime", + "servings", + "calories", + "ingredients", + "instructions", + "url", + "key" + ], + "type": "js", + "modulePath": "allrecipes/recipe.js", + "sourceFile": "allrecipes/recipe.js", + "navigateBefore": true + }, { "site": "amazon", "name": "bestsellers", diff --git a/clis/allrecipes/allrecipes.test.js b/clis/allrecipes/allrecipes.test.js new file mode 100644 index 0000000..c6dc8c7 --- /dev/null +++ b/clis/allrecipes/allrecipes.test.js @@ -0,0 +1,88 @@ +import { describe, expect, it, vi } from 'vitest'; +import { ArgumentError, CommandExecutionError, EmptyResultError } from '@agentrhq/webcmd/errors'; +import { getRegistry } from '@agentrhq/webcmd/registry'; +import { buildRecipeScript, mapRecipe } from './recipe.js'; + +function page(result) { + return { + goto: vi.fn().mockResolvedValue(undefined), + wait: vi.fn().mockResolvedValue(undefined), + 
evaluate: vi.fn().mockResolvedValue(result), + }; +} + +describe('allrecipes recipe', () => { + const cmd = getRegistry().get('allrecipes/recipe'); + + it('maps JSON-LD recipe data into one row', async () => { + const p = page({ + ok: true, + href: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/', + payload: { + name: "World's Best Lasagna", + author: { name: 'John Chandler' }, + aggregateRating: { ratingValue: '4.8', ratingCount: 20500 }, + prepTime: 'PT30M', + cookTime: 'PT2H30M', + totalTime: 'PT3H15M', + recipeYield: ['12 servings'], + nutrition: { calories: '448 calories' }, + recipeIngredient: ['1 pound sweet Italian sausage', '12 lasagna noodles'], + recipeInstructions: [{ text: 'Cook sausage.' }, { text: 'Layer noodles.' }], + }, + }); + + const rows = await cmd.func(p, { url: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/' }); + expect(p.goto).toHaveBeenCalledWith('https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/'); + expect(rows).toEqual([{ + title: "World's Best Lasagna", + author: 'John Chandler', + rating: 4.8, + ratingCount: 20500, + prepTime: 'PT30M', + cookTime: 'PT2H30M', + totalTime: 'PT3H15M', + servings: '12 servings', + calories: '448 calories', + ingredients: '1 pound sweet Italian sausage\n12 lasagna noodles', + instructions: '1. Cook sausage.\n2. 
Layer noodles.', + url: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/', + key: 'recipe/23600/worlds-best-lasagna', + }]); + }); + + it('rejects non-Allrecipes URLs', async () => { + await expect(cmd.func(page({}), { url: 'https://example.com/recipe' })).rejects.toBeInstanceOf(ArgumentError); + }); + + it('reports browser verification challenges', async () => { + await expect(cmd.func(page({ ok: false, challenge: true }), { + url: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/', + })).rejects.toBeInstanceOf(CommandExecutionError); + }); + + it('throws EmptyResultError when no title is extracted', async () => { + await expect(cmd.func(page({ ok: true, payload: {}, href: 'https://www.allrecipes.com/recipe/x/' }), { + url: 'https://www.allrecipes.com/recipe/x/', + })).rejects.toBeInstanceOf(EmptyResultError); + }); +}); + +describe('allrecipes helpers', () => { + it('builds a JSON-LD extraction script', () => { + expect(buildRecipeScript()).toContain('application/ld+json'); + expect(buildRecipeScript()).toContain("'@type'"); + }); + + it('maps array authors and raw instruction strings', () => { + expect(mapRecipe({ + name: 'Pie', + author: [{ name: 'A' }, { name: 'B' }], + recipeInstructions: ['Bake it.'], + }, 'https://www.allrecipes.com/recipe/1/pie/')).toMatchObject({ + title: 'Pie', + author: 'A, B', + instructions: '1. Bake it.', + }); + }); +}); diff --git a/clis/allrecipes/recipe.js b/clis/allrecipes/recipe.js new file mode 100644 index 0000000..85f4f01 --- /dev/null +++ b/clis/allrecipes/recipe.js @@ -0,0 +1,111 @@ +import { ArgumentError, CommandExecutionError, EmptyResultError } from '@agentrhq/webcmd/errors'; +import { cli, Strategy } from '@agentrhq/webcmd/registry'; + +const HOST = /^https?:\/\/(?:www\.)?allrecipes\.com\//i; + +function requireUrl(value) { + const url = String(value ?? 
'').trim(); + if (!url) throw new ArgumentError('allrecipes recipe URL is required'); + if (!HOST.test(url)) throw new ArgumentError(`URL must be on allrecipes.com, got ${url}`); + return url; +} + +function asText(value) { + if (value == null) return ''; + if (Array.isArray(value)) return value.map(asText).filter(Boolean).join(', '); + if (typeof value === 'object') return asText(value.name || value.text); + return String(value).trim(); +} + +function instructionText(value) { + if (!Array.isArray(value)) return asText(value); + return value + .map((step) => asText(step?.text || step?.name || step)) + .filter(Boolean) + .map((step, i) => `${i + 1}. ${step}`) + .join('\n'); +} + +export function mapRecipe(recipe, fallbackUrl) { + const rating = recipe?.aggregateRating || {}; + const key = String(fallbackUrl).replace(/^https?:\/\/(?:www\.)?allrecipes\.com\//i, '').replace(/\/$/, ''); + return { + title: asText(recipe?.name), + author: asText(recipe?.author), + rating: rating.ratingValue == null ? null : Number(rating.ratingValue), + ratingCount: rating.ratingCount ?? rating.reviewCount ?? null, + prepTime: asText(recipe?.prepTime), + cookTime: asText(recipe?.cookTime), + totalTime: asText(recipe?.totalTime), + servings: asText(recipe?.recipeYield), + calories: asText(recipe?.nutrition?.calories), + ingredients: Array.isArray(recipe?.recipeIngredient) ? 
recipe.recipeIngredient.join('\n') : '', + instructions: instructionText(recipe?.recipeInstructions), + url: asText(recipe?.url) || fallbackUrl, + key, + }; +} + +export function buildRecipeScript() { + return `(() => { + const bodyText = document.body?.innerText || ''; + if (/Just a moment|Enable JavaScript and cookies to continue/i.test(document.title + '\\n' + bodyText)) { + return { ok: false, challenge: true }; + } + const isRecipe = (item) => { + const type = item && item['@type']; + return type === 'Recipe' || (Array.isArray(type) && type.includes('Recipe')); + }; + const visit = (item) => { + if (!item || typeof item !== 'object') return null; + if (isRecipe(item)) return item; + if (Array.isArray(item)) { + for (const child of item) { + const found = visit(child); + if (found) return found; + } + } + const graph = item['@graph']; + return Array.isArray(graph) ? visit(graph) : null; + }; + for (const script of document.querySelectorAll('script[type="application/ld+json"]')) { + try { + const found = visit(JSON.parse(script.textContent || 'null')); + if (found) return { ok: true, payload: found, href: location.href }; + } catch {} + } + const title = document.querySelector('h1')?.textContent?.trim() || ''; + return title ? 
{ ok: true, payload: { name: title, url: location.href }, href: location.href } : { ok: false }; +})()`; +} + +cli({ + site: 'allrecipes', + name: 'recipe', + access: 'read', + description: 'Extract an Allrecipes recipe from a recipe URL', + domain: 'www.allrecipes.com', + strategy: Strategy.UI, + browser: true, + args: [ + { name: 'url', positional: true, required: true, help: 'Allrecipes recipe URL' }, + ], + columns: ['title', 'author', 'rating', 'ratingCount', 'prepTime', 'cookTime', 'totalTime', 'servings', 'calories', 'ingredients', 'instructions', 'url', 'key'], + func: async (page, args) => { + const url = requireUrl(args.url); + await page.goto(url); + await page.wait(2); + const result = await page.evaluate(buildRecipeScript()); + if (result?.challenge) { + throw new CommandExecutionError('Allrecipes showed a browser verification challenge', 'Retry with a logged-in/persistent browser session after the page finishes loading.'); + } + if (!result?.ok) { + throw new CommandExecutionError('allrecipes recipe extraction failed', 'Check that the URL points to an Allrecipes recipe page.'); + } + const row = mapRecipe(result.payload, result.href || url); + if (!row.title) { + throw new EmptyResultError('allrecipes recipe', 'The page loaded but no recipe title was found.'); + } + return [row]; + }, +}); From 87e3f24c274bbcd979473b36d2d61189a5c4eee1 Mon Sep 17 00:00:00 2001 From: Ankit Ranjan Date: Fri, 3 Jul 2026 14:43:42 +0530 Subject: [PATCH 2/2] Fix Allrecipes recipe output shape --- cli-manifest.json | 3 +-- clis/allrecipes/allrecipes.test.js | 11 +++++------ clis/allrecipes/recipe.js | 22 ++++++++++++++++------ 3 files changed, 22 insertions(+), 14 deletions(-) diff --git a/cli-manifest.json b/cli-manifest.json index db93ac4..beceb98 100644 --- a/cli-manifest.json +++ b/cli-manifest.json @@ -1386,8 +1386,7 @@ "calories", "ingredients", "instructions", - "url", - "key" + "url" ], "type": "js", "modulePath": "allrecipes/recipe.js", diff --git 
a/clis/allrecipes/allrecipes.test.js b/clis/allrecipes/allrecipes.test.js index c6dc8c7..f748cc0 100644 --- a/clis/allrecipes/allrecipes.test.js +++ b/clis/allrecipes/allrecipes.test.js @@ -21,7 +21,7 @@ describe('allrecipes recipe', () => { payload: { name: "World's Best Lasagna", author: { name: 'John Chandler' }, - aggregateRating: { ratingValue: '4.8', ratingCount: 20500 }, + aggregateRating: { ratingValue: '4.8', ratingCount: '20500' }, prepTime: 'PT30M', cookTime: 'PT2H30M', totalTime: 'PT3H15M', @@ -47,7 +47,6 @@ describe('allrecipes recipe', () => { ingredients: '1 pound sweet Italian sausage\n12 lasagna noodles', instructions: '1. Cook sausage.\n2. Layer noodles.', url: 'https://www.allrecipes.com/recipe/23600/worlds-best-lasagna/', - key: 'recipe/23600/worlds-best-lasagna', }]); }); @@ -76,13 +75,13 @@ describe('allrecipes helpers', () => { it('maps array authors and raw instruction strings', () => { expect(mapRecipe({ - name: 'Pie', + name: 'A &amp; B Pie', author: [{ name: 'A' }, { name: 'B' }], - recipeInstructions: ['Bake it.'], + recipeInstructions: ['Bake it &#39;til done.'], }, 'https://www.allrecipes.com/recipe/1/pie/')).toMatchObject({ - title: 'Pie', + title: 'A & B Pie', author: 'A, B', - instructions: '1. Bake it.', + instructions: "1. Bake it 'til done.", }); }); }); diff --git a/clis/allrecipes/recipe.js b/clis/allrecipes/recipe.js index 85f4f01..c8f1d59 100644 --- a/clis/allrecipes/recipe.js +++ b/clis/allrecipes/recipe.js @@ -14,7 +14,18 @@ function asText(value) { if (value == null) return ''; if (Array.isArray(value)) return value.map(asText).filter(Boolean).join(', '); if (typeof value === 'object') return asText(value.name || value.text); - return String(value).trim(); + return decodeHtml(String(value).trim()); +} + +function decodeHtml(value) { + return String(value ?? 
'') + .replace(/&#(\d+);/g, (_, n) => String.fromCodePoint(Number(n))) + .replace(/&#x([0-9a-f]+);/gi, (_, n) => String.fromCodePoint(Number.parseInt(n, 16))) + .replace(/&quot;/g, '"') + .replace(/&#39;|&apos;/g, "'") + .replace(/&lt;/g, '<') + .replace(/&gt;/g, '>') + .replace(/&amp;/g, '&'); } function instructionText(value) { @@ -28,21 +39,20 @@ function instructionText(value) { export function mapRecipe(recipe, fallbackUrl) { const rating = recipe?.aggregateRating || {}; - const key = String(fallbackUrl).replace(/^https?:\/\/(?:www\.)?allrecipes\.com\//i, '').replace(/\/$/, ''); + const ratingCount = rating.ratingCount ?? rating.reviewCount; return { title: asText(recipe?.name), author: asText(recipe?.author), rating: rating.ratingValue == null ? null : Number(rating.ratingValue), - ratingCount: rating.ratingCount ?? rating.reviewCount ?? null, + ratingCount: ratingCount == null ? null : Number(ratingCount), prepTime: asText(recipe?.prepTime), cookTime: asText(recipe?.cookTime), totalTime: asText(recipe?.totalTime), servings: asText(recipe?.recipeYield), calories: asText(recipe?.nutrition?.calories), - ingredients: Array.isArray(recipe?.recipeIngredient) ? recipe.recipeIngredient.join('\n') : '', + ingredients: Array.isArray(recipe?.recipeIngredient) ? recipe.recipeIngredient.map(asText).join('\n') : '', instructions: instructionText(recipe?.recipeInstructions), url: asText(recipe?.url) || fallbackUrl, - key, }; } @@ -90,7 +100,7 @@ cli({ args: [ { name: 'url', positional: true, required: true, help: 'Allrecipes recipe URL' }, ], - columns: ['title', 'author', 'rating', 'ratingCount', 'prepTime', 'cookTime', 'totalTime', 'servings', 'calories', 'ingredients', 'instructions', 'url', 'key'], + columns: ['title', 'author', 'rating', 'ratingCount', 'prepTime', 'cookTime', 'totalTime', 'servings', 'calories', 'ingredients', 'instructions', 'url'], func: async (page, args) => { const url = requireUrl(args.url); await page.goto(url);