From a83b8da124a775a2d7729e7508277ffe3e7f725a Mon Sep 17 00:00:00 2001 From: zack-dev-cm Date: Wed, 20 May 2026 11:43:16 +0400 Subject: [PATCH] fix site crawler checks --- package.json | 1 + scripts/website-smoke.mjs | 109 ++++++++++++++++++++++++++++++++++++++ src/exec.test.ts | 4 +- website/README.md | 3 ++ website/_headers | 7 +++ website/index.html | 2 +- website/robots.txt | 4 ++ website/sitemap.xml | 9 ++++ 8 files changed, 136 insertions(+), 3 deletions(-) create mode 100644 scripts/website-smoke.mjs create mode 100644 website/_headers create mode 100644 website/robots.txt create mode 100644 website/sitemap.xml diff --git a/package.json b/package.json index 4ce5c14..b8d0afc 100644 --- a/package.json +++ b/package.json @@ -19,6 +19,7 @@ "lint": "oxlint . --config oxlint.json", "format": "oxfmt --write .", "format:check": "oxfmt --check .", + "website:smoke": "node scripts/website-smoke.mjs", "test": "vitest run", "pack:smoke": "node scripts/package-smoke.mjs" }, diff --git a/scripts/website-smoke.mjs b/scripts/website-smoke.mjs new file mode 100644 index 0000000..be80ad3 --- /dev/null +++ b/scripts/website-smoke.mjs @@ -0,0 +1,109 @@ +import { readFile, stat } from "node:fs/promises"; +import { join } from "node:path"; + +const root = process.cwd(); +const website = join(root, "website"); +const failures = []; + +function fail(message) { + failures.push(message); +} + +async function mustRead(relativePath) { + try { + return await readFile(join(root, relativePath), "utf8"); + } catch { + fail(`missing ${relativePath}`); + return ""; + } +} + +function stripTags(value) { + return value + .replace(/<script[\s\S]*?<\/script>/giu, " ") + .replace(/<[^>]+>/gu, "") + .replace(/\s+/gu, " ") + .trim(); +} + +function extractFirst(html, pattern, label) { + const match = html.match(pattern); + if (!match) { + fail(`missing ${label}`); + return ""; + } + return match[1] || ""; +} + +const html = await mustRead("website/index.html"); +const robots = await mustRead("website/robots.txt"); +const 
sitemap = await mustRead("website/sitemap.xml"); +const headers = await mustRead("website/_headers"); + +const title = stripTags(extractFirst(html, /<title>([\s\S]*?)<\/title>/iu, "title")); +if (title !== "Clawpatch — Automated Code Review") { + fail(`unexpected title: ${title}`); +} + +const description = html.match(/<meta\s+name="description"\s+content="([^"]+)"/iu)?.[1] || ""; +if (!description.includes("Automated code review that lands fixes")) { + fail("meta description does not contain the product promise"); +} + +const h1 = stripTags(extractFirst(html, /<h1>([\s\S]*?)<\/h1>/iu, "h1")); +if (h1 !== "Code review with explicit fixes") { + fail(`unexpected h1 text: ${h1}`); +} + +const ids = new Set([...html.matchAll(/\sid="([^"]+)"/giu)].map((match) => match[1])); +const anchorLinks = [...html.matchAll(/href="#([^"]+)"/giu)].map((match) => match[1]); +for (const id of anchorLinks) { + if (!ids.has(id)) fail(`missing anchor target: #${id}`); +} + +if (!robots.includes("Sitemap: https://clawpatch.ai/sitemap.xml")) { + fail("robots.txt missing sitemap reference"); +} + +if (!sitemap.includes("<loc>https://clawpatch.ai/</loc>")) { + fail("sitemap.xml missing canonical homepage loc"); +} + +for (const expectedHeader of [ + "Strict-Transport-Security", + "X-Content-Type-Options", + "X-Frame-Options", + "Referrer-Policy", + "Permissions-Policy", + "Content-Security-Policy", +]) { + if (!headers.includes(expectedHeader)) { + fail(`_headers missing ${expectedHeader}`); + } +} + +const socialCard = await readFile(join(website, "social-card.png")); +if (socialCard.toString("ascii", 1, 4) !== "PNG") { + fail("social-card.png is not a PNG"); +} else { + const width = socialCard.readUInt32BE(16); + const height = socialCard.readUInt32BE(20); + if (width !== 1200 || height !== 630) { + fail(`social-card.png dimensions are ${width}x${height}, expected 1200x630`); + } +} + +for (const file of ["website/favicon.svg", "website/CNAME", "website/.nojekyll"]) { + try { + await 
stat(join(root, file)); + } catch { + fail(`missing ${file}`); + } +} + +if (failures.length) { + console.error(failures.join("\n")); + process.exit(1); +} + +console.log("Website smoke checks passed."); diff --git a/src/exec.test.ts b/src/exec.test.ts index f8270fc..483466f 100644 --- a/src/exec.test.ts +++ b/src/exec.test.ts @@ -92,7 +92,7 @@ describe("runCommandArgs", () => { "import { writeFileSync } from 'node:fs';", "process.on('SIGTERM', () => {});", "process.send?.('ready');", - `setTimeout(() => writeFileSync(${JSON.stringify(marker)}, 'alive'), 2500);`, + `setTimeout(() => writeFileSync(${JSON.stringify(marker)}, 'alive'), 4500);`, "setInterval(() => {}, 1000);", ].join("\n"), "utf8", @@ -111,7 +111,7 @@ describe("runCommandArgs", () => { ); const result = await runCommandArgs(process.execPath, [parentScript], dir, undefined, { - timeoutMs: 1000, + timeoutMs: 3000, }); await new Promise((resolve) => setTimeout(resolve, 1200)); diff --git a/website/README.md b/website/README.md index e4ad31d..78e4c07 100644 --- a/website/README.md +++ b/website/README.md @@ -8,6 +8,9 @@ Files: - `favicon.svg`: browser icon - `social-card.svg`: link preview card - `social-card.png`: raster link preview card for Open Graph/Twitter +- `robots.txt`: crawler policy with sitemap reference +- `sitemap.xml`: canonical single-page sitemap +- `_headers`: static security headers for hosts that support header files Preview: diff --git a/website/_headers b/website/_headers new file mode 100644 index 0000000..82c0d66 --- /dev/null +++ b/website/_headers @@ -0,0 +1,7 @@ +/* + Strict-Transport-Security: max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: nosniff + X-Frame-Options: DENY + Referrer-Policy: strict-origin-when-cross-origin + Permissions-Policy: camera=(), microphone=(), geolocation=(), payment=(), usb=() + Content-Security-Policy: default-src 'self'; script-src 'self' 'unsafe-inline'; style-src 'self' 'unsafe-inline' https://fonts.googleapis.com; font-src 
https://fonts.gstatic.com; img-src 'self' data:; connect-src 'self'; base-uri 'none'; form-action 'none'; frame-ancestors 'none'; object-src 'none' diff --git a/website/index.html b/website/index.html index 49d2f10..d8bbf0e 100644 --- a/website/index.html +++ b/website/index.html @@ -1057,7 +1057,7 @@ <h2>Reference</h2> <main> <header class="home-hero"> <p class="eyebrow">Automated Code Review · Explicit Fixes</p> - <h1>Code review with<br />explicit fixes</h1> + <h1>Code review with <br />explicit fixes</h1> <p class="lede"> Clawpatch maps codebases into semantic feature slices, reviews them for bugs and quality issues, and records explicit fix attempts with validation. diff --git a/website/robots.txt b/website/robots.txt new file mode 100644 index 0000000..02eb7f6 --- /dev/null +++ b/website/robots.txt @@ -0,0 +1,4 @@ +User-agent: * +Allow: / + +Sitemap: https://clawpatch.ai/sitemap.xml diff --git a/website/sitemap.xml b/website/sitemap.xml new file mode 100644 index 0000000..685b1ee --- /dev/null +++ b/website/sitemap.xml @@ -0,0 +1,9 @@ +<?xml version="1.0" encoding="UTF-8"?> +<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> + <url> + <loc>https://clawpatch.ai/</loc> + <lastmod>2026-05-20</lastmod> + <changefreq>weekly</changefreq> + <priority>1.0</priority> + </url> +</urlset>