Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion packages/cli/src/capture/assetCataloger.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ export interface CatalogedAsset {
sectionClasses?: string;
/** Whether the image is above the fold (visible without scrolling) */
aboveFold?: boolean;
/** Element sits inside <header>, <nav>, or [role="banner"] — logo signal */
inBanner?: boolean;
/** Element sits inside <a> with site-root href ("/", "#", origin-only) — brand-home link */
inHomeLink?: boolean;
/** alt/aria-label/title contains the brand segment of document.title */
matchesTitleBrand?: boolean;
}

/**
Expand Down Expand Up @@ -62,6 +68,19 @@ export async function catalogAssets(page: Page): Promise<CatalogedAsset[]> {
var rect = el.getBoundingClientRect();
ctx.aboveFold = rect.top < window.innerHeight;
} catch(e) {}
// Structural logo-candidate signals: class-substring alone caught 0/32 SVGs on heygen.com.
ctx.inBanner = el.closest('header, nav, [role="banner"]') !== null;
var homeAnchor = el.closest('a[href]');
if (homeAnchor) {
var aHref = homeAnchor.getAttribute('href') || '';
ctx.inHomeLink = aHref === '/' || aHref === '#' || aHref === './' ||
/^https?:\\/\\/[^/]+\\/?$/.test(aHref);
}
var titleBrand = (document.title || '').split(/[-|—]/)[0].trim();
if (desc && titleBrand.length > 1 && titleBrand.length < 30 &&
desc.toLowerCase().indexOf(titleBrand.toLowerCase()) !== -1) {
ctx.matchesTitleBrand = true;
}
return ctx;
}

Expand Down Expand Up @@ -92,12 +111,15 @@ export async function catalogAssets(page: Page): Promise<CatalogedAsset[]> {
if (notes && !entry.notes) {
entry.notes = notes;
}
// Merge rich context (first one wins)
// Text fields: first-occurrence wins. Boolean signals: any positive sample wins.
if (richCtx) {
if (richCtx.description && !entry.description) entry.description = richCtx.description;
if (richCtx.nearestHeading && !entry.nearestHeading) entry.nearestHeading = richCtx.nearestHeading;
if (richCtx.sectionClasses && !entry.sectionClasses) entry.sectionClasses = richCtx.sectionClasses;
if (richCtx.aboveFold !== undefined && entry.aboveFold === undefined) entry.aboveFold = richCtx.aboveFold;
if (richCtx.inBanner) entry.inBanner = true;
if (richCtx.inHomeLink) entry.inHomeLink = true;
if (richCtx.matchesTitleBrand) entry.matchesTitleBrand = true;
}
}

Expand Down Expand Up @@ -324,6 +346,9 @@ function deduplicateSrcsetVariants(assets: CatalogedAsset[]): CatalogedAsset[] {
if (a.notes && !existing.notes) {
existing.notes = a.notes;
}
if (a.inBanner) existing.inBanner = true;
if (a.inHomeLink) existing.inHomeLink = true;
if (a.matchesTitleBrand) existing.matchesTitleBrand = true;
// Keep the URL with highest w= value (largest image)
const existingW = getWidthParam(existing.url);
const newW = getWidthParam(a.url);
Expand Down
33 changes: 27 additions & 6 deletions packages/cli/src/capture/assetDownloader.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,16 @@

import { writeFileSync, mkdirSync } from "node:fs";
import { join, extname } from "node:path";
import { createHash } from "node:crypto";
import type { DesignTokens, DownloadedAsset } from "./types.js";
import type { CatalogedAsset } from "./assetCataloger.js";

/**
 * Derives a filename slug for an SVG from its content, so the name cannot
 * drift away from what the file actually contains.
 *
 * @param svgSource - SVG markup (string) or raw bytes (Buffer) to fingerprint.
 * @param isLogo - When true the slug is prefixed `logo-`, otherwise `svg-`.
 * @returns `<prefix>-<first 8 hex chars of the SHA-1 digest>`.
 */
function svgContentHashSlug(svgSource: string | Buffer, isLogo: boolean): string {
  const hasher = createHash("sha1");
  hasher.update(svgSource);
  // Only the leading 8 hex characters of the digest are kept in the name.
  const shortDigest = hasher.digest("hex").substring(0, 8);
  const prefix = isLogo ? "logo" : "svg";
  return [prefix, shortDigest].join("-");
}

export async function downloadAssets(
tokens: DesignTokens,
outputDir: string,
Expand All @@ -22,15 +29,13 @@
const assets: DownloadedAsset[] = [];
const downloadedUrls = new Set<string>();

// 1. ALL inline SVGs — save as files (logos get priority naming)
// Inline SVGs → svg-<hash>.svg / logo-<hash>.svg. Label-derived names mis-assigned brands.
mkdirSync(join(outputDir, "assets", "svgs"), { recursive: true });
const usedSvgNames = new Set<string>();
for (let i = 0; i < tokens.svgs.length && i < 30; i++) {
const svg = tokens.svgs[i]!;
if (!svg.outerHTML || svg.outerHTML.length < 50) continue;
const label = svg.label?.replace(/[^a-zA-Z0-9-_ ]/g, "").trim();
let slug = label ? slugify(label) : svg.isLogo ? `logo-${i}` : `icon-${i}`;
// Deduplicate — two SVGs with same aria-label get suffixed
const slug = svgContentHashSlug(svg.outerHTML, !!svg.isLogo);
let finalSlug = slug;
let suffix = 2;
while (usedSvgNames.has(finalSlug)) {
Expand Down Expand Up @@ -135,8 +140,24 @@
if (result.status !== "fulfilled" || !result.value) continue;
const { url, isPoster, parsedUrl, ext, buffer, catalog } = result.value;
try {
// Generate human-readable name from catalog context
const slug = deriveAssetName(parsedUrl, catalog, isPoster, imgIdx, usedNames);
// SVGs → content-hash names (label-derived was mis-assigning brands). Rasters keep catalog slugs.
let slug: string;
if (ext === ".svg") {
const c = catalog;
const brandRe = /logo|brand|wordmark/i;
const isLogo = !!(
c?.inBanner ||
c?.inHomeLink ||
c?.matchesTitleBrand ||
c?.contexts?.some((s) => brandRe.test(s)) ||
(c?.description && brandRe.test(c.description)) ||
(c?.nearestHeading && brandRe.test(c.nearestHeading)) ||
(c?.sectionClasses && brandRe.test(c.sectionClasses))
);
slug = svgContentHashSlug(buffer, isLogo);
} else {
slug = deriveAssetName(parsedUrl, catalog, isPoster, imgIdx, usedNames);
}
const name = `${slug}${ext}`;
usedNames.add(slug);
const localPath = `assets/${name}`;
Expand Down Expand Up @@ -328,7 +349,7 @@
let current = url;
for (let hop = 0; hop <= MAX_FETCH_REDIRECTS; hop++) {
if (isPrivateUrl(current)) return null;
const res = await fetch(current, { ...init, redirect: "manual" });

Check warning

Code scanning / CodeQL

File data in outbound network request Medium

Outbound network request depends on file data.
if (res.status >= 300 && res.status < 400) {
const loc = res.headers.get("location");
if (!loc) return res;
Expand Down
60 changes: 43 additions & 17 deletions packages/cli/src/capture/contentExtractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import type { Page } from "puppeteer-core";
import { existsSync, readdirSync, statSync, readFileSync } from "node:fs";
import { join } from "node:path";
import sharp from "sharp";
import type { CatalogedAsset } from "./assetCataloger.js";
import type { DesignTokens } from "./types.js";

Expand Down Expand Up @@ -232,7 +233,7 @@ export async function captionImagesWithGemini(
}
progress("design", `${Object.keys(geminiCaptions).length} images captioned with Gemini`);

// Caption SVGs by sending source code as text (vision API rejects image/svg+xml).
// Rasterize SVGs to PNG before captioning — Vision hallucinates wordmarks when reading SVG path text.
const svgFiles: Array<{ file: string; relPath: string }> = [];
const assetsDir = join(outputDir, "assets");
for (const f of readdirSync(assetsDir)) {
Expand All @@ -246,30 +247,56 @@ export async function captionImagesWithGemini(
}

if (svgFiles.length > 0) {
progress("design", `Captioning ${svgFiles.length} SVGs via code analysis...`);
progress("design", `Rasterizing + captioning ${svgFiles.length} SVGs via vision API...`);
const SVG_BATCH = 20;
const MAX_SVG_CHARS = 10_000;
const SVG_RENDER_SIZE = 256; // px — enough resolution for Gemini to read wordmarks, small enough to keep payload sub-MB
for (let i = 0; i < svgFiles.length; i += SVG_BATCH) {
const batch = svgFiles.slice(i, i + SVG_BATCH);
const results = await Promise.allSettled(
batch.map(async ({ relPath }) => {
const filePath = join(assetsDir, relPath);
let svgText = readFileSync(filePath, "utf-8");
if (svgText.length > MAX_SVG_CHARS) {
svgText = svgText.slice(0, MAX_SVG_CHARS) + "\n<!-- truncated -->";
let pngBase64: string;
try {
// Flatten against a contrasting background — white-on-white SVGs render invisible to Vision.
const svgSource = readFileSync(filePath, "utf-8");
const lightFillHits = (
svgSource.match(/fill\s*=\s*["'](#fff(fff)?|white|#f[ef][ef])["']/gi) || []
).length;
const darkFillHits = (
svgSource.match(/fill\s*=\s*["'](#000(000)?|black|#[0-3]{6}|#[0-3]{3})["']/gi) || []
).length;
const bg =
lightFillHits > darkFillHits
? { r: 32, g: 32, b: 32 } // dark slate behind light glyphs
: { r: 255, g: 255, b: 255 }; // white behind dark glyphs (default)
const pngBuffer = await sharp(filePath)
.resize({
width: SVG_RENDER_SIZE,
height: SVG_RENDER_SIZE,
fit: "inside",
withoutEnlargement: false,
})
.flatten({ background: bg })
.png()
.toBuffer();
pngBase64 = pngBuffer.toString("base64");
} catch {
// exotic SVG features may break sharp; skip caption rather than block
return { file: relPath, caption: "" };
}
const response = await ai.models.generateContent({
model,
contents: [
{
role: "user",
parts: [
{ inlineData: { mimeType: "image/png", data: pngBase64 } },
{
text:
"This SVG code is from a website. Describe what it renders in ONE short sentence " +
"for a video storyboard. Focus on: what shape/icon/illustration it is, its colors. " +
"Be factual.\n\n" +
svgText,
"Describe this SVG asset rendered from a website in ONE short sentence for a video storyboard. " +
"Focus on: what shape/icon/illustration/wordmark it is, its colors, any text it contains. " +
"If you see a wordmark, READ THE LETTERS LITERALLY — do not guess a brand from context. " +
"Be factual.",
},
],
},
Expand Down Expand Up @@ -358,11 +385,6 @@ export function generateAssetDescriptions(
const svgsPath = join(assetsPath, "svgs");
for (const file of readdirSync(svgsPath)) {
if (!file.endsWith(".svg")) continue;
const geminiCaption = geminiCaptions[`svgs/${file}`];
if (geminiCaption) {
svgLines.push(`svgs/${file} — ${geminiCaption}`);
continue;
}
const svgMatch = tokens.svgs.find(
(s) =>
s.label &&
Expand All @@ -373,9 +395,13 @@ export function generateAssetDescriptions(
.slice(0, 15),
),
);
const geminiCaption = geminiCaptions[`svgs/${file}`];
if (geminiCaption) {
svgLines.push(`svgs/${file} — ${geminiCaption}`);
continue;
}
const label = svgMatch?.label || file.replace(".svg", "").replace(/-/g, " ");
const isLogo = svgMatch?.isLogo || file.includes("logo");
svgLines.push(`svgs/${file} — ${isLogo ? "logo: " : "icon: "}${label}`);
svgLines.push(`svgs/${file} — ${label}`);
}
} catch {
/* no svgs dir */
Expand Down
13 changes: 9 additions & 4 deletions packages/cli/src/capture/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -579,14 +579,19 @@ export async function captureWebsite(
const lines = generateAssetDescriptions(outputDir, tokens, catalogedAssets, geminiCaptions);

if (lines.length > 0) {
const hasGeminiKey = !!(process.env.GEMINI_API_KEY || process.env.GOOGLE_API_KEY);
const header = hasGeminiKey
? "# Asset Descriptions\n\nOne line per file. Read this instead of opening every image individually.\n\nTo find a specific brand or icon, **grep this file for the brand name in the description text** (e.g. `grep -i 'autodesk' asset-descriptions.md`). The Gemini Vision captions identify what's actually in each file — that's the agent's selector.\n\nThe `logo-<hash>.svg` filename prefix is a cheap structural hint (DOM said this SVG was inside a `<header>`, home-link `<a>`, or had an aria-label matching the page brand). It is NOT a content claim — many `logo-*` files are nav icons or decorative shapes. Trust the captions, not the filename prefix.\n\n"
: "# Asset Descriptions\n\n⚠️ GEMINI_API_KEY not set — descriptions below are catalog-derived (alt text, headings, section context, filename) instead of Vision-generated. To get richer Vision descriptions on the next capture, set GEMINI_API_KEY (or GOOGLE_API_KEY) and re-run.\n\nThe `logo-<hash>.svg` filename prefix is a structural hint (DOM said this SVG was inside a `<header>`, home-link `<a>`, or had an aria-label matching the page brand). To pick the actual brand logo without Vision, open the `logo-*` candidates in a previewer or rasterize them with `sharp` before referencing — composing a fake logo ships off-brand in the final video.\n\n";
writeFileSync(
join(outputDir, "extracted", "asset-descriptions.md"),
"# Asset Descriptions\n\nOne line per file. Read this instead of opening every image individually.\n\n" +
lines.map((l) => "- " + l).join("\n") +
"\n",
header + lines.map((l) => "- " + l).join("\n") + "\n",
"utf-8",
);
progress("design", `${lines.length} asset descriptions written`);
progress(
"design",
`${lines.length} asset descriptions written${hasGeminiKey ? "" : " (no Gemini key — catalog-fallback mode)"}`,
);
}
} catch {
/* non-critical */
Expand Down
30 changes: 29 additions & 1 deletion packages/cli/src/capture/tokenExtractor.ts
Original file line number Diff line number Diff line change
Expand Up @@ -353,14 +353,42 @@ const EXTRACT_SCRIPT = `(() => {
// Keep SVGs that have a label OR are at least 16px wide OR are inside a logo/brand context
var inLogoContext = svg.closest('[class*="logo"], [class*="brand"], [class*="partner"], [class*="customer"], [class*="marquee"]') !== null;
if (!label && !inLogoContext && (!w || parseInt(w) < 16)) return null;
// Broadened isLogo: class-substring alone caught 0/32 on heygen.com.
var isLogo = (label && label.toLowerCase().indexOf("logo") !== -1) ||
svg.closest('[class*="logo"], [class*="brand"], [class*="home"], [class*="marquee"], [class*="partner"], [class*="customer"]') !== null;
if (!isLogo) {
var bannerEl = svg.closest('header, nav, [role="banner"]');
if (bannerEl) {
var firstSvg = bannerEl.querySelector('svg');
if (firstSvg === svg) isLogo = true;
}
}
if (!isLogo) {
var anchor = svg.closest('a[href]');
if (anchor) {
var href = anchor.getAttribute('href') || '';
if (href === '/' || href === '#' || href === './' ||
/^https?:\\/\\/[^/]+\\/?$/.test(href)) {
isLogo = true;
}
}
}
if (!isLogo) {
var ariaLabel = svg.getAttribute('aria-label') || svg.getAttribute('title') || '';
var titleBrand = (document.title || '').split(/[-|—]/)[0].trim();
if (titleBrand.length > 1 && titleBrand.length < 30 &&
ariaLabel.toLowerCase().indexOf(titleBrand.toLowerCase()) !== -1) {
isLogo = true;
}
}
var rect = svg.getBoundingClientRect();
return {
label: label || undefined,
viewBox: svg.getAttribute("viewBox") || undefined,
width: Math.round(rect.width),
height: Math.round(rect.height),
outerHTML: svg.outerHTML.slice(0, 10000),
isLogo: (label && label.toLowerCase().indexOf("logo") !== -1) || svg.closest('[class*="logo"], [class*="brand"], [class*="home"], [class*="marquee"], [class*="partner"], [class*="customer"]') !== null
isLogo: isLogo
};
}).filter(Boolean).slice(0, 50);

Expand Down
1 change: 1 addition & 0 deletions packages/cli/src/cli.ts
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,7 @@ const subCommands = {
validate: () => import("./commands/validate.js").then((m) => m.default),
snapshot: () => import("./commands/snapshot.js").then((m) => m.default),
capture: () => import("./commands/capture.js").then((m) => m.default),
"capture-video": () => import("./commands/capture-video.js").then((m) => m.default),
lambda: () => import("./commands/lambda.js").then((m) => m.default),
cloudrun: () => import("./commands/cloudrun.js").then((m) => m.default),
cloud: () => import("./commands/cloud.js").then((m) => m.default),
Expand Down
Loading
Loading