From a28550c01ae8e5c6571b98752aa8cd0a206e41a3 Mon Sep 17 00:00:00 2001 From: Rachael Rose Renk <91027132+rachaelrenk@users.noreply.github.com> Date: Mon, 4 May 2026 10:13:30 -1000 Subject: [PATCH] chore: port SEO audit skill from gitbook to Astro Starlight - Update seo_audit.py URL-to-source-file mapping for src/content/docs/ flat structure (.mdx, index.mdx instead of .md, README.md) - Update sitemap URL to /sitemap-index.xml and add 308 redirect handling - Rewrite SKILL.md: replace all SUMMARY.md refs with src/sidebar.ts, update file paths in title exceptions, document frontmatter title + sidebar.label pattern for fixing duplicate titles - Rename references/gitbook-seo.md to starlight-seo.md and rewrite with full title resolution precedence documentation Co-Authored-By: Oz --- .warp/skills/docs-seo-audit/SKILL.md | 72 ++++++++++---- .../docs-seo-audit/references/gitbook-seo.md | 11 --- .../references/starlight-seo.md | 34 +++++++ .../docs-seo-audit/scripts/seo_audit.py | 94 +++++++++---------- 4 files changed, 135 insertions(+), 76 deletions(-) delete mode 100644 .warp/skills/docs-seo-audit/references/gitbook-seo.md create mode 100644 .warp/skills/docs-seo-audit/references/starlight-seo.md diff --git a/.warp/skills/docs-seo-audit/SKILL.md b/.warp/skills/docs-seo-audit/SKILL.md index cf45583..20edb90 100644 --- a/.warp/skills/docs-seo-audit/SKILL.md +++ b/.warp/skills/docs-seo-audit/SKILL.md @@ -22,7 +22,7 @@ python3 .warp/skills/docs-seo-audit/scripts/seo_audit.py \ --output /tmp/seo-report.json ``` -The script fetches all pages listed in `https://docs.warp.dev/sitemap.xml` (a sitemap index including sub-sitemaps), extracts SEO metadata from each page's HTML, and checks for issues. +The script fetches all pages listed in `https://docs.warp.dev/sitemap-index.xml` (a sitemap index including sub-sitemaps), extracts SEO metadata from each page's HTML, and checks for issues. 
### Options @@ -109,38 +109,77 @@ After reporting, ask the user which issues they want to fix before making change ## Fixing issues Before making any changes, read these references: -1. `references/gitbook-seo.md` in this skill directory — explains the non-obvious way Astro Starlight generates title tags from astro.config.mjs (sidebar config) link text (not the H1 heading). +1. `references/starlight-seo.md` in this skill directory — explains the non-obvious way Astro Starlight generates title tags from sidebar config label text (not the H1 heading). 2. `AGENTS.md` at the docs repo root — the single source of truth for documentation style, voice, terminology, and formatting. All titles and descriptions you write must follow these conventions (terminology, sentence case, active voice, frontmatter description format). ### Key principles -1. **Title tags come from astro.config.mjs (sidebar config) link text**, not the H1 heading. To fix a title, change the link text in the relevant space's `astro.config.mjs (sidebar config)`. +1. **Title tags come from the frontmatter `title` field if present, otherwise from sidebar config labels in `src/sidebar.ts`** — not the H1 heading. To fix a title without changing the sidebar label, add or update the `title` field in the page's frontmatter (and optionally add `sidebar.label` to preserve the short nav label). To fix a title by changing the nav label, change the `label` property in `src/sidebar.ts`. For pages listed as bare slugs (e.g., `'terminal/windows/tabs'`), add an explicit `{ slug, label }` object to override the label. 2. **Meta descriptions come from frontmatter**. To fix a description, edit the `description:` field in the page's YAML frontmatter. 3. **OG and Twitter tags mirror title and description** automatically. No separate fix needed. -4. **Changing astro.config.mjs (sidebar config) link text has side effects**: it also changes the sidebar label, breadcrumbs, and prev/next pagination. URLs are NOT affected. +4. 
**Changing a sidebar config label has side effects**: it also changes the sidebar label, breadcrumbs, and prev/next pagination. URLs are NOT affected (URLs are based on the file path/slug). 5. **When changing a title, also update the H1** in the markdown file for consistency. ### Title exceptions Some page titles are intentionally short or specific and must **not** be changed, even if they trigger a `title_too_short` warning. Skip these pages and note them as intentional exceptions in your report: -- **`src/content/docs/changelog/README.md`** (`Changelog`) — "Changelog" is a clear, universally understood industry term. Branding prefixes like "Warp changelog" or "Release changelog" add no descriptive value and this title should remain as-is. -- **`src/content/docs/terminal/appearance/app-icons.md`** (`App icons`) — The article explicitly explains that *custom* app icons are not available to users. Renaming to "Custom app icons" directly contradicts the page content and must be avoided. -- **`src/content/docs/university/README.md`** (`Guides`) — "Guides" is the landing page for the Guides space. The title is clear and matches the space name; prefixing it (e.g., "Developer Guides") adds no value and creates a mismatch with the space title shown in breadcrumbs. +- **`src/content/docs/changelog/index.mdx`** (`Changelog`) — "Changelog" is a clear, universally understood industry term. Branding prefixes like "Warp changelog" or "Release changelog" add no descriptive value and this title should remain as-is. +- **`src/content/docs/terminal/appearance/app-icons.mdx`** (`Custom app icons`) — The article explicitly explains that *custom* app icons are not available to users. The sidebar label is intentional and must not be changed. +- **`src/content/docs/guides/index.mdx`** (`Guides`) — "Guides" is the landing page for the Guides section. 
The title is clear and matches the section name; prefixing it (e.g., "Developer Guides") adds no value and creates a mismatch with the section title shown in breadcrumbs. +- **`src/content/docs/terminal/windows/tabs.mdx`** (`Tabs`) — The sidebar section header ("Windows and Tabs") already provides terminal context; adding a "Terminal" prefix creates a name not used in the UI or anywhere else in the docs. The `title_too_short` warning is intentionally suppressed — the title is short by design. Do not rename to "Terminal tabs". +- **`src/content/docs/terminal/windows/vertical-tabs.mdx`** (`Vertical Tabs`) — Same rationale: the section header disambiguates the terminal context. The `title_too_short` warning is intentionally suppressed. Do not rename to "Terminal vertical tabs". +- **`src/content/docs/terminal/windows/split-panes.mdx`** (`Split panes`) — Same rationale: the section header disambiguates the terminal context. The `title_too_short` warning is intentionally suppressed. Do not rename to "Terminal split panes". +- **`src/content/docs/terminal/windows/tab-configs.mdx`** (`Tab Configs`) — Same rationale: the section header disambiguates the terminal context. Additionally, "Tab Configs" is a proper feature name and should not be prefixed. The `title_too_short` warning is intentionally suppressed. Do not rename to "Terminal Tab Configs". +- **`src/content/docs/terminal/sessions/index.mdx`** (`Sessions`) — The sidebar section header ("Sessions") already provides terminal context. The `title_too_short` warning is intentionally suppressed. Do not rename to "Terminal sessions". When the audit flags these pages for `title_too_short`, exclude them from your fix list and include a note in your report explaining they are intentional exceptions. If you believe a new title should be added to this exceptions list, flag it for human review before proceeding. +### Sidebar config labels vs. 
H1 headings + +Sidebar config labels (the `label` property in `src/sidebar.ts`) and H1 page headings are **intentionally different** in some cases. Do not change either to match the other unless you are fixing a genuine duplicate title collision. Specifically: + +- Do **not** add section-context prefixes (like "Terminal", "Warp", or "Agent") to short but accurate titles just because the title appears generic in isolation. Sidebar context already provides that disambiguation. +- Do **not** rename sidebar config labels for pages in the exceptions list above. +- Do **not** sync sidebar config label text to match H1 headings (or vice versa) as a standalone change — the two are allowed to differ. + ### Fixing duplicate titles This is the most impactful issue. Common causes: -- Multiple pages named `[Overview](...)` under different sections of the same space -- Generic names like `[Getting Started](...)` or `[FAQs](...)` repeated across sections +- Multiple pages with `label: 'Overview'` under different sections — these all produce the same `<title>` tag +- Generic labels like `'Getting Started'` or `'FAQs'` repeated across sections + +There are two ways to fix duplicate titles, depending on whether the short sidebar label is intentional: + +#### Preferred: frontmatter `title` + `sidebar.label` (keep short nav labels) + +When a page intentionally uses a short sidebar label like "Overview" because the section header provides context, **do not rename the sidebar config label**. Instead, add a descriptive `title` in the page's frontmatter and use `sidebar.label` to preserve the short nav label: + +```yaml +--- +title: Capabilities overview +sidebar: + label: "Overview" +--- +``` + +Astro Starlight uses `title` for the `<title>` tag and `sidebar.label` for the sidebar. This gives each page a unique, SEO-friendly title while keeping the sidebar clean. 
+ +Example: +- `agent-platform/capabilities/index.mdx`: `title: 'Capabilities overview'` + `sidebar.label: 'Overview'` +- `agent-platform/cloud-agents/integrations/index.mdx`: `title: 'Integrations overview'` + `sidebar.label: 'Overview'` + +When using this approach, also update the H1 in the markdown file to match the new `title`. + +#### Alternative: rename the sidebar config label + +If the short label is not intentional, rename the `label` in `src/sidebar.ts` to be unique and descriptive. Use sentence case and correct terminology per `AGENTS.md` (e.g., capitalize proper feature names like "Agent Mode", "Warp Drive", "Codebase Context" — but not generic terms like "overview", "quickstart", or "agents"). Example: +- Before: `{ slug: 'agent-platform/local-agents', label: 'Overview' }` + `{ slug: 'agent-platform/cloud-agents', label: 'Overview' }` +- After: `{ slug: 'agent-platform/local-agents', label: 'Local agents overview' }` + `{ slug: 'agent-platform/cloud-agents', label: 'Cloud agents overview' }` -Fix by making each astro.config.mjs (sidebar config) link text unique and descriptive. The link text should identify the specific topic, not just the page type. Use sentence case and correct terminology per `AGENTS.md` (e.g., capitalize product feature names like "Agent Mode", "Warp Drive", "Codebase Context"). Example: -- Before: `[Overview](local-agents/overview.mdx)` + `[Overview](cloud-agents/overview.md)` -- After: `[Local Agents Overview](local-agents/overview.mdx)` + `[Cloud Agents Overview](cloud-agents/overview.md)` +When changing a sidebar config label, also update the H1 in the markdown file for consistency. ### Fixing missing descriptions @@ -165,12 +204,13 @@ Write descriptions that: After making fixes, review every change before presenting to the user. Run through this checklist, and if anything fails, fix it and re-run the checklist from the top. Only present changes when everything passes. 
- **Does this still mean the same thing?** Titles and descriptions must accurately represent the page content. Read the actual page before writing or rewriting anything. Never invent features, capabilities, or details that aren't on the page. If unsure what the page covers, read it first. -- **Did I introduce a new duplicate?** Scan the full astro.config.mjs (sidebar config) for the space you edited. Verify every link text is unique. This is the most common mistake — fixing one duplicate by picking a name that collides with an existing entry. -- **Does the H1 match?** Every astro.config.mjs (sidebar config) title change needs a corresponding H1 update in the markdown file. Mismatches between sidebar label and page heading confuse readers. +- **Did I introduce a new duplicate?** Scan the full sidebar config in `src/sidebar.ts`. Verify every label is unique within the site. This is the most common mistake — fixing one duplicate by picking a name that collides with an existing entry. +- **Does the H1 match?** Every sidebar config label change needs a corresponding H1 update in the markdown file. Mismatches between sidebar label and page heading confuse readers. - **Is the terminology right?** Cross-check against `AGENTS.md` and how the feature is actually referred to in the existing docs. Don't rename things to terms that aren't used elsewhere in the docs. -- **Does this read naturally in context?** Consider how the title appears (a) as a sidebar label under its section header, and (b) as a search result: `{Title} | {Space} | Warp`. If it sounds awkward or uses internal jargon that users wouldn't recognize, rephrase. +- **Is the casing right?** All labels and H1 headings must use sentence case. Proper product feature names (e.g., "Agent Mode", "Codebase Context", "Admin Panel", "Remote Control", "Warp Drive") retain their capitalization, but generic terms ("overview", "quickstart", "agents", "notifications") are lowercase. Never use title case. 
+- **Does this read naturally in context?** Consider how the title appears (a) as a sidebar label under its section header, and (b) as a search result: `{Title} | {Topic} | Warp`. If it sounds awkward or uses internal jargon that users wouldn't recognize, rephrase. - **Are descriptions grounded in page content?** Don't write descriptions based on the title alone. Read the page, then summarize what's actually there. -- **Any other improvements nearby?** Look at adjacent entries in the astro.config.mjs (sidebar config). Are there other generic titles ("Overview", "Getting Started", "FAQ") that could become duplicates in the future? Flag them proactively. +- **Any other improvements nearby?** Look at adjacent entries in `src/sidebar.ts`. Are there other generic labels ("Overview", "Getting Started", "FAQ") that could become duplicates in the future? Flag them proactively. ### Delivering changes diff --git a/.warp/skills/docs-seo-audit/references/gitbook-seo.md b/.warp/skills/docs-seo-audit/references/gitbook-seo.md deleted file mode 100644 index 3978cce..0000000 --- a/.warp/skills/docs-seo-audit/references/gitbook-seo.md +++ /dev/null @@ -1,11 +0,0 @@ -# How Astro Starlight generates SEO title tags - -Astro Starlight constructs the HTML `<title>` tag as: `{Page Title} | {Space Title} | {Site Title}` - -**The page title comes from the astro.config.mjs (sidebar config) link text, not the H1 heading.** For example, `[Skills](capabilities/skills.md)` in astro.config.mjs (sidebar config) produces `<title>Skills | Agents | Warp`, regardless of what the H1 says in the markdown file. - -This is not documented by Astro Starlight. Their SEO docs only say "the HTML title is based on the page and space title" without specifying that "page title" means the astro.config.mjs (sidebar config) link text. - -Changing the astro.config.mjs (sidebar config) link text also changes the sidebar label, breadcrumbs, and prev/next pagination. 
It does NOT change the URL (URLs are based on the file path). - -Meta descriptions come from the `description` field in YAML frontmatter — this part is straightforward. diff --git a/.warp/skills/docs-seo-audit/references/starlight-seo.md b/.warp/skills/docs-seo-audit/references/starlight-seo.md new file mode 100644 index 0000000..1350e78 --- /dev/null +++ b/.warp/skills/docs-seo-audit/references/starlight-seo.md @@ -0,0 +1,34 @@ +# How Astro Starlight generates SEO title tags + +Astro Starlight constructs the HTML `<title>` tag as: `{Page Title} | {Site Title}` (e.g., `Skills | Warp`). + +**The page title is resolved in this order of precedence:** +1. The `title` field in the page's YAML frontmatter (if present) +2. The `label` property in the sidebar config (`src/sidebar.ts`) +3. The slug-derived title (auto-generated from the file name) + +The H1 heading does **not** affect the `<title>` tag. + +For example, if `src/sidebar.ts` has `{ slug: 'agent-platform/capabilities', label: 'Overview' }` and the page has no frontmatter `title`, the rendered title is `<title>Overview | Warp</title>`. But if the page has `title: Capabilities overview` in frontmatter, that takes precedence and produces `<title>Capabilities overview | Warp</title>`. + +## Decoupling sidebar label from page title + +To give a page a unique SEO title while keeping a short sidebar label, use both `title` and `sidebar.label` in frontmatter: + +```yaml +--- +title: Capabilities overview +sidebar: + label: "Overview" +--- +``` + +This produces `<title>Capabilities overview | Warp</title>` while displaying "Overview" in the sidebar. + +## Side effects of changing sidebar config labels + +Changing the `label` in `src/sidebar.ts` also changes the sidebar label, breadcrumbs, and prev/next pagination. It does NOT change the URL (URLs are based on the file path/slug). + +## Meta descriptions + +Meta descriptions come from the `description` field in YAML frontmatter — this part is straightforward. 
diff --git a/.warp/skills/docs-seo-audit/scripts/seo_audit.py b/.warp/skills/docs-seo-audit/scripts/seo_audit.py index 4da0f27..bd7ac48 100644 --- a/.warp/skills/docs-seo-audit/scripts/seo_audit.py +++ b/.warp/skills/docs-seo-audit/scripts/seo_audit.py @@ -1,8 +1,7 @@ #!/usr/bin/env python3 -""" -SEO audit for docs.warp.dev +"""SEO audit for docs.warp.dev -Crawls the Astro Starlight sitemap index, fetches every page, and checks for common +Crawls the docs.warp.dev sitemap index, fetches every page, and checks for common SEO issues: - Duplicate title tags - Duplicate meta descriptions @@ -24,31 +23,21 @@ from concurrent.futures import ThreadPoolExecutor, as_completed from html.parser import HTMLParser from urllib.error import HTTPError, URLError -from urllib.request import Request, urlopen +from urllib.request import Request, urlopen, HTTPRedirectHandler, build_opener from xml.etree import ElementTree as ET -SITEMAP_INDEX_URL = "https://docs.warp.dev/sitemap.xml" +SITEMAP_INDEX_URL = "https://docs.warp.dev/sitemap-index.xml" NS = {"sm": "http://www.sitemaps.org/schemas/sitemap/0.9"} USER_AGENT = "WarpSEOAudit/1.0" REQUEST_DELAY = 0.15 # seconds between requests to avoid rate-limiting MAX_WORKERS = 6 # --------------------------------------------------------------------------- -# Astro Starlight space → local directory mapping +# Astro Starlight content directory # --------------------------------------------------------------------------- -# Astro Starlight publishes each space at a URL prefix. The "warp" space is the site -# root (no prefix). All others use their directory name as prefix. -SPACE_MAP = { - "agent-platform": "docs/agent-platform", - "reference": "docs/reference", - "changelog": "docs/changelog", - "support-and-community": "docs/support-and-community", - "enterprise": "docs/enterprise", - "university": "university", -} -# The warp space lives at the site root, so pages like /terminal/blocks map -# to src/content/docs/terminal/blocks. 
-ROOT_SPACE_DIR = "docs/warp" +# All content lives under src/content/docs/. URL paths map directly to this +# directory (e.g. /agent-platform/cloud-agents → src/content/docs/agent-platform/cloud-agents). +CONTENT_DIR = "src/content/docs" # --------------------------------------------------------------------------- # HTML parser @@ -131,12 +120,21 @@ def handle_data(self, data): # Fetching helpers # --------------------------------------------------------------------------- +# Build an opener that follows 308 redirects (Python's default handler +# only follows 301/302/303/307 but Vercel serves 308 for sitemap URLs). +class _Redirect308Handler(HTTPRedirectHandler): + def http_error_308(self, req, fp, code, msg, headers): + return self.http_error_302(req, fp, code, msg, headers) + +_opener = build_opener(_Redirect308Handler) + + def fetch(url, retries=2): """Fetch a URL and return its body as a string.""" req = Request(url, headers={"User-Agent": USER_AGENT}) for attempt in range(retries + 1): try: - with urlopen(req, timeout=15) as resp: + with _opener.open(req, timeout=15) as resp: return resp.read().decode("utf-8", errors="replace") except (HTTPError, URLError, OSError) as exc: if attempt == retries: @@ -196,39 +194,37 @@ def extract_seo(url): def url_to_source_path(url, repo_root): """Best-effort mapping from a live URL to the local markdown source file. + Astro Starlight serves content from src/content/docs/. URL paths map + directly to that directory. Files use .mdx (preferred) or .md extensions, + and directory landing pages are index.mdx (not README.md). + Returns the relative path from repo_root, or None if no file is found. """ from urllib.parse import urlparse parsed = urlparse(url) path = parsed.path.strip("/") # e.g. 
"agent-platform/local-agents/overview" - # Determine which space this URL belongs to - local_dir = None - remainder = path - for prefix, directory in SPACE_MAP.items(): - if path == prefix or path.startswith(prefix + "/"): - local_dir = directory - remainder = path[len(prefix):].strip("/") - break - if local_dir is None: - # Root space (warp) - local_dir = ROOT_SPACE_DIR - remainder = path - - if not remainder: - # Space landing page → README.md - candidate = os.path.join(repo_root, local_dir, "README.md") - return os.path.relpath(candidate, repo_root) if os.path.isfile(candidate) else None - - # Try direct file match: remainder.md - candidate = os.path.join(repo_root, local_dir, remainder + ".md") - if os.path.isfile(candidate): - return os.path.relpath(candidate, repo_root) - - # Try directory landing page: remainder/README.md - candidate = os.path.join(repo_root, local_dir, remainder, "README.md") - if os.path.isfile(candidate): - return os.path.relpath(candidate, repo_root) + base = os.path.join(repo_root, CONTENT_DIR) + + if not path: + # Site root landing page → index.mdx + for ext in (".mdx", ".md"): + candidate = os.path.join(base, "index" + ext) + if os.path.isfile(candidate): + return os.path.relpath(candidate, repo_root) + return None + + # Try direct file match: path.mdx, then path.md + for ext in (".mdx", ".md"): + candidate = os.path.join(base, path + ext) + if os.path.isfile(candidate): + return os.path.relpath(candidate, repo_root) + + # Try directory landing page: path/index.mdx, then path/index.md + for ext in (".mdx", ".md"): + candidate = os.path.join(base, path, "index" + ext) + if os.path.isfile(candidate): + return os.path.relpath(candidate, repo_root) return None @@ -238,7 +234,7 @@ def url_to_source_path(url, repo_root): # --------------------------------------------------------------------------- # Title tag length limits (Google typically truncates at ~60 chars). 
-# The "page title" portion is what we control; the " | Space | Warp" suffix +# The "page title" portion is what we control; the " | Topic | Warp" suffix # is appended by Astro Starlight. We check the full rendered title. TITLE_MIN = 20 TITLE_MAX = 70 @@ -370,7 +366,7 @@ def main(): parser.add_argument("--sitemap", default=SITEMAP_INDEX_URL, help="Sitemap index URL (default: %(default)s)") parser.add_argument("--repo-root", default=None, - help="Path to the docs repo root for source-file mapping") + help="Path to the docs repo root (contains src/content/docs/) for source-file mapping") parser.add_argument("--output", "-o", default=None, help="Write JSON report to this file instead of stdout") parser.add_argument("--max-pages", type=int, default=0,