From 9a308303c7692e5c7584a3db88b60b500b898874 Mon Sep 17 00:00:00 2001 From: Nico Alba Date: Thu, 21 May 2026 19:06:20 +0000 Subject: [PATCH 1/7] feat: add nightly sitemap drift detection with Mattermost alert --- .github/workflows/publish.yml | 59 +++++++++++++ unified-doc/publish-unified-doc.sh | 3 + unified-doc/scripts/check-sitemap-drift.mjs | 94 +++++++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 unified-doc/scripts/check-sitemap-drift.mjs diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 30de6f8..2f9988b 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -106,6 +106,65 @@ jobs: path: unified-doc/build-site/ retention-days: 7 + notify-sitemap-drift: + name: Notify doc-alerts of removed paths + needs: publish-docs-from-container + if: always() && needs.publish-docs-from-container.result == 'success' && github.repository_owner == 'netfoundry' + runs-on: ubuntu-latest + steps: + - name: Download build artifact + uses: actions/download-artifact@v4 + with: + name: docusaurus-build-site + path: build-site/ + - name: Check for sitemap drift report + id: drift + run: | + REPORT="build-site/sitemap-drift.json" + if [ ! -f "$REPORT" ]; then + echo "has_drift=false" >> "$GITHUB_OUTPUT" + exit 0 + fi + COUNT=$(jq '.count' "$REPORT") + PATHS=$(jq -r '.removed[]' "$REPORT" | head -20 | sed 's/^/- /') + echo "has_drift=true" >> "$GITHUB_OUTPUT" + echo "count=$COUNT" >> "$GITHUB_OUTPUT" + { + echo "paths<> "$GITHUB_OUTPUT" + - name: Build drift event context + id: ctx + if: steps.drift.outputs.has_drift == 'true' + run: | + RUN_URL="https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" + BODY="⚠️ **${{ steps.drift.outputs.count }} path(s) removed** from the new build — redirects may be needed. 
+ +${{ steps.drift.outputs.paths }} + +[View build logs](${RUN_URL})" + EVENT_JSON=$(jq -cn \ + --arg repo "$GITHUB_REPOSITORY" \ + --arg repo_url "https://github.com/$GITHUB_REPOSITORY" \ + --arg run_url "$RUN_URL" \ + --arg action "$BODY" \ + '{ + repository: { full_name: $repo, html_url: $repo_url, stargazers_count: 0 }, + sender: { login: "ziti-ci", url: "https://api.github.com/users/netfoundry", html_url: "https://github.com/netfoundry", avatar_url: "https://raw.githubusercontent.com/netfoundry/branding/refs/heads/main/images/png/icon/netfoundry-icon-color.png" }, + action: $action, + run_url: $run_url + }') + echo "event-json=$EVENT_JSON" >> "$GITHUB_OUTPUT" + - name: Send Mattermost alert + if: steps.drift.outputs.has_drift == 'true' + uses: openziti/ziti-mattermost-action-py@v1 + with: + zitiId: ${{ secrets.ZITI_MATTERMOST_IDENTITY }} + webhookUrl: ${{ secrets.ZHOOK_URL_DOC_NOTIFICATIONS }} + eventJson: ${{ steps.ctx.outputs.event-json }} + senderUsername: "GitHubZ" + # Notify the doc-alerts Mattermost channel only when the nightly scheduled # run fails. Push/workflow_dispatch runs are watched live by whoever triggered # them; the cron is unattended, so we only need a heads-up on failure. diff --git a/unified-doc/publish-unified-doc.sh b/unified-doc/publish-unified-doc.sh index dcfdb14..1cabe13 100755 --- a/unified-doc/publish-unified-doc.sh +++ b/unified-doc/publish-unified-doc.sh @@ -39,6 +39,9 @@ if [ -f "$SITEMAP" ]; then echo "Injected llms.txt entry into sitemap.xml" fi +# Check for paths removed vs production — writes sitemap-drift.json if any found. 
+node "${pub_script_root}/scripts/check-sitemap-drift.mjs" "$SITEMAP" || true + publish_docs() { local HOST=$1 PORT=$2 USER=$3 TARGET_DIR=$4 KEY_FILE=$5 local zip_target="unified-docs${qualifier}.zip" diff --git a/unified-doc/scripts/check-sitemap-drift.mjs b/unified-doc/scripts/check-sitemap-drift.mjs new file mode 100644 index 0000000..bcf50f9 --- /dev/null +++ b/unified-doc/scripts/check-sitemap-drift.mjs @@ -0,0 +1,94 @@ +#!/usr/bin/env node +/** + * Compares the newly built sitemap against the live production sitemap. + * Any path present in production but absent from the new build is a potential + * broken link — it should either get a redirect or the removal is intentional. + * + * Usage: node check-sitemap-drift.mjs + * + * Writes a JSON report to /sitemap-drift.json if paths were removed. + * Always exits 0 — never blocks the build. + */ + +import { readFileSync, writeFileSync, existsSync } from 'fs'; +import { dirname, join } from 'path'; +import { fileURLToPath } from 'url'; + +const PROD_SITEMAP_URL = 'https://netfoundry.io/docs/sitemap.xml'; + +const newSitemapPath = process.argv[2]; +if (!newSitemapPath) { + console.error('Usage: check-sitemap-drift.mjs '); + process.exit(0); +} + +if (!existsSync(newSitemapPath)) { + console.warn(`[sitemap-drift] New sitemap not found at ${newSitemapPath}, skipping.`); + process.exit(0); +} + +function extractPaths(xml) { + const paths = new Set(); + for (const match of xml.matchAll(/([^<]+)<\/loc>/g)) { + try { + const url = new URL(match[1]); + // Normalize: strip trailing slash except for root + const p = url.pathname.replace(/\/$/, '') || '/'; + paths.add(p); + } catch { + // ignore malformed URLs + } + } + return paths; +} + +// Paths to ignore — removed intentionally or not real doc pages +const IGNORE_PREFIXES = [ + '/docs/openziti/blog', + '/docs/openziti/1.x', + '/docs/openziti/tags', + '/docs/openziti/category', +]; + +function shouldIgnore(p) { + return IGNORE_PREFIXES.some(prefix => p === prefix || 
p.startsWith(prefix + '/')); +} + +async function main() { + // Fetch production sitemap + let prodXml; + try { + const res = await fetch(PROD_SITEMAP_URL); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + prodXml = await res.text(); + } catch (err) { + console.warn(`[sitemap-drift] Could not fetch production sitemap: ${err.message}. Skipping.`); + process.exit(0); + } + + const newXml = readFileSync(newSitemapPath, 'utf8'); + + const prodPaths = extractPaths(prodXml); + const newPaths = extractPaths(newXml); + + const removed = [...prodPaths] + .filter(p => !newPaths.has(p) && !shouldIgnore(p)) + .sort(); + + if (removed.length === 0) { + console.log('[sitemap-drift] No paths removed. All good.'); + process.exit(0); + } + + console.warn(`[sitemap-drift] ⚠️ ${removed.length} path(s) removed from the new build:`); + for (const p of removed) console.warn(` - ${p}`); + + const report = { removed, count: removed.length }; + const reportPath = join(dirname(newSitemapPath), 'sitemap-drift.json'); + writeFileSync(reportPath, JSON.stringify(report, null, 2)); + console.warn(`[sitemap-drift] Report written to ${reportPath}`); + + process.exit(0); +} + +main(); From 070e8d12409cfc96ff003cb132311e71d3771eb5 Mon Sep 17 00:00:00 2001 From: Nico Alba Date: Thu, 21 May 2026 20:06:57 +0000 Subject: [PATCH 2/7] refactor: rebuild sitemap drift check as pre-publish gate - Rewrite check-sitemap-drift.mjs: compare new build against cached baseline (not live prod), check redirect stubs for each removed path, exit 1 on unresolved removals so the publish is aborted - Update publish-unified-doc.sh: pass baseline path and build dir, remove || true so drift failures actually block the script - Update publish.yml: add actions/cache restore/save for baseline, drop the wrong notify-sitemap-drift job, add inline drift alert steps (if: failure()) that send to Mattermost doc-alerts when unresolved removals are found Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/publish.yml | 63 
+++++++------- unified-doc/publish-unified-doc.sh | 7 +- unified-doc/scripts/check-sitemap-drift.mjs | 94 ++++++++++++++------- 3 files changed, 100 insertions(+), 64 deletions(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 2f9988b..9897c55 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -66,6 +66,14 @@ jobs: - name: Install csvtojson run: npm install -g csvtojson + - name: Restore sitemap baseline + uses: actions/cache/restore@v4 + with: + path: unified-doc/sitemap-baseline.xml + key: sitemap-baseline-${{ github.ref_name }}-${{ github.run_id }} + restore-keys: | + sitemap-baseline-${{ github.ref_name }}- + - name: Setup SSH run: | mkdir -p $HOME/.ssh @@ -99,49 +107,41 @@ jobs: DOCUSAURUS_URL: 'https://netfoundry.io' run: unified-doc/publish-unified-doc.sh -ds + - name: Update sitemap baseline + if: success() + run: cp unified-doc/build-site/sitemap.xml unified-doc/sitemap-baseline.xml + + - name: Cache sitemap baseline + if: success() + uses: actions/cache/save@v4 + with: + path: unified-doc/sitemap-baseline.xml + key: sitemap-baseline-${{ github.ref_name }}-${{ github.run_id }} + - name: Upload build artifact + if: success() uses: actions/upload-artifact@v4 with: name: docusaurus-build-site path: unified-doc/build-site/ retention-days: 7 - notify-sitemap-drift: - name: Notify doc-alerts of removed paths - needs: publish-docs-from-container - if: always() && needs.publish-docs-from-container.result == 'success' && github.repository_owner == 'netfoundry' - runs-on: ubuntu-latest - steps: - - name: Download build artifact - uses: actions/download-artifact@v4 - with: - name: docusaurus-build-site - path: build-site/ - - name: Check for sitemap drift report - id: drift + - name: Build drift alert context + id: drift-ctx + if: failure() run: | - REPORT="build-site/sitemap-drift.json" + REPORT="unified-doc/build-site/sitemap-drift.json" if [ ! 
-f "$REPORT" ]; then echo "has_drift=false" >> "$GITHUB_OUTPUT" exit 0 fi - COUNT=$(jq '.count' "$REPORT") - PATHS=$(jq -r '.removed[]' "$REPORT" | head -20 | sed 's/^/- /') echo "has_drift=true" >> "$GITHUB_OUTPUT" - echo "count=$COUNT" >> "$GITHUB_OUTPUT" - { - echo "paths<> "$GITHUB_OUTPUT" - - name: Build drift event context - id: ctx - if: steps.drift.outputs.has_drift == 'true' - run: | + COUNT=$(jq '.count' "$REPORT") + PATHS=$(jq -r '.unresolved[]' "$REPORT" | head -20 | sed 's/^/- /') RUN_URL="https://github.com/$GITHUB_REPOSITORY/actions/runs/$GITHUB_RUN_ID" - BODY="⚠️ **${{ steps.drift.outputs.count }} path(s) removed** from the new build — redirects may be needed. + BODY="❌ **${COUNT} path(s) removed with no redirect** — build blocked before publish. -${{ steps.drift.outputs.paths }} +${PATHS} [View build logs](${RUN_URL})" EVENT_JSON=$(jq -cn \ @@ -156,13 +156,14 @@ ${{ steps.drift.outputs.paths }} run_url: $run_url }') echo "event-json=$EVENT_JSON" >> "$GITHUB_OUTPUT" - - name: Send Mattermost alert - if: steps.drift.outputs.has_drift == 'true' + + - name: Send drift alert + if: failure() && steps.drift-ctx.outputs.has_drift == 'true' uses: openziti/ziti-mattermost-action-py@v1 with: zitiId: ${{ secrets.ZITI_MATTERMOST_IDENTITY }} webhookUrl: ${{ secrets.ZHOOK_URL_DOC_NOTIFICATIONS }} - eventJson: ${{ steps.ctx.outputs.event-json }} + eventJson: ${{ steps.drift-ctx.outputs.event-json }} senderUsername: "GitHubZ" # Notify the doc-alerts Mattermost channel only when the nightly scheduled diff --git a/unified-doc/publish-unified-doc.sh b/unified-doc/publish-unified-doc.sh index 1cabe13..0ddc856 100755 --- a/unified-doc/publish-unified-doc.sh +++ b/unified-doc/publish-unified-doc.sh @@ -39,8 +39,11 @@ if [ -f "$SITEMAP" ]; then echo "Injected llms.txt entry into sitemap.xml" fi -# Check for paths removed vs production — writes sitemap-drift.json if any found. 
-node "${pub_script_root}/scripts/check-sitemap-drift.mjs" "$SITEMAP" || true +# Gate: hard-fail if any paths were removed without a redirect stub. +# BASELINE is restored here by CI (actions/cache); absent on first run → seeds from live prod. +BASELINE="${pub_script_root}/sitemap-baseline.xml" +node "${pub_script_root}/scripts/check-sitemap-drift.mjs" \ + "$SITEMAP" "$BASELINE" "${pub_script_root}/build${qualifier}" publish_docs() { local HOST=$1 PORT=$2 USER=$3 TARGET_DIR=$4 KEY_FILE=$5 diff --git a/unified-doc/scripts/check-sitemap-drift.mjs b/unified-doc/scripts/check-sitemap-drift.mjs index bcf50f9..2cadff7 100644 --- a/unified-doc/scripts/check-sitemap-drift.mjs +++ b/unified-doc/scripts/check-sitemap-drift.mjs @@ -1,24 +1,33 @@ #!/usr/bin/env node /** - * Compares the newly built sitemap against the live production sitemap. - * Any path present in production but absent from the new build is a potential - * broken link — it should either get a redirect or the removal is intentional. + * Pre-publish sitemap drift gate. * - * Usage: node check-sitemap-drift.mjs + * Compares the newly built sitemap against the baseline from the previous build. + * For each path removed from the new build, checks whether a redirect stub exists + * in the build output (`plugin-client-redirects` writes `/index.html` stubs). + * Unresolved removals (path gone, no stub) cause a hard exit 1, aborting the publish + * before any files reach the server. * - * Writes a JSON report to /sitemap-drift.json if paths were removed. - * Always exits 0 — never blocks the build. + * Usage: node check-sitemap-drift.mjs + * + * - new-sitemap: path to the freshly built sitemap.xml + * - baseline-sitemap: path to the previous build's sitemap.xml (from CI cache); + * if absent, seeds from live prod on first run + * - build-dir: root of the build output (for redirect stub checks) + * + * Writes sitemap-drift.json next to new-sitemap on exit 1 for the alert step. + * Exit 0 = clean. 
Exit 1 = unresolved removals found. */ import { readFileSync, writeFileSync, existsSync } from 'fs'; import { dirname, join } from 'path'; -import { fileURLToPath } from 'url'; const PROD_SITEMAP_URL = 'https://netfoundry.io/docs/sitemap.xml'; -const newSitemapPath = process.argv[2]; -if (!newSitemapPath) { - console.error('Usage: check-sitemap-drift.mjs '); +const [newSitemapPath, baselineSitemapPath, buildDir] = process.argv.slice(2); + +if (!newSitemapPath || !baselineSitemapPath || !buildDir) { + console.error('Usage: check-sitemap-drift.mjs '); process.exit(0); } @@ -32,7 +41,6 @@ function extractPaths(xml) { for (const match of xml.matchAll(/([^<]+)<\/loc>/g)) { try { const url = new URL(match[1]); - // Normalize: strip trailing slash except for root const p = url.pathname.replace(/\/$/, '') || '/'; paths.add(p); } catch { @@ -42,7 +50,7 @@ function extractPaths(xml) { return paths; } -// Paths to ignore — removed intentionally or not real doc pages +// Paths removed intentionally — not real doc pages, expected churn const IGNORE_PREFIXES = [ '/docs/openziti/blog', '/docs/openziti/1.x', @@ -54,24 +62,34 @@ function shouldIgnore(p) { return IGNORE_PREFIXES.some(prefix => p === prefix || p.startsWith(prefix + '/')); } +function hasRedirectStub(p) { + // plugin-client-redirects writes //index.html for each redirect + return existsSync(join(buildDir, p, 'index.html')); +} + async function main() { - // Fetch production sitemap - let prodXml; - try { - const res = await fetch(PROD_SITEMAP_URL); - if (!res.ok) throw new Error(`HTTP ${res.status}`); - prodXml = await res.text(); - } catch (err) { - console.warn(`[sitemap-drift] Could not fetch production sitemap: ${err.message}. 
Skipping.`); - process.exit(0); - } + let baselineXml; - const newXml = readFileSync(newSitemapPath, 'utf8'); + if (existsSync(baselineSitemapPath)) { + console.log(`[sitemap-drift] Using cached baseline: ${baselineSitemapPath}`); + baselineXml = readFileSync(baselineSitemapPath, 'utf8'); + } else { + console.log(`[sitemap-drift] No cached baseline — seeding from ${PROD_SITEMAP_URL}`); + try { + const res = await fetch(PROD_SITEMAP_URL); + if (!res.ok) throw new Error(`HTTP ${res.status}`); + baselineXml = await res.text(); + } catch (err) { + console.warn(`[sitemap-drift] Could not fetch baseline: ${err.message}. Skipping check.`); + process.exit(0); + } + } - const prodPaths = extractPaths(prodXml); - const newPaths = extractPaths(newXml); + const newXml = readFileSync(newSitemapPath, 'utf8'); + const baselinePaths = extractPaths(baselineXml); + const newPaths = extractPaths(newXml); - const removed = [...prodPaths] + const removed = [...baselinePaths] .filter(p => !newPaths.has(p) && !shouldIgnore(p)) .sort(); @@ -80,15 +98,29 @@ async function main() { process.exit(0); } - console.warn(`[sitemap-drift] ⚠️ ${removed.length} path(s) removed from the new build:`); - for (const p of removed) console.warn(` - ${p}`); + const covered = removed.filter(p => hasRedirectStub(p)); + const unresolved = removed.filter(p => !hasRedirectStub(p)); + + if (covered.length > 0) { + console.log(`[sitemap-drift] ${covered.length} removed path(s) covered by redirects:`); + for (const p of covered) console.log(` ✓ ${p}`); + } + + if (unresolved.length === 0) { + console.log('[sitemap-drift] All removed paths have redirects. 
All good.'); + process.exit(0); + } + + console.error(`[sitemap-drift] ❌ ${unresolved.length} path(s) removed with no redirect:`); + for (const p of unresolved) console.error(` ✗ ${p}`); + console.error('[sitemap-drift] Add redirects for the above paths, then rebuild.'); - const report = { removed, count: removed.length }; + const report = { unresolved, covered, count: unresolved.length }; const reportPath = join(dirname(newSitemapPath), 'sitemap-drift.json'); writeFileSync(reportPath, JSON.stringify(report, null, 2)); - console.warn(`[sitemap-drift] Report written to ${reportPath}`); + console.error(`[sitemap-drift] Report written to ${reportPath}`); - process.exit(0); + process.exit(1); } main(); From e5ff6c322ff23762b61ef8fa7831b1dfbb98c725 Mon Sep 17 00:00:00 2001 From: Nico Alba Date: Thu, 21 May 2026 20:30:29 +0000 Subject: [PATCH 3/7] fix: prevent double Mattermost alert on nightly drift failure Expose has_drift as a job output and skip notify-mattermost when drift already sent its own alert. 
Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/publish.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index 9897c55..c900222 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -47,6 +47,8 @@ jobs: # these are defined in a github environment: repo/settings/environments ZITI_HOTJAR_APPID: ${{ secrets.ZITI_HOTJAR_APPID }} environment: ${{ github.event.inputs.environment || (github.ref_name == 'main' && 'main' || 'stg') }} + outputs: + has_drift: ${{ steps.drift-ctx.outputs.has_drift }} steps: - name: Checkout Workspace uses: actions/checkout@v4 @@ -172,7 +174,7 @@ ${PATHS} notify-mattermost: name: Notify doc-alerts of nightly build failure needs: publish-docs-from-container - if: always() && github.event_name == 'schedule' && github.repository_owner == 'netfoundry' && needs.publish-docs-from-container.result != 'success' + if: always() && github.event_name == 'schedule' && github.repository_owner == 'netfoundry' && needs.publish-docs-from-container.result != 'success' && needs.publish-docs-from-container.outputs.has_drift != 'true' runs-on: ubuntu-latest steps: - name: Build schedule event context From ad732cacc052b1b91386d5ff83e4b2f2dbb99968 Mon Sep 17 00:00:00 2001 From: Nico Alba Date: Fri, 22 May 2026 18:04:24 +0000 Subject: [PATCH 4/7] feat: redirect quality checks, copy-paste output, baseline artifacts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Extract IGNORE_PREFIXES into sitemap-ignore.json (colocated with script, passed as optional 4th arg) so it's easy to find and edit - Add yarn check-drift / yarn unified:check-drift aliases so the gate can be run locally without memorizing the arg triple; publish script uses the alias instead of invoking node directly - Emit copy-paste redirect snippets for unresolved paths: grouped by product with the target repo/function named, fuzzy-guess the 
`to` by matching the last path segment against the new sitemap - Scan all redirect stubs (plugin-client-redirects index.html files), build a redirect graph, and detect: stale targets (final target not in sitemap → exit 1), loops (→ exit 1), chained >1 hop (warning), shadowed stubs where the path is also a real page (warning) - Archive input and output sitemap baselines as artifacts (90-day retention) for inspection and history via gh run download Co-Authored-By: Claude Sonnet 4.6 --- .github/workflows/publish.yml | 16 ++ package.json | 3 +- unified-doc/package.json | 3 +- unified-doc/publish-unified-doc.sh | 4 +- unified-doc/scripts/check-sitemap-drift.mjs | 278 ++++++++++++++++---- unified-doc/scripts/sitemap-ignore.json | 8 + 6 files changed, 252 insertions(+), 60 deletions(-) create mode 100644 unified-doc/scripts/sitemap-ignore.json diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml index c900222..b05297e 100644 --- a/.github/workflows/publish.yml +++ b/.github/workflows/publish.yml @@ -76,6 +76,14 @@ jobs: restore-keys: | sitemap-baseline-${{ github.ref_name }}- + - name: Archive input sitemap baseline + if: hashFiles('unified-doc/sitemap-baseline.xml') != '' + uses: actions/upload-artifact@v4 + with: + name: sitemap-baseline-input + path: unified-doc/sitemap-baseline.xml + retention-days: 90 + - name: Setup SSH run: | mkdir -p $HOME/.ssh @@ -120,6 +128,14 @@ jobs: path: unified-doc/sitemap-baseline.xml key: sitemap-baseline-${{ github.ref_name }}-${{ github.run_id }} + - name: Archive output sitemap baseline + if: success() + uses: actions/upload-artifact@v4 + with: + name: sitemap-baseline-output + path: unified-doc/sitemap-baseline.xml + retention-days: 90 + - name: Upload build artifact if: success() uses: actions/upload-artifact@v4 diff --git a/package.json b/package.json index 6974ca0..142d5eb 100644 --- a/package.json +++ b/package.json @@ -23,7 +23,8 @@ "dev": "yarn workspace test-site start", "build": "yarn workspace test-site 
build", - "reinstall": "node scripts/reinstall.mjs" + "reinstall": "node scripts/reinstall.mjs", + "unified:check-drift": "yarn --cwd unified-doc check-drift" }, "packageManager": "yarn@1.22.22" } diff --git a/unified-doc/package.json b/unified-doc/package.json index 9e37d44..c704600 100644 --- a/unified-doc/package.json +++ b/unified-doc/package.json @@ -45,7 +45,8 @@ "vrt:report:zlan": "backstop openReport --config=backstop.zlan.json", "vrt:report:home": "backstop openReport --config=backstop.home.json", "vrt": "node scripts/vrt-run.mjs", - "vrt:clean": "node scripts/vrt-run.mjs" + "vrt:clean": "node scripts/vrt-run.mjs", + "check-drift": "node scripts/check-sitemap-drift.mjs build-site/sitemap.xml sitemap-baseline.xml build-site" }, "dependencies": { "@docusaurus/core": "^3.10.1", diff --git a/unified-doc/publish-unified-doc.sh b/unified-doc/publish-unified-doc.sh index 0ddc856..aa80def 100755 --- a/unified-doc/publish-unified-doc.sh +++ b/unified-doc/publish-unified-doc.sh @@ -41,9 +41,7 @@ fi # Gate: hard-fail if any paths were removed without a redirect stub. # BASELINE is restored here by CI (actions/cache); absent on first run → seeds from live prod. -BASELINE="${pub_script_root}/sitemap-baseline.xml" -node "${pub_script_root}/scripts/check-sitemap-drift.mjs" \ - "$SITEMAP" "$BASELINE" "${pub_script_root}/build${qualifier}" +yarn --cwd "${pub_script_root}" check-drift publish_docs() { local HOST=$1 PORT=$2 USER=$3 TARGET_DIR=$4 KEY_FILE=$5 diff --git a/unified-doc/scripts/check-sitemap-drift.mjs b/unified-doc/scripts/check-sitemap-drift.mjs index 2cadff7..8c6f3e0 100644 --- a/unified-doc/scripts/check-sitemap-drift.mjs +++ b/unified-doc/scripts/check-sitemap-drift.mjs @@ -2,74 +2,203 @@ /** * Pre-publish sitemap drift gate. * - * Compares the newly built sitemap against the baseline from the previous build. 
- * For each path removed from the new build, checks whether a redirect stub exists - * in the build output (`plugin-client-redirects` writes `/index.html` stubs). - * Unresolved removals (path gone, no stub) cause a hard exit 1, aborting the publish - * before any files reach the server. + * Pass 1 — removed paths: compare new build vs baseline sitemap. + * Paths removed without a redirect stub → exit 1. * - * Usage: node check-sitemap-drift.mjs + * Pass 2 — redirect quality: scan every stub in the build output. + * Stale (stub → removed page) and loops → exit 1. + * Chained (>1 hop) and shadowed (real page + stub) → warning only. * - * - new-sitemap: path to the freshly built sitemap.xml - * - baseline-sitemap: path to the previous build's sitemap.xml (from CI cache); + * Usage: node check-sitemap-drift.mjs [ignore-config] + * + * new-sitemap: freshly built sitemap.xml + * baseline-sitemap: previous build's sitemap.xml (from CI cache); * if absent, seeds from live prod on first run - * - build-dir: root of the build output (for redirect stub checks) + * build-dir: root of the build output (for stub inspection) + * ignore-config: JSON file with { "prefixes": [...] }; + * defaults to sitemap-ignore.json next to this script * - * Writes sitemap-drift.json next to new-sitemap on exit 1 for the alert step. - * Exit 0 = clean. Exit 1 = unresolved removals found. + * Writes sitemap-drift.json next to new-sitemap on exit 1. + * Exit 0 = clean. Exit 1 = gate failed. 
*/ -import { readFileSync, writeFileSync, existsSync } from 'fs'; -import { dirname, join } from 'path'; +import { readFileSync, writeFileSync, existsSync, readdirSync } from 'fs'; +import { dirname, join, resolve, basename } from 'path'; +import { fileURLToPath } from 'url'; const PROD_SITEMAP_URL = 'https://netfoundry.io/docs/sitemap.xml'; +const SCRIPT_DIR = dirname(fileURLToPath(import.meta.url)); + +// ---------- CLI args ---------- -const [newSitemapPath, baselineSitemapPath, buildDir] = process.argv.slice(2); +const [newSitemapPath, baselineSitemapPath, buildDir, ignoreConfigArg] = process.argv.slice(2); if (!newSitemapPath || !baselineSitemapPath || !buildDir) { - console.error('Usage: check-sitemap-drift.mjs '); + console.error('Usage: check-sitemap-drift.mjs [ignore-config]'); process.exit(0); } - if (!existsSync(newSitemapPath)) { console.warn(`[sitemap-drift] New sitemap not found at ${newSitemapPath}, skipping.`); process.exit(0); } +// ---------- Ignore config ---------- + +const ignoreConfigPath = ignoreConfigArg ?? join(SCRIPT_DIR, 'sitemap-ignore.json'); +let ignorePrefixes = []; +if (existsSync(ignoreConfigPath)) { + try { + ignorePrefixes = JSON.parse(readFileSync(ignoreConfigPath, 'utf8')).prefixes ?? 
[]; + } catch { + console.warn(`[sitemap-drift] Could not parse ignore config at ${ignoreConfigPath}`); + } +} + +function shouldIgnore(p) { + return ignorePrefixes.some(prefix => p === prefix || p.startsWith(prefix + '/')); +} + +// ---------- Sitemap parsing ---------- + function extractPaths(xml) { const paths = new Set(); for (const match of xml.matchAll(/([^<]+)<\/loc>/g)) { try { - const url = new URL(match[1]); - const p = url.pathname.replace(/\/$/, '') || '/'; - paths.add(p); - } catch { - // ignore malformed URLs - } + paths.add(new URL(match[1]).pathname.replace(/\/$/, '') || '/'); + } catch { /* ignore malformed */ } } return paths; } -// Paths removed intentionally — not real doc pages, expected churn -const IGNORE_PREFIXES = [ - '/docs/openziti/blog', - '/docs/openziti/1.x', - '/docs/openziti/tags', - '/docs/openziti/category', +// ---------- Product grouping (for copy-paste output) ---------- + +const PRODUCTS = [ + { prefix: '/docs/openziti', name: 'openziti', repo: 'ziti-doc', fn: 'openzitiRedirects()' }, + { prefix: '/docs/zrok', name: 'zrok', repo: 'zrok', fn: 'zrokRedirects()' }, + { prefix: '/docs/selfhosted', name: 'selfhosted', repo: 'k8s-on-prem-installations', fn: 'selfhostedRedirects()' }, + { prefix: '/docs/frontdoor', name: 'frontdoor', repo: 'frontdoor', fn: 'frontdoorRedirects()' }, + { prefix: '/docs/zlan', name: 'zlan', repo: 'zlan', fn: 'zlanRedirects()' }, ]; -function shouldIgnore(p) { - return IGNORE_PREFIXES.some(prefix => p === prefix || p.startsWith(prefix + '/')); +function productFor(p) { + return PRODUCTS.find(m => p === m.prefix || p.startsWith(m.prefix + '/')) + ?? 
{ prefix: '', name: 'unified-doc', repo: 'docusaurus-shared', fn: 'redirects' }; } -function hasRedirectStub(p) { - // plugin-client-redirects writes //index.html for each redirect - return existsSync(join(buildDir, p, 'index.html')); +// ---------- Fuzzy target guess ---------- + +function fuzzyGuess(removedPath, newPaths) { + const seg = removedPath.split('/').filter(Boolean).pop() ?? ''; + if (!seg || seg.length < 3) return []; + const { prefix } = productFor(removedPath); + return [...newPaths].filter(p => p.startsWith(prefix) && p.endsWith('/' + seg)); } +// ---------- Copy-paste output ---------- + +function printUnresolvedAsRedirects(unresolved, newPaths) { + console.error('\n[sitemap-drift] Paste into the appropriate redirects() function:\n'); + const byProduct = new Map(); + for (const p of unresolved) { + const prod = productFor(p); + if (!byProduct.has(prod.name)) byProduct.set(prod.name, { prod, paths: [] }); + byProduct.get(prod.name).paths.push(p); + } + for (const { prod, paths } of byProduct.values()) { + console.error(` // ${prod.name} — ${prod.repo} → ${prod.fn}`); + for (const p of paths) { + const guesses = fuzzyGuess(p, newPaths); + if (guesses.length === 1) { + console.error(` { from: '${p}', to: '${guesses[0]}' }, // ← guess — verify before using`); + } else if (guesses.length > 1) { + console.error(` { from: '${p}', to: '/docs/TODO-fill-in' }, // ← guesses: ${guesses.join(', ')}`); + } else { + console.error(` { from: '${p}', to: '/docs/TODO-fill-in' },`); + } + } + console.error(''); + } +} + +// ---------- Redirect stub scanning ---------- + +function buildRedirectMap(dir) { + const map = new Map(); // fromPath → toPath + const base = resolve(dir); + + function walk(current) { + for (const entry of readdirSync(current, { withFileTypes: true })) { + const full = join(current, entry.name); + if (entry.isDirectory()) { + walk(full); + } else if (entry.name === 'index.html') { + const html = readFileSync(full, 'utf8'); + // Match content="0; 
url=..." regardless of attribute order + const m = html.match(/content="[^"]*\burl=([^";\s]+)/i); + if (!m) continue; + let toPath = m[1]; + try { toPath = new URL(toPath, 'https://x').pathname.replace(/\/$/, '') || '/'; } catch { /* keep */ } + const fromPath = dirname(full).slice(base.length).replace(/\/$/, '') || '/'; + map.set(fromPath, toPath); + } + } + } + + walk(base); + return map; +} + +// ---------- Redirect quality checks ---------- + +function validateRedirects(redirectMap, newPaths) { + const stale = []; // stub's final target not in sitemap + const loops = []; // redirect cycle + const chained = []; // >1 hop + const shadowed = []; // stub path is also a live sitemap page + + for (const [fromPath, directTarget] of redirectMap) { + if (shouldIgnore(fromPath)) continue; + + if (newPaths.has(fromPath)) { + shadowed.push({ from: fromPath, to: directTarget }); + } + + // Walk the chain + const visited = [fromPath]; + let cur = directTarget; + let loopFound = false; + + while (redirectMap.has(cur)) { + if (visited.includes(cur)) { + loops.push({ from: fromPath, cycle: [...visited, cur] }); + loopFound = true; + break; + } + visited.push(cur); + cur = redirectMap.get(cur); + } + if (loopFound) continue; + + // visited = [fromPath, ...intermediateStubs], cur = final target + const hops = visited.length; // 1 = direct, >1 = chained + if (hops > 1) { + chained.push({ from: fromPath, via: visited.slice(1), target: cur, hops }); + } + + // Stale: final target not in sitemap (ignore external URLs) + if (!cur.startsWith('http') && !newPaths.has(cur)) { + stale.push({ from: fromPath, to: directTarget, finalTarget: cur }); + } + } + + return { stale, loops, chained, shadowed }; +} + +// ---------- Main ---------- + async function main() { + // Load baseline let baselineXml; - if (existsSync(baselineSitemapPath)) { console.log(`[sitemap-drift] Using cached baseline: ${baselineSitemapPath}`); baselineXml = readFileSync(baselineSitemapPath, 'utf8'); @@ -80,45 +209,84 
@@ async function main() { if (!res.ok) throw new Error(`HTTP ${res.status}`); baselineXml = await res.text(); } catch (err) { - console.warn(`[sitemap-drift] Could not fetch baseline: ${err.message}. Skipping check.`); + console.warn(`[sitemap-drift] Could not fetch baseline: ${err.message}. Skipping.`); process.exit(0); } } - const newXml = readFileSync(newSitemapPath, 'utf8'); + const newXml = readFileSync(newSitemapPath, 'utf8'); const baselinePaths = extractPaths(baselineXml); const newPaths = extractPaths(newXml); - const removed = [...baselinePaths] - .filter(p => !newPaths.has(p) && !shouldIgnore(p)) - .sort(); - - if (removed.length === 0) { - console.log('[sitemap-drift] No paths removed. All good.'); - process.exit(0); - } - - const covered = removed.filter(p => hasRedirectStub(p)); - const unresolved = removed.filter(p => !hasRedirectStub(p)); + // --- Pass 1: removed paths --- + const removed = [...baselinePaths].filter(p => !newPaths.has(p) && !shouldIgnore(p)).sort(); + const covered = removed.filter(p => existsSync(join(buildDir, p, 'index.html'))); + const unresolved = removed.filter(p => !existsSync(join(buildDir, p, 'index.html'))); if (covered.length > 0) { console.log(`[sitemap-drift] ${covered.length} removed path(s) covered by redirects:`); for (const p of covered) console.log(` ✓ ${p}`); } - if (unresolved.length === 0) { - console.log('[sitemap-drift] All removed paths have redirects. 
All good.'); - process.exit(0); + // --- Pass 2: redirect quality --- + console.log(`[sitemap-drift] Scanning redirect stubs...`); + const redirectMap = buildRedirectMap(buildDir); + const { stale, loops, chained, shadowed } = validateRedirects(redirectMap, newPaths); + + // Non-blocking warnings + if (chained.length > 0) { + console.warn(`\n[sitemap-drift] ⚠️ ${chained.length} chained redirect(s) (>1 hop — consider flattening):`); + for (const { from, via, target, hops } of chained) { + console.warn(` ~ ${from} → ${[...via, target].join(' → ')} (${hops} hops)`); + } + } + if (shadowed.length > 0) { + console.warn(`\n[sitemap-drift] ⚠️ ${shadowed.length} shadowed redirect(s) (dead config — real page wins):`); + for (const { from, to } of shadowed) { + console.warn(` ~ ${from} → ${to}`); + } + } + + // Gate failures + const failed = unresolved.length > 0 || stale.length > 0 || loops.length > 0; + + if (unresolved.length > 0) { + console.error(`\n[sitemap-drift] ❌ ${unresolved.length} path(s) removed with no redirect:`); + for (const p of unresolved) console.error(` ✗ ${p}`); + printUnresolvedAsRedirects(unresolved, newPaths); } - console.error(`[sitemap-drift] ❌ ${unresolved.length} path(s) removed with no redirect:`); - for (const p of unresolved) console.error(` ✗ ${p}`); - console.error('[sitemap-drift] Add redirects for the above paths, then rebuild.'); + if (stale.length > 0) { + console.error(`\n[sitemap-drift] ❌ ${stale.length} stale redirect(s) — stub points to removed page:`); + for (const { from, to, finalTarget } of stale) { + const chain = to === finalTarget ? to : `${to} → ... → ${finalTarget}`; + console.error(` ✗ ${from} → ${chain}`); + } + } + + if (loops.length > 0) { + console.error(`\n[sitemap-drift] ❌ ${loops.length} redirect loop(s):`); + for (const { from, cycle } of loops) { + console.error(` ✗ ${from} → ${cycle.join(' → ')} (cycle)`); + } + } + + if (!failed) { + console.log(removed.length === 0 + ? '[sitemap-drift] No paths removed. 
All good.' + : '[sitemap-drift] All removed paths have redirects. All good.'); + process.exit(0); + } - const report = { unresolved, covered, count: unresolved.length }; + const report = { + unresolved, + covered, + count: unresolved.length, + redirectIssues: { stale, loops, chained, shadowed }, + }; const reportPath = join(dirname(newSitemapPath), 'sitemap-drift.json'); writeFileSync(reportPath, JSON.stringify(report, null, 2)); - console.error(`[sitemap-drift] Report written to ${reportPath}`); + console.error(`\n[sitemap-drift] Report written to ${reportPath}`); process.exit(1); } diff --git a/unified-doc/scripts/sitemap-ignore.json b/unified-doc/scripts/sitemap-ignore.json new file mode 100644 index 0000000..09a6f1b --- /dev/null +++ b/unified-doc/scripts/sitemap-ignore.json @@ -0,0 +1,8 @@ +{ + "prefixes": [ + "/docs/openziti/blog", + "/docs/openziti/1.x", + "/docs/openziti/tags", + "/docs/openziti/category" + ] +} From 6eb6d7981bb48b26648b7a072cb588d5dc8dbbb3 Mon Sep 17 00:00:00 2001 From: Nico Alba Date: Fri, 22 May 2026 19:08:03 +0000 Subject: [PATCH 5/7] updates to script : --- skills/doc-check/SKILL.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/skills/doc-check/SKILL.md b/skills/doc-check/SKILL.md index e985f54..3e84193 100644 --- a/skills/doc-check/SKILL.md +++ b/skills/doc-check/SKILL.md @@ -201,6 +201,15 @@ Mark as **internal** (skip, add to skipped list) if changes are limited to: ### 4. Cross-reference flagged PRs against existing docs +Before searching, pull the latest changes to the local doc path so coverage assessments reflect current content: + +```bash +git -C <local-doc-path> pull +``` + +If the pull fails (uncommitted changes, detached HEAD, network error), note it in the report and proceed with the +current local state — don't abort the scan. + For each customer-facing PR, search the local doc path for the product (see registry above) to determine whether coverage already exists. Use grep and file reads — do not guess. 
From b93759d9f9f8fb2272fa6f149d87f292eb9c2bec Mon Sep 17 00:00:00 2001 From: Nico Alba Date: Fri, 22 May 2026 22:05:50 +0000 Subject: [PATCH 6/7] fix: check-drift alias falls back to build/ when build-site/ absent --- unified-doc/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unified-doc/package.json b/unified-doc/package.json index c704600..c3cddc4 100644 --- a/unified-doc/package.json +++ b/unified-doc/package.json @@ -46,7 +46,7 @@ "vrt:report:home": "backstop openReport --config=backstop.home.json", "vrt": "node scripts/vrt-run.mjs", "vrt:clean": "node scripts/vrt-run.mjs", - "check-drift": "node scripts/check-sitemap-drift.mjs build-site/sitemap.xml sitemap-baseline.xml build-site" + "check-drift": "BUILD=$([ -d build-site ] && echo build-site || echo build) && node scripts/check-sitemap-drift.mjs $BUILD/sitemap.xml sitemap-baseline.xml $BUILD" }, "dependencies": { "@docusaurus/core": "^3.10.1", From ffb80658a808cec2a7ee6f3c03a557f9af65383a Mon Sep 17 00:00:00 2001 From: Nico Alba Date: Fri, 22 May 2026 22:29:34 +0000 Subject: [PATCH 7/7] fix: skip full Docusaurus pages in redirect scan; ignore llms.txt MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full pages rendered by Docusaurus contain __docusaurus in their HTML — only bare plugin-client-redirects stubs should be scanned for the redirect graph. Also add /docs/llms.txt to sitemap-ignore.json since it is injected into prod by publish-unified-doc.sh post-build and never appears in a local build's sitemap. 
Co-Authored-By: Claude Sonnet 4.6 --- unified-doc/scripts/check-sitemap-drift.mjs | 3 +++ unified-doc/scripts/sitemap-ignore.json | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/unified-doc/scripts/check-sitemap-drift.mjs b/unified-doc/scripts/check-sitemap-drift.mjs index 8c6f3e0..83e878d 100644 --- a/unified-doc/scripts/check-sitemap-drift.mjs +++ b/unified-doc/scripts/check-sitemap-drift.mjs @@ -133,6 +133,9 @@ function buildRedirectMap(dir) { walk(full); } else if (entry.name === 'index.html') { const html = readFileSync(full, 'utf8'); + // Skip full Docusaurus pages — only process bare redirect stubs + // generated by plugin-client-redirects (they never contain __docusaurus) + if (html.includes('__docusaurus')) continue; // Match content="0; url=..." regardless of attribute order const m = html.match(/content="[^"]*\burl=([^";\s]+)/i); if (!m) continue; diff --git a/unified-doc/scripts/sitemap-ignore.json b/unified-doc/scripts/sitemap-ignore.json index 09a6f1b..ce95ccc 100644 --- a/unified-doc/scripts/sitemap-ignore.json +++ b/unified-doc/scripts/sitemap-ignore.json @@ -3,6 +3,7 @@ "/docs/openziti/blog", "/docs/openziti/1.x", "/docs/openziti/tags", - "/docs/openziti/category" + "/docs/openziti/category", + "/docs/llms.txt" ] }