diff --git a/.github/workflows/algolia-index.yml b/.github/workflows/algolia-index.yml index bb33584a203d8..5fb0c491648fa 100644 --- a/.github/workflows/algolia-index.yml +++ b/.github/workflows/algolia-index.yml @@ -3,10 +3,24 @@ on: push: branches: - master + # Smoke-test the indexing path on PRs that touch the indexing machinery, so runner/dependency + # regressions (e.g. the Bun->tsx swap that broke module resolution) are caught before merge + # instead of only surfacing on master. Pure docs-content PRs don't change this machinery and + # are validated by the normal build, so they're intentionally excluded to keep CI lean. + pull_request: + paths: + - '.github/workflows/algolia-index.yml' + - 'scripts/algolia.ts' + - 'scripts/tsconfig.json' + - 'src/mdx.ts' + - 'package.json' + - 'pnpm-lock.yaml' jobs: index: name: Update Algolia index runs-on: ubuntu-latest + # Only push events have access to the Algolia secrets and should mutate the live index. + if: github.event_name == 'push' steps: - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -40,7 +54,7 @@ jobs: - run: pnpm install --frozen-lockfile - name: Build index for user docs - run: pnpm enforce-redirects && pnpm generate-doctree && pnpm next build && npx tsx ./scripts/algolia.ts + run: pnpm enforce-redirects && pnpm generate-doctree && pnpm next build && npx tsx --tsconfig ./scripts/tsconfig.json ./scripts/algolia.ts if: steps.filter.outputs.docs == 'true' env: ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }} @@ -53,7 +67,7 @@ jobs: NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0 - name: Build index for developer docs - run: git submodule init && git submodule update && pnpm enforce-redirects && pnpm generate-doctree && NEXT_PUBLIC_DEVELOPER_DOCS=1 pnpm next build && npx tsx ./scripts/algolia.ts + run: git submodule init && git submodule update && pnpm enforce-redirects && pnpm generate-doctree && NEXT_PUBLIC_DEVELOPER_DOCS=1 pnpm next build && npx tsx --tsconfig 
./scripts/tsconfig.json ./scripts/algolia.ts if: steps.filter.outputs.dev-docs == 'true' env: ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }} @@ -65,3 +79,34 @@ jobs: SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0 NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0 NEXT_PUBLIC_DEVELOPER_DOCS: 1 + + smoke-test: + name: Smoke-test Algolia indexing (dry run) + runs-on: ubuntu-latest + # PRs run the indexing script in dry-run mode (no secrets, no upload) purely to verify the + # script and its full import graph resolve under the configured runner -- the regression class + # that the Bun->tsx swap introduced, which happens at module load. This deliberately skips + # `next build`: building the ~10k-page site takes minutes and only validates build output, + # which Vercel's PR preview deploy already covers. With no build there are no .next HTML files, + # so ALGOLIA_SKIP_ON_ERROR lets the script tolerate the missing pages and finish in seconds. + if: github.event_name == 'pull_request' + steps: + - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + + - uses: pnpm/action-setup@02f6c237bd2518259fed6c71566509edfb3f2b74 # v4 + + - uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v4 + id: setup-node + with: + node-version-file: 'package.json' + cache: 'pnpm' + + - run: pnpm install --frozen-lockfile + + - name: Dry-run index for user docs + run: npx tsx --tsconfig ./scripts/tsconfig.json ./scripts/algolia.ts + env: + ALGOLIA_DRY_RUN: 'true' + ALGOLIA_SKIP_ON_ERROR: 'true' + SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0 + NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0 diff --git a/scripts/algolia.ts b/scripts/algolia.ts index af79ec297c67a..c77260e4dd6b5 100644 --- a/scripts/algolia.ts +++ b/scripts/algolia.ts @@ -25,22 +25,32 @@ const staticHtmlFilesPath = join(process.cwd(), '.next', 'server', 'app'); const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID; const ALGOLIA_API_KEY 
= process.env.ALGOLIA_API_KEY; const DOCS_INDEX_NAME = process.env.DOCS_INDEX_NAME; -const ALOGOLIA_SKIP_ON_ERROR = process.env.ALOGOLIA_SKIP_ON_ERROR === 'true'; - -if (!ALGOLIA_APP_ID) { - throw new Error('`ALGOLIA_APP_ID` env var must be configured in repo secrets'); -} -if (!ALGOLIA_API_KEY) { - throw new Error('`ALGOLIA_API_KEY` env var must be configured in repo secrets'); -} -if (!DOCS_INDEX_NAME) { - throw new Error('`DOCS_INDEX_NAME` env var must be configured in repo secrets'); +const ALGOLIA_SKIP_ON_ERROR = process.env.ALGOLIA_SKIP_ON_ERROR === 'true'; +// Dry run generates records but skips all Algolia API calls. Used by PR CI to exercise the +// indexing script and its import graph without secrets or mutating the production index. +const DRY_RUN = process.env.ALGOLIA_DRY_RUN === 'true'; + +if (!DRY_RUN) { + if (!ALGOLIA_APP_ID) { + throw new Error('`ALGOLIA_APP_ID` env var must be configured in repo secrets'); + } + if (!ALGOLIA_API_KEY) { + throw new Error('`ALGOLIA_API_KEY` env var must be configured in repo secrets'); + } + if (!DOCS_INDEX_NAME) { + throw new Error('`DOCS_INDEX_NAME` env var must be configured in repo secrets'); + } } -const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY); -const index = client.initIndex(DOCS_INDEX_NAME); +const index = + ALGOLIA_APP_ID && ALGOLIA_API_KEY && DOCS_INDEX_NAME + ? algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY).initIndex(DOCS_INDEX_NAME) + : null; const CONCURRENCY = 50; +// In dry-run we only need enough pages to exercise the indexing import graph, not the full corpus. +// Processing all ~10k pages cold (no warm cache) exhausts the heap, so cap it. +const DRY_RUN_PAGE_LIMIT = 200; const CACHE_VERSION = 1; const CACHE_DIR = join(process.cwd(), '.next', 'cache', 'algolia-records'); @@ -64,10 +74,13 @@ async function indexAndUpload() { ? 
getDevDocsFrontMatter() : getDocsFrontMatter()); - const pages = pageFrontMatters.filter( + const allPages = pageFrontMatters.filter( frontMatter => !frontMatter.draft && !frontMatter.noindex && frontMatter.title ); - console.log(`📄 Processing ${pages.length} pages with concurrency ${CONCURRENCY}`); + const pages = DRY_RUN ? allPages.slice(0, DRY_RUN_PAGE_LIMIT) : allPages; + console.log( + `📄 Processing ${pages.length}${DRY_RUN ? ` of ${allPages.length} (dry-run cap)` : ''} pages with concurrency ${CONCURRENCY}` + ); const {records, cacheHits, cacheMisses} = await generateAlgoliaRecords(pages); const generateTime = performance.now(); @@ -85,46 +98,52 @@ async function indexAndUpload() { Sentry.metrics.gauge('algolia.cache_hits', cacheHits, {attributes: metricTags}); Sentry.metrics.gauge('algolia.cache_misses', cacheMisses, {attributes: metricTags}); - const existingRecordIds = await fetchExistingRecordIds(index); - console.log( - `🔥 Found ${existingRecordIds.length} existing records in \`${DOCS_INDEX_NAME}\`` - ); - - console.log(`🔥 Saving records to \`${DOCS_INDEX_NAME}\`...`); - const saveResult = await index.saveObjects(records, { - batchSize: 10000, - autoGenerateObjectIDIfNotExist: true, - }); - const newRecordIDs = new Set(saveResult.objectIDs); - console.log(`🔥 Saved ${newRecordIDs.size} records`); + if (DRY_RUN || !index) { + console.log( + `🧪 Dry run: generated ${records.length} records, skipping Algolia upload` + ); + } else { + const existingRecordIds = await fetchExistingRecordIds(index); + console.log( + `🔥 Found ${existingRecordIds.length} existing records in \`${DOCS_INDEX_NAME}\`` + ); + + console.log(`🔥 Saving records to \`${DOCS_INDEX_NAME}\`...`); + const saveResult = await index.saveObjects(records, { + batchSize: 10000, + autoGenerateObjectIDIfNotExist: true, + }); + const newRecordIDs = new Set(saveResult.objectIDs); + console.log(`🔥 Saved ${newRecordIDs.size} records`); - const recordsToDelete = 
existingRecordIds.filter(id => !newRecordIDs.has(id)); - if (recordsToDelete.length > 0) { - console.log(`🔥 Deleting ${recordsToDelete.length} stale records...`); - await index.deleteObjects(recordsToDelete); - } + const recordsToDelete = existingRecordIds.filter(id => !newRecordIDs.has(id)); + if (recordsToDelete.length > 0) { + console.log(`🔥 Deleting ${recordsToDelete.length} stale records...`); + await index.deleteObjects(recordsToDelete); + } - if (!isDeveloperDocs) { - await index.setSettings({ - ...sentryAlgoliaIndexSettings, - searchableAttributes: [ - 'unordered(title)', - 'unordered(section)', - 'unordered(keywords)', - 'text', - ], - ranking: [ - 'filters', - 'typo', - 'words', - 'attribute', - 'exact', - 'proximity', - 'desc(sectionRank)', - 'asc(position)', - 'asc(popularity)', - ], - }); + if (!isDeveloperDocs) { + await index.setSettings({ + ...sentryAlgoliaIndexSettings, + searchableAttributes: [ + 'unordered(title)', + 'unordered(section)', + 'unordered(keywords)', + 'text', + ], + ranking: [ + 'filters', + 'typo', + 'words', + 'attribute', + 'exact', + 'proximity', + 'desc(sectionRank)', + 'asc(position)', + 'asc(popularity)', + ], + }); + } } const totalSeconds = (performance.now() - startTime) / 1000; @@ -171,13 +190,17 @@ async function generateAlgoliaRecords(pages: FrontMatter[]) { ) ); - const allFiles = fs.readdirSync(CACHE_DIR); - const stale = allFiles.filter(f => !usedCacheFiles.has(f)); - for (const f of stale) { - fs.unlinkSync(join(CACHE_DIR, f)); - } - if (stale.length > 0) { - console.log(`🧹 Cleaned up ${stale.length} stale cache files`); + // Skip cleanup in dry-run: we only processed a subset of pages, so most cache files would look + // "stale" and get wrongly deleted, poisoning the shared cache. 
+ if (!DRY_RUN) { + const allFiles = fs.readdirSync(CACHE_DIR); + const stale = allFiles.filter(f => !usedCacheFiles.has(f)); + for (const f of stale) { + fs.unlinkSync(join(CACHE_DIR, f)); + } + if (stale.length > 0) { + console.log(`🧹 Cleaned up ${stale.length} stale cache files`); + } } return {records: results.flat(), cacheHits, cacheMisses}; @@ -280,7 +303,7 @@ async function getRecords( const error = new Error(`🔴 Error processing ${pageFm.slug}: ${e.message}`, { cause: e, }); - if (ALOGOLIA_SKIP_ON_ERROR) { + if (ALGOLIA_SKIP_ON_ERROR) { console.error(error); return {records: [], cached: false}; } diff --git a/scripts/tsconfig.json b/scripts/tsconfig.json new file mode 100644 index 0000000000000..58d3f2f750e90 --- /dev/null +++ b/scripts/tsconfig.json @@ -0,0 +1,16 @@ +{ + // Used only when running scripts/algolia.ts via `tsx` (see .github/workflows/algolia-index.yml). + // tsx loads .ts files through Node's CJS resolver. `rehype-prism-diff` (imported transitively + // via src/mdx.ts) only declares an "import" condition in its package.json `exports` map, so the + // CJS resolver fails with ERR_PACKAGE_PATH_NOT_EXPORTED. Aliasing the bare specifier to the + // package's real dist file makes tsx resolve it as a plain file and bypass the exports map. + // Bun (the previous runner) tolerated the import-only exports map; tsx does not. + "extends": "../tsconfig.json", + "compilerOptions": { + "baseUrl": "..", + "paths": { + "sentry-docs/*": ["src/*"], + "rehype-prism-diff": ["node_modules/rehype-prism-diff/dist/index.js"] + } + } +}