Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 47 additions & 2 deletions .github/workflows/algolia-index.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,24 @@ on:
push:
branches:
- master
# Smoke-test the indexing path on PRs that touch the indexing machinery, so runner/dependency
# regressions (e.g. the Bun->tsx swap that broke module resolution) are caught before merge
# instead of only surfacing on master. Pure docs-content PRs don't change this machinery and
# are validated by the normal build, so they're intentionally excluded to keep CI lean.
pull_request:
paths:
- '.github/workflows/algolia-index.yml'
- 'scripts/algolia.ts'
- 'scripts/tsconfig.json'
- 'src/mdx.ts'
- 'package.json'
- 'pnpm-lock.yaml'
jobs:
index:
name: Update Algolia index
runs-on: ubuntu-latest
# Only push events have access to the Algolia secrets and should mutate the live index.
if: github.event_name == 'push'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

Expand Down Expand Up @@ -40,7 +54,7 @@ jobs:
- run: pnpm install --frozen-lockfile

- name: Build index for user docs
run: pnpm enforce-redirects && pnpm generate-doctree && pnpm next build && npx tsx ./scripts/algolia.ts
run: pnpm enforce-redirects && pnpm generate-doctree && pnpm next build && npx tsx --tsconfig ./scripts/tsconfig.json ./scripts/algolia.ts
if: steps.filter.outputs.docs == 'true'
env:
ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
Expand All @@ -53,7 +67,7 @@ jobs:
NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0

- name: Build index for developer docs
run: git submodule init && git submodule update && pnpm enforce-redirects && pnpm generate-doctree && NEXT_PUBLIC_DEVELOPER_DOCS=1 pnpm next build && npx tsx ./scripts/algolia.ts
run: git submodule init && git submodule update && pnpm enforce-redirects && pnpm generate-doctree && NEXT_PUBLIC_DEVELOPER_DOCS=1 pnpm next build && npx tsx --tsconfig ./scripts/tsconfig.json ./scripts/algolia.ts
if: steps.filter.outputs.dev-docs == 'true'
env:
ALGOLIA_APP_ID: ${{ secrets.ALGOLIA_APP_ID }}
Expand All @@ -65,3 +79,34 @@ jobs:
SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
NEXT_PUBLIC_DEVELOPER_DOCS: 1

smoke-test:
name: Smoke-test Algolia indexing (dry run)
runs-on: ubuntu-latest
# PRs run the indexing script in dry-run mode (no secrets, no upload) purely to verify the
# script and its full import graph resolve under the configured runner -- the regression class
# that the Bun->tsx swap introduced, which happens at module load. This deliberately skips
# `next build`: building the ~10k-page site takes minutes and only validates build output,
# which Vercel's PR preview deploy already covers. With no build there are no .next HTML files,
# so ALGOLIA_SKIP_ON_ERROR lets the script tolerate the missing pages and finish in seconds.
if: github.event_name == 'pull_request'
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6

- uses: pnpm/action-setup@02f6c237bd2518259fed6c71566509edfb3f2b74 # v4

- uses: actions/setup-node@53b83947a5a98c8d113130e565377fae1a50d02f # v4
id: setup-node
with:
node-version-file: 'package.json'
cache: 'pnpm'

- run: pnpm install --frozen-lockfile

- name: Dry-run index for user docs
run: npx tsx --tsconfig ./scripts/tsconfig.json ./scripts/algolia.ts
env:
ALGOLIA_DRY_RUN: 'true'
ALGOLIA_SKIP_ON_ERROR: 'true'
SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
NEXT_PUBLIC_SENTRY_DSN: https://examplePublicKey@o0.ingest.sentry.io/0
143 changes: 83 additions & 60 deletions scripts/algolia.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,22 +25,32 @@ const staticHtmlFilesPath = join(process.cwd(), '.next', 'server', 'app');
const ALGOLIA_APP_ID = process.env.ALGOLIA_APP_ID;
const ALGOLIA_API_KEY = process.env.ALGOLIA_API_KEY;
const DOCS_INDEX_NAME = process.env.DOCS_INDEX_NAME;
const ALOGOLIA_SKIP_ON_ERROR = process.env.ALOGOLIA_SKIP_ON_ERROR === 'true';

if (!ALGOLIA_APP_ID) {
throw new Error('`ALGOLIA_APP_ID` env var must be configured in repo secrets');
}
if (!ALGOLIA_API_KEY) {
throw new Error('`ALGOLIA_API_KEY` env var must be configured in repo secrets');
}
if (!DOCS_INDEX_NAME) {
throw new Error('`DOCS_INDEX_NAME` env var must be configured in repo secrets');
const ALGOLIA_SKIP_ON_ERROR = process.env.ALGOLIA_SKIP_ON_ERROR === 'true';
// Dry run generates records but skips all Algolia API calls. Used by PR CI to exercise the
// indexing script and its import graph without secrets or mutating the production index.
const DRY_RUN = process.env.ALGOLIA_DRY_RUN === 'true';

if (!DRY_RUN) {
if (!ALGOLIA_APP_ID) {
throw new Error('`ALGOLIA_APP_ID` env var must be configured in repo secrets');
}
if (!ALGOLIA_API_KEY) {
throw new Error('`ALGOLIA_API_KEY` env var must be configured in repo secrets');
}
if (!DOCS_INDEX_NAME) {
throw new Error('`DOCS_INDEX_NAME` env var must be configured in repo secrets');
}
}

const client = algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY);
const index = client.initIndex(DOCS_INDEX_NAME);
const index =
ALGOLIA_APP_ID && ALGOLIA_API_KEY && DOCS_INDEX_NAME
? algoliasearch(ALGOLIA_APP_ID, ALGOLIA_API_KEY).initIndex(DOCS_INDEX_NAME)
: null;

const CONCURRENCY = 50;
// In dry-run we only need enough pages to exercise the indexing import graph, not the full corpus.
// Processing all ~10k pages cold (no warm cache) exhausts the heap, so cap it.
const DRY_RUN_PAGE_LIMIT = 200;
const CACHE_VERSION = 1;
const CACHE_DIR = join(process.cwd(), '.next', 'cache', 'algolia-records');

Expand All @@ -64,10 +74,13 @@ async function indexAndUpload() {
? getDevDocsFrontMatter()
: getDocsFrontMatter());

const pages = pageFrontMatters.filter(
const allPages = pageFrontMatters.filter(
frontMatter => !frontMatter.draft && !frontMatter.noindex && frontMatter.title
);
console.log(`📄 Processing ${pages.length} pages with concurrency ${CONCURRENCY}`);
const pages = DRY_RUN ? allPages.slice(0, DRY_RUN_PAGE_LIMIT) : allPages;
console.log(
`📄 Processing ${pages.length}${DRY_RUN ? ` of ${allPages.length} (dry-run cap)` : ''} pages with concurrency ${CONCURRENCY}`
);

const {records, cacheHits, cacheMisses} = await generateAlgoliaRecords(pages);
const generateTime = performance.now();
Expand All @@ -85,46 +98,52 @@ async function indexAndUpload() {
Sentry.metrics.gauge('algolia.cache_hits', cacheHits, {attributes: metricTags});
Sentry.metrics.gauge('algolia.cache_misses', cacheMisses, {attributes: metricTags});

const existingRecordIds = await fetchExistingRecordIds(index);
console.log(
`🔥 Found ${existingRecordIds.length} existing records in \`${DOCS_INDEX_NAME}\``
);

console.log(`🔥 Saving records to \`${DOCS_INDEX_NAME}\`...`);
const saveResult = await index.saveObjects(records, {
batchSize: 10000,
autoGenerateObjectIDIfNotExist: true,
});
const newRecordIDs = new Set(saveResult.objectIDs);
console.log(`🔥 Saved ${newRecordIDs.size} records`);
if (DRY_RUN || !index) {
console.log(
`🧪 Dry run: generated ${records.length} records, skipping Algolia upload`
);
} else {
const existingRecordIds = await fetchExistingRecordIds(index);
console.log(
`🔥 Found ${existingRecordIds.length} existing records in \`${DOCS_INDEX_NAME}\``
);

console.log(`🔥 Saving records to \`${DOCS_INDEX_NAME}\`...`);
const saveResult = await index.saveObjects(records, {
batchSize: 10000,
autoGenerateObjectIDIfNotExist: true,
});
const newRecordIDs = new Set(saveResult.objectIDs);
console.log(`🔥 Saved ${newRecordIDs.size} records`);

const recordsToDelete = existingRecordIds.filter(id => !newRecordIDs.has(id));
if (recordsToDelete.length > 0) {
console.log(`🔥 Deleting ${recordsToDelete.length} stale records...`);
await index.deleteObjects(recordsToDelete);
}
const recordsToDelete = existingRecordIds.filter(id => !newRecordIDs.has(id));
if (recordsToDelete.length > 0) {
console.log(`🔥 Deleting ${recordsToDelete.length} stale records...`);
await index.deleteObjects(recordsToDelete);
}

if (!isDeveloperDocs) {
await index.setSettings({
...sentryAlgoliaIndexSettings,
searchableAttributes: [
'unordered(title)',
'unordered(section)',
'unordered(keywords)',
'text',
],
ranking: [
'filters',
'typo',
'words',
'attribute',
'exact',
'proximity',
'desc(sectionRank)',
'asc(position)',
'asc(popularity)',
],
});
if (!isDeveloperDocs) {
await index.setSettings({
...sentryAlgoliaIndexSettings,
searchableAttributes: [
'unordered(title)',
'unordered(section)',
'unordered(keywords)',
'text',
],
ranking: [
'filters',
'typo',
'words',
'attribute',
'exact',
'proximity',
'desc(sectionRank)',
'asc(position)',
'asc(popularity)',
],
});
}
}

const totalSeconds = (performance.now() - startTime) / 1000;
Expand Down Expand Up @@ -171,13 +190,17 @@ async function generateAlgoliaRecords(pages: FrontMatter[]) {
)
);

const allFiles = fs.readdirSync(CACHE_DIR);
const stale = allFiles.filter(f => !usedCacheFiles.has(f));
for (const f of stale) {
fs.unlinkSync(join(CACHE_DIR, f));
}
if (stale.length > 0) {
console.log(`🧹 Cleaned up ${stale.length} stale cache files`);
// Skip cleanup in dry-run: we only processed a subset of pages, so most cache files would look
// "stale" and get wrongly deleted, poisoning the shared cache.
if (!DRY_RUN) {
const allFiles = fs.readdirSync(CACHE_DIR);
const stale = allFiles.filter(f => !usedCacheFiles.has(f));
for (const f of stale) {
fs.unlinkSync(join(CACHE_DIR, f));
}
if (stale.length > 0) {
console.log(`🧹 Cleaned up ${stale.length} stale cache files`);
}
}

return {records: results.flat(), cacheHits, cacheMisses};
Expand Down Expand Up @@ -280,7 +303,7 @@ async function getRecords(
const error = new Error(`🔴 Error processing ${pageFm.slug}: ${e.message}`, {
cause: e,
});
if (ALOGOLIA_SKIP_ON_ERROR) {
if (ALGOLIA_SKIP_ON_ERROR) {
console.error(error);
return {records: [], cached: false};
}
Expand Down
16 changes: 16 additions & 0 deletions scripts/tsconfig.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
{
// Used only when running scripts/algolia.ts via `tsx` (see .github/workflows/algolia-index.yml).
// tsx loads .ts files through Node's CJS resolver. `rehype-prism-diff` (imported transitively
// via src/mdx.ts) only declares an "import" condition in its package.json `exports` map, so the
// CJS resolver fails with ERR_PACKAGE_PATH_NOT_EXPORTED. Aliasing the bare specifier to the
// package's real dist file makes tsx resolve it as a plain file and bypass the exports map.
// Bun (the previous runner) tolerated the import-only exports map; tsx does not.
"extends": "../tsconfig.json",
"compilerOptions": {
"baseUrl": "..",
"paths": {
"sentry-docs/*": ["src/*"],
"rehype-prism-diff": ["node_modules/rehype-prism-diff/dist/index.js"]
}
}
}
Loading