diff --git a/npm/packages/ruvector/package.json b/npm/packages/ruvector/package.json index 7af805604..758ddc231 100644 --- a/npm/packages/ruvector/package.json +++ b/npm/packages/ruvector/package.json @@ -8,7 +8,7 @@ "ruvector": "./bin/cli.js" }, "scripts": { - "build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/", + "build": "tsc && node scripts/copy-onnx-assets.js", "verify-dist": "node scripts/verify-dist.js", "prepublishOnly": "npm run build && npm run verify-dist", "test": "node test/integration.js && node test/cli-commands.js" diff --git a/npm/packages/ruvector/scripts/copy-onnx-assets.js b/npm/packages/ruvector/scripts/copy-onnx-assets.js new file mode 100644 index 000000000..0e363b9c9 --- /dev/null +++ b/npm/packages/ruvector/scripts/copy-onnx-assets.js @@ -0,0 +1,64 @@ +#!/usr/bin/env node +/** + * copy-onnx-assets.js — copy the bundled ONNX runtime files from src/core/onnx + * into dist/core/onnx after `tsc`. + * + * Why: tsc only emits .js for .ts inputs. The ONNX subsystem ships a JS + * loader, a wasm-bindgen JS bridge, and the .wasm binary itself — none of + * which are TypeScript and so all of which are skipped by tsc. The previous + * build script copied only `pkg/package.json`, which left the `embed text` + * and `embed-stream` pipelines broken with: + * "Failed to initialize ONNX embedder: ONNX WASM files not bundled. + * The onnx/ directory is missing." + * + * This script does an explicit, cross-platform copy of every runtime asset + * the embedder reads via `path.join(__dirname, 'onnx', ...)` in + * src/core/onnx-embedder.ts. It is intentionally explicit (not a recursive + * dir copy) so a new file gets noticed and its inclusion is a deliberate + * decision, not an accident. 
+ */ + +const fs = require('fs'); +const path = require('path'); + +const pkgRoot = path.resolve(__dirname, '..'); +const srcDir = path.join(pkgRoot, 'src', 'core', 'onnx'); +const dstDir = path.join(pkgRoot, 'dist', 'core', 'onnx'); + +const ASSETS = [ + // Outer loader (model fetch / cache layer) + 'loader.js', + // wasm-bindgen generated bridge + 'pkg/ruvector_onnx_embeddings_wasm.js', + 'pkg/ruvector_onnx_embeddings_wasm.d.ts', + 'pkg/ruvector_onnx_embeddings_wasm_bg.js', + 'pkg/ruvector_onnx_embeddings_wasm_bg.wasm', + 'pkg/ruvector_onnx_embeddings_wasm_bg.wasm.d.ts', + // Already-shipped metadata + license (kept for completeness) + 'pkg/package.json', + 'pkg/LICENSE', +]; + +let copied = 0; +let missing = 0; +for (const rel of ASSETS) { + const src = path.join(srcDir, rel); + const dst = path.join(dstDir, rel); + if (!fs.existsSync(src)) { + console.warn(`copy-onnx-assets: WARN source missing: ${rel}`); + missing++; + continue; + } + fs.mkdirSync(path.dirname(dst), { recursive: true }); + fs.copyFileSync(src, dst); + copied++; +} + +if (missing > 0) { + console.error( + `copy-onnx-assets: ${missing} expected source asset(s) missing — ONNX subsystem may not work at runtime.`, + ); + process.exit(1); +} + +console.log(`copy-onnx-assets: ${copied} asset(s) copied to dist/core/onnx/.`); diff --git a/npm/packages/ruvector/scripts/verify-dist.js b/npm/packages/ruvector/scripts/verify-dist.js index ad24b6fe9..9f8b47a83 100644 --- a/npm/packages/ruvector/scripts/verify-dist.js +++ b/npm/packages/ruvector/scripts/verify-dist.js @@ -1,51 +1,87 @@ #!/usr/bin/env node /** * verify-dist.js — pre-publish gate that fails the build if any file - * `bin/cli.js` requires from `../dist/...` is missing. + * `bin/cli.js` requires from `../dist/...` is missing, OR if any of the + * runtime asset paths read at startup by dist/core/onnx-embedder.js are + * missing. 
* * Why: 0.2.23 was published without a `dist/` directory at all (issue #399), - * which silently broke `ruvector doctor`, the entire `embed` subsystem, and - * `rvf` commands. tsc was supposed to run via `prepublishOnly`, but the - * hook didn't fire (or the build failed silently). This script makes the - * publish itself fail loudly when the artifact is incomplete. + * and 0.2.24/0.2.25 still shipped without the ONNX runtime assets (the + * embedder reads them via `path.join(__dirname, 'onnx', ...)` rather than + * `require()`, so the original CLI-only scan didn't notice them). + * + * This script makes the publish itself fail loudly when either category of + * artifact is incomplete. */ const fs = require('fs'); const path = require('path'); const pkgRoot = path.resolve(__dirname, '..'); -const cliPath = path.join(pkgRoot, 'bin', 'cli.js'); -if (!fs.existsSync(cliPath)) { - console.error('verify-dist: bin/cli.js not found — package layout is broken.'); +function fail(msg) { + console.error(msg); process.exit(1); } -const cliSource = fs.readFileSync(cliPath, 'utf8'); +// ──────────────────────────────────────────────────────────────────────── +// 1. CLI `require('../dist/...js')` scan (original behavior) +// ──────────────────────────────────────────────────────────────────────── +const cliPath = path.join(pkgRoot, 'bin', 'cli.js'); +if (!fs.existsSync(cliPath)) { + fail('verify-dist: bin/cli.js not found — package layout is broken.'); +} -// Collect every `require('../dist/...')` referenced by the CLI. 
+const cliSource = fs.readFileSync(cliPath, 'utf8'); const distRequires = Array.from( cliSource.matchAll(/require\(['"]\.\.\/(dist\/[^'"]+\.js)['"]\)/g), (m) => m[1], ); -const unique = Array.from(new Set(distRequires)).sort(); - -const missing = unique.filter( +const cliUnique = Array.from(new Set(distRequires)).sort(); +const cliMissing = cliUnique.filter( (rel) => !fs.existsSync(path.join(pkgRoot, rel)), ); -if (missing.length > 0) { +if (cliMissing.length > 0) { console.error( - `verify-dist: ${missing.length} dist file(s) referenced by bin/cli.js are missing:`, + `verify-dist: ${cliMissing.length} dist file(s) referenced by bin/cli.js are missing:`, ); - for (const rel of missing) { - console.error(` - ${rel}`); - } + for (const rel of cliMissing) console.error(` - ${rel}`); console.error( "\nRun `npm run build` and confirm tsc emitted under dist/. If a path was renamed,", ); - console.error('update bin/cli.js to match.'); - process.exit(1); + fail('update bin/cli.js to match.'); } -console.log(`verify-dist: ${unique.length} dist path(s) present.`); +// ──────────────────────────────────────────────────────────────────────── +// 2. Runtime asset gate — ONNX subsystem reads non-TS files (.js/.wasm) via +// path.join(__dirname, 'onnx', ...). tsc never touches them, so a +// missing build-script copy is invisible to the CLI-only scan above. +// Each path here corresponds to a `path.join(__dirname, 'onnx', ...)` +// site in dist/core/onnx-embedder.js. 
+// ──────────────────────────────────────────────────────────────────────── +const RUNTIME_ASSETS = [ + 'dist/core/onnx/loader.js', + 'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.js', + 'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.js', + 'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm', +]; + +const runtimeMissing = RUNTIME_ASSETS.filter( + (rel) => !fs.existsSync(path.join(pkgRoot, rel)), +); + +if (runtimeMissing.length > 0) { + console.error( + `verify-dist: ${runtimeMissing.length} ONNX runtime asset(s) missing — embed text and embed-stream pipelines will fail at startup:`, + ); + for (const rel of runtimeMissing) console.error(` - ${rel}`); + console.error( + "\nThese live under src/core/onnx/ in source. Make sure `npm run build` runs the", + ); + fail('copy-onnx-assets step (see scripts/copy-onnx-assets.js).'); +} + +console.log( + `verify-dist: ${cliUnique.length} dist require path(s) + ${RUNTIME_ASSETS.length} runtime asset(s) present.`, +);