Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion npm/packages/ruvector/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"ruvector": "./bin/cli.js"
},
"scripts": {
"build": "tsc && cp src/core/onnx/pkg/package.json dist/core/onnx/pkg/",
"build": "tsc && node scripts/copy-onnx-assets.js",
"verify-dist": "node scripts/verify-dist.js",
"prepublishOnly": "npm run build && npm run verify-dist",
"test": "node test/integration.js && node test/cli-commands.js"
Expand Down
64 changes: 64 additions & 0 deletions npm/packages/ruvector/scripts/copy-onnx-assets.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/env node
/**
* copy-onnx-assets.js — copy the bundled ONNX runtime files from src/core/onnx
* into dist/core/onnx after `tsc`.
*
* Why: tsc only emits .js for .ts inputs. The ONNX subsystem ships a JS
* loader, a wasm-bindgen JS bridge, and the .wasm binary itself — none of
* which are TypeScript and so all of which are skipped by tsc. The previous
* build script copied only `pkg/package.json`, which left embed text and
* the embed-stream pipeline broken with:
* "Failed to initialize ONNX embedder: ONNX WASM files not bundled.
* The onnx/ directory is missing."
*
* This script does an explicit, cross-platform copy of every runtime asset
* the embedder reads via `path.join(__dirname, 'onnx', ...)` in
* src/core/onnx-embedder.ts. It is intentionally explicit (not a recursive
* dir copy) so a new file gets noticed and its inclusion is a deliberate
* decision, not an accident.
*/

const fs = require('fs');
const path = require('path');

// Resolve the package root from this script's location so the copy works
// regardless of the directory `npm run build` is invoked from.
const pkgRoot = path.resolve(__dirname, '..');
const onnxSubdir = ['core', 'onnx'];
const srcDir = path.join(pkgRoot, 'src', ...onnxSubdir);
const dstDir = path.join(pkgRoot, 'dist', ...onnxSubdir);

// Explicit allow-list of files to copy, relative to src/core/onnx.
// Kept as a flat list (not a recursive directory copy) so that adding a new
// file to the published package is a deliberate edit here.
const ASSETS = [
  // Outer loader (model fetch / cache layer)
  'loader.js',
  // wasm-bindgen generated bridge
  'pkg/ruvector_onnx_embeddings_wasm.js',
  'pkg/ruvector_onnx_embeddings_wasm.d.ts',
  'pkg/ruvector_onnx_embeddings_wasm_bg.js',
  'pkg/ruvector_onnx_embeddings_wasm_bg.wasm',
  'pkg/ruvector_onnx_embeddings_wasm_bg.wasm.d.ts',
  // Already-shipped metadata + license (kept for completeness)
  'pkg/package.json',
  'pkg/LICENSE',
];

/**
 * Copy a single asset from the source tree into the dist tree.
 *
 * Emits a warning and leaves dist untouched when the source file is absent.
 * The destination's parent directory is created on demand.
 *
 * @param {string} rel - Asset path relative to the ONNX source directory.
 * @returns {boolean} `true` when the file was copied, `false` when the
 *   source file does not exist.
 */
function copyAsset(rel) {
  const from = path.join(srcDir, rel);
  if (!fs.existsSync(from)) {
    console.warn(`copy-onnx-assets: WARN source missing: ${rel}`);
    return false;
  }

  const to = path.join(dstDir, rel);
  fs.mkdirSync(path.dirname(to), { recursive: true });
  fs.copyFileSync(from, to);
  return true;
}

// Process every asset in list order; each entry records whether it was copied.
const outcomes = ASSETS.map((rel) => copyAsset(rel));
const copied = outcomes.filter(Boolean).length;
const missing = outcomes.length - copied;

// Any absent source asset fails the build (non-zero exit), after all
// present assets have already been copied.
if (missing > 0) {
  console.error(
    `copy-onnx-assets: ${missing} expected source asset(s) missing — ONNX subsystem may not work at runtime.`,
  );
  process.exit(1);
}

console.log(`copy-onnx-assets: ${copied} asset(s) copied to dist/core/onnx/.`);
78 changes: 57 additions & 21 deletions npm/packages/ruvector/scripts/verify-dist.js
Original file line number Diff line number Diff line change
@@ -1,51 +1,87 @@
#!/usr/bin/env node
/**
* verify-dist.js — pre-publish gate that fails the build if any file
* `bin/cli.js` requires from `../dist/...` is missing.
* `bin/cli.js` requires from `../dist/...` is missing, OR if any of the
* runtime asset paths read at startup by dist/core/onnx-embedder.js are
* missing.
*
* Why: 0.2.23 was published without a `dist/` directory at all (issue #399),
* which silently broke `ruvector doctor`, the entire `embed` subsystem, and
* `rvf` commands. tsc was supposed to run via `prepublishOnly`, but the
* hook didn't fire (or the build failed silently). This script makes the
* publish itself fail loudly when the artifact is incomplete.
* and 0.2.24/0.2.25 still shipped without the ONNX runtime assets (the
* embedder reads them via `path.join(__dirname, 'onnx', ...)` rather than
* `require()`, so the original CLI-only scan didn't notice them).
*
* This script makes the publish itself fail loudly when either category of
* artifact is incomplete.
*/

const fs = require('fs');
const path = require('path');

const pkgRoot = path.resolve(__dirname, '..');
const cliPath = path.join(pkgRoot, 'bin', 'cli.js');

if (!fs.existsSync(cliPath)) {
console.error('verify-dist: bin/cli.js not found — package layout is broken.');
/**
 * Print an error message to stderr and terminate the process with exit
 * code 1.
 *
 * @param {string} msg - Text to write via `console.error` before exiting.
 */
function fail(msg) {
  console.error(msg);
  process.exit(1);
}

const cliSource = fs.readFileSync(cliPath, 'utf8');
// ────────────────────────────────────────────────────────────────────────
// 1. CLI `require('../dist/...js')` scan (original behavior)
// ────────────────────────────────────────────────────────────────────────
const cliPath = path.join(pkgRoot, 'bin', 'cli.js');
if (!fs.existsSync(cliPath)) {
fail('verify-dist: bin/cli.js not found — package layout is broken.');
}

// Collect every `require('../dist/...')` referenced by the CLI.
const cliSource = fs.readFileSync(cliPath, 'utf8');
const distRequires = Array.from(
cliSource.matchAll(/require\(['"]\.\.\/(dist\/[^'"]+\.js)['"]\)/g),
(m) => m[1],
);
const unique = Array.from(new Set(distRequires)).sort();

const missing = unique.filter(
const cliUnique = Array.from(new Set(distRequires)).sort();
const cliMissing = cliUnique.filter(
(rel) => !fs.existsSync(path.join(pkgRoot, rel)),
);

if (missing.length > 0) {
if (cliMissing.length > 0) {
console.error(
`verify-dist: ${missing.length} dist file(s) referenced by bin/cli.js are missing:`,
`verify-dist: ${cliMissing.length} dist file(s) referenced by bin/cli.js are missing:`,
);
for (const rel of missing) {
console.error(` - ${rel}`);
}
for (const rel of cliMissing) console.error(` - ${rel}`);
console.error(
"\nRun `npm run build` and confirm tsc emitted under dist/. If a path was renamed,",
);
console.error('update bin/cli.js to match.');
process.exit(1);
fail('update bin/cli.js to match.');
}

console.log(`verify-dist: ${unique.length} dist path(s) present.`);
// ────────────────────────────────────────────────────────────────────────
// 2. Runtime asset gate — ONNX subsystem reads non-JS files via
// path.join(__dirname, 'onnx', ...). tsc never touches them, so a
// missing build-script copy is invisible to the CLI-only scan above.
// Each path here corresponds to a `path.join(__dirname, 'onnx', ...)`
// site in dist/core/onnx-embedder.js.
// ────────────────────────────────────────────────────────────────────────
// Files that must exist on disk under dist/core/onnx/ before publishing,
// given relative to the package root.
const RUNTIME_ASSETS = [
  'dist/core/onnx/loader.js',
  'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm.js',
  'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.js',
  'dist/core/onnx/pkg/ruvector_onnx_embeddings_wasm_bg.wasm',
];

// Collect, in list order, every required asset that is absent from disk.
const runtimeMissing = [];
for (const asset of RUNTIME_ASSETS) {
  const present = fs.existsSync(path.join(pkgRoot, asset));
  if (!present) {
    runtimeMissing.push(asset);
  }
}

// Report each missing asset, then abort through fail() so the publish stops.
if (runtimeMissing.length > 0) {
  console.error(
    `verify-dist: ${runtimeMissing.length} ONNX runtime asset(s) missing — embed text and embed-stream pipelines will fail at startup:`,
  );
  runtimeMissing.forEach((rel) => {
    console.error(` - ${rel}`);
  });
  console.error(
    "\nThese live under src/core/onnx/ in source. Make sure `npm run build` runs the",
  );
  fail('copy-onnx-assets step (see scripts/copy-onnx-assets.js).');
}

// Success summary: both the CLI require scan and the asset gate passed.
console.log(
  `verify-dist: ${cliUnique.length} dist require path(s) + ${RUNTIME_ASSETS.length} runtime asset(s) present.`,
);