diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2cc18345..ada8f3bf 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -20,7 +20,7 @@ jobs: matrix: # Linux runs the full Rust version matrix; Windows and macOS test stable only. os: [ubuntu-latest] - rust: [stable, beta, nightly, "1.87.0"] + rust: [stable, beta, nightly, "1.91.0"] include: - os: windows-latest rust: stable diff --git a/.gitignore b/.gitignore index 2b8399ec..89bd6d15 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,13 @@ # Cargo build files target +# Local-only Cargo config (copy from .cargo/config.toml.iis) +.cargo/config.toml + # Temporary test files tests/**/tmp tests/**/.bender +tests/**/.bender-kg # clangd .cache/clangd diff --git a/Cargo.lock b/Cargo.lock index 46090621..d377c06c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2,6 +2,27 @@ # It is not intended for manual editing. version = 4 +[[package]] +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" +dependencies = [ + "cfg-if", + "const-random", + "getrandom 0.3.4", + "once_cell", + "serde", + "version_check", + "zerocopy", +] + [[package]] name = "aho-corasick" version = "1.1.4" @@ -11,6 +32,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" + [[package]] name = "android_system_properties" version = "0.1.5" @@ -56,7 +83,7 @@ version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" 
dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -67,7 +94,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -76,6 +103,92 @@ version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" +[[package]] +name = "arcstr" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "03918c3dbd7701a85c6b9887732e2921175f26c350b4563841d0958c21d57e6d" +dependencies = [ + "serde", +] + +[[package]] +name = "arrow-array" +version = "58.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841321891f247aa86c6112c80d83d89cb36e0addd020fa2425085b8eb6c3f579" +dependencies = [ + "ahash", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "chrono", + "half", + "hashbrown 0.17.1", + "num-complex", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-buffer" +version = "58.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f955dfb73fae000425f49c8226d2044dab60fb7ad4af1e24f961756354d996c9" +dependencies = [ + "bytes", + "half", + "num-bigint", + "num-traits", +] + +[[package]] +name = "arrow-data" +version = "58.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3b5846209775b6dc8056d77ff9a032b27043383dd5488abd0b663e265b9373" +dependencies = [ + "arrow-buffer", + "arrow-schema", + "half", + "num-integer", + "num-traits", +] + +[[package]] +name = "arrow-ipc" +version = "58.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd8907ddd8f9fbabf91ec2c85c1d81fe2874e336d2443eb36373595e28b98dd5" +dependencies = [ + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "arrow-select", + "flatbuffers", +] + +[[package]] +name = 
"arrow-schema" +version = "58.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18aa020f6bc8e5201dcd2d4b7f98c68f8a410ef37128263243e6ff2a47a67d4f" + +[[package]] +name = "arrow-select" +version = "58.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a657ab5132e9c8ca3b24eb15a823d0ced38017fe3930ff50167466b02e2d592c" +dependencies = [ + "ahash", + "arrow-array", + "arrow-buffer", + "arrow-data", + "arrow-schema", + "num-traits", +] + [[package]] name = "assert_cmd" version = "2.2.0" @@ -102,18 +215,46 @@ dependencies = [ "syn", ] +[[package]] +name = "async-trait" +version = "0.1.89" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9035ad2d096bed7955a320ee7e2230574d28fd3c3a0f186cbea1ff3c7eed5dbb" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "autocfg" version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "base64" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" + +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bender" version = "0.31.0" dependencies = [ "assert_cmd", "async-recursion", + "bender-kg-core", + "bender-kg-extract", + "bender-kg-mcp", + "bender-kg-models", + "bender-kg-similarity", "bender-slang", "blake2", "clap", @@ -124,7 +265,7 @@ dependencies = [ "futures", "glob", "indexmap", - "indicatif", + "indicatif 0.18.4", "itertools", "log", "miette", @@ -146,6 +287,89 @@ dependencies = [ "walkdir", ] +[[package]] +name = "bender-kg-core" +version = "0.1.0" +dependencies = [ + "bender-kg-extract", + "bender-kg-models", + 
"bender-kg-similarity", + "bender-kg-store", + "indexmap", + "log", + "serde", + "serde_json", + "tempfile", + "thiserror", + "tokio", +] + +[[package]] +name = "bender-kg-extract" +version = "0.1.0" +dependencies = [ + "bender-kg-models", + "bender-slang", + "indexmap", + "log", + "serde", + "serde_json", + "sha2", + "tempfile", + "thiserror", +] + +[[package]] +name = "bender-kg-mcp" +version = "0.1.0" +dependencies = [ + "anyhow", + "bender-kg-core", + "bender-kg-models", + "log", + "rmcp", + "schemars", + "serde", + "serde_json", + "tempfile", + "thiserror", + "tokio", +] + +[[package]] +name = "bender-kg-models" +version = "0.1.0" +dependencies = [ + "indexmap", + "serde", + "serde_json", + "sha2", + "thiserror", +] + +[[package]] +name = "bender-kg-similarity" +version = "0.1.0" +dependencies = [ + "model2vec-rs", + "serde", + "sha2", + "thiserror", +] + +[[package]] +name = "bender-kg-store" +version = "0.1.0" +dependencies = [ + "bender-kg-models", + "grafeo", + "grafeo-common", + "serde", + "serde_json", + "tempfile", + "thiserror", +] + [[package]] name = "bender-slang" version = "0.1.0" @@ -157,6 +381,26 @@ dependencies = [ "thiserror", ] +[[package]] +name = "bincode" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +dependencies = [ + "bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", +] + [[package]] name = "bitflags" version = "2.11.0" @@ -198,12 +442,27 @@ version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" +[[package]] +name = "byteorder" +version = "1.5.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" +[[package]] +name = "castaway" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dec551ab6e7578819132c713a93c022a05d60159dc86e7a7050223577484c55a" +dependencies = [ + "rustversion", +] + [[package]] name = "cc" version = "1.2.58" @@ -220,6 +479,17 @@ version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" +[[package]] +name = "chacha20" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f8d983286843e49675a4b7a2d174efe136dc93a18d69130dd18198a6c167601" +dependencies = [ + "cfg-if", + "cpufeatures 0.3.0", + "rand_core 0.10.1", +] + [[package]] name = "chrono" version = "0.4.44" @@ -227,7 +497,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ "iana-time-zone", + "js-sys", "num-traits", + "serde", + "wasm-bindgen", "windows-link", ] @@ -313,13 +586,12 @@ dependencies = [ [[package]] name = "codespan-reporting" -version = "0.13.1" +version = "0.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af491d569909a7e4dee0ad7db7f5341fef5c614d5b8ec8cf765732aba3cff681" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" dependencies = [ - "serde", "termcolor", - "unicode-width 0.2.2", + "unicode-width 0.1.14", ] [[package]] @@ -328,6 +600,34 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" +[[package]] +name = "compact_str" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" +dependencies = [ + "castaway", + "cfg-if", + "itoa", + "rustversion", + "ryu", + "serde", + "static_assertions", +] + +[[package]] +name = "console" +version = "0.15.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "054ccb5b10f9f2cbf51eb355ca1d05c2d279ce1804688d0db74b4733a5aeafd8" +dependencies = [ + "encode_unicode", + "libc", + "once_cell", + "unicode-width 0.2.2", + "windows-sys 0.59.0", +] + [[package]] name = "console" version = "0.16.3" @@ -337,7 +637,27 @@ dependencies = [ "encode_unicode", "libc", "unicode-width 0.2.2", - "windows-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "const-random" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" +dependencies = [ + "const-random-macro", +] + +[[package]] +name = "const-random-macro" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +dependencies = [ + "getrandom 0.2.17", + "once_cell", + "tiny-keccak", ] [[package]] @@ -355,6 +675,46 @@ dependencies = [ "libc", ] +[[package]] +name = "cpufeatures" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b2a41393f66f16b0823bb79094d54ac5fbd34ab292ddafb9a0456ac9f87d201" +dependencies = [ + "libc", +] + +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam" +version = "0.8.4" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "1137cd7e7fc0fb5d3c5a8678be38ec56e819125d8d7907411fe24ccb943faca8" +dependencies = [ + "crossbeam-channel", + "crossbeam-deque", + "crossbeam-epoch", + "crossbeam-queue", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-channel" +version = "0.5.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-deque" version = "0.8.6" @@ -374,12 +734,27 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "crossbeam-queue" +version = "0.3.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" +dependencies = [ + "crossbeam-utils", +] + [[package]] name = "crossbeam-utils" version = "0.8.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" +[[package]] +name = "crunchy" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" + [[package]] name = "crypto-common" version = "0.1.7" @@ -392,28 +767,26 @@ dependencies = [ [[package]] name = "cxx" -version = "1.0.194" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "747d8437319e3a2f43d93b341c137927ca70c0f5dabeea7a005a73665e247c7e" +checksum = "3956d60afa98653c5a57f60d7056edd513bfe0307ef6fb06f6167400c3884459" dependencies = [ "cc", - "cxx-build", "cxxbridge-cmd", "cxxbridge-flags", "cxxbridge-macro", - "foldhash 0.2.0", + "foldhash 0.1.5", "link-cplusplus", ] [[package]] name = "cxx-build" -version = "1.0.194" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b0f4697d190a142477b16aef7da8a99bfdc41e7e8b1687583c0d23a79c7afc1e" +checksum = "9a4b7522f539fe056f1d6fc8577d8ab731451f6f33a89b1e5912e22b76c553e7" dependencies = [ "cc", "codespan-reporting", - "indexmap", "proc-macro2", "quote", "scratch", @@ -422,13 +795,12 @@ dependencies = [ [[package]] name = "cxxbridge-cmd" -version = "1.0.194" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0956799fa8678d4c50eed028f2de1c0552ae183c76e976cf7ca8c4e36a7c328" +checksum = "0f01e92ab4ce9fd4d16e3bb11b158d98cbdcca803c1417aa43130a6526fbf208" dependencies = [ "clap", "codespan-reporting", - "indexmap", "proc-macro2", "quote", "syn", @@ -436,27 +808,150 @@ dependencies = [ [[package]] name = "cxxbridge-flags" -version = "1.0.194" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23384a836ab4f0ad98ace7e3955ad2de39de42378ab487dc28d3990392cb283a" +checksum = "8c41cbfab344869e70998b388923f7d1266588f56c8ca284abf259b1c1ffc695" [[package]] name = "cxxbridge-macro" -version = "1.0.194" +version = "1.0.138" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6acc6b5822b9526adfb4fc377b67128fdd60aac757cc4a741a6278603f763cf" +checksum = "88d82a2f759f0ad3eae43b96604efd42b1d4729a35a6f2dc7bdb797ae25d9284" dependencies = [ - "indexmap", "proc-macro2", "quote", + "rustversion", "syn", ] [[package]] -name = "deunicode" -version = "1.6.2" +name = "darling" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" +dependencies = [ + "darling_core 0.20.11", + "darling_macro 0.20.11", +] + +[[package]] +name = "darling" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25ae13da2f202d56bd7f91c25fba009e7717a1e4a1cc98a76d844b65ae912e9d" +dependencies = [ 
+ "darling_core 0.23.0", + "darling_macro 0.23.0", +] + +[[package]] +name = "darling_core" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0d00b9596d185e565c2207a0b01f8bd1a135483d02d9b7b0a54b11da8d53412e" +dependencies = [ + "fnv", + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_core" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9865a50f7c335f53564bb694ef660825eb8610e0a53d3e11bf1b0d3df31e03b0" +dependencies = [ + "ident_case", + "proc-macro2", + "quote", + "strsim", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.20.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" +dependencies = [ + "darling_core 0.20.11", + "quote", + "syn", +] + +[[package]] +name = "darling_macro" +version = "0.23.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3984ec7bd6cfa798e62b4a642426a5be0e68f9401cfc2a01e3fa9ea2fcdb8d" +dependencies = [ + "darling_core 0.23.0", + "quote", + "syn", +] + +[[package]] +name = "dary_heap" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b1e3a325bc115f096c8b77bbf027a7c2592230e70be2d985be950d3d5e60ebe" +dependencies = [ + "serde", +] + +[[package]] +name = "dashmap" +version = "6.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" +dependencies = [ + "cfg-if", + "crossbeam-utils", + "hashbrown 0.14.5", + "lock_api", + "once_cell", + "parking_lot_core", +] + +[[package]] +name = "derive_builder" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +dependencies = [ + "derive_builder_macro", +] + 
+[[package]] +name = "derive_builder_core" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" +dependencies = [ + "darling 0.20.11", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "derive_builder_macro" +version = "0.20.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" +dependencies = [ + "derive_builder_core", + "syn", +] + +[[package]] +name = "deunicode" +version = "1.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abd57806937c9cc163efc8ea3910e00a62e2aeb0b8119f1793a978088f8f6b04" [[package]] name = "diff" @@ -499,7 +994,18 @@ dependencies = [ "libc", "option-ext", "redox_users", - "windows-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", ] [[package]] @@ -508,6 +1014,12 @@ version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" +[[package]] +name = "dyn-clone" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" + [[package]] name = "either" version = "1.15.0" @@ -554,7 +1066,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "esaxx-rs" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" +dependencies = [ + "cc", ] [[package]] @@ -569,6 +1090,32 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" +[[package]] +name = "flatbuffers" +version = "25.12.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "35f6839d7b3b98adde531effaf34f0c2badc6f4735d26fe74709d8e513a96ef3" +dependencies = [ + "bitflags", + "rustc_version", +] + +[[package]] +name = "flate2" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + +[[package]] +name = "fnv" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" + [[package]] name = "foldhash" version = "0.1.5" @@ -581,6 +1128,25 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" +[[package]] +name = "form_urlencoded" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf" +dependencies = [ + "percent-encoding", +] + +[[package]] +name = "fs2" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "futures" version = "0.3.32" @@ -690,6 +1256,18 @@ dependencies = [ "wasi", ] +[[package]] +name = "getrandom" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" 
+dependencies = [ + "cfg-if", + "libc", + "r-efi 5.3.0", + "wasip2", +] + [[package]] name = "getrandom" version = "0.4.2" @@ -698,7 +1276,8 @@ checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" dependencies = [ "cfg-if", "libc", - "r-efi", + "r-efi 6.0.0", + "rand_core 0.10.1", "wasip2", "wasip3", ] @@ -733,6 +1312,138 @@ dependencies = [ "walkdir", ] +[[package]] +name = "grafeo" +version = "0.5.42" +dependencies = [ + "grafeo-adapters", + "grafeo-common", + "grafeo-core", + "grafeo-engine", +] + +[[package]] +name = "grafeo-adapters" +version = "0.5.42" +dependencies = [ + "bincode", + "grafeo-common", + "grafeo-core", + "hashbrown 0.17.1", + "parking_lot", + "rayon", + "serde", + "smallvec", + "thiserror", +] + +[[package]] +name = "grafeo-common" +version = "0.5.42" +dependencies = [ + "arcstr", + "bincode", + "bumpalo", + "byteorder", + "bytes", + "dashmap", + "foldhash 0.2.0", + "hashbrown 0.17.1", + "indexmap", + "parking_lot", + "serde", + "smallvec", + "thiserror", +] + +[[package]] +name = "grafeo-core" +version = "0.5.42" +dependencies = [ + "arcstr", + "bincode", + "byteorder", + "bytes", + "crc32fast", + "crossbeam", + "dashmap", + "foldhash 0.2.0", + "grafeo-common", + "hashbrown 0.17.1", + "indexmap", + "ordered-float", + "parking_lot", + "rand 0.10.1", + "rayon", + "regex", + "serde", + "smallvec", + "thiserror", + "unicode-normalization", +] + +[[package]] +name = "grafeo-engine" +version = "0.5.42" +dependencies = [ + "arcstr", + "arrow-array", + "arrow-ipc", + "arrow-schema", + "bincode", + "bytes", + "crc32fast", + "crossbeam", + "grafeo-adapters", + "grafeo-common", + "grafeo-core", + "grafeo-storage", + "hashbrown 0.17.1", + "indexmap", + "parking_lot", + "rayon", + "regex", + "serde", + "smallvec", + "thiserror", +] + +[[package]] +name = "grafeo-storage" +version = "0.5.42" +dependencies = [ + "bincode", + "byteorder", + "bytes", + "crc32fast", + "crossbeam", + "fs2", + "grafeo-common", + "memmap2", + 
"parking_lot", + "serde", + "thiserror", + "tokio", +] + +[[package]] +name = "half" +version = "2.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" +dependencies = [ + "cfg-if", + "crunchy", + "num-traits", + "zerocopy", +] + +[[package]] +name = "hashbrown" +version = "0.14.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" + [[package]] name = "hashbrown" version = "0.15.5" @@ -748,6 +1459,19 @@ version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", + "serde", + "serde_core", +] + [[package]] name = "heck" version = "0.5.0" @@ -760,6 +1484,35 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" +[[package]] +name = "hf-hub" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" +dependencies = [ + "dirs", + "http", + "indicatif 0.17.11", + "libc", + "log", + "rand 0.9.4", + "serde", + "serde_json", + "thiserror", + "ureq", + "windows-sys 0.60.2", +] + +[[package]] +name = "http" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" +dependencies = [ + "bytes", + "itoa", +] + [[package]] name = "humansize" version = "2.1.3" @@ -793,12 +1546,121 @@ dependencies = [ "cc", ] 
+[[package]] +name = "icu_collections" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" +dependencies = [ + "displaydoc", + "potential_utf", + "utf8_iter", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locale_core" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_normalizer" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" +dependencies = [ + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" + +[[package]] +name = "icu_properties" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" +dependencies = [ + "icu_collections", + "icu_locale_core", + "icu_properties_data", + "icu_provider", + "zerotrie", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" + +[[package]] +name = "icu_provider" +version = "2.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" +dependencies = [ + "displaydoc", + "icu_locale_core", + "writeable", + "yoke", + "zerofrom", + 
"zerotrie", + "zerovec", +] + [[package]] name = "id-arena" version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" +[[package]] +name = "ident_case" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39" + +[[package]] +name = "idna" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de" +dependencies = [ + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" +dependencies = [ + "icu_normalizer", + "icu_properties", +] + [[package]] name = "ignore" version = "0.4.25" @@ -827,13 +1689,26 @@ dependencies = [ "serde_core", ] +[[package]] +name = "indicatif" +version = "0.17.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" +dependencies = [ + "console 0.15.11", + "number_prefix", + "portable-atomic", + "unicode-width 0.2.2", + "web-time", +] + [[package]] name = "indicatif" version = "0.18.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" dependencies = [ - "console", + "console 0.16.3", "portable-atomic", "unicode-width 0.2.2", "unit-prefix", @@ -848,7 +1723,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -936,6 +1811,12 @@ version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" +[[package]] +name = "litemap" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" + [[package]] name = "lock_api" version = "0.4.14" @@ -951,12 +1832,47 @@ version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" +[[package]] +name = "macro_rules_attribute" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65049d7923698040cd0b1ddcced9b0eb14dd22c5f86ae59c3740eab64a676520" +dependencies = [ + "macro_rules_attribute-proc_macro", + "paste", +] + +[[package]] +name = "macro_rules_attribute-proc_macro" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" + +[[package]] +name = "matrixmultiply" +version = "0.3.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" +dependencies = [ + "autocfg", + "rawpointer", +] + [[package]] name = "memchr" version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" +[[package]] +name = "memmap2" +version = "0.9.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" +dependencies = [ + "libc", +] + [[package]] name = "miette" version = "7.6.0" @@ -979,6 +1895,22 @@ dependencies = [ "syn", ] +[[package]] +name = "minimal-lexical" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" + +[[package]] +name = 
"miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" version = "1.2.0" @@ -987,7 +1919,96 @@ checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", - "windows-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "model2vec-rs" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23693c16304bc11674c991f47aa33794e641204e67547e0cef31c794c38ea00f" +dependencies = [ + "anyhow", + "clap", + "half", + "hf-hub", + "ndarray", + "safetensors", + "serde_json", + "tokenizers", +] + +[[package]] +name = "monostate" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3341a273f6c9d5bef1908f17b7267bbab0e95c9bf69a0d4dcf8e9e1b2c76ef67" +dependencies = [ + "monostate-impl", + "serde", + "serde_core", +] + +[[package]] +name = "monostate-impl" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e4db6d5580af57bf992f59068d4ea26fd518574ff48d7639b255a36f9de6e7e9" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "ndarray" +version = "0.15.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adb12d4e967ec485a5f71c6311fe28158e9d6f4bc4a447b474184d0f91a8fa32" +dependencies = [ + "matrixmultiply", + "num-complex", + "num-integer", + "num-traits", + "rawpointer", +] + +[[package]] +name = "nom" +version = "7.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a" +dependencies = [ + "memchr", + "minimal-lexical", +] + +[[package]] +name = "num-bigint" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" +dependencies = [ + "num-integer", + "num-traits", +] + +[[package]] +name = "num-complex" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "73f88a1307638156682bada9d7604135552957b7818057dcef22705b4d509495" +dependencies = [ + "num-traits", +] + +[[package]] +name = "num-integer" +version = "0.1.46" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" +dependencies = [ + "num-traits", ] [[package]] @@ -997,8 +2018,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ "autocfg", + "libm", ] +[[package]] +name = "number_prefix" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" + [[package]] name = "once_cell" version = "1.21.4" @@ -1006,10 +2034,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] -name = "once_cell_polyfill" -version = "1.70.2" +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "onig" +version = "6.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc3cbf698f9438986c11a880c90a6d04b9de27575afd28bbf45b154b6c709e2" +dependencies = [ + "bitflags", + "libc", + "once_cell", + "onig_sys", +] + +[[package]] +name = "onig_sys" +version = "69.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" +checksum = 
"1e68317604e77e53b85896388e1a803c1d21b74c899ec9e5e1112db90735edd7" +dependencies = [ + "cc", + "pkg-config", +] [[package]] name = "option-ext" @@ -1017,6 +2067,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" +[[package]] +name = "ordered-float" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7d950ca161dc355eaf28f82b11345ed76c6e1f6eb1f4f4479e0323b9e2fbd0e" +dependencies = [ + "num-traits", +] + [[package]] name = "owo-colors" version = "4.3.0" @@ -1059,6 +2118,18 @@ dependencies = [ "regex", ] +[[package]] +name = "paste" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" + +[[package]] +name = "pastey" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c5a797f0e07bdf071d15742978fc3128ec6c22891c31a3a931513263904c982a" + [[package]] name = "pathdiff" version = "0.2.3" @@ -1140,7 +2211,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" dependencies = [ "phf_shared", - "rand", + "rand 0.8.5", ] [[package]] @@ -1158,12 +2229,27 @@ version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" +[[package]] +name = "pkg-config" +version = "0.3.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" + [[package]] name = "portable-atomic" version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" +[[package]] +name = "potential_utf" +version = "0.1.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" +dependencies = [ + "zerovec", +] + [[package]] name = "ppv-lite86" version = "0.2.21" @@ -1238,6 +2324,12 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "r-efi" +version = "5.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" + [[package]] name = "r-efi" version = "6.0.0" @@ -1251,8 +2343,29 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" dependencies = [ "libc", - "rand_chacha", - "rand_core", + "rand_chacha 0.3.1", + "rand_core 0.6.4", +] + +[[package]] +name = "rand" +version = "0.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" +dependencies = [ + "rand_chacha 0.9.0", + "rand_core 0.9.5", +] + +[[package]] +name = "rand" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2e8e8bcc7961af1fdac401278c6a831614941f6164ee3bf4ce61b7edb162207" +dependencies = [ + "chacha20", + "getrandom 0.4.2", + "rand_core 0.10.1", ] [[package]] @@ -1262,7 +2375,17 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" dependencies = [ "ppv-lite86", - "rand_core", + "rand_core 0.6.4", +] + +[[package]] +name = "rand_chacha" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.5", ] [[package]] @@ -1274,6 +2397,58 @@ dependencies = [ "getrandom 0.2.17", ] +[[package]] +name = "rand_core" +version = "0.9.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" +dependencies = [ + "getrandom 0.3.4", +] + +[[package]] +name = "rand_core" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "63b8176103e19a2643978565ca18b50549f6101881c443590420e4dc998a3c69" + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + +[[package]] +name = "rayon" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-cond" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2964d0cf57a3e7a06e8183d14a8b527195c706b7983549cd5462d5aa3747438f" +dependencies = [ + "either", + "itertools", + "rayon", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -1294,6 +2469,26 @@ dependencies = [ "thiserror", ] +[[package]] +name = "ref-cast" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f354300ae66f76f1c85c5f84693f0ce81d747e2c3f21a45fef496d89c960bf7d" +dependencies = [ + "ref-cast-impl", +] + +[[package]] +name = "ref-cast-impl" +version = "1.0.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7186006dcb21920990093f30e3dea63b7d6e977bf1256be20c3563a5db070da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "regex" version = 
"1.12.3" @@ -1323,6 +2518,64 @@ version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.17", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + +[[package]] +name = "rmcp" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e12ca9067b5ebfbd5b3fcdc4acfceb81aa7d5ab2a879dff7cb75d22434276aad" +dependencies = [ + "async-trait", + "base64 0.22.1", + "chrono", + "futures", + "pastey", + "pin-project-lite", + "rmcp-macros", + "schemars", + "serde", + "serde_json", + "thiserror", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "rmcp-macros" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7caa6743cc0888e433105fe1bc551a7f607940b126a37bc97b478e86064627eb" +dependencies = [ + "darling 0.23.0", + "proc-macro2", + "quote", + "serde_json", + "syn", +] + +[[package]] +name = "rustc_version" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfcb3a22ef46e85b45de6ee7e79d063319ebb6594faafcf1c225ea92ab6e9b92" +dependencies = [ + "semver", +] + [[package]] name = "rustix" version = "1.1.4" @@ -1333,7 +2586,42 @@ dependencies = [ "errno", "libc", "linux-raw-sys", - "windows-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "rustls" +version = "0.23.40" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" +dependencies = [ + "log", + "once_cell", + "ring", + "rustls-pki-types", + "rustls-webpki", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-pki-types" +version = 
"1.14.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", ] [[package]] @@ -1348,6 +2636,16 @@ version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" +[[package]] +name = "safetensors" +version = "0.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc0cdb7198d738a111f6df8fef42cb175412c311d0c4ac9126ff4e550ad1a0e8" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "same-file" version = "1.0.6" @@ -1357,6 +2655,32 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "schemars" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2b42f36aa1cd011945615b92222f6bf73c599a102a300334cd7f8dbeec726cc" +dependencies = [ + "chrono", + "dyn-clone", + "ref-cast", + "schemars_derive", + "serde", + "serde_json", +] + +[[package]] +name = "schemars_derive" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d115b50f4aaeea07e79c1912f645c7513d81715d0420f8bc77a18c6260b307f" +dependencies = [ + "proc-macro2", + "quote", + "serde_derive_internals", + "syn", +] + [[package]] name = "scopeguard" version = "1.2.0" @@ -1409,6 +2733,17 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name 
= "serde_json" version = "1.0.149" @@ -1442,7 +2777,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ "cfg-if", - "cpufeatures", + "cpufeatures 0.2.17", "digest", ] @@ -1462,6 +2797,12 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" + [[package]] name = "siphasher" version = "1.0.2" @@ -1497,9 +2838,44 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys", + "windows-sys 0.61.2", +] + +[[package]] +name = "socks" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0c3dbbd9ae980613c6dd8e28a9407b50509d3803b57624d5dfe8315218cd58b" +dependencies = [ + "byteorder", + "libc", + "winapi", +] + +[[package]] +name = "spm_precompiled" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5851699c4033c63636f7ea4cf7b7c1f1bf06d0cc03cfb42e711de5a5c46cf326" +dependencies = [ + "base64 0.13.1", + "nom", + "serde", + "unicode-segmentation", ] +[[package]] +name = "stable_deref_trait" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" + +[[package]] +name = "static_assertions" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" + [[package]] name = "strsim" version = "0.11.1" @@ -1552,6 +2928,17 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "synstructure" +version = "0.13.2" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tabwriter" version = "1.4.1" @@ -1571,7 +2958,7 @@ dependencies = [ "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -1588,7 +2975,7 @@ dependencies = [ "percent-encoding", "pest", "pest_derive", - "rand", + "rand 0.8.5", "regex", "serde", "serde_json", @@ -1606,29 +2993,98 @@ dependencies = [ ] [[package]] -name = "termtree" -version = "0.5.1" +name = "termtree" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tiny-keccak" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" +dependencies = [ + "crunchy", +] + +[[package]] +name = "tinystr" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f50febec83f5ee1df3015341d8bd429f2d1cc62bcba7ea2076759d315084683" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" +dependencies = [ + "displaydoc", + "zerovec", +] [[package]] -name = "thiserror" -version = "2.0.18" +name = "tinyvec" +version = "1.11.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ - "thiserror-impl", + "tinyvec_macros", ] [[package]] -name = "thiserror-impl" -version = "2.0.18" +name = "tinyvec_macros" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" + +[[package]] +name = "tokenizers" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a620b996116a59e184c2fa2dfd8251ea34a36d0a514758c6f966386bd2e03476" dependencies = [ - "proc-macro2", - "quote", - "syn", + "ahash", + "aho-corasick", + "compact_str", + "dary_heap", + "derive_builder", + "esaxx-rs", + "getrandom 0.3.4", + "hf-hub", + "indicatif 0.17.11", + "itertools", + "log", + "macro_rules_attribute", + "monostate", + "onig", + "paste", + "rand 0.9.4", + "rayon", + "rayon-cond", + "regex", + "regex-syntax", + "serde", + "serde_json", + "spm_precompiled", + "thiserror", + "unicode-normalization-alignments", + "unicode-segmentation", + "unicode_categories", ] [[package]] @@ -1645,7 +3101,7 @@ dependencies = [ "signal-hook-registry", "socket2", "tokio-macros", - "windows-sys", + "windows-sys 0.61.2", ] [[package]] @@ -1659,6 +3115,50 @@ dependencies = [ "syn", ] +[[package]] +name = "tokio-util" +version = "0.7.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tracing" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" +dependencies = [ + "pin-project-lite", + "tracing-attributes", + "tracing-core", +] + +[[package]] +name = "tracing-attributes" +version = "0.1.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "tracing-core" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" +dependencies = [ + "once_cell", +] + [[package]] name = "typed-arena" version = "2.0.2" @@ -1683,6 +3183,24 @@ version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + +[[package]] +name = "unicode-normalization-alignments" +version = "0.1.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43f613e4fa046e69818dd287fdc4bc78175ff20331479dab6e1b0f98d57062de" +dependencies = [ + "smallvec", +] + [[package]] name = "unicode-segmentation" version = "1.13.2" @@ -1707,6 +3225,12 @@ version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853" +[[package]] +name = "unicode_categories" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" + [[package]] name = "unit-prefix" version = "0.5.2" @@ -1719,6 +3243,55 @@ version = "0.2.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "673aac59facbab8a9007c7f6108d11f63b603f7cabff99fabf650fea5c32b861" +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + +[[package]] +name = "ureq" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" +dependencies = [ + "base64 0.22.1", + "flate2", + "log", + "once_cell", + "rustls", + "rustls-pki-types", + "serde", + "serde_json", + "socks", + "url", + "webpki-roots 0.26.11", +] + +[[package]] +name = "url" +version = "2.5.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" +dependencies = [ + "form_urlencoded", + "idna", + "percent-encoding", + "serde", +] + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "utf8parse" version = "0.2.2" @@ -1731,6 +3304,12 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + [[package]] name = "wait-timeout" version = "0.2.1" @@ -1863,15 +3442,55 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "webpki-roots" +version = "0.26.11" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" +dependencies = [ + "webpki-roots 1.0.7", +] + +[[package]] +name = "webpki-roots" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" +dependencies = [ + "rustls-pki-types", +] + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys", + "windows-sys 0.61.2", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-core" version = "0.62.2" @@ -1931,6 +3550,33 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 
0.52.6", +] + +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.5", +] + [[package]] name = "windows-sys" version = "0.61.2" @@ -1940,6 +3586,135 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + +[[package]] +name = "windows-targets" +version = "0.53.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" +dependencies = [ + "windows-link", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + +[[package]] +name = "windows_i686_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + +[[package]] +name = "windows_i686_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" + +[[package]] +name = "windows_x86_64_gnullvm" 
+version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.53.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" + [[package]] name = "wit-bindgen" version = "0.51.0" @@ -2028,12 +3803,41 @@ dependencies = [ "wasmparser", ] +[[package]] +name = "writeable" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" + [[package]] name = "yansi" version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cfe53a6657fd280eaa890a3bc59152892ffa3e30101319d168b781ed6529b049" +[[package]] +name = "yoke" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" +dependencies = [ + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + [[package]] name = "zerocopy" version = "0.8.48" @@ -2054,6 +3858,66 @@ dependencies = [ "syn", ] +[[package]] +name = "zerofrom" +version = "0.1.7" +source 
= "registry+https://github.com/rust-lang/crates.io-index" +checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + +[[package]] +name = "zerotrie" +version = "0.2.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", +] + +[[package]] +name = "zerovec" +version = "0.11.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zmij" version = "1.0.21" diff --git a/Cargo.toml b/Cargo.toml index d5e6b50e..d6e20a44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,10 +11,47 @@ description = "A dependency management tool for hardware projects." readme = "README.md" license = "Apache-2.0 OR MIT" edition = "2024" -rust-version = "1.87.0" +# 1.91.1 matches Grafeo's MSRV. 
+rust-version = "1.91.1" [workspace] -members = ["crates/bender-slang"] +members = [ + "crates/bender-slang", + "crates/bender-kg-models", + "crates/bender-kg-extract", + "crates/bender-kg-store", + "crates/bender-kg-similarity", + "crates/bender-kg-core", + "crates/bender-kg-mcp", +] + +[workspace.dependencies] +serde = { version = "1", features = ["derive"] } +serde_json = "1" +thiserror = "2.0.18" +miette = "7.6.0" +log = "0.4" +sha2 = "0.10" +clap = { version = "4.0", features = ["derive", "env"] } +indexmap = { version = "2", features = ["serde"] } +anyhow = "1" +tokio = { version = "1.27", features = ["macros", "rt", "rt-multi-thread"] } +# Embedded graph + vector + text store. The `embedded` profile bakes in +# the LPG model + GQL parser + HNSW vectors + BM25 text index + algos + +# parallel execution + single-file `.grafeo` storage; we add `cypher` for +# our query strings and `wal` for crash-safe writes. We use a path dep +# to the vendored Grafeo checkout next to the workspace. 
+grafeo = { path = "../grafeo/crates/grafeo", default-features = false, features = ["embedded", "cypher", "wal"] } +grafeo-common = { path = "../grafeo/crates/grafeo-common" } +rmcp = { version = "1", features = ["server", "macros", "transport-io"] } +schemars = "1" +bender-slang = { path = "crates/bender-slang", version = "0.1.0" } +bender-kg-models = { path = "crates/bender-kg-models", version = "0.1.0" } +bender-kg-extract = { path = "crates/bender-kg-extract", version = "0.1.0" } +bender-kg-store = { path = "crates/bender-kg-store", version = "0.1.0" } +bender-kg-similarity = { path = "crates/bender-kg-similarity", version = "0.1.0" } +bender-kg-core = { path = "crates/bender-kg-core", version = "0.1.0" } +bender-kg-mcp = { path = "crates/bender-kg-mcp", version = "0.1.0" } [profile.dist] inherits = "release" @@ -22,6 +59,11 @@ lto = "thin" [dependencies] bender-slang = { version = "0.1.0", path = "crates/bender-slang", optional = true } +bender-kg-core = { version = "0.1.0", path = "crates/bender-kg-core", optional = true } +bender-kg-extract = { version = "0.1.0", path = "crates/bender-kg-extract", optional = true } +bender-kg-similarity = { version = "0.1.0", path = "crates/bender-kg-similarity", optional = true } +bender-kg-mcp = { version = "0.1.0", path = "crates/bender-kg-mcp", optional = true } +bender-kg-models = { version = "0.1.0", path = "crates/bender-kg-models", optional = true } serde = { version = "1", features = ["derive"] } serde_yaml_ng = "0.10" @@ -63,4 +105,11 @@ pretty_assertions = "1.4" [features] default = ["slang"] -slang = ["dep:bender-slang"] +slang = [ + "dep:bender-slang", + "dep:bender-kg-core", + "dep:bender-kg-extract", + "dep:bender-kg-similarity", + "dep:bender-kg-mcp", + "dep:bender-kg-models", +] diff --git a/README.md b/README.md index 3947c274..c7148243 100644 --- a/README.md +++ b/README.md @@ -587,6 +587,56 @@ bender pickle --top my_top --prefix p_ --suffix _s --exclude-rename my_top ``` +### `kg` --- Build and query the 
design knowledge graph + +The `bender kg` command builds and queries a knowledge graph of the design's module hierarchy. The graph stores module declarations, port lists, parameters, package imports, and instantiation edges extracted from SystemVerilog sources using Slang. + +This command is only available when Bender is built with Slang support (for example via `cargo install bender --all-features`). + +#### Subcommands + +- **`bender kg build`** — extract the design and populate the knowledge graph. Re-run after source changes to keep the graph in sync. +- **`bender kg query `** — query the graph. See query operations below. +- **`bender kg mcp-server`** — start a stdio MCP server exposing all query operations as MCP tools for use with AI assistants (Claude, Cursor, etc.). + +#### Query operations + +- `search-modules ` — semantic and keyword search for modules matching a natural-language description. +- `get-module ` — full module record: ports, parameters, imports, instantiation count. +- `get-subgraph ` — instantiation sub-graph rooted at a module. +- `get-instance-context ` — parameter and port binding details for a specific instantiation. +- `get-parents ` — modules that instantiate the given module. +- `get-children ` — modules instantiated by the given module. +- `get-ports ` — port list with resolved widths and types. +- `find-by-protocol ` — find modules by port protocol pattern. +- `get-source-snippet ` — source file excerpt for a module. +- `trace-hierarchy-path ` — shortest instantiation path between two modules. +- `check-connectivity ` — validate port connectivity between two modules. +- `trace-parameter [--recursive [--depth N]]` — trace parameter propagation down the hierarchy. +- `trace-signal [--recursive [--depth N]]` — trace signal connectivity down the hierarchy. +- `match-interfaces ` — find modules with matching port lists. +- `find-structurally-similar ` — structurally similar module candidates. 
+ +Examples: + +```sh +# Build the knowledge graph for the current design. +bender kg build + +# Search for AXI crossbar modules. +bender kg query search-modules "AXI crossbar" + +# Trace a clock signal recursively up to 3 hops. +bender kg query trace-signal smu clk_smu_i --recursive --depth 3 + +# Trace a configuration struct parameter. +bender kg query trace-parameter smu Cfg --recursive + +# Start the MCP server for AI assistant integration. +bender kg mcp-server +``` + + ### `update` --- Re-resolve dependencies Whenever you update the list of dependencies, you likely have to run `bender update` to re-resolve the dependency versions, and recreate the `Bender.lock` file. diff --git a/crates/bender-kg-core/Cargo.toml b/crates/bender-kg-core/Cargo.toml new file mode 100644 index 00000000..4c12b51b --- /dev/null +++ b/crates/bender-kg-core/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "bender-kg-core" +version = "0.1.0" +edition = "2024" +description = "Internal bender crate: kg orchestrator (extract + grafeo store/vectors)" +license = "Apache-2.0" +authors = ["Alessandro Ottaviano "] + +[dependencies] +bender-kg-models = { workspace = true } +bender-kg-extract = { workspace = true } +bender-kg-store = { workspace = true } +bender-kg-similarity = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +log = { workspace = true } +indexmap = { workspace = true } + +[dev-dependencies] +tempfile = "3.5" +tokio = { workspace = true, features = ["macros", "rt"] } + +[package.metadata.dist] +dist = false diff --git a/crates/bender-kg-core/README.md b/crates/bender-kg-core/README.md new file mode 100644 index 00000000..0f2a97ac --- /dev/null +++ b/crates/bender-kg-core/README.md @@ -0,0 +1,23 @@ +# bender-kg-core + +> **Internal crate:** `bender-kg-core` is an internal crate of [Bender](https://github.com/pulp-platform/bender). 
It does not provide a stable public API — breaking changes may occur at any time without notice. + +`bender-kg-core` is the orchestration layer of the `bender kg` subsystem. It composes extraction (`bender-kg-extract`), storage (`bender-kg-store`), and embedding (`bender-kg-similarity`) into a single typed API consumed by both the CLI (`bender kg query`) and the MCP server (`bender-kg-mcp`). + +## Responsibilities + +- **Build / update** — drive extraction and incremental ingestion into the graph and vector stores. +- **Query** — expose a typed `Engine` with methods for every supported query operation: + - `search_modules` / `search_modules_batch` — semantic and keyword search. + - `get_module` / `get_subgraph` / `get_instance_context` — module inspection. + - `get_parents` / `get_children` — hierarchy navigation. + - `get_ports` / `find_by_protocol` / `match_interfaces` — port and protocol analysis. + - `get_source_snippet` — source location lookup. + - `trace_hierarchy_path` — shortest instantiation path between two modules. + - `trace_parameter` / `trace_parameter_recursive` — parameter dataflow tracing. + - `trace_signal` / `trace_signal_recursive` — signal connectivity tracing. + - `check_connectivity` — reachability and port binding validation. + - `find_structurally_similar` — structural similarity search. + - `stats` — database statistics. + +Configuration (database path, embedding model, etc.) is provided through `CoreConfig`. diff --git a/crates/bender-kg-core/src/build.rs b/crates/bender-kg-core/src/build.rs new file mode 100644 index 00000000..6a473fab --- /dev/null +++ b/crates/bender-kg-core/src/build.rs @@ -0,0 +1,268 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Pipeline that turns IR into a populated graph + embedding store. +//! +//! The build always runs in this order: +//! 1. `clear_design` (idempotent reset of any prior state for the alias) +//! 2. `register_design` +//! 3. 
`Store::upsert_modules` — one Grafeo transaction; child stubs +//! created by a parent's instantiation can be upgraded to full nodes +//! by their own MERGE within the same transaction. +//! 4. `Store::upsert_embedding` per module — embeddings are stored as +//! `Module.embedding` properties and auto-indexed by Grafeo's HNSW. + +use std::path::Path; +use std::time::Instant; + +use bender_kg_extract::{ExtractInputs, VecSink}; +use bender_kg_models::{IrRecord, Manifest, ModuleData}; + +use crate::module_document; +use crate::{BuildOutcome, Engine, Result}; + +impl Engine { + /// End-to-end build: extract -> persist IR + manifest -> upsert into + /// store -> embed + index. Idempotent. + /// + /// When `inputs.elab && cfg.pipeline_elab`, slang's `walk_elaborated` + /// runs on a worker thread in parallel with the base + /// `Store::upsert_modules` call; resolved param values + port widths + /// are patched onto the existing `INSTANTIATES` edges via a targeted + /// UNWIND SET after both finish. Otherwise the simpler sequential + /// path is used. 
+ pub async fn build(&mut self, inputs: &ExtractInputs) -> Result { + let t_total = Instant::now(); + if inputs.elab && self.cfg.pipeline_elab { + self.build_pipelined(inputs, t_total).await + } else { + self.build_sequential(inputs, t_total).await + } + } + + async fn build_sequential( + &mut self, + inputs: &ExtractInputs, + t_total: Instant, + ) -> Result { + let mut sink = VecSink::default(); + let (manifest, mut phases) = bender_kg_extract::extract(inputs, &mut sink)?; + self.persist_extract_artifacts(&sink.records, &manifest)?; + self.reset_design(&manifest)?; + + let modules: Vec<&ModuleData> = sink + .records + .iter() + .filter_map(|r| match r { + IrRecord::Module(m) => Some(m), + _ => None, + }) + .collect(); + let t_upsert = Instant::now(); + let modules_indexed = self.store.upsert_modules(modules.iter().copied())?; + phases.store_upsert_s = t_upsert.elapsed().as_secs_f64(); + log::info!( + "kg.phase store_upsert {:.3}s ({} modules)", + phases.store_upsert_s, + modules_indexed + ); + + let t_embed = Instant::now(); + let embeddings_indexed = self.embed_and_index(&modules)?; + phases.embed_s = t_embed.elapsed().as_secs_f64(); + if embeddings_indexed > 0 { + log::info!( + "kg.phase embed {:.3}s ({} vectors)", + phases.embed_s, + embeddings_indexed + ); + } + + phases.total_s = t_total.elapsed().as_secs_f64(); + log::info!("kg.phase total {:.3}s", phases.total_s); + Ok(BuildOutcome { + manifest, + modules_indexed, + embeddings_indexed, + phases, + }) + } + + /// Pipelined build path. Runs slang elaboration on a worker thread + /// while the main thread drives the base graph upsert; merges + /// resolved values via UNWIND SET once both finish. Time savings + /// scale with the smaller of the two phase times. For large designs, + /// `--elab` can add tens of seconds of slang work that would otherwise + /// block upsert; with pipelining the wall-clock is bounded by + /// `max(elab, upsert)`. 
+ async fn build_pipelined( + &mut self, + inputs: &ExtractInputs, + t_total: Instant, + ) -> Result { + let mut sink = VecSink::default(); + let (manifest, modules, mut phases, handle_opt) = + bender_kg_extract::extract_pipelined(inputs, &mut sink)?; + self.persist_extract_artifacts(&sink.records, &manifest)?; + self.reset_design(&manifest)?; + + let handle = handle_opt + .expect("extract_pipelined must yield an ElabHandle when inputs.elab is true"); + + let store = &self.store; + let module_refs: Vec<&ModuleData> = modules.iter().collect(); + let t_par = Instant::now(); + let (upsert_res, elab_res) = std::thread::scope(|s| { + let upsert_handle = s.spawn(|| store.upsert_modules(module_refs.iter().copied())); + let elab_handle = s.spawn(move || handle.run()); + ( + upsert_handle.join().expect("kg upsert worker panicked"), + elab_handle.join().expect("kg elab worker panicked"), + ) + }); + let modules_indexed = upsert_res?; + let (resolved_updates, elab_warnings) = elab_res?; + let par_elapsed = t_par.elapsed().as_secs_f64(); + // Both phases ran concurrently; report the wall-clock of the + // longer of the two as the "elab" + "upsert" cost. Without + // per-thread timers we attribute the parallel time to upsert + // (the I/O-bound side) and leave `elaborate_s` at zero so + // bench summaries make the parallelism visible. 
+ phases.store_upsert_s = par_elapsed; + log::info!( + "kg.phase parallel(upsert,elab) {:.3}s ({} modules, {} resolved updates, {} warnings)", + par_elapsed, + modules_indexed, + resolved_updates.len(), + elab_warnings.len() + ); + + if !resolved_updates.is_empty() { + let t_apply = Instant::now(); + let n = self.store.update_resolved_edges(&resolved_updates)?; + log::info!( + "kg.phase apply_resolved {:.3}s ({} edge updates)", + t_apply.elapsed().as_secs_f64(), + n, + ); + } + + let t_embed = Instant::now(); + let embeddings_indexed = self.embed_and_index(&module_refs)?; + phases.embed_s = t_embed.elapsed().as_secs_f64(); + if embeddings_indexed > 0 { + log::info!( + "kg.phase embed {:.3}s ({} vectors)", + phases.embed_s, + embeddings_indexed + ); + } + + phases.total_s = t_total.elapsed().as_secs_f64(); + log::info!("kg.phase total {:.3}s [pipelined]", phases.total_s); + Ok(BuildOutcome { + manifest, + modules_indexed, + embeddings_indexed, + phases, + }) + } + + /// Load IR JSONL into the store + embedding index without + /// re-extracting. + pub async fn index_from_jsonl(&mut self, jsonl_path: impl AsRef) -> Result { + let f = std::fs::File::open(&jsonl_path)?; + let r = std::io::BufReader::new(f); + let mut design_alias: Option = None; + let mut modules: Vec = Vec::new(); + for rec in bender_kg_models::read_ir_jsonl(r) { + match rec? { + IrRecord::Manifest(m) => { + self.reset_design(&m)?; + design_alias = Some(m.identity.alias.clone()); + } + IrRecord::Module(mut m) => { + if m.design.is_empty() { + if let Some(a) = &design_alias { + m.design = a.clone(); + } + } + modules.push(m); + } + } + } + let count = self.store.upsert_modules(modules.iter())?; + let refs: Vec<&ModuleData> = modules.iter().collect(); + self.embed_and_index(&refs)?; + Ok(count) + } + + pub async fn clear_design(&mut self, alias: &str) -> Result<()> { + // Store::clear_design wipes both module nodes and their embeddings + // since the embedding lives as a node property. 
+ self.store.clear_design(alias)?; + Ok(()) + } + + pub async fn clear_all(&mut self) -> Result<()> { + self.store.clear_all()?; + for p in [self.cfg.manifest_path(), self.cfg.ir_path()] { + if p.exists() { + let _ = std::fs::remove_file(p); + } + } + Ok(()) + } + + // ----- helpers ---------------------------------------------------------- + + fn persist_extract_artifacts(&self, records: &[IrRecord], manifest: &Manifest) -> Result<()> { + write_ir(records, &self.cfg.ir_path())?; + std::fs::write( + self.cfg.manifest_path(), + serde_json::to_string_pretty(manifest)?, + )?; + Ok(()) + } + + fn reset_design(&mut self, manifest: &Manifest) -> Result<()> { + let alias = manifest.identity.alias.clone(); + self.store.clear_design(&alias).ok(); + self.store.register_design( + &alias, + &manifest.identity.id, + Some(manifest.identity.workspace.as_str()), + manifest.identity.top.as_deref(), + &manifest.identity.targets, + &manifest.identity.defines, + )?; + Ok(()) + } + + fn embed_and_index(&self, modules: &[&ModuleData]) -> Result { + if self.cfg.skip_embeddings || modules.is_empty() { + return Ok(0); + } + let model = self.embedder.model().to_string(); + let mut n = 0; + for m in modules { + let v = self.embedder.embed_one(&module_document(m))?; + self.store + .upsert_embedding(&m.design, &m.name, &v, &model)?; + n += 1; + } + Ok(n) + } +} + +fn write_ir(records: &[IrRecord], path: &Path) -> Result<()> { + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent)?; + } + let mut writer = std::io::BufWriter::new(std::fs::File::create(path)?); + for rec in records { + bender_kg_models::write_ir_record(&mut writer, rec)?; + } + std::io::Write::flush(&mut writer)?; + Ok(()) +} diff --git a/crates/bender-kg-core/src/lib.rs b/crates/bender-kg-core/src/lib.rs new file mode 100644 index 00000000..69e22434 --- /dev/null +++ b/crates/bender-kg-core/src/lib.rs @@ -0,0 +1,182 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! 
Knowledge-graph orchestrator. Composes extraction with the Grafeo +//! store (graph + HNSW vectors + BM25 text index) into a typed API used +//! by both the `bender kg` CLI and the MCP adapter. +//! +//! The `Engine` surface area is split across focused modules: +//! * [`build`] — extraction + ingest into the Grafeo store. +//! * [`query`] — sync graph reads (modules, hierarchy, structural +//! analysis). +//! * [`search`] — vector-aware semantic search. +//! * [`snippet`] — file-system reads of source-line ranges. +//! +//! Public methods stay `async fn` for back-compat with the MCP adapter +//! and CLI runtime even though Grafeo's surface is synchronous; the +//! bodies are now non-awaiting. + +mod build; +mod query; +mod search; +mod snippet; + +use std::path::PathBuf; + +use bender_kg_extract::SourceGroupInput; +use bender_kg_models::{BuildPhases, Manifest, ModuleData}; +use bender_kg_similarity::{Embedder, build as build_embedder}; +use bender_kg_store::{Store, StoreConfig}; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +pub use bender_kg_store::{DesignStat, GraphStats, InstanceEdge, Subgraph, VectorHit}; + +#[derive(Debug, Error)] +pub enum CoreError { + #[error("extract error: {0}")] + Extract(#[from] bender_kg_extract::ExtractError), + #[error("store error: {0}")] + Store(#[from] bender_kg_store::StoreError), + #[error("embed error: {0}")] + Embed(#[from] bender_kg_similarity::EmbedError), + #[error("models error: {0}")] + Models(#[from] bender_kg_models::ModelsError), + #[error("io error: {0}")] + Io(#[from] std::io::Error), + #[error("serde error: {0}")] + Serde(#[from] serde_json::Error), + #[error("not found: {0}")] + NotFound(String), +} + +pub type Result = std::result::Result; + +#[derive(Debug, Clone)] +pub struct CoreConfig { + /// Directory holding all kg artifacts, typically `/.bender-kg/`. + pub root: PathBuf, + pub embed: bender_kg_similarity::EmbedConfig, + /// Skip the embedding/index step (faster builds, disables search). 
+ pub skip_embeddings: bool, + /// Maximum rows per UNWIND batch in the store's `upsert_modules`. Larger + /// = fewer Cypher round-trips, more memory per call. Defaults to + /// [`bender_kg_store::DEFAULT_UPSERT_CHUNK_SIZE`]. + pub upsert_chunk_size: usize, + /// Overlap slang's `walk_elaborated` with the base graph upsert when + /// `inputs.elab` is on. Default `true`. Set to `false` to fall back + /// to the simpler sequential path (mostly useful for debugging). + pub pipeline_elab: bool, +} + +impl CoreConfig { + pub fn new(root: impl Into) -> Self { + Self { + root: root.into(), + embed: bender_kg_similarity::EmbedConfig::default(), + skip_embeddings: false, + upsert_chunk_size: bender_kg_store::DEFAULT_UPSERT_CHUNK_SIZE, + pipeline_elab: true, + } + } + pub fn ir_path(&self) -> PathBuf { + self.root.join("ir.jsonl") + } + pub fn manifest_path(&self) -> PathBuf { + self.root.join("manifest.json") + } + pub fn store_config(&self, dim: usize) -> StoreConfig { + StoreConfig::new(&self.root) + .with_embedding_dim(dim) + .with_upsert_chunk_size(self.upsert_chunk_size) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct BuildOutcome { + pub manifest: Manifest, + pub modules_indexed: usize, + pub embeddings_indexed: usize, + /// Wall-clock breakdown of the build's major phases. Defaults to all + /// zeros for callers (e.g. `index_from_jsonl`) that don't measure. + /// Consumed by the `bender kg build` JSON summary; can be ignored. + #[serde(default)] + pub phases: BuildPhases, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ModuleSearchResult { + pub name: String, + pub score: f32, + pub file_path: String, + pub design: String, + pub description: String, + pub num_ports: usize, + pub num_params: usize, + pub num_instantiations: usize, +} + +pub struct Engine { + pub(crate) cfg: CoreConfig, + pub(crate) store: Store, + pub(crate) embedder: Box, +} + +impl Engine { + /// Open or create the engine artifacts under `cfg.root`. 
The signature + /// stays async for back-compat with the MCP adapter and CLI; the body + /// is synchronous because Grafeo is sync end-to-end. + pub async fn open(cfg: CoreConfig) -> Result { + std::fs::create_dir_all(&cfg.root)?; + let embedder = build_embedder(&cfg.embed)?; + let store = Store::open(&cfg.store_config(embedder.dim()))?; + Ok(Self { + cfg, + store, + embedder, + }) + } + + pub fn config(&self) -> &CoreConfig { + &self.cfg + } + + pub fn store(&self) -> &Store { + &self.store + } +} + +/// Compose the document string used as the embedding input for a module. +pub(crate) fn module_document(m: &ModuleData) -> String { + let mut parts = vec![format!("module {}", m.name)]; + if !m.file_path.is_empty() { + parts.push(format!("path {}", m.file_path)); + } + if !m.parameters.is_empty() { + let plist: Vec<&str> = m.parameters.iter().map(|p| p.name.as_str()).collect(); + parts.push(format!("params {}", plist.join(" "))); + } + if !m.ports.is_empty() { + let plist: Vec<&str> = m.ports.iter().map(|p| p.name.as_str()).collect(); + parts.push(format!("ports {}", plist.join(" "))); + } + if let Some(desc) = &m.description { + if !desc.is_empty() { + parts.push(desc.clone()); + } + } + parts.join(" ") +} + +/// Convenience: build a [`SourceGroupInput`] from flat lists. +pub fn one_group( + files: Vec, + includes: Vec, + defines: Vec, +) -> SourceGroupInput { + SourceGroupInput { + files, + include_dirs: includes, + defines, + } +} diff --git a/crates/bender-kg-core/src/query.rs b/crates/bender-kg-core/src/query.rs new file mode 100644 index 00000000..85fce691 --- /dev/null +++ b/crates/bender-kg-core/src/query.rs @@ -0,0 +1,138 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Synchronous graph reads. These all delegate to the Grafeo-backed +//! [`Store`], wrapping its result type into [`crate::CoreError`] for +//! callers that compose graph + vector + IR errors uniformly. 
+ +use std::collections::HashSet; + +use bender_kg_models::ModuleData; + +use crate::{Engine, GraphStats, InstanceEdge, Result, Subgraph}; + +impl Engine { + pub fn get_module(&self, name: &str) -> Result> { + Ok(self.store.get_module(name)?) + } + pub fn get_subgraph(&self, name: &str, depth: i32) -> Result { + Ok(self.store.get_subgraph(name, depth)?) + } + pub fn get_parents(&self, name: &str) -> Result> { + Ok(self.store.get_parents(name)?) + } + pub fn get_children(&self, name: &str) -> Result> { + Ok(self.store.get_children(name)?) + } + pub fn get_instance_context(&self, parent: &str, child: &str) -> Result> { + Ok(self.store.get_instance_context(parent, child)?) + } + pub fn trace_hierarchy_path(&self, from: &str, to: &str) -> Result> { + Ok(self.store.trace_hierarchy_path(from, to)?) + } + pub fn check_connectivity(&self, module: &str, depth: i32) -> Result> { + Ok(self.store.check_connectivity(module, depth)?) + } + pub fn trace_parameter(&self, module: &str, param: &str) -> Result> { + Ok(self.store.trace_parameter(module, param)?) + } + pub fn trace_signal(&self, module: &str, signal: &str) -> Result> { + Ok(self.store.trace_signal(module, signal)?) + } + + /// Recursively follow a signal through the instantiation hierarchy. + /// Returns a nested structure where each entry gains a `"children"` array + /// containing connections at the next level (signal name as it appears in + /// the child module). + pub fn trace_signal_recursive( + &self, + module: &str, + signal: &str, + max_depth: i32, + ) -> Result> { + let fetch = |m: &str, k: &str| Ok(self.store.trace_signal(m, k)?); + self.trace_rec(module, signal, max_depth, 0, &mut HashSet::new(), &fetch, "child_port") + } + + /// Recursively follow a parameter through the instantiation hierarchy. + /// Returns a nested structure where each entry gains a `"children"` array + /// containing further propagations from the child module's perspective. 
+ pub fn trace_parameter_recursive( + &self, + module: &str, + param: &str, + max_depth: i32, + ) -> Result> { + let fetch = |m: &str, k: &str| Ok(self.store.trace_parameter(m, k)?); + self.trace_rec(module, param, max_depth, 0, &mut HashSet::new(), &fetch, "child_parameter") + } + + /// Generic DFS with cycle detection. `fetch` retrieves the flat one-hop + /// connections for a (module, key) pair; `child_field` names the JSON + /// field carrying the key as it appears in the child module. + fn trace_rec( + &self, + module: &str, + key: &str, + max_depth: i32, + depth: i32, + on_path: &mut HashSet<(String, String)>, + fetch: &dyn Fn(&str, &str) -> Result>, + child_field: &str, + ) -> Result> { + if depth >= max_depth { + return Ok(vec![]); + } + let path_key = (module.to_string(), key.to_string()); + if on_path.contains(&path_key) { + return Ok(vec![]); // cycle guard + } + on_path.insert(path_key.clone()); + let flat = fetch(module, key)?; + let mut result = Vec::new(); + for conn in flat { + let child_module = conn["child"].as_str().unwrap_or("").to_string(); + let child_key = conn[child_field].as_str().unwrap_or("").to_string(); + let children = if !child_module.is_empty() && !child_key.is_empty() { + self.trace_rec(&child_module, &child_key, max_depth, depth + 1, on_path, fetch, child_field)? + } else { + vec![] + }; + let mut entry = conn; + entry["children"] = serde_json::json!(children); + result.push(entry); + } + on_path.remove(&path_key); + Ok(result) + } + + pub fn find_by_protocol( + &self, + protocol: &str, + design: Option<&str>, + ) -> Result> { + Ok(self.store.find_by_protocol(protocol, design)?) + } + pub fn match_interfaces( + &self, + a: &str, + b: &str, + prefix_a: &str, + prefix_b: &str, + ) -> Result { + Ok(self.store.match_interfaces(a, b, prefix_a, prefix_b)?) 
+ } + pub fn find_structurally_similar( + &self, + module: &str, + min_overlap: f64, + design: Option<&str>, + ) -> Result> { + Ok(self + .store + .find_structurally_similar(module, min_overlap, design)?) + } + pub fn stats(&self, design: Option<&str>) -> Result { + Ok(self.store.stats(design)?) + } +} diff --git a/crates/bender-kg-core/src/search.rs b/crates/bender-kg-core/src/search.rs new file mode 100644 index 00000000..80010f01 --- /dev/null +++ b/crates/bender-kg-core/src/search.rs @@ -0,0 +1,91 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Semantic search over the Grafeo HNSW vector index. +//! +//! Each hit is hydrated from the graph store so callers always get a +//! self-contained [`crate::ModuleSearchResult`] (name + score + source +//! metadata). Batch search dedupes by module name and keeps the best score. + +use indexmap::IndexMap; + +use crate::{Engine, ModuleSearchResult, Result}; + +impl Engine { + pub async fn search_modules( + &self, + query: &str, + top_k: usize, + design: Option<&str>, + ) -> Result> { + let qv = self.embedder.embed_one(query)?; + let hits = self.store.search_modules_by_vector(&qv, top_k, design)?; + let mut out = Vec::with_capacity(hits.len()); + for h in hits { + if let Some(m) = self.store.get_module(&h.module)? { + out.push(ModuleSearchResult { + name: m.name, + score: h.score, + file_path: m.file_path, + design: m.design, + description: m.description.unwrap_or_default(), + num_ports: m.ports.len(), + num_params: m.parameters.len(), + num_instantiations: m.instantiations.len(), + }); + } + } + Ok(out) + } + + pub async fn search_modules_batch( + &self, + queries: &[String], + top_k: usize, + design: Option<&str>, + ) -> Result> { + let mut by_name: IndexMap = IndexMap::new(); + for q in queries { + for r in self.search_modules(q, top_k, design).await? 
{ + let entry = by_name.entry(r.name.clone()).or_insert_with(|| r.clone()); + if r.score > entry.score { + *entry = r; + } + } + } + let mut out: Vec = by_name.into_values().collect(); + out.sort_by(|a, b| { + b.score + .partial_cmp(&a.score) + .unwrap_or(std::cmp::Ordering::Equal) + }); + Ok(out) + } +} + +#[cfg(test)] +mod tests { + use crate::{CoreConfig, Engine, module_document}; + use bender_kg_models::ModuleData; + + #[tokio::test(flavor = "current_thread")] + async fn search_returns_self_for_seeded_index() { + let tmp = tempfile::tempdir().unwrap(); + let mut cfg = CoreConfig::new(tmp.path()); + cfg.embed.force_hash = true; + let eng = Engine::open(cfg).await.unwrap(); + let mut m = ModuleData::default(); + m.name = "tt_fpu_v2".into(); + m.design = "d".into(); + eng.store + .register_design("d", "ID", None, None, &["rtl".to_string()], &[]) + .unwrap(); + eng.store.upsert_module(&m).unwrap(); + let v = eng.embedder.embed_one(&module_document(&m)).unwrap(); + eng.store + .upsert_embedding(&m.design, &m.name, &v, eng.embedder.model()) + .unwrap(); + let hits = eng.search_modules("tt_fpu_v2", 5, None).await.unwrap(); + assert_eq!(hits[0].name, "tt_fpu_v2"); + } +} diff --git a/crates/bender-kg-core/src/snippet.rs b/crates/bender-kg-core/src/snippet.rs new file mode 100644 index 00000000..b368041c --- /dev/null +++ b/crates/bender-kg-core/src/snippet.rs @@ -0,0 +1,90 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Source-line retrieval for ports / params / module / instance ranges. +//! All inputs come from the graph store; the file system is only touched +//! to read the requested slice. + +use crate::{CoreError, Engine, Result}; + +impl Engine { + pub fn get_source_snippet( + &self, + module_name: &str, + element: &str, + instance_name: &str, + ) -> Result { + let m = self + .get_module(module_name)? 
+ .ok_or_else(|| CoreError::NotFound(module_name.into()))?; + let (start, end): (Option, Option) = match element { + "module" => (m.line_start, m.line_end), + "ports" => match m.port_block_lines { + Some((s, e)) => (Some(s), Some(e)), + None => (None, None), + }, + "params" => match m.param_block_lines { + Some((s, e)) => (Some(s), Some(e)), + None => (None, None), + }, + "instance" => { + if instance_name.is_empty() { + return Ok(serde_json::json!({ + "error": "instance_name required for element='instance'" + })); + } + let mut s = None; + let mut e = None; + for inst in &m.instantiations { + if inst.instance_name == instance_name { + s = inst.line_start; + e = inst.line_end; + break; + } + } + (s, e) + } + _ => { + return Ok(serde_json::json!({ + "error": format!( + "unknown element '{}'. Use: module, ports, params, instance", + element + ), + })); + } + }; + let (Some(start), Some(end)) = (start, end) else { + return Ok(serde_json::json!({ + "error": format!( + "no line range for element '{}' on module '{}'", + element, module_name + ), + })); + }; + if m.file_path.is_empty() || !std::path::Path::new(&m.file_path).exists() { + return Ok(serde_json::json!({ + "error": format!("source file not found: {}", m.file_path), + "file_path": m.file_path, + "line_start": start, + "line_end": end, + })); + } + let text = std::fs::read_to_string(&m.file_path)?; + let lines: Vec<&str> = text.lines().collect(); + let s = start.max(1) as usize; + let e = (end as usize).min(lines.len()); + let snippet = if s <= e { + lines[s - 1..e].join("\n") + } else { + String::new() + }; + Ok(serde_json::json!({ + "file_path": m.file_path, + "line_start": start, + "line_end": end, + "element": element, + "module_name": module_name, + "snippet": snippet, + })) + } +} diff --git a/crates/bender-kg-core/tests/integration.rs b/crates/bender-kg-core/tests/integration.rs new file mode 100644 index 00000000..8e03a323 --- /dev/null +++ b/crates/bender-kg-core/tests/integration.rs @@ -0,0 +1,297 @@ 
+// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! End-to-end test driving the full kg pipeline (extract -> store -> search) +//! against the existing bender pickle fixtures. Verifies that the parsed +//! design produces the expected module / instantiation / import topology. + +use std::path::PathBuf; + +use bender_kg_core::{CoreConfig, Engine}; +use bender_kg_extract::{ExtractInputs, SourceGroupInput}; + +fn pickle_root() -> PathBuf { + PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .join("../..") + .join("tests/pickle") + .canonicalize() + .expect("pickle fixture should exist") +} + +fn fixture_file(rel: &str) -> String { + pickle_root().join(rel).to_string_lossy().into_owned() +} + +fn fixture_inputs(workspace: &str) -> ExtractInputs { + ExtractInputs { + workspace: workspace.to_string(), + targets: vec!["rtl".into()], + tops: vec!["top".into()], + // Default fixture turns elab ON so tests can assert against + // `resolved_param_values` / `resolved_port_widths`. Tests that + // exercise the no-elab path flip this off explicitly. + elab: true, + design_alias: Some("pickle".into()), + groups: vec![SourceGroupInput { + files: vec![ + fixture_file("src/common_pkg.sv"), + fixture_file("src/bus_intf.sv"), + fixture_file("src/leaf.sv"), + fixture_file("src/core.sv"), + fixture_file("src/top.sv"), + ], + include_dirs: vec![pickle_root().join("include").to_string_lossy().into_owned()], + defines: vec![], + }], + ..Default::default() + } +} + +#[tokio::test(flavor = "current_thread")] +async fn build_extracts_modules_and_hierarchy_from_pickle_fixture() { + let tmp = tempfile::tempdir().unwrap(); + let mut cfg = CoreConfig::new(tmp.path()); + cfg.skip_embeddings = false; + let mut engine = Engine::open(cfg).await.expect("open engine"); + + let inputs = fixture_inputs(&pickle_root().to_string_lossy()); + let outcome = engine.build(&inputs).await.expect("build should succeed"); + + // Five top-level declarations: top, core, leaf, bus_intf, common_pkg. 
+ assert!( + outcome.modules_indexed >= 5, + "expected at least 5 modules, got {}", + outcome.modules_indexed + ); + assert!(outcome.embeddings_indexed > 0); + + // Module retrieval -------------------------------------------------------- + let top = engine + .get_module("top") + .unwrap() + .expect("top module should be present"); + assert!(!top.is_package); + assert_eq!(top.design, outcome.manifest.identity.alias); + + let core = engine + .get_module("core") + .unwrap() + .expect("core module should be present"); + assert!(!core.is_package); + assert!( + core.parameters.iter().any(|p| p.name == "DefaultState"), + "core should expose DefaultState parameter, got {:?}", + core.parameters + ); + + let common_pkg = engine + .get_module("common_pkg") + .unwrap() + .expect("common_pkg should be present"); + assert!(common_pkg.is_package); + + // Subgraph traversal ------------------------------------------------------ + let sub = engine.get_subgraph("top", 2).expect("subgraph"); + let modules: Vec<&str> = sub.nodes.iter().map(|m| m.name.as_str()).collect(); + assert!(modules.contains(&"top")); + assert!(modules.contains(&"core")); + assert!( + modules.contains(&"leaf"), + "expected leaf reachable at depth=2, got {modules:?}" + ); + + // Hierarchy path ---------------------------------------------------------- + let path = engine + .trace_hierarchy_path("top", "leaf") + .expect("hierarchy path"); + assert!(!path.is_empty(), "expected a top -> leaf path"); + + // Elaborated parameter forwarding ---------------------------------------- + // top instantiates core with .DefaultState(common_pkg::Error). The walk + // captures the textual expression in `param_bindings`; elab folds it to + // a literal in `resolved_param_values`. Both must be present. 
+ let top_to_core = engine + .get_instance_context("top", "core") + .expect("top->core context"); + assert_eq!(top_to_core.len(), 1); + let edge = &top_to_core[0]; + assert_eq!( + edge.param_bindings.get("DefaultState").map(String::as_str), + Some("common_pkg::Error"), + "textual call-site expression should survive elab", + ); + assert!( + edge.resolved_param_values + .get("DefaultState") + .is_some_and(|v| !v.is_empty()), + "elab should fold DefaultState to a literal, got: {:?}", + edge.resolved_param_values + ); + + // Parents ----------------------------------------------------------------- + let parents = engine.get_parents("leaf").expect("parents"); + let parent_names: Vec<&str> = parents.iter().map(|m| m.name.as_str()).collect(); + assert!( + parent_names.contains(&"core"), + "leaf should have core as a parent, got {parent_names:?}" + ); + + // Vector search by exact name should surface the module itself. + let hits = engine + .search_modules("top", 5, None) + .await + .expect("search_modules"); + assert!( + hits.iter().any(|h| h.name == "top"), + "search hits: {hits:?}" + ); + + // clear_design ----------------------------------------------------------- + engine + .clear_design(&outcome.manifest.identity.alias) + .await + .expect("clear_design"); + assert!( + engine.get_module("top").unwrap().is_none(), + "clear_design should remove all modules for the alias" + ); +} + +/// Default path: `--top top` but no `--elab`. Slang elaboration is skipped, +/// so `resolved_port_widths` stay empty, but the graph and every other +/// query path must still work end-to-end across the pruned-but-complete +/// set of modules reachable from `top`. 
+#[tokio::test(flavor = "current_thread")] +async fn build_without_elab_still_indexes_reachable_graph() { + let tmp = tempfile::tempdir().unwrap(); + let mut cfg = CoreConfig::new(tmp.path()); + cfg.skip_embeddings = true; + let mut engine = Engine::open(cfg).await.expect("open engine"); + + let mut inputs = fixture_inputs(&pickle_root().to_string_lossy()); + inputs.elab = false; + let outcome = engine + .build(&inputs) + .await + .expect("build without --elab should succeed"); + assert!(outcome.modules_indexed >= 5); + + let ctx = engine + .get_instance_context("core", "leaf") + .expect("instance context"); + assert!(!ctx.is_empty(), "core should still instantiate leaf"); + assert!( + ctx.iter().all(|e| e.resolved_port_widths.is_empty()), + "no --elab means no resolved port widths, got: {:?}", + ctx + ); + assert!( + ctx.iter().all(|e| e.parent_file_path.ends_with("core.sv")), + "every instance edge should carry the parent's source file, got: {:?}", + ctx.iter().map(|e| &e.parent_file_path).collect::>() + ); + + let path = engine + .trace_hierarchy_path("top", "leaf") + .expect("hierarchy path"); + assert!(!path.is_empty()); +} + +/// Build the dedicated `struct_port.sv` fixture with `--top bus_top +/// --elab` and confirm: +/// 1. `resolved_param_values` is empty here (no parameters bound) but +/// the `param_bindings` map likewise stays untouched. +/// 2. Scalar ports (`clk_i`) report `total > 0` and an empty `fields` +/// map. +/// 3. Packed-struct ports (`req_i`) report `total = 36` with the field +/// breakdown `{addr: 32, prot: 3, valid: 1}`. +/// 4. Nested packed-struct ports (`resp_o`) flatten via dot notation: +/// `{status: 8, nested_req.addr: 32, nested_req.prot: 3, +/// nested_req.valid: 1}` and `total = 44`. +/// 5. 
Packed-array-of-structs ports (`req_arr_i`) report +/// `total = 4 * 36 = 144`, `element_count = 4`, an empty top-level +/// `fields` map, and an `element` template carrying the per-element +/// `total = 36` plus the same dot-flattened struct breakdown. +#[tokio::test(flavor = "current_thread")] +async fn elab_top_populates_struct_port_field_breakdown() { + let tmp = tempfile::tempdir().unwrap(); + let mut cfg = CoreConfig::new(tmp.path()); + cfg.skip_embeddings = true; + let mut engine = Engine::open(cfg).await.expect("open engine"); + + let inputs = ExtractInputs { + workspace: pickle_root().to_string_lossy().into(), + targets: vec!["rtl".into()], + tops: vec!["bus_top".into()], + elab: true, + design_alias: Some("struct_port".into()), + groups: vec![SourceGroupInput { + files: vec![fixture_file("src/struct_port.sv")], + include_dirs: vec![], + defines: vec![], + }], + ..Default::default() + }; + engine.build(&inputs).await.expect("struct_port build"); + + let ctx = engine + .get_instance_context("bus_top", "bus_consumer") + .expect("instance context"); + assert_eq!(ctx.len(), 1); + let edge = &ctx[0]; + + let clk = edge.resolved_port_widths.get("clk_i").expect("clk_i"); + assert_eq!(clk.total, 1); + assert!(clk.fields.is_empty()); + + let req = edge.resolved_port_widths.get("req_i").expect("req_i"); + assert_eq!(req.total, 36); + assert_eq!(req.fields.get("addr"), Some(&32)); + assert_eq!(req.fields.get("prot"), Some(&3)); + assert_eq!(req.fields.get("valid"), Some(&1)); + + let resp = edge.resolved_port_widths.get("resp_o").expect("resp_o"); + assert_eq!(resp.total, 44); + assert_eq!(resp.fields.get("status"), Some(&8)); + assert_eq!(resp.fields.get("nested_req.addr"), Some(&32)); + assert_eq!(resp.fields.get("nested_req.prot"), Some(&3)); + assert_eq!(resp.fields.get("nested_req.valid"), Some(&1)); + + let arr = edge + .resolved_port_widths + .get("req_arr_i") + .expect("req_arr_i"); + assert_eq!(arr.total, 144); + assert_eq!(arr.element_count, Some(4)); + 
assert!( + arr.fields.is_empty(), + "top-level fields stay empty for arrays" + ); + let elem = arr.element.as_deref().expect("element template populated"); + assert_eq!(elem.total, 36); + assert_eq!(elem.fields.get("addr"), Some(&32)); + assert_eq!(elem.fields.get("prot"), Some(&3)); + assert_eq!(elem.fields.get("valid"), Some(&1)); +} + +/// Asking for a `--top` that does not exist must hard-error rather than +/// silently producing an empty graph (both with and without `--elab`). +#[tokio::test(flavor = "current_thread")] +async fn unknown_top_hard_errors() { + let tmp = tempfile::tempdir().unwrap(); + let mut cfg = CoreConfig::new(tmp.path()); + cfg.skip_embeddings = true; + let mut engine = Engine::open(cfg).await.expect("open engine"); + + let mut inputs = fixture_inputs(&pickle_root().to_string_lossy()); + inputs.tops = vec!["definitely_not_a_module".into()]; + let err = engine + .build(&inputs) + .await + .expect_err("build should fail when --top doesn't match"); + let msg = format!("{err}"); + assert!( + msg.contains("--top") && msg.contains("definitely_not_a_module"), + "error should mention the offending top: {msg}" + ); +} diff --git a/crates/bender-kg-extract/Cargo.toml b/crates/bender-kg-extract/Cargo.toml new file mode 100644 index 00000000..7e0f3008 --- /dev/null +++ b/crates/bender-kg-extract/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "bender-kg-extract" +version = "0.1.0" +edition = "2024" +description = "Internal bender crate: SystemVerilog -> kg.v1 extraction pipeline" +license = "Apache-2.0" +authors = ["Alessandro Ottaviano "] + +[dependencies] +bender-kg-models = { workspace = true } +bender-slang = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } +indexmap = { workspace = true } +log = { workspace = true } +sha2 = { workspace = true } + +[dev-dependencies] +tempfile = "3.5" + +[package.metadata.dist] +dist = false diff --git a/crates/bender-kg-extract/README.md 
b/crates/bender-kg-extract/README.md new file mode 100644 index 00000000..c79a629a --- /dev/null +++ b/crates/bender-kg-extract/README.md @@ -0,0 +1,16 @@ +# bender-kg-extract + +> **Internal crate:** `bender-kg-extract` is an internal crate of [Bender](https://github.com/pulp-platform/bender). It does not provide a stable public API — breaking changes may occur at any time without notice. + +`bender-kg-extract` implements the SystemVerilog → knowledge-graph extraction pipeline. It drives `bender-slang` to parse source files and converts the resulting Slang AST into `bender-kg-models` IR types that can be ingested by `bender-kg-store`. + +## Pipeline + +1. **Parse** — invoke Slang on the source files collected by `bender sources`. +2. **Extract** — walk the Slang AST to collect module declarations, port lists, parameter declarations, package imports, and instantiation edges. +3. **Emit** — write IR records to a caller-supplied `IrSink` (in-memory or JSONL file). + +The main entry points are: +- `extract()` — single-threaded extraction to a sink. +- `extract_pipelined()` — same pipeline as `extract()`, but defers the elaboration merge: when `elab` is set it returns an `ElabHandle` that the caller can run on a worker thread while the base graph is upserted. +- `extract_to_jsonl()` — convenience wrapper that writes IR to a JSONL manifest file. diff --git a/crates/bender-kg-extract/src/defines.rs b/crates/bender-kg-extract/src/defines.rs new file mode 100644 index 00000000..56c31354 --- /dev/null +++ b/crates/bender-kg-extract/src/defines.rs @@ -0,0 +1,40 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Synthetic preprocessor defines, mirroring `bender script flist-plus`. +//! +//! `flist-plus` automatically emits `+define+TARGET_` for each active +//! target plus `+define+TARGET_FLIST`. Many manifests rely on this convention +//! (`\`ifdef TARGET_SIMULATION ...`). `bender sources` does not synthesize +//! them, so consumers that bypass `flist-plus` (pickle, kg parse) have to do +//! it themselves to stay in parity. 
+ +/// Emit `TARGET_` defines for each active target plus the bookkeeping +/// `TARGET_FLIST` flag, matching `flist-plus` output. +pub fn target_defines(targets: &[String]) -> Vec { + let mut out = Vec::with_capacity(targets.len() + 1); + for t in targets { + out.push(format!("TARGET_{}", t.to_uppercase())); + } + out.push("TARGET_FLIST".to_string()); + out +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn emits_uppercased_target_defines() { + let out = target_defines(&["sim".into(), "smc_chiplet".into()]); + assert_eq!( + out, + vec!["TARGET_SIM", "TARGET_SMC_CHIPLET", "TARGET_FLIST"] + ); + } + + #[test] + fn empty_targets_still_emit_flist_marker() { + assert_eq!(target_defines(&[]), vec!["TARGET_FLIST"]); + } +} diff --git a/crates/bender-kg-extract/src/elab.rs b/crates/bender-kg-extract/src/elab.rs new file mode 100644 index 00000000..4c35170d --- /dev/null +++ b/crates/bender-kg-extract/src/elab.rs @@ -0,0 +1,161 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Merge the elaborated walk's per-instance contexts (resolved parameters and +//! port widths) into module-level `InstantiationInfo`. +//! +//! Elaboration is *opt-in* via `--elab` and only enriches +//! `resolved_param_values` / `resolved_port_widths`. The graph still +//! contains every parsed module that the prior pruning pass deemed +//! reachable from `--top`; callers who don't pass `--elab` skip this pass +//! entirely and pay zero elaboration cost. The textual `param_bindings` +//! map (call-site expressions captured by the syntactic walk) is never +//! mutated -- it is the source-of-truth for "what was written" and lives +//! alongside `resolved_param_values` ("what slang folded it to"). 
+ +use std::collections::{BTreeMap, HashMap}; + +use bender_kg_models::{ModuleData, ResolvedPortWidth}; +use bender_slang::{KgElabResult, KgKeyValue, KgPortWidth, SlangSession}; + +use crate::{ExtractError, Result}; + +/// Run `walk_elaborated` against `session` and merge resolved parameter +/// bindings + port widths into the matching `InstantiationInfo` records on +/// `modules`. Match key is `(parent_module_name, instance_name, +/// child_module)`. +/// +/// Complexity is `O(M + total_I + C)` for `M` modules, `total_I` +/// instantiations across all modules, and `C` elab contexts: we build +/// a parent-name index and a per-module instantiation index once up +/// front, then look up in O(1) inside the hot loop. Previously this +/// loop was `O(C * (M + I))` which dominated the build wall-clock on +/// designs with many contexts (hundreds of seconds for large designs). +/// +/// Returns an `InvalidInput` error if the caller named one or more `tops` +/// but slang resolved zero instance contexts — typically a mistyped module +/// name. Silent-empty was a documented footgun; surfacing it as an error +/// matches the principle "if you opt into enrichment, opt-in must succeed". +pub(crate) fn enrich( + session: &SlangSession, + tops: &[String], + modules: &mut [ModuleData], +) -> Result> { + if tops.is_empty() { + return Ok(Vec::new()); + } + let elab: KgElabResult = session.walk_elaborated(tops)?; + if elab.contexts.is_empty() { + return Err(ExtractError::InvalidInput(format!( + "--top {:?} did not match any instance in the elaborated design; \ + check the name(s) or omit --elab to build a graph without \ + instance-level enrichment", + tops + ))); + } + + // Indexes: `name -> module_idx` and per-module `(instance_name, + // child_module) -> inst_idx`. Owned `String` keys because the + // resulting borrows from `modules` will alias the mutable slice + // inside the hot loop. 
+ let by_name: HashMap = modules + .iter() + .enumerate() + .map(|(i, m)| (m.name.clone(), i)) + .collect(); + let inst_idx_per_module: Vec> = modules + .iter() + .map(|m| { + m.instantiations + .iter() + .enumerate() + .map(|(i, inst)| ((inst.instance_name.clone(), inst.module_name.clone()), i)) + .collect() + }) + .collect(); + + let mut warnings = elab.warnings.clone(); + let mut merged = 0usize; + let mut parent_miss = 0usize; + let mut inst_miss = 0usize; + + for ctx in &elab.contexts { + // Top instances have no parent in our schema; nothing to merge into. + if ctx.parent_module.is_empty() { + continue; + } + let Some(&pi) = by_name.get(ctx.parent_module.as_str()) else { + parent_miss += 1; + continue; + }; + let Some(&ii) = + inst_idx_per_module[pi].get(&(ctx.instance_name.clone(), ctx.child_module.clone())) + else { + inst_miss += 1; + continue; + }; + merged += 1; + let inst = &mut modules[pi].instantiations[ii]; + for kv in &ctx.param_bindings { + // Skip empties so a partial elab failure on one symbol doesn't + // shadow the textual call-site value with "". + if !kv.value.is_empty() { + inst.resolved_param_values + .insert(kv.key.clone(), kv.value.clone()); + } + } + for pw in &ctx.port_widths { + inst.resolved_port_widths + .insert(pw.name.clone(), to_resolved_width(pw)); + } + } + warnings.push(format!( + "elab: {} contexts, merged={merged} (parent_miss={parent_miss}, inst_miss={inst_miss})", + elab.contexts.len() + )); + + Ok(warnings) +} + +/// Convert the FFI `KgPortWidth` (string-encoded field widths over cxx) into +/// the typed model record. Field-width strings that fail to parse are +/// dropped silently — slang only emits them via `std::to_string`, so the +/// fallback is purely defensive. +/// +/// When `pw.element_count > 0` the port's canonical type was a packed array, +/// so we surface a one-level `element` template (per-element `total` plus +/// flattened struct fields). 
Scalar arrays produce an `element` with empty +/// `fields`; non-array ports leave both `element` and `element_count` as +/// `None`. +pub(crate) fn to_resolved_width(pw: &KgPortWidth) -> ResolvedPortWidth { + let fields = parse_kv_widths(&pw.fields); + let (element_count, element) = if pw.element_count > 0 { + ( + Some(pw.element_count), + Some(Box::new(ResolvedPortWidth { + total: pw.element_total, + fields: parse_kv_widths(&pw.element_fields), + element_count: None, + element: None, + })), + ) + } else { + (None, None) + }; + ResolvedPortWidth { + total: pw.total, + fields, + element_count, + element, + } +} + +fn parse_kv_widths(kvs: &[KgKeyValue]) -> BTreeMap { + let mut out = BTreeMap::new(); + for kv in kvs { + if let Ok(w) = kv.value.parse::() { + out.insert(kv.key.clone(), w); + } + } + out +} diff --git a/crates/bender-kg-extract/src/emit.rs b/crates/bender-kg-extract/src/emit.rs new file mode 100644 index 00000000..fce7f6d2 --- /dev/null +++ b/crates/bender-kg-extract/src/emit.rs @@ -0,0 +1,79 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! IR sinks (streaming + in-memory) and manifest assembly. + +use std::io::Write; + +use bender_kg_models::{IrRecord, Manifest, ModuleData}; + +use crate::{ExtractError, Result}; + +/// Streaming output target. +pub trait IrSink { + fn emit(&mut self, rec: &IrRecord) -> Result<()>; +} + +impl IrSink for W { + fn emit(&mut self, rec: &IrRecord) -> Result<()> { + bender_kg_models::write_ir_record(self, rec).map_err(ExtractError::Models) + } +} + +/// In-memory sink, useful for tests and the core's typed API. +#[derive(Debug, Default)] +pub struct VecSink { + pub records: Vec, +} + +impl IrSink for VecSink { + fn emit(&mut self, rec: &IrRecord) -> Result<()> { + self.records.push(rec.clone()); + Ok(()) + } +} + +/// Build the manifest from per-module statistics + parse metadata. 
+pub(crate) fn build_manifest( + identity: bender_kg_models::DesignIdentity, + modules: &[ModuleData], + file_count: usize, + srclist_hash: String, + warnings: Vec, +) -> Manifest { + let module_count = modules.iter().filter(|m| !m.is_package).count(); + let package_count = modules.iter().filter(|m| m.is_package).count(); + let edge_count: usize = modules.iter().map(|m| m.instantiations.len()).sum(); + + let mut manifest = Manifest::new(identity); + manifest.file_count = file_count; + manifest.module_count = module_count; + manifest.package_count = package_count; + manifest.edge_count = edge_count; + manifest.srclist_hash = srclist_hash; + manifest.slang_version = Some(env!("CARGO_PKG_VERSION").to_string()); + manifest.created_at = current_timestamp(); + manifest.extraction_warnings = warnings; + manifest +} + +fn current_timestamp() -> Option { + use std::time::{SystemTime, UNIX_EPOCH}; + SystemTime::now() + .duration_since(UNIX_EPOCH) + .ok() + .map(|d| format!("unix:{}", d.as_secs())) +} + +/// Stream a Manifest record followed by one Module record per module. +pub(crate) fn stream( + sink: &mut S, + manifest: &Manifest, + modules: &[ModuleData], +) -> Result<()> { + sink.emit(&IrRecord::Manifest(manifest.clone()))?; + for m in modules { + sink.emit(&IrRecord::Module(m.clone()))?; + } + Ok(()) +} diff --git a/crates/bender-kg-extract/src/lib.rs b/crates/bender-kg-extract/src/lib.rs new file mode 100644 index 00000000..1d46228f --- /dev/null +++ b/crates/bender-kg-extract/src/lib.rs @@ -0,0 +1,573 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! SystemVerilog -> `kg.v3` extraction pipeline. +//! +//! Mirrors `bender pickle`'s shape: per Bender source group we drive +//! `SlangSession::parse_group`, then walk the parsed trees syntactically +//! (`walk_design`) to capture every declared module, package, and +//! interface. The graph contains the *parsed* view of the design. +//! +//! 
`ExtractInputs.tops` (`--top` on the CLI) is REQUIRED: the parsed tree +//! set is pruned to those reachable from these tops before the syntactic +//! walk, so the resulting graph captures exactly the modules used by the +//! design. Elaboration (`walk_elaborated`) is a separate opt-in via +//! `ExtractInputs.elab` (`--elab`); when set, slang specializes parameters +//! / resolves port widths from the named hierarchy roots and we merge +//! those resolved values into the matching `InstantiationInfo` records. + +mod defines; +mod elab; +mod emit; +mod parse; +mod walk; + +pub use defines::target_defines; +pub use emit::{IrSink, VecSink}; + +use std::path::PathBuf; + +use bender_kg_models::{Manifest, ModuleData}; +use indexmap::IndexMap; +use serde::{Deserialize, Serialize}; +use thiserror::Error; + +#[derive(Debug, Error)] +pub enum ExtractError { + #[error("bender-slang error: {0}")] + Slang(#[from] bender_slang::SlangError), + #[error("models error: {0}")] + Models(#[from] bender_kg_models::ModelsError), + #[error("io error: {0}")] + Io(#[from] std::io::Error), + #[error("serde error: {0}")] + Serde(#[from] serde_json::Error), + #[error("invalid input: {0}")] + InvalidInput(String), +} + +pub type Result = std::result::Result; + +/// One Bender source group (already filtered for the active target set). +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct SourceGroupInput { + pub files: Vec, + pub include_dirs: Vec, + /// Preprocessor defines, formatted as `NAME` or `NAME=VALUE`. + pub defines: Vec, +} + +/// Aggregate input describing the full design build. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ExtractInputs { + pub workspace: String, + pub targets: Vec, + /// One or more elaboration roots. REQUIRED. 
The graph is pruned to + /// only the syntax trees reachable from these tops (via slang's + /// symbol-reference graph) before the downstream walk, so the + /// resulting graph captures exactly the modules used by the design. + /// The first entry is recorded in the manifest for traceability. + pub tops: Vec, + /// When `true`, run slang's elaboration pass from `tops` and enrich + /// `InstantiationInfo` with `resolved_param_values` and + /// `resolved_port_widths`. When `false` (default), elaboration is + /// skipped entirely; pruning still happens. + #[serde(default)] + pub elab: bool, + pub design_alias: Option, + pub groups: Vec, + /// Treat all source groups as one slang compilation unit (vcs / `vlog + /// -mfcu` semantics): `\`define`s declared in earlier groups become + /// visible to later groups. Default `false` keeps the per-group + /// preprocessor scoping that the simulator-script paths use. + #[serde(default)] + pub single_unit: bool, + /// Best-effort parsing: report parse-time errors but don't abort the + /// build. The indexer ingests whichever modules survived parsing. + /// Useful for repos with encrypted vendor IP, missing `\`include`s, or + /// other hostile inputs. Default `false` (strict). + #[serde(default)] + pub lenient: bool, + /// Hint for the maximum number of parallel parse workers (`0` means + /// "use the default of 1"). + /// + /// Pruning (Phase 1, mandatory) requires `reachable_tree_indices` to + /// resolve symbol references across every parsed tree from a single + /// `SlangSession`; the C++ analyzer has no public API to merge two + /// slang sessions, so per-worker sessions cannot share their + /// `SourceManager` / symbol tables. As a result this hint is + /// currently capped to `1` internally — the field exists so the CLI + /// flag stays stable while a future change to `bender-slang` adds + /// session merging. Setting it to `>1` today is a no-op and emits an + /// informational warning. 
+ #[serde(default)] + pub parse_jobs: u32, +} + +pub use bender_kg_models::ResolvedEdgeUpdate; + +/// Owned post-pipeline elaboration handle. +/// +/// Yielded by [`extract_pipelined`] when `inputs.elab` is `true`; carries +/// the parsed, pruned `SlangSession` plus the `--top` roots so a caller +/// can drive `walk_elaborated` on a worker thread, overlapping it with +/// the base graph upsert. Implements `Send` (via the underlying +/// `bender_slang::SlangSession` impl) so it can be moved into a +/// `std::thread::scope` worker. Drop the handle to release the slang +/// state. +pub struct ElabHandle { + session: bender_slang::SlangSession, + tops: Vec, + design: String, +} + +impl ElabHandle { + /// Drive `walk_elaborated` against the carried session and translate + /// every resolved instance context into a flat + /// [`ResolvedEdgeUpdate`]. Returns the updates plus the elab-emitted + /// warnings. Errors mirror the inline `elab::enrich` path: returns + /// `InvalidInput` if no instance contexts come back (i.e. the + /// requested `--top` matched nothing in the elaborated design). 
+ pub fn run(&self) -> Result<(Vec, Vec)> { + let elab = self.session.walk_elaborated(&self.tops)?; + if elab.contexts.is_empty() { + return Err(ExtractError::InvalidInput(format!( + "--top {:?} did not match any instance in the elaborated design; \ + check the name(s) or omit --elab to build a graph without \ + instance-level enrichment", + self.tops + ))); + } + let mut updates = Vec::with_capacity(elab.contexts.len()); + for ctx in &elab.contexts { + if ctx.parent_module.is_empty() { + continue; + } + let mut rpv: std::collections::BTreeMap = + std::collections::BTreeMap::new(); + for kv in &ctx.param_bindings { + if !kv.value.is_empty() { + rpv.insert(kv.key.clone(), kv.value.clone()); + } + } + let mut rpw: std::collections::BTreeMap = + std::collections::BTreeMap::new(); + for pw in &ctx.port_widths { + rpw.insert(pw.name.clone(), elab::to_resolved_width(pw)); + } + updates.push(ResolvedEdgeUpdate { + parent_module: ctx.parent_module.clone(), + child_module: ctx.child_module.clone(), + instance_name: ctx.instance_name.clone(), + design: self.design.clone(), + resolved_param_values_json: serde_json::to_string(&rpv)?, + resolved_port_widths_json: serde_json::to_string(&rpw)?, + }); + } + Ok((updates, elab.warnings.clone())) + } +} + +/// Run the extraction pipeline against fully populated inputs. +/// +/// Emits exactly one `IrRecord::Manifest` followed by N `IrRecord::Module` +/// records. Returns the manifest plus a (partial) [`BuildPhases`] populated +/// with `slang_parse_*` / `prune_s` / `walk_design_s` / `elaborate_s` / `ir_write_s`. The +/// caller is expected to fill `store_upsert_s`, `embed_s`, and `total_s`. 
+pub fn extract( + inputs: &ExtractInputs, + sink: &mut S, +) -> Result<(Manifest, bender_kg_models::BuildPhases)> { + if inputs.workspace.is_empty() { + return Err(ExtractError::InvalidInput( + "ExtractInputs.workspace must be set".into(), + )); + } + if inputs.groups.is_empty() { + return Err(ExtractError::InvalidInput( + "ExtractInputs.groups must be non-empty".into(), + )); + } + if inputs.tops.is_empty() { + return Err(ExtractError::InvalidInput( + "ExtractInputs.tops must be non-empty: pass at least one --top MODULE".into(), + )); + } + + let mut phases = bender_kg_models::BuildPhases::default(); + + log_parse_jobs_advisory(inputs); + + // 1. Parse: per-group SlangSession::parse_group. + let t_parse = std::time::Instant::now(); + let mut p = parse::parse(&inputs.groups, inputs.single_unit, inputs.lenient)?; + phases.slang_parse_s = t_parse.elapsed().as_secs_f64(); + phases.slang_parse_group_count = p.group_durations.len(); + phases.slang_parse_max_group_s = p + .group_durations + .iter() + .map(|d| d.as_secs_f64()) + .fold(0.0_f64, f64::max); + log::info!( + "kg.phase slang_parse {:.3}s ({} groups, max {:.3}s, single_unit={}, lenient={})", + phases.slang_parse_s, + phases.slang_parse_group_count, + phases.slang_parse_max_group_s, + inputs.single_unit, + inputs.lenient, + ); + + // 2. Prune: keep only the trees reachable from `tops`. `walk_design` + // and `walk_elaborated` both iterate `session.trees()`, so retaining + // the subset in-place automatically narrows downstream work. 
+ let trees_before = p.session.tree_count(); + let t_prune = std::time::Instant::now(); + let kept_u32 = match p.session.reachable_indices(&inputs.tops) { + Ok(idx) => idx.into_iter().map(|i| i as u32).collect::>(), + Err(e) => { + return Err(ExtractError::InvalidInput(format!( + "--top {:?} did not match any parsed module: {}; \ + check the name(s) or your source list", + inputs.tops, e + ))); + } + }; + p.session.retain_trees(&kept_u32); + phases.prune_s = t_prune.elapsed().as_secs_f64(); + log::info!( + "kg.phase prune {:.3}s ({} -> {} trees)", + phases.prune_s, + trees_before, + kept_u32.len(), + ); + if kept_u32.is_empty() { + return Err(ExtractError::InvalidInput(format!( + "no syntax trees reachable from --top {:?}; check the name(s) or your source list", + inputs.tops + ))); + } + + // 3. Syntactic walk: every declared module/package/interface in the + // pruned set. + let t_walk = std::time::Instant::now(); + let walked = p.session.walk_design()?; + phases.walk_design_s = t_walk.elapsed().as_secs_f64(); + log::info!("kg.phase walk_design {:.3}s", phases.walk_design_s); + + let identity = bender_kg_models::DesignIdentity::build( + &inputs.workspace, + inputs.targets.clone(), + p.all_defines.clone(), + inputs.tops.first().cloned(), + inputs.design_alias.clone(), + ); + + let modules: Vec = walked + .modules + .iter() + .map(|m| walk::convert_module(m, &identity.alias)) + .collect(); + let mut warnings: Vec = walked.warnings.clone(); + + // 4. Deduplicate by name; keep the richer record (better location info or + // more instantiations). + let mut by_name: IndexMap = IndexMap::new(); + for m in modules.into_iter() { + match by_name.get_mut(&m.name) { + Some(existing) + if (m.line_start.is_some() && existing.line_start.is_none()) + || m.instantiations.len() > existing.instantiations.len() => + { + *existing = m; + } + Some(_) => {} + None => { + by_name.insert(m.name.clone(), m); + } + } + } + let mut modules: Vec = by_name.into_values().collect(); + + // 5. 
Optional: elaborate from `--top` roots and merge resolved + // parameter bindings + port widths. Gated by `inputs.elab`; pruning + // above already used the same roots regardless. + let t_elab = std::time::Instant::now(); + let elab_warnings = if inputs.elab { + let w = elab::enrich(&p.session, &inputs.tops, &mut modules)?; + phases.elaborate_s = t_elab.elapsed().as_secs_f64(); + log::info!("kg.phase elaborate {:.3}s", phases.elaborate_s); + w + } else { + Vec::new() + }; + warnings.extend(elab_warnings); + + // 6. Manifest + stream. + let t_ir = std::time::Instant::now(); + let manifest = emit::build_manifest(identity, &modules, p.file_count, p.srclist_hash, warnings); + emit::stream(sink, &manifest, &modules)?; + phases.ir_write_s = t_ir.elapsed().as_secs_f64(); + log::info!("kg.phase ir_write {:.3}s", phases.ir_write_s); + + Ok((manifest, phases)) +} + +/// Like [`extract`] but defers the elaboration merge. +/// +/// Runs parse + prune + walk_design + dedup + IR streaming (with +/// `resolved_*` fields empty), then returns: +/// * the manifest, +/// * the deduplicated, un-enriched modules (caller can re-use them +/// for upsert, embedding, etc.), +/// * the partially-populated [`bender_kg_models::BuildPhases`] (no +/// `elaborate_s` populated; the caller fills it after running the +/// handle), +/// * an [`ElabHandle`] when `inputs.elab` is `true`, otherwise +/// `None`. +/// +/// The intended caller (`bender_kg_core::Engine::build`) drives +/// `handle.run()` on a worker thread in parallel with the base graph +/// upsert, then applies the resulting [`ResolvedEdgeUpdate`] list via +/// `Store::update_resolved_edges`. Everything else is identical to +/// [`extract`]. 
+pub fn extract_pipelined(
+    inputs: &ExtractInputs,
+    sink: &mut S,
+) -> Result<(
+    Manifest,
+    Vec,
+    bender_kg_models::BuildPhases,
+    Option,
+)> {
+    // Reject incomplete inputs before doing any work: a workspace, at least
+    // one source group and at least one top module are all mandatory.
+    if inputs.workspace.is_empty() {
+        return Err(ExtractError::InvalidInput(
+            "ExtractInputs.workspace must be set".into(),
+        ));
+    }
+    if inputs.groups.is_empty() {
+        return Err(ExtractError::InvalidInput(
+            "ExtractInputs.groups must be non-empty".into(),
+        ));
+    }
+    if inputs.tops.is_empty() {
+        return Err(ExtractError::InvalidInput(
+            "ExtractInputs.tops must be non-empty: pass at least one --top MODULE".into(),
+        ));
+    }
+
+    let mut phases = bender_kg_models::BuildPhases::default();
+
+    log_parse_jobs_advisory(inputs);
+
+    // Phase: parse. Record the total wall-clock time, the number of groups
+    // and the duration of the slowest single group.
+    let t_parse = std::time::Instant::now();
+    let mut p = parse::parse(&inputs.groups, inputs.single_unit, inputs.lenient)?;
+    phases.slang_parse_s = t_parse.elapsed().as_secs_f64();
+    phases.slang_parse_group_count = p.group_durations.len();
+    phases.slang_parse_max_group_s = p
+        .group_durations
+        .iter()
+        .map(|d| d.as_secs_f64())
+        .fold(0.0_f64, f64::max);
+    log::info!(
+        "kg.phase slang_parse {:.3}s ({} groups, max {:.3}s, single_unit={}, lenient={}) [pipelined]",
+        phases.slang_parse_s,
+        phases.slang_parse_group_count,
+        phases.slang_parse_max_group_s,
+        inputs.single_unit,
+        inputs.lenient,
+    );
+
+    // Phase: prune. Ask the session which trees are reachable from the
+    // `--top` roots and drop the rest. Any error from the reachability query
+    // is reported as invalid input, with the underlying error appended.
+    let trees_before = p.session.tree_count();
+    let t_prune = std::time::Instant::now();
+    let kept_u32 = match p.session.reachable_indices(&inputs.tops) {
+        // NOTE(review): `as u32` truncates silently if an index ever exceeds
+        // u32::MAX; the index type is not visible here, so confirm the range.
+        Ok(idx) => idx.into_iter().map(|i| i as u32).collect::>(),
+        Err(e) => {
+            return Err(ExtractError::InvalidInput(format!(
+                "--top {:?} did not match any parsed module: {}; \
+                 check the name(s) or your source list",
+                inputs.tops, e
+            )));
+        }
+    };
+    p.session.retain_trees(&kept_u32);
+    phases.prune_s = t_prune.elapsed().as_secs_f64();
+    log::info!(
+        "kg.phase prune {:.3}s ({} -> {} trees) [pipelined]",
+        phases.prune_s,
+        trees_before,
+        kept_u32.len(),
+    );
+    // The emptiness check runs after the prune has been timed and logged, so
+    // the log line above is emitted even when nothing survived.
+    if kept_u32.is_empty() {
+        return Err(ExtractError::InvalidInput(format!(
+            "no syntax trees reachable from --top {:?}; check the name(s) or your source list",
+            inputs.tops
+        )));
+    }
+
+    // Phase: syntactic walk over the pruned session.
+    let t_walk = std::time::Instant::now();
+    let walked = p.session.walk_design()?;
+    phases.walk_design_s = t_walk.elapsed().as_secs_f64();
+    log::info!(
+        "kg.phase walk_design {:.3}s [pipelined]",
+        phases.walk_design_s
+    );
+
+    // Only the first `--top` entry is handed to the identity builder; the
+    // full list is still used for pruning above and for the elab handle below.
+    let identity = bender_kg_models::DesignIdentity::build(
+        &inputs.workspace,
+        inputs.targets.clone(),
+        p.all_defines.clone(),
+        inputs.tops.first().cloned(),
+        inputs.design_alias.clone(),
+    );
+
+    let raw_modules: Vec = walked
+        .modules
+        .iter()
+        .map(|m| walk::convert_module(m, &identity.alias))
+        .collect();
+    let warnings: Vec = walked.warnings.clone();
+
+    // Deduplicate by module name. A later record replaces the stored one
+    // when it has a `line_start` and the stored one does not, or when it
+    // lists strictly more instantiations; otherwise the stored record wins.
+    // Replacement assigns through `get_mut`, so each name keeps the position
+    // of its first occurrence in the map.
+    let mut by_name: IndexMap = IndexMap::new();
+    for m in raw_modules.into_iter() {
+        match by_name.get_mut(&m.name) {
+            Some(existing)
+                if (m.line_start.is_some() && existing.line_start.is_none())
+                    || m.instantiations.len() > existing.instantiations.len() =>
+            {
+                *existing = m;
+            }
+            Some(_) => {}
+            None => {
+                by_name.insert(m.name.clone(), m);
+            }
+        }
+    }
+    let modules: Vec = by_name.into_values().collect();
+
+    // Stream un-enriched IR.
+    let t_ir = std::time::Instant::now();
+    let manifest = emit::build_manifest(
+        identity.clone(),
+        &modules,
+        p.file_count,
+        p.srclist_hash,
+        warnings,
+    );
+    emit::stream(sink, &manifest, &modules)?;
+    phases.ir_write_s = t_ir.elapsed().as_secs_f64();
+    log::info!("kg.phase ir_write {:.3}s [pipelined]", phases.ir_write_s);
+
+    // Elaboration is not run here. When requested, the parse session is moved
+    // out of `p` into the handle together with the tops and the design alias;
+    // `phases.elaborate_s` is left at its default for the caller to fill in.
+    let handle = if inputs.elab {
+        let parse::ParseOutcome { session, .. } = p;
+        Some(ElabHandle {
+            session,
+            tops: inputs.tops.clone(),
+            design: identity.alias,
+        })
+    } else {
+        None
+    };
+
+    Ok((manifest, modules, phases, handle))
+}
+
+/// One-shot advisory printed when the caller sets `parse_jobs > 1`. We
+/// don't run parallel parsing yet because pruning needs a single slang
+/// session; this keeps the user informed that the flag is a no-op.
+fn log_parse_jobs_advisory(inputs: &ExtractInputs) { + if inputs.parse_jobs > 1 { + log::warn!( + "kg.parse_jobs={} ignored: parallel parsing is incompatible with --top pruning \ + (slang sessions cannot share trees today). Falling back to single-threaded parse.", + inputs.parse_jobs + ); + } +} + +/// Convenience: write the IR straight to a JSONL file path. +pub fn extract_to_jsonl(inputs: &ExtractInputs, out_path: &PathBuf) -> Result { + use std::fs::File; + use std::io::BufWriter; + if let Some(parent) = out_path.parent() { + if !parent.as_os_str().is_empty() { + std::fs::create_dir_all(parent)?; + } + } + use std::io::Write; + let f = File::create(out_path)?; + let mut bw = BufWriter::new(f); + let (m, _phases) = extract(inputs, &mut bw)?; + bw.flush()?; + Ok(m) +} + +#[cfg(test)] +mod tests { + use super::*; + use std::io::Write; + + #[test] + fn invalid_inputs_rejected() { + let mut sink = VecSink::default(); + let inputs = ExtractInputs::default(); + let r = extract(&inputs, &mut sink); + assert!(r.is_err()); + } + + fn write_temp(dir: &std::path::Path, name: &str, body: &str) -> String { + let p = dir.join(name); + let mut f = std::fs::File::create(&p).unwrap(); + f.write_all(body.as_bytes()).unwrap(); + p.to_string_lossy().into_owned() + } + + #[test] + fn single_unit_propagates_macros_across_groups() { + let tmp = tempfile::tempdir().unwrap(); + // Group A: header-only `.sv` defining a function-style macro. + let header = write_temp( + tmp.path(), + "axi_typedef.sv", + "`define MK_T(N, W) typedef logic [W-1:0] N\n", + ); + // Group B: uses the macro WITHOUT a `\`include`. With per-group + // scoping (single_unit=false) this cannot resolve. With single-unit + // it does. 
+ let user = write_temp( + tmp.path(), + "user.sv", + "module user;\n `MK_T(my_t, 8);\nendmodule\n", + ); + let inputs = |single_unit| ExtractInputs { + workspace: tmp.path().to_string_lossy().into(), + targets: vec![], + tops: vec!["user".into()], + elab: false, + design_alias: Some("xunit".into()), + groups: vec![ + SourceGroupInput { + files: vec![header.clone()], + include_dirs: vec![], + defines: vec![], + }, + SourceGroupInput { + files: vec![user.clone()], + include_dirs: vec![], + defines: vec![], + }, + ], + single_unit, + lenient: false, + parse_jobs: 1, + }; + + let mut s_off = VecSink::default(); + let r_off = extract(&inputs(false), &mut s_off); + assert!(r_off.is_err(), "expected per-group scoping to fail"); + + let mut s_on = VecSink::default(); + let r_on = extract(&inputs(true), &mut s_on); + let (m, _phases) = r_on.expect("expected single-unit to succeed"); + assert!(m.module_count >= 1, "user module must be indexed"); + } +} diff --git a/crates/bender-kg-extract/src/parse.rs b/crates/bender-kg-extract/src/parse.rs new file mode 100644 index 00000000..5d218915 --- /dev/null +++ b/crates/bender-kg-extract/src/parse.rs @@ -0,0 +1,97 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Drive `bender-slang` over a sequence of source groups, mirroring the loop +//! in `bender pickle`. + +use std::time::{Duration, Instant}; + +use bender_slang::SlangSession; +use sha2::{Digest, Sha256}; + +use crate::{Result, SourceGroupInput}; + +pub(crate) struct ParseOutcome { + pub session: SlangSession, + pub all_defines: Vec, + pub file_count: usize, + pub srclist_hash: String, + /// Per-group wall-clock duration of the corresponding `parse_group` call. + /// Same length as the groups vec passed in. Used by callers to derive + /// `slang_parse_max_group_s` for the build's phase summary. + pub group_durations: Vec, +} + +/// Per-group parse: one `parse_group` call per Bender SourceGroup. 
Each group
+/// gets its own preprocessor scope (incdirs + defines) just like pickle does.
+/// When `single_unit` is set, slang inherits `\`define`s declared in earlier
+/// groups so cross-package macro use (e.g. `\`AXI_TYPEDEF_ALL` defined in
+/// `axi/typedef.svh` and used by `tt_noc2axi.sv` in another package) parses
+/// without per-file `\`include`s. Mirrors vcs / `vlog -mfcu`.
+/// When `lenient` is set, parse-time error diagnostics are reported but do
+/// not abort the build (best-effort mode for repos with hostile inputs).
+///
+/// On success the returned [`ParseOutcome`] carries the session, every
+/// define seen (in group order), the total file count, the srclist hash and
+/// one duration per group. An error from any `parse_group` call is
+/// propagated immediately and the remaining groups are not parsed.
+pub(crate) fn parse(
+    groups: &[SourceGroupInput],
+    single_unit: bool,
+    lenient: bool,
+) -> Result {
+    let mut session = SlangSession::new();
+    session.set_single_unit(single_unit);
+    session.set_lenient(lenient);
+    let mut all_defines: Vec = Vec::new();
+    let mut file_count = 0usize;
+    let mut hasher = Sha256::new();
+    let mut group_durations: Vec = Vec::with_capacity(groups.len());
+
+    // Seed the hash with one mode byte so that re-runs with different
+    // parsing modes produce distinct srclist hashes. `single_unit` selects
+    // 0x02 (on) or 0x01 (off); `lenient` additionally sets 0x10.
+    let mode_byte: u8 =
+        (if single_unit { 0x02 } else { 0x01 }) | (if lenient { 0x10 } else { 0x00 });
+    hasher.update([mode_byte]);
+    for (i, group) in groups.iter().enumerate() {
+        // Framing bytes keep the digest unambiguous: 0x1e opens each group,
+        // 0x1d separates its files / defines / include-dirs sections, and
+        // 0x1f terminates every individual entry. Only the entries as passed
+        // in are hashed; this function does not read file contents for the
+        // hash.
+        hasher.update(b"\x1e");
+        for f in &group.files {
+            hasher.update(f.as_bytes());
+            hasher.update(b"\x1f");
+        }
+        hasher.update(b"\x1d");
+        for d in &group.defines {
+            hasher.update(d.as_bytes());
+            hasher.update(b"\x1f");
+            all_defines.push(d.clone());
+        }
+        hasher.update(b"\x1d");
+        for inc in &group.include_dirs {
+            hasher.update(inc.as_bytes());
+            hasher.update(b"\x1f");
+        }
+        file_count += group.files.len();
+        // Time each `parse_group` call on its own; one entry is pushed per
+        // successfully parsed group, in input order.
+        let t0 = Instant::now();
+        session.parse_group(&group.files, &group.include_dirs, &group.defines)?;
+        let dt = t0.elapsed();
+        group_durations.push(dt);
+        log::debug!(
+            "parse_group #{i:03} files={} dt={:.3}s",
+            group.files.len(),
+            dt.as_secs_f64()
+        );
+    }
+
+    Ok(ParseOutcome {
+        session,
+        all_defines,
+        file_count,
+        srclist_hash: hex_lower(&hasher.finalize()),
+        group_durations,
+    })
+}
+
+/// Encode `bytes` as lower-case hexadecimal, two digits per byte (high
+/// nibble first).
+fn hex_lower(bytes: &[u8]) -> String {
+    const HEX: &[u8] = b"0123456789abcdef";
+    let mut out = String::with_capacity(bytes.len() * 2);
+    for &b in bytes {
+        out.push(HEX[(b >> 4) as usize] as char);
+        out.push(HEX[(b & 0x0f) as usize] as char);
+    }
+    out
+}
diff --git a/crates/bender-kg-extract/src/walk.rs b/crates/bender-kg-extract/src/walk.rs
new file mode 100644
index 00000000..54afd110
--- /dev/null
+++ b/crates/bender-kg-extract/src/walk.rs
@@ -0,0 +1,112 @@
+// Copyright (c) 2026 ETH Zurich
+// Alessandro Ottaviano
+
+//! Convert `bender_slang::KgModule` records (syntactic walk output) into
+//! `bender_kg_models::ModuleData`.
+
+use bender_kg_models::{
+    Direction, ImportInfo, InstantiationInfo, ModuleData, ParamInfo, ParamKind, PortInfo,
+};
+use bender_slang::{KgImport, KgInstance, KgModule, KgParam, KgPort};
+
+/// Build a [`ModuleData`] from one walked [`KgModule`], tagging it with
+/// `design_alias`.
+///
+/// Line numbers and block ranges go through [`positive`] / [`pair`], so
+/// non-positive values become `None`. `includes`, `exported_typedefs` and
+/// `description` are always left empty by this conversion.
+pub(crate) fn convert_module(m: &KgModule, design_alias: &str) -> ModuleData {
+    ModuleData {
+        name: m.name.clone(),
+        file_path: m.file_path.clone(),
+        design: design_alias.to_string(),
+        is_package: m.is_package,
+        line_start: positive(m.line_start),
+        line_end: positive(m.line_end),
+        param_block_lines: pair(m.param_block_start, m.param_block_end),
+        port_block_lines: pair(m.port_block_start, m.port_block_end),
+        parameters: m.parameters.iter().map(convert_param).collect(),
+        ports: m.ports.iter().map(convert_port).collect(),
+        instantiations: m.instantiations.iter().map(convert_instance).collect(),
+        imports: m.imports.iter().map(convert_import).collect(),
+        includes: Vec::new(),
+        exported_typedefs: Vec::new(),
+        description: None,
+    }
+}
+
+/// Copy a walked parameter, mapping its textual kind via [`parse_param_kind`].
+fn convert_param(p: &KgParam) -> ParamInfo {
+    ParamInfo {
+        name: p.name.clone(),
+        kind: parse_param_kind(&p.kind),
+        default_value: p.default_value.clone(),
+        is_type_param: p.is_type_param,
+    }
+}
+
+/// Map a kind string to [`ParamKind`]. Matching is exact; any string other
+/// than `"int"`, `"bit"`, `"type"` or `"string"` becomes `ParamKind::Other`.
+fn parse_param_kind(s: &str) -> ParamKind {
+    match s {
+        "int" => ParamKind::Int,
+        "bit" => ParamKind::Bit,
+        "type" => ParamKind::Type,
+        "string" => ParamKind::String,
+        _ => ParamKind::Other,
+    }
+}
+
+/// Copy a walked port. A negative `bit_width` is stored as `None`;
+/// `type_ref` is always `None` here.
+fn convert_port(p: &KgPort) -> PortInfo {
+    PortInfo {
+        name: p.name.clone(),
+        direction: parse_direction(&p.direction),
+        type_str: p.type_str.clone(),
+        width_expr: p.width_expr.clone(),
+        bit_width: if p.bit_width >= 0 {
+            Some(p.bit_width)
+        } else {
+            None
+        },
+        is_type_param: p.is_type_param,
+        type_ref: None,
+    }
+}
+
+/// Map a direction string to [`Direction`]. Matching is exact; note that an
+/// unrecognised string silently falls back to `Direction::Input`.
+fn parse_direction(s: &str) -> Direction {
+    match s {
+        "input" => Direction::Input,
+        "output" => Direction::Output,
+        "inout" => Direction::Inout,
+        "ref" => Direction::Ref,
+        _ => Direction::Input,
+    }
+}
+
+/// Copy a walked instantiation. The key/value binding lists are collected
+/// into `BTreeMap`s (a repeated key keeps the last value inserted). The
+/// `resolved_*` maps start empty and `condition` is `None`.
+fn convert_instance(i: &KgInstance) -> InstantiationInfo {
+    let mut param_bindings = std::collections::BTreeMap::new();
+    for kv in &i.param_bindings {
+        param_bindings.insert(kv.key.clone(), kv.value.clone());
+    }
+    let mut port_bindings = std::collections::BTreeMap::new();
+    for kv in &i.port_bindings {
+        port_bindings.insert(kv.key.clone(), kv.value.clone());
+    }
+    InstantiationInfo {
+        module_name: i.module_name.clone(),
+        instance_name: i.instance_name.clone(),
+        param_bindings,
+        resolved_param_values: std::collections::BTreeMap::new(),
+        port_bindings,
+        resolved_port_widths: std::collections::BTreeMap::new(),
+        condition: None,
+        line_start: positive(i.line_start),
+        line_end: positive(i.line_end),
+    }
+}
+
+/// Copy a walked package import field-for-field.
+fn convert_import(im: &KgImport) -> ImportInfo {
+    ImportInfo {
+        package_name: im.package_name.clone(),
+        is_wildcard: im.is_wildcard,
+        specific_symbols: im.specific_symbols.clone(),
+    }
+}
+
+/// `Some(n)` when `n` is strictly positive, otherwise `None`.
+fn positive(n: i64) -> Option {
+    if n > 0 { Some(n) } else { None }
+}
+/// `Some((a, b))` only when both values are strictly positive.
+fn pair(a: i64, b: i64) -> Option<(i64, i64)> {
+    if a > 0 && b > 0 { Some((a, b)) } else { None }
+}
diff --git a/crates/bender-kg-mcp/Cargo.toml b/crates/bender-kg-mcp/Cargo.toml
new file mode 100644
index 00000000..c9d19b95
--- /dev/null
+++ b/crates/bender-kg-mcp/Cargo.toml
@@ -0,0 +1,25 @@
+[package]
+name = "bender-kg-mcp"
+version = "0.1.0"
+edition = "2024"
+description = "Internal bender crate: rmcp-based MCP adapter for the knowledge graph"
+license = "Apache-2.0"
+authors = ["Alessandro Ottaviano "]
+
+[dependencies]
+bender-kg-core = { workspace = true }
+bender-kg-models = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+log = { workspace = true }
+anyhow = { workspace = true }
+tokio = { workspace = true }
+rmcp = { workspace = true }
+schemars = { workspace = true }
+
+[dev-dependencies]
+tempfile = "3.5"
+
+[package.metadata.dist]
+dist = false
diff --git a/crates/bender-kg-mcp/README.md b/crates/bender-kg-mcp/README.md
new file mode 100644
index 00000000..3cbaf6a4
--- /dev/null
+++ 
b/crates/bender-kg-mcp/README.md
@@ -0,0 +1,31 @@
+# bender-kg-mcp
+
+> **Internal crate:** `bender-kg-mcp` is an internal crate of [Bender](https://github.com/pulp-platform/bender). It does not provide a stable public API — breaking changes may occur at any time without notice.
+
+`bender-kg-mcp` exposes the `bender kg` query surface as an [MCP](https://modelcontextprotocol.io/) server over stdio. It is launched by `bender kg mcp-server` and lets AI assistants (Claude, Cursor, etc.) query the design knowledge graph directly.
+
+## MCP Tools
+
+| Tool | Description |
+|------|-------------|
+| `search_modules` | Semantic and keyword search for modules |
+| `search_modules_batch` | Batch variant of `search_modules` |
+| `get_module` | Full module record: ports, parameters, imports |
+| `get_subgraph` | Instantiation sub-graph rooted at a module |
+| `get_instance_context` | Instance binding details for a specific instantiation |
+| `get_parents` | Modules that instantiate a given module |
+| `get_children` | Modules instantiated by a given module |
+| `get_ports` | Port list with widths and types |
+| `find_by_protocol` | Find modules by port protocol pattern |
+| `get_source_snippet` | Source file excerpt for a module |
+| `trace_hierarchy_path` | Shortest instantiation path between two modules |
+| `propagate_port` | Edit plan for propagating a new port along the hierarchy path between two modules |
+| `check_connectivity` | Structural connectivity checks on the instantiations under a module (depth-limited) |
+| `trace_parameter` | Trace parameter propagation (optionally recursive) |
+| `trace_signal` | Trace signal connectivity (optionally recursive) |
+| `match_interfaces` | Compare the port interfaces of two modules for wiring compatibility |
+| `find_structurally_similar` | Structurally similar module candidates |
+| `graph_stats` | Database statistics |
+
+## Usage
+
+An MCP client starts the server by launching `bender kg mcp-server`. The server communicates over stdin/stdout using the MCP JSON-RPC protocol and runs until the client disconnects.
diff --git a/crates/bender-kg-mcp/src/lib.rs b/crates/bender-kg-mcp/src/lib.rs new file mode 100644 index 00000000..ef7be732 --- /dev/null +++ b/crates/bender-kg-mcp/src/lib.rs @@ -0,0 +1,524 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Stdio MCP adapter for `bender kg`, built on the official Rust SDK +//! (`rmcp`). Exposes the kg query surface as MCP tools. + +use std::sync::Arc; + +use bender_kg_core::{CoreConfig, Engine}; +use rmcp::handler::server::router::tool::ToolRouter; +use rmcp::handler::server::wrapper::Parameters; +use rmcp::model::{ServerCapabilities, ServerInfo}; +use rmcp::transport::stdio; +use rmcp::{ErrorData as McpError, ServerHandler, ServiceExt, tool, tool_handler, tool_router}; +use schemars::JsonSchema; +use serde::Deserialize; +use serde_json::Value; + +const SERVER_NAME: &str = "bender-kg"; +const SERVER_VERSION: &str = env!("CARGO_PKG_VERSION"); +const INSTRUCTIONS: &str = + "RTL knowledge graph: search, browse, and query SystemVerilog module data."; + +// ===================================================================== +// Param structs (one per tool, all derive serde + schemars) +// ===================================================================== + +fn d_5_i32() -> i32 { + 5 +} +fn d_15() -> usize { + 15 +} +fn d_10() -> usize { + 10 +} +fn d_3_i32() -> i32 { + 3 +} +fn d_1_i32() -> i32 { + 1 +} +fn d_1_usize() -> usize { + 1 +} +fn d_overlap() -> f64 { + 0.3 +} +fn d_module() -> String { + "module".into() +} +fn d_input() -> String { + "input".into() +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct SearchModulesParams { + /// Natural-language description. + pub query: String, + #[serde(default = "d_15")] + pub top_k: usize, + /// Restrict search to a design alias (empty for all). 
+ #[serde(default)] + pub design: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct SearchModulesBatchParams { + pub queries: Vec, + #[serde(default = "d_10")] + pub top_k: usize, + #[serde(default)] + pub design: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct NameParams { + pub name: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GetSubgraphParams { + pub name: String, + #[serde(default = "d_3_i32")] + pub depth: i32, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GetInstanceContextParams { + pub parent: String, + pub child: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct FindByProtocolParams { + pub protocol: String, + #[serde(default)] + pub design: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GetSourceSnippetParams { + pub module_name: String, + #[serde(default = "d_module")] + pub element: String, + #[serde(default)] + pub instance_name: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct TraceHierarchyPathParams { + pub from_module: String, + pub to_module: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct PropagatePortParams { + pub from_module: String, + pub to_module: String, + pub signal_name: String, + #[serde(default = "d_input")] + pub direction: String, + #[serde(default = "d_1_usize")] + pub width: usize, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct CheckConnectivityParams { + pub module_name: String, + #[serde(default = "d_1_i32")] + pub depth: i32, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct TraceParameterParams { + pub module_name: String, + pub param_name: String, + /// Follow parameter propagation recursively through the hierarchy. + #[serde(default)] + pub recursive: bool, + /// Maximum recursion depth (only active when recursive=true). 
+ #[serde(default = "d_5_i32")] + pub max_depth: i32, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct TraceSignalParams { + pub module_name: String, + pub signal_name: String, + /// Follow signal connections recursively through the hierarchy. + #[serde(default)] + pub recursive: bool, + /// Maximum recursion depth (only active when recursive=true). + #[serde(default = "d_5_i32")] + pub max_depth: i32, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct MatchInterfacesParams { + pub module_a: String, + pub module_b: String, + #[serde(default)] + pub prefix_a: String, + #[serde(default)] + pub prefix_b: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct FindStructurallySimilarParams { + pub module_name: String, + #[serde(default = "d_overlap")] + pub min_overlap: f64, + #[serde(default)] + pub design: String, +} + +#[derive(Debug, Deserialize, JsonSchema)] +pub struct GraphStatsParams { + #[serde(default)] + pub design: String, +} + +// ===================================================================== +// Server impl +// ===================================================================== + +#[derive(Clone)] +pub struct BenderKg { + engine: Arc, + // The `#[tool_router]` macro reads this field via generated code that + // looks invisible to rustc's dead-code analysis. 
+ #[allow(dead_code)] + tool_router: ToolRouter, +} + +#[tool_router] +impl BenderKg { + pub fn new(engine: Arc) -> Self { + Self { + engine, + tool_router: Self::tool_router(), + } + } + + #[tool(description = "Semantic search for RTL modules by natural-language description.")] + async fn search_modules( + &self, + Parameters(p): Parameters, + ) -> Result { + let hits = self + .engine + .search_modules(&p.query, p.top_k, opt(&p.design)) + .await + .map_err(internal)?; + as_json(&hits) + } + + #[tool(description = "Run multiple semantic searches in one call (deduplicated by name).")] + async fn search_modules_batch( + &self, + Parameters(p): Parameters, + ) -> Result { + let hits = self + .engine + .search_modules_batch(&p.queries, p.top_k, opt(&p.design)) + .await + .map_err(internal)?; + as_json(&hits) + } + + #[tool(description = "Get full details for a single RTL module by exact name.")] + async fn get_module(&self, Parameters(p): Parameters) -> Result { + match self.engine.get_module(&p.name).map_err(internal)? { + Some(m) => as_json(&m), + None => { + as_json(&serde_json::json!({"error": format!("Module '{}' not found", p.name)})) + } + } + } + + #[tool(description = "Get the instantiation tree rooted at a module (depth-limited BFS).")] + async fn get_subgraph( + &self, + Parameters(p): Parameters, + ) -> Result { + let sg = self + .engine + .get_subgraph(&p.name, p.depth) + .map_err(internal)?; + as_json(&sg) + } + + #[tool( + description = "Get resolved parameter bindings and port widths for a (parent,child) edge." 
+ )] + async fn get_instance_context( + &self, + Parameters(p): Parameters, + ) -> Result { + let edges = self + .engine + .get_instance_context(&p.parent, &p.child) + .map_err(internal)?; + if edges.is_empty() { + return as_json(&serde_json::json!({ + "error": format!("No INSTANTIATES edge from '{}' to '{}'", p.parent, p.child), + })); + } + as_json(&edges) + } + + #[tool( + description = "Find all modules that instantiate the given module (reverse dependency)." + )] + async fn get_parents(&self, Parameters(p): Parameters) -> Result { + let parents = self.engine.get_parents(&p.name).map_err(internal)?; + as_json(&parents) + } + + #[tool(description = "Find all distinct module types directly instantiated by the given module.")] + async fn get_children( + &self, + Parameters(p): Parameters, + ) -> Result { + let children = self.engine.get_children(&p.name).map_err(internal)?; + as_json(&children) + } + + #[tool(description = "Get just the port list for a module (lightweight query).")] + async fn get_ports(&self, Parameters(p): Parameters) -> Result { + match self.engine.get_module(&p.name).map_err(internal)? { + Some(m) => as_json(&m.ports), + None => { + as_json(&serde_json::json!({"error": format!("Module '{}' not found", p.name)})) + } + } + } + + #[tool(description = "Find modules whose port types contain a protocol keyword (e.g. 'axi').")] + async fn find_by_protocol( + &self, + Parameters(p): Parameters, + ) -> Result { + let mods = self + .engine + .find_by_protocol(&p.protocol, opt(&p.design)) + .map_err(internal)?; + let summarised: Vec = mods + .iter() + .map(|m| serde_json::json!({"name": m.name, "file_path": m.file_path})) + .collect(); + as_json(&summarised) + } + + #[tool( + description = "Read targeted source lines for a module element (module/ports/params/instance)." 
+ )] + async fn get_source_snippet( + &self, + Parameters(p): Parameters, + ) -> Result { + let v = self + .engine + .get_source_snippet(&p.module_name, &p.element, &p.instance_name) + .map_err(internal)?; + as_json(&v) + } + + #[tool(description = "Trace the BFS path between two modules and return hop metadata.")] + async fn trace_hierarchy_path( + &self, + Parameters(p): Parameters, + ) -> Result { + let chain = self + .engine + .trace_hierarchy_path(&p.from_module, &p.to_module) + .map_err(internal)?; + if chain.is_empty() { + return as_json(&serde_json::json!({ + "error": format!("No path from '{}' to '{}'", p.from_module, p.to_module), + })); + } + as_json(&chain) + } + + #[tool(description = "Generate an edit plan for propagating a new port through the hierarchy.")] + async fn propagate_port( + &self, + Parameters(p): Parameters, + ) -> Result { + let chain = self + .engine + .trace_hierarchy_path(&p.from_module, &p.to_module) + .map_err(internal)?; + if chain.is_empty() { + return as_json(&serde_json::json!({ + "error": format!("No path from '{}' to '{}'", p.from_module, p.to_module), + })); + } + as_json(&serde_json::json!({ + "signal": p.signal_name, + "direction": p.direction, + "width": p.width, + "from": p.from_module, + "to": p.to_module, + "hops": chain, + })) + } + + #[tool(description = "Run structural connectivity checks on instantiations under a module.")] + async fn check_connectivity( + &self, + Parameters(p): Parameters, + ) -> Result { + let findings = self + .engine + .check_connectivity(&p.module_name, p.depth) + .map_err(internal)?; + as_json(&serde_json::json!({ + "module": p.module_name, + "depth": p.depth, + "issue_count": findings.len(), + "findings": findings, + })) + } + + #[tool(description = "Trace cascading impact of a parameter through the hierarchy. 
Set recursive=true to follow across multiple levels.")] + async fn trace_parameter( + &self, + Parameters(p): Parameters, + ) -> Result { + let res = if p.recursive { + self.engine + .trace_parameter_recursive(&p.module_name, &p.param_name, p.max_depth) + .map_err(internal)? + } else { + self.engine + .trace_parameter(&p.module_name, &p.param_name) + .map_err(internal)? + }; + as_json(&serde_json::json!({ + "module": p.module_name, + "parameter": p.param_name, + "recursive": p.recursive, + "max_depth": p.max_depth, + "affected_instances": res.len(), + "instances": res, + })) + } + + #[tool(description = "Trace where a signal (port) from a module is connected in child instantiations. Set recursive=true to follow across multiple levels.")] + async fn trace_signal( + &self, + Parameters(p): Parameters, + ) -> Result { + let res = if p.recursive { + self.engine + .trace_signal_recursive(&p.module_name, &p.signal_name, p.max_depth) + .map_err(internal)? + } else { + self.engine + .trace_signal(&p.module_name, &p.signal_name) + .map_err(internal)? 
+ }; + as_json(&serde_json::json!({ + "module": p.module_name, + "signal": p.signal_name, + "recursive": p.recursive, + "max_depth": p.max_depth, + "connections": res.len(), + "instances": res, + })) + } + + #[tool(description = "Compare the port interfaces of two modules for wiring compatibility.")] + async fn match_interfaces( + &self, + Parameters(p): Parameters, + ) -> Result { + let v = self + .engine + .match_interfaces(&p.module_a, &p.module_b, &p.prefix_a, &p.prefix_b) + .map_err(internal)?; + as_json(&v) + } + + #[tool(description = "Find modules with structurally similar port signatures (Jaccard).")] + async fn find_structurally_similar( + &self, + Parameters(p): Parameters, + ) -> Result { + let res = self + .engine + .find_structurally_similar(&p.module_name, p.min_overlap, opt(&p.design)) + .map_err(internal)?; + as_json(&serde_json::json!({"module": p.module_name, "candidates": res})) + } + + #[tool(description = "Return basic statistics about the graph (counts + per-design).")] + async fn graph_stats( + &self, + Parameters(p): Parameters, + ) -> Result { + let stats = self.engine.stats(opt(&p.design)).map_err(internal)?; + as_json(&stats) + } +} + +#[tool_handler] +impl ServerHandler for BenderKg { + fn get_info(&self) -> ServerInfo { + // ServerInfo / Implementation are #[non_exhaustive], so we fill in + // a Default and assign through field access rather than literal + // construction. 
+ let mut info = ServerInfo::default(); + info.capabilities = ServerCapabilities::builder().enable_tools().build(); + info.server_info.name = SERVER_NAME.into(); + info.server_info.version = SERVER_VERSION.into(); + info.server_info.title = Some("Bender RTL Knowledge Graph".into()); + info.instructions = Some(INSTRUCTIONS.into()); + info + } +} + +// ===================================================================== +// Entry point + helpers +// ===================================================================== + +/// Run the stdio MCP server until the peer disconnects. +pub async fn serve_stdio(cfg: CoreConfig) -> anyhow::Result<()> { + let engine = Engine::open(cfg).await?; + let server = BenderKg::new(Arc::new(engine)); + let service = server.serve(stdio()).await?; + service.waiting().await?; + Ok(()) +} + +fn opt(s: &str) -> Option<&str> { + if s.is_empty() { None } else { Some(s) } +} + +fn as_json(v: &T) -> Result { + serde_json::to_string(v).map_err(|e| McpError::internal_error(format!("serde: {e}"), None)) +} + +fn internal(e: bender_kg_core::CoreError) -> McpError { + McpError::internal_error(e.to_string(), None) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[tokio::test(flavor = "current_thread")] + async fn server_advertises_full_tool_catalog() { + let tmp = tempfile::tempdir().unwrap(); + let path = tmp.keep(); + let cfg = CoreConfig::new(path); + let engine = Engine::open(cfg).await.unwrap(); + let server = BenderKg::new(Arc::new(engine)); + // The tool router carries one route per #[tool]-annotated method. 
+        assert!(server.tool_router.list_all().len() >= 18);
+    }
+}
diff --git a/crates/bender-kg-models/Cargo.toml b/crates/bender-kg-models/Cargo.toml
new file mode 100644
index 00000000..1099f8bd
--- /dev/null
+++ b/crates/bender-kg-models/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "bender-kg-models"
+version = "0.1.0"
+edition = "2024"
+description = "Internal bender crate: kg.v3 IR types for the bender knowledge-graph subsystem"
+license = "Apache-2.0"
+authors = ["Alessandro Ottaviano "]
+
+[dependencies]
+serde = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+sha2 = { workspace = true }
+indexmap = { workspace = true }
+
+[package.metadata.dist]
+dist = false
diff --git a/crates/bender-kg-models/README.md b/crates/bender-kg-models/README.md
new file mode 100644
index 00000000..e960a50f
--- /dev/null
+++ b/crates/bender-kg-models/README.md
@@ -0,0 +1,15 @@
+# bender-kg-models
+
+> **Internal crate:** `bender-kg-models` is an internal crate of [Bender](https://github.com/pulp-platform/bender). It does not provide a stable public API — breaking changes may occur at any time without notice.
+
+`bender-kg-models` defines the intermediate representation (IR) types shared across the knowledge-graph subsystem. All other `bender-kg-*` crates depend on these types.
+
+## Types
+
+- **`ModuleData`** — a parsed SystemVerilog module: name, design, file path, ports, parameters, instantiations, and package imports.
+- **`PortInfo`** — port metadata: name, direction (`Direction`), type string, struct breakdown, and resolved width.
+- **`ParamInfo`** — parameter metadata: name, kind (`ParamKind`), and default value.
+- **`ImportInfo`** — an `import pkg::*;` or selective import statement.
+- **`InstantiationInfo`** — an instantiation edge: parent module, child module, instance name, parameter bindings, and port bindings.
+ +All types implement `serde::Serialize` / `serde::Deserialize` and are stored in the Grafeo graph database by `bender-kg-store`. diff --git a/crates/bender-kg-models/src/lib.rs b/crates/bender-kg-models/src/lib.rs new file mode 100644 index 00000000..e3f63b07 --- /dev/null +++ b/crates/bender-kg-models/src/lib.rs @@ -0,0 +1,444 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! `kg.v3` intermediate representation for the bender knowledge graph. +//! +//! Every public record is `serde`-serialisable as JSON; the streaming on-disk +//! form is JSONL with one [`IrRecord`] per line, prefixed by a single +//! [`Manifest`] envelope record. + +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use std::collections::BTreeMap; +use std::path::Path; +use thiserror::Error; + +/// Schema version emitted by this crate. Bump when wire-incompatible changes +/// are made. +pub const KG_SCHEMA_VERSION: &str = "kg.v3"; + +/// Wall-clock breakdown of a `bender kg build` invocation. +/// +/// Populated incrementally: the extract crate fills in +/// `slang_parse_*` / `walk_design_s` / `elaborate_s` / `ir_write_s`; +/// `bender-kg-core::Engine::build` fills in `store_upsert_s`, `embed_s`, +/// and `total_s` before returning. Default value is all-zero (so callers +/// that don't care can ignore it). +/// +/// Removable: this struct is only consumed by the JSON summary of +/// `bender kg build` and the bench script; deleting it just removes the +/// `phases_seconds` block from the build output. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +#[serde(default)] +pub struct BuildPhases { + pub slang_parse_s: f64, + pub slang_parse_group_count: usize, + pub slang_parse_max_group_s: f64, + /// Wall-clock spent pruning the parsed-tree set to those reachable from + /// the requested top modules. Reported separately from `walk_design_s` + /// so callers can attribute the cost of `reachable_tree_indices` + + /// `retain_trees`. 
+ pub prune_s: f64, + pub walk_design_s: f64, + pub elaborate_s: f64, + pub ir_write_s: f64, + pub store_upsert_s: f64, + pub embed_s: f64, + pub total_s: f64, +} + +#[derive(Debug, Error)] +pub enum ModelsError { + #[error("serde error: {0}")] + Serde(#[from] serde_json::Error), + #[error("io error: {0}")] + Io(#[from] std::io::Error), +} + +pub type Result = std::result::Result; + +/// Port direction. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum Direction { + Input, + Output, + Inout, + Ref, +} + +impl Default for Direction { + fn default() -> Self { + Direction::Input + } +} + +/// Parameter kind. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)] +#[serde(rename_all = "lowercase")] +pub enum ParamKind { + Int, + Bit, + Type, + String, + Other, +} + +impl Default for ParamKind { + fn default() -> Self { + ParamKind::Other + } +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct PortInfo { + pub name: String, + pub direction: Direction, + #[serde(default)] + pub type_str: String, + #[serde(default)] + pub width_expr: String, + #[serde(default)] + pub bit_width: Option, + #[serde(default)] + pub is_type_param: bool, + #[serde(default)] + pub type_ref: Option, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ParamInfo { + pub name: String, + pub kind: ParamKind, + #[serde(default)] + pub default_value: String, + #[serde(default)] + pub is_type_param: bool, +} + +/// Resolved bit width of a port, including a per-subfield breakdown when +/// the port's type is a packed struct or union. `total` is the canonical +/// `getBitWidth()` of the port type; `fields` is dot-flattened across +/// nested packed structs/unions and is empty for scalar ports. 
+/// +/// For packed arrays of structs (`req_t [N-1:0]`), `element_count` carries +/// the array length and `element` describes one element's layout (so the +/// reader knows "every one of N elements has this `total`/`fields`"). For +/// scalar arrays and non-array ports both are `None`. Only one level of +/// array unwrap is exposed; deeper nesting collapses into the parent +/// `element.total`. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ResolvedPortWidth { + pub total: i64, + #[serde(default)] + pub fields: BTreeMap, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub element_count: Option, + #[serde(default, skip_serializing_if = "Option::is_none")] + pub element: Option>, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct InstantiationInfo { + pub module_name: String, + pub instance_name: String, + /// Textual call-site expressions, keyed by the *child*'s parameter + /// name. Captured during the syntactic walk and never overwritten. + #[serde(default)] + pub param_bindings: BTreeMap, + /// Folded literal values produced by elaboration (e.g. `"32'd32"`). + /// Empty when the build ran without `--elab` or when slang failed + /// to resolve a particular symbol. + #[serde(default)] + pub resolved_param_values: BTreeMap, + #[serde(default)] + pub port_bindings: BTreeMap, + #[serde(default)] + pub resolved_port_widths: BTreeMap, + #[serde(default)] + pub condition: Option, + #[serde(default)] + pub line_start: Option, + #[serde(default)] + pub line_end: Option, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ImportInfo { + pub package_name: String, + #[serde(default)] + pub is_wildcard: bool, + #[serde(default)] + pub specific_symbols: Vec, +} + +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct IncludeInfo { + pub path: String, +} + +/// All extracted data for one module/interface/package. 
+#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ModuleData { + pub name: String, + #[serde(default)] + pub file_path: String, + #[serde(default)] + pub design: String, + #[serde(default)] + pub is_package: bool, + + #[serde(default)] + pub line_start: Option, + #[serde(default)] + pub line_end: Option, + #[serde(default)] + pub param_block_lines: Option<(i64, i64)>, + #[serde(default)] + pub port_block_lines: Option<(i64, i64)>, + + #[serde(default)] + pub parameters: Vec, + #[serde(default)] + pub ports: Vec, + #[serde(default)] + pub instantiations: Vec, + #[serde(default)] + pub imports: Vec, + #[serde(default)] + pub includes: Vec, + + #[serde(default)] + pub exported_typedefs: Vec, + #[serde(default)] + pub description: Option, +} + +/// Identity of a build, deterministic across invocations. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct DesignIdentity { + /// Stable id derived from `(workspace, sorted(targets), sorted(defines), top)`. + pub id: String, + /// Human-readable alias, either explicitly passed via `--design` or + /// auto-derived as `__`. + pub alias: String, + /// Top module that drove the elaboration (may be empty for whole-package builds). + pub top: Option, + /// The Bender targets that were active. + pub targets: Vec, + /// `+define+` plus per-target defines, formatted as `NAME` or `NAME=VALUE`. + pub defines: Vec, + /// Absolute path of the workspace root. + pub workspace: String, +} + +impl DesignIdentity { + /// Compute the deterministic id. 
+ pub fn compute_id( + workspace: impl AsRef, + targets: &[String], + defines: &[String], + top: Option<&str>, + ) -> String { + let mut hasher = Sha256::new(); + hasher.update(workspace.as_ref().to_string_lossy().as_bytes()); + hasher.update(b"\x1e"); + let mut sorted_targets = targets.to_vec(); + sorted_targets.sort(); + for t in &sorted_targets { + hasher.update(t.as_bytes()); + hasher.update(b"\x1f"); + } + hasher.update(b"\x1e"); + let mut sorted_defines = defines.to_vec(); + sorted_defines.sort(); + for d in &sorted_defines { + hasher.update(d.as_bytes()); + hasher.update(b"\x1f"); + } + hasher.update(b"\x1e"); + if let Some(t) = top { + hasher.update(t.as_bytes()); + } + let digest = hasher.finalize(); + hex_lower(&digest) + } + + pub fn build( + workspace: impl AsRef, + targets: Vec, + defines: Vec, + top: Option, + explicit_alias: Option, + ) -> Self { + let id = Self::compute_id(&workspace, &targets, &defines, top.as_deref()); + let id_short: String = id.chars().take(8).collect(); + let alias = match explicit_alias { + Some(a) if !a.is_empty() => a, + _ => match top.as_deref() { + Some(t) if !t.is_empty() => format!("{t}__{id_short}"), + _ => format!("design__{id_short}"), + }, + }; + Self { + id, + alias, + top, + targets, + defines, + workspace: workspace.as_ref().to_string_lossy().into_owned(), + } + } +} + +fn hex_lower(bytes: &[u8]) -> String { + const HEX: &[u8] = b"0123456789abcdef"; + let mut out = String::with_capacity(bytes.len() * 2); + for &b in bytes { + out.push(HEX[(b >> 4) as usize] as char); + out.push(HEX[(b & 0x0f) as usize] as char); + } + out +} + +/// Build manifest written next to the IR jsonl. Doubles as a dedup key for +/// incremental rebuilds (`bender kg build` is idempotent when the manifest's +/// `srclist_hash` matches the prior run). 
+#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct Manifest { + pub schema_version: String, + pub identity: DesignIdentity, + pub slang_version: Option, + pub created_at: Option, + pub file_count: usize, + pub module_count: usize, + pub package_count: usize, + pub edge_count: usize, + /// Hash of the resolved srclist (after target/define filtering). + pub srclist_hash: String, + pub extraction_warnings: Vec, +} + +impl Manifest { + pub fn new(identity: DesignIdentity) -> Self { + Self { + schema_version: KG_SCHEMA_VERSION.to_string(), + identity, + ..Default::default() + } + } +} + +/// One resolved-edge patch produced by deferred elaboration. +/// +/// Identifies an `INSTANTIATES` edge by its `(parent_module, child_module, +/// instance_name)` triple plus the design alias and carries the JSON- +/// serialised resolved param values + port widths. Consumed by +/// `Store::update_resolved_edges`, which translates the list into one +/// `UNWIND $rows AS r MATCH (...)-[e:INSTANTIATES {...}]->(...) SET ...` +/// statement (or chunked statements when the list is large). +/// +/// Lives here (not in `bender-kg-extract`) so `bender-kg-store` can +/// consume it without taking a back-edge to the extract crate. +#[derive(Debug, Clone, Default, Serialize, Deserialize)] +pub struct ResolvedEdgeUpdate { + pub parent_module: String, + pub child_module: String, + pub instance_name: String, + pub design: String, + pub resolved_param_values_json: String, + pub resolved_port_widths_json: String, +} + +/// One record on the IR jsonl wire. +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum IrRecord { + Manifest(Manifest), + Module(ModuleData), +} + +/// Streaming JSONL reader. 
+pub fn read_ir_jsonl(reader: R) -> impl Iterator> { + reader.lines().filter_map(|line| match line { + Err(e) => Some(Err(ModelsError::Io(e))), + Ok(line) => { + let trimmed = line.trim(); + if trimmed.is_empty() || trimmed.starts_with('#') { + None + } else { + Some(serde_json::from_str(trimmed).map_err(ModelsError::Serde)) + } + } + }) +} + +/// Streaming JSONL writer, emitting one record per line. +pub fn write_ir_record(writer: &mut W, rec: &IrRecord) -> Result<()> { + serde_json::to_writer(&mut *writer, rec)?; + writer.write_all(b"\n")?; + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn identity_is_deterministic_and_order_independent() { + let a = DesignIdentity::compute_id( + "/ws", + &vec!["b".to_string(), "a".to_string()], + &vec!["X=1".to_string(), "Y".to_string()], + Some("top"), + ); + let b = DesignIdentity::compute_id( + "/ws", + &vec!["a".to_string(), "b".to_string()], + &vec!["Y".to_string(), "X=1".to_string()], + Some("top"), + ); + assert_eq!(a, b); + assert_eq!(a.len(), 64); + } + + #[test] + fn alias_falls_back_to_top_plus_short_id() { + let id = DesignIdentity::build( + "/ws", + vec!["t".to_string()], + vec![], + Some("top_module".to_string()), + None, + ); + assert!(id.alias.starts_with("top_module__")); + assert_eq!(id.alias.len(), "top_module".len() + 2 + 8); + } + + #[test] + fn module_roundtrip() { + let mut m = ModuleData::default(); + m.name = "tt_fpu_v2".into(); + m.file_path = "/x/tt_fpu_v2.sv".into(); + m.parameters.push(ParamInfo { + name: "WIDTH".into(), + kind: ParamKind::Int, + default_value: "32".into(), + is_type_param: false, + }); + m.ports.push(PortInfo { + name: "clk".into(), + direction: Direction::Input, + type_str: "logic".into(), + width_expr: "1".into(), + bit_width: Some(1), + is_type_param: false, + type_ref: None, + }); + let s = serde_json::to_string(&m).unwrap(); + let m2: ModuleData = serde_json::from_str(&s).unwrap(); + assert_eq!(m2.name, m.name); + assert_eq!(m2.parameters[0].name, 
"WIDTH"); + assert_eq!(m2.ports[0].direction, Direction::Input); + } +} diff --git a/crates/bender-kg-similarity/Cargo.toml b/crates/bender-kg-similarity/Cargo.toml new file mode 100644 index 00000000..7f787f6b --- /dev/null +++ b/crates/bender-kg-similarity/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "bender-kg-similarity" +version = "0.1.0" +edition = "2024" +description = "Internal bender crate: text -> vector embedding for the knowledge graph" +license = "Apache-2.0" +authors = ["Alessandro Ottaviano "] + +[dependencies] +serde = { workspace = true } +thiserror = { workspace = true } +sha2 = { workspace = true } + +# Real local embeddings via model2vec-rs (pure Rust, no ONNX, no glibc dep). +model2vec-rs = { version = "0.1", optional = true } + +[features] +# Default-on real embeddings via model2vec-rs. +# Disable with --no-default-features to keep only the deterministic +# HashEmbedder fallback (no model download, no extra deps). +default = ["model2vec"] +model2vec = ["dep:model2vec-rs"] + +[package.metadata.dist] +dist = false diff --git a/crates/bender-kg-similarity/README.md b/crates/bender-kg-similarity/README.md new file mode 100644 index 00000000..efd1e110 --- /dev/null +++ b/crates/bender-kg-similarity/README.md @@ -0,0 +1,13 @@ +# bender-kg-similarity + +> **Internal crate:** `bender-kg-similarity` is an internal crate of [Bender](https://github.com/pulp-platform/bender). It does not provide a stable public API — breaking changes may occur at any time without notice. + +`bender-kg-similarity` is the embedding adapter for the `bender kg` subsystem. It converts a module's textual representation into a dense vector that is stored in the HNSW index inside `bender-kg-store` and used for semantic module search. + +## Responsibilities + +- Accept a module's textual representation and produce a fixed-dimension `f32` embedding vector. 
+- Provide a configurable backend (by default static embeddings via `model2vec-rs`, with the deterministic `HashEmbedder` as a fallback; no ONNX runtime is involved).
+- Be called by `bender-kg-core` during `bender kg build` and incremental updates. + +The embedding model path and dimension are configured through `CoreConfig`, which is managed by `bender-kg-core`. diff --git a/crates/bender-kg-similarity/src/lib.rs b/crates/bender-kg-similarity/src/lib.rs new file mode 100644 index 00000000..0181e599 --- /dev/null +++ b/crates/bender-kg-similarity/src/lib.rs @@ -0,0 +1,202 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Text -> dense-vector embedding adapter. +//! +//! Two backends: +//! - [`HashEmbedder`]: deterministic, no model download, fine for tests and a +//! degraded `bender kg search` flow. Uses signed feature hashing. +//! - `Model2VecEmbedder` (`model2vec` feature, default-on): pure-Rust static +//! embeddings via [`model2vec-rs`](https://docs.rs/model2vec-rs). No ONNX, +//! no glibc dependency, fast on CPU. Default model is +//! `minishlab/potion-base-8M` (256-dim). + +use serde::{Deserialize, Serialize}; +use sha2::{Digest, Sha256}; +use thiserror::Error; + +#[cfg(feature = "model2vec")] +mod model2vec; +#[cfg(feature = "model2vec")] +pub use model2vec::Model2VecEmbedder; + +/// Default dimensionality of the deterministic-fallback embedder. Matches +/// `model2vec` `minishlab/potion-base-8M` so the two backends are +/// interchangeable for downstream code that fixes a dimension. +pub const DEFAULT_DIM: usize = 256; + +/// Default model id resolved by the `model2vec` backend. +pub const DEFAULT_MODEL: &str = "minishlab/potion-base-8M"; + +#[derive(Debug, Error)] +pub enum EmbedError { + #[error("embed init error: {0}")] + Init(String), + #[error("embed runtime error: {0}")] + Runtime(String), +} + +pub type Result = std::result::Result; + +/// Configuration for [`build`]. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct EmbedConfig { + /// Embedding dimensionality (only used by the [`HashEmbedder`] fallback; + /// model2vec inherits dim from the loaded model). 
+ pub dim: usize, + /// Model id. For the `model2vec` backend this is a HuggingFace repo id + /// (e.g. `minishlab/potion-base-8M`) or a local path containing + /// `tokenizer.json`, `model.safetensors`, `config.json`. + pub model: String, + /// Force the deterministic [`HashEmbedder`] even when `model2vec` is + /// compiled in. Useful for tests and offline CI. + pub force_hash: bool, +} + +impl Default for EmbedConfig { + fn default() -> Self { + Self { + dim: DEFAULT_DIM, + model: DEFAULT_MODEL.to_string(), + force_hash: false, + } + } +} + +/// Generic embedder interface. +pub trait Embedder: Send + Sync { + fn dim(&self) -> usize; + fn model(&self) -> &str; + fn embed_one(&self, text: &str) -> Result>; + fn embed_batch(&self, texts: &[String]) -> Result>> { + texts.iter().map(|t| self.embed_one(t)).collect() + } +} + +/// Deterministic hash-based embedder. Produces unit-norm vectors of length +/// `dim` via signed feature hashing on whitespace tokens. +pub struct HashEmbedder { + dim: usize, + model: String, +} + +impl HashEmbedder { + pub fn new(dim: usize) -> Self { + Self { + dim, + model: format!("hash-fallback@{dim}"), + } + } +} + +impl Embedder for HashEmbedder { + fn dim(&self) -> usize { + self.dim + } + fn model(&self) -> &str { + &self.model + } + fn embed_one(&self, text: &str) -> Result> { + let mut buckets = vec![0.0f32; self.dim]; + for tok in text + .split(|c: char| !c.is_alphanumeric() && c != '_') + .filter(|s| !s.is_empty()) + { + let h = sha256_u64(tok); + let bin = (h as usize) % self.dim; + // Sign from a separate bit so collisions don't always reinforce. 
+ let sign = if (h >> 32) & 1 == 0 { 1.0 } else { -1.0 }; + buckets[bin] += sign; + } + let norm: f32 = buckets.iter().map(|v| v * v).sum::().sqrt(); + if norm > 0.0 { + for v in buckets.iter_mut() { + *v /= norm; + } + } + Ok(buckets) + } +} + +fn sha256_u64(s: &str) -> u64 { + let d = Sha256::digest(s.as_bytes()); + u64::from_le_bytes([d[0], d[1], d[2], d[3], d[4], d[5], d[6], d[7]]) +} + +/// Build the configured embedder. +/// +/// Resolution order: +/// 1. If `force_hash` is set, always [`HashEmbedder`]. +/// 2. If the `model2vec` feature is enabled, attempt to load +/// [`Model2VecEmbedder`]. On load failure we log nothing (this crate +/// has no logger dep) and fall back to [`HashEmbedder`]. +/// 3. Otherwise [`HashEmbedder`] at `cfg.dim`. +pub fn build(cfg: &EmbedConfig) -> Result> { + if cfg.force_hash { + return Ok(Box::new(HashEmbedder::new(cfg.dim))); + } + #[cfg(feature = "model2vec")] + { + match Model2VecEmbedder::load(&cfg.model) { + Ok(e) => return Ok(Box::new(e)), + Err(_) => { + // Model load can fail offline or on bad path; fall through. + } + } + } + Ok(Box::new(HashEmbedder::new(cfg.dim))) +} + +/// Cosine similarity between two same-length vectors. Returns 0 on length +/// mismatch or zero-magnitude inputs. 
+pub fn cosine(a: &[f32], b: &[f32]) -> f32 { + if a.len() != b.len() || a.is_empty() { + return 0.0; + } + let mut dot = 0.0f32; + let mut na = 0.0f32; + let mut nb = 0.0f32; + for (x, y) in a.iter().zip(b.iter()) { + dot += x * y; + na += x * x; + nb += y * y; + } + if na == 0.0 || nb == 0.0 { + return 0.0; + } + dot / (na.sqrt() * nb.sqrt()) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn hash_embedder_is_deterministic() { + let e = HashEmbedder::new(64); + let a = e.embed_one("clock domain crossing fifo").unwrap(); + let b = e.embed_one("clock domain crossing fifo").unwrap(); + assert_eq!(a, b); + assert!((cosine(&a, &b) - 1.0).abs() < 1e-5); + } + + #[test] + fn similar_texts_score_higher_than_unrelated() { + let e = HashEmbedder::new(256); + let a = e.embed_one("axi master interface module").unwrap(); + let b = e.embed_one("axi master interface").unwrap(); + let c = e.embed_one("totally unrelated thing about coffee").unwrap(); + assert!(cosine(&a, &b) > cosine(&a, &c)); + } + + #[test] + fn force_hash_returns_hash_backend_under_default_features() { + let cfg = EmbedConfig { + force_hash: true, + ..EmbedConfig::default() + }; + let e = build(&cfg).unwrap(); + assert!(e.model().starts_with("hash-fallback@")); + assert_eq!(e.dim(), DEFAULT_DIM); + } +} diff --git a/crates/bender-kg-similarity/src/model2vec.rs b/crates/bender-kg-similarity/src/model2vec.rs new file mode 100644 index 00000000..bb327895 --- /dev/null +++ b/crates/bender-kg-similarity/src/model2vec.rs @@ -0,0 +1,54 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! `model2vec-rs` backend. Pure-Rust static embeddings; no ONNX, no glibc dep. + +use model2vec_rs::model::StaticModel; + +use crate::{EmbedError, Embedder, Result}; + +/// Real text embedder backed by [`model2vec_rs::model::StaticModel`]. +pub struct Model2VecEmbedder { + inner: StaticModel, + dim: usize, + model_id: String, +} + +impl Model2VecEmbedder { + /// Load a model from a HuggingFace repo id (e.g. 
`minishlab/potion-base-8M`) + /// or a local directory containing `tokenizer.json`, `model.safetensors`, + /// and `config.json`. + pub fn load(repo_or_path: &str) -> Result { + let inner = StaticModel::from_pretrained(repo_or_path, None, None, None) + .map_err(|e| EmbedError::Init(format!("model2vec load {repo_or_path}: {e}")))?; + // Probe the embedding dim with a single-token call. Cheap (<1 ms). + let probe = inner.encode(&["probe".to_string()]); + let dim = probe + .first() + .map(|v| v.len()) + .ok_or_else(|| EmbedError::Init("model2vec returned no embeddings on probe".into()))?; + Ok(Self { + inner, + dim, + model_id: repo_or_path.to_string(), + }) + } +} + +impl Embedder for Model2VecEmbedder { + fn dim(&self) -> usize { + self.dim + } + + fn model(&self) -> &str { + &self.model_id + } + + fn embed_one(&self, text: &str) -> Result> { + Ok(self.inner.encode_single(text)) + } + + fn embed_batch(&self, texts: &[String]) -> Result>> { + Ok(self.inner.encode(texts)) + } +} diff --git a/crates/bender-kg-store/Cargo.toml b/crates/bender-kg-store/Cargo.toml new file mode 100644 index 00000000..5651820d --- /dev/null +++ b/crates/bender-kg-store/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "bender-kg-store" +version = "0.1.0" +edition = "2024" +description = "Internal bender crate: Grafeo-backed knowledge graph + vector store" +license = "Apache-2.0" +authors = ["Alessandro Ottaviano "] + +[dependencies] +bender-kg-models = { workspace = true } +grafeo = { workspace = true } +grafeo-common = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +thiserror = { workspace = true } + +[dev-dependencies] +tempfile = "3.5" + +[package.metadata.dist] +dist = false diff --git a/crates/bender-kg-store/README.md b/crates/bender-kg-store/README.md new file mode 100644 index 00000000..21113d6d --- /dev/null +++ b/crates/bender-kg-store/README.md @@ -0,0 +1,15 @@ +# bender-kg-store + +> **Internal crate:** `bender-kg-store` is an internal crate 
of [Bender](https://github.com/pulp-platform/bender). It does not provide a stable public API — breaking changes may occur at any time without notice. + +`bender-kg-store` is the persistence layer of the `bender kg` subsystem. It manages a Grafeo graph database (property graph + HNSW vector index + BM25 text index) that stores the extracted design knowledge. + +## Responsibilities + +- **Module nodes** — upsert and retrieve `ModuleData` records in the graph, including ports, parameters, and import lists serialised as JSON blobs. +- **Instantiation edges** — store and query `INSTANTIATES` relationships with parameter and port binding metadata. +- **Vector embeddings** — store and nearest-neighbour search over dense module embeddings for semantic and structural similarity. +- **Graph traversal** — BFS helpers (`graph.rs`) for hierarchy path-finding and parent/child queries. +- **Parameter and signal tracing** — `param.rs` utilities for matching parameter references and signal identifiers across module boundaries. + +By default `bender kg` keeps the database files under a `.bender-kg/` directory (`StoreConfig::root`), in a database directory named `graph.db`. diff --git a/crates/bender-kg-store/src/graph.rs b/crates/bender-kg-store/src/graph.rs new file mode 100644 index 00000000..31c7e307 --- /dev/null +++ b/crates/bender-kg-store/src/graph.rs @@ -0,0 +1,83 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Graph traversal primitives - BFS, parent/child queries, path finding. + +use crate::instance::{row_to_instance_edge, INSTANCE_EDGE_QUERY}; +use crate::{cparam, InstanceEdge, Result}; +use grafeo::GrafeoDB; +use std::collections::{BTreeMap, BTreeSet, VecDeque}; + +/// List all instance edges originating from `parent` module. +/// +/// This is used by BFS traversal and other algorithms that need to +/// explore the instantiation graph.
+pub fn list_instance_edges_from( + db: &GrafeoDB, + parent: &str, + parent_file: &str, +) -> Result> { + let r = db.execute_cypher_with_params(INSTANCE_EDGE_QUERY, cparam("p", parent))?; + + Ok(r.rows() + .iter() + .map(|row| row_to_instance_edge(row, parent_file)) + .collect()) +} + +/// Find the shortest instantiation path from `from` module to `to` module +/// using BFS traversal. +/// +/// Returns the sequence of `InstanceEdge`s along the path, or an empty +/// vector if no path exists. +/// +/// This is more efficient than using Cypher's `shortestPath` because we +/// need to extract all the edge metadata anyway, so we might as well do +/// the traversal in Rust. +pub fn trace_hierarchy_path( + db: &GrafeoDB, + module_meta_fn: &dyn Fn(&str) -> Result<(String, String)>, + from: &str, + to: &str, +) -> Result> { + let mut prev: BTreeMap = BTreeMap::new(); + let mut visited: BTreeSet = BTreeSet::new(); + let mut queue: VecDeque = VecDeque::new(); + + queue.push_back(from.to_string()); + visited.insert(from.to_string()); + + while let Some(cur) = queue.pop_front() { + if cur == to { + break; + } + + // Get parent file path for this module + let (_design, parent_file) = module_meta_fn(&cur)?; + + for edge in list_instance_edges_from(db, &cur, &parent_file)? 
{ + if visited.insert(edge.child.clone()) { + prev.insert(edge.child.clone(), (cur.clone(), edge.clone())); + queue.push_back(edge.child.clone()); + } + } + } + + if !visited.contains(to) { + return Ok(Vec::new()); + } + + // Reconstruct path by walking backwards from `to` to `from` + let mut chain: Vec = Vec::new(); + let mut cur = to.to_string(); + while let Some((parent, edge)) = prev.remove(&cur) { + chain.push(edge); + cur = parent; + if cur == from { + break; + } + } + + chain.reverse(); + Ok(chain) +} diff --git a/crates/bender-kg-store/src/instance.rs b/crates/bender-kg-store/src/instance.rs new file mode 100644 index 00000000..09b1f829 --- /dev/null +++ b/crates/bender-kg-store/src/instance.rs @@ -0,0 +1,66 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Instance context extraction - shared logic for retrieving parameter +//! and port bindings from module instantiation edges. + +use crate::{as_i64_or_none, as_string, decode_json, InstanceEdge, ResolvedPortWidth}; +use grafeo::Value; +use std::collections::BTreeMap; + +/// Convert a Cypher query row into an `InstanceEdge` struct. +/// +/// Expected row layout: `p.name, c.name, instance_name, design, +/// param_bindings_json, port_bindings_json, resolved_param_values_json, +/// resolved_port_widths_json, line_start, line_end`. 
+pub fn row_to_instance_edge(row: &[Value], parent_file: &str) -> InstanceEdge { + let parent = as_string(&row[0]); + let child = as_string(&row[1]); + let instance_name = as_string(&row[2]); + let design = as_string(&row[3]); + let param_bindings: BTreeMap = + decode_json(&as_string(&row[4])).unwrap_or_default(); + let port_bindings: BTreeMap = + decode_json(&as_string(&row[5])).unwrap_or_default(); + let resolved_param_values: BTreeMap = + decode_json(&as_string(&row[6])).unwrap_or_default(); + let resolved_port_widths: BTreeMap = + decode_json(&as_string(&row[7])).unwrap_or_default(); + let line_start = as_i64_or_none(&row[8]).filter(|v| *v >= 0); + let line_end = as_i64_or_none(&row[9]).filter(|v| *v >= 0); + + InstanceEdge { + parent, + child, + instance_name, + param_bindings, + resolved_param_values, + port_bindings, + resolved_port_widths, + parent_file_path: parent_file.to_string(), + line_start, + line_end, + design, + } +} + +/// Standard Cypher query for fetching instance edge data. +/// +/// Returns: `p.name, c.name, r.instance_name, r.design, +/// r.param_bindings_json, r.port_bindings_json, +/// r.resolved_param_values_json, r.resolved_port_widths_json, +/// r.line_start, r.line_end`. +pub const INSTANCE_EDGE_QUERY: &str = + "MATCH (p:Module {name: $p})-[r:INSTANTIATES]->(c:Module) \ + RETURN p.name, c.name, r.instance_name, r.design, \ + r.param_bindings_json, r.port_bindings_json, \ + r.resolved_param_values_json, r.resolved_port_widths_json, \ + r.line_start, r.line_end"; + +/// Cypher query for fetching a specific parent->child edge. 
+pub const INSTANCE_EDGE_QUERY_FILTERED: &str = + "MATCH (p:Module {name: $p})-[r:INSTANTIATES]->(c:Module {name: $c}) \ + RETURN p.name, c.name, r.instance_name, r.design, \ + r.param_bindings_json, r.port_bindings_json, \ + r.resolved_param_values_json, r.resolved_port_widths_json, \ + r.line_start, r.line_end"; diff --git a/crates/bender-kg-store/src/lib.rs b/crates/bender-kg-store/src/lib.rs new file mode 100644 index 00000000..63ab937e --- /dev/null +++ b/crates/bender-kg-store/src/lib.rs @@ -0,0 +1,1555 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Grafeo-backed knowledge-graph store for `bender kg`. +//! +//! Schema: +//! Node labels: `Module`, `Design` +//! Edge types : `INSTANTIATES` (Module -> Module), +//! `IMPORTS` (Module -> Module), +//! `BELONGS_TO` (Module -> Design) +//! +//! Each module's ports / parameters / imports are JSON strings on the +//! `Module` node so a single MATCH returns everything needed to +//! reconstruct a [`ModuleData`]. Embeddings live as a `Module.embedding` +//! `Value::Vector` property and are searched through Grafeo's HNSW index. +//! +//! ## Per-design identity +//! +//! Modules are scoped per design. A `Module` node's identity is a compound +//! key `::` stored as `m.key`; the user-visible name lives +//! on `m.name`. Two designs that both define `axi_pkg` produce two +//! independent nodes with identical `name` but distinct `key`. This lets +//! `clear_design(alias)` cleanly wipe everything that design owns without +//! touching modules from other designs that happen to share a name. +//! +//! ## Edges +//! +//! Grafeo allows parallel edges between the same `(src, dst, type)`, so +//! every instantiation is its own edge with a self-contained property set +//! (`instance_name`, `param_bindings_json`, `port_bindings_json`, +//! `resolved_param_values_json`, `resolved_port_widths_json`, +//! `line_start`, `line_end`). No more JSON-array packing. 
/// Default UNWIND batch size for `upsert_modules`. Override via
/// [`StoreConfig::with_upsert_chunk_size`] when memory pressure or unusually
/// wide modules suggest a smaller batch.
pub const DEFAULT_UPSERT_CHUNK_SIZE: usize = 4096;

/// Configuration for opening a knowledge-graph database.
#[derive(Debug, Clone)]
pub struct StoreConfig {
    /// Directory holding the database files.
    pub root: PathBuf,
    /// Optional database directory name; `graph.db` is used when `None`.
    pub db_filename: Option<String>,
    /// Embedding dimensionality. When set, it is used to create the
    /// `:Module(embedding)` vector index on open, so callers should keep it
    /// stable across rebuilds.
    pub embedding_dim: Option<usize>,
    /// Maximum rows per UNWIND batch in `upsert_modules`. Larger means
    /// fewer Cypher round-trips; smaller means lower per-call memory.
    pub upsert_chunk_size: usize,
}

impl StoreConfig {
    /// Build a configuration rooted at `root` with the default database
    /// file name, no embedding index, and [`DEFAULT_UPSERT_CHUNK_SIZE`].
    pub fn new(root: impl Into<PathBuf>) -> Self {
        Self {
            root: root.into(),
            db_filename: None,
            embedding_dim: None,
            upsert_chunk_size: DEFAULT_UPSERT_CHUNK_SIZE,
        }
    }

    /// Return the configuration with the embedding dimensionality set.
    #[must_use]
    pub fn with_embedding_dim(mut self, dim: usize) -> Self {
        self.embedding_dim = Some(dim);
        self
    }

    /// Return the configuration with a new UNWIND batch size. A value of
    /// `0` is raised to `1` so chunking can never be asked for empty chunks.
    #[must_use]
    pub fn with_upsert_chunk_size(mut self, n: usize) -> Self {
        self.upsert_chunk_size = n.max(1);
        self
    }

    /// Full path of the database: `root` joined with `db_filename`, or with
    /// `graph.db` when no file name was configured.
    pub fn db_path(&self) -> PathBuf {
        let file_name = self.db_filename.as_deref().unwrap_or("graph.db");
        self.root.join(file_name)
    }
}
+ pub parent_file_path: String, + pub line_start: Option, + pub line_end: Option, + pub design: String, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct Subgraph { + pub nodes: Vec, + pub edges: Vec, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct GraphStats { + pub modules: usize, + pub packages: usize, + pub instantiations: usize, + pub imports: usize, + pub designs: Vec, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct DesignStat { + pub design: String, + pub modules: usize, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct VectorHit { + pub module: String, + pub design: String, + pub score: f32, +} + +/// Grafeo-backed knowledge graph + vector store. +pub struct Store { + db: GrafeoDB, + db_path: PathBuf, + upsert_chunk_size: usize, +} + +impl Store { + pub fn open(cfg: &StoreConfig) -> Result { + std::fs::create_dir_all(&cfg.root)?; + let path = cfg.db_path(); + let db = GrafeoDB::open(&path)?; + // Ensure schema (idempotent). Both indexes auto-maintain on + // subsequent `set_node_property` / Cypher SET writes. + if let Some(dim) = cfg.embedding_dim { + // Empty vector index pre-allocated; Grafeo populates it as we + // write `Module.embedding` properties. + db.create_vector_index( + "Module", + "embedding", + Some(dim), + Some("cosine"), + None, + None, + None, + )?; + } + // BM25 inverted index for `find_by_protocol`. No-op if already present. + db.create_text_index("Module", "ports_json")?; + Ok(Self { + db, + db_path: path, + upsert_chunk_size: cfg.upsert_chunk_size.max(1), + }) + } + + /// Maximum number of rows per UNWIND batch in [`Self::upsert_modules`]. 
+ pub fn upsert_chunk_size(&self) -> usize { + self.upsert_chunk_size + } + + pub fn db_path(&self) -> Result { + Ok(self.db_path.to_string_lossy().into_owned()) + } + + // ------------------------------------------------------------------ + // Mutations + // ------------------------------------------------------------------ + + /// Upsert a single module. Convenience wrapper around + /// [`Self::upsert_modules`] for callers that have one module in hand. + pub fn upsert_module(&self, m: &ModuleData) -> Result<()> { + self.upsert_modules(std::iter::once(m))?; + Ok(()) + } + + /// Upsert a batch of modules and all their outgoing edges atomically. + /// + /// Everything happens inside a single Grafeo transaction. Each phase + /// (node merge, design stub, child stub, INSTANTIATES, IMPORTS, + /// BELONGS_TO) drives one or more `UNWIND $rows AS r ...` Cypher + /// statements. Rows are chunked at [`Store::upsert_chunk_size`] so + /// very large batches stay within Grafeo's per-call memory budget. + /// Parallel edges between the same `(src, dst, type)` are preserved + /// by `CREATE` (one row -> one edge). + /// + /// Identity is compound `::` so two designs can each + /// carry their own copy of `axi_pkg` without colliding on a shared + /// node. + pub fn upsert_modules<'a, I>(&self, modules: I) -> Result + where + I: IntoIterator, + { + let modules: Vec<&ModuleData> = modules.into_iter().collect(); + if modules.is_empty() { + return Ok(0); + } + + let mut session = self.db.session(); + session.begin_transaction()?; + + let chunk = self.upsert_chunk_size.max(1); + + // Pass 1a: bulk MERGE of every module node in the batch (one + // Cypher per chunk). + merge_module_nodes_batch(&session, &modules, chunk)?; + + // Pass 1b: bulk MERGE of each touched design alias. Deduped via + // a sorted set so we never MERGE the same alias twice. 
+ let design_aliases: BTreeSet = modules + .iter() + .filter(|m| !m.design.is_empty()) + .map(|m| m.design.clone()) + .collect(); + merge_design_stubs_batch(&session, &design_aliases, chunk)?; + + // Pass 1c: bulk MERGE of every child stub (instantiation and + // import targets). Deduped per `::` so external + // children that appear in many parents are merged once. + merge_child_stubs_batch(&session, &modules, chunk)?; + + // Pass 2: bulk CREATE of all outgoing edges, one Cypher per kind. + create_instantiates_batch(&session, &modules, chunk)?; + create_imports_batch(&session, &modules, chunk)?; + create_belongs_to_batch(&session, &modules, chunk)?; + session.commit()?; + Ok(modules.len()) + } + + /// Apply a batch of resolved-edge patches produced by deferred + /// elaboration ([`bender_kg_extract::ElabHandle::run`]). + /// + /// Each row identifies an existing `INSTANTIATES` edge by + /// `(parent_name, child_name, instance_name, design)` and SETs + /// `resolved_param_values_json` / `resolved_port_widths_json` on + /// it. Rows whose triple does not match any edge silently no-op + /// (mirrors the prior inline merge's `inst_miss` warning case). + /// Runs inside one transaction; chunked at the same + /// [`Self::upsert_chunk_size`] used by `upsert_modules`. 
+ pub fn update_resolved_edges( + &self, + updates: &[bender_kg_models::ResolvedEdgeUpdate], + ) -> Result { + if updates.is_empty() { + return Ok(0); + } + let mut session = self.db.session(); + session.begin_transaction()?; + + let rows: Vec = updates + .iter() + .map(|u| { + row([ + ("pn", Value::from(u.parent_module.as_str())), + ("cn", Value::from(u.child_module.as_str())), + ("inn", Value::from(u.instance_name.as_str())), + ("d", Value::from(u.design.as_str())), + ("rpv", Value::from(u.resolved_param_values_json.as_str())), + ("rpw", Value::from(u.resolved_port_widths_json.as_str())), + ]) + }) + .collect(); + unwind_in_chunks( + &session, + "UNWIND $rows AS r \ + MATCH (p:Module {name: r.pn, design: r.d}) \ + -[e:INSTANTIATES {instance_name: r.inn, design: r.d}]-> \ + (c:Module {name: r.cn, design: r.d}) \ + SET e.resolved_param_values_json = r.rpv, \ + e.resolved_port_widths_json = r.rpw", + rows, + self.upsert_chunk_size.max(1), + )?; + + session.commit()?; + Ok(updates.len()) + } + + /// Register design metadata. Idempotent: re-running with the same + /// alias updates the existing node's properties via MERGE. 
+ pub fn register_design( + &self, + alias: &str, + identity_id: &str, + workspace: Option<&str>, + top: Option<&str>, + targets: &[String], + defines: &[String], + ) -> Result<()> { + let p = HashMap::from([ + ("a".into(), Value::from(alias)), + ("id".into(), Value::from(identity_id)), + ("ws".into(), Value::from(workspace.unwrap_or(""))), + ("top".into(), Value::from(top.unwrap_or(""))), + ("tg".into(), Value::from(serde_json::to_string(targets)?)), + ("df".into(), Value::from(serde_json::to_string(defines)?)), + ("ca".into(), Value::from(now_unix_ts())), + ]); + self.db.execute_cypher_with_params( + "MERGE (d:Design {alias: $a}) \ + SET d.identity_id = $id, d.workspace = $ws, d.top = $top, \ + d.targets = $tg, d.defines = $df, d.created_at = $ca", + p, + )?; + Ok(()) + } + + pub fn clear_design(&self, alias: &str) -> Result<()> { + // Drop every Module owned by this design (with edges), then the + // Design node itself. Parameterised DETACH DELETE handles both. + self.db.execute_cypher_with_params( + "MATCH (m:Module) WHERE m.design = $a DETACH DELETE m", + cparam("a", alias), + )?; + self.db.execute_cypher_with_params( + "MATCH (d:Design {alias: $a}) DETACH DELETE d", + cparam("a", alias), + )?; + Ok(()) + } + + pub fn clear_all(&self) -> Result<()> { + self.db.execute_cypher("MATCH (m:Module) DETACH DELETE m")?; + self.db.execute_cypher("MATCH (d:Design) DETACH DELETE d")?; + Ok(()) + } + + // ------------------------------------------------------------------ + // Reads + // ------------------------------------------------------------------ + + pub fn get_module(&self, name: &str) -> Result> { + let r = self.db.execute_cypher_with_params( + "MATCH (m:Module {name: $n}) \ + RETURN m.name, m.file_path, m.design, m.is_package, \ + m.line_start, m.line_end, \ + m.param_block_start, m.param_block_end, \ + m.port_block_start, m.port_block_end, \ + m.description, m.ports_json, m.params_json, m.imports_json", + cparam("n", name), + )?; + let Some(row) = 
r.rows().first() else { + return Ok(None); + }; + // A bare stub (no full upsert yet) has nulls past column 1. + if row_is_stub(row) { + return Ok(None); + } + Ok(Some(row_to_module(row)?)) + } + + pub fn get_parents(&self, module: &str) -> Result> { + let r = self.db.execute_cypher_with_params( + "MATCH (p:Module)-[:INSTANTIATES]->(:Module {name: $n}) \ + RETURN DISTINCT p.name", + cparam("n", module), + )?; + let mut out = Vec::new(); + for row in r.rows() { + let n = as_string(&row[0]); + if let Some(m) = self.get_module(&n)? { + out.push(m); + } + } + Ok(out) + } + + pub fn get_children(&self, module: &str) -> Result> { + let r = self.db.execute_cypher_with_params( + "MATCH (:Module {name: $n})-[:INSTANTIATES]->(c:Module) \ + RETURN DISTINCT c.name", + cparam("n", module), + )?; + let mut out = Vec::new(); + for row in r.rows() { + let n = as_string(&row[0]); + if let Some(m) = self.get_module(&n)? { + out.push(m); + } + } + Ok(out) + } + + pub fn get_subgraph(&self, root: &str, depth: i32) -> Result { + let mut nodes_set: BTreeSet = BTreeSet::new(); + let mut edges = Vec::new(); + let mut frontier: Vec = vec![root.to_string()]; + nodes_set.insert(root.to_string()); + let mut steps = 0; + while !frontier.is_empty() && steps < depth.max(0) { + let mut next: Vec = Vec::new(); + for parent in &frontier { + for edge in self.list_instance_edges_from(parent)? { + if nodes_set.insert(edge.child.clone()) { + next.push(edge.child.clone()); + } + edges.push(edge); + } + } + frontier = next; + steps += 1; + } + let mut nodes = Vec::with_capacity(nodes_set.len()); + for name in nodes_set { + if let Some(m) = self.get_module(&name)? 
{ + nodes.push(m); + } + } + Ok(Subgraph { nodes, edges }) + } + + pub fn get_instance_context(&self, parent: &str, child: &str) -> Result> { + let parent_file = self.module_meta(parent)?.1; + let cypher = if child.is_empty() { + instance::INSTANCE_EDGE_QUERY + } else { + instance::INSTANCE_EDGE_QUERY_FILTERED + }; + let mut p = std::collections::HashMap::new(); + p.insert("p".into(), Value::from(parent)); + if !child.is_empty() { + p.insert("c".into(), Value::from(child)); + } + let r = self.db.execute_cypher_with_params(cypher, p)?; + let mut out: Vec = Vec::new(); + for row in r.rows() { + out.push(instance::row_to_instance_edge(row, &parent_file)); + } + Ok(out) + } + + /// Trace the BFS hierarchy path between two modules. Implemented in + /// Rust because reconstructing a path from a Cypher `shortestPath` + /// requires walking edge metadata anyway. + pub fn trace_hierarchy_path(&self, from: &str, to: &str) -> Result> { + graph::trace_hierarchy_path(&self.db, &|name| self.module_meta(name), from, to) + } + + pub fn check_connectivity(&self, module: &str, depth: i32) -> Result> { + let mut findings = Vec::new(); + let sub = self.get_subgraph(module, depth)?; + for edge in &sub.edges { + let Some(child) = self.get_module(&edge.child)? else { + continue; + }; + for port in &child.ports { + let Some(entry) = edge.resolved_port_widths.get(&port.name) else { + continue; + }; + if entry.total == 0 { + continue; + } + let Some(decl) = port.bit_width else { continue }; + if entry.total != decl { + findings.push(serde_json::json!({ + "kind": "width_mismatch", + "parent": edge.parent, + "child": edge.child, + "instance": edge.instance_name, + "port": port.name, + "instance_width": entry.total, + "declared_width": decl, + "field_breakdown": entry.fields, + })); + } + } + } + Ok(findings) + } + + /// Find every instantiation that binds `param` along the + /// `INSTANTIATES` edges incident to `module` (either as parent or + /// child). 
Each parallel edge is its own row, so we just iterate. + pub fn trace_parameter(&self, module: &str, param: &str) -> Result> { + let r = self.db.execute_cypher_with_params( + "MATCH (p:Module)-[r:INSTANTIATES]->(c:Module) \ + WHERE p.name = $n OR c.name = $n \ + RETURN p.name, c.name, r.instance_name, r.design, \ + r.param_bindings_json, r.port_bindings_json, \ + r.resolved_param_values_json, r.resolved_port_widths_json, \ + r.line_start, r.line_end", + cparam("n", module), + )?; + let mut file_cache: BTreeMap = BTreeMap::new(); + let mut child_param_cache: BTreeMap> = BTreeMap::new(); + let mut out = Vec::new(); + for row in r.rows() { + let parent_name = as_string(&row[0]); + let parent_file = self.cached_file(&parent_name, &mut file_cache)?; + let edge = instance::row_to_instance_edge(row, &parent_file); + + // Cache child module parameter defaults + if !child_param_cache.contains_key(&edge.child) { + let defaults = match self.get_module(&edge.child)? { + Some(m) => m.parameters.iter() + .map(|p| (p.name.clone(), p.default_value.clone())) + .collect(), + None => BTreeMap::new(), + }; + child_param_cache.insert(edge.child.clone(), defaults); + } + + // Check each parameter binding to see if it references our parameter + // This now handles struct field accesses like "Cfg.JTAG_BSR_ENABLE" + for (child_param_name, binding_value) in &edge.param_bindings { + if param::value_references_param(param, binding_value) { + let child_param_default = child_param_cache + .get(&edge.child) + .and_then(|defaults| defaults.get(child_param_name).cloned()); + + out.push(serde_json::json!({ + "parent": edge.parent, + "child": edge.child, + "instance": edge.instance_name, + "child_parameter": child_param_name, + "call_site_expression": binding_value, + "resolved_value": edge.resolved_param_values.get(child_param_name), + "child_param_default": child_param_default, + "affected_port_widths": edge.resolved_port_widths, + "parent_file_path": edge.parent_file_path, + "line_start": 
edge.line_start, + "line_end": edge.line_end, + })); + } + } + } + Ok(out) + } + + /// Trace how a signal (port) on `module` propagates to child instances. + /// + /// Searches all instantiation edges where `module` is the parent and looks + /// through `port_bindings` for any child port whose binding expression + /// references `signal`. Returns one entry per matching (instance, port) pair. + pub fn trace_signal(&self, module: &str, signal: &str) -> Result> { + let r = self.db.execute_cypher_with_params( + "MATCH (p:Module)-[r:INSTANTIATES]->(c:Module) \ + WHERE p.name = $n \ + RETURN p.name, c.name, r.instance_name, r.design, \ + r.param_bindings_json, r.port_bindings_json, \ + r.resolved_param_values_json, r.resolved_port_widths_json, \ + r.line_start, r.line_end", + cparam("n", module), + )?; + let mut file_cache: BTreeMap = BTreeMap::new(); + let mut out = Vec::new(); + for row in r.rows() { + let parent_name = as_string(&row[0]); + let parent_file = self.cached_file(&parent_name, &mut file_cache)?; + let edge = instance::row_to_instance_edge(row, &parent_file); + + for (child_port_name, binding_expr) in &edge.port_bindings { + if param::value_references_signal(signal, binding_expr) { + out.push(serde_json::json!({ + "parent": edge.parent, + "child": edge.child, + "instance": edge.instance_name, + "child_port": child_port_name, + "parent_expression": binding_expr, + "parent_file_path": edge.parent_file_path, + "line_start": edge.line_start, + "line_end": edge.line_end, + })); + } + } + } + Ok(out) + } + + /// Find modules whose `ports_json` mentions the protocol keyword. + /// Grafeo's planner pushes `CONTAINS` into the `:Module(ports_json)` + /// inverted index when the BM25 token matches; otherwise it falls + /// back to a property scan with the same correctness semantics. 
+ pub fn find_by_protocol( + &self, + protocol: &str, + design: Option<&str>, + ) -> Result> { + let kw = protocol.to_lowercase(); + let cypher = if design.is_some() { + "MATCH (m:Module) \ + WHERE m.design = $d AND lower(m.ports_json) CONTAINS $kw \ + RETURN m.name" + } else { + "MATCH (m:Module) WHERE lower(m.ports_json) CONTAINS $kw RETURN m.name" + }; + let r = self.db.execute_cypher_with_params(cypher, cparam_d("kw", kw.clone(), design))?; + let mut out = Vec::new(); + for row in r.rows() { + let n = as_string(&row[0]); + // Re-confirm the match against the typed PortInfo to drop + // false positives where the token appears outside type_str. + if let Some(m) = self.get_module(&n)? { + if m.ports + .iter() + .any(|p| p.type_str.to_lowercase().contains(&kw)) + { + out.push(m); + } + } + } + Ok(out) + } + + pub fn match_interfaces( + &self, + a: &str, + b: &str, + prefix_a: &str, + prefix_b: &str, + ) -> Result { + let ma = self + .get_module(a)? + .ok_or_else(|| StoreError::NotFound(a.into()))?; + let mb = self + .get_module(b)? 
+ .ok_or_else(|| StoreError::NotFound(b.into()))?; + + let comparison = port::compare_ports(&ma.ports, &mb.ports, prefix_a, prefix_b); + + let matched: Vec = comparison + .matched + .iter() + .map(|m| { + serde_json::json!({ + "port": m.name, + "a_direction": port::direction_str(m.a_direction), + "b_direction": port::direction_str(m.b_direction), + "direction_complementary": m.direction_complementary, + "a_width": m.a_width, + "b_width": m.b_width, + }) + }) + .collect(); + + let width_conflicts: Vec = comparison + .width_conflicts + .iter() + .map(|c| { + serde_json::json!({ + "port": c.name, + "a_width": c.a_width, + "b_width": c.b_width, + }) + }) + .collect(); + + Ok(serde_json::json!({ + "module_a": a, + "module_b": b, + "matched": matched, + "width_conflicts": width_conflicts, + "unmatched_a": comparison.unmatched_a, + "unmatched_b": comparison.unmatched_b, + })) + } + + /// Find modules whose port set has Jaccard overlap >= `min_overlap` + /// with `module`'s port set. + /// + /// One Cypher (`MATCH (m:Module) ... RETURN m.name, m.port_set_json, + /// m.port_set_card`) feeds a linear two-pointer Jaccard against the + /// target's already-sorted port set. Total cost is `O(N * P_avg)` for + /// `N` candidates and average port-set cardinality `P_avg`, vs the + /// previous `O(N * (Cypher round-trip + |A| + |B|))`. Port sets are + /// pre-stamped on each Module node by the upsert path so we never + /// reparse the full `ports_json`. + pub fn find_structurally_similar( + &self, + module: &str, + min_overlap: f64, + design: Option<&str>, + ) -> Result> { + // Look up target's pre-stamped port set in one round-trip. 
+ let cypher = match design { + Some(_) => { + "MATCH (m:Module {name: $n, design: $d}) \ + RETURN m.port_set_json AS pj, m.port_set_card AS pc" + } + None => { + "MATCH (m:Module {name: $n}) \ + RETURN m.port_set_json AS pj, m.port_set_card AS pc" + } + }; + let r = self.db.execute_cypher_with_params(cypher, cparam_d("n", module, design))?; + let rows = r.rows(); + let target_row = rows + .first() + .ok_or_else(|| StoreError::NotFound(module.into()))?; + let target_sorted = port::parse_port_set_json(&as_string(&target_row[0])); + let target_card = target_row[1].as_int64().unwrap_or(0); + if target_sorted.is_empty() || target_card == 0 { + return Ok(Vec::new()); + } + + // Pull every candidate's pre-stamped set in one round-trip (or + // two if a design filter is in play). The vector index isn't + // applicable here so we just scan every Module node in scope. + let cypher = match design { + Some(_) => { + "MATCH (m:Module) WHERE m.design = $d AND m.name <> $n \ + AND m.port_set_card > 0 \ + RETURN m.name AS name, m.port_set_json AS pj, m.port_set_card AS pc" + } + None => { + "MATCH (m:Module) WHERE m.name <> $n AND m.port_set_card > 0 \ + RETURN m.name AS name, m.port_set_json AS pj, m.port_set_card AS pc" + } + }; + let r = self.db.execute_cypher_with_params(cypher, cparam_d("n", module, design))?; + let mut out = Vec::new(); + for row_v in r.rows() { + let name = as_string(&row_v[0]); + let cand_sorted = port::parse_port_set_json(&as_string(&row_v[1])); + let cand_card = row_v[2].as_int64().unwrap_or(0); + if cand_card == 0 { + continue; + } + // Cardinality-only Jaccard upper bound: the ratio of the + // smaller card over the larger card is an upper bound on + // any possible Jaccard between the two sets. Skip work + // when even that upper bound can't clear `min_overlap`. 
+ let lo = target_card.min(cand_card) as f64; + let hi = target_card.max(cand_card) as f64; + if hi <= 0.0 || lo / hi < min_overlap { + continue; + } + + let score = port::compute_jaccard_similarity( + &target_sorted, + &cand_sorted, + target_card, + cand_card, + ); + + if score >= min_overlap { + let inter = port::sorted_intersection_count(&target_sorted, &cand_sorted); + out.push(serde_json::json!({ + "name": name, + "score": score, + "shared_ports": inter as i64, + })); + } + } + out.sort_by(|a, b| { + b["score"] + .as_f64() + .unwrap_or(0.0) + .partial_cmp(&a["score"].as_f64().unwrap_or(0.0)) + .unwrap_or(std::cmp::Ordering::Equal) + }); + Ok(out) + } + + pub fn stats(&self, design: Option<&str>) -> Result { + let modules = self.count_modules(design, false)?; + let packages = self.count_modules(design, true)?; + let instantiations = match design { + Some(d) => self.count_with_param( + "MATCH ()-[r:INSTANTIATES]->() WHERE r.design = $d RETURN count(r)", + "d", + d, + )?, + None => self.count_simple("MATCH ()-[r:INSTANTIATES]->() RETURN count(r)")?, + }; + let imports = self.count_simple("MATCH ()-[r:IMPORTS]->() RETURN count(r)")?; + let r = self + .db + .execute_cypher("MATCH (m:Module) RETURN m.design AS d, count(m) AS n ORDER BY d")?; + let designs = r + .rows() + .iter() + .map(|row| DesignStat { + design: as_string(&row[0]), + modules: row[1].as_int64().unwrap_or(0) as usize, + }) + .collect(); + Ok(GraphStats { + modules, + packages, + instantiations, + imports, + designs, + }) + } + + // ------------------------------------------------------------------ + // Vector API + // ------------------------------------------------------------------ + + /// Stamp an embedding vector + model name on a module node. Uses + /// Grafeo's typed `set_node_property` so the `:Module(embedding)` + /// HNSW index auto-syncs without a rebuild. 
+ pub fn upsert_embedding( + &self, + design: &str, + name: &str, + vector: &[f32], + model: &str, + ) -> Result<()> { + let key = module_key(design, name); + let key_value = Value::from(key.as_str()); + let nodes = self.db.find_nodes_by_property("key", &key_value); + let Some(node_id) = nodes.into_iter().next() else { + return Err(StoreError::NotFound(format!( + "Module {key} not present; cannot stamp embedding" + ))); + }; + self.db + .set_node_property(node_id, "embedding", Value::Vector(vector.to_vec().into())); + self.db + .set_node_property(node_id, "embedding_model", Value::from(model)); + Ok(()) + } + + /// HNSW top-k vector search with optional design filter. + pub fn search_modules_by_vector( + &self, + query: &[f32], + top_k: usize, + design: Option<&str>, + ) -> Result> { + let filters = design.map(|d| cparam("design", d)); + let hits = + self.db + .vector_search("Module", "embedding", query, top_k, None, filters.as_ref())?; + let mut out = Vec::with_capacity(hits.len()); + for (node_id, dist) in hits { + let Some(node) = self.db.get_node(node_id) else { + continue; + }; + let module = node + .get_property("name") + .and_then(|v| v.as_str().map(String::from)) + .unwrap_or_default(); + let design = node + .get_property("design") + .and_then(|v| v.as_str().map(String::from)) + .unwrap_or_default(); + // Cosine distance is in [0, 2]; convert to a [-1, 1] similarity + // so callers can sort descending. Unit-norm vectors keep this + // stable, but we don't enforce normalisation here. + let score = (1.0 - dist * 0.5).clamp(-1.0, 1.0); + out.push(VectorHit { + module, + design, + score, + }); + } + Ok(out) + } + + /// Drop the embedding properties for every module of `alias`. Cheap; + /// `clear_design` already covers this when wiping a whole design. 
+ pub fn clear_embeddings_for_design(&self, alias: &str) -> Result<()> { + self.db.execute_cypher_with_params( + "MATCH (m:Module {design: $a}) REMOVE m.embedding, m.embedding_model", + cparam("a", alias), + )?; + Ok(()) + } + + // ------------------------------------------------------------------ + // Helpers + // ------------------------------------------------------------------ + + fn list_instance_edges_from(&self, parent: &str) -> Result> { + let parent_file = self.module_meta(parent)?.1; + graph::list_instance_edges_from(&self.db, parent, &parent_file) + } + + /// Return the file path for `name`, consulting `cache` first to avoid + /// repeated round-trips when the same parent appears on many edges. + fn cached_file(&self, name: &str, cache: &mut BTreeMap) -> Result { + if let Some(f) = cache.get(name) { + return Ok(f.clone()); + } + let f = self.module_meta(name)?.1; + cache.insert(name.to_string(), f.clone()); + Ok(f) + } + + /// Fetch the `(design, file_path)` pair for `name` in a single + /// round-trip. 
+ fn module_meta(&self, name: &str) -> Result<(String, String)> { + let r = self.db.execute_cypher_with_params( + "MATCH (m:Module {name: $n}) RETURN m.design, m.file_path", + cparam("n", name), + )?; + Ok(r.rows() + .first() + .map(|row| (as_string(&row[0]), as_string(&row[1]))) + .unwrap_or_default()) + } + + fn count_modules(&self, design: Option<&str>, is_package: bool) -> Result { + let cypher = match design { + Some(_) => "MATCH (m:Module) WHERE m.is_package = $ip AND m.design = $d RETURN count(m)", + None => "MATCH (m:Module) WHERE m.is_package = $ip RETURN count(m)", + }; + let r = self.db.execute_cypher_with_params(cypher, cparam_d("ip", is_package, design))?; + Ok(r.rows()[0][0].as_int64().unwrap_or(0) as usize) + } + + fn count_simple(&self, cypher: &str) -> Result { + let r = self.db.execute_cypher(cypher)?; + Ok(r.rows()[0][0].as_int64().unwrap_or(0) as usize) + } + + fn count_with_param(&self, cypher: &str, k: &str, v: &str) -> Result { + let r = self.db.execute_cypher_with_params(cypher, cparam(k, v))?; + Ok(r.rows()[0][0].as_int64().unwrap_or(0) as usize) + } +} + +// ===================================================================== +// Cypher mutation helpers (in-session, parameterised) +// ===================================================================== + +/// Build a single-entry Cypher parameter map. +pub(crate) fn cparam(key: &str, val: impl Into) -> HashMap { + let mut m = HashMap::new(); + m.insert(key.to_string(), val.into()); + m +} + +/// Build a parameter map with an optional design filter (`$d`). +/// Always inserts `key → val`; adds `"d" → design` when `design` is `Some`. +fn cparam_d(key: &str, val: impl Into, design: Option<&str>) -> HashMap { + let mut m = cparam(key, val); + if let Some(d) = design { + m.insert("d".to_string(), Value::from(d)); + } + m +} + +/// Compose the per-design Module identity. Stored on `m.key` and used as +/// the MERGE match key. 
/// Compose the per-design identity of a module as `<design>::<name>`.
///
/// No escaping is applied: the two parts are joined verbatim with `::`.
fn module_key(design: &str, name: &str) -> String {
    const SEPARATOR: &str = "::";
    let mut key = String::with_capacity(design.len() + SEPARATOR.len() + name.len());
    key.push_str(design);
    key.push_str(SEPARATOR);
    key.push_str(name);
    key
}
Value::from(poe)), + ("desc", Value::from(m.description.as_deref().unwrap_or(""))), + ("pj", Value::from(ports_json)), + ("paj", Value::from(params_json)), + ("ij", Value::from(imports_json)), + ("psj", Value::from(port_set_json)), + ("psc", Value::from(port_set_card)), + ])); + } + unwind_in_chunks( + session, + "UNWIND $rows AS r \ + MERGE (m:Module {key: r.k}) \ + SET m.name = r.name, m.file_path = r.fp, m.design = r.d, \ + m.is_package = r.ip, m.line_start = r.ls, m.line_end = r.le, \ + m.param_block_start = r.pbs, m.param_block_end = r.pbe, \ + m.port_block_start = r.ps, m.port_block_end = r.pe, \ + m.description = r.desc, \ + m.ports_json = r.pj, m.params_json = r.paj, m.imports_json = r.ij, \ + m.port_set_json = r.psj, m.port_set_card = r.psc", + rows, + chunk, + ) +} + +/// Build a sorted-dedup-normalized port name set as a compact JSON array +/// string plus its cardinality. Stored at upsert time so +/// `find_structurally_similar` doesn't have to reparse the full +/// `ports_json` for every candidate. +fn build_port_set(ports: &[PortInfo]) -> (String, i64) { + let set: BTreeSet = ports.iter().map(|p| normalize_port_name(&p.name)).collect(); + let card = set.len() as i64; + let json = serde_json::to_string(&set.into_iter().collect::>()).unwrap_or_default(); + (json, card) +} + +// Pass 1b: MERGE every touched Design alias as a stub. +fn merge_design_stubs_batch( + session: &Session, + aliases: &BTreeSet, + chunk: usize, +) -> Result<()> { + if aliases.is_empty() { + return Ok(()); + } + let rows: Vec = aliases + .iter() + .map(|a| row([("a", Value::from(a.as_str()))])) + .collect(); + unwind_in_chunks( + session, + "UNWIND $rows AS r MERGE (d:Design {alias: r.a})", + rows, + chunk, + ) +} + +// Pass 1c: MERGE every child Module stub (instantiation + import targets) +// once per `::` key. 
+fn merge_child_stubs_batch(session: &Session, modules: &[&ModuleData], chunk: usize) -> Result<()> { + let mut seen: BTreeSet = BTreeSet::new(); + let mut rows: Vec = Vec::new(); + for m in modules { + for inst in &m.instantiations { + let key = module_key(&m.design, &inst.module_name); + if seen.insert(key.clone()) { + rows.push(row([ + ("k", Value::from(key)), + ("name", Value::from(inst.module_name.as_str())), + ("d", Value::from(m.design.as_str())), + ])); + } + } + for imp in &m.imports { + let key = module_key(&m.design, &imp.package_name); + if seen.insert(key.clone()) { + rows.push(row([ + ("k", Value::from(key)), + ("name", Value::from(imp.package_name.as_str())), + ("d", Value::from(m.design.as_str())), + ])); + } + } + } + unwind_in_chunks( + session, + "UNWIND $rows AS r \ + MERGE (m:Module {key: r.k}) \ + ON CREATE SET m.name = r.name, m.design = r.d", + rows, + chunk, + ) +} + +// Pass 2a: CREATE every INSTANTIATES edge (one row per call site; +// parallel edges preserved). 
+fn create_instantiates_batch( + session: &Session, + modules: &[&ModuleData], + chunk: usize, +) -> Result<()> { + let mut rows: Vec = Vec::new(); + for m in modules { + let parent_key = module_key(&m.design, &m.name); + for inst in &m.instantiations { + let child_key = module_key(&m.design, &inst.module_name); + rows.push(row([ + ("pk", Value::from(parent_key.as_str())), + ("ck", Value::from(child_key)), + ("inst", Value::from(inst.instance_name.as_str())), + ("d", Value::from(m.design.as_str())), + ( + "pb", + Value::from(serde_json::to_string(&inst.param_bindings)?), + ), + ( + "ob", + Value::from(serde_json::to_string(&inst.port_bindings)?), + ), + ( + "rpv", + Value::from(serde_json::to_string(&inst.resolved_param_values)?), + ), + ( + "rpw", + Value::from(serde_json::to_string(&inst.resolved_port_widths)?), + ), + ("ls", Value::from(inst.line_start.unwrap_or(-1))), + ("le", Value::from(inst.line_end.unwrap_or(-1))), + ])); + } + } + unwind_in_chunks( + session, + "UNWIND $rows AS r \ + MATCH (p:Module {key: r.pk}), (c:Module {key: r.ck}) \ + CREATE (p)-[:INSTANTIATES { \ + instance_name: r.inst, design: r.d, \ + param_bindings_json: r.pb, port_bindings_json: r.ob, \ + resolved_param_values_json: r.rpv, \ + resolved_port_widths_json: r.rpw, \ + line_start: r.ls, line_end: r.le }]->(c)", + rows, + chunk, + ) +} + +// Pass 2b: CREATE every IMPORTS edge. 
+fn create_imports_batch(session: &Session, modules: &[&ModuleData], chunk: usize) -> Result<()> { + let mut rows: Vec = Vec::new(); + for m in modules { + let parent_key = module_key(&m.design, &m.name); + for imp in &m.imports { + let pkg_key = module_key(&m.design, &imp.package_name); + rows.push(row([ + ("pk", Value::from(parent_key.as_str())), + ("ck", Value::from(pkg_key)), + ("wc", Value::from(imp.is_wildcard)), + ( + "syms", + Value::from(serde_json::to_string(&imp.specific_symbols)?), + ), + ])); + } + } + unwind_in_chunks( + session, + "UNWIND $rows AS r \ + MATCH (p:Module {key: r.pk}), (c:Module {key: r.ck}) \ + CREATE (p)-[:IMPORTS { is_wildcard: r.wc, specific_symbols_json: r.syms }]->(c)", + rows, + chunk, + ) +} + +// Pass 2c: CREATE every BELONGS_TO edge. +fn create_belongs_to_batch(session: &Session, modules: &[&ModuleData], chunk: usize) -> Result<()> { + let rows: Vec = modules + .iter() + .filter(|m| !m.design.is_empty()) + .map(|m| { + row([ + ("pk", Value::from(module_key(&m.design, &m.name))), + ("a", Value::from(m.design.as_str())), + ]) + }) + .collect(); + unwind_in_chunks( + session, + "UNWIND $rows AS r \ + MATCH (p:Module {key: r.pk}), (d:Design {alias: r.a}) \ + CREATE (p)-[:BELONGS_TO]->(d)", + rows, + chunk, + ) +} + +// ===================================================================== +// Row parsing +// ===================================================================== + +pub(crate) fn as_string(v: &Value) -> String { + v.as_str().map(|s| s.to_string()).unwrap_or_default() +} + +fn as_bool_opt(v: &Value) -> Option { + v.as_bool().or_else(|| v.as_int64().map(|i| i != 0)) +} + +pub(crate) fn as_i64_or_none(v: &Value) -> Option { + match v { + Value::Null => None, + _ => v.as_int64(), + } +} + +fn row_is_stub(row: &[Value]) -> bool { + // file_path / design / ports_json all empty/null => never given a full upsert. 
+ row.get(1).is_none_or(is_empty_str) + && row.get(2).is_none_or(is_empty_str) + && row.get(11).is_none_or(is_empty_str) +} + +fn is_empty_str(v: &Value) -> bool { + match v { + Value::Null => true, + Value::String(s) => s.is_empty(), + _ => false, + } +} + +fn row_to_module(row: &[Value]) -> Result { + let name = as_string(&row[0]); + let file_path = as_string(&row[1]); + let design = as_string(&row[2]); + let is_package = as_bool_opt(&row[3]).unwrap_or(false); + let line_start = as_i64_or_none(&row[4]); + let line_end = as_i64_or_none(&row[5]); + let pbs = row[6].as_int64().unwrap_or(-1); + let pbe = row[7].as_int64().unwrap_or(-1); + let pos_ = row[8].as_int64().unwrap_or(-1); + let poe = row[9].as_int64().unwrap_or(-1); + let description = { + let s = as_string(&row[10]); + if s.is_empty() { None } else { Some(s) } + }; + Ok(ModuleData { + name, + file_path, + design, + is_package, + line_start: line_start.filter(|v| *v >= 0), + line_end: line_end.filter(|v| *v >= 0), + param_block_lines: if pbs >= 0 && pbe >= 0 { + Some((pbs, pbe)) + } else { + None + }, + port_block_lines: if pos_ >= 0 && poe >= 0 { + Some((pos_, poe)) + } else { + None + }, + parameters: serde_json::from_str(&as_string(&row[12])).unwrap_or_default(), + ports: serde_json::from_str(&as_string(&row[11])).unwrap_or_default(), + instantiations: Vec::new(), + imports: serde_json::from_str(&as_string(&row[13])).unwrap_or_default(), + includes: Vec::new(), + exported_typedefs: Vec::new(), + description, + }) +} + +pub(crate) fn decode_json(s: &str) -> Option { + if s.trim().is_empty() { + return None; + } + serde_json::from_str(s).ok() +} + +fn normalize_port_name(name: &str) -> String { + let mut s = name.to_lowercase(); + for prefix in &["i_", "o_", "io_", "in_", "out_", "inout_"] { + if s.starts_with(prefix) { + s = s[prefix.len()..].to_string(); + break; + } + } + for suffix in &["_i", "_o", "_io", "_in", "_out"] { + if s.ends_with(suffix) { + s = s[..s.len() - suffix.len()].to_string(); + 
break; + } + } + s +} + +fn now_unix_ts() -> String { + use std::time::{SystemTime, UNIX_EPOCH}; + let secs = SystemTime::now() + .duration_since(UNIX_EPOCH) + .map(|d| d.as_secs()) + .unwrap_or(0); + format!("{secs}") +} + +#[cfg(test)] +mod tests { + use super::*; + use bender_kg_models::{Direction, InstantiationInfo, ParamInfo, ParamKind}; + use tempfile::tempdir; + + fn make_module(name: &str, design: &str) -> ModuleData { + let mut m = ModuleData::default(); + m.name = name.into(); + m.design = design.into(); + m.is_package = false; + m.line_start = Some(1); + m.line_end = Some(10); + m + } + + #[test] + fn store_round_trip_module_and_instantiation() -> Result<()> { + let tmp = tempdir().unwrap(); + let cfg = StoreConfig::new(tmp.path()); + let store = Store::open(&cfg)?; + let mut parent = make_module("parent", "d1"); + parent.parameters.push(ParamInfo { + name: "WIDTH".into(), + kind: ParamKind::Int, + default_value: "32".into(), + is_type_param: false, + }); + parent.ports.push(PortInfo { + name: "clk".into(), + direction: Direction::Input, + type_str: "logic".into(), + ..Default::default() + }); + let mut inst0 = InstantiationInfo { + module_name: "child".into(), + instance_name: "u_child0".into(), + ..Default::default() + }; + inst0 + .param_bindings + .insert("WIDTH".into(), "AddrWidth".into()); + inst0 + .resolved_param_values + .insert("WIDTH".into(), "32'd32".into()); + inst0.resolved_port_widths.insert( + "apb_req".into(), + ResolvedPortWidth { + total: 32, + fields: BTreeMap::from([("foo".into(), 16i64), ("bar.baz".into(), 16i64)]), + ..Default::default() + }, + ); + inst0.resolved_port_widths.insert( + "req_arr".into(), + ResolvedPortWidth { + total: 64, + element_count: Some(4), + element: Some(Box::new(ResolvedPortWidth { + total: 16, + fields: BTreeMap::from([("paddr".into(), 16i64)]), + ..Default::default() + })), + ..Default::default() + }, + ); + parent.instantiations.push(inst0); + // Second parallel instantiation of `child` — Grafeo keeps 
it as a + // distinct edge. + parent.instantiations.push(InstantiationInfo { + module_name: "child".into(), + instance_name: "u_child1".into(), + ..Default::default() + }); + let child = make_module("child", "d1"); + store.register_design("d1", "ID1", None, None, &["rtl".to_string()], &[])?; + store.upsert_modules([&parent, &child])?; + let got = store.get_module("parent")?.unwrap(); + assert_eq!(got.parameters.len(), 1); + assert_eq!(got.ports.len(), 1); + let parents = store.get_parents("child")?; + assert_eq!(parents.len(), 1); + let stats = store.stats(None)?; + assert_eq!(stats.modules, 2); + let ctx = store.get_instance_context("parent", "child")?; + let names: BTreeSet<&str> = ctx.iter().map(|e| e.instance_name.as_str()).collect(); + assert!(names.contains("u_child0") && names.contains("u_child1")); + let edge0 = ctx.iter().find(|e| e.instance_name == "u_child0").unwrap(); + assert_eq!( + edge0.resolved_param_values.get("WIDTH"), + Some(&"32'd32".to_string()) + ); + let pw = edge0.resolved_port_widths.get("apb_req").unwrap(); + assert_eq!(pw.total, 32); + assert_eq!(pw.fields.get("foo"), Some(&16)); + assert_eq!(pw.fields.get("bar.baz"), Some(&16)); + assert!(pw.element_count.is_none() && pw.element.is_none()); + let arr = edge0.resolved_port_widths.get("req_arr").unwrap(); + assert_eq!(arr.total, 64); + assert_eq!(arr.element_count, Some(4)); + let elem = arr.element.as_deref().unwrap(); + assert_eq!(elem.total, 16); + assert_eq!(elem.fields.get("paddr"), Some(&16)); + Ok(()) + } + + /// Two parallel `INSTANTIATES` edges from `parent` to `child` with + /// different bindings must round-trip as two distinct edges. 
+ #[test] + fn parallel_instantiations_round_trip_as_separate_edges() -> Result<()> { + let tmp = tempdir().unwrap(); + let cfg = StoreConfig::new(tmp.path()); + let store = Store::open(&cfg)?; + let mut parent = make_module("parent", "d1"); + let mut a = InstantiationInfo { + module_name: "child".into(), + instance_name: "u_lo".into(), + ..Default::default() + }; + a.param_bindings.insert("AW".into(), "32".into()); + let mut b = InstantiationInfo { + module_name: "child".into(), + instance_name: "u_hi".into(), + ..Default::default() + }; + b.param_bindings.insert("AW".into(), "64".into()); + parent.instantiations.extend([a, b]); + let child = make_module("child", "d1"); + store.register_design("d1", "ID1", None, None, &[], &[])?; + store.upsert_modules([&parent, &child])?; + let ctx = store.get_instance_context("parent", "child")?; + assert_eq!(ctx.len(), 2); + let lo = ctx.iter().find(|e| e.instance_name == "u_lo").unwrap(); + let hi = ctx.iter().find(|e| e.instance_name == "u_hi").unwrap(); + assert_eq!(lo.param_bindings.get("AW"), Some(&"32".to_string())); + assert_eq!(hi.param_bindings.get("AW"), Some(&"64".to_string())); + Ok(()) + } + + #[test] + fn trace_parameter_emits_call_site_expression() -> Result<()> { + let tmp = tempdir().unwrap(); + let cfg = StoreConfig::new(tmp.path()); + let store = Store::open(&cfg)?; + let mut parent = make_module("parent", "d1"); + parent.parameters.push(ParamInfo { + name: "WIDTH".into(), + kind: ParamKind::Int, + default_value: "32".into(), + is_type_param: false, + }); + let mut inst = InstantiationInfo { + module_name: "child".into(), + instance_name: "u_child".into(), + ..Default::default() + }; + // child.AW gets parent.WIDTH (expression = "WIDTH", binds to parent's WIDTH param) + inst.param_bindings + .insert("AW".into(), "WIDTH".into()); + inst.resolved_param_values + .insert("AW".into(), "32'd32".into()); + parent.instantiations.push(inst); + let child = make_module("child", "d1"); + store.register_design("d1", 
"ID1", None, None, &["rtl".to_string()], &[])?; + store.upsert_modules([&parent, &child])?; + + let rows = store.trace_parameter("parent", "WIDTH")?; + assert_eq!(rows.len(), 1); + let row = &rows[0]; + assert_eq!(row["call_site_expression"], "WIDTH"); + assert_eq!(row["resolved_value"], "32'd32"); + // Old field name must not leak. + assert!(row.get("bound_value").is_none()); + Ok(()) + } + + #[test] + fn cross_design_modules_with_same_name_are_isolated() -> Result<()> { + let tmp = tempdir().unwrap(); + let cfg = StoreConfig::new(tmp.path()); + let store = Store::open(&cfg)?; + store.register_design("d1", "ID1", None, None, &[], &[])?; + store.register_design("d2", "ID2", None, None, &[], &[])?; + store.upsert_module(&make_module("axi_pkg", "d1"))?; + store.upsert_module(&make_module("axi_pkg", "d2"))?; + assert_eq!(store.stats(Some("d1"))?.modules, 1); + assert_eq!(store.stats(Some("d2"))?.modules, 1); + store.clear_design("d1")?; + assert_eq!(store.stats(Some("d1"))?.modules, 0); + assert_eq!(store.stats(Some("d2"))?.modules, 1); + Ok(()) + } + + #[test] + fn embedding_round_trip() -> Result<()> { + let tmp = tempdir().unwrap(); + let cfg = StoreConfig::new(tmp.path()).with_embedding_dim(4); + let store = Store::open(&cfg)?; + store.register_design("d1", "ID1", None, None, &[], &[])?; + let m1 = make_module("a", "d1"); + let m2 = make_module("b", "d1"); + store.upsert_modules([&m1, &m2])?; + store.upsert_embedding("d1", "a", &[1.0, 0.0, 0.0, 0.0], "test-model")?; + store.upsert_embedding("d1", "b", &[0.0, 1.0, 0.0, 0.0], "test-model")?; + let hits = store.search_modules_by_vector(&[1.0, 0.0, 0.0, 0.0], 2, None)?; + assert!(!hits.is_empty(), "expected at least one hit"); + assert_eq!(hits[0].module, "a"); + Ok(()) + } +} diff --git a/crates/bender-kg-store/src/param.rs b/crates/bender-kg-store/src/param.rs new file mode 100644 index 00000000..e322d865 --- /dev/null +++ b/crates/bender-kg-store/src/param.rs @@ -0,0 +1,111 @@ +// Copyright (c) 2026 ETH Zurich +// 
// Alessandro Ottaviano

//! Parameter tracing utilities - handle struct field propagation and
//! parameter dataflow analysis.

/// Characters that may appear inside a SystemVerilog simple identifier.
///
/// `$` is included so that a system task such as `$bits` is never mistaken
/// for a use of an identifier named `bits`.
fn is_ident_char(c: char) -> bool {
    c.is_alphanumeric() || c == '_' || c == '$'
}

/// Return true when `ident` occurs in `haystack` as a whole identifier.
///
/// Every occurrence is examined, not only the first one. An occurrence
/// counts when the characters directly before and after it are not
/// identifier characters and `prefix_ok` accepts the text preceding it.
/// An empty `ident` never matches.
fn contains_identifier(haystack: &str, ident: &str, prefix_ok: impl Fn(&str) -> bool) -> bool {
    let Some(first) = ident.chars().next() else {
        return false;
    };
    let mut from = 0;
    while let Some(offset) = haystack[from..].find(ident) {
        let pos = from + offset;
        let before = &haystack[..pos];
        let after = &haystack[pos + ident.len()..];
        let starts_word = !before.chars().next_back().is_some_and(is_ident_char);
        let ends_word = !after.chars().next().is_some_and(is_ident_char);
        if starts_word && ends_word && prefix_ok(before) {
            return true;
        }
        // Step one character (not the whole match) so that overlapping
        // candidates are still considered.
        from = pos + first.len_utf8();
    }
    false
}

/// Check if a parameter binding value references the given parameter.
///
/// This handles direct references, struct field accesses and uses of the
/// parameter anywhere inside a larger expression.
///
/// Examples:
/// - `param="SEP"`, `value="SEP"` → true (direct match)
/// - `param="Cfg"`, `value="Cfg.JTAG_BSR_ENABLE"` → true (struct field)
/// - `param="Cfg"`, `value="Cfg"` → true (whole struct)
/// - `param="WIDTH"`, `value="$clog2(WIDTH)"` → true (used inside a call)
/// - `param="SEP"`, `value="1'b1"` → false (literal, no reference)
/// - `param="NUM_CORES"`, `value="DTP_NUM_CORES"` → false (different param)
/// - `param="SEP"`, `value="A + SEP_OTHER"` → false (different param)
/// - `param="WIDTH"`, `value="pkg::WIDTH"` → false (package-scoped name)
///
/// The value references the parameter when it equals the parameter name, or
/// when the name appears in it as a whole identifier that is not
/// 1. a member selected from something else (preceded by `.`),
/// 2. a scoped name (preceded by `::`), or
/// 3. the digits of a based literal (preceded by `'`, as in `32'd32`).
pub fn value_references_param(param: &str, value: &str) -> bool {
    if value == param {
        return true;
    }
    contains_identifier(value, param, |before| {
        !(before.ends_with('.') || before.ends_with("::") || before.ends_with('\''))
    })
}

/// Check if a port binding expression references the given signal name.
///
/// Signal bindings are typically bare identifiers (e.g. `.clk_i(clk_smu_i)`),
/// so exact match covers most cases. Word-boundary matching catches the rare
/// cases where the signal appears inside an aggregate or conditional
/// expression; every occurrence is checked, so an earlier look-alike does not
/// hide a later real use.
///
/// Examples:
/// - `signal="clk_smu_i"`, `expr="clk_smu_i"` → true (exact)
/// - `signal="clk_i"`, `expr="clk_smu_i"` → false (substring, not a word)
/// - `signal="clk_smu_i"`, `expr="{a, clk_smu_i}"` → true (word boundary)
/// - `signal="clk_i"`, `expr="{my_clk_i, clk_i}"` → true (second occurrence)
pub fn value_references_signal(signal: &str, expr: &str) -> bool {
    expr == signal || contains_identifier(expr, signal, |_| true)
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_value_references_param_direct() {
        assert!(value_references_param("SEP", "SEP"));
        assert!(!value_references_param("SEP", "SEP_OTHER"));
        assert!(!value_references_param("SEP", "PRESEP"));
    }

    #[test]
    fn test_value_references_param_struct() {
        assert!(value_references_param("Cfg", "Cfg.JTAG_BSR_ENABLE"));
        assert!(value_references_param("Cfg", "Cfg"));
        assert!(!value_references_param("Cfg", "CfgOther"));
        assert!(!value_references_param("Cfg", "OtherCfg"));
    }

    #[test]
    fn test_value_references_param_expression() {
        assert!(value_references_param("Cfg", "(Cfg.SMC_CPU_CONFIG == smc_pkg::SMC_4CORE)"));
        assert!(value_references_param("NUM_CORES", "NUM_CORES + 2"));
    }

    #[test]
    fn test_value_references_signal_exact() {
        assert!(value_references_signal("clk_smu_i", "clk_smu_i"));
        assert!(!value_references_signal("clk_i", "clk_smu_i"));
        assert!(!value_references_signal("clk_smu_i", "clk_i"));
    }
}
+ fn test_value_references_signal_word_boundary() { + assert!(value_references_signal("clk_smu_i", "{a, clk_smu_i, b}")); + assert!(!value_references_signal("clk_i", "{a, clk_smu_i, b}")); + } + +} diff --git a/crates/bender-kg-store/src/port.rs b/crates/bender-kg-store/src/port.rs new file mode 100644 index 00000000..c7d5ffcd --- /dev/null +++ b/crates/bender-kg-store/src/port.rs @@ -0,0 +1,190 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! Port analysis utilities - matching, similarity, comparison. + +use bender_kg_models::{Direction, PortInfo}; +use std::collections::BTreeMap; + +/// Convert a Direction enum to a string. +pub fn direction_str(d: Direction) -> &'static str { + match d { + Direction::Input => "input", + Direction::Output => "output", + Direction::Inout => "inout", + Direction::Ref => "ref", + } +} + +/// Check if two port directions are compatible for connection. +/// +/// Returns true if: +/// - One is Input and the other is Output +/// - Both are Inout +pub fn directions_complement(a: Direction, b: Direction) -> bool { + matches!( + (a, b), + (Direction::Input, Direction::Output) + | (Direction::Output, Direction::Input) + | (Direction::Inout, Direction::Inout) + ) +} + +/// Strip a prefix from a port name. +/// +/// Used to normalize port names when comparing interfaces with different +/// prefixes (e.g., comparing `slv_req` and `mst_req` by stripping `slv_` +/// and `mst_` respectively). +pub fn strip_prefix(name: &str, prefix: &str) -> String { + if !prefix.is_empty() && name.starts_with(prefix) { + name[prefix.len()..].to_string() + } else { + name.to_string() + } +} + +/// Parse a `port_set_json` payload back into the sorted-dedup vector. +/// +/// Returns an empty vector on parse error (graceful degradation). 
+pub fn parse_port_set_json(raw: &str) -> Vec { + if raw.is_empty() { + return Vec::new(); + } + serde_json::from_str(raw).unwrap_or_default() +} + +/// Linear two-pointer intersection count over two sorted, dedup'd port +/// name vectors. O(|a| + |b|). +pub fn sorted_intersection_count(a: &[String], b: &[String]) -> usize { + let mut i = 0; + let mut j = 0; + let mut n = 0; + while i < a.len() && j < b.len() { + match a[i].cmp(&b[j]) { + std::cmp::Ordering::Equal => { + n += 1; + i += 1; + j += 1; + } + std::cmp::Ordering::Less => i += 1, + std::cmp::Ordering::Greater => j += 1, + } + } + n +} + +/// Result of comparing two module interfaces. +#[derive(Debug, Clone)] +pub struct InterfaceComparison { + pub matched: Vec, + pub width_conflicts: Vec, + pub unmatched_a: Vec, + pub unmatched_b: Vec, +} + +#[derive(Debug, Clone)] +pub struct PortMatch { + pub name: String, + pub a_direction: Direction, + pub b_direction: Direction, + pub direction_complementary: bool, + pub a_width: Option, + pub b_width: Option, +} + +#[derive(Debug, Clone)] +pub struct WidthConflict { + pub name: String, + pub a_width: Option, + pub b_width: Option, +} + +/// Compare ports between two modules, optionally stripping prefixes. +/// +/// Returns matched ports, width conflicts, and unmatched ports from each +/// module. 
+pub fn compare_ports( + a_ports: &[PortInfo], + b_ports: &[PortInfo], + prefix_a: &str, + prefix_b: &str, +) -> InterfaceComparison { + let a_map: BTreeMap = a_ports + .iter() + .map(|p| (strip_prefix(&p.name, prefix_a), p)) + .collect(); + + let b_map: BTreeMap = b_ports + .iter() + .map(|p| (strip_prefix(&p.name, prefix_b), p)) + .collect(); + + let mut matched = Vec::new(); + let mut width_conflicts = Vec::new(); + let mut unmatched_a = Vec::new(); + + for (name, pa) in &a_map { + match b_map.get(name) { + Some(pb) => { + let dir_ok = directions_complement(pa.direction, pb.direction); + let width_ok = pa.bit_width == pb.bit_width + || pa.bit_width.is_none() + || pb.bit_width.is_none(); + + matched.push(PortMatch { + name: name.clone(), + a_direction: pa.direction, + b_direction: pb.direction, + direction_complementary: dir_ok, + a_width: pa.bit_width, + b_width: pb.bit_width, + }); + + if !width_ok { + width_conflicts.push(WidthConflict { + name: name.clone(), + a_width: pa.bit_width, + b_width: pb.bit_width, + }); + } + } + None => unmatched_a.push(name.clone()), + } + } + + let unmatched_b: Vec = b_map + .keys() + .filter(|k| !a_map.contains_key(*k)) + .cloned() + .collect(); + + InterfaceComparison { + matched, + width_conflicts, + unmatched_a, + unmatched_b, + } +} + +/// Compute Jaccard similarity score between two port sets. +/// +/// Returns a score between 0.0 and 1.0, where 1.0 means identical port sets. 
+pub fn compute_jaccard_similarity( + a_sorted: &[String], + b_sorted: &[String], + a_cardinality: i64, + b_cardinality: i64, +) -> f64 { + if a_cardinality == 0 || b_cardinality == 0 { + return 0.0; + } + + let intersection = sorted_intersection_count(a_sorted, b_sorted) as f64; + let union = (a_cardinality as f64) + (b_cardinality as f64) - intersection; + + if union <= 0.0 { + return 0.0; + } + + intersection / union +} diff --git a/crates/bender-slang/Cargo.toml b/crates/bender-slang/Cargo.toml index 005d3fc3..74afa8ee 100644 --- a/crates/bender-slang/Cargo.toml +++ b/crates/bender-slang/Cargo.toml @@ -18,7 +18,10 @@ include = [ ] [dependencies] -cxx = "1.0.194" +# Kuzu (used by bender-kg-store-kuzu) hard-pins `cxx = =1.0.138`. +# Keep both bender-slang and kuzu on the same cxx to avoid duplicate +# C++ runtime symbols / version conflicts. +cxx = "=1.0.138" thiserror = "2.0.12" [target.'cfg(windows)'.dependencies] @@ -26,7 +29,7 @@ dunce = "1.0.4" [build-dependencies] cmake = "0.1.57" -cxx-build = "1.0.194" +cxx-build = "=1.0.138" [package.metadata.dist] dist = false diff --git a/crates/bender-slang/build.rs b/crates/bender-slang/build.rs index a9228691..0f62e620 100644 --- a/crates/bender-slang/build.rs +++ b/crates/bender-slang/build.rs @@ -147,6 +147,8 @@ fn main() { .file("cpp/rewriter.cpp") .file("cpp/print.cpp") .file("cpp/analysis.cpp") + .file("cpp/walker.cpp") + .file("cpp/elab.cpp") .flag_if_supported("-std=c++20") .include(&slang_include_dir) .include(&slang_generated_include_dir) @@ -169,4 +171,6 @@ fn main() { println!("cargo:rerun-if-changed=cpp/rewriter.cpp"); println!("cargo:rerun-if-changed=cpp/print.cpp"); println!("cargo:rerun-if-changed=cpp/analysis.cpp"); + println!("cargo:rerun-if-changed=cpp/walker.cpp"); + println!("cargo:rerun-if-changed=cpp/elab.cpp"); } diff --git a/crates/bender-slang/cpp/analysis.cpp b/crates/bender-slang/cpp/analysis.cpp index c1989b5d..e0391c10 100644 --- a/crates/bender-slang/cpp/analysis.cpp +++ 
b/crates/bender-slang/cpp/analysis.cpp @@ -3,7 +3,6 @@ #include "slang_bridge.h" -#include #include #include #include @@ -51,20 +50,32 @@ rust::Vec reachable_tree_indices(const SlangSession& session, con startIndices.push_back(it->second); } - // Perform a DFS from the top modules to find all reachable trees. + // Perform an iterative DFS from the top modules to find all reachable + // trees. We use an explicit stack rather than recursion because the + // dependency graph can be deep (notably in single-unit + lenient mode + // where macros propagate across hundreds of trees), and recursive DFS + // overflows the thread stack on real designs (e.g. ws-tensix with + // ~470+ trees and dense `\`define` dependencies). std::vector reachable(treeVec.size(), false); - std::function dfs = [&](size_t index) { - if (reachable[index]) { - return; - } - reachable[index] = true; - for (auto dep : deps[index]) { - dfs(dep); - } - }; - + std::vector stack; + stack.reserve(treeVec.size()); for (auto start : startIndices) { - dfs(start); + if (start < reachable.size() && !reachable[start]) { + stack.push_back(start); + while (!stack.empty()) { + size_t cur = stack.back(); + stack.pop_back(); + if (reachable[cur]) { + continue; + } + reachable[cur] = true; + for (auto dep : deps[cur]) { + if (!reachable[dep]) { + stack.push_back(dep); + } + } + } + } } rust::Vec result; diff --git a/crates/bender-slang/cpp/elab.cpp b/crates/bender-slang/cpp/elab.cpp new file mode 100644 index 00000000..cca9ccca --- /dev/null +++ b/crates/bender-slang/cpp/elab.cpp @@ -0,0 +1,218 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +// Elaboration walker. Builds a slang::ast::Compilation from the session's +// parsed SyntaxTrees, forces elaboration from the requested top modules, and +// emits resolved per-instance parameter bindings and port widths. 
+ +#include "slang_bridge.h" + +#include "slang/ast/ASTVisitor.h" +#include "slang/ast/Compilation.h" +#include "slang/ast/Scope.h" +#include "slang/ast/symbols/CompilationUnitSymbols.h" +#include "slang/ast/symbols/InstanceSymbols.h" +#include "slang/ast/symbols/ParameterSymbols.h" +#include "slang/ast/symbols/PortSymbols.h" +#include "slang/ast/symbols/VariableSymbols.h" +#include "slang/ast/types/AllTypes.h" +#include "slang/ast/types/Type.h" +#include "slang/numeric/ConstantValue.h" +#include "slang/util/Bag.h" + +#include "bender-slang/src/lib.rs.h" + +#include +#include +#include + +using namespace slang; +using namespace slang::ast; + +namespace { + +// Stringify a (value or type) parameter's resolved binding. Defensive against +// uninitialized values which can show up if elaboration partially failed. +std::string param_value(const ParameterSymbolBase& p) { + if (p.symbol.kind == SymbolKind::TypeParameter) { + const auto& tp = p.symbol.as(); + try { + return tp.getTypeAlias().toString(); + } catch (...) { + return "type"; + } + } + if (p.symbol.kind == SymbolKind::Parameter) { + const auto& vp = p.symbol.as(); + try { + return vp.getValue().toString(); + } catch (...) { + return ""; + } + } + return ""; +} + +// Recursively flatten a packed struct/union type. `prefix` accumulates the +// dot-joined field path; the empty string denotes the port type itself, in +// which case scalar leaves contribute nothing (the parent's `total` already +// covers them). Packed arrays / unpacked / opaque types are leaves with a +// well-defined `getBitWidth()` and emit a single entry. Non-Scope types +// (anything that is not a packed struct/union) terminate the recursion. 
+void flatten_type(const Type& ty, const std::string& prefix, + std::vector& out) { + const Type& canon = ty.getCanonicalType(); + const Scope* fields_scope = nullptr; + if (canon.kind == SymbolKind::PackedStructType) { + fields_scope = &canon.as(); + } else if (canon.kind == SymbolKind::PackedUnionType) { + fields_scope = &canon.as(); + } + if (fields_scope) { + for (const Symbol& m : fields_scope->members()) { + if (m.kind != SymbolKind::Field) continue; + const auto& f = m.as(); + std::string child = prefix.empty() + ? std::string(f.name) + : prefix + "." + std::string(f.name); + flatten_type(f.getType(), child, out); + } + return; + } + if (prefix.empty()) return; // top-level scalar: covered by `total` + KgKeyValue kv; + kv.key = rust::String(prefix); + std::int64_t w = 0; + try { + w = static_cast(canon.getBitWidth()); + } catch (...) {} + kv.value = rust::String(std::to_string(w)); + out.push_back(std::move(kv)); +} + +// Build a KgPortWidth for one port: a `total` from `getType().getBitWidth()` +// plus a dot-flattened breakdown across nested packed structs / unions. If +// the canonical port type is `T [N-1:0]` (a packed array of T), record +// `element_count = N` and recurse `flatten_type` over T into the element +// template fields. Only one level of array is unwrapped; deeper nesting +// stays opaque inside `element_total` / per-field totals. 
+KgPortWidth collect_port(const Symbol& port) { + KgPortWidth pw; + pw.name = rust::String(std::string(port.name)); + pw.total = 0; + pw.element_count = 0; + pw.element_total = 0; + if (port.kind != SymbolKind::Port) return pw; + try { + const Type& t = port.as().getType(); + pw.total = static_cast(t.getBitWidth()); + std::vector fields; + flatten_type(t, "", fields); + for (auto& kv : fields) pw.fields.push_back(std::move(kv)); + + const Type& canon = t.getCanonicalType(); + if (canon.kind == SymbolKind::PackedArrayType) { + const auto& arr = canon.as(); + pw.element_count = static_cast(arr.range.width()); + pw.element_total = static_cast(arr.elementType.getBitWidth()); + std::vector elem_fields; + flatten_type(arr.elementType, "", elem_fields); + for (auto& kv : elem_fields) pw.element_fields.push_back(std::move(kv)); + } + } catch (...) {} + return pw; +} + +// Resolve the *defining module name* of the scope containing `inst`. The +// containing scope is typically an `InstanceBodySymbol`; for instances inside +// a generate block we walk up parent scopes until we find one. Returns "" for +// top-level instances. 
+std::string parent_module_name(const InstanceSymbol& inst) { + const Scope* scope = inst.getParentScope(); + while (scope) { + const Symbol& asSym = scope->asSymbol(); + if (asSym.kind == SymbolKind::InstanceBody) { + return std::string(asSym.as().getDefinition().name); + } + if (asSym.kind == SymbolKind::Root) { + return {}; + } + scope = asSym.getParentScope(); + } + return {}; +} + +struct ElabVisitor : public ASTVisitor { + KgElabResult& out; + explicit ElabVisitor(KgElabResult& o) : out(o) {} + + void handle(const InstanceSymbol& inst) { + KgInstanceContext ctx; + ctx.parent_module = rust::String(parent_module_name(inst)); + ctx.instance_name = rust::String(std::string(inst.name)); + ctx.child_module = rust::String(std::string(inst.getDefinition().name)); + + const InstanceBodySymbol& body = inst.body; + for (const ParameterSymbolBase* p : body.getParameters()) { + KgKeyValue kv; + kv.key = rust::String(std::string(p->symbol.name)); + kv.value = rust::String(param_value(*p)); + ctx.param_bindings.push_back(std::move(kv)); + } + for (const Symbol* port : body.getPortList()) { + ctx.port_widths.push_back(collect_port(*port)); + } + out.contexts.push_back(std::move(ctx)); + visitDefault(inst); + } +}; + +} // namespace + +KgElabResult walk_elaborated(const SlangSession& session, const rust::Vec& tops) { + KgElabResult out; + + // CompilationOptions::topModules stores std::string_view (non-owning), so + // the underlying std::strings must outlive the Compilation. Keep them in a + // pre-sized vector to avoid reallocation invalidating the views. 
+ std::vector top_storage; + top_storage.reserve(tops.size()); + CompilationOptions opts; + for (const auto& t : tops) { + top_storage.emplace_back(t.data(), t.size()); + opts.topModules.insert(top_storage.back()); + } + Bag bag; + bag.set(opts); + Compilation comp(bag); + + for (const auto& tree : session.trees()) { + try { + comp.addSyntaxTree(tree); + } catch (const std::exception& ex) { + out.warnings.push_back(rust::String(std::string("addSyntaxTree: ") + ex.what())); + } + } + + const RootSymbol* root = nullptr; + try { + root = &comp.getRoot(); + } catch (const std::exception& ex) { + out.warnings.push_back(rust::String(std::string("getRoot: ") + ex.what())); + return out; + } + + // Walk only the user-requested top instances. Root also exposes synthetic + // InstanceSymbols for uninstantiated definitions (parent=Root, name=""); + // visiting those would flood the output with definition-level noise. + ElabVisitor v(out); + try { + for (const InstanceSymbol* top : root->topInstances) { + top->visit(v); + } + } catch (const std::exception& ex) { + out.warnings.push_back(rust::String(std::string("visit: ") + ex.what())); + } + return out; +} diff --git a/crates/bender-slang/cpp/session.cpp b/crates/bender-slang/cpp/session.cpp index 783e4bd8..ac1f4b70 100644 --- a/crates/bender-slang/cpp/session.cpp +++ b/crates/bender-slang/cpp/session.cpp @@ -3,6 +3,8 @@ #include "slang_bridge.h" +#include +#include #include using namespace slang; @@ -14,7 +16,9 @@ using std::string_view; std::unique_ptr new_slang_session() { return std::make_unique(); } -SlangContext::SlangContext() : diagEngine(sourceManager), diagClient(std::make_shared()) { +SlangContext::SlangContext(SourceManager& sm) + : sourceManager(sm), diagEngine(sourceManager), + diagClient(std::make_shared()) { diagEngine.addClient(diagClient); } @@ -35,59 +39,181 @@ void SlangContext::set_defines(const rust::Vec& defs) { } // Parses a list of source files and returns the resulting syntax trees as a vector (of 
shared pointers). -// If any file fails to parse, an exception is thrown with the error message(s) from the diagnostic engine. -std::vector> SlangContext::parse_files(const rust::Vec& paths) { +// All files in the group are parsed into a single SystemVerilog compilation +// unit so that `\`define`s declared in earlier files are visible in later +// ones. This matches how downstream simulators (VCS, Verilator, ...) treat +// the files of a Bender source group when invoked on a single command line. +// When `inherited` is non-empty, the listed macros are predefined into this +// group's preprocessor (used to propagate `define`s from prior groups). +// If parsing fails, an exception is thrown with the error message(s) from the +// diagnostic engine. +std::vector> +SlangContext::parse_files(const rust::Vec& paths, + SyntaxTree::MacroList inherited, + bool lenient) { Bag options; options.set(ppOptions); - std::vector> out; - out.reserve(paths.size()); + if (paths.empty()) { + return {}; + } - for (const auto& path : paths) { - string_view pathView(path.data(), path.size()); - auto result = SyntaxTree::fromFile(pathView, sourceManager, options); + std::shared_ptr tree; + if (inherited.empty()) { + // Fast path: load files directly via slang's path-span overload. 
+ std::vector path_storage; + path_storage.reserve(paths.size()); + std::vector path_views; + path_views.reserve(paths.size()); + for (const auto& p : paths) { + path_storage.emplace_back(p.data(), p.size()); + path_views.emplace_back(path_storage.back()); + } + auto result = SyntaxTree::fromFiles(path_views, sourceManager, options); if (!result) { auto& err = result.error(); - std::string msg = "System Error loading '" + std::string(err.second) + "': " + err.first.message(); + std::string msg = "System Error loading '" + std::string(err.second) + + "': " + err.first.message(); throw std::runtime_error(msg); } + tree = *result; + } else { + // Single-unit path: read each file into a SourceBuffer and feed them + // to fromBuffers along with the macros inherited from prior groups. + std::vector buffers; + buffers.reserve(paths.size()); + for (const auto& p : paths) { + std::filesystem::path fpath(std::string(p.data(), p.size())); + auto buf = sourceManager.readSource(fpath, /*library=*/nullptr); + if (!buf) { + std::string msg = "System Error loading '" + fpath.string() + + "': " + buf.error().message(); + throw std::runtime_error(msg); + } + buffers.push_back(*buf); + } + tree = SyntaxTree::fromBuffers(buffers, sourceManager, options, inherited); + } - auto tree = *result; - diagClient->clear(); - diagEngine.clearIncludeStack(); + diagClient->clear(); + diagEngine.clearIncludeStack(); - bool hasErrors = false; - for (const auto& diag : tree->diagnostics()) { - hasErrors |= diag.isError(); - diagEngine.issue(diag); + bool hasErrors = false; + std::size_t errorCount = 0; + for (const auto& diag : tree->diagnostics()) { + if (diag.isError()) { + hasErrors = true; + ++errorCount; } + diagEngine.issue(diag); + } + + if (hasErrors && !lenient) { + std::string rendered = diagClient->getString(); + if (rendered.empty()) { + rendered = "Failed to parse source group"; + } + throw std::runtime_error(rendered); + } + + // Lenient mode: a parse error inside slang frequently 
leaves dangling / + // null pointers in the syntax tree (e.g. missing declarator, header, + // generate-block clauses) which the kg walker dereferences without null + // checks. To keep clean files in the same group available to the walker, + // we fall back to per-file parsing here: each file is parsed independently + // (accumulating `\`define`s from the prior good files in the group so the + // single-unit semantics still hold for clean files), the files that error + // are dropped, the others are kept. This matches pyslang's "best effort" + // result more closely than dropping the whole group, which previously + // hid healthy modules like ws-tensix's `tt_tensix_with_l1` because a + // sibling file in the same Bender entry pulled in an encrypted include. + if (hasErrors && lenient) { + std::vector> kept; + kept.reserve(paths.size()); + + // Per-file accumulator seeded with the macros inherited from prior + // groups (so cross-group `--single-unit` propagation is preserved + // even when a group falls back to per-file parsing). 
+ std::vector intraGroupMacros( + inherited.begin(), inherited.end()); + + std::size_t droppedFiles = 0; + for (const auto& p : paths) { + std::filesystem::path fpath(std::string(p.data(), p.size())); + auto buf = sourceManager.readSource(fpath, /*library=*/nullptr); + if (!buf) { + std::fprintf(stderr, + "[bender-slang] lenient: skipping unreadable file '%s': %s\n", + fpath.string().c_str(), buf.error().message().c_str()); + ++droppedFiles; + continue; + } - if (hasErrors) { - std::string rendered = diagClient->getString(); - if (rendered.empty()) { - rendered = "Failed to parse '" + std::string(pathView) + "'."; + SyntaxTree::MacroList macros(intraGroupMacros.data(), intraGroupMacros.size()); + std::vector oneBuffer{*buf}; + auto oneTree = SyntaxTree::fromBuffers(oneBuffer, sourceManager, options, macros); + + bool fileHasErrors = false; + std::size_t fileErrors = 0; + for (const auto& diag : oneTree->diagnostics()) { + if (diag.isError()) { + fileHasErrors = true; + ++fileErrors; + } } - throw std::runtime_error(rendered); + + if (fileHasErrors) { + std::fprintf(stderr, + "[bender-slang] lenient: dropping file '%s' (%zu parse error(s))\n", + fpath.string().c_str(), fileErrors); + ++droppedFiles; + continue; + } + + auto fresh = oneTree->getDefinedMacros(); + intraGroupMacros.insert(intraGroupMacros.end(), fresh.begin(), fresh.end()); + kept.push_back(oneTree); } - out.push_back(tree); + std::fprintf(stderr, + "[bender-slang] lenient: source group had %zu group-level error(s); per-file fallback kept %zu/%zu file(s), dropped %zu\n", + errorCount, kept.size(), paths.size(), droppedFiles); + return kept; } - return out; + return { tree }; } // Parses a group of files with the given include paths and preprocessor defines. // Stores the resulting syntax trees and contexts in the session for later retrieval and analysis. 
+// In single-unit mode, macros defined by prior groups are predefined into this +// group's preprocessor and the macros newly defined here are appended to the +// session's accumulator so the next group can inherit them too. void SlangSession::parse_group(const rust::Vec& files, const rust::Vec& includes, const rust::Vec& defines) { - // Create a new context for this group of files. - auto ctx = std::make_unique(); + // The SourceManager is shared across groups (required for cross-group + // elaboration via slang::ast::Compilation). Predefines stay per-group. + auto ctx = std::make_unique(sourceManager); ctx->set_includes(includes); ctx->set_defines(defines); - // Parse the files and store the resulting syntax trees in the session. - auto parsed = ctx->parse_files(files); + SyntaxTree::MacroList inherited{}; + if (singleUnit && !accumulatedMacros.empty()) { + inherited = SyntaxTree::MacroList(accumulatedMacros.data(), accumulatedMacros.size()); + } + auto parsed = ctx->parse_files(files, inherited, lenient); + + if (singleUnit) { + // In lenient mode the failing files inside this group are dropped + // file-by-file (see `parse_files`); the clean siblings still expose + // their `\`define`s for subsequent groups via `getDefinedMacros`. + for (const auto& tree : parsed) { + auto fresh = tree->getDefinedMacros(); + accumulatedMacros.insert(accumulatedMacros.end(), fresh.begin(), fresh.end()); + } + } + allTrees.reserve(allTrees.size() + parsed.size()); for (const auto& tree : parsed) { allTrees.push_back(tree); @@ -96,6 +222,21 @@ void SlangSession::parse_group(const rust::Vec& files, const rust: contexts.push_back(std::move(ctx)); } +// Keep only the trees at the given indices and drop the rest. Indices outside +// the current `allTrees` range are silently skipped so callers can pass the +// raw output of `reachable_tree_indices` without bounds-checking. Runs in +// O(N+K): one pass over the index list and one move-assignment. 
+void SlangSession::retain_trees(const rust::Vec& indices) { + std::vector> kept; + kept.reserve(indices.size()); + for (auto i : indices) { + if (i < allTrees.size()) { + kept.push_back(allTrees[i]); + } + } + allTrees = std::move(kept); +} + // Returns the number of syntax trees currently stored in the session. std::size_t tree_count(const SlangSession& session) { return session.trees().size(); } diff --git a/crates/bender-slang/cpp/slang_bridge.h b/crates/bender-slang/cpp/slang_bridge.h index ac423363..b845adff 100644 --- a/crates/bender-slang/cpp/slang_bridge.h +++ b/crates/bender-slang/cpp/slang_bridge.h @@ -19,19 +19,50 @@ #include #include +// The kg walker shared types (KgPort/KgParam/KgInstance/KgImport/KgModule/ +// KgKeyValue/KgWalkResult) are emitted by cxx with their full definitions in +// bender-slang/src/lib.rs.h. We forward declare them here so the prototype of +// `walk_design` is visible to the cxx-generated bridge .cc, where the cxx +// translation unit also contains the full type definitions afterwards. +struct KgKeyValue; +struct KgParam; +struct KgPort; +struct KgInstance; +struct KgImport; +struct KgModule; +struct KgWalkResult; +struct KgInstanceContext; +struct KgElabResult; + struct SlangPrintOpts; +// A SlangContext wraps a per-group preprocessor configuration. The +// SourceManager is owned by the parent SlangSession and shared across all +// groups so the resulting SyntaxTrees can be combined into a single +// `slang::ast::Compilation` for elaboration. class SlangContext { public: - SlangContext(); + explicit SlangContext(slang::SourceManager& sm); void set_includes(const rust::Vec& includes); void set_defines(const rust::Vec& defines); - std::vector> parse_files(const rust::Vec& paths); + // Parse `paths` into a single SyntaxTree (one slang compilation unit). 
+ // When `inherited` is non-empty, slang predefines those macros into the + // preprocessor before parsing, propagating `define`s declared in earlier + // source groups (used to emulate vcs/`vlog -mfcu` "single-unit" mode). + // When `lenient` is true, parse-time error diagnostics do NOT abort: a + // group with errors is re-parsed file by file, the files that still + // report errors are dropped, and only the clean files' trees are + // returned. This approximates pyslang's best-effort policy for hostile + // inputs (encrypted vendor IP, missing includes, ...). + std::vector> + parse_files(const rust::Vec& paths, + slang::syntax::SyntaxTree::MacroList inherited, + bool lenient); private: - slang::SourceManager sourceManager; + slang::SourceManager& sourceManager; slang::parsing::PreprocessorOptions ppOptions; slang::DiagnosticEngine diagEngine; std::shared_ptr diagClient; @@ -42,11 +73,36 @@ class SlangSession { void parse_group(const rust::Vec& files, const rust::Vec& includes, const rust::Vec& defines); + // Toggle cross-group macro propagation. When true, every subsequent + // `parse_group` call inherits all `define`s collected from prior groups, + // matching `vcs` (default) / `vlog -mfcu` semantics. + void set_single_unit(bool enable) { singleUnit = enable; } + bool single_unit() const { return singleUnit; } + + // Toggle lenient (best-effort) parsing. When true, parse-time error + // diagnostics are still reported but do NOT abort the build; files that + // parse without errors are retained for downstream walks. + void set_lenient(bool enable) { lenient = enable; } + bool is_lenient() const { return lenient; } + const std::vector>& trees() const { return allTrees; } + slang::SourceManager& source_manager() { return sourceManager; } + + // Keep only the syntax trees at the given indices (in-place); drop the + // rest. Used by the kg pipeline to prune to the set of trees reachable + // from one or more top modules before the downstream walks.
Subsequent + // `walk_design` / `walk_elaborated` calls operate on the pruned set. + void retain_trees(const rust::Vec& indices); private: + slang::SourceManager sourceManager; std::vector> contexts; std::vector> allTrees; + // Macros defined by previously-parsed groups; backed by SyntaxTree + // ownership in `allTrees` so the pointers remain valid. + std::vector accumulatedMacros; + bool singleUnit = false; + bool lenient = false; }; class SyntaxTreeRewriter { @@ -83,4 +139,14 @@ std::shared_ptr tree_at(const SlangSession& session, std::uint64_t renamed_declarations(const SyntaxTreeRewriter& rewriter); std::uint64_t renamed_references(const SyntaxTreeRewriter& rewriter); +// Walk every parsed syntax tree in `session` and emit knowledge-graph records. +// Defined in cpp/walker.cpp; the cxx bridge generates the Rust-side glue and +// the matching definition of KgWalkResult in bender-slang/src/lib.rs.h. +KgWalkResult walk_design(const SlangSession& session); + +// Build a slang::ast::Compilation from the session's parsed trees, force +// elaboration from the given tops, and harvest resolved parameter bindings +// and port widths for each elaborated InstanceSymbol. Defined in cpp/elab.cpp. +KgElabResult walk_elaborated(const SlangSession& session, const rust::Vec& tops); + #endif // BENDER_SLANG_BRIDGE_H diff --git a/crates/bender-slang/cpp/walker.cpp b/crates/bender-slang/cpp/walker.cpp new file mode 100644 index 00000000..2bcfd02f --- /dev/null +++ b/crates/bender-slang/cpp/walker.cpp @@ -0,0 +1,441 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +// Knowledge-graph walker. Traverses every parsed SyntaxTree in a SlangSession +// and emits structured records describing each declared module/interface/ +// package. +// +// This is the C++ counterpart of `bender_slang::walk_design`; the kg.v3 +// data contract is owned by `bender-kg-models`. 
+ +#include "slang_bridge.h" + +#include "slang/syntax/AllSyntax.h" +#include "slang/syntax/SyntaxKind.h" +#include "slang/syntax/SyntaxNode.h" +#include "slang/syntax/SyntaxTree.h" +#include "slang/syntax/SyntaxVisitor.h" +#include "slang/text/SourceLocation.h" +#include "slang/text/SourceManager.h" +#include "slang/util/Util.h" + +#include "bender-slang/src/lib.rs.h" + +#include +#include +#include +#include +#include + +using namespace slang; +using namespace slang::syntax; +using namespace slang::parsing; + +namespace { + +// --- helpers ---------------------------------------------------------------- + +std::string trim_text(std::string_view sv) { + auto start = sv.find_first_not_of(" \t\r\n"); + auto end = sv.find_last_not_of(" \t\r\n"); + if (start == std::string::npos) return {}; + return std::string(sv.substr(start, end - start + 1)); +} + +std::string node_text(const SyntaxNode& node) { + std::string out; + auto sr = node.toString(); + out.assign(sr.data(), sr.size()); + return trim_text(out); +} + +// Resolve start/end line numbers for a syntax node via its SourceManager. +void node_lines(const SyntaxNode& node, const SourceManager& sm, std::int64_t& start_line, + std::int64_t& end_line) { + auto range = node.sourceRange(); + auto s = range.start(); + auto e = range.end(); + if (s) { + start_line = static_cast(sm.getLineNumber(s)); + } + if (e) { + end_line = static_cast(sm.getLineNumber(e)); + } +} + +std::string node_file(const SyntaxNode& node, const SourceManager& sm) { + auto range = node.sourceRange(); + auto s = range.start(); + if (!s) return {}; + auto path = sm.getFullPath(s.buffer()); + return path.string(); +} + +// Get token text safely. +std::string tok_text(Token t) { + if (!t.valueText().empty()) { + return std::string(t.valueText()); + } + return std::string(t.rawText()); +} + +// Parameter kind classification, kept in lockstep with the kg.v3 IR. 
+rust::String classify_param_kind(const std::string& type_text, bool is_type_param) { + if (is_type_param) return rust::String("type"); + auto t = type_text; + std::transform(t.begin(), t.end(), t.begin(), + [](unsigned char c) { return static_cast(std::tolower(c)); }); + if (t.find("string") != std::string::npos) return rust::String("string"); + if (t.find("int") != std::string::npos || t.find("integer") != std::string::npos) + return rust::String("int"); + if (t.find("bit") != std::string::npos || t.find("logic") != std::string::npos || + t.find("reg") != std::string::npos) + return rust::String("bit"); + return rust::String("other"); +} + +rust::String dir_to_str(TokenKind kind) { + switch (kind) { + case TokenKind::InputKeyword: + return rust::String("input"); + case TokenKind::OutputKeyword: + return rust::String("output"); + case TokenKind::InOutKeyword: + return rust::String("inout"); + case TokenKind::RefKeyword: + return rust::String("ref"); + default: + return rust::String("input"); + } +} + +// Walk a parameter port list and emit ParamRecords. +void collect_parameters(const ParameterPortListSyntax* params, rust::Vec& out) { + if (!params) return; + for (const auto* decl : params->declarations) { + if (auto* td = decl->as_if()) { + for (const auto* td_decl : td->declarators) { + KgParam p; + p.name = rust::String(tok_text(td_decl->name)); + p.kind = rust::String("type"); + p.is_type_param = true; + if (td_decl->assignment) { + p.default_value = rust::String(node_text(*td_decl->assignment->type)); + } + out.push_back(std::move(p)); + } + } else if (auto* pd = decl->as_if()) { + std::string type_text = pd->type ? 
node_text(*pd->type) : std::string(); + for (const auto* p_decl : pd->declarators) { + KgParam p; + p.name = rust::String(tok_text(p_decl->name)); + p.kind = classify_param_kind(type_text, false); + p.is_type_param = false; + if (p_decl->initializer) { + p.default_value = rust::String(node_text(*p_decl->initializer->expr)); + } + out.push_back(std::move(p)); + } + } + } +} + +// Walk an ANSI port list and emit PortRecords. +void collect_ports(const PortListSyntax* ports, rust::Vec& out) { + if (!ports) return; + auto* ansi = ports->as_if(); + if (!ansi) { + // Non-ANSI port lists are still walkable but shapes are different. For + // v1 no port records are emitted for them; the loop below is a no-op. + for (size_t i = 0; i < ports->getChildCount(); ++i) { + (void)ports->childNode(i); + } + return; + } + std::string current_dir = "input"; + std::string current_type; + for (const auto* port : ansi->ports) { + if (auto* impl = port->as_if()) { + if (impl->header) { + if (auto* vh = impl->header->as_if()) { + if (vh->direction.kind != TokenKind::Unknown) { + current_dir = std::string(dir_to_str(vh->direction.kind).data(), + dir_to_str(vh->direction.kind).size()); + } + current_type = vh->dataType ? node_text(*vh->dataType) : std::string(); + } else if (auto* nh = impl->header->as_if()) { + if (nh->direction.kind != TokenKind::Unknown) { + current_dir = std::string(dir_to_str(nh->direction.kind).data(), + dir_to_str(nh->direction.kind).size()); + } + current_type = nh->dataType ?
node_text(*nh->dataType) : std::string(); + } + } + KgPort p; + p.name = rust::String(tok_text(impl->declarator->name)); + p.direction = rust::String(current_dir); + p.type_str = rust::String(current_type); + // dimensions -> width_expr + if (!impl->declarator->dimensions.empty()) { + std::string dims; + for (const auto* d : impl->declarator->dimensions) { + dims += node_text(*d); + } + p.width_expr = rust::String(dims); + } + p.bit_width = -1; + p.is_type_param = false; + out.push_back(std::move(p)); + } else if (auto* expl = port->as_if()) { + KgPort p; + p.name = rust::String(tok_text(expl->name)); + p.direction = rust::String(current_dir); + p.type_str = rust::String(current_type); + p.bit_width = -1; + p.is_type_param = false; + out.push_back(std::move(p)); + } + } +} + +// Walk a HierarchyInstantiation (e.g. `tt_fpu_v2 #(...) u_fpu(...)`) and emit a +// KgInstance for every named instance. +void collect_instances(const HierarchyInstantiationSyntax& inst, const SourceManager& sm, + rust::Vec& out) { + std::string module_name = tok_text(inst.type); + // Param assignments, e.g. #(.WIDTH(32)) or #(32, 16). + rust::Vec param_bindings; + if (inst.parameters) { + if (auto* pa = inst.parameters->as_if()) { + int positional = 0; + for (const auto* p : pa->parameters) { + if (auto* named = p->as_if()) { + KgKeyValue kv; + kv.key = rust::String(tok_text(named->name)); + kv.value = + rust::String(named->expr ? node_text(*named->expr) : std::string()); + param_bindings.push_back(std::move(kv)); + } else if (auto* ord = p->as_if()) { + KgKeyValue kv; + kv.key = rust::String("$" + std::to_string(positional)); + kv.value = + rust::String(ord->expr ? 
node_text(*ord->expr) : std::string()); + param_bindings.push_back(std::move(kv)); + ++positional; + } + } + } + } + for (const auto* h_inst : inst.instances) { + if (!h_inst) continue; + KgInstance kgi; + kgi.module_name = rust::String(module_name); + if (h_inst->decl) { + kgi.instance_name = rust::String(tok_text(h_inst->decl->name)); + } else { + kgi.instance_name = rust::String(""); + } + kgi.line_start = -1; + kgi.line_end = -1; + node_lines(*h_inst, sm, kgi.line_start, kgi.line_end); + // Copy param bindings. + for (const auto& kv : param_bindings) { + KgKeyValue copy; + copy.key = kv.key; + copy.value = kv.value; + kgi.param_bindings.push_back(std::move(copy)); + } + // Port connections, if any. + { + int positional = 0; + for (const auto* conn : h_inst->connections) { + if (auto* named = conn->as_if()) { + KgKeyValue kv; + kv.key = rust::String(tok_text(named->name)); + kv.value = rust::String( + named->expr ? node_text(*named->expr) : std::string()); + kgi.port_bindings.push_back(std::move(kv)); + } else if (auto* ord = conn->as_if()) { + KgKeyValue kv; + kv.key = rust::String("$" + std::to_string(positional)); + kv.value = + rust::String(ord->expr ? node_text(*ord->expr) : std::string()); + kgi.port_bindings.push_back(std::move(kv)); + ++positional; + } + } + } + out.push_back(std::move(kgi)); + } +} + +// Walk a member list looking for instantiations and imports. Recurses into +// generate-blocks to capture conditional/looped instantiations. 
+void scan_member_list(const SyntaxList& members, const SourceManager& sm, + rust::Vec& insts, rust::Vec& imports); + +void scan_member(const MemberSyntax& m, const SourceManager& sm, + rust::Vec& insts, rust::Vec& imports) { + if (auto* hi = m.as_if()) { + collect_instances(*hi, sm, insts); + } else if (auto* pi = m.as_if()) { + for (const auto* item : pi->items) { + if (!item) continue; + KgImport imp; + imp.package_name = rust::String(tok_text(item->package)); + imp.is_wildcard = item->item.kind == TokenKind::Star; + if (!imp.is_wildcard) { + rust::String sym(tok_text(item->item)); + imp.specific_symbols.push_back(std::move(sym)); + } + imports.push_back(std::move(imp)); + } + } else if (auto* gen_blk = m.as_if()) { + scan_member_list(gen_blk->members, sm, insts, imports); + } else if (auto* if_gen = m.as_if()) { + if (if_gen->block) scan_member(*if_gen->block, sm, insts, imports); + } else if (auto* loop_gen = m.as_if()) { + if (loop_gen->block) scan_member(*loop_gen->block, sm, insts, imports); + } else if (auto* case_gen = m.as_if()) { + for (const auto* item : case_gen->items) { + if (!item) continue; + if (auto* def = item->as_if()) { + if (def->clause) { + if (auto* mb = def->clause->as_if()) { + scan_member(*mb, sm, insts, imports); + } + } + } else if (auto* std_item = item->as_if()) { + if (std_item->clause) { + if (auto* mb = std_item->clause->as_if()) { + scan_member(*mb, sm, insts, imports); + } + } + } + } + } else if (auto* gen_region = m.as_if()) { + scan_member_list(gen_region->members, sm, insts, imports); + } +} + +void scan_member_list(const SyntaxList& members, const SourceManager& sm, + rust::Vec& insts, rust::Vec& imports) { + for (const auto* m : members) { + if (!m) continue; + scan_member(*m, sm, insts, imports); + } +} + +KgModule build_module(const ModuleDeclarationSyntax& m, const SourceManager& sm, + bool is_package) { + KgModule out; + out.is_package = is_package; + out.is_interface = false; + out.line_start = -1; + out.line_end = 
-1; + out.param_block_start = -1; + out.param_block_end = -1; + out.port_block_start = -1; + out.port_block_end = -1; + // Header may be null when the parser bailed mid-declaration (e.g. lenient + // mode on a partially malformed unit). Guard every access so we can still + // emit a placeholder module record and let downstream walks skip it. + if (m.header) { + out.name = rust::String(tok_text(m.header->name)); + } else { + out.name = rust::String(""); + } + node_lines(m, sm, out.line_start, out.line_end); + out.file_path = rust::String(node_file(m, sm)); + + // Doc comments would require token trivia inspection; leave empty for v1. + // Description can be filled by a later pass that reads comments. + + if (m.header && m.header->parameters) { + std::int64_t s = -1, e = -1; + node_lines(*m.header->parameters, sm, s, e); + out.param_block_start = s; + out.param_block_end = e; + collect_parameters(m.header->parameters, out.parameters); + } + if (m.header && m.header->ports) { + std::int64_t s = -1, e = -1; + node_lines(*m.header->ports, sm, s, e); + out.port_block_start = s; + out.port_block_end = e; + collect_ports(m.header->ports, out.ports); + } + + // Walk members for instantiations/imports (only meaningful for non-packages). + scan_member_list(m.members, sm, out.instantiations, out.imports); + return out; +} + +void walk_tree(const std::shared_ptr& tree, KgWalkResult& out) { + if (!tree) return; + auto& root = tree->root(); + auto& sm = tree->sourceManager(); + auto* unit = root.as_if(); + if (!unit) return; + + // Pre-pass: collect compilation-unit-scope imports. Per SV LRM these are + // implicitly visible to all modules in the same compilation unit, so we + // attach them to every module-like record we emit from this tree. 
+ rust::Vec unit_imports; + { + rust::Vec _scratch_insts; + for (const auto* member : unit->members) { + if (!member) continue; + if (auto* pi = member->as_if()) { + for (const auto* item : pi->items) { + if (!item) continue; + KgImport imp; + imp.package_name = rust::String(tok_text(item->package)); + imp.is_wildcard = item->item.kind == TokenKind::Star; + if (!imp.is_wildcard) { + rust::String sym(tok_text(item->item)); + imp.specific_symbols.push_back(std::move(sym)); + } + unit_imports.push_back(std::move(imp)); + } + } + } + (void)_scratch_insts; + } + + for (const auto* member : unit->members) { + if (!member) continue; + if (auto* m = member->as_if()) { + bool is_package = m->kind == SyntaxKind::PackageDeclaration; + // Interfaces also go through ModuleDeclarationSyntax under slang. + bool is_interface = m->kind == SyntaxKind::InterfaceDeclaration; + KgModule rec = build_module(*m, sm, is_package); + rec.is_interface = is_interface; + // Merge in file-level imports. + for (const auto& imp : unit_imports) { + KgImport copy; + copy.package_name = imp.package_name; + copy.is_wildcard = imp.is_wildcard; + for (const auto& s : imp.specific_symbols) { + copy.specific_symbols.push_back(rust::String(s)); + } + rec.imports.push_back(std::move(copy)); + } + out.modules.push_back(std::move(rec)); + } + } +} + +} // namespace + +KgWalkResult walk_design(const SlangSession& session) { + KgWalkResult out; + for (const auto& tree : session.trees()) { + try { + walk_tree(tree, out); + } catch (const std::exception& ex) { + out.warnings.push_back(rust::String(std::string("walk error: ") + ex.what())); + } + } + return out; +} diff --git a/crates/bender-slang/src/lib.rs b/crates/bender-slang/src/lib.rs index 6930abb5..fde72227 100644 --- a/crates/bender-slang/src/lib.rs +++ b/crates/bender-slang/src/lib.rs @@ -20,6 +20,8 @@ pub enum SlangError { TreeAccess { message: String }, #[error("Failed to rewrite syntax trees: {message}")] Rewrite { message: String }, + #[error("Failed 
to walk design: {message}")] + Walk { message: String }, } #[derive(Debug, Clone, Copy, Default)] @@ -39,6 +41,116 @@ mod ffi { squash_newlines: bool, } + /// A simple ordered key/value pair shared with C++. Used for parameter and + /// port bindings on instantiations to preserve declaration order. + #[derive(Clone)] + struct KgKeyValue { + key: String, + value: String, + } + + /// Knowledge-graph parameter record. + #[derive(Clone)] + struct KgParam { + name: String, + kind: String, + default_value: String, + is_type_param: bool, + } + + /// Knowledge-graph port record. + #[derive(Clone)] + struct KgPort { + name: String, + direction: String, + type_str: String, + width_expr: String, + bit_width: i64, + is_type_param: bool, + } + + /// Knowledge-graph instantiation record. + #[derive(Clone)] + struct KgInstance { + module_name: String, + instance_name: String, + param_bindings: Vec, + port_bindings: Vec, + line_start: i64, + line_end: i64, + } + + /// Knowledge-graph package import record. + #[derive(Clone)] + struct KgImport { + package_name: String, + is_wildcard: bool, + specific_symbols: Vec, + } + + /// Knowledge-graph module record. + #[derive(Clone)] + struct KgModule { + name: String, + file_path: String, + is_package: bool, + is_interface: bool, + line_start: i64, + line_end: i64, + param_block_start: i64, + param_block_end: i64, + port_block_start: i64, + port_block_end: i64, + parameters: Vec, + ports: Vec, + instantiations: Vec, + imports: Vec, + } + + /// Result of `walk_design`. + #[derive(Clone)] + struct KgWalkResult { + modules: Vec, + warnings: Vec, + } + + /// Resolved bit width for a single port. `total` is the type's + /// `getBitWidth()`; `fields` carries a dot-flattened breakdown across + /// nested packed structs / unions and is empty for scalar ports. 
+ /// + /// `element_count > 0` indicates the canonical type is a packed array + /// (`req_t [N-1:0]`), in which case `element_total` and `element_fields` + /// describe one array element's layout (recursively flattened). Cxx + /// bridge types can't easily nest, so the array template is kept flat + /// here and folded into the typed model on the Rust side. + #[derive(Clone)] + struct KgPortWidth { + name: String, + total: i64, + fields: Vec, + element_count: i64, + element_total: i64, + element_fields: Vec, + } + + /// Per-instance context produced by the elaborated walk: resolved + /// parameter values and port widths in the parent module's frame. + #[derive(Clone)] + struct KgInstanceContext { + parent_module: String, + instance_name: String, + child_module: String, + param_bindings: Vec, + port_widths: Vec, + } + + /// Result of `walk_elaborated`. + #[derive(Clone)] + struct KgElabResult { + contexts: Vec, + warnings: Vec, + } + unsafe extern "C++" { include!("bender-slang/cpp/slang_bridge.h"); include!("slang/syntax/SyntaxTree.h"); @@ -53,6 +165,18 @@ mod ffi { fn new_slang_session() -> UniquePtr; + /// Toggle cross-group macro propagation. When enabled (before any + /// `parse_group` call), `\`define`s declared in earlier groups are + /// inherited by later groups, mirroring vcs / `vlog -mfcu` semantics. + fn set_single_unit(self: Pin<&mut SlangSession>, enable: bool); + + /// Toggle lenient parsing. When enabled, parse-time error + /// diagnostics are still reported but do NOT abort the build; the + /// indexer ingests whichever modules survived parsing. Useful for + /// repos with encrypted vendor IP, missing `\`include`s, or other + /// hostile inputs that still admit a partial graph. 
+ fn set_lenient(self: Pin<&mut SlangSession>, enable: bool); + fn parse_group( self: Pin<&mut SlangSession>, files: &Vec, @@ -62,6 +186,12 @@ mod ffi { fn reachable_tree_indices(session: &SlangSession, tops: &Vec) -> Result>; + /// Keep only the trees at the given indices (in-place); drop the rest. + /// Used by the kg pipeline to prune to the set of trees reachable from + /// a top module before the downstream walks. Subsequent `walk_design` + /// / `walk_elaborated` calls operate on the pruned set automatically. + fn retain_trees(self: Pin<&mut SlangSession>, indices: &Vec); + fn tree_count(session: &SlangSession) -> usize; fn tree_at(session: &SlangSession, index: usize) -> Result>; @@ -84,14 +214,39 @@ mod ffi { fn print_tree(tree: SharedPtr, options: SlangPrintOpts) -> String; fn dump_tree_json(tree: SharedPtr) -> String; + + /// Walk the parsed design, extracting structured records suitable for + /// building a knowledge graph. Returns one [`KgModule`] per declared + /// module/interface/package across all parsed source groups. + fn walk_design(session: &SlangSession) -> Result; + + /// Build a slang `Compilation` from the session's parsed trees, force + /// elaboration from the requested top modules, and harvest resolved + /// per-instance parameter bindings and port widths. + fn walk_elaborated(session: &SlangSession, tops: &Vec) -> Result; } } +pub use ffi::{ + KgElabResult, KgImport, KgInstance, KgInstanceContext, KgKeyValue, KgModule, KgParam, KgPort, + KgPortWidth, KgWalkResult, +}; + /// Public owner for all parsed trees and parse contexts. pub struct SlangSession { inner: UniquePtr, } +// SAFETY: the underlying C++ `SlangSession` owns all of its state through +// either `std::vector` or `std::shared_ptr` and is never aliased across +// threads internally. 
We use it to run `walk_elaborated` on a worker +// thread while the main thread drives Grafeo writes, so the only +// thread-related operation is moving the unique-pointer wrapper between +// threads — that's safe so long as both threads observe a happens-before +// boundary (the `std::thread::Builder::spawn` / scope-thread join calls +// already provide one). +unsafe impl Send for SlangSession {} + /// Borrowed syntax-tree handle tied to the owning session lifetime. pub struct SyntaxTree<'a> { inner: SharedPtr, @@ -148,6 +303,22 @@ impl SlangSession { } } + /// Toggle cross-group macro propagation. Call before any `parse_group` + /// call to enable vcs-style "single compilation unit" semantics, where + /// `` `define ``s declared in earlier groups are visible to later ones. + pub fn set_single_unit(&mut self, enable: bool) { + self.inner.pin_mut().set_single_unit(enable); + } + + /// Toggle lenient (best-effort) parsing. When enabled, parse-time error + /// diagnostics are not fatal: the indexer ingests whichever modules + /// survived parsing. Mirrors pyslang's default policy and lets `bender + /// kg build` survive repos with encrypted vendor IP or unsatisfied + /// includes. + pub fn set_lenient(&mut self, enable: bool) { + self.inner.pin_mut().set_lenient(enable); + } + /// Parses one source group with scoped include directories and defines. pub fn parse_group( &mut self, @@ -208,6 +379,15 @@ impl SlangSession { Ok(out) } + /// Prune the session's parsed trees to the given indices. After this + /// call, every other API on the session (`tree_count`, `all_trees`, + /// `walk_design`, `walk_elaborated`, ...) operates on the retained + /// subset only. Out-of-range indices are silently skipped. + pub fn retain_trees(&mut self, indices: &[u32]) { + let owned: Vec = indices.to_vec(); + self.inner.pin_mut().retain_trees(&owned); + } + /// Returns a handle to the syntax tree at the given index. 
pub fn tree(&self, index: usize) -> Result> { Ok(SyntaxTree { @@ -219,6 +399,26 @@ impl SlangSession { _session: PhantomData, }) } + + /// Walks every parsed syntax tree and emits structured records suitable + /// for building a knowledge graph (module/package/interface declarations, + /// instantiations, ports, parameters, and imports). + pub fn walk_design(&self) -> Result { + ffi::walk_design(self.inner.as_ref().unwrap()).map_err(|cause| SlangError::Walk { + message: cause.to_string(), + }) + } + + /// Force elaboration from the given top modules and emit per-instance + /// resolved parameter bindings and port widths. + pub fn walk_elaborated(&self, tops: &[String]) -> Result { + let tops_vec = tops.to_vec(); + ffi::walk_elaborated(self.inner.as_ref().unwrap(), &tops_vec).map_err(|cause| { + SlangError::Walk { + message: cause.to_string(), + } + }) + } } impl Default for SlangSession { diff --git a/crates/bender-slang/tests/basic.rs b/crates/bender-slang/tests/basic.rs index 5b405f26..9cd9c64b 100644 --- a/crates/bender-slang/tests/basic.rs +++ b/crates/bender-slang/tests/basic.rs @@ -94,3 +94,85 @@ fn rewriter_build_from_trees_is_repeatable() { .contains("module p_top_s (") ); } + +#[test] +fn walk_design_extracts_modules_packages_and_instantiations() { + let mut session = bender_slang::SlangSession::new(); + let files = vec![ + fixture_path("src/common_pkg.sv"), + fixture_path("src/bus_intf.sv"), + fixture_path("src/leaf.sv"), + fixture_path("src/core.sv"), + fixture_path("src/top.sv"), + ]; + let includes = vec![fixture_path("include")]; + let defines = vec![]; + session + .parse_group(&files, &includes, &defines) + .expect("parse should succeed"); + + let result = session.walk_design().expect("walk should succeed"); + + // Expect one record per module/package/interface declaration. 
+ let by_name: std::collections::BTreeMap<_, _> = + result.modules.iter().map(|m| (m.name.clone(), m)).collect(); + + let common_pkg = by_name + .get("common_pkg") + .expect("common_pkg package should be present"); + assert!( + common_pkg.is_package, + "common_pkg should be flagged as a package" + ); + + let bus_intf = by_name + .get("bus_intf") + .expect("bus_intf interface should be present"); + assert!( + bus_intf.is_interface, + "bus_intf should be flagged as an interface" + ); + + let leaf = by_name.get("leaf").expect("leaf module should be present"); + assert!(!leaf.is_package && !leaf.is_interface); + + let core = by_name.get("core").expect("core module should be present"); + let core_params: Vec<&str> = core.parameters.iter().map(|p| p.name.as_str()).collect(); + assert!( + core_params.contains(&"DefaultState"), + "core should have DefaultState parameter, got {core_params:?}" + ); + let core_insts: Vec<&str> = core + .instantiations + .iter() + .map(|i| i.module_name.as_str()) + .collect(); + assert!( + core_insts.contains(&"leaf"), + "core should instantiate leaf, got {core_insts:?}" + ); + + let top = by_name.get("top").expect("top module should be present"); + let inst_modules: Vec<&str> = top + .instantiations + .iter() + .map(|i| i.module_name.as_str()) + .collect(); + assert!( + inst_modules.contains(&"core"), + "top should instantiate core, got {inst_modules:?}" + ); + assert!( + inst_modules.contains(&"bus_intf"), + "top should instantiate bus_intf, got {inst_modules:?}" + ); + let imports: Vec<&str> = top + .imports + .iter() + .map(|i| i.package_name.as_str()) + .collect(); + assert!( + imports.contains(&"common_pkg"), + "top should import common_pkg, got {imports:?}" + ); +} diff --git a/src/cli.rs b/src/cli.rs index ece76a5c..3ed03bc9 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -112,6 +112,9 @@ enum Commands { Audit(cmd::audit::AuditArgs), #[cfg(feature = "slang")] Pickle(cmd::pickle::PickleArgs), + /// Build, query, and serve the local 
knowledge graph. + #[cfg(feature = "slang")] + Kg(cmd::kg::KgArgs), #[command(external_subcommand)] Plugin(Vec), } @@ -338,6 +341,8 @@ pub fn main() -> Result<()> { Commands::Audit(args) => cmd::audit::run(&sess, &args), #[cfg(feature = "slang")] Commands::Pickle(args) => cmd::pickle::run(&sess, args), + #[cfg(feature = "slang")] + Commands::Kg(args) => cmd::kg::run(&sess, args), Commands::Plugin(args) => { let (plugin_name, plugin_args) = args .split_first() diff --git a/src/cmd.rs b/src/cmd.rs index bbae6227..60159ecd 100644 --- a/src/cmd.rs +++ b/src/cmd.rs @@ -16,6 +16,8 @@ pub mod completion; pub mod config; pub mod fusesoc; pub mod init; +#[cfg(feature = "slang")] +pub mod kg; pub mod packages; pub mod parents; pub mod path; diff --git a/src/cmd/kg.rs b/src/cmd/kg.rs new file mode 100644 index 00000000..d9ef9a94 --- /dev/null +++ b/src/cmd/kg.rs @@ -0,0 +1,1966 @@ +// Copyright (c) 2026 ETH Zurich +// Alessandro Ottaviano + +//! The `kg` subcommand: build, query, and serve the local knowledge graph. +//! +//! Reuses the existing Bender source resolution (the same path the `script` +//! and `sources` subcommands use), drives `bender-slang` to parse and walk +//! the design, persists the result into a single Grafeo file (graph + +//! HNSW vectors + BM25 text index), and exposes both a typed CLI and an +//! MCP stdio adapter. 
+ +#![allow(missing_docs)] + +use std::collections::{BTreeMap, BTreeSet}; +use std::fmt; +use std::fmt::Write as _; +use std::io::Write; +use std::path::{Path, PathBuf}; + +use clap::{Args, Subcommand, ValueEnum}; +use indexmap::{IndexMap, IndexSet}; +use miette::{Context as _, IntoDiagnostic as _}; +use owo_colors::{OwoColorize, Stream}; +use tokio::runtime::Runtime; + +use bender_kg_core::{CoreConfig, Engine}; +use bender_kg_extract::{ExtractInputs, SourceGroupInput}; + +use crate::Result; +use crate::cmd::sources::get_passed_targets; +use crate::config::{Validate, ValidationContext}; +use crate::sess::{Session, SessionIo}; +use crate::src::{SourceFile, SourceType}; +use crate::target::TargetSet; + +#[derive(Args, Debug)] +pub struct KgArgs { + #[command(subcommand)] + pub command: KgCommands, +} + +#[derive(Subcommand, Debug)] +pub enum KgCommands { + /// Build the knowledge graph end-to-end (extract -> index). + Build(BuildArgs), + /// Run extraction only and write `kg.v3` IR to a JSONL file. + Parse(ParseArgs), + /// Index a previously-produced IR JSONL into the local graph store. + Index(IndexArgs), + /// Query the graph; mirrors the MCP tool surface. + Query(QueryArgs), + /// Drop a single design's data (or everything with `--all`). + Clear(ClearArgs), + /// Print summary statistics. + Stats(StatsArgs), + /// Run the stdio MCP server. + Mcp(McpArgs), +} + +/// Base arguments shared by all kg subcommands. +#[derive(Args, Debug, Clone)] +pub struct BaseKgArgs { + /// Root directory for kg artifacts (defaults to `/.bender-kg`). + #[arg(long, env = "BENDER_KG_ROOT")] + pub root: Option, + /// Output format (tree for human-readable, json for scripts/LLM). + #[arg(long, value_enum, default_value_t = OutputFormat::Tree)] + pub format: OutputFormat, +} + +/// Build configuration arguments for commands that build/index the knowledge graph. 
+#[derive(Args, Debug, Clone)] +pub struct BuildConfigArgs { + /// Skip the embedding step (faster builds, but disables `kg search`). + #[arg(long)] + pub no_embed: bool, + /// Embedding dimensionality used by the deterministic-fallback embedder. + #[arg(long, default_value_t = bender_kg_similarity::DEFAULT_DIM as u64)] + pub embed_dim: u64, + /// Maximum rows per UNWIND batch when upserting modules and edges into + /// the Grafeo store. Larger = fewer Cypher round-trips, more memory + /// per call. Default 4096 is tuned for ~1.7k-module designs; set + /// lower on memory-tight hosts. + #[arg(long, default_value_t = 4096, value_name = "N")] + pub upsert_chunk_size: u32, + /// Disable the parallel pipeline that overlaps slang's `walk_elaborated` + /// with the base graph upsert when `--elab` is on. Falls back to running + /// the two phases sequentially. Mostly useful for debugging; the parallel + /// path is correctness-equivalent. + #[arg(long)] + pub no_pipeline_elab: bool, +} + +#[derive(Args, Debug, Clone)] +pub struct ResolutionArgs { + /// Select specific target from Bender.yml (repeatable). + #[arg(short, long, action = clap::ArgAction::Append)] + pub target: Vec, + /// Select specific package (repeatable). + #[arg(short = 'p', long, action = clap::ArgAction::Append)] + pub package: Vec, + /// Exclude package from dependency resolution (repeatable). + #[arg(long, action = clap::ArgAction::Append)] + pub exclude: Vec, + /// Don't include dependencies, only direct sources. + #[arg(long)] + pub no_deps: bool, + /// Include directory for SystemVerilog `include directives. + #[arg(short = 'I', action = clap::ArgAction::Append)] + pub include_dir: Vec, + /// Define macro for SystemVerilog preprocessing. + #[arg(short = 'D', action = clap::ArgAction::Append)] + pub define: Vec, + /// One or more elaboration roots. REQUIRED for `kg build` and + /// `kg parse`.
The graph is pruned to only the syntax trees reachable + /// from these tops (via slang's symbol-reference graph) before the + /// downstream walk, so the resulting graph captures exactly the modules + /// used by the design. Repeatable; pass once per root. + #[arg( + long = "top", + action = clap::ArgAction::Append, + value_name = "MODULE", + required = true + )] + pub top: Vec, + /// Run slang's elaboration pass from `--top` and enrich + /// `InstantiationInfo` with `resolved_param_values` and + /// `resolved_port_widths`. Off by default (skips a costly Compilation + /// build). Orthogonal to `--top`: pruning still happens regardless. + #[arg(long)] + pub elab: bool, + /// Design identifier for multi-design workspaces. + #[arg(long)] + pub design: Option, + /// Treat all source groups as one slang compilation unit (vcs / `vlog + /// -mfcu` semantics). `\`define`s declared in earlier groups become + /// visible to later groups, which lets cross-package macro use parse + /// without per-file `\`include`s. Off by default; enable when a build + /// fails on `unknown macro or compiler directive` errors that point to + /// macros defined in another Bender package. + #[arg(long)] + pub single_unit: bool, + /// Best-effort parsing: report parse-time errors but don't abort the + /// build. The indexer ingests whichever modules survived parsing. + /// Useful for repos with encrypted vendor IP (`\`protect`), unsatisfied + /// `\`include`s, or other hostile inputs that still admit a partial + /// graph. Off by default (strict). + #[arg(long, alias = "keep-going")] + pub lenient: bool, +} + +#[derive(Args, Debug)] +pub struct BuildArgs { + #[command(flatten)] + pub base: BaseKgArgs, + #[command(flatten)] + pub build_config: BuildConfigArgs, + #[command(flatten)] + pub res: ResolutionArgs, +} + +#[derive(Args, Debug)] +pub struct ParseArgs { + #[command(flatten)] + pub base: BaseKgArgs, + /// Skip the embedding step (faster builds, but disables `kg search`). 
+ #[arg(long)] + pub no_embed: bool, + /// Embedding dimensionality used by the deterministic-fallback embedder. + #[arg(long, default_value_t = bender_kg_similarity::DEFAULT_DIM as u64)] + pub embed_dim: u64, + /// Disable the parallel pipeline that overlaps slang's `walk_elaborated` + /// with the base graph upsert when `--elab` is on. + #[arg(long)] + pub no_pipeline_elab: bool, + #[command(flatten)] + pub res: ResolutionArgs, + /// Output file path for extracted JSONL. + #[arg(short, long)] + pub output: Option, +} + +#[derive(Args, Debug)] +pub struct IndexArgs { + #[command(flatten)] + pub base: BaseKgArgs, + /// Skip the embedding step (faster builds, but disables `kg search`). + #[arg(long)] + pub no_embed: bool, + /// Embedding dimensionality used by the deterministic-fallback embedder. + #[arg(long, default_value_t = bender_kg_similarity::DEFAULT_DIM as u64)] + pub embed_dim: u64, + /// Maximum rows per UNWIND batch when upserting modules and edges. + #[arg(long, default_value_t = 4096, value_name = "N")] + pub upsert_chunk_size: u32, + /// Input JSONL file path to index. + #[arg(short, long)] + pub input: Option, +} + +#[derive(Args, Debug)] +pub struct ClearArgs { + #[command(flatten)] + pub base: BaseKgArgs, + /// Design to clear from graph. + #[arg(long)] + pub design: Option, + /// Clear all designs from graph. + #[arg(long)] + pub all: bool, +} + +#[derive(Args, Debug)] +pub struct StatsArgs { + #[command(flatten)] + pub base: BaseKgArgs, + /// Show statistics for specific design. + #[arg(long)] + pub design: Option, +} + +#[derive(Args, Debug)] +pub struct McpArgs { + #[command(flatten)] + pub base: BaseKgArgs, +} + +#[derive(Args, Debug)] +pub struct QueryArgs { + #[command(flatten)] + pub base: BaseKgArgs, + #[command(subcommand)] + pub op: QueryOp, +} + +#[derive(ValueEnum, Debug, Clone, Copy, PartialEq, Eq)] +pub enum OutputFormat { + /// Compact single-line JSON; ideal for piping into `jq` or LLM consumption. 
+ Json, + /// Human-friendly tree format (default). For pretty JSON, pipe json format to `jq`. + Tree, +} + +#[derive(Subcommand, Debug)] +pub enum QueryOp { + SearchModules { + query: String, + #[arg(long, default_value_t = 15)] + top_k: usize, + #[arg(long)] + design: Option, + }, + GetModule { + name: String, + }, + GetSubgraph { + name: String, + #[arg(long, default_value_t = 3)] + depth: i32, + }, + GetInstanceContext { + parent: String, + child: String, + }, + GetParents { + name: String, + }, + GetChildren { + name: String, + }, + GetPorts { + name: String, + }, + FindByProtocol { + protocol: String, + #[arg(long)] + design: Option, + }, + GetSourceSnippet { + module_name: String, + #[arg(long, default_value = "module")] + element: String, + #[arg(long, default_value = "")] + instance_name: String, + }, + TraceHierarchyPath { + from_module: String, + to_module: String, + }, + CheckConnectivity { + module_name: String, + #[arg(long, default_value_t = 1)] + depth: i32, + }, + TraceParameter { + module_name: String, + param_name: String, + /// Follow parameter propagation recursively through the hierarchy. + #[arg(long)] + recursive: bool, + /// Maximum recursion depth (only active with --recursive). + #[arg(long, default_value_t = 5, value_name = "N")] + depth: i32, + }, + TraceSignal { + module_name: String, + signal_name: String, + /// Follow signal connections recursively through the hierarchy. + #[arg(long)] + recursive: bool, + /// Maximum recursion depth (only active with --recursive). 
+ #[arg(long, default_value_t = 5, value_name = "N")] + depth: i32, + }, + MatchInterfaces { + module_a: String, + module_b: String, + #[arg(long, default_value = "")] + prefix_a: String, + #[arg(long, default_value = "")] + prefix_b: String, + }, + FindStructurallySimilar { + module_name: String, + #[arg(long, default_value_t = 0.3)] + min_overlap: f64, + #[arg(long)] + design: Option, + }, +} + +pub fn run(sess: &Session, args: KgArgs) -> Result<()> { + let workspace = sess.root.to_path_buf(); + match args.command { + KgCommands::Build(a) => run_build(sess, &workspace, a), + KgCommands::Parse(a) => run_parse(sess, &workspace, a), + KgCommands::Index(a) => run_index(&workspace, a), + KgCommands::Query(a) => run_query(&workspace, a), + KgCommands::Clear(a) => run_clear(&workspace, a), + KgCommands::Stats(a) => run_stats(&workspace, a), + KgCommands::Mcp(a) => run_mcp(&workspace, a), + } +} + + +fn rt() -> Result { + Runtime::new().into_diagnostic().wrap_err("tokio runtime") +} + +fn open_engine(rt: &Runtime, cfg: CoreConfig) -> Result { + rt.block_on(Engine::open(cfg)) + .into_diagnostic() + .wrap_err("open kg engine") +} + +fn resolve_inputs( + sess: &Session, + workspace: &Path, + res: &ResolutionArgs, + rt: &Runtime, +) -> Result { + let io = SessionIo::new(sess); + let srcs = rt.block_on(io.sources(false, &[]))?; + + let targets = TargetSet::new(res.target.iter().map(|s| s.as_str())); + let package_set: IndexSet = IndexSet::from_iter(res.package.iter().cloned()); + let exclude_set: IndexSet = IndexSet::from_iter(res.exclude.iter().cloned()); + + let packages = &srcs.get_package_list( + sess.manifest.package.name.to_string(), + &package_set, + &exclude_set, + res.no_deps, + ); + + let (targets, packages) = get_passed_targets(sess, rt, &io, &targets, packages, &package_set)?; + + let srcs = srcs + .filter_targets(&targets) + .unwrap_or_default() + .filter_packages(&packages) + .unwrap_or_default(); + + let srcs_flat = srcs + .flatten() + .into_iter() + .map(|f| 
f.validate(&ValidationContext::default())) + .collect::>>()?; + + let active_targets: Vec = res.target.iter().cloned().collect(); + let target_defs = bender_kg_extract::target_defines(&active_targets); + + let mut groups: IndexMap = IndexMap::new(); + for grp in srcs_flat { + let key = grp.package.unwrap_or("").to_string(); + let entry = groups.entry(key).or_insert_with(|| { + let mut defs: Vec = target_defs.clone(); + defs.extend(res.define.iter().cloned()); + SourceGroupInput { + files: Vec::new(), + include_dirs: res.include_dir.clone(), + defines: defs, + } + }); + for src in &grp.files { + if let SourceFile::File(p, Some(SourceType::Verilog)) = src { + entry.files.push(p.to_string_lossy().into_owned()); + } + } + for (_, p) in grp + .include_dirs + .iter() + .chain(grp.export_incdirs.values().flatten()) + { + let s = p.to_string_lossy().into_owned(); + if !entry.include_dirs.contains(&s) { + entry.include_dirs.push(s); + } + } + for (name, (_, val)) in &grp.defines { + let entry_def = match val { + Some(v) => format!("{name}={v}"), + None => name.to_string(), + }; + if !entry.defines.contains(&entry_def) { + entry.defines.push(entry_def); + } + } + } + let groups: Vec = groups + .into_values() + .filter(|g| !g.files.is_empty()) + .collect(); + + let tops: Vec = res.top.iter().cloned().collect(); + + Ok(ExtractInputs { + workspace: workspace.to_string_lossy().into_owned(), + targets: active_targets, + tops, + elab: res.elab, + design_alias: res.design.clone(), + groups, + single_unit: res.single_unit, + lenient: res.lenient, + parse_jobs: 1, + }) +} + +fn run_build(sess: &Session, workspace: &Path, args: BuildArgs) -> Result<()> { + let rt = rt()?; + let root = args.base.root.clone().unwrap_or_else(|| workspace.join(".bender-kg")); + let mut cfg = CoreConfig::new(root); + cfg.embed.dim = args.build_config.embed_dim as usize; + cfg.skip_embeddings = args.build_config.no_embed; + cfg.upsert_chunk_size = args.build_config.upsert_chunk_size.max(1) as usize; + 
cfg.pipeline_elab = !args.build_config.no_pipeline_elab; + + let inputs = resolve_inputs(sess, workspace, &args.res, &rt)?; + let mut engine = open_engine(&rt, cfg)?; + let outcome = rt + .block_on(engine.build(&inputs)) + .into_diagnostic() + .wrap_err("kg build")?; + let summary = serde_json::json!({ + "design": outcome.manifest.identity.alias, + "id": outcome.manifest.identity.id, + "modules": outcome.modules_indexed, + "embeddings": outcome.embeddings_indexed, + "ir_path": engine.config().ir_path(), + "manifest_path": engine.config().manifest_path(), + "db_path": engine.store().db_path().ok(), + "phases_seconds": outcome.phases, + }); + emit(&summary, args.base.format, None) +} + +fn run_parse(sess: &Session, workspace: &Path, args: ParseArgs) -> Result<()> { + let rt = rt()?; + let root = args.base.root.clone().unwrap_or_else(|| workspace.join(".bender-kg")); + let mut cfg = CoreConfig::new(root); + cfg.embed.dim = args.embed_dim as usize; + cfg.skip_embeddings = args.no_embed; + cfg.pipeline_elab = !args.no_pipeline_elab; + + let inputs = resolve_inputs(sess, workspace, &args.res, &rt)?; + let path = args.output.unwrap_or_else(|| cfg.ir_path()); + if let Some(parent) = path.parent() { + std::fs::create_dir_all(parent).into_diagnostic()?; + } + let manifest = bender_kg_extract::extract_to_jsonl(&inputs, &path) + .into_diagnostic() + .wrap_err("kg parse")?; + emit( + &serde_json::json!({ + "design": manifest.identity.alias, + "ir_path": path, + "modules": manifest.module_count, + "edges": manifest.edge_count, + "warnings": manifest.extraction_warnings, + }), + args.base.format, + None, + ) +} + +fn run_index(workspace: &Path, args: IndexArgs) -> Result<()> { + let rt = rt()?; + let root = args.base.root.clone().unwrap_or_else(|| workspace.join(".bender-kg")); + let mut cfg = CoreConfig::new(root); + cfg.embed.dim = args.embed_dim as usize; + cfg.skip_embeddings = args.no_embed; + cfg.upsert_chunk_size = args.upsert_chunk_size.max(1) as usize; + + let path = 
args.input.unwrap_or_else(|| cfg.ir_path()); + let mut engine = open_engine(&rt, cfg)?; + let count = rt + .block_on(engine.index_from_jsonl(&path)) + .into_diagnostic() + .wrap_err("kg index")?; + emit( + &serde_json::json!({"indexed": count, "from": path}), + args.base.format, + None, + ) +} + +fn run_clear(workspace: &Path, args: ClearArgs) -> Result<()> { + let rt = rt()?; + let root = args.base.root.clone().unwrap_or_else(|| workspace.join(".bender-kg")); + let cfg = CoreConfig::new(root); + let mut engine = open_engine(&rt, cfg)?; + let value = if args.all { + rt.block_on(engine.clear_all()).into_diagnostic()?; + serde_json::json!({"cleared": "all"}) + } else { + let alias = args + .design + .ok_or_else(|| miette::miette!("--design is required (or pass --all)"))?; + rt.block_on(engine.clear_design(&alias)).into_diagnostic()?; + serde_json::json!({"cleared_design": alias}) + }; + emit(&value, args.base.format, None) +} + +fn run_stats(workspace: &Path, args: StatsArgs) -> Result<()> { + let rt = rt()?; + let root = args.base.root.clone().unwrap_or_else(|| workspace.join(".bender-kg")); + let cfg = CoreConfig::new(root); + let engine = open_engine(&rt, cfg)?; + let stats = engine + .stats(args.design.as_deref()) + .into_diagnostic() + .wrap_err("stats")?; + emit( + &serde_json::to_value(stats).into_diagnostic()?, + args.base.format, + None, + ) +} + +fn run_mcp(workspace: &Path, args: McpArgs) -> Result<()> { + let rt = rt()?; + let root = args.base.root.clone().unwrap_or_else(|| workspace.join(".bender-kg")); + let cfg = CoreConfig::new(root); + rt.block_on(bender_kg_mcp::serve_stdio(cfg)) + .map_err(|e| miette::miette!("kg mcp: {e}"))?; + Ok(()) +} + +fn run_query(workspace: &Path, args: QueryArgs) -> Result<()> { + let rt = rt()?; + let root = args.base.root.clone().unwrap_or_else(|| workspace.join(".bender-kg")); + let cfg = CoreConfig::new(root); + let engine = open_engine(&rt, cfg)?; + let value = dispatch_query(&rt, &engine, &args.op)?; + emit(&value, 
args.base.format, Some(&args.op)) +} + +fn dispatch_query( + rt: &tokio::runtime::Runtime, + engine: &Engine, + op: &QueryOp, +) -> Result { + let v = match op { + QueryOp::SearchModules { query, top_k, design } => { + let hits = rt + .block_on(engine.search_modules(query, *top_k, design.as_deref().filter(|s| !s.is_empty()))) + .into_diagnostic()?; + serde_json::to_value(hits).into_diagnostic()? + } + QueryOp::GetModule { name } => match engine.get_module(name).into_diagnostic()? { + Some(m) => serde_json::to_value(m).into_diagnostic()?, + None => serde_json::json!({"error": format!("Module '{name}' not found")}), + }, + QueryOp::GetSubgraph { name, depth } => { + let sg = engine.get_subgraph(name, *depth).into_diagnostic()?; + serde_json::json!({ "root": name, "nodes": sg.nodes, "edges": sg.edges }) + } + QueryOp::GetInstanceContext { parent, child } => { + serde_json::to_value(engine.get_instance_context(parent, child).into_diagnostic()?) + .into_diagnostic()? + } + QueryOp::GetParents { name } => { + serde_json::to_value(engine.get_parents(name).into_diagnostic()?).into_diagnostic()? + } + QueryOp::GetChildren { name } => { + serde_json::to_value(engine.get_children(name).into_diagnostic()?).into_diagnostic()? + } + QueryOp::GetPorts { name } => match engine.get_module(name).into_diagnostic()? { + Some(m) => serde_json::to_value(m.ports).into_diagnostic()?, + None => serde_json::json!({"error": format!("Module '{name}' not found")}), + }, + QueryOp::FindByProtocol { protocol, design } => { + serde_json::to_value( + engine.find_by_protocol(protocol, design.as_deref().filter(|s| !s.is_empty())) + .into_diagnostic()?, + ) + .into_diagnostic()? + } + QueryOp::GetSourceSnippet { module_name, element, instance_name } => { + engine.get_source_snippet(module_name, element, instance_name).into_diagnostic()? + } + QueryOp::TraceHierarchyPath { from_module, to_module } => { + serde_json::to_value(engine.trace_hierarchy_path(from_module, to_module).into_diagnostic()?) 
+ .into_diagnostic()? + } + QueryOp::CheckConnectivity { module_name, depth } => { + let findings = engine.check_connectivity(module_name, *depth).into_diagnostic()?; + serde_json::json!({ + "module": module_name, "depth": depth, + "issue_count": findings.len(), "findings": findings, + }) + } + QueryOp::TraceParameter { module_name, param_name, recursive, depth } => { + if *recursive { + let res = engine.trace_parameter_recursive(module_name, param_name, *depth).into_diagnostic()?; + serde_json::json!({ + "module": module_name, "parameter": param_name, + "recursive": true, "max_depth": depth, + "affected_instances": res.len(), "instances": res, + }) + } else { + let res = engine.trace_parameter(module_name, param_name).into_diagnostic()?; + serde_json::json!({ + "module": module_name, "parameter": param_name, + "affected_instances": res.len(), "instances": res, + }) + } + } + QueryOp::TraceSignal { module_name, signal_name, recursive, depth } => { + if *recursive { + let res = engine.trace_signal_recursive(module_name, signal_name, *depth).into_diagnostic()?; + serde_json::json!({ + "module": module_name, "signal": signal_name, + "recursive": true, "max_depth": depth, + "connections": res.len(), "instances": res, + }) + } else { + let res = engine.trace_signal(module_name, signal_name).into_diagnostic()?; + serde_json::json!({ + "module": module_name, "signal": signal_name, + "connections": res.len(), "instances": res, + }) + } + } + QueryOp::MatchInterfaces { module_a, module_b, prefix_a, prefix_b } => { + engine.match_interfaces(module_a, module_b, prefix_a, prefix_b).into_diagnostic()? + } + QueryOp::FindStructurallySimilar { module_name, min_overlap, design } => { + let res = engine + .find_structurally_similar(module_name, *min_overlap, design.as_deref().filter(|s| !s.is_empty())) + .into_diagnostic()?; + serde_json::json!({"module": module_name, "candidates": res}) + } + }; + Ok(v) +} + +/// Single output dispatcher used by every `kg` subcommand. 
`op` is `Some` +/// only for `kg query` calls; for the summary outputs of the other +/// subcommands, `Tree` falls back to pretty-printed JSON. +fn emit(value: &serde_json::Value, format: OutputFormat, op: Option<&QueryOp>) -> Result<()> { + let s = match format { + OutputFormat::Json => serde_json::to_string(value).into_diagnostic()?, + OutputFormat::Tree => op + .and_then(|op| format_tree(value, op)) + .map(|s| { + if s.ends_with('\n') { + s + } else { + format!("{s}\n") + } + }) + .unwrap_or_else(|| { + // Fallback to pretty JSON for commands without tree renderer + serde_json::to_string_pretty(value).unwrap_or_else(|_| value.to_string()) + }), + }; + let stdout = std::io::stdout(); + let mut handle = stdout.lock(); + handle.write_all(s.as_bytes()).into_diagnostic()?; + if !s.ends_with('\n') { + handle.write_all(b"\n").into_diagnostic()?; + } + Ok(()) +} + +/// Render a query op's JSON output as a human-readable indented tree. +/// Every `QueryOp` variant has a dedicated renderer; the function still +/// returns `Option` so callers can defensively fall back to JSON +/// if a future variant is added without a renderer. +fn format_tree(value: &serde_json::Value, op: &QueryOp) -> Option { + let mut out = String::new(); + match op { + QueryOp::GetInstanceContext { .. } | QueryOp::TraceHierarchyPath { .. } => { + render_edges(&mut out, value) + } + QueryOp::GetSubgraph { .. } => render_subgraph(&mut out, value), + QueryOp::GetParents { .. } => render_module_list(&mut out, value, "(no parents)"), + QueryOp::GetChildren { .. } => render_module_list(&mut out, value, "(no children)"), + QueryOp::SearchModules { .. } => render_module_list(&mut out, value, "(no hits)"), + QueryOp::FindByProtocol { .. } => { + render_module_list(&mut out, value, "(no matching modules)") + } + QueryOp::FindStructurallySimilar { .. } => render_similar(&mut out, value), + QueryOp::GetModule { .. } => render_module(&mut out, value), + QueryOp::GetPorts { ..
} => render_ports(&mut out, value), + QueryOp::GetSourceSnippet { .. } => render_snippet(&mut out, value), + QueryOp::TraceParameter { .. } => render_trace_parameter(&mut out, value), + QueryOp::TraceSignal { .. } => render_trace_signal(&mut out, value), + QueryOp::CheckConnectivity { .. } => render_check_connectivity(&mut out, value), + QueryOp::MatchInterfaces { .. } => render_match_interfaces(&mut out, value), + } + Some(out) +} + +struct Indent(usize); +impl fmt::Display for Indent { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for _ in 0..self.0 { + f.write_str(" ")?; + } + Ok(()) + } +} + +fn location_string(file: &str, lstart: Option, lend: Option) -> String { + match (file.is_empty(), lstart, lend) { + (false, Some(s), Some(e)) => format!(" [{file}:{s}-{e}]"), + (false, Some(s), None) => format!(" [{file}:{s}]"), + (false, _, _) => format!(" [{file}]"), + _ => String::new(), + } +} + +/// Convert absolute path to relative path for better readability. +/// Strips common prefixes like workspace root or /proj_soc paths. 
+fn relative_path(path: &str) -> &str { + // Try to find a good cut point - look for common patterns + if let Some(pos) = path.rfind("/hw/") { + return &path[pos + 1..]; // Return "hw/smu/rtl/smu.sv" + } + if let Some(pos) = path.rfind("/src/") { + return &path[pos + 1..]; + } + // If no pattern found, return the filename portion at minimum + path.rsplit('/').next().unwrap_or(path) +} + +fn render_edges(out: &mut String, value: &serde_json::Value) { + let arr = value.as_array().filter(|a| !a.is_empty()); + let Some(arr) = arr else { + out.push_str("(no edges)\n"); + return; + }; + for edge in arr { + render_edge(out, edge, 0); + } +} + +fn render_subgraph(out: &mut String, value: &serde_json::Value) { + let root = value.get("root").and_then(|v| v.as_str()).unwrap_or("?"); + let node_count = value.get("nodes").and_then(|v| v.as_array()).map(|a| a.len()).unwrap_or(0); + let edges = value.get("edges").and_then(|v| v.as_array()).cloned().unwrap_or_default(); + let edge_count = edges.len(); + + let _ = writeln!(out, "{} {} module(s), {} instantiation(s)\n", + root.if_supports_color(Stream::Stdout, |t| t.yellow()), + node_count, edge_count); + + // Build adjacency map: parent -> Vec + let mut adj: BTreeMap> = BTreeMap::new(); + for edge in &edges { + let parent = edge.get("parent").and_then(|v| v.as_str()).unwrap_or(""); + adj.entry(parent.to_string()).or_default().push(edge); + } + + let mut on_path = std::collections::HashSet::new(); + render_subgraph_nodes(out, root, &adj, "", &mut on_path); +} + +fn render_subgraph_nodes( + out: &mut String, + module: &str, + adj: &BTreeMap>, + prefix: &str, + on_path: &mut std::collections::HashSet, +) { + let children = match adj.get(module) { + Some(c) if !c.is_empty() => c, + _ => return, + }; + let total = children.len(); + on_path.insert(module.to_string()); + for (idx, edge) in children.iter().enumerate() { + let is_last = idx + 1 == total; + let box_char = if is_last { "└─" } else { "├─" }; + let child_pfx = if is_last { 
format!("{} ", prefix) } else { format!("{}│ ", prefix) }; + let child_mod = edge.get("child").and_then(|v| v.as_str()).unwrap_or("?"); + let inst_name = edge.get("instance_name").and_then(|v| v.as_str()).unwrap_or("?"); + let file = edge.get("parent_file_path").and_then(|v| v.as_str()).unwrap_or(""); + let lstart = edge.get("line_start").and_then(|v| v.as_i64()); + let lend = edge.get("line_end").and_then(|v| v.as_i64()); + let relpath = relative_path(file); + let loc = match (lstart, lend) { + (Some(s), Some(e)) => format!(" {relpath}:{s}-{e}"), + (Some(s), None) => format!(" {relpath}:{s}"), + _ if !file.is_empty() => format!(" {relpath}"), + _ => String::new(), + }; + let cycle = on_path.contains(child_mod); + let cycle_mark = if cycle { " (↑ cycle)" } else { "" }; + + let _ = writeln!(out, "{}{} {} → {}{}{}", + prefix, + box_char.if_supports_color(Stream::Stdout, |t| t.blue()), + inst_name, + child_mod.if_supports_color(Stream::Stdout, |t| t.yellow()), + loc.if_supports_color(Stream::Stdout, |t| t.dimmed()), + cycle_mark.if_supports_color(Stream::Stdout, |t| t.yellow()), + ); + + if !cycle { + render_subgraph_nodes(out, child_mod, adj, &child_pfx, on_path); + } + } + on_path.remove(module); +} + +/// Shared module-list renderer: one line per module, pulling in only the +/// optional fields that exist in the JSON payload. Used by `GetParents`, +/// `SearchModules`, `FindByProtocol`, and (via `render_similar`) the +/// candidate list of `FindStructurallySimilar`. 
+fn render_module_list(out: &mut String, value: &serde_json::Value, empty_msg: &str) { + let arr = value.as_array().filter(|a| !a.is_empty()); + let Some(arr) = arr else { + let _ = writeln!(out, "{empty_msg}"); + return; + }; + for m in arr { + render_module_list_item(out, m); + } +} + +fn render_module_list_item(out: &mut String, m: &serde_json::Value) { + let name = m.get("name").and_then(|v| v.as_str()).unwrap_or("?"); + let design = m.get("design").and_then(|v| v.as_str()).filter(|s| !s.is_empty()); + let score = m.get("score").and_then(|v| v.as_f64()); + let shared = m.get("shared_ports").and_then(|v| v.as_i64()); + let file = m.get("file_path").and_then(|v| v.as_str()).filter(|s| !s.is_empty()); + + let mut parts = vec![name.if_supports_color(Stream::Stdout, |t| t.yellow()).to_string()]; + if let Some(d) = design { + parts.push(format!("{}", format!("[{d}]").if_supports_color(Stream::Stdout, |t| t.dimmed()))); + } + if let Some(s) = score { + parts.push(format!("{}", format!("score={s:.3}").if_supports_color(Stream::Stdout, |t| t.green()))); + } + if let Some(sp) = shared { + parts.push(format!("shared_ports={sp}")); + } + if let Some(f) = file { + parts.push(format!("{}", f.if_supports_color(Stream::Stdout, |t| t.dimmed()))); + } + let _ = writeln!(out, "{}", parts.join(" ")); +} + +fn render_similar(out: &mut String, value: &serde_json::Value) { + let module = value.get("module").and_then(|v| v.as_str()).unwrap_or("?"); + let candidates = value + .get("candidates") + .cloned() + .unwrap_or(serde_json::Value::Null); + let _ = writeln!(out, "{module}: structurally-similar candidates"); + let arr = candidates.as_array().filter(|a| !a.is_empty()); + let Some(arr) = arr else { + out.push_str(" (none)\n"); + return; + }; + for m in arr { + out.push_str(" "); + render_module_list_item(out, m); + } +} + +fn render_module(out: &mut String, value: &serde_json::Value) { + if let Some(err) = value.get("error").and_then(|v| v.as_str()) { + let _ = writeln!(out, 
"{err}"); + return; + } + let name = value.get("name").and_then(|v| v.as_str()).unwrap_or("?"); + let design = value.get("design").and_then(|v| v.as_str()).unwrap_or("?"); + let file = value.get("file_path").and_then(|v| v.as_str()).unwrap_or(""); + let lstart = value.get("line_start").and_then(|v| v.as_i64()); + let lend = value.get("line_end").and_then(|v| v.as_i64()); + + let relpath = relative_path(file); + let loc = match (lstart, lend) { + (Some(s), Some(e)) if !file.is_empty() => format!(" {relpath}:{s}-{e}"), + _ if !file.is_empty() => format!(" {relpath}"), + _ => String::new(), + }; + + let _ = writeln!(out, "{} {}{}", + name.if_supports_color(Stream::Stdout, |t| t.yellow()), + format!("[{design}]").if_supports_color(Stream::Stdout, |t| t.dimmed()), + loc.if_supports_color(Stream::Stdout, |t| t.dimmed()), + ); + + if let Some(desc) = value.get("description").and_then(|v| v.as_str()).filter(|s| !s.is_empty()) { + let _ = writeln!(out, " description: {desc}"); + } + + if let Some(params) = value.get("parameters").and_then(|v| v.as_array()).filter(|a| !a.is_empty()) { + let _ = writeln!(out, " {} ({}):", + "parameters".if_supports_color(Stream::Stdout, |t| t.bold()), + params.len()); + for p in params { + let pname = p.get("name").and_then(|v| v.as_str()).unwrap_or("?"); + let kind = p.get("kind").and_then(|v| v.as_str()).unwrap_or("?"); + let dv = p.get("default_value").and_then(|v| v.as_str()).unwrap_or(""); + if dv.is_empty() { + let _ = writeln!(out, " {}: {}", + pname.if_supports_color(Stream::Stdout, |t| t.magenta()), + kind.if_supports_color(Stream::Stdout, |t| t.dimmed())); + } else { + let _ = writeln!(out, " {}: {} = {}", + pname.if_supports_color(Stream::Stdout, |t| t.magenta()), + kind.if_supports_color(Stream::Stdout, |t| t.dimmed()), + dv.if_supports_color(Stream::Stdout, |t| t.green())); + } + } + } + + if let Some(ports) = value.get("ports").and_then(|v| v.as_array()).filter(|a| !a.is_empty()) { + let _ = writeln!(out, " {} ({}):", + 
"ports".if_supports_color(Stream::Stdout, |t| t.bold()), + ports.len()); + for p in ports { + render_port(out, p, 2); + } + } + + if let Some(insts) = value.get("instantiations").and_then(|v| v.as_array()).filter(|a| !a.is_empty()) { + let _ = writeln!(out, " {} ({}):", + "instantiations".if_supports_color(Stream::Stdout, |t| t.bold()), + insts.len()); + for inst in insts { + let mn = inst.get("module_name").and_then(|v| v.as_str()).unwrap_or("?"); + let inm = inst.get("instance_name").and_then(|v| v.as_str()).unwrap_or("?"); + let _ = writeln!(out, " {} ({})", + mn.if_supports_color(Stream::Stdout, |t| t.yellow()), + inm.if_supports_color(Stream::Stdout, |t| t.dimmed())); + } + } + + if let Some(imps) = value.get("imports").and_then(|v| v.as_array()).filter(|a| !a.is_empty()) { + let names: Vec<&str> = imps + .iter() + .filter_map(|i| i.get("package_name").and_then(|v| v.as_str())) + .collect(); + let _ = writeln!(out, " {} ({}): {}", + "imports".if_supports_color(Stream::Stdout, |t| t.bold()), + names.len(), + names.join(", ").if_supports_color(Stream::Stdout, |t| t.dimmed())); + } +} + +fn render_ports(out: &mut String, value: &serde_json::Value) { + if let Some(err) = value.get("error").and_then(|v| v.as_str()) { + let _ = writeln!(out, "{err}"); + return; + } + let Some(arr) = value.as_array().filter(|a| !a.is_empty()) else { + out.push_str("(no ports)\n"); + return; + }; + for p in arr { + render_port(out, p, 0); + } +} + +fn render_port(out: &mut String, p: &serde_json::Value, depth: usize) { + let name = p.get("name").and_then(|v| v.as_str()).unwrap_or("?"); + let dir = p.get("direction").and_then(|v| v.as_str()).unwrap_or("?"); + let bw = p.get("bit_width").and_then(|v| v.as_i64()); + let we = p.get("width_expr").and_then(|v| v.as_str()).filter(|s| !s.is_empty()); + let ts = p.get("type_str").and_then(|v| v.as_str()).filter(|s| !s.is_empty()); + + let width_s = match (bw, we) { + (Some(b), _) => format!(" width={b}"), + (None, Some(e)) => format!(" 
width={e}"), + _ => String::new(), + }; + let type_s = ts.map(|t| format!(" type={t}")).unwrap_or_default(); + let dir_colored = match dir { + "input" => format!("{}", dir.if_supports_color(Stream::Stdout, |t| t.green())), + "output" => format!("{}", dir.if_supports_color(Stream::Stdout, |t| t.bright_green())), + "inout" => format!("{}", dir.if_supports_color(Stream::Stdout, |t| t.blue())), + _ => dir.to_string(), + }; + let _ = writeln!(out, "{}{} ({}){}{}", + Indent(depth), + name.if_supports_color(Stream::Stdout, |t| t.cyan()), + dir_colored, + width_s.if_supports_color(Stream::Stdout, |t| t.dimmed()), + type_s.if_supports_color(Stream::Stdout, |t| t.dimmed()), + ); +} + +fn render_snippet(out: &mut String, value: &serde_json::Value) { + if let Some(s) = value.as_str() { + out.push_str(s); + if !s.ends_with('\n') { + out.push('\n'); + } + } else if let Some(err) = value.get("error").and_then(|v| v.as_str()) { + let _ = writeln!(out, "{err}"); + } else { + let _ = writeln!(out, "(no snippet)"); + } +} + +fn render_trace_parameter(out: &mut String, value: &serde_json::Value) { + if value.get("recursive").and_then(|v| v.as_bool()).unwrap_or(false) { + render_trace_parameter_recursive(out, value); + return; + } + + let module = value.get("module").and_then(|v| v.as_str()).unwrap_or("?"); + let param = value.get("parameter").and_then(|v| v.as_str()).unwrap_or("?"); + let n = value.get("affected_instances").and_then(|v| v.as_u64()).unwrap_or(0); + + let Some(arr) = value + .get("instances") + .and_then(|v| v.as_array()) + .filter(|a| !a.is_empty()) + else { + let _ = writeln!(out, "{}.{} {} no propagations found", + module.if_supports_color(Stream::Stdout, |t| t.yellow()), + param.if_supports_color(Stream::Stdout, |t| t.magenta()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + ); + return; + }; + + // Group parameters by instance + let mut instances: BTreeMap> = BTreeMap::new(); + for inst in arr { + let parent = inst.get("parent").and_then(|v| 
v.as_str()).unwrap_or("?"); + let child = inst.get("child").and_then(|v| v.as_str()).unwrap_or("?"); + let inm = inst.get("instance").and_then(|v| v.as_str()).unwrap_or("?"); + instances.entry(format!("{parent}::{child}::{inm}")).or_default().push(inst); + } + + let inst_count = instances.len(); + let _ = writeln!(out, "{}.{} {} {n} propagations across {inst_count} instance(s)\n", + module.if_supports_color(Stream::Stdout, |t| t.yellow()), + param.if_supports_color(Stream::Stdout, |t| t.magenta()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + ); + + for (idx, (_key, params)) in instances.into_iter().enumerate() { + let is_last = idx + 1 == inst_count; + let box_char = if is_last { "└─" } else { "├─" }; + let indent_char = if is_last { " " } else { "│ " }; + // All params in group share same instance metadata + let first = params[0]; + let parent = first.get("parent").and_then(|v| v.as_str()).unwrap_or("?"); + let child = first.get("child").and_then(|v| v.as_str()).unwrap_or("?"); + let inm = first.get("instance").and_then(|v| v.as_str()).unwrap_or("?"); + let file = first.get("parent_file_path").and_then(|v| v.as_str()).unwrap_or(""); + let lstart = first.get("line_start").and_then(|v| v.as_i64()); + let lend = first.get("line_end").and_then(|v| v.as_i64()); + + let relpath = relative_path(file); + let loc = match (lstart, lend) { + (Some(s), Some(e)) => format!("{relpath}:{s}-{e}"), + (Some(s), None) => format!("{relpath}:{s}"), + _ => relpath.to_string(), + }; + + let count = params.len(); + let plural = if count == 1 { "parameter" } else { "parameters" }; + let _ = writeln!(out, "{} {} ({} {} {}) {}", + box_char.if_supports_color(Stream::Stdout, |t| t.blue()), + inm, + parent.if_supports_color(Stream::Stdout, |t| t.yellow()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + child.if_supports_color(Stream::Stdout, |t| t.yellow()), + loc.if_supports_color(Stream::Stdout, |t| t.dimmed()), + ); + let _ = writeln!(out, "{} {count} {plural}:", 
indent_char); + + let param_count = params.len(); + for (pidx, inst) in params.into_iter().enumerate() { + let is_last_param = pidx + 1 == param_count; + let param_box = if is_last_param { " └─" } else { " ├─" }; + let child_param = inst.get("child_parameter").and_then(|v| v.as_str()).unwrap_or("?"); + let call = inst.get("call_site_expression").and_then(|v| v.as_str()).unwrap_or("?"); + let default_val = inst.get("child_param_default").and_then(|v| v.as_str()); + + let default_str = match default_val { + Some(d) if !d.is_empty() => + format!(" (default: {})", d.if_supports_color(Stream::Stdout, |t| t.dimmed())), + _ => String::new(), + }; + let _ = writeln!(out, "{}{} {:<22} {} {}{}", + indent_char, + param_box.if_supports_color(Stream::Stdout, |t| t.blue()), + child_param.if_supports_color(Stream::Stdout, |t| t.magenta()), + "←".if_supports_color(Stream::Stdout, |t| t.blue()), + call.if_supports_color(Stream::Stdout, |t| t.green()), + default_str, + ); + + // Show resolved value if available (elab mode) + if let Some(rv) = inst.get("resolved_value").and_then(|v| v.as_str()) { + let _ = writeln!(out, "{} resolved: {}", + indent_char, + rv.if_supports_color(Stream::Stdout, |t| t.bright_green()), + ); + } + } + + out.push('\n'); // Blank line between instances + } +} + +fn render_trace_signal(out: &mut String, value: &serde_json::Value) { + if value.get("recursive").and_then(|v| v.as_bool()).unwrap_or(false) { + render_trace_signal_recursive(out, value); + return; + } + + let module = value.get("module").and_then(|v| v.as_str()).unwrap_or("?"); + let signal = value.get("signal").and_then(|v| v.as_str()).unwrap_or("?"); + let n = value.get("connections").and_then(|v| v.as_u64()).unwrap_or(0); + + let Some(arr) = value + .get("instances") + .and_then(|v| v.as_array()) + .filter(|a| !a.is_empty()) + else { + let _ = writeln!(out, "{}.{} {} no connections found", + module.if_supports_color(Stream::Stdout, |t| t.yellow()), + signal.if_supports_color(Stream::Stdout, |t| 
t.cyan()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + ); + return; + }; + + // Group ports by instance (parent::child::instance_name) + let mut instances: BTreeMap> = BTreeMap::new(); + for conn in arr { + let parent = conn.get("parent").and_then(|v| v.as_str()).unwrap_or("?"); + let child = conn.get("child").and_then(|v| v.as_str()).unwrap_or("?"); + let inm = conn.get("instance").and_then(|v| v.as_str()).unwrap_or("?"); + instances.entry(format!("{parent}::{child}::{inm}")).or_default().push(conn); + } + + let inst_count = instances.len(); + let _ = writeln!(out, "{}.{} {} {n} connection(s) across {inst_count} instance(s)\n", + module.if_supports_color(Stream::Stdout, |t| t.yellow()), + signal.if_supports_color(Stream::Stdout, |t| t.cyan()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + ); + + for (idx, (_key, ports)) in instances.into_iter().enumerate() { + let is_last = idx + 1 == inst_count; + let box_char = if is_last { "└─" } else { "├─" }; + let indent_char = if is_last { " " } else { "│ " }; + let first = ports[0]; + let parent = first.get("parent").and_then(|v| v.as_str()).unwrap_or("?"); + let child = first.get("child").and_then(|v| v.as_str()).unwrap_or("?"); + let inm = first.get("instance").and_then(|v| v.as_str()).unwrap_or("?"); + let file = first.get("parent_file_path").and_then(|v| v.as_str()).unwrap_or(""); + let lstart = first.get("line_start").and_then(|v| v.as_i64()); + let lend = first.get("line_end").and_then(|v| v.as_i64()); + + let relpath = relative_path(file); + let loc = match (lstart, lend) { + (Some(s), Some(e)) => format!("{relpath}:{s}-{e}"), + (Some(s), None) => format!("{relpath}:{s}"), + _ => relpath.to_string(), + }; + + let count = ports.len(); + let plural = if count == 1 { "port" } else { "ports" }; + let _ = writeln!(out, "{} {} ({} {} {}) {}", + box_char.if_supports_color(Stream::Stdout, |t| t.blue()), + inm, + parent.if_supports_color(Stream::Stdout, |t| t.yellow()), + 
"→".if_supports_color(Stream::Stdout, |t| t.blue()), + child.if_supports_color(Stream::Stdout, |t| t.yellow()), + loc.if_supports_color(Stream::Stdout, |t| t.dimmed()), + ); + let _ = writeln!(out, "{} {count} {plural}:", indent_char); + + let port_count = ports.len(); + for (pidx, conn) in ports.into_iter().enumerate() { + let is_last_port = pidx + 1 == port_count; + let port_box = if is_last_port { " └─" } else { " ├─" }; + let child_port = conn.get("child_port").and_then(|v| v.as_str()).unwrap_or("?"); + let expr = conn.get("parent_expression").and_then(|v| v.as_str()).unwrap_or("?"); + let _ = writeln!(out, "{}{} {:<22} {} {}", + indent_char, + port_box.if_supports_color(Stream::Stdout, |t| t.blue()), + child_port.if_supports_color(Stream::Stdout, |t| t.cyan()), + "←".if_supports_color(Stream::Stdout, |t| t.blue()), + expr.if_supports_color(Stream::Stdout, |t| t.green()), + ); + } + + out.push('\n'); + } +} + +fn render_trace_signal_recursive(out: &mut String, value: &serde_json::Value) { + let module = value.get("module").and_then(|v| v.as_str()).unwrap_or("?"); + let signal = value.get("signal").and_then(|v| v.as_str()).unwrap_or("?"); + let max_depth = value.get("max_depth").and_then(|v| v.as_i64()).unwrap_or(5); + let n = value.get("connections").and_then(|v| v.as_u64()).unwrap_or(0); + + let Some(arr) = value + .get("instances") + .and_then(|v| v.as_array()) + .filter(|a| !a.is_empty()) + else { + let _ = writeln!(out, "{}.{} {} no connections found", + module.if_supports_color(Stream::Stdout, |t| t.yellow()), + signal.if_supports_color(Stream::Stdout, |t| t.cyan()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + ); + return; + }; + + let _ = writeln!(out, "{}.{} {} {n} connection(s), depth ≤ {max_depth}\n", + module.if_supports_color(Stream::Stdout, |t| t.yellow()), + signal.if_supports_color(Stream::Stdout, |t| t.cyan()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + ); + render_trace_tree_nodes(out, arr, "", &|inst| { + let port = 
inst["child_port"].as_str().unwrap_or("?"); + format!("[→ {}]", port.if_supports_color(Stream::Stdout, |t| t.cyan())) + }); +} + +/// Generic recursive tree renderer for `trace-signal --recursive` and +/// `trace-parameter --recursive`. `annotate(inst)` returns the trailing +/// `[→ ...]` annotation string that differs between the two commands. +fn render_trace_tree_nodes( + out: &mut String, + instances: &[serde_json::Value], + prefix: &str, + annotate: &dyn Fn(&serde_json::Value) -> String, +) { + let total = instances.len(); + for (idx, inst) in instances.iter().enumerate() { + let is_last = idx + 1 == total; + let box_char = if is_last { "└─" } else { "├─" }; + let child_pfx = if is_last { format!("{} ", prefix) } else { format!("{}│ ", prefix) }; + + let inm = inst["instance"].as_str().unwrap_or("?"); + let parent = inst["parent"].as_str().unwrap_or("?"); + let child = inst["child"].as_str().unwrap_or("?"); + let file = inst["parent_file_path"].as_str().unwrap_or(""); + let lstart = inst["line_start"].as_i64(); + let lend = inst["line_end"].as_i64(); + let relpath = relative_path(file); + let loc = match (lstart, lend) { + (Some(s), Some(e)) => format!("{relpath}:{s}-{e}"), + (Some(s), None) => format!("{relpath}:{s}"), + _ => relpath.to_string(), + }; + let children = inst["children"].as_array().map(|a| a.as_slice()).unwrap_or(&[]); + let leaf = if children.is_empty() { " (leaf)" } else { "" }; + let ann = annotate(inst); + + let _ = writeln!(out, "{}{} {} ({} {} {}) {} {}{}", + prefix, + box_char.if_supports_color(Stream::Stdout, |t| t.blue()), + inm, + parent.if_supports_color(Stream::Stdout, |t| t.yellow()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + child.if_supports_color(Stream::Stdout, |t| t.yellow()), + loc.if_supports_color(Stream::Stdout, |t| t.dimmed()), + ann, + leaf.if_supports_color(Stream::Stdout, |t| t.dimmed()), + ); + + if !children.is_empty() { + render_trace_tree_nodes(out, children, &child_pfx, annotate); + } + } +} + +fn 
render_trace_parameter_recursive(out: &mut String, value: &serde_json::Value) { + let module = value.get("module").and_then(|v| v.as_str()).unwrap_or("?"); + let param = value.get("parameter").and_then(|v| v.as_str()).unwrap_or("?"); + let max_depth = value.get("max_depth").and_then(|v| v.as_i64()).unwrap_or(5); + let n = value.get("affected_instances").and_then(|v| v.as_u64()).unwrap_or(0); + + let Some(arr) = value + .get("instances") + .and_then(|v| v.as_array()) + .filter(|a| !a.is_empty()) + else { + let _ = writeln!(out, "{}.{} {} no propagations found", + module.if_supports_color(Stream::Stdout, |t| t.yellow()), + param.if_supports_color(Stream::Stdout, |t| t.magenta()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + ); + return; + }; + + let _ = writeln!(out, "{}.{} {} {n} propagation(s), depth ≤ {max_depth}\n", + module.if_supports_color(Stream::Stdout, |t| t.yellow()), + param.if_supports_color(Stream::Stdout, |t| t.magenta()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + ); + render_trace_tree_nodes(out, arr, "", &|inst| { + let child_param = inst["child_parameter"].as_str().unwrap_or("?"); + let call = inst["call_site_expression"].as_str().unwrap_or("?"); + format!("[→ {} ← {}]", + child_param.if_supports_color(Stream::Stdout, |t| t.magenta()), + call.if_supports_color(Stream::Stdout, |t| t.green()), + ) + }); +} + +fn render_check_connectivity(out: &mut String, value: &serde_json::Value) { + let module = value.get("module").and_then(|v| v.as_str()).unwrap_or("?"); + let depth = value.get("depth").and_then(|v| v.as_i64()).unwrap_or(0); + let n = value + .get("issue_count") + .and_then(|v| v.as_u64()) + .unwrap_or(0); + let _ = writeln!(out, "{module} @ depth={depth}: {n} issue(s)"); + let Some(arr) = value + .get("findings") + .and_then(|v| v.as_array()) + .filter(|a| !a.is_empty()) + else { + return; + }; + for f in arr { + let kind = f.get("kind").and_then(|v| v.as_str()).unwrap_or("?"); + let parent = 
f.get("parent").and_then(|v| v.as_str()).unwrap_or("?"); + let child = f.get("child").and_then(|v| v.as_str()).unwrap_or("?"); + let inst = f.get("instance").and_then(|v| v.as_str()).unwrap_or("?"); + let port = f.get("port").and_then(|v| v.as_str()).unwrap_or("?"); + let iw = f.get("instance_width").and_then(|v| v.as_i64()); + let dw = f.get("declared_width").and_then(|v| v.as_i64()); + let mut line = format!(" - {kind}: {parent} -> {child} ({inst}).{port}"); + if let (Some(iw), Some(dw)) = (iw, dw) { + line.push_str(&format!(" instance={iw} declared={dw}")); + } + let _ = writeln!(out, "{line}"); + if let Some(fields) = f + .get("field_breakdown") + .and_then(|v| v.as_object()) + .filter(|m| !m.is_empty()) + { + for (fname, fw) in fields { + let fv = fw.as_i64().unwrap_or(0); + let _ = writeln!(out, " {fname} {fv}"); + } + } + } +} + +fn render_match_interfaces(out: &mut String, value: &serde_json::Value) { + let a = value + .get("module_a") + .and_then(|v| v.as_str()) + .unwrap_or("?"); + let b = value + .get("module_b") + .and_then(|v| v.as_str()) + .unwrap_or("?"); + let _ = writeln!(out, "{a} <-> {b}"); + let matched = value + .get("matched") + .and_then(|v| v.as_array()) + .map(|a| a.as_slice()) + .unwrap_or(&[]); + let _ = writeln!(out, " matched ({}):", matched.len()); + for m in matched { + let port = m.get("port").and_then(|v| v.as_str()).unwrap_or("?"); + let ad = m.get("a_direction").and_then(|v| v.as_str()).unwrap_or("?"); + let bd = m.get("b_direction").and_then(|v| v.as_str()).unwrap_or("?"); + let aw = m.get("a_width").and_then(|v| v.as_i64()); + let bw = m.get("b_width").and_then(|v| v.as_i64()); + let dir_ok = m + .get("direction_complementary") + .and_then(|v| v.as_bool()) + .unwrap_or(false); + let dir_marker = if dir_ok { "" } else { " (NOT COMPLEMENTARY)" }; + let aw_s = aw.map(|w| w.to_string()).unwrap_or_else(|| "?".into()); + let bw_s = bw.map(|w| w.to_string()).unwrap_or_else(|| "?".into()); + let _ = writeln!( + out, + " {port} 
a:{ad}/{aw_s} <-> b:{bd}/{bw_s}{dir_marker}" + ); + } + if let Some(conflicts) = value + .get("width_conflicts") + .and_then(|v| v.as_array()) + .filter(|a| !a.is_empty()) + { + let _ = writeln!(out, " width_conflicts ({}):", conflicts.len()); + for c in conflicts { + let port = c.get("port").and_then(|v| v.as_str()).unwrap_or("?"); + let aw = c.get("a_width").and_then(|v| v.as_i64()); + let bw = c.get("b_width").and_then(|v| v.as_i64()); + let _ = writeln!( + out, + " {port} a={} b={}", + aw.map(|w| w.to_string()).unwrap_or_else(|| "?".into()), + bw.map(|w| w.to_string()).unwrap_or_else(|| "?".into()), + ); + } + } + render_unmatched(out, value, "unmatched_a", a); + render_unmatched(out, value, "unmatched_b", b); +} + +fn render_unmatched(out: &mut String, value: &serde_json::Value, key: &str, label: &str) { + let Some(arr) = value + .get(key) + .and_then(|v| v.as_array()) + .filter(|a| !a.is_empty()) + else { + return; + }; + let names: Vec<&str> = arr.iter().filter_map(|v| v.as_str()).collect(); + let _ = writeln!( + out, + " unmatched in {label} ({}): {}", + names.len(), + names.join(", ") + ); +} + +fn render_edge(out: &mut String, edge: &serde_json::Value, base: usize) { + let parent = edge.get("parent").and_then(|v| v.as_str()).unwrap_or("?"); + let child = edge.get("child").and_then(|v| v.as_str()).unwrap_or("?"); + let inst = edge.get("instance_name").and_then(|v| v.as_str()).unwrap_or("?"); + let file = edge.get("parent_file_path").and_then(|v| v.as_str()).unwrap_or(""); + let lstart = edge.get("line_start").and_then(|v| v.as_i64()); + let lend = edge.get("line_end").and_then(|v| v.as_i64()); + let loc = location_string(file, lstart, lend); + let _ = writeln!(out, "{}{} {} {} ({}){}", + Indent(base), + parent.if_supports_color(Stream::Stdout, |t| t.yellow()), + "→".if_supports_color(Stream::Stdout, |t| t.blue()), + child.if_supports_color(Stream::Stdout, |t| t.yellow()), + inst, + loc.if_supports_color(Stream::Stdout, |t| t.dimmed()), + ); + + let textual 
= edge.get("param_bindings").and_then(|v| v.as_object()); + let resolved = edge + .get("resolved_param_values") + .and_then(|v| v.as_object()); + let has_params = + textual.is_some_and(|m| !m.is_empty()) || resolved.is_some_and(|m| !m.is_empty()); + if has_params { + let _ = writeln!(out, "{}params:", Indent(base + 1)); + let mut keys: BTreeSet<&str> = BTreeSet::new(); + if let Some(t) = textual { + keys.extend(t.keys().map(|k| k.as_str())); + } + if let Some(r) = resolved { + keys.extend(r.keys().map(|k| k.as_str())); + } + for k in keys { + let r = resolved.and_then(|m| m.get(k)).and_then(|v| v.as_str()); + let t = textual.and_then(|m| m.get(k)).and_then(|v| v.as_str()); + match (r, t) { + (Some(rv), Some(tv)) => { + let _ = writeln!(out, "{}{k} = {rv} (call site: {tv})", Indent(base + 2)); + } + (Some(rv), None) => { + let _ = writeln!(out, "{}{k} = {rv}", Indent(base + 2)); + } + (None, Some(tv)) => { + let _ = writeln!(out, "{}{k} (call site: {tv})", Indent(base + 2)); + } + (None, None) => {} + } + } + } + + let pw = edge.get("resolved_port_widths").and_then(|v| v.as_object()); + if let Some(pw) = pw.filter(|m| !m.is_empty()) { + let _ = writeln!(out, "{}ports:", Indent(base + 1)); + for (name, w) in pw { + render_port_width(out, name, w, base + 2); + } + } +} + +fn render_port_width(out: &mut String, name: &str, w: &serde_json::Value, depth: usize) { + let total = w.get("total").and_then(|v| v.as_i64()).unwrap_or(0); + let count = w.get("element_count").and_then(|v| v.as_i64()); + match count { + Some(c) => { + let _ = writeln!( + out, + "{}{name} total={total} element_count={c}", + Indent(depth) + ); + } + None => { + let _ = writeln!(out, "{}{name} total={total}", Indent(depth)); + } + } + if let Some(fields) = w + .get("fields") + .and_then(|v| v.as_object()) + .filter(|m| !m.is_empty()) + { + for (fname, fw) in fields { + let fv = fw.as_i64().unwrap_or(0); + let _ = writeln!(out, "{}{fname} {fv}", Indent(depth + 1)); + } + } + if let Some(elem) = 
w.get("element").filter(|v| !v.is_null()) { + let _ = writeln!(out, "{}element:", Indent(depth + 1)); + let etot = elem.get("total").and_then(|v| v.as_i64()).unwrap_or(0); + let _ = writeln!(out, "{}total={etot}", Indent(depth + 2)); + if let Some(ef) = elem + .get("fields") + .and_then(|v| v.as_object()) + .filter(|m| !m.is_empty()) + { + for (fname, fw) in ef { + let fv = fw.as_i64().unwrap_or(0); + let _ = writeln!(out, "{}{fname} {fv}", Indent(depth + 2)); + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + fn instance_op() -> QueryOp { + QueryOp::GetInstanceContext { + parent: "p".into(), + child: "c".into(), + } + } + + fn subgraph_op() -> QueryOp { + QueryOp::GetSubgraph { + name: "p".into(), + depth: 2, + } + } + + fn parents_op() -> QueryOp { + QueryOp::GetParents { name: "c".into() } + } + + fn trace_path_op() -> QueryOp { + QueryOp::TraceHierarchyPath { + from_module: "p".into(), + to_module: "c".into(), + } + } + + #[test] + fn tree_renders_instance_edge_with_struct_and_array_ports() { + let value = serde_json::json!([{ + "parent": "avsbus_controller", + "child": "axi_lite_to_apb", + "instance_name": "u_axi_lite_to_apb", + "parent_file_path": "rtl/avsbus_controller.sv", + "line_start": 334, + "line_end": 342, + "design": "test_design", + "param_bindings": { + "AddrWidth": "avsbus_controller_pkg::ADDR_WIDTH", + "DataWidth": "32" + }, + "resolved_param_values": { + "AddrWidth": "32'd32", + "DataWidth": "32'd32" + }, + "port_bindings": {}, + "resolved_port_widths": { + "apb_req_o": { + "total": 74, + "fields": { "paddr": 32, "pprot": 3, "psel": 1 } + }, + "req_arr_i": { + "total": 144, + "fields": {}, + "element_count": 4, + "element": { + "total": 36, + "fields": { "addr": 32, "valid": 1 } + } + }, + "clk_i": { "total": 1, "fields": {} } + } + }]); + let out = format_tree(&value, &instance_op()).expect("instance ctx renders"); + let expected = "\ +avsbus_controller → axi_lite_to_apb (u_axi_lite_to_apb) [rtl/avsbus_controller.sv:334-342] + 
params: + AddrWidth = 32'd32 (call site: avsbus_controller_pkg::ADDR_WIDTH) + DataWidth = 32'd32 (call site: 32) + ports: + apb_req_o total=74 + paddr 32 + pprot 3 + psel 1 + clk_i total=1 + req_arr_i total=144 element_count=4 + element: + total=36 + addr 32 + valid 1 +"; + assert_eq!(out, expected); + } + + #[test] + fn tree_renders_empty_instance_context() { + let out = format_tree(&serde_json::json!([]), &instance_op()).unwrap(); + assert_eq!(out, "(no edges)\n"); + } + + #[test] + fn tree_renders_subgraph() { + let value = serde_json::json!({ + "root": "top", + "nodes": [{"name": "top"}, {"name": "mid"}, {"name": "leaf"}], + "edges": [{ + "parent": "top", + "child": "mid", + "instance_name": "u_mid", + "parent_file_path": "", + "line_start": null, + "line_end": null, + "design": "d", + "param_bindings": {}, + "resolved_param_values": {}, + "port_bindings": {}, + "resolved_port_widths": {} + }] + }); + let out = format_tree(&value, &subgraph_op()).unwrap(); + assert_eq!( + out, + "top 3 module(s), 1 instantiation(s)\n\n└─ u_mid → mid\n", + ); + } + + #[test] + fn tree_renders_parents() { + let value = serde_json::json!([ + {"name": "top", "design": "d1", "file_path": "rtl/top.sv"}, + {"name": "wrapper", "design": "d1", "file_path": "rtl/wrap.sv"} + ]); + let out = format_tree(&value, &parents_op()).unwrap(); + assert_eq!(out, "top [d1] rtl/top.sv\nwrapper [d1] rtl/wrap.sv\n"); + } + + #[test] + fn tree_renders_trace_hierarchy_path() { + let value = serde_json::json!([ + { + "parent": "top", + "child": "mid", + "instance_name": "u_mid", + "parent_file_path": "rtl/top.sv", + "line_start": 10, + "line_end": 12, + "design": "d", + "param_bindings": {}, + "resolved_param_values": {}, + "port_bindings": {}, + "resolved_port_widths": {} + } + ]); + let out = format_tree(&value, &trace_path_op()).unwrap(); + assert_eq!(out, "top → mid (u_mid) [rtl/top.sv:10-12]\n"); + } + + #[test] + fn tree_renders_search_module_hits() { + let value = serde_json::json!([ + { + "name": 
"axi_lite_to_apb", + "design": "test_design", + "score": 0.828, + "file_path": "rtl/axi_lite_to_apb.sv", + "description": "", + "num_ports": 4, + "num_params": 5, + "num_instantiations": 0, + }, + { + "name": "prim_axi_lite_to_apb", + "design": "test_design", + "score": 0.7195, + "file_path": "", + "description": "", + "num_ports": 0, + "num_params": 0, + "num_instantiations": 0, + } + ]); + let op = QueryOp::SearchModules { + query: "axi".into(), + top_k: 5, + design: None, + }; + let out = format_tree(&value, &op).unwrap(); + assert_eq!( + out, + "axi_lite_to_apb [test_design] score=0.828 rtl/axi_lite_to_apb.sv\n\ + prim_axi_lite_to_apb [test_design] score=0.720\n", + ); + } + + #[test] + fn tree_renders_get_module_with_params_and_ports() { + let value = serde_json::json!({ + "name": "axi_lite_to_apb", + "design": "test_design", + "file_path": "rtl/axi_lite_to_apb.sv", + "is_package": false, + "line_start": 5, + "line_end": 120, + "description": "AXI-lite to APB bridge", + "parameters": [ + {"name": "AddrWidth", "kind": "int", "default_value": "32", "is_type_param": false}, + {"name": "apb_req_t", "kind": "type", "default_value": "", "is_type_param": true} + ], + "ports": [ + {"name": "clk_i", "direction": "input", "type_str": "logic", "width_expr": "", "bit_width": 1}, + {"name": "req_i", "direction": "input", "type_str": "axi_lite_req_t", "width_expr": "", "bit_width": null} + ], + "instantiations": [ + {"module_name": "addr_decode", "instance_name": "i_dec"} + ], + "imports": [ + {"package_name": "avsbus_pkg", "is_wildcard": true, "specific_symbols": []} + ] + }); + let op = QueryOp::GetModule { + name: "axi_lite_to_apb".into(), + }; + let out = format_tree(&value, &op).unwrap(); + let expected = "\ +axi_lite_to_apb [test_design] axi_lite_to_apb.sv:5-120 + description: AXI-lite to APB bridge + parameters (2): + AddrWidth: int = 32 + apb_req_t: type + ports (2): + clk_i (input) width=1 type=logic + req_i (input) type=axi_lite_req_t + instantiations (1): + 
addr_decode (i_dec) + imports (1): avsbus_pkg +"; + assert_eq!(out, expected); + } + + #[test] + fn tree_renders_get_module_error_payload() { + let value = serde_json::json!({"error": "Module 'foo' not found"}); + let op = QueryOp::GetModule { name: "foo".into() }; + let out = format_tree(&value, &op).unwrap(); + assert_eq!(out, "Module 'foo' not found\n"); + } + + #[test] + fn tree_renders_get_ports_with_struct_breakdown() { + let value = serde_json::json!([ + {"name": "clk_i", "direction": "input", "type_str": "logic", "bit_width": 1}, + {"name": "req_i", "direction": "input", "type_str": "axi_req_t", "bit_width": null, "width_expr": ""} + ]); + let op = QueryOp::GetPorts { name: "x".into() }; + let out = format_tree(&value, &op).unwrap(); + assert_eq!( + out, + "clk_i (input) width=1 type=logic\n\ + req_i (input) type=axi_req_t\n", + ); + } + + #[test] + fn tree_renders_trace_parameter_with_affected_widths() { + let value = serde_json::json!({ + "module": "avsbus_controller", + "parameter": "AddrWidth", + "affected_instances": 1, + "instances": [{ + "parent": "avsbus_controller", + "child": "axi_lite_to_apb", + "instance": "u_axi_lite_to_apb", + "call_site_expression": "avsbus_controller_pkg::ADDR_WIDTH", + "resolved_value": "32'd32", + "affected_port_widths": { + "apb_req_o": {"total": 74, "fields": {"paddr": 32}} + }, + "parent_file_path": "rtl/avsbus_controller.sv", + "line_start": 334, + "line_end": 342 + }] + }); + let op = QueryOp::TraceParameter { + module_name: "avsbus_controller".into(), + param_name: "AddrWidth".into(), + recursive: false, + depth: 5, + }; + let out = format_tree(&value, &op).unwrap(); + let expected = "\ +avsbus_controller.AddrWidth → 1 propagations across 1 instance(s) + +└─ u_axi_lite_to_apb (avsbus_controller → axi_lite_to_apb) avsbus_controller.sv:334-342 + 1 parameter: + └─ ? 
← avsbus_controller_pkg::ADDR_WIDTH + resolved: 32'd32 + +"; + assert_eq!(out, expected); + } + + #[test] + fn tree_renders_check_connectivity_findings() { + let value = serde_json::json!({ + "module": "top", + "depth": 2, + "issue_count": 1, + "findings": [{ + "kind": "width_mismatch", + "parent": "top", + "child": "leaf", + "instance": "u_leaf", + "port": "data_i", + "instance_width": 32, + "declared_width": 64, + "field_breakdown": {"hi": 16, "lo": 16} + }] + }); + let op = QueryOp::CheckConnectivity { + module_name: "top".into(), + depth: 2, + }; + let out = format_tree(&value, &op).unwrap(); + let expected = "\ +top @ depth=2: 1 issue(s) + - width_mismatch: top -> leaf (u_leaf).data_i instance=32 declared=64 + hi 16 + lo 16 +"; + assert_eq!(out, expected); + } + + #[test] + fn tree_renders_match_interfaces_pairs() { + let value = serde_json::json!({ + "module_a": "axi_master", + "module_b": "axi_slave", + "matched": [{ + "port": "valid", + "a_direction": "output", + "b_direction": "input", + "direction_complementary": true, + "a_width": 1, + "b_width": 1 + }, { + "port": "data", + "a_direction": "output", + "b_direction": "output", + "direction_complementary": false, + "a_width": 32, + "b_width": 32 + }], + "width_conflicts": [], + "unmatched_a": ["dbg_o"], + "unmatched_b": [] + }); + let op = QueryOp::MatchInterfaces { + module_a: "axi_master".into(), + module_b: "axi_slave".into(), + prefix_a: "".into(), + prefix_b: "".into(), + }; + let out = format_tree(&value, &op).unwrap(); + let expected = "\ +axi_master <-> axi_slave + matched (2): + valid a:output/1 <-> b:input/1 + data a:output/32 <-> b:output/32 (NOT COMPLEMENTARY) + unmatched in axi_master (1): dbg_o +"; + assert_eq!(out, expected); + } + + #[test] + fn tree_renders_snippet_string() { + let op = QueryOp::GetSourceSnippet { + module_name: "x".into(), + element: "module".into(), + instance_name: "".into(), + }; + let out = format_tree(&serde_json::json!("module x;\n"), &op).unwrap(); + 
assert_eq!(out, "module x;\n"); + } + + #[test] + fn tree_renders_find_structurally_similar_candidates() { + let value = serde_json::json!({ + "module": "axi_demux", + "candidates": [ + {"name": "axi_mux", "score": 0.75, "shared_ports": 6}, + {"name": "axi_xbar", "score": 0.4, "shared_ports": 3} + ] + }); + let op = QueryOp::FindStructurallySimilar { + module_name: "axi_demux".into(), + min_overlap: 0.3, + design: None, + }; + let out = format_tree(&value, &op).unwrap(); + let expected = "\ +axi_demux: structurally-similar candidates + axi_mux score=0.750 shared_ports=6 + axi_xbar score=0.400 shared_ports=3 +"; + assert_eq!(out, expected); + } +} diff --git a/src/sess.rs b/src/sess.rs index 43c76e65..c3ab1a11 100644 --- a/src/sess.rs +++ b/src/sess.rs @@ -453,7 +453,10 @@ impl<'ctx> Session<'ctx> { .map(|file| match *file { config::SourceFile::File(ref path) => { let ty = match path.extension().and_then(std::ffi::OsStr::to_str) { - Some("sv") | Some("v") | Some("vp") | Some("svh") => { + // `.vh` headers are de-facto SystemVerilog macro files; + // downstream tools (VCS, slang, verilator) parse them as part + // of the unit, so classify them as Verilog here too. + Some("sv") | Some("v") | Some("vp") | Some("svh") | Some("vh") => { Some(SourceType::Verilog) } Some("vhd") | Some("vhdl") => Some(SourceType::Vhdl), diff --git a/tests/pickle/src/struct_port.sv b/tests/pickle/src/struct_port.sv new file mode 100644 index 00000000..3d45eda0 --- /dev/null +++ b/tests/pickle/src/struct_port.sv @@ -0,0 +1,40 @@ +// Fixture for the kg integration test exercising the elab-time packed-struct +// port-width breakdown. `bus_top` is the elaboration root; `bus_consumer` +// receives a typedef'd packed struct port (`req_i`), a packed-array-of-structs +// port (`req_arr_i`), and returns one whose type nests another packed struct +// (`resp_o.nested_req`). 
+ +package bus_pkg; + typedef struct packed { + logic [31:0] addr; + logic [2:0] prot; + logic valid; + } req_t; + + typedef struct packed { + logic [7:0] status; + req_t nested_req; + } resp_t; +endpackage + +module bus_consumer ( + input logic clk_i, + input bus_pkg::req_t req_i, + input bus_pkg::req_t [3:0] req_arr_i, + output bus_pkg::resp_t resp_o +); +endmodule + +module bus_top ( + input logic clk +); + bus_pkg::req_t the_req; + bus_pkg::req_t [3:0] the_req_arr; + bus_pkg::resp_t the_resp; + bus_consumer u_cons ( + .clk_i (clk), + .req_i (the_req), + .req_arr_i(the_req_arr), + .resp_o (the_resp) + ); +endmodule