diff --git a/blogpost/plots/showcase/javascript/knex.svg b/blogpost/plots/showcase/javascript/knex.svg index 994a946..aeb772c 100644 --- a/blogpost/plots/showcase/javascript/knex.svg +++ b/blogpost/plots/showcase/javascript/knex.svg @@ -24,7 +24,8 @@ - + + @@ -261,7 +262,7 @@ Is this specifier a node.js core module? - last commit: 2024-12-21 + ⚠ last commit: 2024-12-21 ★ 26 diff --git a/blogpost/plots/showcase/javascript/knex_mobile.svg b/blogpost/plots/showcase/javascript/knex_mobile.svg index 67b2456..b4639f4 100644 --- a/blogpost/plots/showcase/javascript/knex_mobile.svg +++ b/blogpost/plots/showcase/javascript/knex_mobile.svg @@ -24,7 +24,8 @@ - + + @@ -261,7 +262,7 @@ Is this specifier a node.js core module? - last commit: 2024-12-21 + ⚠ last commit: 2024-12-21 ★ 26 diff --git a/blogpost/plots/showcase/rust/diesel.svg b/blogpost/plots/showcase/rust/diesel.svg index 2248524..cf6518d 100644 --- a/blogpost/plots/showcase/rust/diesel.svg +++ b/blogpost/plots/showcase/rust/diesel.svg @@ -12,7 +12,7 @@ - + @@ -24,7 +24,7 @@ - + @@ -43,8 +43,8 @@ diesel - - diesel_derives + + diesel_derives @@ -83,8 +83,8 @@ quote - - syn + + syn diff --git a/blogpost/plots/showcase/rust/diesel_mobile.svg b/blogpost/plots/showcase/rust/diesel_mobile.svg index b57ee3b..705ce68 100644 --- a/blogpost/plots/showcase/rust/diesel_mobile.svg +++ b/blogpost/plots/showcase/rust/diesel_mobile.svg @@ -12,7 +12,7 @@ - + @@ -20,11 +20,11 @@ - - + + - + @@ -43,8 +43,8 @@ diesel - - diesel_derives + + diesel_derives @@ -71,8 +71,8 @@ heck - - ident_case + + ident_case @@ -83,8 +83,8 @@ quote - - syn + + syn diff --git a/examples/real/cobra.json b/examples/real/cobra.json index 174c3cc..4f52522 100644 --- a/examples/real/cobra.json +++ b/examples/real/cobra.json @@ -1,105 +1,107 @@ { "nodes": [ { - "id": "github.com/creack/pty", + "id": "golang.org/x/crypto", "meta": { - "version": "v1.1.24" + "version": "v0.46.0" } }, { - "id": "github.com/cpuguy83/go-md2man/v2", + "id": "golang.org/x/term", 
"meta": { - "version": "v2.0.7" + "version": "v0.38.0" } }, { - "id": "github.com/spf13/pflag", + "id": "github.com/rogpeppe/go-internal", "meta": { - "repo_archived": false, - "repo_language": "Go", - "repo_last_commit": "2025-12-16", - "repo_last_release": "2025-04-28", - "repo_maintainers": [ - "strib", - "patrickxb", - "chrisnojima", - "maxtaco", - "cjb" - ], - "repo_owner": "keybase", - "repo_stars": 9128, - "repo_topics": [ - "chat", - "end-to-end-encryption", - "go", - "kbfs", - "keybase", - "pgp", - "react", - "react-native" - ], - "repo_url": "https://github.com/keybase/client", - "version": "v1.0.10" + "version": "v1.14.1" } }, { - "id": "gopkg.in/check.v1", + "id": "golang.org/x/mod", "meta": { - "version": "v1.0.0-20201130134442-10cb98267c6c" + "version": "v0.31.0" } }, { - "id": "github.com/kr/text", + "id": "github.com/creack/pty", "meta": { - "version": "v0.2.0" + "version": "v1.1.24" } }, { - "id": "github.com/rogpeppe/go-internal", + "id": "github.com/google/go-cmp", "meta": { - "version": "v1.14.1" + "version": "v0.7.0" } }, { - "id": "golang.org/x/mod", + "id": "github.com/yuin/goldmark", "meta": { - "version": "v0.31.0" + "version": "v1.7.13" } }, { - "id": "golang.org/x/sync", + "id": "golang.org/x/tools", "meta": { - "version": "v0.19.0" + "version": "v0.40.0" } }, { - "id": "github.com/russross/blackfriday/v2", + "id": "github.com/cpuguy83/go-md2man/v2", "meta": { - "version": "v2.1.0" + "repo_archived": false, + "repo_language": "Go", + "repo_last_commit": "2025-12-22", + "repo_last_release": "2025-11-20", + "repo_maintainers": [ + "Roasbeef", + "guggero", + "yyforyongyu", + "cfromknecht", + "halseth" + ], + "repo_owner": "lightningnetwork", + "repo_stars": 8074, + "repo_topics": [ + "bitcoin", + "blockchain", + "cryptocurrency", + "cryptography", + "lightning", + "lightning-network", + "micropayments", + "payments", + "peer-to-peer", + "protocol" + ], + "repo_url": "https://github.com/lightningnetwork/lnd", + "version": "v2.0.7" } }, { - 
"id": "github.com/kr/pretty", + "id": "github.com/inconshreveable/mousetrap", "meta": { - "version": "v0.3.1" + "version": "v1.1.0" } }, { - "id": "golang.org/x/sys", + "id": "github.com/russross/blackfriday/v2", "meta": { - "version": "v0.39.0" + "version": "v2.1.0" } }, { - "id": "golang.org/x/tools", + "id": "gopkg.in/check.v1", "meta": { - "version": "v0.40.0" + "version": "v1.0.0-20201130134442-10cb98267c6c" } }, { - "id": "golang.org/x/crypto", + "id": "github.com/kr/pretty", "meta": { - "version": "v0.46.0" + "version": "v0.3.1" } }, { @@ -109,18 +111,30 @@ } }, { - "id": "github.com/inconshreveable/mousetrap", + "id": "github.com/kr/text", "meta": { - "version": "v1.1.0" + "version": "v0.2.0" } }, { - "id": "go.yaml.in/yaml/v3", + "id": "golang.org/x/sys", + "meta": { + "version": "v0.39.0" + } + }, + { + "id": "golang.org/x/telemetry", + "meta": { + "version": "v0.0.0-20251222180846-3f2a21fb04ff" + } + }, + { + "id": "github.com/spf13/cobra", "meta": { "repo_archived": false, "repo_language": "TypeScript", - "repo_last_commit": "2025-12-16", - "repo_last_release": "2025-11-19", + "repo_last_commit": "2025-12-22", + "repo_last_release": "2025-12-16", "repo_maintainers": [ "torkelo", "bergquist", @@ -128,7 +142,7 @@ "marefr" ], "repo_owner": "grafana", - "repo_stars": 71280, + "repo_stars": 71404, "repo_topics": [ "alerting", "analytics", @@ -147,67 +161,57 @@ "prometheus" ], "repo_url": "https://github.com/grafana/grafana", - "version": "v3.0.4" - } - }, - { - "id": "github.com/yuin/goldmark", - "meta": { - "version": "v1.7.13" - } - }, - { - "id": "golang.org/x/net", - "meta": { - "version": "v0.48.0" - } - }, - { - "id": "golang.org/x/telemetry", - "meta": { - "version": "v0.0.0-20251215142616-e75fd47794af" + "version": "v1.10.2" } }, { - "id": "github.com/spf13/cobra", + "id": "github.com/spf13/pflag", "meta": { "repo_archived": false, "repo_language": "Go", - "repo_last_commit": "2025-12-16", - "repo_last_release": "2025-04-28", + "repo_last_commit": 
"2025-12-22", + "repo_last_release": "2025-11-20", "repo_maintainers": [ - "strib", - "patrickxb", - "chrisnojima", - "maxtaco", - "cjb" + "Roasbeef", + "guggero", + "yyforyongyu", + "cfromknecht", + "halseth" ], - "repo_owner": "keybase", - "repo_stars": 9128, + "repo_owner": "lightningnetwork", + "repo_stars": 8074, "repo_topics": [ - "chat", - "end-to-end-encryption", - "go", - "kbfs", - "keybase", - "pgp", - "react", - "react-native" + "bitcoin", + "blockchain", + "cryptocurrency", + "cryptography", + "lightning", + "lightning-network", + "micropayments", + "payments", + "peer-to-peer", + "protocol" ], - "repo_url": "https://github.com/keybase/client", - "version": "v1.10.2" + "repo_url": "https://github.com/lightningnetwork/lnd", + "version": "v1.0.10" } }, { - "id": "github.com/google/go-cmp", + "id": "go.yaml.in/yaml/v3", "meta": { - "version": "v0.7.0" + "version": "v3.0.4" } }, { - "id": "golang.org/x/term", + "id": "golang.org/x/net", "meta": { - "version": "v0.38.0" + "version": "v0.48.0" + } + }, + { + "id": "golang.org/x/sync", + "meta": { + "version": "v0.19.0" } } ], @@ -228,14 +232,14 @@ "from": "github.com/spf13/cobra", "to": "go.yaml.in/yaml/v3" }, - { - "from": "go.yaml.in/yaml/v3", - "to": "gopkg.in/check.v1" - }, { "from": "github.com/cpuguy83/go-md2man/v2", "to": "github.com/russross/blackfriday/v2" }, + { + "from": "go.yaml.in/yaml/v3", + "to": "gopkg.in/check.v1" + }, { "from": "gopkg.in/check.v1", "to": "github.com/kr/pretty" @@ -264,10 +268,6 @@ "from": "github.com/kr/text", "to": "github.com/creack/pty" }, - { - "from": "golang.org/x/mod", - "to": "golang.org/x/tools" - }, { "from": "golang.org/x/tools", "to": "github.com/google/go-cmp" @@ -292,6 +292,22 @@ "from": "golang.org/x/tools", "to": "golang.org/x/telemetry" }, + { + "from": "golang.org/x/mod", + "to": "golang.org/x/tools" + }, + { + "from": "golang.org/x/telemetry", + "to": "golang.org/x/mod" + }, + { + "from": "golang.org/x/telemetry", + "to": "golang.org/x/sync" + }, + { + 
"from": "golang.org/x/telemetry", + "to": "golang.org/x/sys" + }, { "from": "golang.org/x/net", "to": "golang.org/x/crypto" @@ -309,20 +325,8 @@ "to": "golang.org/x/text" }, { - "from": "golang.org/x/telemetry", - "to": "golang.org/x/mod" - }, - { - "from": "golang.org/x/telemetry", - "to": "golang.org/x/sync" - }, - { - "from": "golang.org/x/telemetry", - "to": "golang.org/x/sys" - }, - { - "from": "golang.org/x/term", - "to": "golang.org/x/sys" + "from": "golang.org/x/text", + "to": "golang.org/x/tools" }, { "from": "golang.org/x/crypto", @@ -337,8 +341,8 @@ "to": "golang.org/x/term" }, { - "from": "golang.org/x/text", - "to": "golang.org/x/tools" + "from": "golang.org/x/term", + "to": "golang.org/x/sys" } ] } diff --git a/examples/real/com.google.guava_guava.json b/examples/real/com.google.guava_guava.json index 94c1e95..73aa99b 100644 --- a/examples/real/com.google.guava_guava.json +++ b/examples/real/com.google.guava_guava.json @@ -1,49 +1,11 @@ { "nodes": [ - { - "id": "com.google.errorprone:error_prone_annotations", - "meta": { - "repo_archived": false, - "repo_language": "Java", - "repo_last_commit": "2025-12-15", - "repo_maintainers": [ - "normanmaurer", - "trustin", - "Scottmitch", - "netty-project-bot", - "chrisvest" - ], - "repo_owner": "netty", - "repo_stars": 34645, - "repo_url": "https://github.com/netty/netty", - "version": "2.39.0" - } - }, - { - "id": "com.google.j2objc:j2objc-annotations", - "meta": { - "repo_archived": false, - "repo_language": "Java", - "repo_last_commit": "2025-12-15", - "repo_maintainers": [ - "normanmaurer", - "trustin", - "Scottmitch", - "netty-project-bot", - "chrisvest" - ], - "repo_owner": "netty", - "repo_stars": 34645, - "repo_url": "https://github.com/netty/netty", - "version": "3.0.0" - } - }, { "id": "com.google.guava:guava", "meta": { "repo_archived": false, "repo_language": "Java", - "repo_last_commit": "2025-09-24", + "repo_last_commit": "2025-12-18", "repo_last_release": "2025-06-26", "repo_maintainers": [ 
"Randgalt", @@ -53,7 +15,7 @@ "kezhuw" ], "repo_owner": "apache", - "repo_stars": 3168, + "repo_stars": 3165, "repo_topics": [ "consensus", "curator", @@ -70,7 +32,7 @@ "meta": { "repo_archived": false, "repo_language": "Scala", - "repo_last_commit": "2025-12-16", + "repo_last_commit": "2025-12-23", "repo_maintainers": [ "dongjoon-hyun", "HyukjinKwon", @@ -79,7 +41,7 @@ "rxin" ], "repo_owner": "apache", - "repo_stars": 42497, + "repo_stars": 42526, "repo_topics": [ "big-data", "java", @@ -99,8 +61,8 @@ "meta": { "repo_archived": false, "repo_language": "Java", - "repo_last_commit": "2025-12-15", - "repo_last_release": "2025-12-10", + "repo_last_commit": "2025-12-22", + "repo_last_release": "2025-12-17", "repo_maintainers": [ "gsmet", "geoand", @@ -108,7 +70,7 @@ "gastaldi" ], "repo_owner": "quarkusio", - "repo_stars": 15345, + "repo_stars": 15356, "repo_topics": [ "cloud-native", "hacktoberfest", @@ -125,7 +87,7 @@ "meta": { "repo_archived": false, "repo_language": "Java", - "repo_last_commit": "2025-12-15", + "repo_last_commit": "2025-12-22", "repo_last_release": "2025-09-16", "repo_maintainers": [ "codahale", @@ -134,7 +96,7 @@ "jplock" ], "repo_owner": "dropwizard", - "repo_stars": 8576, + "repo_stars": 8577, "repo_topics": [ "dropwizard", "hibernate", @@ -151,6 +113,44 @@ "repo_url": "https://github.com/dropwizard/dropwizard", "version": "1.0.0" } + }, + { + "id": "com.google.errorprone:error_prone_annotations", + "meta": { + "repo_archived": false, + "repo_language": "Java", + "repo_last_commit": "2025-12-19", + "repo_maintainers": [ + "normanmaurer", + "trustin", + "Scottmitch", + "netty-project-bot", + "chrisvest" + ], + "repo_owner": "netty", + "repo_stars": 34658, + "repo_url": "https://github.com/netty/netty", + "version": "2.39.0" + } + }, + { + "id": "com.google.j2objc:j2objc-annotations", + "meta": { + "repo_archived": false, + "repo_language": "Java", + "repo_last_commit": "2025-12-19", + "repo_maintainers": [ + "normanmaurer", + "trustin", + 
"Scottmitch", + "netty-project-bot", + "chrisvest" + ], + "repo_owner": "netty", + "repo_stars": 34658, + "repo_url": "https://github.com/netty/netty", + "version": "3.0.0" + } } ], "edges": [ diff --git a/examples/real/console.json b/examples/real/console.json index accf9c5..ed1eb03 100644 --- a/examples/real/console.json +++ b/examples/real/console.json @@ -1,161 +1,174 @@ { "nodes": [ { - "id": "symfony/deprecation-contracts", + "id": "symfony/polyfill-mbstring", "meta": { "author": "Nicolas Grekas", - "description": "A generic function and convention to trigger deprecation notices", + "description": "Symfony polyfill for the Mbstring extension", "license": "MIT", "repo_archived": false, "repo_language": "PHP", - "repo_last_commit": "2025-05-25", - "repo_last_release": "2020-09-08", + "repo_last_commit": "2025-08-19", "repo_maintainers": [ "nicolas-grekas", "fabpot", - "derrabus", - "xabbuh", - "cedric-anne" + "stof", + "IonBazan", + "keradus" ], "repo_owner": "symfony", - "repo_stars": 2108, + "repo_stars": 7859, "repo_topics": [ - "contract", - "deprecation", - "php", + "compatibility", + "component", + "javascript", + "mbstring", + "polyfill", + "portable", + "shim", "symfony", - "symfony-contract" + "symfony-component", + "symfony-polyfill" ], - "repo_url": "https://github.com/symfony/deprecation-contracts", - "version": "v3.6.0" + "repo_url": "https://github.com/symfony/polyfill-mbstring", + "version": "v1.33.0" } }, { - "id": "psr/container", + "id": "symfony/service-contracts", "meta": { - "author": "PHP-FIG", - "description": "Common Container Interface (PHP FIG PSR-11)", + "author": "Nicolas Grekas", + "description": "Generic abstractions related to writing services", "license": "MIT", "repo_archived": false, "repo_language": "PHP", - "repo_last_commit": "2024-02-05", - "repo_last_release": "2021-11-05", + "repo_last_commit": "2025-12-13", + "repo_last_release": "2020-09-08", "repo_maintainers": [ - "mnapoli", - "moufmouf", - "Ocramius", - 
"weierophinney", - "Jean85" + "nicolas-grekas", + "fabpot", + "derrabus", + "kbond", + "xabbuh" ], - "repo_owner": "php-fig", - "repo_stars": 10022, - "repo_url": "https://github.com/php-fig/container", - "version": "2.0.2" + "repo_owner": "symfony", + "repo_stars": 2630, + "repo_topics": [ + "contract", + "php", + "service", + "symfony", + "symfony-contract" + ], + "repo_url": "https://github.com/symfony/service-contracts", + "version": "v3.6.1" } }, { - "id": "symfony/polyfill-intl-grapheme", + "id": "symfony/string", "meta": { "author": "Nicolas Grekas", - "description": "Symfony polyfill for intl's grapheme_* functions", + "description": "Provides an object-oriented API to strings and deals with bytes, UTF-8 code points and grapheme clusters in a unified way", "license": "MIT", "repo_archived": false, "repo_language": "PHP", - "repo_last_commit": "2025-08-19", + "repo_last_commit": "2025-12-19", + "repo_last_release": "2025-12-07", "repo_maintainers": [ "nicolas-grekas", "fabpot", - "gharlan", - "Ayesh", - "azjezz" + "derrabus", + "xabbuh", + "fancyweb" ], "repo_owner": "symfony", - "repo_stars": 1733, + "repo_stars": 1781, "repo_topics": [ - "compatibility", "component", "grapheme", - "intl", - "javascript", - "polyfill", - "portable", - "shim", + "i18n", + "php", + "string", "symfony", "symfony-component", - "symfony-polyfill" + "unicode", + "utf-8", + "utf8" ], - "repo_url": "https://github.com/symfony/polyfill-intl-grapheme", - "version": "v1.33.0" + "repo_url": "https://github.com/symfony/string", + "version": "v8.0.1" } }, { - "id": "symfony/service-contracts", + "id": "symfony/deprecation-contracts", "meta": { "author": "Nicolas Grekas", - "description": "Generic abstractions related to writing services", + "description": "A generic function and convention to trigger deprecation notices", "license": "MIT", "repo_archived": false, "repo_language": "PHP", - "repo_last_commit": "2025-12-13", + "repo_last_commit": "2025-05-25", "repo_last_release": 
"2020-09-08", "repo_maintainers": [ "nicolas-grekas", "fabpot", "derrabus", - "kbond", - "xabbuh" + "xabbuh", + "cedric-anne" ], "repo_owner": "symfony", - "repo_stars": 2630, + "repo_stars": 2108, "repo_topics": [ "contract", + "deprecation", "php", - "service", "symfony", "symfony-contract" ], - "repo_url": "https://github.com/symfony/service-contracts", - "version": "v3.6.1" + "repo_url": "https://github.com/symfony/deprecation-contracts", + "version": "v3.6.0" } }, { - "id": "symfony/polyfill-ctype", + "id": "symfony/polyfill-intl-normalizer", "meta": { - "author": "Gert de Pagter", - "description": "Symfony polyfill for ctype functions", + "author": "Nicolas Grekas", + "description": "Symfony polyfill for intl's Normalizer class and related functions", "license": "MIT", "repo_archived": false, "repo_language": "PHP", "repo_last_commit": "2025-08-19", "repo_maintainers": [ "nicolas-grekas", - "BackEndTea", "fabpot", - "cedric-anne", - "GrahamCampbell" + "derrabus", + "stof", + "DavidPrevot" ], "repo_owner": "symfony", - "repo_stars": 4053, + "repo_stars": 2071, "repo_topics": [ "compatibility", "component", - "ctype", + "intl", "javascript", + "normalizer", "polyfill", "portable", + "shim", "symfony", "symfony-component", "symfony-polyfill" ], - "repo_url": "https://github.com/symfony/polyfill-ctype", + "repo_url": "https://github.com/symfony/polyfill-intl-normalizer", "version": "v1.33.0" } }, { - "id": "symfony/polyfill-intl-normalizer", + "id": "symfony/polyfill-intl-grapheme", "meta": { "author": "Nicolas Grekas", - "description": "Symfony polyfill for intl's Normalizer class and related functions", + "description": "Symfony polyfill for intl's grapheme_* functions", "license": "MIT", "repo_archived": false, "repo_language": "PHP", @@ -163,18 +176,18 @@ "repo_maintainers": [ "nicolas-grekas", "fabpot", - "derrabus", - "stof", - "DavidPrevot" + "gharlan", + "Ayesh", + "azjezz" ], "repo_owner": "symfony", - "repo_stars": 2071, + "repo_stars": 1733, 
"repo_topics": [ "compatibility", "component", + "grapheme", "intl", "javascript", - "normalizer", "polyfill", "portable", "shim", @@ -182,149 +195,136 @@ "symfony-component", "symfony-polyfill" ], - "repo_url": "https://github.com/symfony/polyfill-intl-normalizer", + "repo_url": "https://github.com/symfony/polyfill-intl-grapheme", "version": "v1.33.0" } }, { - "id": "symfony/console", + "id": "psr/container", "meta": { - "author": "Fabien Potencier", - "description": "Eases the creation of beautiful and testable command line interfaces", + "author": "PHP-FIG", + "description": "Common Container Interface (PHP FIG PSR-11)", "license": "MIT", "repo_archived": false, "repo_language": "PHP", - "repo_last_commit": "2025-12-14", - "repo_last_release": "2025-12-07", + "repo_last_commit": "2024-02-05", + "repo_last_release": "2021-11-05", "repo_maintainers": [ - "fabpot", - "nicolas-grekas", - "xabbuh", - "derrabus", - "chalasr" - ], - "repo_owner": "symfony", - "repo_stars": 9834, - "repo_topics": [ - "cli", - "command-line", - "component", - "console", - "php", - "symfony", - "symfony-component", - "terminal" + "mnapoli", + "moufmouf", + "Ocramius", + "weierophinney", + "Jean85" ], - "repo_url": "https://github.com/symfony/console", - "version": "v8.0.1" + "repo_owner": "php-fig", + "repo_stars": 10023, + "repo_url": "https://github.com/php-fig/container", + "version": "2.0.2" } }, { - "id": "symfony/string", + "id": "symfony/polyfill-ctype", "meta": { - "author": "Nicolas Grekas", - "description": "Provides an object-oriented API to strings and deals with bytes, UTF-8 code points and grapheme clusters in a unified way", + "author": "Gert de Pagter", + "description": "Symfony polyfill for ctype functions", "license": "MIT", "repo_archived": false, "repo_language": "PHP", - "repo_last_commit": "2025-12-07", - "repo_last_release": "2025-12-07", + "repo_last_commit": "2025-08-19", "repo_maintainers": [ "nicolas-grekas", + "BackEndTea", "fabpot", - "derrabus", - "xabbuh", - 
"fancyweb" + "cedric-anne", + "GrahamCampbell" ], "repo_owner": "symfony", - "repo_stars": 1781, + "repo_stars": 4053, "repo_topics": [ + "compatibility", "component", - "grapheme", - "i18n", - "php", - "string", + "ctype", + "javascript", + "polyfill", + "portable", "symfony", "symfony-component", - "unicode", - "utf-8", - "utf8" + "symfony-polyfill" ], - "repo_url": "https://github.com/symfony/string", - "version": "v8.0.1" + "repo_url": "https://github.com/symfony/polyfill-ctype", + "version": "v1.33.0" } }, { - "id": "symfony/polyfill-mbstring", + "id": "symfony/console", "meta": { - "author": "Nicolas Grekas", - "description": "Symfony polyfill for the Mbstring extension", + "author": "Fabien Potencier", + "description": "Eases the creation of beautiful and testable command line interfaces", "license": "MIT", "repo_archived": false, "repo_language": "PHP", - "repo_last_commit": "2025-08-19", + "repo_last_commit": "2025-12-22", + "repo_last_release": "2025-12-07", "repo_maintainers": [ - "nicolas-grekas", "fabpot", - "stof", - "IonBazan", - "keradus" + "nicolas-grekas", + "xabbuh", + "derrabus", + "chalasr" ], "repo_owner": "symfony", - "repo_stars": 7859, + "repo_stars": 9834, "repo_topics": [ - "compatibility", + "cli", + "command-line", "component", - "javascript", - "mbstring", - "polyfill", - "portable", - "shim", + "console", + "php", "symfony", "symfony-component", - "symfony-polyfill" + "terminal" ], - "repo_url": "https://github.com/symfony/polyfill-mbstring", - "version": "v1.33.0" + "repo_url": "https://github.com/symfony/console", + "version": "v8.0.1" } } ], "edges": [ { "from": "symfony/console", - "to": "symfony/service-contracts" + "to": "symfony/polyfill-mbstring" }, { "from": "symfony/console", - "to": "symfony/string" + "to": "symfony/service-contracts" }, { "from": "symfony/console", - "to": "symfony/polyfill-mbstring" + "to": "symfony/string" }, { "from": "symfony/service-contracts", - "to": "symfony/deprecation-contracts" + "to": 
"psr/container" }, { "from": "symfony/service-contracts", - "to": "psr/container" + "to": "symfony/deprecation-contracts" }, { "from": "symfony/string", - "to": "symfony/polyfill-ctype" + "to": "symfony/polyfill-intl-normalizer" }, { "from": "symfony/string", - "to": "symfony/polyfill-intl-grapheme" + "to": "symfony/polyfill-mbstring" }, { "from": "symfony/string", - "to": "symfony/polyfill-intl-normalizer" + "to": "symfony/polyfill-ctype" }, { "from": "symfony/string", - "to": "symfony/polyfill-mbstring" + "to": "symfony/polyfill-intl-grapheme" } ] } diff --git a/examples/real/flask.json b/examples/real/flask.json index 49f6159..98e303e 100644 --- a/examples/real/flask.json +++ b/examples/real/flask.json @@ -1,5 +1,52 @@ { "nodes": [ + { + "id": "importlib-metadata", + "meta": { + "description": "Read metadata from Python packages", + "repo_archived": false, + "repo_language": "Python", + "repo_last_commit": "2025-12-21", + "repo_last_release": "2025-12-21", + "repo_maintainers": [ + "jaraco", + "warsaw", + "abravalheri", + "jherland", + "sjma3" + ], + "repo_owner": "python", + "repo_stars": 138, + "repo_url": "https://github.com/python/importlib_metadata", + "version": "8.7.1" + } + }, + { + "id": "itsdangerous", + "meta": { + "description": "Safely pass data to untrusted environments and back.", + "repo_archived": false, + "repo_language": "Python", + "repo_last_commit": "2025-06-14", + "repo_last_release": "2024-04-16", + "repo_maintainers": [ + "davidism", + "mitsuhiko" + ], + "repo_owner": "pallets", + "repo_stars": 3093, + "repo_topics": [ + "hmac", + "itsdangerous", + "pallets", + "python", + "security", + "serialization" + ], + "repo_url": "https://github.com/pallets/itsdangerous", + "version": "2.2.0" + } + }, { "id": "werkzeug", "meta": { @@ -15,7 +62,7 @@ "DasIch" ], "repo_owner": "pallets", - "repo_stars": 6824, + "repo_stars": 6825, "repo_topics": [ "http", "pallets", @@ -42,11 +89,38 @@ "jdufresne" ], "repo_owner": "tartley", - "repo_stars": 3756, + 
"repo_stars": 3758, "repo_url": "https://github.com/tartley/colorama", "version": "0.4.6" } }, + { + "id": "jinja2", + "meta": { + "description": "A very fast and expressive template engine.", + "repo_archived": false, + "repo_language": "Python", + "repo_last_commit": "2025-06-14", + "repo_last_release": "2025-03-05", + "repo_maintainers": [ + "mitsuhiko", + "davidism", + "untitaker" + ], + "repo_owner": "pallets", + "repo_stars": 11342, + "repo_topics": [ + "jinja", + "jinja2", + "pallets", + "python", + "template-engine", + "templates" + ], + "repo_url": "https://github.com/pallets/jinja", + "version": "3.1.6" + } + }, { "id": "markupsafe", "meta": { @@ -60,7 +134,7 @@ "mitsuhiko" ], "repo_owner": "pallets", - "repo_stars": 680, + "repo_stars": 682, "repo_topics": [ "html", "html-escape", @@ -74,12 +148,31 @@ "version": "3.0.3" } }, + { + "id": "zipp", + "meta": { + "description": "Backport of pathlib-compatible object wrapper for zip files", + "repo_archived": false, + "repo_language": "Python", + "repo_last_commit": "2025-10-19", + "repo_last_release": "2025-06-08", + "repo_maintainers": [ + "jaraco", + "DimitriPapadopoulos", + "layday", + "Avasam", + "barneygale" + ], + "repo_owner": "jaraco", + "repo_stars": 67, + "repo_url": "https://github.com/jaraco/zipp", + "version": "3.23.0" + } + }, { "id": "flask", "meta": { - "author": "Armin Ronacher", - "description": "A micro web framework", - "license": "BSD-3-Clause", + "description": "A simple framework for building complex web applications.", "repo_archived": false, "repo_language": "Python", "repo_last_commit": "2025-11-28", @@ -92,7 +185,7 @@ "greyli" ], "repo_owner": "pallets", - "repo_stars": 70924, + "repo_stars": 70955, "repo_topics": [ "flask", "jinja", @@ -103,7 +196,33 @@ "wsgi" ], "repo_url": "https://github.com/pallets/flask", - "version": "2.0.0" + "version": "3.1.2" + } + }, + { + "id": "blinker", + "meta": { + "author": "Jason Kirtland", + "description": "Fast, simple object-to-object and 
broadcast signaling", + "repo_archived": false, + "repo_language": "Python", + "repo_last_commit": "2025-11-19", + "repo_last_release": "2024-11-08", + "repo_maintainers": [ + "davidism", + "jek", + "Secrus", + "pgjones" + ], + "repo_owner": "pallets-eco", + "repo_stars": 2008, + "repo_topics": [ + "blinker", + "python", + "signals" + ], + "repo_url": "https://github.com/pallets-eco/blinker", + "version": "1.9.0" } }, { @@ -121,7 +240,7 @@ "untitaker" ], "repo_owner": "pallets", - "repo_stars": 17060, + "repo_stars": 17074, "repo_topics": [ "cli", "click", @@ -134,21 +253,49 @@ } ], "edges": [ + { + "from": "flask", + "to": "blinker" + }, { "from": "flask", "to": "click" }, + { + "from": "flask", + "to": "importlib-metadata" + }, + { + "from": "flask", + "to": "itsdangerous" + }, + { + "from": "flask", + "to": "jinja2" + }, + { + "from": "flask", + "to": "markupsafe" + }, { "from": "flask", "to": "werkzeug" }, + { + "from": "werkzeug", + "to": "markupsafe" + }, + { + "from": "jinja2", + "to": "markupsafe" + }, { "from": "click", "to": "colorama" }, { - "from": "werkzeug", - "to": "markupsafe" + "from": "importlib-metadata", + "to": "zipp" } ] } diff --git a/examples/real/openai.json b/examples/real/openai.json index 4de5ff7..35215c3 100644 --- a/examples/real/openai.json +++ b/examples/real/openai.json @@ -1,170 +1,204 @@ { "nodes": [ { - "id": "tqdm", + "id": "h11", "meta": { - "description": "Fast, Extensible Progress Meter", - "license": "MPL-2.0 AND MIT", + "author": "Nathaniel J. 
Smith", + "description": "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1", + "license": "MIT", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-05-22", - "repo_last_release": "2024-11-27", + "repo_last_commit": "2025-04-24", "repo_maintainers": [ - "casperdcl", - "lrq3000", - "altendky", - "hadim", - "richardsheridan" + "njsmith", + "pgjones", + "Lukasa", + "cdeler", + "lovelydinosaur" ], - "repo_owner": "tqdm", - "repo_stars": 30778, + "repo_owner": "python-hyper", + "repo_stars": 538, "repo_topics": [ - "cli", - "closember", - "console", - "discord", - "gui", - "jupyter", - "keras", - "meter", - "pandas", - "parallel", - "progress", - "progress-bar", - "progressbar", - "progressmeter", + "http", "python", - "rate", - "telegram", - "terminal", - "time", - "utilities" + "sans-io" ], - "repo_url": "https://github.com/tqdm/tqdm", - "version": "4.67.1" + "repo_url": "https://github.com/python-hyper/h11", + "version": "0.16.0" } }, { - "id": "typing-extensions", + "id": "exceptiongroup", "meta": { - "description": "Backported and Experimental Type Hints for Python 3.9+", + "description": "Backport of PEP 654 (exception groups)", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-12-02", - "repo_last_release": "2025-08-25", + "repo_last_commit": "2025-11-21", + "repo_last_release": "2025-11-21", "repo_maintainers": [ - "JelleZijlstra", - "AlexWaygood", - "srittau", - "brianschubert", - "Daraan" + "agronholm", + "jakkdl", + "Zac-HD", + "cfbolz" ], - "repo_owner": "python", - "repo_stars": 541, - "repo_url": "https://github.com/python/typing_extensions", - "version": "4.15.0" + "repo_owner": "agronholm", + "repo_stars": 48, + "repo_url": "https://github.com/agronholm/exceptiongroup", + "version": "1.3.1" } }, { - "id": "idna", + "id": "httpx", "meta": { - "description": "Internationalized Domain Names in Applications (IDNA)", + "description": "The next generation HTTP client.", + "license": 
"BSD-3-Clause", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-10-20", - "repo_last_release": "2025-10-12", + "repo_last_commit": "2025-12-17", + "repo_last_release": "2024-12-06", "repo_maintainers": [ - "kjd", - "hugovk", - "jdufresne", - "jribbens", - "slingamn" + "lovelydinosaur", + "florimondmanca", + "sethmlarson", + "karpetrosyan" ], - "repo_owner": "kjd", - "repo_stars": 273, + "repo_owner": "encode", + "repo_stars": 14857, "repo_topics": [ - "dns", - "hacktoberfest", - "idna", + "asyncio", + "http", "python", - "unicode" + "trio" ], - "repo_url": "https://github.com/kjd/idna", - "version": "3.11" + "repo_url": "https://github.com/encode/httpx", + "version": "0.28.1" } }, { - "id": "httpcore", + "id": "jiter", "meta": { - "description": "A minimal low-level HTTP client.", + "description": "Fast iterable JSON parser.", "repo_archived": false, - "repo_language": "Python", - "repo_last_commit": "2025-12-06", - "repo_last_release": "2025-04-24", + "repo_language": "Rust", + "repo_last_commit": "2025-11-09", + "repo_last_release": "2025-11-09", "repo_maintainers": [ - "lovelydinosaur", - "florimondmanca", - "karpetrosyan", - "cdeler" + "samuelcolvin", + "davidhewitt", + "Viicos", + "jessekrubin", + "msimacek" ], - "repo_owner": "encode", - "repo_stars": 526, - "repo_url": "https://github.com/encode/httpcore", - "version": "1.0.9" + "repo_owner": "pydantic", + "repo_stars": 481, + "repo_topics": [ + "json", + "json-parser", + "pydantic", + "rust" + ], + "repo_url": "https://github.com/pydantic/jiter", + "version": "0.12.0" } }, { - "id": "h11", + "id": "pydantic-core", "meta": { - "author": "Nathaniel J. 
Smith", - "description": "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1", - "license": "MIT", + "description": "Core functionality for Pydantic validation and serialization", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-04-24", + "repo_last_commit": "2025-11-10", + "repo_last_release": "2025-11-04", "repo_maintainers": [ - "njsmith", - "pgjones", - "Lukasa", - "cdeler", - "lovelydinosaur" + "samuelcolvin", + "davidhewitt", + "adriangb", + "sydney-runkle" ], - "repo_owner": "python-hyper", - "repo_stars": 534, + "repo_owner": "pydantic", + "repo_stars": 1739, "repo_topics": [ - "http", - "python", - "sans-io" + "json-schema", + "parsing", + "pydantic", + "rust", + "schema", + "validation" ], - "repo_url": "https://github.com/python-hyper/h11", - "version": "0.16.0" + "repo_url": "https://github.com/pydantic/pydantic-core", + "version": "2.41.5" } }, { - "id": "httpx", + "id": "typing-inspection", "meta": { - "description": "The next generation HTTP client.", - "license": "BSD-3-Clause", + "description": "Runtime typing introspection tools", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-12-10", - "repo_last_release": "2024-12-06", + "repo_last_commit": "2025-10-01", + "repo_last_release": "2025-10-01", "repo_maintainers": [ - "lovelydinosaur", - "florimondmanca", - "sethmlarson", - "karpetrosyan" + "Viicos", + "samuelcolvin", + "cdce8p" ], - "repo_owner": "encode", - "repo_stars": 14844, + "repo_owner": "pydantic", + "repo_stars": 57, + "repo_url": "https://github.com/pydantic/typing-inspection", + "version": "0.4.2" + } + }, + { + "id": "anyio", + "meta": { + "description": "High-level concurrency and networking framework on top of asyncio or Trio", + "repo_archived": false, + "repo_language": "Python", + "repo_last_commit": "2025-12-22", + "repo_last_release": "2025-11-30", + "repo_maintainers": [ + "agronholm", + "graingert", + "gschaffner", + "davidbrochart" + ], + "repo_owner": 
"agronholm", + "repo_stars": 2331, "repo_topics": [ + "async-await", "asyncio", - "http", - "python", "trio" ], - "repo_url": "https://github.com/encode/httpx", - "version": "0.28.1" + "repo_url": "https://github.com/agronholm/anyio", + "version": "4.12.0" + } + }, + { + "id": "idna", + "meta": { + "description": "Internationalized Domain Names in Applications (IDNA)", + "repo_archived": false, + "repo_language": "Python", + "repo_last_commit": "2025-10-20", + "repo_last_release": "2025-10-12", + "repo_maintainers": [ + "kjd", + "hugovk", + "jdufresne", + "jribbens", + "slingamn" + ], + "repo_owner": "kjd", + "repo_stars": 273, + "repo_topics": [ + "dns", + "hacktoberfest", + "idna", + "python", + "unicode" + ], + "repo_url": "https://github.com/kjd/idna", + "version": "3.11" } }, { @@ -175,85 +209,56 @@ "license": "MPL-2.0", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-12-15", + "repo_last_commit": "2025-12-22", "repo_maintainers": [ "Lukasa", "alex", "sigmavirus24" ], "repo_owner": "certifi", - "repo_stars": 942, + "repo_stars": 943, "repo_url": "https://github.com/certifi/python-certifi", "version": "2025.11.12" } }, { - "id": "colorama", - "meta": { - "description": "Cross-platform colored terminal text.", - "repo_archived": false, - "repo_language": "Python", - "repo_last_commit": "2025-07-09", - "repo_maintainers": [ - "tartley", - "wiggin15", - "hugovk", - "njsmith", - "jdufresne" - ], - "repo_owner": "tartley", - "repo_stars": 3756, - "repo_url": "https://github.com/tartley/colorama", - "version": "0.4.6" - } - }, - { - "id": "annotated-types", + "id": "httpcore", "meta": { - "description": "Reusable constraint types to use with typing.Annotated", + "description": "A minimal low-level HTTP client.", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-11-10", - "repo_last_release": "2024-05-20", + "repo_last_commit": "2025-12-06", + "repo_last_release": "2025-04-24", "repo_maintainers": [ - 
"adriangb", - "samuelcolvin", - "Zac-HD", - "hugovk", - "Viicos" + "lovelydinosaur", + "florimondmanca", + "karpetrosyan", + "cdeler" ], - "repo_owner": "annotated-types", - "repo_stars": 584, - "repo_url": "https://github.com/annotated-types/annotated-types", - "version": "0.7.0" + "repo_owner": "encode", + "repo_stars": 527, + "repo_url": "https://github.com/encode/httpcore", + "version": "1.0.9" } }, { - "id": "pydantic-core", + "id": "colorama", "meta": { - "description": "Core functionality for Pydantic validation and serialization", + "description": "Cross-platform colored terminal text.", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-11-10", - "repo_last_release": "2025-11-04", + "repo_last_commit": "2025-07-09", "repo_maintainers": [ - "samuelcolvin", - "davidhewitt", - "adriangb", - "sydney-runkle" - ], - "repo_owner": "pydantic", - "repo_stars": 1741, - "repo_topics": [ - "json-schema", - "parsing", - "pydantic", - "rust", - "schema", - "validation" + "tartley", + "wiggin15", + "hugovk", + "njsmith", + "jdufresne" ], - "repo_url": "https://github.com/pydantic/pydantic-core", - "version": "2.41.5" + "repo_owner": "tartley", + "repo_stars": 3758, + "repo_url": "https://github.com/tartley/colorama", + "version": "0.4.6" } }, { @@ -263,8 +268,8 @@ "license": "Apache-2.0", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-12-15", - "repo_last_release": "2025-12-15", + "repo_last_commit": "2025-12-19", + "repo_last_release": "2025-12-19", "repo_maintainers": [ "stainless-bot", "RobertCraigie", @@ -272,40 +277,45 @@ "rachellim" ], "repo_owner": "openai", - "repo_stars": 29492, + "repo_stars": 29537, "repo_topics": [ "openai", "python" ], "repo_url": "https://github.com/openai/openai-python", - "version": "2.12.0" + "version": "2.14.0" } }, { - "id": "jiter", + "id": "distro", "meta": { - "description": "Fast iterable JSON parser.", + "author": "Nir Cohen", + "description": "Distro - an OS platform 
information API", + "license": "Apache License, Version 2.0", "repo_archived": false, - "repo_language": "Rust", - "repo_last_commit": "2025-11-09", - "repo_last_release": "2025-11-09", + "repo_language": "Python", + "repo_last_commit": "2025-11-14", + "repo_last_release": "2024-01-14", "repo_maintainers": [ - "samuelcolvin", - "davidhewitt", - "Viicos", - "jessekrubin", - "msimacek" + "nir0s", + "andy-maier", + "jdufresne", + "HorlogeSkynet", + "SethMichaelLarson" ], - "repo_owner": "pydantic", - "repo_stars": 479, + "repo_owner": "python-distro", + "repo_stars": 279, "repo_topics": [ - "json", - "json-parser", - "pydantic", - "rust" + "distribution", + "distro", + "hacktoberfest", + "linux", + "linux-distribution", + "python", + "python-library" ], - "repo_url": "https://github.com/pydantic/jiter", - "version": "0.12.0" + "repo_url": "https://github.com/python-distro/distro", + "version": "1.9.0" } }, { @@ -314,7 +324,7 @@ "description": "Data validation using Python type hints", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-12-15", + "repo_last_commit": "2025-12-21", "repo_last_release": "2025-11-26", "repo_maintainers": [ "samuelcolvin", @@ -323,7 +333,7 @@ "dmontagu" ], "repo_owner": "pydantic", - "repo_stars": 26169, + "repo_stars": 26251, "repo_topics": [ "hints", "json-schema", @@ -341,51 +351,6 @@ "version": "2.12.5" } }, - { - "id": "exceptiongroup", - "meta": { - "description": "Backport of PEP 654 (exception groups)", - "repo_archived": false, - "repo_language": "Python", - "repo_last_commit": "2025-11-21", - "repo_last_release": "2025-11-21", - "repo_maintainers": [ - "agronholm", - "jakkdl", - "Zac-HD", - "cfbolz" - ], - "repo_owner": "agronholm", - "repo_stars": 48, - "repo_url": "https://github.com/agronholm/exceptiongroup", - "version": "1.3.1" - } - }, - { - "id": "anyio", - "meta": { - "description": "High-level concurrency and networking framework on top of asyncio or Trio", - "repo_archived": false, - 
"repo_language": "Python", - "repo_last_commit": "2025-12-16", - "repo_last_release": "2025-11-30", - "repo_maintainers": [ - "agronholm", - "graingert", - "gschaffner", - "davidbrochart" - ], - "repo_owner": "agronholm", - "repo_stars": 2325, - "repo_topics": [ - "async-await", - "asyncio", - "trio" - ], - "repo_url": "https://github.com/agronholm/anyio", - "version": "4.12.0" - } - }, { "id": "sniffio", "meta": { @@ -402,7 +367,7 @@ "hugovk" ], "repo_owner": "python-trio", - "repo_stars": 143, + "repo_stars": 144, "repo_topics": [ "async", "asyncio", @@ -414,54 +379,89 @@ } }, { - "id": "typing-inspection", + "id": "tqdm", "meta": { - "description": "Runtime typing introspection tools", + "description": "Fast, Extensible Progress Meter", + "license": "MPL-2.0 AND MIT", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": "2025-10-01", - "repo_last_release": "2025-10-01", + "repo_last_commit": "2025-05-22", + "repo_last_release": "2024-11-27", "repo_maintainers": [ - "Viicos", - "samuelcolvin", - "cdce8p" + "casperdcl", + "lrq3000", + "altendky", + "hadim", + "richardsheridan" ], - "repo_owner": "pydantic", - "repo_stars": 56, - "repo_url": "https://github.com/pydantic/typing-inspection", - "version": "0.4.2" + "repo_owner": "tqdm", + "repo_stars": 30795, + "repo_topics": [ + "cli", + "closember", + "console", + "discord", + "gui", + "jupyter", + "keras", + "meter", + "pandas", + "parallel", + "progress", + "progress-bar", + "progressbar", + "progressmeter", + "python", + "rate", + "telegram", + "terminal", + "time", + "utilities" + ], + "repo_url": "https://github.com/tqdm/tqdm", + "version": "4.67.1" } }, { - "id": "distro", + "id": "typing-extensions", "meta": { - "author": "Nir Cohen", - "description": "Distro - an OS platform information API", - "license": "Apache License, Version 2.0", + "description": "Backported and Experimental Type Hints for Python 3.9+", "repo_archived": false, "repo_language": "Python", - "repo_last_commit": 
"2025-11-14", - "repo_last_release": "2024-01-14", + "repo_last_commit": "2025-12-02", + "repo_last_release": "2025-08-25", "repo_maintainers": [ - "nir0s", - "andy-maier", - "jdufresne", - "HorlogeSkynet", - "SethMichaelLarson" + "JelleZijlstra", + "AlexWaygood", + "srittau", + "brianschubert", + "Daraan" ], - "repo_owner": "python-distro", - "repo_stars": 278, - "repo_topics": [ - "distribution", - "distro", - "hacktoberfest", - "linux", - "linux-distribution", - "python", - "python-library" + "repo_owner": "python", + "repo_stars": 543, + "repo_url": "https://github.com/python/typing_extensions", + "version": "4.15.0" + } + }, + { + "id": "annotated-types", + "meta": { + "description": "Reusable constraint types to use with typing.Annotated", + "repo_archived": false, + "repo_language": "Python", + "repo_last_commit": "2025-11-10", + "repo_last_release": "2024-05-20", + "repo_maintainers": [ + "adriangb", + "samuelcolvin", + "Zac-HD", + "hugovk", + "Viicos" ], - "repo_url": "https://github.com/python-distro/distro", - "version": "1.9.0" + "repo_owner": "annotated-types", + "repo_stars": 585, + "repo_url": "https://github.com/annotated-types/annotated-types", + "version": "0.7.0" } } ], @@ -498,6 +498,26 @@ "from": "openai", "to": "typing-extensions" }, + { + "from": "httpx", + "to": "anyio" + }, + { + "from": "httpx", + "to": "certifi" + }, + { + "from": "httpx", + "to": "httpcore" + }, + { + "from": "httpx", + "to": "idna" + }, + { + "from": "tqdm", + "to": "colorama" + }, { "from": "anyio", "to": "exceptiongroup" @@ -527,24 +547,12 @@ "to": "typing-inspection" }, { - "from": "httpx", - "to": "anyio" - }, - { - "from": "httpx", + "from": "httpcore", "to": "certifi" }, { - "from": "httpx", - "to": "httpcore" - }, - { - "from": "httpx", - "to": "idna" - }, - { - "from": "tqdm", - "to": "colorama" + "from": "httpcore", + "to": "h11" }, { "from": "exceptiongroup", @@ -554,21 +562,13 @@ "from": "annotated-types", "to": "typing-extensions" }, - { - "from": 
"pydantic-core", - "to": "typing-extensions" - }, { "from": "typing-inspection", "to": "typing-extensions" }, { - "from": "httpcore", - "to": "certifi" - }, - { - "from": "httpcore", - "to": "h11" + "from": "pydantic-core", + "to": "typing-extensions" } ] } diff --git a/examples/real/rspec.json b/examples/real/rspec.json index d02e411..8c8bf71 100644 --- a/examples/real/rspec.json +++ b/examples/real/rspec.json @@ -1,34 +1,11 @@ { "nodes": [ { - "id": "rspec-core", - "meta": { - "author": "Steven Baker, David Chelimsky, Chad Humphries, Myron Marston", - "description": "BDD for Ruby. RSpec runner and example groups.", - "downloads": 1089707881, - "license": "MIT", - "repo_archived": false, - "repo_language": "Ruby", - "repo_last_commit": "2025-12-12", - "repo_maintainers": [ - "myronmarston", - "JonRowe", - "dchelimsky", - "alindeman", - "xaviershay" - ], - "repo_owner": "rspec", - "repo_stars": 77, - "repo_url": "https://github.com/rspec/rspec", - "version": "3.13.6" - } - }, - { - "id": "rspec-expectations", + "id": "rspec-mocks", "meta": { "author": "Steven Baker, David Chelimsky, Myron Marston", - "description": "rspec-expectations provides a simple, readable API to express expected outcomes of a code example.", - "downloads": 1090827252, + "description": "RSpec's 'test double' framework, with support for stubbing and mocking", + "downloads": 1086156206, "license": "MIT", "repo_archived": false, "repo_language": "Ruby", @@ -41,17 +18,17 @@ "xaviershay" ], "repo_owner": "rspec", - "repo_stars": 77, + "repo_stars": 81, "repo_url": "https://github.com/rspec/rspec", - "version": "3.13.5" + "version": "3.13.7" } }, { - "id": "rspec-mocks", + "id": "rspec-support", "meta": { - "author": "Steven Baker, David Chelimsky, Myron Marston", - "description": "RSpec's 'test double' framework, with support for stubbing and mocking", - "downloads": 1084000936, + "author": "David Chelimsky, Myron Marson, Jon Rowe, Sam Phippen, Xaviery Shay, Bradley Schaefer", + "description": 
"Support utilities for RSpec gems", + "downloads": 1076638938, "license": "MIT", "repo_archived": false, "repo_language": "Ruby", @@ -64,9 +41,9 @@ "xaviershay" ], "repo_owner": "rspec", - "repo_stars": 77, + "repo_stars": 81, "repo_url": "https://github.com/rspec/rspec", - "version": "3.13.7" + "version": "3.13.6" } }, { @@ -74,7 +51,7 @@ "meta": { "author": "Austin Ziegler", "description": "Diff::LCS computes the difference between two Enumerable sequences using the\nMcIlroy-Hunt longest common subsequence (LCS) algorithm. It includes utilities\nto create a simple HTML diff output format and a standard diff-like tool.\n\nThis is release 1.6.1, providing a simple extension that allows for\nDiff::LCS::Change objects to be treated implicitly as arrays and fixes a number\nof formatting issues.\n\nRuby versions below 2.5 are soft-deprecated, which means that older versions are\nno longer part of the CI test suite. If any changes have been introduced that\nbreak those versions, bug reports and patches will be accepted, but it will be\nup to the reporter to verify any fixes prior to release. 
The next major release\nwill completely break compatibility.", - "downloads": 1113773186, + "downloads": 1116219630, "license": "MIT, Artistic-1.0-Perl, GPL-2.0-or-later", "repo_archived": false, "repo_language": "Ruby", @@ -86,17 +63,40 @@ "JonRowe" ], "repo_owner": "halostatue", - "repo_stars": 299, + "repo_stars": 300, "repo_url": "https://github.com/halostatue/diff-lcs", "version": "1.6.2" } }, { - "id": "rspec-support", + "id": "rspec", "meta": { - "author": "David Chelimsky, Myron Marson, Jon Rowe, Sam Phippen, Xaviery Shay, Bradley Schaefer", - "description": "Support utilities for RSpec gems", - "downloads": 1074451731, + "author": "Steven Baker, David Chelimsky, Myron Marston", + "description": "BDD for Ruby", + "downloads": 938043191, + "license": "MIT", + "repo_archived": false, + "repo_language": "Ruby", + "repo_last_commit": "2025-12-12", + "repo_maintainers": [ + "myronmarston", + "JonRowe", + "dchelimsky", + "alindeman", + "xaviershay" + ], + "repo_owner": "rspec", + "repo_stars": 81, + "repo_url": "https://github.com/rspec/rspec", + "version": "3.13.2" + } + }, + { + "id": "rspec-core", + "meta": { + "author": "Steven Baker, David Chelimsky, Chad Humphries, Myron Marston", + "description": "BDD for Ruby. 
RSpec runner and example groups.", + "downloads": 1091901505, "license": "MIT", "repo_archived": false, "repo_language": "Ruby", @@ -109,17 +109,17 @@ "xaviershay" ], "repo_owner": "rspec", - "repo_stars": 77, + "repo_stars": 81, "repo_url": "https://github.com/rspec/rspec", "version": "3.13.6" } }, { - "id": "rspec", + "id": "rspec-expectations", "meta": { "author": "Steven Baker, David Chelimsky, Myron Marston", - "description": "BDD for Ruby", - "downloads": 936385469, + "description": "rspec-expectations provides a simple, readable API to express expected outcomes of a code example.", + "downloads": 1092953359, "license": "MIT", "repo_archived": false, "repo_language": "Ruby", @@ -132,9 +132,9 @@ "xaviershay" ], "repo_owner": "rspec", - "repo_stars": 77, + "repo_stars": 81, "repo_url": "https://github.com/rspec/rspec", - "version": "3.13.2" + "version": "3.13.5" } } ], @@ -152,11 +152,7 @@ "to": "rspec-mocks" }, { - "from": "rspec-mocks", - "to": "diff-lcs" - }, - { - "from": "rspec-mocks", + "from": "rspec-core", "to": "rspec-support" }, { @@ -168,7 +164,11 @@ "to": "rspec-support" }, { - "from": "rspec-core", + "from": "rspec-mocks", + "to": "diff-lcs" + }, + { + "from": "rspec-mocks", "to": "rspec-support" } ] diff --git a/examples/real/serde.json b/examples/real/serde.json index f1fed86..15337f1 100644 --- a/examples/real/serde.json +++ b/examples/real/serde.json @@ -1,13 +1,91 @@ { "nodes": [ + { + "id": "serde_core", + "meta": { + "description": "Serde traits only, with no support for derive -- use the `serde` crate instead", + "downloads": 57875040, + "repo_archived": false, + "repo_language": "Rust", + "repo_last_commit": "2025-12-22", + "repo_last_release": "2025-09-27", + "repo_maintainers": [ + "dtolnay", + "erickt", + "Mingun", + "oli-obk", + "mitsuhiko" + ], + "repo_owner": "serde-rs", + "repo_stars": 10241, + "repo_topics": [ + "derive", + "no-std", + "rust", + "serde" + ], + "repo_url": "https://github.com/serde-rs/serde", + "version": "1.0.228" 
+ } + }, + { + "id": "serde_derive", + "meta": { + "description": "Macros 1.1 implementation of #[derive(Serialize, Deserialize)]", + "downloads": 694132674, + "repo_archived": false, + "repo_language": "Rust", + "repo_last_commit": "2025-12-22", + "repo_last_release": "2025-09-27", + "repo_maintainers": [ + "dtolnay", + "erickt", + "Mingun", + "oli-obk", + "mitsuhiko" + ], + "repo_owner": "serde-rs", + "repo_stars": 10241, + "repo_topics": [ + "derive", + "no-std", + "rust", + "serde" + ], + "repo_url": "https://github.com/serde-rs/serde", + "version": "1.0.228" + } + }, + { + "id": "proc-macro2", + "meta": { + "description": "A substitute implementation of the compiler's `proc_macro` API to decouple token-based libraries from the procedural macro use case.", + "downloads": 862284054, + "repo_archived": false, + "repo_language": "Rust", + "repo_last_commit": "2025-12-20", + "repo_last_release": "2025-10-23", + "repo_maintainers": [ + "dtolnay", + "alexcrichton", + "mystor", + "bjorn3", + "SergioBenitez" + ], + "repo_owner": "dtolnay", + "repo_stars": 885, + "repo_url": "https://github.com/dtolnay/proc-macro2", + "version": "1.0.103" + } + }, { "id": "quote", "meta": { "description": "Quasi-quoting macro quote!(...)", - "downloads": 833890336, + "downloads": 843664819, "repo_archived": false, "repo_language": "Rust", - "repo_last_commit": "2025-12-14", + "repo_last_commit": "2025-12-20", "repo_last_release": "2025-11-06", "repo_maintainers": [ "dtolnay", @@ -31,10 +109,10 @@ "id": "syn", "meta": { "description": "Parser for Rust source code", - "downloads": 1237024275, + "downloads": 1251312086, "repo_archived": false, "repo_language": "Rust", - "repo_last_commit": "2025-12-15", + "repo_last_commit": "2025-12-20", "repo_last_release": "2025-11-23", "repo_maintainers": [ "dtolnay", @@ -44,7 +122,7 @@ "CAD97" ], "repo_owner": "dtolnay", - "repo_stars": 3238, + "repo_stars": 3245, "repo_topics": [ "proc-macro" ], @@ -56,10 +134,10 @@ "id": "unicode-ident", "meta": { 
"description": "Determine whether characters have the XID_Start or XID_Continue properties according to Unicode Standard Annex #31", - "downloads": 655874819, + "downloads": 665270247, "repo_archived": false, "repo_language": "Rust", - "repo_last_commit": "2025-12-14", + "repo_last_commit": "2025-12-21", "repo_last_release": "2025-10-30", "repo_maintainers": [ "dtolnay", @@ -76,12 +154,11 @@ { "id": "serde", "meta": { - "description": "A serialization framework", - "downloads": 1000000, - "license": "MIT", + "description": "A generic serialization/deserialization framework", + "downloads": 750943583, "repo_archived": false, "repo_language": "Rust", - "repo_last_commit": "2025-11-20", + "repo_last_commit": "2025-12-22", "repo_last_release": "2025-09-27", "repo_maintainers": [ "dtolnay", @@ -91,35 +168,7 @@ "mitsuhiko" ], "repo_owner": "serde-rs", - "repo_stars": 10226, - "repo_topics": [ - "derive", - "no-std", - "rust", - "serde" - ], - "repo_url": "https://github.com/serde-rs/serde", - "version": "1.0.0" - } - }, - { - "id": "serde_derive", - "meta": { - "description": "Macros 1.1 implementation of #[derive(Serialize, Deserialize)]", - "downloads": 685582831, - "repo_archived": false, - "repo_language": "Rust", - "repo_last_commit": "2025-11-20", - "repo_last_release": "2025-09-27", - "repo_maintainers": [ - "dtolnay", - "erickt", - "Mingun", - "oli-obk", - "mitsuhiko" - ], - "repo_owner": "serde-rs", - "repo_stars": 10226, + "repo_stars": 10241, "repo_topics": [ "derive", "no-std", @@ -129,33 +178,15 @@ "repo_url": "https://github.com/serde-rs/serde", "version": "1.0.228" } - }, - { - "id": "proc-macro2", - "meta": { - "description": "A substitute implementation of the compiler's `proc_macro` API to decouple token-based libraries from the procedural macro use case.", - "downloads": 852478003, - "repo_archived": false, - "repo_language": "Rust", - "repo_last_commit": "2025-12-14", - "repo_last_release": "2025-10-23", - "repo_maintainers": [ - "dtolnay", - 
"alexcrichton", - "mystor", - "bjorn3", - "SergioBenitez" - ], - "repo_owner": "dtolnay", - "repo_stars": 886, - "repo_url": "https://github.com/dtolnay/proc-macro2", - "version": "1.0.103" - } } ], "edges": [ { "from": "serde", + "to": "serde_core" + }, + { + "from": "serde_core", "to": "serde_derive" }, { @@ -171,20 +202,20 @@ "to": "syn" }, { - "from": "syn", + "from": "quote", "to": "proc-macro2" }, - { - "from": "syn", - "to": "unicode-ident" - }, { "from": "proc-macro2", "to": "unicode-ident" }, { - "from": "quote", + "from": "syn", "to": "proc-macro2" + }, + { + "from": "syn", + "to": "unicode-ident" } ] } diff --git a/examples/real/yargs.json b/examples/real/yargs.json index a0435bd..0f63106 100644 --- a/examples/real/yargs.json +++ b/examples/real/yargs.json @@ -7,7 +7,7 @@ "license": "MIT", "repo_archived": false, "repo_language": "JavaScript", - "repo_last_commit": "2025-12-15", + "repo_last_commit": "2025-12-20", "repo_last_release": "2025-05-27", "repo_maintainers": [ "bcoe", @@ -16,7 +16,7 @@ "mleguen" ], "repo_owner": "yargs", - "repo_stars": 11406, + "repo_stars": 11410, "repo_url": "https://github.com/yargs/yargs", "version": "18.0.0" } @@ -43,25 +43,24 @@ } }, { - "id": "cliui", + "id": "yargs-parser", "meta": { "author": "Ben Coe", - "description": "easily create complex multi-column command-line-interfaces", + "description": "the mighty option parser used by yargs", "license": "ISC", "repo_archived": false, "repo_language": "JavaScript", "repo_last_commit": "2025-12-15", - "repo_last_release": "2025-03-17", + "repo_last_release": "2025-05-26", "repo_maintainers": [ "bcoe", - "greenkeeperio-bot", - "coreyfarrell", - "nexdrew" + "elas7", + "juergba" ], "repo_owner": "yargs", - "repo_stars": 383, - "repo_url": "https://github.com/yargs/cliui", - "version": "9.0.1" + "repo_stars": 520, + "repo_url": "https://github.com/yargs/yargs-parser", + "version": "22.0.0" } }, { @@ -87,26 +86,23 @@ } }, { - "id": "string-width", + "id": "get-east-asian-width", 
"meta": { "author": "Sindre Sorhus", - "description": "Get the visual width of a string - the number of columns required to display it", + "description": "Determine the East Asian Width of a Unicode character", "license": "MIT", "repo_archived": false, "repo_language": "JavaScript", - "repo_last_commit": "2025-09-01", - "repo_last_release": "2025-09-01", + "repo_last_commit": "2025-09-11", + "repo_last_release": "2025-09-09", "repo_maintainers": [ "sindresorhus", - "fisker", - "coreyfarrell", - "BendingBender", - "adam2k" + "fisker" ], "repo_owner": "sindresorhus", - "repo_stars": 513, - "repo_url": "https://github.com/sindresorhus/string-width", - "version": "8.1.0" + "repo_stars": 43, + "repo_url": "https://github.com/sindresorhus/get-east-asian-width", + "version": "1.4.0" } }, { @@ -150,7 +146,7 @@ "ExE-Boss" ], "repo_owner": "chalk", - "repo_stars": 446, + "repo_stars": 447, "repo_topics": [ "chalk" ], @@ -159,24 +155,48 @@ } }, { - "id": "yargs-parser", + "id": "string-width", + "meta": { + "author": "Sindre Sorhus", + "description": "Get the visual width of a string - the number of columns required to display it", + "license": "MIT", + "repo_archived": false, + "repo_language": "JavaScript", + "repo_last_commit": "2025-09-01", + "repo_last_release": "2025-09-01", + "repo_maintainers": [ + "sindresorhus", + "fisker", + "coreyfarrell", + "BendingBender", + "adam2k" + ], + "repo_owner": "sindresorhus", + "repo_stars": 513, + "repo_url": "https://github.com/sindresorhus/string-width", + "version": "8.1.0" + } + }, + { + "id": "cliui", "meta": { "author": "Ben Coe", - "description": "the mighty option parser used by yargs", + "description": "easily create complex multi-column command-line-interfaces", "license": "ISC", "repo_archived": false, "repo_language": "JavaScript", "repo_last_commit": "2025-12-15", - "repo_last_release": "2025-05-26", + "repo_last_release": "2025-03-17", "repo_maintainers": [ "bcoe", - "elas7", - "juergba" + "greenkeeperio-bot", + 
"coreyfarrell", + "nexdrew" ], "repo_owner": "yargs", - "repo_stars": 520, - "repo_url": "https://github.com/yargs/yargs-parser", - "version": "22.0.0" + "repo_stars": 383, + "repo_url": "https://github.com/yargs/cliui", + "version": "9.0.1" } }, { @@ -219,31 +239,11 @@ "kevva" ], "repo_owner": "chalk", - "repo_stars": 428, + "repo_stars": 430, "repo_url": "https://github.com/chalk/strip-ansi", "version": "7.1.2" } }, - { - "id": "get-east-asian-width", - "meta": { - "author": "Sindre Sorhus", - "description": "Determine the East Asian Width of a Unicode character", - "license": "MIT", - "repo_archived": false, - "repo_language": "JavaScript", - "repo_last_commit": "2025-09-11", - "repo_last_release": "2025-09-09", - "repo_maintainers": [ - "sindresorhus", - "fisker" - ], - "repo_owner": "sindresorhus", - "repo_stars": 43, - "repo_url": "https://github.com/sindresorhus/get-east-asian-width", - "version": "1.4.0" - } - }, { "id": "ansi-regex", "meta": { @@ -269,6 +269,10 @@ } ], "edges": [ + { + "from": "yargs", + "to": "string-width" + }, { "from": "yargs", "to": "y18n" @@ -290,8 +294,12 @@ "to": "get-caller-file" }, { - "from": "yargs", - "to": "string-width" + "from": "string-width", + "to": "get-east-asian-width" + }, + { + "from": "string-width", + "to": "strip-ansi" }, { "from": "cliui", @@ -305,29 +313,21 @@ "from": "cliui", "to": "wrap-ansi" }, - { - "from": "string-width", - "to": "get-east-asian-width" - }, - { - "from": "string-width", - "to": "strip-ansi" - }, { "from": "strip-ansi", "to": "ansi-regex" }, { "from": "wrap-ansi", - "to": "string-width" + "to": "strip-ansi" }, { "from": "wrap-ansi", - "to": "strip-ansi" + "to": "ansi-styles" }, { "from": "wrap-ansi", - "to": "ansi-styles" + "to": "string-width" } ] } diff --git a/examples/test/microservices.json b/examples/test/microservices.json index 5532634..7c90d03 100644 --- a/examples/test/microservices.json +++ b/examples/test/microservices.json @@ -74,44 +74,44 @@ ], "edges": [ { - "from": 
"user-db", - "to": "web-server" + "from": "web-server", + "to": "user-db" }, { - "from": "message-queue", - "to": "web-server" + "from": "web-server", + "to": "message-queue" }, { - "from": "logger", - "to": "web-server" + "from": "web-server", + "to": "logger" }, { - "from": "metrics", - "to": "web-server" + "from": "web-server", + "to": "metrics" }, { - "from": "message-queue", - "to": "worker-pool" + "from": "worker-pool", + "to": "message-queue" }, { - "from": "analytics", - "to": "worker-pool" + "from": "worker-pool", + "to": "analytics" }, { - "from": "external-api", - "to": "worker-pool" + "from": "worker-pool", + "to": "external-api" }, { - "from": "logger", - "to": "worker-pool" + "from": "worker-pool", + "to": "logger" }, { - "from": "user-db", - "to": "analytics" + "from": "analytics", + "to": "user-db" }, { - "from": "metrics", - "to": "analytics" + "from": "analytics", + "to": "metrics" } ] } diff --git a/internal/cli/cache.go b/internal/cli/cache.go index fe6d300..01bdff5 100644 --- a/internal/cli/cache.go +++ b/internal/cli/cache.go @@ -8,6 +8,8 @@ import ( "github.com/spf13/cobra" ) +// newCacheCmd creates the cache management command with subcommands for clearing and inspecting the cache. +// The cache stores HTTP responses from package registries to reduce network calls and improve performance. func newCacheCmd() *cobra.Command { cmd := &cobra.Command{ Use: "cache", @@ -20,6 +22,10 @@ func newCacheCmd() *cobra.Command { return cmd } +// newCacheClearCmd creates the "cache clear" subcommand. +// It removes all non-directory files from the cache directory. +// If the cache directory does not exist, the command prints "Cache is empty" and succeeds. +// Failed removals are silently skipped; only successful deletions are counted. func newCacheClearCmd() *cobra.Command { return &cobra.Command{ Use: "clear", @@ -54,6 +60,9 @@ func newCacheClearCmd() *cobra.Command { } } +// newCachePathCmd creates the "cache path" subcommand. 
+// It prints the absolute path to the cache directory. +// The directory may not exist if no cached responses have been stored yet. func newCachePathCmd() *cobra.Command { return &cobra.Command{ Use: "path", @@ -69,6 +78,11 @@ func newCachePathCmd() *cobra.Command { } } +// cacheDir returns the absolute path to the stacktower cache directory. +// The directory is located at $HOME/.cache/stacktower and follows XDG conventions. +// The directory is created on-demand by the deps package; this function only computes the path. +// +// It returns an error if the user's home directory cannot be determined. func cacheDir() (string, error) { home, err := os.UserHomeDir() if err != nil { diff --git a/internal/cli/log.go b/internal/cli/log.go index 7bc0b28..e411c0e 100644 --- a/internal/cli/log.go +++ b/internal/cli/log.go @@ -1,3 +1,32 @@ +// Package cli implements the stacktower command-line interface. +// +// This package provides commands for parsing dependency graphs from various +// package managers, rendering them as visualizations, and managing the HTTP +// response cache. The CLI is built using cobra and supports verbose logging +// via the charmbracelet/log library. +// +// # Commands +// +// The main commands are: +// - parse: Extract dependency graphs from package managers or manifest files +// - render: Generate SVG, PDF, or PNG visualizations +// - cache: Manage the HTTP response cache +// - pqtree: Debug tool for PQ-tree constraint visualization +// +// # Logging +// +// All commands support --verbose (-v) for debug-level logging. Loggers are +// passed through context.Context to allow structured progress tracking. +// +// # Example +// +// import "github.com/matzehuels/stacktower/internal/cli" +// +// func main() { +// if err := cli.Execute(); err != nil { +// os.Exit(1) +// } +// } package cli import ( @@ -8,6 +37,9 @@ import ( "github.com/charmbracelet/log" ) +// newLogger creates a new logger with timestamp formatting. 
+// The logger writes to w and filters messages at the specified level. +// Timestamps are formatted as "HH:MM:SS.ms" (e.g., "14:32:01.45"). func newLogger(w io.Writer, level log.Level) *log.Logger { return log.NewWithOptions(w, log.Options{ ReportTimestamp: true, @@ -16,27 +48,42 @@ func newLogger(w io.Writer, level log.Level) *log.Logger { }) } +// progress tracks the start time of an operation and logs completion with elapsed duration. +// It is safe for sequential use by a single goroutine; concurrent calls to done will race. type progress struct { logger *log.Logger start time.Time } +// newProgress creates a progress tracker that captures the current time as start. +// The returned progress should call done when the operation completes. func newProgress(l *log.Logger) *progress { return &progress{logger: l, start: time.Now()} } +// done logs msg along with the elapsed time since progress was created. +// The duration is rounded to the nearest millisecond. +// Example output: "Resolved 42 packages (1.234s)" func (p *progress) done(msg string) { p.logger.Infof("%s (%s)", msg, time.Since(p.start).Round(time.Millisecond)) } +// ctxKey is the type for context keys used in this package. +// Using a distinct type prevents collisions with other packages. type ctxKey int +// loggerKey is the context key for storing a logger. const loggerKey ctxKey = 0 +// withLogger returns a new context with the given logger attached. +// The logger can be retrieved later with loggerFromContext. func withLogger(ctx context.Context, l *log.Logger) context.Context { return context.WithValue(ctx, loggerKey, l) } +// loggerFromContext retrieves the logger from ctx. +// If no logger is attached, it returns log.Default(). +// This ensures commands always have a valid logger even if context setup fails. 
func loggerFromContext(ctx context.Context) *log.Logger { if l, ok := ctx.Value(loggerKey).(*log.Logger); ok { return l diff --git a/internal/cli/ordering.go b/internal/cli/ordering.go index 9eafe3e..85e9b83 100644 --- a/internal/cli/ordering.go +++ b/internal/cli/ordering.go @@ -11,6 +11,11 @@ import ( "github.com/matzehuels/stacktower/pkg/render/tower/ordering" ) +// optimalOrderer wraps ordering.OptimalSearch with progress logging and debug output. +// It logs initial solutions, improvements, periodic status updates (every 10 seconds), +// and warnings when the search encounters combinatorial bottlenecks. +// +// The orderer is not safe for concurrent use; it maintains internal state for logging. type optimalOrderer struct { ordering.OptimalSearch prog *progress @@ -20,6 +25,11 @@ type optimalOrderer struct { start, lastLog time.Time } +// newOptimalOrderer creates an optimal orderer with a timeout and logger from ctx. +// The timeoutSec parameter controls how long the search runs before returning the best solution found. +// Longer timeouts may find better orderings (fewer edge crossings) at the cost of increased runtime. +// +// The orderer logs progress updates including initial solutions, improvements, and periodic heartbeats. func newOptimalOrderer(ctx context.Context, timeoutSec int) ordering.Orderer { logger := loggerFromContext(ctx) o := &optimalOrderer{ @@ -37,6 +47,14 @@ func newOptimalOrderer(ctx context.Context, timeoutSec int) ordering.Orderer { return o } +// onProgress is called by the underlying OptimalSearch during the search. +// It logs the initial solution, improvements when bestScore decreases, and periodic heartbeats +// every 10 seconds to show the search is still running. 
+// +// Parameters: +// - explored: number of partial solutions examined +// - pruned: number of branches eliminated via bounds +// - bestScore: current best edge crossing count (lower is better) func (o *optimalOrderer) onProgress(explored, pruned, bestScore int) { o.lastExplored, o.lastPruned = explored, pruned if bestScore < 0 || (explored == 0 && pruned == 0) { @@ -61,6 +79,9 @@ func (o *optimalOrderer) onProgress(explored, pruned, bestScore int) { o.lastBest = bestScore } +// onDebug is called when the search completes to report diagnostic information. +// It logs the search space size, maximum depth reached, and identifies bottleneck rows +// with >100 candidate orderings that may have caused incomplete search. func (o *optimalOrderer) onDebug(info ordering.DebugInfo) { o.logger.Debugf("Search space: %d rows, max depth reached: %d/%d", info.TotalRows, info.MaxDepth, info.TotalRows) @@ -77,6 +98,10 @@ func (o *optimalOrderer) onDebug(info ordering.DebugInfo) { } } +// OrderRows implements ordering.Orderer by delegating to OptimalSearch and logging the final result. +// It counts edge crossings in the returned ordering and warns if crossings remain. +// +// If crossings > 0, users are advised to increase --ordering-timeout for better results. func (o *optimalOrderer) OrderRows(g *dag.DAG) map[int][]string { result := o.OptimalSearch.OrderRows(g) crossings := dag.CountCrossings(g, result) diff --git a/internal/cli/parse.go b/internal/cli/parse.go index f0cb2c9..9dfd6ce 100644 --- a/internal/cli/parse.go +++ b/internal/cli/parse.go @@ -22,6 +22,8 @@ import ( pkgio "github.com/matzehuels/stacktower/pkg/io" ) +// languages is the list of supported package ecosystems. +// Each language provides resolvers for package registries and manifest parsers. var languages = []*deps.Language{ python.Language, rust.Language, @@ -32,15 +34,20 @@ var languages = []*deps.Language{ golang.Language, } +// parseOpts holds the command-line flags for the parse command. 
+// These options control dependency resolution depth, caching, and metadata enrichment. type parseOpts struct { - maxDepth int - maxNodes int - enrich bool - refresh bool - output string - name string + maxDepth int // maximum dependency tree depth + maxNodes int // maximum total nodes to fetch + enrich bool // whether to fetch GitHub metadata + refresh bool // bypass HTTP cache + output string // output file path (stdout if empty) + name string // override project name for manifest parsing } +// resolveOptions converts parseOpts into deps.Options for the resolver. +// If metadata enrichment fails (e.g., missing GITHUB_TOKEN), a warning is logged +// and enrichment is disabled rather than failing the entire operation. func (o *parseOpts) resolveOptions(ctx context.Context) deps.Options { logger := loggerFromContext(ctx) providers, err := metadataProviders(o.enrich) @@ -57,6 +64,14 @@ func (o *parseOpts) resolveOptions(ctx context.Context) deps.Options { } } +// newParseCmd creates the parse command with language-specific subcommands. +// It supports parsing from package registries (e.g., "parse python requests") +// or from local manifest files (e.g., "parse python poetry.lock"). +// +// Default options: +// - maxDepth: 10 levels of transitive dependencies +// - maxNodes: 5000 packages maximum +// - enrich: true (fetch GitHub metadata if GITHUB_TOKEN is set) func newParseCmd() *cobra.Command { opts := parseOpts{maxDepth: 10, maxNodes: 5000, enrich: true} @@ -88,6 +103,9 @@ Examples: return cmd } +// langCmd creates a language-specific parse subcommand (e.g., "parse python"). +// The command auto-detects whether the argument is a package name or manifest file +// using smartParse. 
func langCmd(lang *deps.Language, opts *parseOpts) *cobra.Command { cmd := &cobra.Command{ Use: fmt.Sprintf("%s ", lang.Name), @@ -106,6 +124,8 @@ func langCmd(lang *deps.Language, opts *parseOpts) *cobra.Command { return cmd } +// registryCmd creates the "registry" subcommand for explicit registry selection. +// Example: "parse python registry pypi requests" to force PyPI even if the default changes. func registryCmd(lang *deps.Language, opts *parseOpts) *cobra.Command { return &cobra.Command{ Use: fmt.Sprintf("registry <%s> ", lang.DefaultRegistry), @@ -116,11 +136,18 @@ func registryCmd(lang *deps.Language, opts *parseOpts) *cobra.Command { if err != nil { return err } - return resolve(c.Context(), opts, res, args[1]) + pkg := args[1] + // Normalize package name if the language supports it + if lang.NormalizeName != nil { + pkg = lang.NormalizeName(pkg) + } + return resolve(c.Context(), opts, res, pkg) }, } } +// manifestCmd creates the "manifest" subcommand for explicit manifest type selection. +// Example: "parse python manifest poetry poetry.lock" to force Poetry format. func manifestCmd(lang *deps.Language, opts *parseOpts) *cobra.Command { return &cobra.Command{ Use: fmt.Sprintf("manifest <%s> ", strings.Join(lang.ManifestTypes, "|")), @@ -140,6 +167,9 @@ func manifestCmd(lang *deps.Language, opts *parseOpts) *cobra.Command { } } +// smartParse auto-detects whether arg is a manifest file or package name. +// If looksLikeFile returns true and the language supports manifests, it parses as a file. +// Otherwise, it resolves as a package from the default registry. 
func smartParse(ctx context.Context, lang *deps.Language, opts *parseOpts, arg string) error { if lang.HasManifests() && looksLikeFile(arg) { return parseManifestAuto(ctx, lang, opts, arg) @@ -148,9 +178,15 @@ func smartParse(ctx context.Context, lang *deps.Language, opts *parseOpts, arg s if err != nil { return err } + // Normalize package name if the language supports it (e.g., Maven coordinate normalization) + if lang.NormalizeName != nil { + arg = lang.NormalizeName(arg) + } return resolve(ctx, opts, res, arg) } +// parseManifestAuto attempts to detect the manifest file type and parse it. +// It returns an error if detection fails or if the file format is unsupported. func parseManifestAuto(ctx context.Context, lang *deps.Language, opts *parseOpts, filePath string) error { res, err := lang.Resolver() if err != nil { @@ -163,6 +199,8 @@ func parseManifestAuto(ctx context.Context, lang *deps.Language, opts *parseOpts return parseManifest(ctx, opts, parser, filePath) } +// resolve fetches the dependency graph for pkg from the given resolver. +// It logs progress and writes the resulting graph as JSON to opts.output (or stdout). func resolve(ctx context.Context, opts *parseOpts, res deps.Resolver, pkg string) error { logger := loggerFromContext(ctx) logger.Infof("Resolving %s from %s", pkg, res.Name()) @@ -177,6 +215,8 @@ func resolve(ctx context.Context, opts *parseOpts, res deps.Resolver, pkg string return writeGraph(g, opts.output, logger) } +// parseManifest parses a manifest file and writes the resulting dependency graph. +// If opts.name is set, it renames the root node from "__project__" to the specified name. 
func parseManifest(ctx context.Context, opts *parseOpts, parser deps.ManifestParser, filePath string) error { logger := loggerFromContext(ctx) logger.Infof("Parsing %s (%s)", filePath, parser.Type()) @@ -201,6 +241,9 @@ func parseManifest(ctx context.Context, opts *parseOpts, parser deps.ManifestPar return writeGraph(g, opts.output, logger) } +// looksLikeFile returns true if arg appears to be a file path rather than a package name. +// It checks if the file exists or has a known manifest extension (.txt, .lock, .toml, .xml). +// Known manifest files like "go.mod" and "pom.xml" always return true. func looksLikeFile(arg string) bool { if _, err := os.Stat(arg); err == nil { return true @@ -214,6 +257,8 @@ func looksLikeFile(arg string) bool { lower == "pom.xml" } +// writeGraph serializes g as JSON to the specified path (or stdout if empty). +// The logger is notified on success with the output path. func writeGraph(g *dag.DAG, path string, logger interface{ Infof(string, ...any) }) error { out, err := openOutput(path) if err != nil { @@ -230,6 +275,9 @@ func writeGraph(g *dag.DAG, path string, logger interface{ Infof(string, ...any) return nil } +// metadataProviders returns GitHub metadata providers if enrich is true. +// It requires the GITHUB_TOKEN environment variable. +// If the token is missing or invalid, an error is returned. func metadataProviders(enrich bool) ([]deps.MetadataProvider, error) { if !enrich { return nil, nil @@ -245,10 +293,16 @@ func metadataProviders(enrich bool) ([]deps.MetadataProvider, error) { return []deps.MetadataProvider{gh}, nil } +// nopCloser wraps an io.Writer with a no-op Close method. +// It is used to make os.Stdout compatible with io.WriteCloser. type nopCloser struct{ io.Writer } +// Close implements io.Closer with a no-op. func (nopCloser) Close() error { return nil } +// openOutput returns a WriteCloser for the given path. +// If path is empty, it returns os.Stdout wrapped in nopCloser. 
+// Otherwise, it creates the file at path, overwriting if it exists. func openOutput(path string) (io.WriteCloser, error) { if path == "" { return nopCloser{os.Stdout}, nil diff --git a/internal/cli/pqtree.go b/internal/cli/pqtree.go index 51a82e7..c79d997 100644 --- a/internal/cli/pqtree.go +++ b/internal/cli/pqtree.go @@ -11,6 +11,18 @@ import ( "github.com/matzehuels/stacktower/pkg/dag/perm" ) +// newPQTreeCmd creates the pqtree command for visualizing PQ-tree constraints. +// This is a debug tool for testing adjacency constraint solving. +// +// A PQ-tree represents all valid permutations of elements subject to adjacency constraints. +// Each constraint specifies elements that must appear consecutively (but in any order). +// +// Example: +// +// stacktower pqtree --labels A,B,C,D -o tree.svg 0,1 +// +// This creates a tree with 4 elements where indices 0 and 1 (A and B) must be adjacent, +// allowing permutations like ABCD, BACD, CDAB, DCBA but rejecting ACBD, CADB, etc. func newPQTreeCmd() *cobra.Command { var output string var labels string @@ -78,6 +90,9 @@ Example: "0,1" means elements 0 and 1 must be adjacent.`, return cmd } +// parseConstraint parses a constraint string like "0,1,2" into a slice of indices. +// Each index must be a valid integer. At least 2 indices are required. +// Leading and trailing whitespace is trimmed from each index. 
func parseConstraint(s string) ([]int, error) { parts := strings.Split(s, ",") if len(parts) < 2 { diff --git a/internal/cli/render.go b/internal/cli/render.go index 29c81b5..8a744fe 100644 --- a/internal/cli/render.go +++ b/internal/cli/render.go @@ -21,32 +21,44 @@ import ( ) const ( - styleSimple = "simple" - styleHanddrawn = "handdrawn" - defaultWidth = 800 - defaultHeight = 600 - defaultSeed = 42 + styleSimple = "simple" // plain rectangular blocks + styleHanddrawn = "handdrawn" // hand-drawn sketch style with randomized widths + defaultWidth = 800 // default SVG viewport width + defaultHeight = 600 // default SVG viewport height + defaultSeed = 42 // random seed for reproducible randomization ) +// renderOpts holds the command-line flags for the render command. +// These options control visualization style, layout algorithms, and output formats. type renderOpts struct { - output string - vizTypes []string - formats []string - detailed bool - normalize bool - width float64 - height float64 - showEdges bool - style string - ordering string - orderTimeout int - randomize bool - merge bool - nebraska bool - popups bool - topDown bool + output string // output file path (or base path for multiple outputs) + vizTypes []string // visualization types: "tower", "nodelink" + formats []string // output formats: "svg", "pdf", "png", "json" + detailed bool // show detailed metadata in nodelink diagrams + normalize bool // apply DAG normalization (remove cycles, transitive edges, add subdividers) + width float64 // viewport width in pixels + height float64 // viewport height in pixels + showEdges bool // draw dependency edges in tower view + style string // visual style: "simple" or "handdrawn" + ordering string // ordering algorithm: "optimal" or "barycentric" + orderTimeout int // timeout in seconds for optimal search + randomize bool // randomize block widths for hand-drawn effect + merge bool // merge subdivider blocks into single towers + nebraska bool // show 
Nebraska guy maintainer ranking + popups bool // enable hover popups with metadata + topDown bool // use top-down width allocation (roots get equal width) } +// newRenderCmd creates the render command for generating visualizations. +// It supports multiple visualization types (tower, nodelink) and output formats (SVG, PDF, PNG, JSON). +// +// Default settings: +// - normalize: true (clean up cycles and transitive edges) +// - style: handdrawn (sketch-style with randomized widths) +// - width: 800px, height: 600px +// - ordering: optimal (with 60s timeout) +// - merge: true (combine subdividers into single towers) +// - popups: true (show metadata on hover) func newRenderCmd() *cobra.Command { var vizTypesStr, formatsStr string opts := renderOpts{ @@ -96,6 +108,8 @@ func newRenderCmd() *cobra.Command { return cmd } +// parseVizTypes parses the --type flag into a slice of visualization types. +// If empty, defaults to ["tower"]. func parseVizTypes(s string) []string { if s == "" { return []string{"tower"} @@ -103,6 +117,8 @@ func parseVizTypes(s string) []string { return strings.Split(s, ",") } +// parseFormats parses the --format flag into a slice of output formats. +// If empty, defaults to ["svg"]. func parseFormats(s string) []string { if s == "" { return []string{"svg"} @@ -110,8 +126,11 @@ func parseFormats(s string) []string { return strings.Split(s, ",") } +// validFormats is the set of supported output formats. var validFormats = map[string]bool{"svg": true, "json": true, "pdf": true, "png": true} +// validateFormats checks that all requested formats are valid. +// It returns an error if any format is not in validFormats. func validateFormats(formats []string) error { for _, f := range formats { if !validFormats[f] { @@ -121,6 +140,7 @@ func validateFormats(formats []string) error { return nil } +// validateStyle checks that the style is either "simple" or "handdrawn". 
func validateStyle(s string) error { if s != styleSimple && s != styleHanddrawn { return fmt.Errorf("invalid style: %s (must be 'simple' or 'handdrawn')", s) @@ -128,6 +148,10 @@ func validateStyle(s string) error { return nil } +// basePath derives the base output path from the output and input file paths. +// If output is empty, it strips the extension from input. +// If output has a format extension (.svg, .pdf, etc.), it strips that extension. +// This is used when generating multiple files (e.g., graph_tower.svg, graph_nodelink.svg). func basePath(output, input string) string { if output == "" { return strings.TrimSuffix(input, filepath.Ext(input)) @@ -140,6 +164,8 @@ func basePath(output, input string) string { return output } +// runRender loads the graph from input, optionally normalizes it, and renders it to the requested formats. +// If opts.normalize is true, the graph is cleaned up by removing cycles, transitive edges, and adding subdividers. func runRender(ctx context.Context, input string, opts *renderOpts) error { logger := loggerFromContext(ctx) logger.Infof("Rendering %s", input) @@ -151,9 +177,11 @@ func runRender(ctx context.Context, input string, opts *renderOpts) error { logger.Infof("Loaded graph: %d nodes, %d edges", g.NodeCount(), g.EdgeCount()) if opts.normalize { - before := g.NodeCount() - g = dagtransform.Normalize(g) - logger.Infof("Normalized: %d nodes (%+d), %d edges", g.NodeCount(), g.NodeCount()-before, g.EdgeCount()) + result := dagtransform.Normalize(g) + logger.Infof("Normalized: %d nodes, %d edges (removed %d cycles, %d transitive edges; added %d subdividers, %d separators)", + g.NodeCount(), g.EdgeCount(), + result.CyclesRemoved, result.TransitiveEdgesRemoved, + result.SubdividersAdded, result.SeparatorsAdded) } if len(opts.vizTypes) == 1 && len(opts.formats) == 1 { @@ -162,6 +190,8 @@ func runRender(ctx context.Context, input string, opts *renderOpts) error { return renderMultiple(ctx, g, input, opts) } +// renderSingle 
renders a single visualization type and format to a single output file. +// If opts.output is empty, the output path is derived from the input file name. func renderSingle(ctx context.Context, g *dag.DAG, vizType, format string, input string, opts *renderOpts) error { logger := loggerFromContext(ctx) @@ -191,6 +221,8 @@ func renderSingle(ctx context.Context, g *dag.DAG, vizType, format string, input return nil } +// renderMultiple renders all requested visualization type/format combinations to separate files. +// File names are derived from basePath and include the visualization type when multiple types are requested. func renderMultiple(ctx context.Context, g *dag.DAG, input string, opts *renderOpts) error { base := basePath(opts.output, input) @@ -204,6 +236,8 @@ func renderMultiple(ctx context.Context, g *dag.DAG, input string, opts *renderO return nil } +// renderAndWrite renders a single viz/format combination and writes it to a file. +// If the combination is unsupported (e.g., nodelink JSON), it is silently skipped with a debug log. func renderAndWrite(ctx context.Context, g *dag.DAG, vizType, format, basePath string, opts *renderOpts) error { logger := loggerFromContext(ctx) @@ -238,8 +272,11 @@ func renderAndWrite(ctx context.Context, g *dag.DAG, vizType, format, basePath s return nil } +// errSkipFormat is a sentinel error indicating an unsupported format/visualization combination. var errSkipFormat = fmt.Errorf("skip unsupported format") +// renderGraph dispatches to the appropriate renderer based on vizType. +// It returns errSkipFormat for unsupported combinations (e.g., nodelink JSON). func renderGraph(ctx context.Context, g *dag.DAG, vizType, format string, opts *renderOpts) ([]byte, error) { switch vizType { case "nodelink": @@ -251,6 +288,8 @@ func renderGraph(ctx context.Context, g *dag.DAG, vizType, format string, opts * } } +// renderNodeLink generates a node-link (force-directed) diagram using Graphviz. 
+// It supports SVG, PDF, and PNG formats. JSON is not supported (returns errSkipFormat). func renderNodeLink(ctx context.Context, g *dag.DAG, format string, opts *renderOpts) ([]byte, error) { logger := loggerFromContext(ctx) logger.Info("Generating node-link diagram") @@ -273,6 +312,9 @@ func renderNodeLink(ctx context.Context, g *dag.DAG, format string, opts *render } } +// renderTower generates a tower visualization with layered blocks. +// It computes the layout using the specified ordering algorithm (optimal or barycentric), +// optionally merges subdividers and randomizes widths, then renders to the requested format. func renderTower(ctx context.Context, g *dag.DAG, format string, opts *renderOpts) ([]byte, error) { logger := loggerFromContext(ctx) @@ -317,6 +359,8 @@ func renderTower(ctx context.Context, g *dag.DAG, format string, opts *renderOpt } } +// buildLayoutOpts constructs layout.Options based on the ordering algorithm and width flow settings. +// The "optimal" algorithm uses branch-and-bound search with a timeout; "barycentric" is the default. func buildLayoutOpts(ctx context.Context, opts *renderOpts) ([]layout.Option, error) { logger := loggerFromContext(ctx) var layoutOpts []layout.Option @@ -338,6 +382,8 @@ func buildLayoutOpts(ctx context.Context, opts *renderOpts) ([]layout.Option, er return layoutOpts, nil } +// buildRenderOpts constructs SVG rendering options based on style, edges, and feature flags. +// The handdrawn style supports Nebraska maintainer ranking and hover popups. func buildRenderOpts(g *dag.DAG, opts *renderOpts) []sink.SVGOption { result := []sink.SVGOption{sink.WithGraph(g)} if opts.showEdges { @@ -358,6 +404,7 @@ func buildRenderOpts(g *dag.DAG, opts *renderOpts) []sink.SVGOption { return result } +// buildJSONOpts constructs JSON rendering options including graph metadata and feature flags. 
func buildJSONOpts(g *dag.DAG, opts *renderOpts) []sink.JSONOption { result := []sink.JSONOption{sink.WithJSONGraph(g)} if opts.merge { diff --git a/internal/cli/root.go b/internal/cli/root.go index 09768c4..472b104 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -10,17 +10,45 @@ import ( ) var ( - version string - commit string - date string + version string // semantic version (e.g., "v1.2.3") + commit string // git commit SHA + date string // build timestamp ) +// SetVersion sets the version information displayed by --version. +// This is typically called by the main package during initialization with values +// injected via ldflags at build time. +// +// Parameters: +// - v: semantic version string (e.g., "v1.2.3") +// - c: git commit SHA (short or long form) +// - d: build timestamp (e.g., "2025-12-20T14:32:01Z") func SetVersion(v, c, d string) { version = v commit = c date = d } +// Execute runs the stacktower CLI and returns an error if any command fails. +// This is the main entry point for the CLI application. +// +// The function sets up the root command with all subcommands (parse, render, cache, pqtree), +// configures logging based on the --verbose flag, and executes the command tree. +// +// Logging: +// - Default: info level (logs to stderr) +// - With --verbose (-v): debug level +// +// The logger is attached to the context and accessible to all commands via loggerFromContext. +// +// Example: +// +// func main() { +// cli.SetVersion("v1.0.0", "abc123", "2025-12-20") +// if err := cli.Execute(); err != nil { +// os.Exit(1) +// } +// } func Execute() error { var verbose bool diff --git a/pkg/dag/crossings.go b/pkg/dag/crossings.go index e3b59d4..0d16d46 100644 --- a/pkg/dag/crossings.go +++ b/pkg/dag/crossings.go @@ -5,15 +5,31 @@ import ( "slices" ) -// CrossingWorkspace provides reusable buffers for crossing calculations. -// Create with NewCrossingWorkspace and reuse across multiple calls. 
+// CrossingWorkspace provides reusable buffers for crossing calculations to avoid +// repeated allocations. Create with [NewCrossingWorkspace] and reuse across multiple +// calls to [CountCrossingsIdx]. This optimization matters when evaluating millions +// of candidate orderings during branch-and-bound search. +// +// The workspace is not safe for concurrent use - each goroutine should have its own. type CrossingWorkspace struct { - ft []int - pos []int + ft []int // Fenwick tree for counting inversions + pos []int // Position lookup buffer } -// NewCrossingWorkspace creates a workspace for counting crossings. -// maxWidth should be the maximum number of nodes in any row. +// NewCrossingWorkspace creates a workspace for counting crossings efficiently. +// The maxWidth parameter should be the maximum number of nodes in any single row +// across all calls that will use this workspace. Using a workspace smaller than +// needed will cause CountCrossingsIdx to produce incorrect results. +// +// For typical use, set maxWidth to the size of the largest row in your graph: +// +// maxWidth := 0 +// for _, row := range g.RowIDs() { +// if n := len(g.NodesInRow(row)); n > maxWidth { +// maxWidth = n +// } +// } +// ws := dag.NewCrossingWorkspace(maxWidth) func NewCrossingWorkspace(maxWidth int) *CrossingWorkspace { return &CrossingWorkspace{ ft: make([]int, maxWidth+2), @@ -21,7 +37,22 @@ func NewCrossingWorkspace(maxWidth int) *CrossingWorkspace { } } -// CountCrossings returns the total edge crossings for the given row orderings. +// CountCrossings returns the total number of edge crossings for the given row orderings. +// It sums the crossings between each pair of consecutive rows. The orders map should +// contain node IDs in left-to-right order for each row. Rows without entries in the +// map are treated as empty. 
+// +// Example: +// +// orders := map[int][]string{ +// 0: {"app", "cli"}, // row 0: app on left, cli on right +// 1: {"lib1", "lib2", "lib3"}, // row 1: three nodes +// } +// crossings := dag.CountCrossings(g, orders) +// +// This function is typically used during optimization to evaluate candidate orderings. +// It runs in O(R × E log V) time where R is the number of rows, E is edges per layer, +// and V is nodes per layer. func CountCrossings(g *DAG, orders map[int][]string) int { rows := slices.Sorted(maps.Keys(orders)) crossings := 0 @@ -32,8 +63,19 @@ func CountCrossings(g *DAG, orders map[int][]string) int { return crossings } -// CountLayerCrossings counts edge crossings between two adjacent rows. -// Uses a Fenwick tree for O(E log V) performance. +// CountLayerCrossings counts edge crossings between two adjacent rows using a +// Fenwick tree (binary indexed tree) for O(E log V) performance where E is the +// number of edges between the rows and V is the number of nodes in the lower row. +// +// Two edges (u1,v1) and (u2,v2) cross if and only if: +// +// pos(u1) < pos(u2) AND pos(v1) > pos(v2) +// +// This is equivalent to counting inversions in the sequence of target positions +// when edges are sorted by source position. The Fenwick tree enables efficient +// inversion counting compared to the naive O(E²) algorithm. +// +// Returns 0 if either row is empty or nil, as no crossings can exist without edges. 
func CountLayerCrossings(g *DAG, upper, lower []string) int { if len(upper) == 0 || len(lower) == 0 { return 0 @@ -54,6 +96,7 @@ func CountLayerCrossings(g *DAG, upper, lower []string) int { return 0 } + // Sort edges by source position, then by target position slices.SortFunc(edges, func(a, b edge) int { if a.upper != b.upper { return a.upper - b.upper @@ -61,15 +104,19 @@ func CountLayerCrossings(g *DAG, upper, lower []string) int { return a.lower - b.lower }) + // Count inversions using Fenwick tree fenwick := make([]int, len(lower)+1) crossings, total := 0, 0 for _, e := range edges { + // Query: count edges seen so far with target <= e.lower lessOrEqual := 0 for q := e.lower + 1; q > 0; q -= q & (-q) { lessOrEqual += fenwick[q] } + // Crossings = edges seen so far with target > e.lower crossings += total - lessOrEqual + // Update: increment count at target position total++ for idx := e.lower + 1; idx < len(fenwick); idx += idx & (-idx) { fenwick[idx]++ @@ -80,24 +127,40 @@ func CountLayerCrossings(g *DAG, upper, lower []string) int { // CountCrossingsIdx counts crossings using index-based edges and permutations. // This is an optimized version for the branch-and-bound search that avoids -// string lookups. edges[i] contains the indices of children for upper node i. +// string lookups by using integer indices throughout. +// +// The edges parameter should be a slice where edges[i] contains the indices +// (into the lower row) of all children of upper row node i. The upperPerm +// and lowerPerm parameters are permutations (orderings) of node indices. +// The ws parameter must be a workspace created with [NewCrossingWorkspace] +// with maxWidth >= len(lowerPerm). +// +// This function is typically only used internally by optimization code that +// needs to evaluate thousands of orderings per second. Most callers should +// use [CountCrossings] or [CountLayerCrossings] instead. 
+// +// Performance: O(E log V) where E is the total number of edges and V is len(lowerPerm). func CountCrossingsIdx(edges [][]int, upperPerm, lowerPerm []int, ws *CrossingWorkspace) int { if len(upperPerm) == 0 || len(lowerPerm) == 0 { return 0 } + // Build position lookup: where is each original index in the permutation? for pos, origIdx := range lowerPerm { ws.pos[origIdx] = pos } + // Clear Fenwick tree limit := len(lowerPerm) + 1 for i := 0; i < limit; i++ { ws.ft[i] = 0 } + // Count inversions using Fenwick tree crossings, total := 0, 0 for _, upperIdx := range upperPerm { targets := edges[upperIdx] + // Query phase: count crossings for all edges from this source for _, targetIdx := range targets { targetPos := ws.pos[targetIdx] lessOrEqual := 0 @@ -107,6 +170,7 @@ func CountCrossingsIdx(edges [][]int, upperPerm, lowerPerm []int, ws *CrossingWo crossings += total - lessOrEqual } + // Update phase: mark all these edges as processed for _, targetIdx := range targets { targetPos := ws.pos[targetIdx] total++ @@ -118,13 +182,26 @@ func CountCrossingsIdx(edges [][]int, upperPerm, lowerPerm []int, ws *CrossingWo return crossings } -// CountPairCrossings counts how many crossings swapping left and right would cause. -// If useParents is true, considers edges to the row above; otherwise, to the row below. +// CountPairCrossings counts how many crossings would result from swapping two +// adjacent nodes (left and right) in their row. If useParents is true, considers +// edges to the row above; otherwise, considers edges to the row below. +// +// This is used by local search heuristics (e.g., adjacent node swapping) to +// decide whether a swap would reduce crossings. The adjOrder slice should +// contain the node IDs of the adjacent row in left-to-right order. +// +// Returns 0 if either node has no edges to the adjacent row, or if no crossings +// would occur. This function does not modify the graph. 
func CountPairCrossings(g *DAG, left, right string, adjOrder []string, useParents bool) int { return CountPairCrossingsWithPos(g, left, right, PosMap(adjOrder), useParents) } -// CountPairCrossingsWithPos is like CountPairCrossings but takes a precomputed position map. +// CountPairCrossingsWithPos is like [CountPairCrossings] but takes a precomputed +// position map for the adjacent row. This avoids repeated calls to [PosMap] when +// checking multiple swaps against the same adjacent row. +// +// The adjPos map should map node IDs to their positions (0-indexed) in the +// adjacent row. Nodes not in the map are ignored. func CountPairCrossingsWithPos(g *DAG, left, right string, adjPos map[string]int, useParents bool) int { var lnbr, rnbr []string if useParents { @@ -142,6 +219,7 @@ func CountPairCrossingsWithPos(g *DAG, left, right string, adjPos map[string]int continue } for _, rn := range rnbr { + // If left's neighbor is to the right of right's neighbor, they cross if rp, ok := adjPos[rn]; ok && lp > rp { crossings++ } diff --git a/pkg/dag/dag.go b/pkg/dag/dag.go index 602950d..c5cae4c 100644 --- a/pkg/dag/dag.go +++ b/pkg/dag/dag.go @@ -7,11 +7,12 @@ import ( ) var ( - // ErrInvalidNodeID is returned by [DAG.AddNode] when the node ID is empty. + // ErrInvalidNodeID is returned by [DAG.AddNode] and [DAG.RenameNode] when + // the node ID is empty. All nodes must have non-empty identifiers. ErrInvalidNodeID = errors.New("node ID must not be empty") - // ErrDuplicateNodeID is returned by [DAG.AddNode] when a node with the - // same ID already exists in the graph. + // ErrDuplicateNodeID is returned by [DAG.AddNode] and [DAG.RenameNode] when + // a node with the same ID already exists in the graph. Node IDs must be unique. 
ErrDuplicateNodeID = errors.New("duplicate node ID") // ErrUnknownSourceNode is returned by [DAG.AddEdge] when the From node @@ -23,44 +24,64 @@ var ( ErrUnknownTargetNode = errors.New("unknown target node") // ErrInvalidEdgeEndpoint is returned by [DAG.Validate] when an edge - // references a node that doesn't exist. + // references a node that doesn't exist. This indicates graph corruption. ErrInvalidEdgeEndpoint = errors.New("invalid edge endpoint") // ErrNonConsecutiveRows is returned by [DAG.Validate] when an edge // connects nodes that are not in adjacent rows (From.Row+1 != To.Row). + // All edges must connect consecutive rows for layered layouts. ErrNonConsecutiveRows = errors.New("edges must connect consecutive rows") // ErrGraphHasCycle is returned by [DAG.Validate] when a cycle is detected. - // This indicates the graph is not a valid DAG. + // This indicates the graph is not a valid DAG. Cycles are detected using + // depth-first search with white/gray/black coloring. ErrGraphHasCycle = errors.New("graph contains a cycle") ) // Metadata stores arbitrary key-value pairs attached to nodes or the graph. +// It is commonly used to store package metadata (version, description, repo URL) +// or rendering options (style, seed). Metadata maps are never nil - they are +// automatically initialized to empty maps when needed. type Metadata map[string]any -// NodeKind distinguishes between original and synthetic nodes. +// NodeKind distinguishes between original and synthetic nodes created during +// graph transformation. type NodeKind int const ( - NodeKindRegular NodeKind = iota // Original graph nodes - NodeKindSubdivider // Inserted to subdivide long edges - NodeKindAuxiliary // Helper nodes for layout + // NodeKindRegular represents an original graph node from dependency data. + NodeKindRegular NodeKind = iota + // NodeKindSubdivider represents a synthetic node inserted to subdivide a long edge. 
+ // Subdividers maintain a MasterID linking to their origin node. + NodeKindSubdivider + // NodeKindAuxiliary represents a helper node for layout (e.g., separator beams). + // Auxiliary nodes resolve impossible crossing patterns by providing intermediate points. + NodeKindAuxiliary ) -// Node represents a vertex in the dependency graph. +// Node represents a vertex in the dependency graph with an assigned row (layer). +// Nodes can be original vertices from dependency data (NodeKindRegular) or synthetic +// nodes created during transformation (NodeKindSubdivider, NodeKindAuxiliary). +// +// The zero value is not usable - ID and Row must be set before adding to a DAG. type Node struct { ID string // Unique identifier (also used as display label) - Row int // Layer assignment (0 = root/top) - Meta Metadata // Arbitrary key-value metadata - - Kind NodeKind // Node type (regular, subdivider, auxiliary) - MasterID string // Links synthetic nodes to their origin + Row int // Layer assignment (0 = root/top, increasing downward) + Meta Metadata // Arbitrary key-value metadata (never nil after AddNode) + + // Kind indicates whether this is an original or synthetic node. + Kind NodeKind + // MasterID links subdivider chains back to their origin node. + // For subdividers, EffectiveID() returns MasterID instead of ID. + MasterID string } // IsSubdivider reports whether the node was inserted to break a long edge. +// Subdivider nodes are synthetic and maintain a MasterID linking to their origin. func (n Node) IsSubdivider() bool { return n.Kind == NodeKindSubdivider } // IsAuxiliary reports whether the node is a helper for layout (e.g., separator beam). +// Auxiliary nodes are synthetic and resolve impossible crossing patterns. 
func (n Node) IsAuxiliary() bool { return n.Kind == NodeKindAuxiliary } // IsSynthetic reports whether the node was created during graph transformation @@ -68,7 +89,8 @@ func (n Node) IsAuxiliary() bool { return n.Kind == NodeKindAuxiliary } func (n Node) IsSynthetic() bool { return n.Kind != NodeKindRegular } // EffectiveID returns MasterID if set (for subdividers), otherwise the node's ID. -// This allows subdivider chains to be treated as a single logical entity. +// This allows subdivider chains to be treated as a single logical entity during +// rendering, where they appear as continuous vertical blocks. func (n Node) EffectiveID() string { if n.MasterID != "" { return n.MasterID @@ -76,24 +98,34 @@ func (n Node) EffectiveID() string { return n.ID } -// Edge represents a directed connection between two nodes. +// Edge represents a directed connection between two nodes in consecutive rows. +// For a valid edge, the target must be exactly one row below the source: +// dst.Row == src.Row + 1. This constraint is enforced by Validate. type Edge struct { From string // Source node ID To string // Target node ID - Meta Metadata // Arbitrary key-value metadata + Meta Metadata // Arbitrary key-value metadata (never nil after AddEdge) } // DAG is a directed acyclic graph optimized for row-based layered layouts. +// Nodes are organized into horizontal rows (layers), and edges can only connect +// nodes in consecutive rows. This structure enables efficient crossing reduction +// algorithms for tower visualizations. +// +// The zero value is not usable - use New to create a valid DAG instance. +// DAG is not safe for concurrent use without external synchronization. 
type DAG struct { nodes map[string]*Node edges []Edge - outgoing map[string][]string - incoming map[string][]string - rows map[int][]*Node + outgoing map[string][]string // nodeID -> children IDs + incoming map[string][]string // nodeID -> parent IDs + rows map[int][]*Node // row -> nodes in that row meta Metadata } // New creates an empty DAG with optional graph-level metadata. +// The metadata parameter can be nil, in which case an empty map is created. +// Graph-level metadata is typically used to store rendering options. func New(meta Metadata) *DAG { if meta == nil { meta = Metadata{} @@ -107,11 +139,16 @@ func New(meta Metadata) *DAG { } } -// Meta returns the graph-level metadata. +// Meta returns the graph-level metadata map. +// The returned map is never nil and can be safely modified. func (d *DAG) Meta() Metadata { return d.meta } -// AddNode adds a node to the graph. Returns an error if the node ID is empty -// or already exists. The node is automatically indexed by its Row. +// AddNode adds a node to the graph and automatically indexes it by its Row. +// Returns ErrInvalidNodeID if the node ID is empty, or ErrDuplicateNodeID +// if a node with the same ID already exists. The node's Meta field is +// automatically initialized to an empty map if nil. +// +// Node IDs must be unique across the entire graph, regardless of row assignment. func (d *DAG) AddNode(n Node) error { if n.ID == "" { return ErrInvalidNodeID @@ -129,7 +166,11 @@ func (d *DAG) AddNode(n Node) error { } // SetRows updates the row assignments for nodes and rebuilds the row index. -// Nodes not in the map retain their current row assignment. +// Nodes not present in the rows map retain their current row assignment. +// This is typically used after layer assignment algorithms compute optimal depths. +// +// The row index (used by NodesInRow) is completely rebuilt, so this operation +// is O(N) where N is the total number of nodes. 
func (d *DAG) SetRows(rows map[string]int) { d.rows = make(map[int][]*Node) for _, n := range d.nodes { @@ -141,7 +182,13 @@ func (d *DAG) SetRows(rows map[string]int) { } // AddEdge adds a directed edge between two existing nodes. -// Returns an error if either endpoint does not exist. +// Returns ErrUnknownSourceNode if the From node doesn't exist, or +// ErrUnknownTargetNode if the To node doesn't exist. The edge's Meta +// field is automatically initialized to an empty map if nil. +// +// AddEdge does not validate that From.Row+1 == To.Row - use Validate +// to check this constraint after building the graph. Multiple edges +// between the same nodes are allowed (though unusual in dependency graphs). func (d *DAG) AddEdge(e Edge) error { if _, ok := d.nodes[e.From]; !ok { return ErrUnknownSourceNode @@ -158,8 +205,9 @@ func (d *DAG) AddEdge(e Edge) error { return nil } -// RemoveEdge removes the edge from→to if it exists. No error is returned -// if the edge does not exist. +// RemoveEdge removes the edge from→to if it exists. +// No error is returned if the edge does not exist. If multiple edges +// exist between the same nodes, only the first is removed. func (d *DAG) RemoveEdge(from, to string) { d.edges = slices.DeleteFunc(d.edges, func(e Edge) bool { return e.From == from && e.To == to }) d.outgoing[from] = slices.DeleteFunc(d.outgoing[from], func(s string) bool { return s == to }) @@ -167,7 +215,11 @@ func (d *DAG) RemoveEdge(from, to string) { } // RenameNode changes a node's ID, updating all edges and indices. -// Returns an error if oldID doesn't exist or newID is empty/duplicate. +// Returns ErrInvalidNodeID if newID is empty, ErrUnknownSourceNode if +// oldID doesn't exist, or ErrDuplicateNodeID if newID is already in use. +// +// This is an O(N+E) operation where N is the number of nodes and E is +// the number of edges, as all adjacency lists must be updated. 
func (d *DAG) RenameNode(oldID, newID string) error { if newID == "" { return ErrInvalidNodeID @@ -216,7 +268,9 @@ func (d *DAG) RenameNode(oldID, newID string) error { return nil } -// Nodes returns all nodes in the graph. The order is not guaranteed. +// Nodes returns all nodes in the graph. +// The order is not guaranteed. The returned slice contains pointers to +// the actual node structs, so modifications affect the graph. func (d *DAG) Nodes() []*Node { nodes := make([]*Node, 0, len(d.nodes)) for _, n := range d.nodes { @@ -226,6 +280,8 @@ func (d *DAG) Nodes() []*Node { } // Edges returns a copy of all edges in the graph. +// The order matches insertion order. Modifications to the returned +// slice or its edge structs do not affect the graph. func (d *DAG) Edges() []Edge { return slices.Clone(d.edges) } // NodeCount returns the number of nodes in the graph. @@ -235,24 +291,34 @@ func (d *DAG) NodeCount() int { return len(d.nodes) } func (d *DAG) EdgeCount() int { return len(d.edges) } // Children returns the IDs of nodes that this node has edges to (dependencies). +// Returns nil if the node has no children or doesn't exist. The returned slice +// should not be modified - use it as a read-only view. func (d *DAG) Children(id string) []string { return d.outgoing[id] } // Parents returns the IDs of nodes that have edges to this node (dependents). +// Returns nil if the node has no parents or doesn't exist. The returned slice +// should not be modified - use it as a read-only view. func (d *DAG) Parents(id string) []string { return d.incoming[id] } // OutDegree returns the number of outgoing edges from the node. +// Returns 0 if the node doesn't exist. func (d *DAG) OutDegree(id string) int { return len(d.outgoing[id]) } // InDegree returns the number of incoming edges to the node. +// Returns 0 if the node doesn't exist. 
func (d *DAG) InDegree(id string) int { return len(d.incoming[id]) } // Node returns the node with the given ID and true, or nil and false if not found. +// The returned node pointer refers to the actual node in the graph, so modifications +// affect the graph (except for ID changes - use RenameNode instead). func (d *DAG) Node(id string) (*Node, bool) { n, ok := d.nodes[id] return n, ok } // ChildrenInRow returns children of the node that are in the specified row. +// This is useful for row-by-row traversals in layered layouts. Returns nil +// if the node has no children in that row or doesn't exist. func (d *DAG) ChildrenInRow(id string, row int) []string { var result []string for _, c := range d.outgoing[id] { @@ -264,6 +330,8 @@ func (d *DAG) ChildrenInRow(id string, row int) []string { } // ParentsInRow returns parents of the node that are in the specified row. +// This is useful for row-by-row traversals in layered layouts. Returns nil +// if the node has no parents in that row or doesn't exist. func (d *DAG) ParentsInRow(id string, row int) []string { var result []string for _, p := range d.incoming[id] { @@ -275,17 +343,25 @@ func (d *DAG) ParentsInRow(id string, row int) []string { } // NodesInRow returns all nodes assigned to the given row. +// Returns nil if the row is empty or doesn't exist. The returned slice +// contains pointers to the actual nodes, so modifications affect the graph. +// The order is insertion order (order in which AddNode or SetRows added them). func (d *DAG) NodesInRow(row int) []*Node { return d.rows[row] } // RowCount returns the number of distinct rows (layers) in the graph. +// Returns 0 for an empty graph. Rows don't need to be consecutive - +// a graph with nodes in rows 0 and 5 has RowCount() == 2. func (d *DAG) RowCount() int { return len(d.rows) } -// RowIDs returns all row indices in sorted order. +// RowIDs returns all row indices in sorted ascending order. +// Returns an empty slice for an empty graph. 
Use this to iterate +// through rows from top to bottom. func (d *DAG) RowIDs() []int { return slices.Sorted(maps.Keys(d.rows)) } // MaxRow returns the highest row index, or 0 if the graph is empty. +// For a non-empty graph, this is the bottom-most layer. func (d *DAG) MaxRow() int { if len(d.rows) == 0 { return 0 @@ -295,6 +371,8 @@ func (d *DAG) MaxRow() int { } // Sources returns nodes with no incoming edges (roots/entry points). +// These are typically application entry points or top-level packages. +// The order is not guaranteed. Returns nil for an empty graph. func (d *DAG) Sources() []*Node { var sources []*Node for _, n := range d.nodes { @@ -306,6 +384,8 @@ func (d *DAG) Sources() []*Node { } // Sinks returns nodes with no outgoing edges (leaves/terminals). +// These are typically low-level libraries with no dependencies. +// The order is not guaranteed. Returns nil for an empty graph. func (d *DAG) Sinks() []*Node { var sinks []*Node for _, n := range d.nodes { @@ -316,8 +396,18 @@ func (d *DAG) Sinks() []*Node { return sinks } -// Validate checks graph integrity: edges must connect consecutive rows -// and the graph must be acyclic. Returns nil if valid. +// Validate checks graph integrity and returns nil if valid. +// It verifies two constraints: +// +// 1. All edges connect existing nodes in consecutive rows (From.Row+1 == To.Row) +// 2. The graph is acyclic (no directed cycles exist) +// +// Returns ErrInvalidEdgeEndpoint if an edge references a missing node, +// ErrNonConsecutiveRows if edges don't connect adjacent rows, or +// ErrGraphHasCycle if a cycle is detected. Use this before rendering +// or applying transformations that assume a valid DAG. +// +// Cycle detection runs in O(N+E) time using depth-first search. func (d *DAG) Validate() error { if err := d.validateEdgeConsistency(); err != nil { return err @@ -376,6 +466,9 @@ func (d *DAG) detectCycles() error { } // PosMap creates a position lookup map from a slice of node IDs. 
+// The returned map maps each ID to its index in the slice. +// This is commonly used to convert node orderings into fast position lookups +// for crossing calculations. Returns an empty map for a nil or empty slice. func PosMap(ids []string) map[string]int { m := make(map[string]int, len(ids)) for i, id := range ids { @@ -385,6 +478,9 @@ func PosMap(ids []string) map[string]int { } // NodePosMap creates a position lookup map from a slice of nodes. +// The returned map maps each node ID to its index in the slice. +// This is a convenience wrapper around PosMap for node slices. +// Returns an empty map for a nil or empty slice. func NodePosMap(nodes []*Node) map[string]int { m := make(map[string]int, len(nodes)) for i, n := range nodes { @@ -394,6 +490,8 @@ func NodePosMap(nodes []*Node) map[string]int { } // NodeIDs extracts the ID from each node in a slice. +// Returns a new slice containing the IDs in the same order as the input. +// Returns an empty slice for a nil or empty input. func NodeIDs(nodes []*Node) []string { ids := make([]string, len(nodes)) for i, n := range nodes { diff --git a/pkg/dag/doc.go b/pkg/dag/doc.go index 0118cca..4847989 100644 --- a/pkg/dag/doc.go +++ b/pkg/dag/doc.go @@ -12,6 +12,21 @@ // drawing that powers tower visualizations. It enables efficient crossing // detection and ordering algorithms. // +// # Basic Usage +// +// Create a new graph with [New], add nodes with [DAG.AddNode], and edges with +// [DAG.AddEdge]. Nodes must have unique IDs, and edges can only connect +// existing nodes in consecutive rows (From.Row+1 == To.Row): +// +// g := dag.New(nil) +// g.AddNode(dag.Node{ID: "app", Row: 0}) +// g.AddNode(dag.Node{ID: "lib", Row: 1}) +// g.AddEdge(dag.Edge{From: "app", To: "lib"}) +// +// Query the graph structure with [DAG.Children], [DAG.Parents], [DAG.NodesInRow], +// and related methods. Use [DAG.Validate] to verify structural integrity before +// rendering or transformations. 
+// // # Node Types // // The package supports three node kinds to handle real-world graph structures: @@ -35,25 +50,19 @@ // (binary indexed tree) to count inversions in O(E log V) time, enabling // fast evaluation of millions of candidate orderings during optimization. // -// # Working with the DAG -// -// Create a new graph with [New], add nodes with [DAG.AddNode], and edges with -// [DAG.AddEdge]. Nodes must have unique IDs, and edges can only connect -// existing nodes. -// -// g := dag.New(nil) -// g.AddNode(dag.Node{ID: "app", Row: 0}) -// g.AddNode(dag.Node{ID: "lib", Row: 1}) -// g.AddEdge(dag.Edge{From: "app", To: "lib"}) -// -// Query the graph structure with [DAG.Children], [DAG.Parents], [DAG.NodesInRow], -// and related methods. Use [DAG.Validate] to verify structural integrity. -// // # Metadata // // Both nodes and the graph itself support arbitrary metadata via [Metadata] maps. // This is used to store package information (version, description, repository URL) -// and render options (style, seed) that flow through the pipeline. +// and render options (style, seed) that flow through the pipeline. Metadata maps +// are never nil after creation - empty maps are automatically initialized. +// +// # Concurrency +// +// DAG instances are not safe for concurrent use. Callers must synchronize access +// if multiple goroutines read or modify the same graph. Immutable operations like +// counting crossings on a read-only graph can safely run in parallel across +// different goroutines. // // # Related Packages // @@ -64,7 +73,8 @@ // - Layer assignment (assign rows based on depth) // // The [perm] subpackage provides permutation algorithms including the PQ-tree -// data structure for efficiently generating only valid orderings. +// data structure for efficiently generating only valid orderings that preserve +// crossing-free constraints. 
// // [transform]: github.com/matzehuels/stacktower/pkg/dag/transform // [perm]: github.com/matzehuels/stacktower/pkg/dag/perm diff --git a/pkg/dag/example_test.go b/pkg/dag/example_test.go index de88a76..213f385 100644 --- a/pkg/dag/example_test.go +++ b/pkg/dag/example_test.go @@ -79,6 +79,38 @@ func ExampleDAG_metadata() { // Version: 0.100.0 } +func ExampleDAG_Validate() { + // Validate checks for consecutive rows and cycles + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "a", Row: 0}) + _ = g.AddNode(dag.Node{ID: "b", Row: 1}) + _ = g.AddNode(dag.Node{ID: "c", Row: 2}) + _ = g.AddEdge(dag.Edge{From: "a", To: "b"}) + _ = g.AddEdge(dag.Edge{From: "b", To: "c"}) + + if err := g.Validate(); err != nil { + fmt.Println("Invalid:", err) + } else { + fmt.Println("Valid DAG") + } + // Output: + // Valid DAG +} + +func ExampleDAG_Validate_nonConsecutive() { + // Edges must connect consecutive rows + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "a", Row: 0}) + _ = g.AddNode(dag.Node{ID: "b", Row: 2}) // skips row 1 + _ = g.AddEdge(dag.Edge{From: "a", To: "b"}) + + if err := g.Validate(); err != nil { + fmt.Println("Error:", err) + } + // Output: + // Error: edges must connect consecutive rows +} + func ExampleNode_synthetic() { // Synthetic nodes are created during graph transformation regular := dag.Node{ID: "lib", Kind: dag.NodeKindRegular} @@ -122,3 +154,74 @@ func ExampleCountLayerCrossings() { // Crossings: 1 // After reorder: 0 } + +func ExampleCountCrossings() { + // Count total crossings across all row pairs + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "A", Row: 0}) + _ = g.AddNode(dag.Node{ID: "B", Row: 0}) + _ = g.AddNode(dag.Node{ID: "C", Row: 1}) + _ = g.AddNode(dag.Node{ID: "D", Row: 1}) + _ = g.AddNode(dag.Node{ID: "E", Row: 2}) + _ = g.AddNode(dag.Node{ID: "F", Row: 2}) + + // Create a crossing pattern + _ = g.AddEdge(dag.Edge{From: "A", To: "D"}) + _ = g.AddEdge(dag.Edge{From: "B", To: "C"}) + _ = g.AddEdge(dag.Edge{From: "C", To: "F"}) + _ = 
g.AddEdge(dag.Edge{From: "D", To: "E"}) + + orders := map[int][]string{ + 0: {"A", "B"}, + 1: {"C", "D"}, + 2: {"E", "F"}, + } + + total := dag.CountCrossings(g, orders) + fmt.Println("Total crossings:", total) + // Output: + // Total crossings: 2 +} + +func ExamplePosMap() { + // Convert a node ordering to a position lookup map + ordering := []string{"app", "lib", "core"} + positions := dag.PosMap(ordering) + + fmt.Println("Position of 'lib':", positions["lib"]) + fmt.Println("Position of 'core':", positions["core"]) + // Output: + // Position of 'lib': 1 + // Position of 'core': 2 +} + +func ExampleDAG_ChildrenInRow() { + // Query children in a specific row + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "a", Row: 0}) + _ = g.AddNode(dag.Node{ID: "b", Row: 1}) + _ = g.AddNode(dag.Node{ID: "c", Row: 2}) + _ = g.AddNode(dag.Node{ID: "d", Row: 2}) + _ = g.AddEdge(dag.Edge{From: "a", To: "b"}) + _ = g.AddEdge(dag.Edge{From: "a", To: "c"}) // skips row 1 + _ = g.AddEdge(dag.Edge{From: "a", To: "d"}) // skips row 1 + + // Find children specifically in row 2 + childrenInRow2 := g.ChildrenInRow("a", 2) + fmt.Println("Children in row 2:", len(childrenInRow2)) + // Output: + // Children in row 2: 2 +} + +func ExampleNewCrossingWorkspace() { + // Reuse a workspace for efficient crossing calculations + // Determine maximum row width in your graph + maxWidth := 10 + + // Create a workspace sized for that maximum + ws := dag.NewCrossingWorkspace(maxWidth) + + // Now use ws with CountCrossingsIdx for optimization loops + // (typically used internally by ordering algorithms) + _ = ws +} diff --git a/pkg/dag/perm/dot.go b/pkg/dag/perm/dot.go index 9012378..49b9a32 100644 --- a/pkg/dag/perm/dot.go +++ b/pkg/dag/perm/dot.go @@ -8,6 +8,29 @@ import ( "github.com/goccy/go-graphviz" ) +// ToDOT returns a Graphviz DOT representation of the tree structure. +// +// The DOT format can be rendered with Graphviz tools (dot, neato, etc.) or +// programmatically with RenderSVG. 
The output is a complete DOT digraph with +// styling suitable for documentation and debugging. +// +// Node representation: +// - P-nodes: labeled "P", ellipse shape +// - Q-nodes: labeled "Q", box shape +// - Leaf nodes: labeled with element value or label, rounded box shape +// +// The labels parameter works the same as in StringWithLabels: if labels[i] +// exists, element i is shown as labels[i], otherwise as a numeric index. +// Pass nil to use default numeric labels. +// +// The labels slice is not modified. +// +// Example: +// +// tree := perm.NewPQTree(3) +// tree.Reduce([]int{0, 1}) +// dot := tree.ToDOT([]string{"A", "B", "C"}) +// // Use 'dot' command or RenderSVG to visualize func (t *PQTree) ToDOT(labels []string) string { var buf bytes.Buffer buf.WriteString("digraph PQTree {\n") @@ -51,6 +74,33 @@ func (t *PQTree) writeDOTNode(buf *bytes.Buffer, n *pqNode, id int, labels []str return next } +// RenderSVG renders the tree structure as an SVG image. +// +// RenderSVG generates a DOT representation via ToDOT, then uses Graphviz to +// render it to SVG format. The returned bytes are a complete SVG document +// suitable for embedding in HTML or saving to a file. +// +// The labels parameter is passed to ToDOT and works identically. Pass nil for +// default numeric labels. +// +// RenderSVG requires the Graphviz library (github.com/goccy/go-graphviz) and +// its C dependencies to be installed. Errors are returned if Graphviz cannot +// initialize, the DOT is malformed, or rendering fails. +// +// All errors are wrapped with context using fmt.Errorf with %w, suitable for +// unwrapping with errors.Unwrap or errors.Is. +// +// The labels slice is not modified. 
+// +// Example: +// +// tree := perm.NewPQTree(4) +// tree.Reduce([]int{1, 2}) +// svg, err := tree.RenderSVG([]string{"app", "auth", "cache", "db"}) +// if err != nil { +// log.Fatal(err) +// } +// os.WriteFile("tree.svg", svg, 0644) func (t *PQTree) RenderSVG(labels []string) ([]byte, error) { dot := t.ToDOT(labels) diff --git a/pkg/dag/perm/example_test.go b/pkg/dag/perm/example_test.go index f232a60..dbe3424 100644 --- a/pkg/dag/perm/example_test.go +++ b/pkg/dag/perm/example_test.go @@ -149,3 +149,42 @@ func ExampleSeq() { // Output: // [0 1 2 3 4] } + +func ExamplePQTree_EnumerateFunc() { + tree := perm.NewPQTree(4) + tree.Reduce([]int{0, 1, 2}) + + // Stream permutations without allocating all at once + count := 0 + tree.EnumerateFunc(func(perm []int) bool { + fmt.Println(perm) + count++ + return count < 3 // Stop after 3 + }) + fmt.Printf("Processed %d permutations\n", count) + // Output: + // [0 1 2 3] + // [1 0 2 3] + // [2 0 1 3] + // Processed 3 permutations +} + +func ExamplePQTree_Clone() { + tree := perm.NewPQTree(4) + tree.Reduce([]int{0, 1}) + + // Try different constraints on branches + branch1 := tree.Clone() + branch1.Reduce([]int{2, 3}) + + branch2 := tree.Clone() + branch2.Reduce([]int{1, 2}) + + fmt.Println("Original:", tree.ValidCount()) + fmt.Println("Branch 1:", branch1.ValidCount()) + fmt.Println("Branch 2:", branch2.ValidCount()) + // Output: + // Original: 12 + // Branch 1: 8 + // Branch 2: 4 +} diff --git a/pkg/dag/perm/perm.go b/pkg/dag/perm/perm.go index f988b06..e72adbd 100644 --- a/pkg/dag/perm/perm.go +++ b/pkg/dag/perm/perm.go @@ -2,8 +2,10 @@ package perm import "slices" -// Seq returns a slice [0, 1, 2, ..., n-1]. -// Useful for initializing permutations or creating index sequences. +// Seq returns a slice containing the sequence [0, 1, 2, ..., n-1]. +// This is useful for initializing permutation arrays or creating index sequences. +// +// For n <= 0, Seq returns an empty slice. 
func Seq(n int) []int { result := make([]int, n) for i := range result { @@ -12,7 +14,11 @@ func Seq(n int) []int { return result } -// Factorial returns n! (n factorial). Returns 1 for n <= 1. +// Factorial returns n! (n factorial), the product 1 × 2 × ... × n. +// For n <= 1, Factorial returns 1. +// +// This function is useful for calculating the size of the full permutation space. +// Note that factorials grow extremely fast: 13! = 6,227,020,800 exceeds 32-bit int. func Factorial(n int) int { result := 1 for i := 2; i <= n; i++ { @@ -22,8 +28,21 @@ func Factorial(n int) int { } // Generate returns permutations of [0, 1, ..., n-1] using Heap's algorithm. -// If limit > 0, returns at most limit permutations; otherwise returns all n!. -// Each returned slice is a separate allocation safe to modify. +// +// If limit > 0, Generate returns at most limit permutations. +// If limit <= 0, Generate returns all n! permutations. +// +// Each returned slice is a separate allocation, safe to modify without affecting others. +// +// Generate handles edge cases gracefully: +// - n = 0: returns [[]] (one empty permutation) +// - n = 1: returns [[0]] (one single-element permutation) +// +// For n >= 13, the number of permutations exceeds billions. Always use a limit +// when n is large, or your program will exhaust memory. +// +// Heap's algorithm generates permutations in a non-lexicographic order, but +// efficiently produces each permutation exactly once. func Generate(n, limit int) [][]int { if n == 0 { return [][]int{{}} diff --git a/pkg/dag/perm/pqtree.go b/pkg/dag/perm/pqtree.go index 6419e0c..5bdbfe8 100644 --- a/pkg/dag/perm/pqtree.go +++ b/pkg/dag/perm/pqtree.go @@ -5,6 +5,21 @@ import ( "strings" ) +// PQTree is a data structure that compactly represents a family of permutations +// satisfying "consecutive ones" constraints. +// +// A PQ-tree encodes the valid orderings of n elements where certain subsets +// must appear consecutively. 
The tree represents all permutations that satisfy +// the applied constraints, allowing efficient pruning of invalid orderings. +// +// The tree has two types of internal nodes: +// - P-nodes (Permutable): children can appear in any order (n! orderings) +// - Q-nodes (seQuence): children have a fixed order, reversible (2 orderings) +// +// PQTree is not safe for concurrent use. If multiple goroutines access a PQTree, +// they must be synchronized with external locking. +// +// The zero value of PQTree is not usable; use NewPQTree to create instances. type PQTree struct { root *pqNode leaves []*pqNode @@ -37,6 +52,20 @@ type pqNode struct { partialCount int } +// NewPQTree creates a PQ-tree representing all n! permutations of n elements. +// +// The elements are numbered [0, 1, ..., n-1]. Initially, no constraints are +// applied, so all n! orderings are valid. Call Reduce to apply consecutive-ones +// constraints that restrict the set of valid permutations. +// +// For n = 0, NewPQTree returns a tree representing one empty permutation. +// For n = 1, NewPQTree returns a tree with a single element. +// +// The returned PQTree is ready to use and can be modified with Reduce or +// queried with Enumerate, ValidCount, and String methods. +// +// To explore multiple constraint branches without mutating the original tree, +// use Clone to create independent copies. func NewPQTree(n int) *PQTree { if n == 0 { return &PQTree{} @@ -59,6 +88,29 @@ func NewPQTree(n int) *PQTree { return &PQTree{root: root, leaves: leaves} } +// Reduce applies a consecutive-ones constraint to the tree. +// +// After calling Reduce(constraint), only permutations where all elements in +// constraint appear consecutively (in any order) remain valid. Multiple calls +// to Reduce apply cumulative constraints, further restricting the valid set. +// +// Reduce returns true if the constraint is satisfiable with previously applied +// constraints, false if the constraint creates a contradiction. 
When Reduce +// returns false, the tree is left in an undefined state and should not be used +// further. +// +// The constraint slice is not modified. Element indices must be in the range +// [0, n-1] where n is the value passed to NewPQTree. Out-of-range indices are +// silently ignored. +// +// Trivial constraints (length 0, 1, or equal to tree size) are always satisfiable +// and have no effect on the tree structure. +// +// Example: +// +// tree := perm.NewPQTree(5) +// tree.Reduce([]int{1, 2, 3}) // Elements 1, 2, 3 must be consecutive +// tree.Reduce([]int{0, 1}) // Elements 0, 1 must be consecutive func (t *PQTree) Reduce(constraint []int) bool { if t.root == nil || len(constraint) <= 1 || len(constraint) == len(t.leaves) { return true @@ -78,6 +130,82 @@ func (t *PQTree) Reduce(constraint []int) bool { return t.reduce(t.root) } +// Clone creates an independent deep copy of the PQ-tree. +// +// The cloned tree has identical structure and represents the same set of valid +// permutations, but can be modified independently without affecting the original. +// This is useful for exploring multiple constraint branches in search algorithms. +// +// Clone copies the entire internal tree structure. For large trees, this operation +// may be expensive. Consider cloning only when necessary. 
+// +// Example: +// +// tree := perm.NewPQTree(5) +// tree.Reduce([]int{1, 2, 3}) +// +// // Try two different additional constraints +// branch1 := tree.Clone() +// branch1.Reduce([]int{0, 1}) // Branch 1 constraint +// +// branch2 := tree.Clone() +// branch2.Reduce([]int{3, 4}) // Branch 2 constraint +// +// // Original tree unchanged +// fmt.Println(tree.ValidCount()) +func (t *PQTree) Clone() *PQTree { + if t.root == nil { + return &PQTree{} + } + + // Map old nodes to new nodes for parent/leaf pointer fixup + nodeMap := make(map[*pqNode]*pqNode) + + // Clone the tree structure + newRoot := t.cloneNode(t.root, nodeMap) + + // Rebuild leaves slice + newLeaves := make([]*pqNode, len(t.leaves)) + for i, oldLeaf := range t.leaves { + newLeaves[i] = nodeMap[oldLeaf] + } + + return &PQTree{ + root: newRoot, + leaves: newLeaves, + } +} + +func (t *PQTree) cloneNode(n *pqNode, nodeMap map[*pqNode]*pqNode) *pqNode { + if n == nil { + return nil + } + + // Check if already cloned (shouldn't happen in tree, but safe) + if clone, exists := nodeMap[n]; exists { + return clone + } + + // Create new node with same basic fields + clone := &pqNode{ + kind: n.kind, + value: n.value, + mark: n.mark, + } + nodeMap[n] = clone + + // Clone children recursively + if len(n.children) > 0 { + clone.children = make([]*pqNode, len(n.children)) + for i, child := range n.children { + clone.children[i] = t.cloneNode(child, nodeMap) + clone.children[i].parent = clone + } + } + + return clone +} + func (t *PQTree) clearMarks(n *pqNode) { n.mark = unmarked n.fullCount = 0 @@ -355,6 +483,30 @@ func (t *PQTree) mergeQNodes(parent *pqNode, idx int) { parent.children = newChildren } +// Enumerate returns all valid permutations represented by the tree. +// +// If limit > 0, Enumerate returns at most limit permutations. +// If limit <= 0, Enumerate returns all valid permutations. +// +// Each returned slice is a separate allocation containing element indices in +// permuted order. 
The slices are safe to modify without affecting the tree or +// other returned permutations. +// +// The order of returned permutations is not specified and may change between +// calls or Go versions. +// +// For trees with a large ValidCount, always use a limit to avoid memory exhaustion. +// A tree with strong constraints might have only a few hundred valid orderings, +// while an unconstrained tree has n! orderings. +// +// For memory-efficient streaming without allocating all results at once, use +// EnumerateFunc instead. +// +// Example: +// +// tree := perm.NewPQTree(4) +// tree.Reduce([]int{0, 1, 2}) +// orderings := tree.Enumerate(10) // Get first 10 valid orderings func (t *PQTree) Enumerate(limit int) [][]int { if t.root == nil { return [][]int{{}} @@ -368,6 +520,49 @@ func (t *PQTree) Enumerate(limit int) [][]int { return results } +// EnumerateFunc generates valid permutations one at a time via callback. +// +// EnumerateFunc calls fn for each valid permutation until fn returns false or +// all permutations are exhausted. This is memory-efficient for large result sets +// since permutations are generated on-demand rather than allocated all at once. +// +// The callback fn receives a permutation slice that is valid only for the +// duration of the call. If the caller needs to retain the permutation, it must +// copy it (e.g., with slices.Clone). +// +// EnumerateFunc returns the number of permutations processed before stopping. +// If fn always returns true, the return value equals ValidCount(). +// +// The order of generated permutations is not specified and may change between +// calls or Go versions. 
+// +// Example: +// +// tree := perm.NewPQTree(10) +// tree.Reduce([]int{0, 1, 2, 3, 4}) +// +// // Process first 100 permutations without allocating all at once +// count := 0 +// tree.EnumerateFunc(func(perm []int) bool { +// // Process perm here +// fmt.Println(perm) +// count++ +// return count < 100 // Stop after 100 +// }) +func (t *PQTree) EnumerateFunc(fn func([]int) bool) int { + if t.root == nil { + fn([]int{}) + return 1 + } + + count := 0 + t.enumerateLazy(t.root, nil, func(perm []int) bool { + count++ + return fn(perm) + }) + return count +} + // enumerateLazy generates permutations one at a time via callback. // Returns false if callback signaled stop, true otherwise. func (t *PQTree) enumerateLazy(node *pqNode, prefix []int, emit func([]int) bool) bool { @@ -447,6 +642,25 @@ func (t *PQTree) enumerateChildrenLazy(children []*pqNode, prefix []int, emit fu }) } +// ValidCount returns the number of valid permutations represented by the tree. +// +// ValidCount efficiently computes the count without enumerating all permutations. +// The count reflects all constraints applied via Reduce. +// +// The count is computed from the tree structure: +// - P-nodes multiply by n! (factorial of child count) +// - Q-nodes multiply by 2 (forward and reverse) +// - Leaf nodes contribute 1 +// +// For large trees, the count may be accurate even when Enumerate(0) would +// exhaust memory. Use ValidCount to check if enumeration is feasible before +// calling Enumerate without a limit. +// +// Example: +// +// tree := perm.NewPQTree(5) // 5! = 120 permutations +// tree.Reduce([]int{1, 2, 3}) +// fmt.Println(tree.ValidCount()) // Much less than 120 func (t *PQTree) ValidCount() int { if t.root == nil { return 1 @@ -472,10 +686,36 @@ func (t *PQTree) countPerms(node *pqNode) int { } } +// String returns a human-readable representation of the tree structure. 
+// +// The representation uses a nested notation: +// - P-nodes (permutable): enclosed in curly braces {} +// - Q-nodes (sequence): enclosed in square brackets [] +// - Leaf nodes: shown as single digits 0-9, or (a), (b), ... for indices >= 10 +// +// String is equivalent to StringWithLabels(nil). +// +// Example output: "{0 {1 2 3} 4}" represents a tree where elements 1, 2, 3 +// must be consecutive but can permute among themselves. func (t *PQTree) String() string { return t.StringWithLabels(nil) } +// StringWithLabels returns a human-readable representation using custom labels. +// +// The labels slice maps element indices to strings. If labels[i] exists, element i +// is displayed as labels[i] instead of its numeric index. This is useful for +// showing meaningful names in debugging output or examples. +// +// If labels is nil or shorter than needed, numeric indices are used as fallback. +// The labels slice is not modified. +// +// Example: +// +// tree := perm.NewPQTree(3) +// labels := []string{"app", "auth", "db"} +// tree.Reduce([]int{0, 1}) +// fmt.Println(tree.StringWithLabels(labels)) // "{app auth} db" (or similar) func (t *PQTree) StringWithLabels(labels []string) string { if t.root == nil { return "(empty)" diff --git a/pkg/dag/perm/pqtree_test.go b/pkg/dag/perm/pqtree_test.go index 5a9f3d5..2655c78 100644 --- a/pkg/dag/perm/pqtree_test.go +++ b/pkg/dag/perm/pqtree_test.go @@ -149,6 +149,136 @@ func TestPQTree_ValidCount(t *testing.T) { } } +func TestPQTree_Clone(t *testing.T) { + // Test that clone creates independent copy + original := NewPQTree(5) + original.Reduce([]int{0, 1, 2}) + + originalCountBefore := original.ValidCount() + originalStringBefore := original.String() + + clone := original.Clone() + + // Verify initial state matches + if clone.ValidCount() != originalCountBefore { + t.Errorf("Clone ValidCount %d != original %d", clone.ValidCount(), originalCountBefore) + } + + if clone.String() != originalStringBefore { + t.Errorf("Clone 
structure doesn't match:\nClone: %s\nOriginal: %s", clone.String(), originalStringBefore) + } + + // Modify clone - should not affect original + ok := clone.Reduce([]int{3, 4}) + if !ok { + t.Fatal("Clone reduce failed") + } + + cloneCountAfter := clone.ValidCount() + originalCountAfter := original.ValidCount() + originalStringAfter := original.String() + + t.Logf("Original before: count=%d, structure=%s", originalCountBefore, originalStringBefore) + t.Logf("Original after: count=%d, structure=%s", originalCountAfter, originalStringAfter) + t.Logf("Clone after: count=%d, structure=%s", cloneCountAfter, clone.String()) + + // Verify original unchanged + if originalCountAfter != originalCountBefore { + t.Errorf("Original ValidCount changed from %d to %d", originalCountBefore, originalCountAfter) + } + + if originalStringAfter != originalStringBefore { + t.Errorf("Original structure changed from %s to %s", originalStringBefore, originalStringAfter) + } + + // Verify clone changed + if cloneCountAfter >= originalCountBefore { + t.Errorf("Clone ValidCount should be less than %d after constraint, got %d", originalCountBefore, cloneCountAfter) + } +} + +func TestPQTree_CloneEmpty(t *testing.T) { + original := NewPQTree(0) + clone := original.Clone() + + if clone.ValidCount() != 1 { + t.Errorf("Empty clone ValidCount = %d, want 1", clone.ValidCount()) + } +} + +func TestPQTree_EnumerateFunc(t *testing.T) { + tree := NewPQTree(3) + + // Collect via EnumerateFunc + var collected [][]int + count := tree.EnumerateFunc(func(perm []int) bool { + collected = append(collected, slices.Clone(perm)) + return true + }) + + if count != 6 { + t.Errorf("EnumerateFunc returned count %d, want 6", count) + } + + if len(collected) != 6 { + t.Errorf("Collected %d permutations, want 6", len(collected)) + } + + // Verify all unique + seen := make(map[string]bool) + for _, p := range collected { + key := "" + for _, v := range p { + key += string(rune('0' + v)) + } + if seen[key] { + 
t.Errorf("EnumerateFunc generated duplicate: %v", p) + } + seen[key] = true + } +} + +func TestPQTree_EnumerateFuncEarlyStop(t *testing.T) { + tree := NewPQTree(4) + + // Stop after 3 permutations + stopAfter := 3 + collected := 0 + count := tree.EnumerateFunc(func(perm []int) bool { + collected++ + return collected < stopAfter + }) + + if count != stopAfter { + t.Errorf("EnumerateFunc count = %d, want %d", count, stopAfter) + } + + if collected != stopAfter { + t.Errorf("Collected %d permutations, want %d", collected, stopAfter) + } +} + +func TestPQTree_EnumerateFuncEmptyTree(t *testing.T) { + tree := NewPQTree(0) + + called := false + count := tree.EnumerateFunc(func(perm []int) bool { + called = true + if len(perm) != 0 { + t.Errorf("Expected empty permutation, got %v", perm) + } + return true + }) + + if !called { + t.Error("EnumerateFunc should call function for empty tree") + } + + if count != 1 { + t.Errorf("EnumerateFunc count = %d, want 1", count) + } +} + func areConsecutive(perm, subset []int) bool { if len(subset) <= 1 { return true diff --git a/pkg/dag/transform/cycles.go b/pkg/dag/transform/cycles.go index ca3d54b..fe79a80 100644 --- a/pkg/dag/transform/cycles.go +++ b/pkg/dag/transform/cycles.go @@ -2,6 +2,38 @@ package transform import "github.com/matzehuels/stacktower/pkg/dag" +// BreakCycles removes back-edges from the graph to ensure it is a valid +// directed acyclic graph (DAG). +// +// BreakCycles uses depth-first search with white/gray/black coloring to detect +// cycles. When a gray node is encountered (indicating a back-edge that would +// complete a cycle), that edge is marked for removal. The function returns the +// number of edges removed. +// +// # Algorithm +// +// The DFS starts from all source nodes (nodes with in-degree 0), then visits +// any remaining unvisited nodes to handle disconnected components. 
A node is: +// - white: not yet visited +// - gray: currently being visited (on the DFS stack) +// - black: fully processed (all descendants visited) +// +// Any edge pointing to a gray node creates a cycle and is removed. +// +// # Edge Selection +// +// When multiple edges could break a cycle, the choice is deterministic but not +// guaranteed to minimize the total number removed across all cycles. For +// minimal cycle-breaking, consider using a feedback arc set algorithm instead. +// +// # Nil Handling +// +// BreakCycles panics if g is nil. If g is empty (zero nodes), it returns 0. +// +// # Performance +// +// Time complexity is O(V + E) where V is nodes and E is edges. Space +// complexity is O(V) for the color map and recursion stack. func BreakCycles(g *dag.DAG) int { const ( white = iota diff --git a/pkg/dag/transform/doc.go b/pkg/dag/transform/doc.go index ebfbfab..b77d0bc 100644 --- a/pkg/dag/transform/doc.go +++ b/pkg/dag/transform/doc.go @@ -62,13 +62,30 @@ // dependencies. This function removes the minimum edges needed to restore // acyclicity using a DFS-based approach. // +// # Goroutine Safety +// +// All functions in this package modify the input DAG in place and are NOT safe +// for concurrent use. Callers must ensure exclusive access to the DAG during +// transformation. The DAG itself is not internally synchronized. +// // # Usage // // For most use cases, call [Normalize] which applies all transformations: // -// transform.Normalize(g) // Modifies g in place +// g := dag.New(nil) +// // ... populate graph ... 
+// result := transform.Normalize(g) // Modifies g in place, returns metrics +// fmt.Printf("Removed %d cycles, %d transitive edges\n", +// result.CyclesRemoved, result.TransitiveEdgesRemoved) +// +// To skip specific transformations, use [NormalizeWithOptions]: +// +// result := transform.NormalizeWithOptions(g, transform.NormalizeOptions{ +// SkipTransitiveReduction: true, // Keep all edges +// SkipSeparators: true, // Accept crossings +// }) // -// For fine-grained control, apply transformations individually: +// For fine-grained control, apply transformations individually in this order: // // transform.BreakCycles(g) // transform.TransitiveReduction(g) diff --git a/pkg/dag/transform/example_test.go b/pkg/dag/transform/example_test.go index 661b73e..42ee798 100644 --- a/pkg/dag/transform/example_test.go +++ b/pkg/dag/transform/example_test.go @@ -27,12 +27,16 @@ func ExampleNormalize() { fmt.Println(" Edges:", g.EdgeCount()) // Normalize: assigns layers, removes transitive edges, subdivides long edges - transform.Normalize(g) + result := transform.Normalize(g) fmt.Println("After normalize:") fmt.Println(" Nodes:", g.NodeCount()) fmt.Println(" Edges:", g.EdgeCount()) fmt.Println(" Rows:", g.RowCount()) + fmt.Println("Transformation metrics:") + fmt.Println(" Cycles removed:", result.CyclesRemoved) + fmt.Println(" Transitive edges removed:", result.TransitiveEdgesRemoved) + fmt.Println(" Subdividers added:", result.SubdividersAdded) // Output: // Before normalize: // Nodes: 4 @@ -41,6 +45,10 @@ func ExampleNormalize() { // Nodes: 4 // Edges: 4 // Rows: 3 + // Transformation metrics: + // Cycles removed: 0 + // Transitive edges removed: 1 + // Subdividers added: 0 } func ExampleTransitiveReduction() { @@ -127,9 +135,125 @@ func ExampleBreakCycles() { _ = g.AddEdge(dag.Edge{From: "C", To: "A"}) // Creates cycle fmt.Println("Edges before:", g.EdgeCount()) - transform.BreakCycles(g) + removed := transform.BreakCycles(g) fmt.Println("Edges after:", g.EdgeCount()) + 
fmt.Println("Removed:", removed) // Output: // Edges before: 3 // Edges after: 2 + // Removed: 1 +} + +func ExampleResolveSpanOverlaps() { + // Create a complete bipartite graph K(2,2) - the classic crossing pattern + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "auth", Row: 0}) + _ = g.AddNode(dag.Node{ID: "api", Row: 0}) + _ = g.AddNode(dag.Node{ID: "logging", Row: 1}) + _ = g.AddNode(dag.Node{ID: "metrics", Row: 1}) + + // Both parents connect to both children (guaranteed crossing) + _ = g.AddEdge(dag.Edge{From: "auth", To: "logging"}) + _ = g.AddEdge(dag.Edge{From: "auth", To: "metrics"}) + _ = g.AddEdge(dag.Edge{From: "api", To: "logging"}) + _ = g.AddEdge(dag.Edge{From: "api", To: "metrics"}) + + fmt.Println("Before resolution:") + fmt.Println(" Nodes:", g.NodeCount()) + fmt.Println(" Edges:", g.EdgeCount()) + + transform.ResolveSpanOverlaps(g) + + fmt.Println("After resolution:") + fmt.Println(" Nodes:", g.NodeCount()) + fmt.Println(" Edges:", g.EdgeCount()) + + // Check for separator nodes + hasSeparator := false + for _, n := range g.Nodes() { + if n.IsAuxiliary() { + hasSeparator = true + break + } + } + fmt.Println(" Separator inserted:", hasSeparator) + // Output: + // Before resolution: + // Nodes: 4 + // Edges: 4 + // After resolution: + // Nodes: 5 + // Edges: 4 + // Separator inserted: true +} + +func ExampleNormalizeWithOptions() { + // Build a graph that we know is already acyclic + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "api"}) + _ = g.AddNode(dag.Node{ID: "auth"}) + _ = g.AddNode(dag.Node{ID: "db"}) + _ = g.AddEdge(dag.Edge{From: "api", To: "auth"}) + _ = g.AddEdge(dag.Edge{From: "api", To: "db"}) // Transitive + _ = g.AddEdge(dag.Edge{From: "auth", To: "db"}) + + // Skip cycle breaking (we know it's acyclic) but keep transitive reduction + result := transform.NormalizeWithOptions(g, transform.NormalizeOptions{ + SkipCycleBreaking: true, + }) + + fmt.Println("Cycles removed:", result.CyclesRemoved) + fmt.Println("Transitive edges 
removed:", result.TransitiveEdgesRemoved) + fmt.Println("Final edge count:", g.EdgeCount()) + // Output: + // Cycles removed: 0 + // Transitive edges removed: 1 + // Final edge count: 2 +} + +func ExampleNormalizeWithOptions_preserveTransitive() { + // Sometimes you want to preserve all edges even if redundant + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "A"}) + _ = g.AddNode(dag.Node{ID: "B"}) + _ = g.AddNode(dag.Node{ID: "C"}) + _ = g.AddEdge(dag.Edge{From: "A", To: "B"}) + _ = g.AddEdge(dag.Edge{From: "B", To: "C"}) + _ = g.AddEdge(dag.Edge{From: "A", To: "C"}) // Keep this transitive edge + + result := transform.NormalizeWithOptions(g, transform.NormalizeOptions{ + SkipTransitiveReduction: true, + }) + + fmt.Println("Transitive edges removed:", result.TransitiveEdgesRemoved) + fmt.Println("Subdividers added:", result.SubdividersAdded) + // Note: Edge count increases due to subdividers for A→C (0→2 requires subdivider) + fmt.Println("Final edge count:", g.EdgeCount()) + // Output: + // Transitive edges removed: 0 + // Subdividers added: 1 + // Final edge count: 4 +} + +func ExampleNormalizeWithOptions_skipSeparators() { + // Accept edge crossings instead of inserting separator beams + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "auth"}) + _ = g.AddNode(dag.Node{ID: "api"}) + _ = g.AddNode(dag.Node{ID: "log"}) + _ = g.AddNode(dag.Node{ID: "metrics"}) + _ = g.AddEdge(dag.Edge{From: "auth", To: "log"}) + _ = g.AddEdge(dag.Edge{From: "auth", To: "metrics"}) + _ = g.AddEdge(dag.Edge{From: "api", To: "log"}) + _ = g.AddEdge(dag.Edge{From: "api", To: "metrics"}) + + result := transform.NormalizeWithOptions(g, transform.NormalizeOptions{ + SkipSeparators: true, + }) + + fmt.Println("Separators added:", result.SeparatorsAdded) + fmt.Println("Node count unchanged:", g.NodeCount() == 4) + // Output: + // Separators added: 0 + // Node count unchanged: true } diff --git a/pkg/dag/transform/layering.go b/pkg/dag/transform/layering.go index fe73a9f..62d74ea 100644 --- 
a/pkg/dag/transform/layering.go +++ b/pkg/dag/transform/layering.go @@ -2,6 +2,41 @@ package transform import "github.com/matzehuels/stacktower/pkg/dag" +// AssignLayers assigns nodes to horizontal rows (layers) based on their depth +// in the graph. +// +// AssignLayers uses a longest-path algorithm via topological sort (Kahn's +// algorithm) to compute row assignments. Each node is placed at one plus the +// maximum row of any of its parents, ensuring that: +// - Source nodes (no incoming edges) are at row 0 +// - All parents are strictly above their children +// - Each node is pushed as deep as necessary to avoid parent conflicts +// +// Existing row assignments in the DAG are overwritten. +// +// # Algorithm +// +// AssignLayers performs a topological traversal: +// 1. Initialize all source nodes (in-degree 0) at row 0 and add to queue +// 2. Process queue: for each node, assign children to max(current_row + 1) +// 3. Decrement in-degree counters; add newly zero-degree nodes to queue +// 4. Repeat until queue is empty +// +// # Cycles +// +// AssignLayers assumes the graph is acyclic. If cycles exist, nodes in the +// cycle will never reach zero in-degree and will remain at row 0 (their +// default). Run [BreakCycles] first to ensure correct layering. +// +// # Nil Handling +// +// AssignLayers panics if g is nil. If g is empty (zero nodes), the function +// returns immediately. +// +// # Performance +// +// Time complexity is O(V + E), where V is nodes and E is edges. Space +// complexity is O(V) for the queue and row/degree maps. 
func AssignLayers(g *dag.DAG) { nodes := g.Nodes() inDegree := make(map[string]int, len(nodes)) diff --git a/pkg/dag/transform/normalize.go b/pkg/dag/transform/normalize.go index 4fc6009..4922ff9 100644 --- a/pkg/dag/transform/normalize.go +++ b/pkg/dag/transform/normalize.go @@ -2,11 +2,98 @@ package transform import "github.com/matzehuels/stacktower/pkg/dag" -func Normalize(g *dag.DAG) *dag.DAG { - BreakCycles(g) - TransitiveReduction(g) +// Normalize prepares a DAG for tower rendering by applying a sequence of +// transformations that satisfy the layout's structural constraints. +// +// Normalize modifies g in place and returns transformation metrics. All +// transformations are applied in this specific order: +// +// 1. [BreakCycles]: Remove back-edges to ensure it is a true DAG. +// 2. [TransitiveReduction]: Remove redundant edges to simplify the visual. +// 3. [AssignLayers]: Assign horizontal rows (layers) based on node depth. +// 4. [Subdivide]: Break edges crossing multiple rows into single-row segments. +// 5. [ResolveSpanOverlaps]: Insert separator beams to resolve layout conflicts. +// +// This order is critical: cycles must be broken before transitive reduction, +// layers must be assigned before subdivision, and span overlaps can only be +// detected after edges are subdivided into single-row segments. +// +// To skip specific transformations, use [NormalizeWithOptions]. +// +// # Return Value +// +// Normalize returns a [TransformResult] containing metrics about the +// transformations applied (cycles removed, edges reduced, nodes added, etc.). +// This is useful for logging and understanding graph complexity. +// +// # Nil Handling +// +// Normalize panics if g is nil. The DAG must be non-nil, but may be empty +// (zero nodes). An empty DAG is returned unchanged with zero metrics. +// +// # Performance +// +// Complexity is O(V²·E) in the worst case due to transitive reduction, where +// V is the number of nodes and E is the number of edges. 
For typical +// dependency graphs with limited fan-out, performance is near-linear. +func Normalize(g *dag.DAG) *TransformResult { + return NormalizeWithOptions(g, NormalizeOptions{}) +} + +// NormalizeWithOptions prepares a DAG for tower rendering with configurable +// transformation steps. +// +// NormalizeWithOptions is like [Normalize] but allows skipping specific +// transformations via opts. This is useful when: +// - The input is known to be acyclic (skip cycle breaking) +// - Transitive edges should be preserved (skip reduction) +// - Edge crossings are acceptable (skip separators) +// +// The transformations are applied in this order (unless skipped): +// +// 1. [BreakCycles]: Remove back-edges (unless opts.SkipCycleBreaking) +// 2. [TransitiveReduction]: Remove redundant edges (unless opts.SkipTransitiveReduction) +// 3. [AssignLayers]: Assign rows (always applied) +// 4. [Subdivide]: Break long edges (always applied) +// 5. [ResolveSpanOverlaps]: Insert separators (unless opts.SkipSeparators) +// +// Layer assignment and edge subdivision are always applied because they are +// required for valid tower layouts. +// +// # Nil Handling +// +// NormalizeWithOptions panics if g is nil. An empty DAG returns zero metrics. +// +// # Performance +// +// See [Normalize]. Skipping transitive reduction reduces worst-case complexity +// from O(V²·E) to O(V·E). 
+func NormalizeWithOptions(g *dag.DAG, opts NormalizeOptions) *TransformResult { + result := &TransformResult{} + + if !opts.SkipCycleBreaking { + result.CyclesRemoved = BreakCycles(g) + } + + if !opts.SkipTransitiveReduction { + edgesBefore := g.EdgeCount() + TransitiveReduction(g) + result.TransitiveEdgesRemoved = edgesBefore - g.EdgeCount() + } + AssignLayers(g) + + nodesBefore := g.NodeCount() Subdivide(g) - ResolveSpanOverlaps(g) - return g + result.SubdividersAdded = g.NodeCount() - nodesBefore + + if !opts.SkipSeparators { + nodesBefore := g.NodeCount() + ResolveSpanOverlaps(g) + result.SeparatorsAdded = g.NodeCount() - nodesBefore + } + + result.MaxRow = g.MaxRow() + + return result } diff --git a/pkg/dag/transform/reduction.go b/pkg/dag/transform/reduction.go index c2142a8..ec06b30 100644 --- a/pkg/dag/transform/reduction.go +++ b/pkg/dag/transform/reduction.go @@ -2,6 +2,42 @@ package transform import "github.com/matzehuels/stacktower/pkg/dag" +// TransitiveReduction removes redundant edges from the graph. +// +// TransitiveReduction removes any edge (u, v) where there exists an alternate +// path from u to v through at least one intermediate node. For example, if +// edges A→B, B→C, and A→C all exist, then A→C is redundant and is removed +// because A reaches C via B. +// +// This simplifies visualization by showing only direct dependencies, which is +// critical for tower layouts where transitive edges create impossible geometry +// (a block cannot rest on both adjacent and distant floors simultaneously). +// +// # Algorithm +// +// TransitiveReduction computes full transitive closure using DFS-based +// reachability, then removes any edge (u, v) where u can reach v through an +// intermediate node w (where u→w and w reaches v). +// +// # Nil Handling +// +// TransitiveReduction panics if g is nil. If g is empty (zero nodes), the +// function returns immediately without error. 
+// +// # Performance +// +// Time complexity is O(V²·E) in the worst case, where V is the number of nodes +// and E is the number of edges. For sparse graphs (typical dependency graphs +// with limited fan-out), performance approaches O(V·E). +// +// Space complexity is O(V²) for the reachability matrix. For large dense +// graphs (thousands of nodes with high connectivity), this may consume +// significant memory. +// +// # Edge Metadata +// +// TransitiveReduction preserves edge metadata for all non-redundant edges. +// Metadata on removed edges is discarded. func TransitiveReduction(g *dag.DAG) { nodes := g.Nodes() if len(nodes) == 0 { diff --git a/pkg/dag/transform/result.go b/pkg/dag/transform/result.go new file mode 100644 index 0000000..d96a881 --- /dev/null +++ b/pkg/dag/transform/result.go @@ -0,0 +1,53 @@ +package transform + +// TransformResult contains metrics about transformations applied to a DAG. +// +// TransformResult is returned by [Normalize] and [NormalizeWithOptions] to +// provide visibility into what transformations occurred. This is useful for +// logging, debugging, and understanding graph complexity. +type TransformResult struct { + // CyclesRemoved is the number of back-edges removed by cycle breaking. + // Zero indicates the input was already acyclic. + CyclesRemoved int + + // TransitiveEdgesRemoved is the number of redundant edges removed by + // transitive reduction. Higher values indicate more redundancy in the + // original dependency graph. + TransitiveEdgesRemoved int + + // SubdividersAdded is the number of synthetic subdivider nodes inserted + // to break long edges into single-row segments. Higher values indicate + // deeper dependency chains. + SubdividersAdded int + + // SeparatorsAdded is the number of auxiliary separator beam nodes inserted + // to resolve impossible crossing patterns. Non-zero values indicate the + // presence of tangle motifs (e.g., complete bipartite subgraphs). 
+ SeparatorsAdded int + + // MaxRow is the final depth (maximum row number) after all transformations. + // This represents the height of the tower layout. + MaxRow int +} + +// NormalizeOptions configures which transformations are applied by +// [NormalizeWithOptions]. +// +// The zero value applies all transformations (equivalent to calling [Normalize]). +type NormalizeOptions struct { + // SkipCycleBreaking disables cycle detection and removal. Use only when + // the input graph is guaranteed to be acyclic. If cycles exist and this + // is true, subsequent transformations may behave incorrectly. + SkipCycleBreaking bool + + // SkipTransitiveReduction disables removal of redundant edges. This + // preserves all edges from the input but may result in cluttered + // visualizations with impossible geometry in tower layouts. + SkipTransitiveReduction bool + + // SkipSeparators disables insertion of separator beams for tangle motifs. + // If true, the output may contain unavoidable edge crossings. Use this + // when crossings are acceptable or when the graph structure guarantees + // no overlaps. + SkipSeparators bool +} diff --git a/pkg/dag/transform/spans.go b/pkg/dag/transform/spans.go index d551e49..f74f95a 100644 --- a/pkg/dag/transform/spans.go +++ b/pkg/dag/transform/spans.go @@ -9,12 +9,78 @@ import ( "github.com/matzehuels/stacktower/pkg/dag" ) +// ResolveSpanOverlaps identifies and resolves impossible crossing patterns by +// inserting separator beam nodes. +// +// ResolveSpanOverlaps detects "tangle motifs"—subgraph patterns where multiple +// parent nodes share multiple child nodes in a way that guarantees edge +// crossings regardless of child ordering. The canonical example is a complete +// bipartite graph K(2,2): +// +// auth → logging auth → metrics +// api → logging api → metrics +// +// No matter how you order {logging, metrics}, edges must cross. 
Rather than +// accepting crossings, ResolveSpanOverlaps inserts a [dag.NodeKindAuxiliary] +// separator node that routes edges through a shared intermediate: +// +// auth → separator → logging +// api → separator → metrics +// +// This eliminates crossings by factoring shared dependencies through a beam. +// +// # Detection Algorithm +// +// ResolveSpanOverlaps processes rows bottom-up. For each row, it: +// 1. Computes the "span" of each parent (min/max child positions) +// 2. Counts how many parent spans overlap each gap between children +// 3. Where 2+ parents overlap, inserts a separator and reroutes edges +// 4. Repeats until no overlaps remain (may insert multiple separators per row) +// +// # Separator Nodes +// +// Separator nodes are inserted in a new row between parents and children, +// shifting all lower rows down. Separator IDs are generated as +// "Sep_row_firstChild_lastChild" with numeric suffixes if needed for +// uniqueness. +// +// # Eligibility Rules +// +// A parent is eligible for separator insertion only if: +// - It has 2+ children in the target row +// - ALL its children are in that single row (no splitting across rows) +// - None of its children are subdividers of the same master (avoids splitting logical columns) +// +// Separators are inserted in gaps between children where canInsertBetween +// returns true (respects subdivider master boundaries). +// +// # Multiple Passes +// +// ResolveSpanOverlaps may make multiple passes over a row, inserting separators +// iteratively until no overlaps remain. Each insertion shifts rows and +// recomputes spans. +// +// # Nil Handling +// +// ResolveSpanOverlaps panics if d is nil. If d is empty (zero nodes), the +// function returns immediately. +// +// # Performance +// +// Time complexity is O(R·P·C·I) where R is the number of rows, P is the +// average number of parents per row, C is children per parent, and I is the +// number of separator insertion iterations (typically 1-3). 
For typical +// dependency graphs, this is effectively O(V) where V is the number of nodes. +// +// Space complexity is O(V) for tracking used node IDs. func ResolveSpanOverlaps(d *dag.DAG) { usedIDs := nodeIDSet(d.Nodes()) - for _, row := range d.RowIDs() { - if row > 0 { - for insertSeparatorAt(d, row, usedIDs) { - } + // Process row boundaries by index (not row number) since separator insertion + // shifts row numbers but not our position in the traversal. + for i := 1; i < d.RowCount(); i++ { + row := d.RowIDs()[i] + for insertSeparatorAt(d, row, usedIDs) { + row = d.RowIDs()[i] // re-fetch: same index, new row number } } } diff --git a/pkg/dag/transform/subdivide.go b/pkg/dag/transform/subdivide.go index c7386c4..1899673 100644 --- a/pkg/dag/transform/subdivide.go +++ b/pkg/dag/transform/subdivide.go @@ -6,6 +6,48 @@ import ( "github.com/matzehuels/stacktower/pkg/dag" ) +// Subdivide breaks edges that span multiple rows into sequences of single-row +// edges connected by synthetic subdivider nodes. +// +// Subdivide ensures every edge in the graph connects nodes in consecutive rows +// (parent.Row + 1 == child.Row). Any edge spanning multiple rows is replaced +// by a chain of [dag.NodeKindSubdivider] nodes. For example: +// +// Before: app (row 0) → core (row 3) [spans 3 rows] +// After: app → app_sub_1 → app_sub_2 → core [3 single-row edges] +// +// Each subdivider maintains a MasterID field linking back to the original +// source node, allowing renderers to visually merge subdividers into +// continuous vertical blocks. +// +// # Sink Extension +// +// Subdivide also extends all sink nodes (nodes with out-degree 0) to the +// bottom row of the graph by appending subdivider chains. This ensures tower +// layouts have a flat foundation where all columns reach the bottom. +// +// # Node IDs +// +// Subdivider nodes are assigned unique IDs of the form "master_sub_row" (e.g., +// "app_sub_1"). 
If a collision occurs, a numeric suffix is appended +// ("app_sub_1__2"). All generated IDs are tracked to guarantee uniqueness. +// +// # Edge Metadata +// +// Subdivide preserves edge metadata only on the final edge in each subdivided +// chain (the edge entering the original target). Intermediate subdivider edges +// have no metadata. +// +// # Nil Handling +// +// Subdivide panics if g is nil. If g is empty (zero nodes), the function +// returns immediately. +// +// # Performance +// +// Time complexity is O(V·D) where V is nodes and D is the maximum depth (row +// count), as each node may spawn subdividers equal to the depth. Space +// complexity is O(V) for tracking used IDs. func Subdivide(g *dag.DAG) { gen := newIDGen(g.Nodes()) subdivideLongEdges(g, gen) diff --git a/pkg/dag/transform/transform_test.go b/pkg/dag/transform/transform_test.go index 2a2297e..9058007 100644 --- a/pkg/dag/transform/transform_test.go +++ b/pkg/dag/transform/transform_test.go @@ -32,17 +32,23 @@ func buildDiamondDAG() *dag.DAG { func TestNormalize_EmptyGraph_ReturnsEmpty(t *testing.T) { g := dag.New(nil) result := Normalize(g) - if result.NodeCount() != 0 { - t.Errorf("expected 0 nodes, got %d", result.NodeCount()) + if g.NodeCount() != 0 { + t.Errorf("expected 0 nodes, got %d", g.NodeCount()) + } + if result.CyclesRemoved != 0 || result.TransitiveEdgesRemoved != 0 || result.SubdividersAdded != 0 { + t.Errorf("expected zero metrics for empty graph, got %+v", result) } } func TestNormalize_SimpleGraph_AppliesPipeline(t *testing.T) { g := buildSimpleDAG() result := Normalize(g) - if result.NodeCount() == 0 { + if g.NodeCount() == 0 { t.Error("expected non-empty result") } + if result == nil { + t.Error("expected non-nil result") + } } func TestTransitiveReduction_EmptyGraph_Noop(t *testing.T) { @@ -354,17 +360,17 @@ func TestNormalize_CompleteWorkflow(t *testing.T) { result := Normalize(g) - if result.EdgeCount() != 3 { - t.Errorf("expected 3 edges after reduction, got %d", 
result.EdgeCount()) + if g.EdgeCount() != 3 { + t.Errorf("expected 3 edges after reduction, got %d", g.EdgeCount()) } - nodeA, _ := result.Node("a") + nodeA, _ := g.Node("a") if nodeA.Row != 0 { t.Errorf("expected node a at row 0, got %d", nodeA.Row) } allNodesHaveRows := true - for _, n := range result.Nodes() { + for _, n := range g.Nodes() { if !n.IsSubdivider() { if n.Row < 0 { allNodesHaveRows = false @@ -375,16 +381,23 @@ func TestNormalize_CompleteWorkflow(t *testing.T) { if !allNodesHaveRows { t.Error("not all nodes have valid rows assigned") } + + if result.TransitiveEdgesRemoved != 1 { + t.Errorf("expected 1 transitive edge removed, got %d", result.TransitiveEdgesRemoved) + } } -func TestNormalize_ReturnsSameInstance(t *testing.T) { +func TestNormalize_ReturnsResult(t *testing.T) { g := dag.New(nil) _ = g.AddNode(dag.Node{ID: "a"}) result := Normalize(g) - if result != g { - t.Error("Normalize should return the same DAG instance (in-place modification)") + if result == nil { + t.Error("Normalize should return a non-nil TransformResult") + } + if g.NodeCount() != 1 { + t.Error("Normalize should modify DAG in-place") } } @@ -398,9 +411,9 @@ func TestNormalize_IntegrationWithSubdivision(t *testing.T) { result := Normalize(g) - nodeA, _ := result.Node("a") - nodeB, _ := result.Node("b") - nodeC, _ := result.Node("c") + nodeA, _ := g.Node("a") + nodeB, _ := g.Node("b") + nodeC, _ := g.Node("c") if nodeA.Row != 0 || nodeB.Row != 0 { t.Error("roots should be at row 0") @@ -409,9 +422,13 @@ func TestNormalize_IntegrationWithSubdivision(t *testing.T) { t.Errorf("node c should be at row 1, got %d", nodeC.Row) } - for _, n := range result.Nodes() { + for _, n := range g.Nodes() { if n.Row < 0 { t.Errorf("node %s has invalid row %d", n.ID, n.Row) } } + + if result == nil { + t.Error("expected non-nil result") + } } diff --git a/pkg/deps/deps.go b/pkg/deps/deps.go index f4a1e96..ea0d5d3 100644 --- a/pkg/deps/deps.go +++ b/pkg/deps/deps.go @@ -7,22 +7,64 @@ import ( ) 
const ( - DefaultMaxDepth = 50 // Default maximum dependency depth - DefaultMaxNodes = 5000 // Default maximum packages to fetch - DefaultCacheTTL = 24 * time.Hour // Default HTTP cache duration + // DefaultMaxDepth is the default maximum dependency depth (50 levels). + // This prevents infinite recursion in circular or very deep dependency trees. + DefaultMaxDepth = 50 + + // DefaultMaxNodes is the default maximum number of packages to fetch (5000 nodes). + // This caps memory usage and prevents unbounded crawling of large ecosystems. + DefaultMaxNodes = 5000 + + // DefaultCacheTTL is the default HTTP cache duration (24 hours). + // Cached registry responses are reused within this window unless Refresh is true. + DefaultCacheTTL = 24 * time.Hour ) // Options configures dependency resolution behavior. +// +// All fields are optional. Zero values are replaced by defaults when passed +// to WithDefaults. Options is safe to copy and does not modify any inputs. type Options struct { - MaxDepth int // Maximum depth to traverse (default: 50) - MaxNodes int // Maximum packages to fetch (default: 5000) - CacheTTL time.Duration // HTTP cache duration (default: 24h) - Refresh bool // Bypass cache for fresh data - MetadataProviders []MetadataProvider // Sources for enrichment (GitHub, etc.) - Logger func(string, ...any) // Progress/error callback (optional) + // MaxDepth limits how many levels deep to traverse. A value of 1 fetches + // only direct dependencies. Zero or negative values use DefaultMaxDepth (50). + MaxDepth int + + // MaxNodes limits the total number of packages to fetch. When this limit + // is reached, deeper dependencies are ignored but already-queued packages + // may still be fetched. Zero or negative values use DefaultMaxNodes (5000). + MaxNodes int + + // CacheTTL controls how long HTTP responses are cached. Registry clients + // will reuse cached data within this duration. Zero or negative values use + // DefaultCacheTTL (24 hours). 
+ CacheTTL time.Duration + + // Refresh bypasses the HTTP cache when true, forcing fresh registry fetches. + // This is useful for getting the latest package versions but increases latency. + Refresh bool + + // MetadataProviders is an optional list of enrichment sources (e.g., GitHub) + // that add extra metadata to package nodes. Providers are called concurrently + // after fetching each package. Nil or empty is safe. + MetadataProviders []MetadataProvider + + // Logger is an optional callback for progress and error messages. If nil, + // WithDefaults replaces it with a no-op logger. The format string follows + // fmt.Printf conventions. Logger may be called concurrently from multiple + // goroutines and must be safe for concurrent use. + Logger func(string, ...any) } // WithDefaults returns a copy of Options with zero values replaced by defaults. +// +// This method is safe to call on a zero Options value. It fills in: +// - MaxDepth: DefaultMaxDepth (50) +// - MaxNodes: DefaultMaxNodes (5000) +// - CacheTTL: DefaultCacheTTL (24h) +// - Logger: no-op function if nil +// +// All other fields (Refresh, MetadataProviders) are preserved as-is, including +// nil slices. The original Options value is not modified. func (o Options) WithDefaults() Options { opts := o if opts.MaxDepth <= 0 { @@ -41,38 +83,108 @@ func (o Options) WithDefaults() Options { } // MetadataProvider enriches package nodes with external data (e.g., GitHub stars). +// +// Implementations fetch supplementary information that is not available in package +// registries, such as repository activity, maintainer counts, or security metrics. +// Providers are called concurrently after fetching each package during resolution. type MetadataProvider interface { - // Name returns the provider identifier (e.g., "github"). + // Name returns the provider identifier (e.g., "github", "gitlab"). + // This is used for logging and error messages. Name() string + // Enrich fetches additional metadata for the package. 
+ // + // The pkg parameter contains registry information and URLs for lookup. + // If refresh is true, the provider should bypass its cache. + // + // Returns a map of metadata keys to values, which are merged into the + // package node's metadata. Keys should be provider-specific (e.g., + // "github_stars") to avoid conflicts with other providers. + // + // Returns an error if enrichment fails. The resolver logs the error + // but continues without failing the entire resolution. Enrich(ctx context.Context, pkg *PackageRef, refresh bool) (map[string]any, error) } // PackageRef identifies a package for metadata enrichment lookups. +// +// It contains the information metadata providers need to look up external data +// like GitHub repository statistics. Created by [Package.Ref]. type PackageRef struct { - Name string // Package name - Version string // Package version - ProjectURLs map[string]string // URLs from registry (repository, homepage, etc.) - HomePage string // Homepage URL - ManifestFile string // Associated manifest file type + // Name is the package name as it appears in the registry. + Name string + + // Version is the specific version being referenced. + Version string + + // ProjectURLs contains URL mappings from the package registry, typically + // including "repository", "homepage", "documentation", etc. The keys + // depend on the registry. May be nil or empty. + ProjectURLs map[string]string + + // HomePage is the project's home page URL, if available. May be empty. + HomePage string + + // ManifestFile is the associated manifest type (e.g., "poetry", "cargo") + // when the package comes from manifest parsing. Empty for registry-only packages. + ManifestFile string } // Package holds metadata fetched from a package registry. +// +// This is the core data structure returned by [Fetcher.Fetch] and used throughout +// the resolution process. 
Not all fields are populated by every registry—consult +// the specific integration documentation for field availability. type Package struct { - Name string // Package name - Version string // Latest or specified version - Dependencies []string // Direct dependency names - Description string // Package summary/description - License string // License identifier - Author string // Primary author or maintainer - Downloads int // Download count (where available) - Repository string // Source repository URL - HomePage string // Project homepage URL - ProjectURLs map[string]string // Additional URLs from registry - ManifestFile string // Associated manifest type + // Name is the package identifier in the registry (e.g., "requests", "serde"). + Name string + + // Version is the package version (e.g., "2.31.0"). For registry lookups + // without a version constraint, this is typically the latest stable version. + Version string + + // Dependencies lists direct dependency names. The resolver recursively fetches + // these to build the dependency tree. Nil and empty slices are equivalent. + Dependencies []string + + // Description is a short summary of the package purpose. May be empty. + Description string + + // License is the package license identifier (e.g., "MIT", "Apache-2.0"). + // May be empty or unknown. + License string + + // Author is the primary package author or maintainer. May be empty. + Author string + + // Downloads is the total download count or recent download rate, depending + // on the registry. Zero if unavailable. Not all registries provide this. + Downloads int + + // Repository is the source code repository URL (e.g., GitHub, GitLab). + // May be empty if not specified in registry metadata. + Repository string + + // HomePage is the project home page URL. May be empty or identical to Repository. + HomePage string + + // ProjectURLs contains additional URLs from the registry (docs, issues, etc.). + // Keys and availability vary by registry. 
May be nil. + ProjectURLs map[string]string + + // ManifestFile identifies the manifest type when this Package comes from + // manifest parsing (e.g., "poetry", "cargo"). Empty for registry packages. + ManifestFile string } // Metadata converts Package fields to a map for node metadata. +// +// The returned map always contains "version". Optional fields (description, +// license, author, downloads) are included only if non-empty/non-zero. +// +// This map is suitable for use as dag.Node.Meta and can be further enriched +// by [MetadataProvider] implementations. The map is newly allocated and safe +// to modify. Returns a non-nil map even if the Package has no optional fields. func (p *Package) Metadata() map[string]any { m := map[string]any{"version": p.Version} if p.Description != "" { @@ -91,6 +203,17 @@ func (p *Package) Metadata() map[string]any { } // Ref creates a PackageRef for metadata provider lookups. +// +// The returned PackageRef consolidates URL information from multiple Package +// fields (ProjectURLs, Repository, HomePage) into a single ProjectURLs map +// for convenient provider lookups. +// +// The ProjectURLs map is a clone of the original, so modifying it does not +// affect the Package. If the Package has nil ProjectURLs, an empty map is +// allocated. Repository and HomePage are added to the map under "repository" +// and "homepage" keys if non-empty. +// +// This method never returns nil. Safe to call on a zero Package value. 
func (p *Package) Ref() *PackageRef { urls := maps.Clone(p.ProjectURLs) if urls == nil { diff --git a/pkg/deps/doc.go b/pkg/deps/doc.go index 46b233f..558a391 100644 --- a/pkg/deps/doc.go +++ b/pkg/deps/doc.go @@ -3,105 +3,151 @@ // // # Overview // -// Stacktower can fetch dependency data from multiple sources: +// Package deps is the core abstraction layer for fetching dependency trees +// from multiple sources: // -// - Package registries (PyPI, npm, crates.io, RubyGems, Packagist, Maven, Go Proxy) -// - Manifest files (requirements.txt, package.json, Cargo.toml, etc.) +// - Package registries: PyPI, npm, crates.io, RubyGems, Packagist, Maven, Go Proxy +// - Manifest files: requirements.txt, package.json, Cargo.toml, poetry.lock, etc. // -// This package provides the core abstractions and concurrent resolver that -// powers the `stacktower parse` command. +// It provides a concurrent resolver that crawls dependencies in parallel while +// respecting depth and node limits. The resulting dependency graphs are returned +// as [dag.DAG] structures suitable for visualization and analysis. +// +// This package powers the `stacktower parse` command and is language-agnostic, +// delegating language-specific details to subpackages (python, rust, javascript, etc.). // // # Architecture // // The dependency resolution system has three layers: // -// 1. Integrations ([integrations]): Low-level HTTP clients for each registry API -// 2. Language definitions (this package): Registry/manifest mappings -// 3. CLI ([internal/cli]): User-facing commands and output +// 1. Registry integrations ([integrations]): HTTP clients for each registry API +// 2. Language definitions (this package): [Language] values that map registries and manifests +// 3. 
CLI ([internal/cli]): User commands like `stacktower parse` // // # Resolving Dependencies // -// Use [Language.Resolve] to fetch a complete dependency tree: +// Use a [Language]'s resolver to fetch a complete dependency tree from a registry: +// +// import "github.com/matzehuels/stacktower/pkg/deps/python" // -// lang := python.Language -// resolver, _ := lang.NewResolver(24 * time.Hour) +// resolver, _ := python.Language.Resolver() // g, _ := resolver.Resolve(ctx, "fastapi", deps.Options{ // MaxDepth: 10, // MaxNodes: 1000, // }) // -// The resolver: +// The resolver crawls dependencies concurrently: // // 1. Fetches the root package from the registry -// 2. Recursively fetches dependencies (with configurable depth/node limits) -// 3. Builds a [dag.DAG] with package metadata -// 4. Optionally enriches with GitHub metadata (stars, maintainers) +// 2. Recursively fetches dependencies up to MaxDepth levels +// 3. Builds a [dag.DAG] with nodes for packages and edges for dependencies +// 4. Optionally enriches nodes with metadata from [MetadataProvider] sources // // # Options // -// [Options] controls resolution behavior: +// [Options] configures resolution behavior. 
All fields are optional and have +// sensible defaults via [Options.WithDefaults]: // // - MaxDepth: Maximum dependency depth (default 50) // - MaxNodes: Maximum packages to fetch (default 5000) -// - CacheTTL: How long to cache HTTP responses (default 24h) -// - Refresh: Bypass cache for fresh data -// - MetadataProviders: Enrichment sources (GitHub, GitLab) -// - Logger: Progress callback +// - CacheTTL: HTTP cache duration (default 24h) +// - Refresh: Bypass cache to force fresh data +// - MetadataProviders: External enrichment sources (e.g., GitHub, GitLab) +// - Logger: Progress and error callback (must be goroutine-safe) // // # Package Data // -// Each resolved package becomes a [Package] with: +// Each resolved package is represented by a [Package] struct containing: // -// - Name, Version: Package identity -// - Dependencies: Direct dependency names -// - Description, License, Author: Registry metadata -// - Repository, HomePage: Source URLs -// - Downloads: Popularity metric (where available) +// - Name, Version: Package identity from the registry +// - Dependencies: Direct dependency names (recursively fetched) +// - Description, License, Author: Registry metadata (availability varies) +// - Repository, HomePage: Source code and documentation URLs +// - Downloads: Popularity metric (when available from the registry) // -// The [Package.Metadata] method converts this to a map suitable for -// node metadata in the DAG. +// Use [Package.Metadata] to convert package fields to a map suitable for +// [dag.Node] metadata. Use [Package.Ref] to create a [PackageRef] for +// metadata provider lookups. 
// // # Manifest Parsing // -// For local projects, parse manifest files directly: +// For local projects, parse dependency information directly from manifest files: // -// parser := python.PoetryParser{} +// import "github.com/matzehuels/stacktower/pkg/deps/python" +// +// parsers := python.Language.ManifestParsers(nil) +// parser, _ := deps.DetectManifest("poetry.lock", parsers...) // result, _ := parser.Parse("poetry.lock", opts) -// g := result.Graph +// g := result.Graph.(*dag.DAG) +// +// Manifest parsers implement [ManifestParser] and vary in completeness: // -// Manifest parsers implement [ManifestParser] and may provide: +// - Direct dependencies only: requirements.txt, package.json (base) +// - Full transitive closure: poetry.lock, Cargo.lock, package-lock.json // -// - Direct dependencies only (requirements.txt) -// - Full transitive closure (poetry.lock, Cargo.lock) +// Use [ManifestParser.IncludesTransitive] to check if additional resolution +// is needed. Use [DetectManifest] to find the right parser for a file. // // # Metadata Enrichment // -// [MetadataProvider] implementations add data from external sources: +// [MetadataProvider] implementations add supplementary data from external sources: +// +// import "github.com/matzehuels/stacktower/pkg/deps/metadata" // // providers := []deps.MetadataProvider{ // metadata.NewGitHubProvider(token, ttl), // } // opts := deps.Options{MetadataProviders: providers} // -// The GitHub provider adds: repo_stars, repo_owner, repo_maintainers, -// repo_last_commit, repo_archived—all used by Nebraska ranking and -// brittle detection. +// The GitHub provider (see [metadata.GitHubProvider]) adds: +// - repo_stars: GitHub star count +// - repo_owner, repo_maintainers: Maintainer information +// - repo_last_commit: Last commit timestamp +// - repo_archived: Whether the repository is archived +// +// These fields power Nebraska ranking and brittle package detection. 
// // # Supported Languages // -// Each language has a subpackage with its [Language] definition: +// Each language has a subpackage exporting a Language definition: // -// - [python]: PyPI, poetry.lock, requirements.txt, pyproject.toml -// - [rust]: crates.io, Cargo.toml -// - [javascript]: npm, package.json -// - [ruby]: RubyGems, Gemfile -// - [php]: Packagist, composer.json -// - [java]: Maven Central, pom.xml +// - [python]: PyPI registry, poetry.lock, requirements.txt, pyproject.toml +// - [rust]: crates.io registry, Cargo.toml, Cargo.lock +// - [javascript]: npm registry, package.json, package-lock.json +// - [ruby]: RubyGems registry, Gemfile, Gemfile.lock +// - [php]: Packagist registry, composer.json, composer.lock +// - [java]: Maven Central registry, pom.xml // - [golang]: Go Module Proxy, go.mod // +// Language subpackages also provide registry-specific [Fetcher] implementations +// that wrap HTTP clients from the [integrations] package. +// +// # Concurrency +// +// The resolver uses a worker pool (20 concurrent goroutines by default) to fetch +// packages in parallel. All public types are safe for concurrent use: +// +// - [Fetcher.Fetch] must be goroutine-safe (called by multiple workers) +// - [MetadataProvider.Enrich] must be goroutine-safe (called concurrently per package) +// - [Options.Logger] must be goroutine-safe (called from multiple workers) +// +// Manifest parsers are not required to be goroutine-safe as they are typically +// called once per file. +// +// # Error Handling +// +// Resolution errors fall into two categories: +// +// - Fatal: Root package not found or unreachable. [Resolver.Resolve] returns an error. +// - Non-fatal: Transitive dependency failures. Logged via [Options.Logger] but don't fail resolution. +// +// Manifest parsing errors are always fatal and returned by [ManifestParser.Parse]. +// Metadata enrichment errors are non-fatal and logged. 
+// // [integrations]: github.com/matzehuels/stacktower/pkg/integrations // [internal/cli]: github.com/matzehuels/stacktower/internal/cli // [dag.DAG]: github.com/matzehuels/stacktower/pkg/dag.DAG +// [dag.Node]: github.com/matzehuels/stacktower/pkg/dag.Node // [python]: github.com/matzehuels/stacktower/pkg/deps/python // [rust]: github.com/matzehuels/stacktower/pkg/deps/rust // [javascript]: github.com/matzehuels/stacktower/pkg/deps/javascript @@ -109,4 +155,5 @@ // [php]: github.com/matzehuels/stacktower/pkg/deps/php // [java]: github.com/matzehuels/stacktower/pkg/deps/java // [golang]: github.com/matzehuels/stacktower/pkg/deps/golang +// [metadata.GitHubProvider]: github.com/matzehuels/stacktower/pkg/deps/metadata.GitHubProvider package deps diff --git a/pkg/deps/example_test.go b/pkg/deps/example_test.go index 7d41fc7..b3ff643 100644 --- a/pkg/deps/example_test.go +++ b/pkg/deps/example_test.go @@ -72,10 +72,17 @@ func ExampleDetectManifest() { // DetectManifest finds the right parser for a manifest file. // In real usage, you would get parsers from a Language definition: // + // import "github.com/matzehuels/stacktower/pkg/deps/python" + // // parsers := python.Language.ManifestParsers(nil) // parser, err := deps.DetectManifest("poetry.lock", parsers...) + // if err != nil { + // log.Fatal(err) + // } + // result, err := parser.Parse("poetry.lock", opts) // // The function matches filename patterns to find a suitable parser. + // Returns an error if no parser recognizes the file. fmt.Println("DetectManifest matches filename to parser type") // Output: @@ -129,3 +136,30 @@ func ExampleOptions_limits() { // MaxNodes: 100 // Refresh: true } + +func ExampleNewRegistry() { + // NewRegistry wraps a Fetcher with concurrent crawling logic. 
+ // In real usage, you would pass a registry-specific fetcher: + // + // import ( + // "github.com/matzehuels/stacktower/pkg/deps" + // "github.com/matzehuels/stacktower/pkg/integrations/pypi" + // ) + // + // client := pypi.NewClient(24 * time.Hour) + // fetcher := pypi.NewFetcher(client) + // resolver := deps.NewRegistry("pypi", fetcher) + // + // ctx := context.Background() + // g, err := resolver.Resolve(ctx, "requests", deps.Options{ + // MaxDepth: 5, + // MaxNodes: 100, + // }) + // + // The resolver uses a worker pool to fetch packages concurrently, + // respecting the MaxDepth and MaxNodes limits. + + fmt.Println("NewRegistry creates a concurrent dependency resolver") + // Output: + // NewRegistry creates a concurrent dependency resolver +} diff --git a/pkg/deps/golang/gomod.go b/pkg/deps/golang/gomod.go index 7e2e8be..b89139b 100644 --- a/pkg/deps/golang/gomod.go +++ b/pkg/deps/golang/gomod.go @@ -12,6 +12,9 @@ import ( const projectRoot = "__project__" +// GoModParser parses go.mod files. It extracts direct dependencies and +// optionally resolves them via the Go Module Proxy if a [deps.Resolver] +// is provided. 
type GoModParser struct { resolver deps.Resolver } diff --git a/pkg/deps/java/java.go b/pkg/deps/java/java.go index c7f3de4..69686b7 100644 --- a/pkg/deps/java/java.go +++ b/pkg/deps/java/java.go @@ -2,6 +2,7 @@ package java import ( "context" + "strings" "time" "github.com/matzehuels/stacktower/pkg/deps" @@ -19,6 +20,7 @@ var Language = &deps.Language{ NewResolver: newResolver, NewManifest: newManifest, ManifestParsers: manifestParsers, + NormalizeName: NormalizeCoordinate, } func newResolver(ttl time.Duration) (deps.Resolver, error) { @@ -32,7 +34,8 @@ func newResolver(ttl time.Duration) (deps.Resolver, error) { type fetcher struct{ *maven.Client } func (f fetcher) Fetch(ctx context.Context, name string, refresh bool) (*deps.Package, error) { - a, err := f.FetchArtifact(ctx, name, refresh) + coord := NormalizeCoordinate(name) + a, err := f.FetchArtifact(ctx, coord, refresh) if err != nil { return nil, err } @@ -46,6 +49,27 @@ func (f fetcher) Fetch(ctx context.Context, name string, refresh bool) (*deps.Pa }, nil } +// NormalizeCoordinate converts filename-safe coordinates to Maven format. +// Since colons are not allowed in filenames (especially on Windows and in some +// build tools), underscores can be used as a substitute. This function converts +// "groupId_artifactId" to "groupId:artifactId" when no colon is present. 
+// +// Examples: +// - "com.google.guava:guava" → "com.google.guava:guava" (unchanged) +// - "com.google.guava_guava" → "com.google.guava:guava" (converted) +func NormalizeCoordinate(coord string) string { + if strings.Contains(coord, ":") { + return coord + } + // Replace the last underscore with a colon + // GroupIds follow reverse domain notation (no underscores typically) + // while artifactIds may contain hyphens or underscores + if idx := strings.LastIndex(coord, "_"); idx != -1 { + return coord[:idx] + ":" + coord[idx+1:] + } + return coord +} + func newManifest(name string, res deps.Resolver) deps.ManifestParser { switch name { case "pom": diff --git a/pkg/deps/java/java_test.go b/pkg/deps/java/java_test.go new file mode 100644 index 0000000..0783413 --- /dev/null +++ b/pkg/deps/java/java_test.go @@ -0,0 +1,37 @@ +package java + +import "testing" + +func TestNormalizeCoordinate(t *testing.T) { + tests := []struct { + input string + want string + }{ + // Already has colon - unchanged + {"com.google.guava:guava", "com.google.guava:guava"}, + {"org.apache.commons:commons-lang3", "org.apache.commons:commons-lang3"}, + + // Underscore converted to colon + {"com.google.guava_guava", "com.google.guava:guava"}, + {"org.apache.commons_commons-lang3", "org.apache.commons:commons-lang3"}, + + // No colon or underscore - unchanged + {"simple-name", "simple-name"}, + {"", ""}, + + // Multiple underscores - only last one converted + {"com.example_foo_bar", "com.example_foo:bar"}, + + // Edge case: underscore at start or end + {"_test", ":test"}, + {"test_", "test:"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + if got := NormalizeCoordinate(tt.input); got != tt.want { + t.Errorf("NormalizeCoordinate(%q) = %q, want %q", tt.input, got, tt.want) + } + }) + } +} diff --git a/pkg/deps/java/pom.go b/pkg/deps/java/pom.go index 3cb2764..f7a1c22 100644 --- a/pkg/deps/java/pom.go +++ b/pkg/deps/java/pom.go @@ -12,6 +12,8 @@ import ( const 
projectRoot = "__project__" +// POMParser parses Maven pom.xml files. It extracts dependencies and +// optionally resolves them via Maven Central. type POMParser struct { resolver deps.Resolver } diff --git a/pkg/deps/javascript/packagejson.go b/pkg/deps/javascript/packagejson.go index be63f87..5dd6e60 100644 --- a/pkg/deps/javascript/packagejson.go +++ b/pkg/deps/javascript/packagejson.go @@ -12,6 +12,8 @@ import ( const projectRoot = "__project__" +// PackageJSON parses package.json files. It extracts dependencies, +// devDependencies, and peerDependencies. type PackageJSON struct { resolver deps.Resolver } diff --git a/pkg/deps/language.go b/pkg/deps/language.go index af2ff83..3aee61c 100644 --- a/pkg/deps/language.go +++ b/pkg/deps/language.go @@ -6,19 +6,70 @@ import ( ) // Language defines how to resolve dependencies for a programming language. -// It maps registry names to resolvers and manifest file types to parsers. +// +// Each language subpackage (python, rust, javascript, etc.) exports a +// Language value that describes its registry API, manifest formats, and +// how to construct resolvers and parsers. +// +// Language values are typically used by the CLI to dispatch commands like +// "stacktower parse" based on file type or registry name. type Language struct { - Name string // Language identifier (e.g., "python", "rust") - DefaultRegistry string // Primary registry (e.g., "pypi", "crates") - RegistryAliases map[string]string // Alternative names for registries - ManifestTypes []string // Supported manifest types (e.g., "poetry", "cargo") - ManifestAliases map[string]string // Filename to type mappings - NewResolver func(ttl time.Duration) (Resolver, error) // Factory for registry resolver - NewManifest func(name string, res Resolver) ManifestParser // Factory for manifest parsers - ManifestParsers func(res Resolver) []ManifestParser // All available manifest parsers + // Name is the language identifier (e.g., "python", "rust", "javascript"). 
+ Name string + + // DefaultRegistry is the primary registry name for this language + // (e.g., "pypi" for Python, "crates" for Rust). Used when no registry + // is explicitly specified. + DefaultRegistry string + + // RegistryAliases maps alternative registry names to canonical names. + // For example, {"npm": "npm", "npmjs": "npm"}. May be nil or empty. + RegistryAliases map[string]string + + // ManifestTypes lists supported manifest type identifiers + // (e.g., ["poetry", "requirements", "pipfile"] for Python). These are + // the canonical names passed to NewManifest. May be nil or empty. + ManifestTypes []string + + // ManifestAliases maps filenames to manifest types. For example, + // {"poetry.lock": "poetry", "requirements.txt": "requirements"}. + // Used by DetectManifest to match file paths. May be nil or empty. + ManifestAliases map[string]string + + // NewResolver creates a registry Resolver with the given HTTP cache TTL. + // Returns an error if resolver construction fails (e.g., missing + // configuration). May be nil if the language has no registry support. + NewResolver func(ttl time.Duration) (Resolver, error) + + // NewManifest creates a ManifestParser for the given type name and resolver. + // The name is typically a value from ManifestTypes or ManifestAliases. + // Returns nil if the type is unrecognized. May be nil if the language + // has no manifest support. The resolver may be nil for parsers that don't + // need to fetch additional data. + NewManifest func(name string, res Resolver) ManifestParser + + // ManifestParsers returns all available ManifestParser implementations + // for this language. The resolver is passed to each parser and may be nil. + // Returns nil or an empty slice if the language has no manifest support. + ManifestParsers func(res Resolver) []ManifestParser + + // NormalizeName transforms a package name to its canonical form. 
+ // For example, Maven coordinates may use underscores as a filesystem-safe + // alternative to colons: "com.google.guava_guava" -> "com.google.guava:guava". + // May be nil if the language doesn't require name normalization. + NormalizeName func(name string) string } // Registry returns a Resolver for the named registry, resolving aliases. +// +// The name is first resolved through RegistryAliases. If it doesn't match +// DefaultRegistry, an error is returned. This method currently only supports +// a single registry per language. +// +// The resolver is created with DefaultCacheTTL (24 hours). Use NewResolver +// directly for custom TTLs. +// +// Returns an error if the registry name is unknown or if NewResolver fails. func (l *Language) Registry(name string) (Resolver, error) { name = l.alias(l.RegistryAliases, name) if name != l.DefaultRegistry { @@ -28,12 +79,24 @@ func (l *Language) Registry(name string) (Resolver, error) { } // Resolver returns the default registry resolver for this language. +// +// This is a convenience wrapper around NewResolver(DefaultCacheTTL). +// Returns an error if NewResolver is nil or fails. func (l *Language) Resolver() (Resolver, error) { return l.NewResolver(DefaultCacheTTL) } // Manifest returns a parser for the named manifest type, resolving aliases. -// Returns nil, false if the manifest type is not supported. +// +// The name is first resolved through ManifestAliases (e.g., "poetry.lock" -> "poetry"), +// then passed to NewManifest. The resolver may be nil for parsers that don't fetch +// additional data. +// +// Returns (parser, true) if successful, or (nil, false) if: +// - NewManifest is nil (language has no manifest support) +// - The manifest type is unrecognized by NewManifest +// +// Safe to call on a zero Language value. 
func (l *Language) Manifest(name string, res Resolver) (ManifestParser, bool) { if l.NewManifest == nil { return nil, false @@ -43,6 +106,10 @@ func (l *Language) Manifest(name string, res Resolver) (ManifestParser, bool) { } // HasManifests reports whether this language supports manifest file parsing. +// +// Returns true if NewManifest is non-nil, meaning at least one manifest +// format is supported. This does not guarantee that a specific manifest +// type is recognized—use Manifest to check. func (l *Language) HasManifests() bool { return l.NewManifest != nil } diff --git a/pkg/deps/manifest.go b/pkg/deps/manifest.go index 8f70349..8ca34c7 100644 --- a/pkg/deps/manifest.go +++ b/pkg/deps/manifest.go @@ -6,28 +6,88 @@ import ( ) // ManifestParser reads dependency information from local manifest files. +// +// Manifest files describe a project's dependencies and may be either: +// - Requirement files (package.json, requirements.txt) with direct deps only +// - Lock files (poetry.lock, Cargo.lock) with full transitive closures +// +// Implementations are found in language subpackages (e.g., python.PoetryParser). type ManifestParser interface { - // Parse reads the manifest at path and returns the dependency graph. + // Parse reads the manifest file at path and returns the dependency graph. + // + // The path is typically a local file system path. Options may influence + // parsing behavior (e.g., MaxDepth for resolvers that fetch additional data). + // + // Returns an error if the file cannot be read, is malformed, or if + // dependency resolution fails. Common errors: + // - File not found or unreadable + // - Invalid JSON/TOML/YAML syntax + // - Missing required fields + // - Dependency fetching failures (if the parser resolves transitive deps) Parse(path string, opts Options) (*ManifestResult, error) + // Supports reports whether this parser handles the given filename. + // + // The filename is typically the basename of a path (e.g., "package.json"). 
+ // Returns true if this parser recognizes the file format. Supports(filename string) bool - // Type returns the manifest type identifier (e.g., "poetry", "cargo"). + + // Type returns the manifest type identifier (e.g., "poetry", "cargo", "npm"). + // + // This identifier appears in ManifestResult.Type and is used for + // logging and error messages. Type() string - // IncludesTransitive reports whether the manifest contains the full - // transitive closure (like lock files) or just direct dependencies. + + // IncludesTransitive reports whether this parser produces transitive deps. + // + // Returns true for lock files (poetry.lock, Cargo.lock) that contain the + // full dependency closure. Returns false for requirement files (requirements.txt, + // package.json) that only list direct dependencies. + // + // This is used by the CLI to decide whether additional resolution is needed. IncludesTransitive() bool } // ManifestResult holds the parsed dependency data from a manifest file. +// +// Returned by [ManifestParser.Parse] after successfully reading a manifest. type ManifestResult struct { - Graph any // The dependency graph (typically *dag.DAG) - Type string // Parser type that produced this result - IncludesTransitive bool // Whether Graph includes transitive dependencies - RootPackage string // Name of the root package, if determinable + // Graph is the dependency graph, typically a *dag.DAG with nodes for + // packages and edges for dependencies. The concrete type depends on + // the parser implementation. + Graph any + + // Type is the manifest type identifier (from ManifestParser.Type). + // Examples: "poetry", "cargo", "npm", "requirements". + Type string + + // IncludesTransitive indicates whether Graph contains the full transitive + // closure (true for lock files) or just direct dependencies (false). + IncludesTransitive bool + + // RootPackage is the name of the root package, if determinable from the + // manifest. 
Empty if the manifest doesn't specify a package name (e.g., + // requirements.txt has no root package). + RootPackage string } // DetectManifest finds a parser that supports the given file path. -// Returns an error if no parser matches. +// +// The path is matched against each parser's Supports method using the basename. +// Parsers are checked in order, and the first match is returned. +// +// Typical usage: +// +// lang := python.Language +// parsers := lang.ManifestParsers(nil) +// parser, err := deps.DetectManifest("poetry.lock", parsers...) +// if err != nil { +// // No parser supports poetry.lock +// } +// result, err := parser.Parse("poetry.lock", opts) +// +// Returns an error if no parser in the list supports the file. An empty +// parsers list always returns an error. func DetectManifest(path string, parsers ...ManifestParser) (ManifestParser, error) { name := filepath.Base(path) for _, p := range parsers { diff --git a/pkg/deps/php/composer.go b/pkg/deps/php/composer.go index d33b3f7..94f9b4d 100644 --- a/pkg/deps/php/composer.go +++ b/pkg/deps/php/composer.go @@ -12,6 +12,8 @@ import ( const projectRoot = "__project__" +// ComposerJSON parses composer.json files. It extracts direct and dev +// dependencies and optionally resolves them via Packagist. type ComposerJSON struct { resolver deps.Resolver } diff --git a/pkg/deps/python/poetry.go b/pkg/deps/python/poetry.go index 2c3caca..e8694c0 100644 --- a/pkg/deps/python/poetry.go +++ b/pkg/deps/python/poetry.go @@ -14,6 +14,8 @@ import ( const projectRoot = "__project__" +// PoetryLock parses poetry.lock files. It provides a full transitive closure +// of the dependency graph without needing to contact a registry. 
type PoetryLock struct{} func (p *PoetryLock) Type() string { return "poetry.lock" } diff --git a/pkg/deps/python/requirements.go b/pkg/deps/python/requirements.go index 67d1766..01ed9cb 100644 --- a/pkg/deps/python/requirements.go +++ b/pkg/deps/python/requirements.go @@ -14,6 +14,9 @@ import ( var depNameRE = regexp.MustCompile(`^([a-zA-Z0-9][-a-zA-Z0-9._]*)`) +// Requirements parses requirements.txt files. By default, it only provides +// direct dependencies. If a [deps.Resolver] is provided, it can resolve +// the full transitive closure. type Requirements struct { resolver deps.Resolver } diff --git a/pkg/deps/resolver.go b/pkg/deps/resolver.go index dbded38..44fee9a 100644 --- a/pkg/deps/resolver.go +++ b/pkg/deps/resolver.go @@ -9,31 +9,90 @@ import ( "github.com/matzehuels/stacktower/pkg/dag" ) +// workers is the number of concurrent goroutines for fetching packages. +// This limits parallelism to prevent overwhelming registries and to bound +// memory usage. Each worker consumes one job at a time from a buffered channel. const workers = 20 // Fetcher retrieves package metadata from a registry. +// +// Implementations wrap HTTP clients for specific registries (PyPI, npm, crates.io). +// The Fetcher is responsible for HTTP caching, rate limiting, and error handling. +// +// Fetchers are found in the integrations subpackages (e.g., integrations/pypi). type Fetcher interface { - // Fetch retrieves package information by name. If refresh is true, - // cached data is bypassed. + // Fetch retrieves package information by name. + // + // The name is the package identifier in the registry (e.g., "requests", "serde"). + // If refresh is true, cached HTTP responses are bypassed and fresh data is fetched. 
+ // + // Returns an error if: + // - The package does not exist in the registry + // - The registry API is unreachable or returns an error + // - The response cannot be parsed + // + // Implementations should respect context cancellation and return ctx.Err() + // when the context is canceled. + // + // Fetch must be safe for concurrent use by multiple goroutines. Fetch(ctx context.Context, name string, refresh bool) (*Package, error) } // Resolver builds a dependency graph starting from a root package. +// +// Implementations typically wrap a [Fetcher] and provide concurrent crawling +// logic. The [Registry] type is the standard implementation. type Resolver interface { - // Resolve fetches the package and its transitive dependencies, - // returning a DAG with nodes for each package and edges for dependencies. + // Resolve fetches the package and its transitive dependencies. + // + // Starting from pkg, the resolver recursively fetches dependencies up to + // Options.MaxDepth levels deep and Options.MaxNodes total packages. + // + // Returns a [dag.DAG] where: + // - Nodes represent packages (ID = package name) + // - Edges represent dependencies (From depends on To) + // - Node.Meta contains package metadata from [Package.Metadata] and + // enrichment from Options.MetadataProviders + // + // The DAG is fully connected from the root package. Isolated nodes may + // appear if dependency fetching fails for non-root packages. + // + // Returns an error if: + // - The root package cannot be fetched (registry error or does not exist) + // - The context is canceled + // - Internal errors occur + // + // Partial failures (missing transitive dependencies) are logged via + // Options.Logger but do not fail the entire resolution. + // + // Resolve is safe for concurrent use if the underlying Fetcher is safe. Resolve(ctx context.Context, pkg string, opts Options) (*dag.DAG, error) - // Name returns the resolver's identifier (e.g., "pypi", "npm"). 
+ + // Name returns the resolver's identifier (e.g., "pypi", "npm", "crates"). + // + // This is used for logging and error messages. Name() string } // Registry implements Resolver by wrapping a Fetcher with concurrent crawling. +// +// Registry uses a worker pool to fetch packages concurrently, respecting +// Options limits (MaxDepth, MaxNodes). It tracks visited packages to avoid +// redundant fetches and handles cycles gracefully. +// +// Use [NewRegistry] to construct instances. type Registry struct { name string fetcher Fetcher } // NewRegistry creates a Resolver that crawls dependencies using the given Fetcher. +// +// The name is the resolver identifier (e.g., "pypi", "npm") and appears in +// Name() results and error messages. +// +// The fetcher must be safe for concurrent use, as multiple worker goroutines +// will call Fetch simultaneously. func NewRegistry(name string, fetcher Fetcher) *Registry { return &Registry{name: name, fetcher: fetcher} } @@ -42,6 +101,21 @@ func NewRegistry(name string, fetcher Fetcher) *Registry { func (r *Registry) Name() string { return r.name } // Resolve crawls dependencies starting from pkg, respecting Options limits. +// +// This method spawns a worker pool of goroutines that fetch packages concurrently. +// The crawl proceeds breadth-first, tracking visited packages to avoid duplicates. +// +// Resolution stops when: +// - All reachable dependencies are fetched +// - MaxDepth is reached +// - MaxNodes is reached (deeper dependencies are ignored) +// - The context is canceled +// +// Failed fetches for non-root packages are logged but do not fail resolution. +// Failed fetches for the root package return an error immediately. +// +// The returned DAG has nodes for all successfully fetched packages and edges +// for all declared dependencies (even if the target package fetch failed). 
func (r *Registry) Resolve(ctx context.Context, pkg string, opts Options) (*dag.DAG, error) { c := &crawler{ ctx: ctx, @@ -56,41 +130,55 @@ func (r *Registry) Resolve(ctx context.Context, pkg string, opts Options) (*dag. return c.run(pkg) } +// crawler manages concurrent package fetching with depth and node limits. +// +// It uses a worker pool pattern: jobs are enqueued to a channel, workers fetch +// packages concurrently, and results are collected in a single goroutine to +// avoid data races on the DAG and metadata map. +// +// The crawler tracks visited packages to avoid duplicate fetches and maintains +// a pending counter to know when all work is complete. type crawler struct { ctx context.Context opts Options fetch func(context.Context, string, bool) (*Package, error) - g *dag.DAG - meta map[string]map[string]any + g *dag.DAG // The dependency graph being built + meta map[string]map[string]any // Metadata to apply after crawl completes - jobs chan job - results chan result + jobs chan job // Work queue for package fetch jobs + results chan result // Results from worker goroutines wg sync.WaitGroup - mu sync.Mutex - visited map[string]bool - pending int64 - nodeCount int32 + mu sync.Mutex // Protects visited map and meta map writes + visited map[string]bool // Tracks which packages have been queued + pending int64 // Atomic counter of in-flight jobs + nodeCount int32 // Atomic counter of total nodes added } +// job represents a package fetch task with depth tracking. type job struct { - name string - depth int + name string // Package name to fetch + depth int // Depth from root (root = 0) } +// result holds the outcome of a fetch job. type result struct { job - pkg *Package - err error + pkg *Package // Fetched package metadata (nil if err is set) + err error // Fetch error (nil on success) } +// run executes the crawl by starting workers, enqueuing the root, collecting +// results, and applying metadata. Returns the completed DAG or an error. 
func (c *crawler) run(root string) (*dag.DAG, error) { + // Start worker pool for range workers { c.wg.Add(1) go c.worker() } + // Kick off crawl with root package c.enqueue(job{name: root}) if err := c.collect(root); err != nil { close(c.jobs) @@ -98,6 +186,7 @@ func (c *crawler) run(root string) (*dag.DAG, error) { return nil, err } + // Wait for workers to finish and apply collected metadata close(c.jobs) c.wg.Wait() c.applyMeta() @@ -105,9 +194,12 @@ func (c *crawler) run(root string) (*dag.DAG, error) { return c.g, nil } +// worker fetches packages from the jobs channel until it closes. +// Each fetch result is sent to the results channel for processing. func (c *crawler) worker() { defer c.wg.Done() for j := range c.jobs { + // Respect context cancellation if c.ctx.Err() != nil { atomic.AddInt64(&c.pending, -1) continue @@ -117,6 +209,8 @@ func (c *crawler) worker() { } } +// enqueue adds a job to the work queue if the package hasn't been visited. +// Returns false if the package was already visited (duplicate). func (c *crawler) enqueue(j job) bool { c.mu.Lock() if c.visited[j.name] { @@ -128,10 +222,13 @@ func (c *crawler) enqueue(j job) bool { atomic.AddInt64(&c.pending, 1) + // Send to jobs channel in a goroutine to avoid blocking go func() { c.jobs <- j }() return true } +// collect processes results from workers until all pending jobs complete. +// Returns an error if the root package fails or if the context is canceled. func (c *crawler) collect(root string) error { for { select { @@ -139,6 +236,7 @@ func (c *crawler) collect(root string) error { if err := c.handle(r, root); err != nil { return err } + // Check if all work is done if atomic.AddInt64(&c.pending, -1) == 0 { return nil } @@ -148,8 +246,11 @@ func (c *crawler) collect(root string) error { } } +// handle processes a single fetch result: adds nodes/edges, enriches metadata, +// and enqueues dependencies. Returns an error only if the root package fails. 
func (c *crawler) handle(r result, root string) error { if r.err != nil { + // Root package errors are fatal; others are logged if r.name == root { return r.err } @@ -157,9 +258,11 @@ func (c *crawler) handle(r result, root string) error { return nil } + // Add package node to graph _ = c.g.AddNode(dag.Node{ID: r.name}) atomic.AddInt32(&c.nodeCount, 1) + // Collect metadata for later application if meta := c.enrich(r.pkg); len(meta) > 0 { c.mu.Lock() c.meta[r.name] = meta @@ -170,7 +273,9 @@ func (c *crawler) handle(r result, root string) error { return nil } +// enqueueDeps adds dependency edges and enqueues child packages if limits allow. func (c *crawler) enqueueDeps(r result) { + // Stop if at depth limit or no dependencies if r.depth >= c.opts.MaxDepth || len(r.pkg.Dependencies) == 0 { return } @@ -179,15 +284,19 @@ func (c *crawler) enqueueDeps(r result) { count := atomic.LoadInt32(&c.nodeCount) for _, dep := range r.pkg.Dependencies { + // Always add nodes and edges, even if not fetching _ = c.g.AddNode(dag.Node{ID: dep}) _ = c.g.AddEdge(dag.Edge{From: r.name, To: dep}) + // Only fetch if under node limit if int(count) < c.opts.MaxNodes { c.enqueue(job{name: dep, depth: next}) } } } +// applyMeta attaches collected metadata to nodes in the DAG. +// Called after all fetching is complete to avoid concurrent node modifications. func (c *crawler) applyMeta() { c.mu.Lock() defer c.mu.Unlock() @@ -198,6 +307,8 @@ func (c *crawler) applyMeta() { } } +// enrich combines package metadata with external provider data. +// Calls all MetadataProviders concurrently (providers must be goroutine-safe). func (c *crawler) enrich(pkg *Package) map[string]any { m := pkg.Metadata() ref := pkg.Ref() diff --git a/pkg/deps/ruby/gemfile.go b/pkg/deps/ruby/gemfile.go index 4de4ddc..cbd10ca 100644 --- a/pkg/deps/ruby/gemfile.go +++ b/pkg/deps/ruby/gemfile.go @@ -14,6 +14,8 @@ import ( const projectRoot = "__project__" +// Gemfile parses Ruby Gemfiles. 
It extracts gems and optionally resolves +// them via RubyGems. type Gemfile struct { resolver deps.Resolver } diff --git a/pkg/deps/rust/cargo.go b/pkg/deps/rust/cargo.go index 63a678c..08c8cc8 100644 --- a/pkg/deps/rust/cargo.go +++ b/pkg/deps/rust/cargo.go @@ -13,6 +13,8 @@ import ( const projectRoot = "__project__" +// CargoToml parses Cargo.toml files. It extracts direct, dev, and build +// dependencies. type CargoToml struct { resolver deps.Resolver } diff --git a/pkg/httputil/cache.go b/pkg/httputil/cache.go index 325fb9c..8d93116 100644 --- a/pkg/httputil/cache.go +++ b/pkg/httputil/cache.go @@ -11,19 +11,59 @@ import ( ) // ErrExpired is returned by [Cache.Get] when a cached entry exists but has -// exceeded its TTL. The caller should fetch fresh data and update the cache. +// exceeded its time-to-live (TTL). +// +// When you receive ErrExpired, the cached data still exists on disk but is +// considered stale. Callers should fetch fresh data from the source and +// update the cache with [Cache.Set]. +// +// Use errors.Is to check for this error: +// +// ok, err := cache.Get("key", &value) +// if errors.Is(err, httputil.ErrExpired) { +// // Fetch fresh data and update cache +// } var ErrExpired = errors.New("cache entry expired") -// Cache provides file-based caching for HTTP responses. -// Each entry is stored as a JSON file with a SHA-256 hash of the key as filename. +// Cache provides file-based caching of arbitrary JSON-marshalable data. +// +// Each cache entry is stored as a JSON file in the cache directory, with +// the filename derived from a SHA-256 hash of the cache key. This design +// ensures safe key names (no filesystem special characters) and prevents +// key collisions across different namespaces. +// +// Cache operations are not goroutine-safe. If multiple goroutines access +// the same Cache instance, the caller must synchronize access. 
However, +// multiple Cache instances (even in different processes) can safely share +// the same directory, as the filesystem provides atomic file operations. +// +// Cache entries have a time-to-live (TTL) based on file modification time. +// A TTL of 0 means entries never expire. +// +// Use [Cache.Namespace] to create scoped views that automatically prefix +// keys, avoiding collisions between different data sources: +// +// pypi := cache.Namespace("pypi:") +// npm := cache.Namespace("npm:") +// pypi.Set("requests", data) // key becomes "pypi:requests" type Cache struct { - dir string - ttl time.Duration + dir string + ttl time.Duration + prefix string } -// NewCache creates a cache in the specified directory with the given TTL. -// If dir is empty, defaults to ~/.cache/stacktower/. The directory is created -// if it doesn't exist. +// NewCache creates a Cache that stores entries in dir with the given TTL. +// +// If dir is empty, NewCache uses the default directory ~/.cache/stacktower/. +// The directory is created with mode 0755 if it doesn't exist. If directory +// creation fails (e.g., due to permissions), NewCache returns an error. +// +// Parameters: +// - dir: Cache directory path. Use "" for default (~/.cache/stacktower/). +// - ttl: Time-to-live for cache entries. Use 0 for no expiration. +// +// The returned Cache is ready to use. Directory creation errors are the +// only possible source of failure. func NewCache(dir string, ttl time.Duration) (*Cache, error) { if dir == "" { home, err := os.UserHomeDir() @@ -35,20 +75,36 @@ func NewCache(dir string, ttl time.Duration) (*Cache, error) { if err := os.MkdirAll(dir, 0o755); err != nil { return nil, err } - return &Cache{dir: dir, ttl: ttl}, nil + return &Cache{dir: dir, ttl: ttl, prefix: ""}, nil } -// Dir returns the cache directory path. +// Dir returns the absolute path to the cache directory. func (c *Cache) Dir() string { return c.dir } -// TTL returns the cache time-to-live duration. 
+// TTL returns the time-to-live duration for cache entries. +// A TTL of 0 means cache entries never expire. func (c *Cache) TTL() time.Duration { return c.ttl } -// Get retrieves a cached value by key into v. Returns (true, nil) on cache hit, -// (false, nil) on cache miss, and (false, ErrExpired) if the entry exists but -// has exceeded its TTL. The value is JSON-decoded into v. +// Get retrieves a cached value by key and unmarshals it into v. +// +// Return values indicate three distinct outcomes: +// - (true, nil): Cache hit. The value was found, is fresh, and unmarshaled into v. +// - (false, nil): Cache miss. No entry exists for this key. v is unchanged. +// - (false, ErrExpired): Entry exists but exceeded its TTL. v is unchanged. +// - (false, other error): I/O error, JSON unmarshal error, etc. v may be partially modified. +// +// The key can be any string. Consider namespacing keys to avoid collisions +// (e.g., "pypi:requests", "npm:react"). The key is hashed with SHA-256, +// so long keys are acceptable. +// +// The value v must be a pointer to a type compatible with json.Unmarshal. +// Common types include *string, *[]byte, *map[string]any, and pointers to +// custom structs with JSON tags. +// +// Get does not modify the cache or update modification times; reads are +// non-mutating operations. func (c *Cache) Get(key string, v any) (bool, error) { - path := c.keyPath(key) + path := c.keyPath(c.prefix + key) info, err := os.Stat(path) if os.IsNotExist(err) { return false, nil @@ -67,13 +123,51 @@ func (c *Cache) Get(key string, v any) (bool, error) { } // Set stores a value in the cache under the given key. -// The value is JSON-encoded before writing to disk. +// +// The value v is marshaled to JSON using encoding/json and written to disk. +// If v cannot be marshaled (e.g., contains channels or functions), Set +// returns a json.MarshalError. If the write fails (e.g., disk full, +// permission denied), Set returns the underlying I/O error. 
+// +// Set overwrites any existing entry for key, resetting its modification time +// to the current time. This effectively refreshes the TTL. +// +// The value v is not modified by Set; marshaling operates on a copy. func (c *Cache) Set(key string, v any) error { data, err := json.Marshal(v) if err != nil { return err } - return os.WriteFile(c.keyPath(key), data, 0o644) + return os.WriteFile(c.keyPath(c.prefix+key), data, 0o644) +} + +// Namespace returns a new Cache that automatically prefixes all keys with prefix. +// +// This creates a scoped view of the cache, useful for avoiding key collisions +// between different data sources or components. The returned Cache shares the +// same underlying directory and TTL as the parent. +// +// Example: +// +// cache, _ := httputil.NewCache("", 24*time.Hour) +// pypiCache := cache.Namespace("pypi:") +// npmCache := cache.Namespace("npm:") +// +// pypiCache.Set("requests", pypiData) // Stored as "pypi:requests" +// npmCache.Set("express", npmData) // Stored as "npm:express" +// +// Namespace calls can be chained to create hierarchical key spaces: +// +// cache.Namespace("python:").Namespace("pypi:") // prefix: "python:pypi:" +// +// The prefix is applied transparently to all Get and Set operations. +// An empty prefix is valid and results in no key transformation. 
+func (c *Cache) Namespace(prefix string) *Cache { + return &Cache{ + dir: c.dir, + ttl: c.ttl, + prefix: c.prefix + prefix, + } } func (c *Cache) keyPath(key string) string { diff --git a/pkg/httputil/cache_test.go b/pkg/httputil/cache_test.go index c1a1db0..a5f6875 100644 --- a/pkg/httputil/cache_test.go +++ b/pkg/httputil/cache_test.go @@ -119,3 +119,94 @@ func TestNewCache_DefaultDir(t *testing.T) { t.Errorf("directory not created: %v", err) } } + +func TestCache_Namespace(t *testing.T) { + c, _ := NewCache(t.TempDir(), time.Hour) + + t.Run("basicNamespacing", func(t *testing.T) { + pypi := c.Namespace("pypi:") + npm := c.Namespace("npm:") + + // Set values in different namespaces + if err := pypi.Set("requests", "pypi-data"); err != nil { + t.Fatalf("pypi.Set() failed: %v", err) + } + if err := npm.Set("requests", "npm-data"); err != nil { + t.Fatalf("npm.Set() failed: %v", err) + } + + // Retrieve from namespaced caches + var pypiVal, npmVal string + ok, err := pypi.Get("requests", &pypiVal) + if !ok || err != nil { + t.Fatalf("pypi.Get() = %v, %v; want true, nil", ok, err) + } + ok, err = npm.Get("requests", &npmVal) + if !ok || err != nil { + t.Fatalf("npm.Get() = %v, %v; want true, nil", ok, err) + } + + if pypiVal != "pypi-data" { + t.Errorf("got pypi value %q, want %q", pypiVal, "pypi-data") + } + if npmVal != "npm-data" { + t.Errorf("got npm value %q, want %q", npmVal, "npm-data") + } + + // Values should not cross-contaminate + _, _ = pypi.Get("requests", &npmVal) + if npmVal != "pypi-data" { + t.Error("namespace isolation violated") + } + }) + + t.Run("chainedNamespacing", func(t *testing.T) { + python := c.Namespace("python:") + pypi := python.Namespace("pypi:") + + if err := pypi.Set("test", "value"); err != nil { + t.Fatalf("Set() failed: %v", err) + } + + var result string + ok, err := pypi.Get("test", &result) + if !ok || err != nil || result != "value" { + t.Errorf("Get() = %v, %v, %q; want true, nil, %q", ok, err, result, "value") + } + + // 
Should not be accessible without full prefix + found, _ := python.Get("test", &result) + if found { + t.Error("value accessible without full namespace chain") + } + }) + + t.Run("emptyPrefix", func(t *testing.T) { + ns := c.Namespace("") + if err := ns.Set("key", "value"); err != nil { + t.Fatalf("Set() failed: %v", err) + } + + var result string + ok, err := ns.Get("key", &result) + if !ok || err != nil || result != "value" { + t.Errorf("Get() = %v, %v, %q; want true, nil, %q", ok, err, result, "value") + } + + // Should be same as parent cache + ok, err = c.Get("key", &result) + if !ok || err != nil || result != "value" { + t.Error("empty namespace should behave like parent") + } + }) + + t.Run("preservesDirAndTTL", func(t *testing.T) { + ns := c.Namespace("test:") + if ns.Dir() != c.Dir() { + t.Errorf("Dir() = %s, want %s", ns.Dir(), c.Dir()) + } + if ns.TTL() != c.TTL() { + t.Errorf("TTL() = %v, want %v", ns.TTL(), c.TTL()) + } + }) +} diff --git a/pkg/httputil/example_test.go b/pkg/httputil/example_test.go index 5d0a82f..3a25911 100644 --- a/pkg/httputil/example_test.go +++ b/pkg/httputil/example_test.go @@ -1,6 +1,8 @@ package httputil_test import ( + "context" + "errors" "fmt" "os" "path/filepath" @@ -65,3 +67,117 @@ func ExampleNewCache_defaultDir() { // Output: // Cache TTL: 24h0m0s } + +func ExampleRetry() { + ctx := context.Background() + attempts := 0 + + // Simulate an operation that fails twice then succeeds + err := httputil.Retry(ctx, 3, 10*time.Millisecond, func() error { + attempts++ + if attempts < 3 { + // Wrap transient errors to enable retry + return &httputil.RetryableError{ + Err: fmt.Errorf("temporary failure (attempt %d)", attempts), + } + } + return nil // Success + }) + + if err != nil { + fmt.Println("Failed:", err) + } else { + fmt.Println("Success after", attempts, "attempts") + } + // Output: + // Success after 3 attempts +} + +func ExampleRetryWithBackoff() { + ctx := context.Background() + + // Fetch data with automatic retry on 
transient failures + err := httputil.RetryWithBackoff(ctx, func() error { + // Your HTTP request or other operation here + // Return &httputil.RetryableError{...} for transient failures + // Return regular errors for permanent failures + return nil + }) + + if err != nil { + fmt.Println("Error:", err) + } else { + fmt.Println("Success") + } + // Output: + // Success +} + +func ExampleRetryableError() { + ctx := context.Background() + networkErr := errors.New("connection refused") + + err := httputil.Retry(ctx, 2, 10*time.Millisecond, func() error { + // Permanent error - no retry + if false { + return errors.New("invalid request") + } + // Transient error - will retry + return &httputil.RetryableError{Err: networkErr} + }) + + // Check if the underlying error is our network error + if errors.Is(err, networkErr) { + fmt.Println("Failed due to network error") + } + // Output: + // Failed due to network error +} + +func ExampleRetryable() { + ctx := context.Background() + attempts := 0 + + // Using the Retryable helper for cleaner code + err := httputil.RetryWithBackoff(ctx, func() error { + attempts++ + if attempts < 2 { + // Wrap errors concisely with Retryable() + return httputil.Retryable(errors.New("temporary failure")) + } + return nil + }) + + if err == nil { + fmt.Println("Success") + } + // Output: + // Success +} + +func ExampleCache_Namespace() { + dir := filepath.Join(os.TempDir(), "stacktower-namespace-example") + cache, _ := httputil.NewCache(dir, 24*time.Hour) + defer os.RemoveAll(dir) + + // Create namespaced caches for different registries + pypiCache := cache.Namespace("pypi:") + npmCache := cache.Namespace("npm:") + + // Store values in different namespaces + pypiCache.Set("requests", map[string]string{"version": "2.31.0"}) + npmCache.Set("express", map[string]string{"version": "4.18.2"}) + + // Retrieve from appropriate namespace + var pypiData map[string]string + pypiCache.Get("requests", &pypiData) + fmt.Println("PyPI requests:", 
pypiData["version"]) + + var npmData map[string]string + npmCache.Get("express", &npmData) + fmt.Println("npm express:", npmData["version"]) + + // Output: + // PyPI requests: 2.31.0 + // npm express: 4.18.2 +} diff --git a/pkg/httputil/retry.go b/pkg/httputil/retry.go index 357d22c..4b59392 100644 --- a/pkg/httputil/retry.go +++ b/pkg/httputil/retry.go @@ -7,17 +7,62 @@ import ( ) // RetryableError wraps an error to indicate it should trigger a retry. -// Wrap transient failures (network timeouts, 5xx responses) with this type -// so that [Retry] knows to attempt the operation again. +// Use this type to signal transient failures like network timeouts, +// temporary DNS resolution failures, or HTTP 5xx server errors. +// Errors not wrapped in RetryableError are treated as permanent failures +// and cause [Retry] to return immediately without further attempts. +// +// Prefer using the [Retryable] helper function for convenience: +// +// if resp.StatusCode >= 500 { +// return httputil.Retryable(fmt.Errorf("server error: %d", resp.StatusCode)) +// } +// +// RetryableError implements error unwrapping, so errors.Is and errors.As +// work correctly with the wrapped error. type RetryableError struct{ Err error } +// Retryable wraps an error as a [RetryableError], signaling to [Retry] +// that this failure should trigger a retry attempt. +// +// This is a convenience helper that avoids verbose struct literal syntax. +// Returns nil if err is nil, allowing safe use in error returns: +// +// if err := doSomething(); err != nil { +// return httputil.Retryable(err) +// } +func Retryable(err error) error { + if err == nil { + return nil + } + return &RetryableError{Err: err} +} + +// Error returns the error message of the wrapped error. func (e *RetryableError) Error() string { return e.Err.Error() } + +// Unwrap returns the wrapped error, enabling errors.Is and errors.As +// to inspect the underlying cause. 
func (e *RetryableError) Unwrap() error { return e.Err } // Retry executes fn up to attempts times with exponential backoff. -// It only retries errors wrapped with [RetryableError]; other errors are -// returned immediately. The delay doubles after each failed attempt. -// Returns the last error if all attempts fail, or ctx.Err() if cancelled. +// +// Only errors wrapped with [RetryableError] trigger a retry; all other errors +// are returned immediately. Between retries, Retry waits for delay, then +// doubles the delay for the next attempt (1s, 2s, 4s, etc.). If ctx is +// cancelled during a retry delay, Retry returns ctx.Err() immediately. +// +// Parameters: +// - ctx: Context for cancellation. If cancelled during backoff, returns ctx.Err(). +// - attempts: Maximum number of attempts (minimum 1). Zero or negative values default to 1. +// - delay: Initial backoff duration. Doubled after each failed attempt. +// - fn: Function to execute. Wrap errors in [RetryableError] to enable retries. +// +// Returns the result of fn on success, the last error if all attempts fail, +// or ctx.Err() if the context is cancelled during backoff. +// +// Retry is safe to call from multiple goroutines. However, fn itself must +// handle any concurrency concerns for the operation it performs. func Retry(ctx context.Context, attempts int, delay time.Duration, fn func() error) error { attempts = max(attempts, 1) var lastErr error @@ -41,8 +86,14 @@ func Retry(ctx context.Context, attempts int, delay time.Duration, fn func() err return lastErr } -// RetryWithBackoff is a convenience wrapper around [Retry] with sensible -// defaults: 3 attempts with 1 second initial delay (doubling each retry). +// RetryWithBackoff is a convenience wrapper around [Retry] with sensible defaults. +// +// It performs up to 3 attempts with exponential backoff starting at 1 second: +// attempt 1 (immediate), wait 1s, attempt 2, wait 2s, attempt 3. +// Total maximum wait time is 3 seconds across all retries. 
+// +// Use this when you need retry logic but don't need custom retry parameters. +// For more control over attempts or delay, call [Retry] directly. func RetryWithBackoff(ctx context.Context, fn func() error) error { return Retry(ctx, 3, time.Second, fn) } diff --git a/pkg/httputil/retry_test.go b/pkg/httputil/retry_test.go index f2ed02f..d9bf096 100644 --- a/pkg/httputil/retry_test.go +++ b/pkg/httputil/retry_test.go @@ -167,3 +167,45 @@ func TestRetryableError(t *testing.T) { t.Errorf("got Unwrap() = %v, want %v", got, inner) } } + +func TestRetryable(t *testing.T) { + t.Run("wrapsError", func(t *testing.T) { + inner := errors.New("test error") + err := Retryable(inner) + + if err == nil { + t.Fatal("expected non-nil error") + } + if !errors.Is(err, inner) { + t.Error("errors.Is failed to match wrapped error") + } + var retryErr *RetryableError + if !errors.As(err, &retryErr) { + t.Error("expected RetryableError type") + } + }) + + t.Run("nilInput", func(t *testing.T) { + err := Retryable(nil) + if err != nil { + t.Errorf("got %v, want nil", err) + } + }) + + t.Run("usedInRetry", func(t *testing.T) { + attempts := 0 + err := Retry(context.Background(), 3, time.Millisecond, func() error { + attempts++ + if attempts < 2 { + return Retryable(errors.New("retry me")) + } + return nil + }) + if err != nil { + t.Errorf("unexpected error: %v", err) + } + if attempts != 2 { + t.Errorf("got %d attempts, want 2", attempts) + } + }) +} diff --git a/pkg/integrations/client.go b/pkg/integrations/client.go index 8f32ba5..9345670 100644 --- a/pkg/integrations/client.go +++ b/pkg/integrations/client.go @@ -12,6 +12,11 @@ import ( // Client provides shared HTTP functionality for all registry API clients. // It handles caching, retry logic, and common request headers. +// +// Client is safe for concurrent use by multiple goroutines. +// The underlying HTTP client, cache, and headers are all goroutine-safe. 
+// +// Zero values: Do not use an uninitialized Client; always create via [NewClient]. type Client struct { http *http.Client cache *httputil.Cache @@ -20,7 +25,15 @@ // NewClient creates a Client with the given cache and default headers. // Headers are applied to all requests made through this client. -// Pass nil for headers if no default headers are needed. +// +// Parameters: +// - cache: Cache instance for storing responses (must not be nil). Create with +// [NewCacheWithNamespace] for registry-specific caching. +// - headers: Default HTTP headers for all requests. Pass nil if no default headers +// are needed. Common examples: "Authorization", "User-Agent", "Accept". +// +// The returned Client is safe for concurrent use by multiple goroutines. +// A nil cache does not fail construction but causes a panic on first use. func NewClient(cache *httputil.Cache, headers map[string]string) *Client { return &Client{ http: NewHTTPClient(), @@ -31,7 +44,28 @@ // Cached retrieves a value from cache or executes fetch and caches the result. // If refresh is true, the cache is bypassed and fetch is always called. -// The fetch function should populate v; on success, v is stored in the cache. +// +// Parameters: +// - ctx: Context for cancellation. If cancelled during retry backoff, Cached returns ctx.Err(). +// - key: Cache key (usually package name or coordinate). Must not be empty. +// - refresh: If true, bypass cache and always call fetch. If false, try cache first. +// - v: Pointer to store the result. Must be a non-nil pointer to a JSON-serializable type. +// - fetch: Function to fetch data and populate v. Called with retry on transient failures. +// +// Behavior: +// 1. If refresh=false and cache hit: returns nil immediately with v populated +// 2. If cache miss or refresh=true: calls fetch with automatic retry on [httputil.RetryableError] +// 3. 
On successful fetch: stores result in cache (ignoring cache write errors) +// +// The fetch function should populate v and return nil on success, or return an error. +// Network errors should be wrapped with [httputil.Retryable] to enable retry. +// +// Returns: +// - nil on success (v is populated) +// - error from fetch if it fails (v may be partially populated) +// - ctx.Err() if context is cancelled +// +// This method is safe for concurrent use on the same Client. func (c *Client) Cached(ctx context.Context, key string, refresh bool, v any, fetch func() error) error { if !refresh { if ok, _ := c.cache.Get(key, v); ok { @@ -47,12 +81,39 @@ func (c *Client) Cached(ctx context.Context, key string, refresh bool, v any, fe // Get performs an HTTP GET request and JSON-decodes the response into v. // It uses the client's default headers and handles retries automatically. +// +// Parameters: +// - ctx: Context for cancellation and timeout +// - url: Full URL to request (must be absolute URL with scheme) +// - v: Pointer to store decoded JSON response (must be non-nil) +// +// Returns: +// - [ErrNotFound] for HTTP 404 responses +// - [ErrNetwork] wrapped with [httputil.RetryableError] for HTTP 5xx responses +// - [ErrNetwork] for connection failures and timeouts +// - json decoding errors if response is not valid JSON +// +// This method is safe for concurrent use on the same Client. func (c *Client) Get(ctx context.Context, url string, v any) error { return c.GetWithHeaders(ctx, url, nil, v) } // GetWithHeaders performs an HTTP GET with additional headers merged with defaults. // Request-specific headers override client defaults for the same key. +// +// Parameters: +// - ctx: Context for cancellation and timeout +// - url: Full URL to request (must be absolute URL with scheme) +// - headers: Additional headers for this request only (may be nil). Headers with the +// same key as client defaults will override the default value for this request. 
+// - v: Pointer to store decoded JSON response (must be non-nil) +// +// Example: +// +// err := client.GetWithHeaders(ctx, url, map[string]string{"X-Custom": "value"}, &resp) +// +// Returns the same errors as [Get]. +// This method is safe for concurrent use on the same Client. func (c *Client) GetWithHeaders(ctx context.Context, url string, headers map[string]string, v any) error { body, err := c.doRequest(ctx, url, headers) if err != nil { @@ -64,6 +125,21 @@ func (c *Client) GetWithHeaders(ctx context.Context, url string, headers map[str // GetText performs an HTTP GET request and returns the response body as a string. // Useful for non-JSON endpoints like go.mod files or plain text responses. +// +// Parameters: +// - ctx: Context for cancellation and timeout +// - url: Full URL to request (must be absolute URL with scheme) +// +// The entire response body is read into memory. Use caution with large responses. +// For files larger than a few MB, consider streaming with a custom implementation. +// +// Returns: +// - The response body as a string +// - [ErrNotFound] for HTTP 404 responses +// - [ErrNetwork] for connection failures, timeouts, and HTTP 5xx responses +// - io errors if reading the response body fails +// +// This method is safe for concurrent use on the same Client. 
func (c *Client) GetText(ctx context.Context, url string) (string, error) { body, err := c.doRequest(ctx, url, nil) if err != nil { @@ -88,7 +164,7 @@ func (c *Client) doRequest(ctx context.Context, url string, headers map[string]s resp, err := c.http.Do(req) if err != nil { - return nil, &httputil.RetryableError{Err: fmt.Errorf("%w: %v", ErrNetwork, err)} + return nil, httputil.Retryable(fmt.Errorf("%w: %v", ErrNetwork, err)) } if err := checkStatus(resp.StatusCode); err != nil { @@ -105,7 +181,7 @@ func checkStatus(code int) error { case code == http.StatusNotFound: return ErrNotFound case code >= 500: - return &httputil.RetryableError{Err: fmt.Errorf("%w: status %d", ErrNetwork, code)} + return httputil.Retryable(fmt.Errorf("%w: status %d", ErrNetwork, code)) default: return fmt.Errorf("%w: status %d", ErrNetwork, code) } diff --git a/pkg/integrations/common.go b/pkg/integrations/common.go index c96bb99..eac9d07 100644 --- a/pkg/integrations/common.go +++ b/pkg/integrations/common.go @@ -11,52 +11,120 @@ import ( "github.com/matzehuels/stacktower/pkg/httputil" ) +// httpTimeout is the default timeout for all HTTP requests to registry APIs. +// Individual registries do not override this value. const httpTimeout = 10 * time.Second var ( // ErrNotFound is returned when a package or resource doesn't exist in the registry. + // This corresponds to HTTP 404 responses. + // Callers should check with errors.Is(err, integrations.ErrNotFound). + // This error is never wrapped with additional context. ErrNotFound = errors.New("resource not found") // ErrNetwork is returned for HTTP failures (timeouts, connection errors, 5xx responses). + // This error may be wrapped with [httputil.RetryableError] for 5xx status codes. + // Callers should check with errors.Is(err, integrations.ErrNetwork) for any network issue, + // or errors.As(err, &httputil.RetryableError{}) to detect retryable failures specifically. 
ErrNetwork = errors.New("network error") ) // RepoMetrics holds repository-level data fetched from GitHub or GitLab. // Used to enrich package metadata with maintenance and popularity indicators. +// +// Zero values: All string fields are empty, integers are 0, time pointers are nil. +// Nil Contributors slice is valid and indicates no contributor data was fetched. +// +// This struct is safe for concurrent reads after construction but not for concurrent writes. type RepoMetrics struct { - RepoURL string `json:"repo_url"` // Canonical repository URL (https://...) - Owner string `json:"owner"` // Repository owner username - Stars int `json:"stars"` // GitHub/GitLab star count - SizeKB int `json:"size_kb,omitempty"` // Repository size in kilobytes - LastCommitAt *time.Time `json:"last_commit_at,omitempty"` // Date of most recent commit - LastReleaseAt *time.Time `json:"last_release_at,omitempty"` // Date of most recent release - License string `json:"license,omitempty"` // SPDX license identifier - Contributors []Contributor `json:"top_contributors,omitempty"` // Top contributors by commit count - Language string `json:"language,omitempty"` // Primary repository language - Topics []string `json:"topics,omitempty"` // Repository topic tags - Archived bool `json:"archived"` // Whether the repository is archived + RepoURL string `json:"repo_url"` // Canonical repository URL (https://...). Never empty in valid metrics. + Owner string `json:"owner"` // Repository owner username. Never empty in valid metrics. + Stars int `json:"stars"` // GitHub/GitLab star count. 0 is a valid value for new repositories. + SizeKB int `json:"size_kb,omitempty"` // Repository size in kilobytes. 0 means not available or very small. + LastCommitAt *time.Time `json:"last_commit_at,omitempty"` // Date of most recent commit. Nil if not available. + LastReleaseAt *time.Time `json:"last_release_at,omitempty"` // Date of most recent release. Nil if no releases or not available. 
+ License string `json:"license,omitempty"` // SPDX license identifier (e.g., "MIT", "Apache-2.0"). Empty if not detected. + Contributors []Contributor `json:"top_contributors,omitempty"` // Top contributors by commit count (typically top 5). Nil or empty if not available. + Language string `json:"language,omitempty"` // Primary repository language (e.g., "Go", "Python"). Empty if not detected. + Topics []string `json:"topics,omitempty"` // Repository topic tags. Nil or empty if none. + Archived bool `json:"archived"` // Whether the repository is archived. False means active or unknown. } // Contributor represents a repository contributor with their contribution count. +// Used for bus factor analysis and maintainer identification. +// +// Zero values: Login is empty, Contributions is 0. A Contributor with 0 contributions is invalid. +// This struct is safe for concurrent reads. type Contributor struct { - Login string `json:"login"` // GitHub/GitLab username - Contributions int `json:"contributions"` // Number of commits + Login string `json:"login"` // GitHub/GitLab username. Never empty in valid contributors. + Contributions int `json:"contributions"` // Number of commits. Always positive in valid contributors. } // NewHTTPClient creates an HTTP client with a standard timeout for registry requests. +// The returned client has a 10-second timeout applied to all requests. +// +// The client is safe for concurrent use by multiple goroutines. +// Returns a new client on every call; clients are not pooled. func NewHTTPClient() *http.Client { return &http.Client{Timeout: httpTimeout} } // NewCache creates a file-based cache with the given TTL in the default cache directory. // See [httputil.NewCache] for details on cache location and behavior. +// +// The ttl parameter must be positive. A ttl of 0 means items never expire (not recommended). +// Negative ttl values are invalid and will be treated as 0. 
+// +// For registry-specific clients, prefer using [NewCacheWithNamespace] to automatically +// scope cache keys by registry name and prevent collisions. +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned cache is safe for concurrent use by multiple goroutines. func NewCache(ttl time.Duration) (*httputil.Cache, error) { return httputil.NewCache("", ttl) } +// NewCacheWithNamespace creates a namespaced cache for a specific registry. +// The namespace parameter (e.g., "pypi:", "npm:") is automatically prefixed to all +// cache keys, preventing collisions between different registries. +// +// The namespace should be non-empty and typically ends with a colon. An empty namespace +// is valid but defeats the purpose of this function; use [NewCache] instead. +// +// The ttl parameter must be positive. A ttl of 0 means items never expire (not recommended). +// +// This is the preferred way to create caches for registry clients: +// +// cache, err := integrations.NewCacheWithNamespace("pypi:", 24*time.Hour) +// client := integrations.NewClient(cache, nil) +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned cache is safe for concurrent use by multiple goroutines. +func NewCacheWithNamespace(namespace string, ttl time.Duration) (*httputil.Cache, error) { + cache, err := httputil.NewCache("", ttl) + if err != nil { + return nil, err + } + return cache.Namespace(namespace), nil +} + // NormalizePkgName converts a package name to its canonical form. // Applies lowercase and replaces underscores with hyphens, following PEP 503 // normalization rules used by PyPI and other registries. +// +// Normalization steps: +// 1. Trim leading and trailing whitespace +// 2. Convert to lowercase +// 3. 
Replace all underscores with hyphens +// +// Examples: +// +// NormalizePkgName("FastAPI") → "fastapi" +// NormalizePkgName("my_package") → "my-package" +// NormalizePkgName(" Spaces ") → "spaces" +// +// An empty string input returns an empty string. +// This function is safe for concurrent use. func NormalizePkgName(name string) string { return strings.ReplaceAll(strings.ToLower(strings.TrimSpace(name)), "_", "-") } @@ -68,7 +136,16 @@ var repoURLReplacer = strings.NewReplacer( // NormalizeRepoURL converts various repository URL formats to canonical HTTPS form. // Handles git@, git://, and git+ prefixes, and removes .git suffixes. -// Returns empty string if raw is empty. +// +// Transformations applied: +// - git@github.com:user/repo → https://github.com/user/repo +// - git://github.com/user/repo → https://github.com/user/repo +// - git+https://example.com/repo.git → https://example.com/repo +// - https://example.com/repo.git → https://example.com/repo +// +// Returns an empty string if the input is empty or contains only whitespace. +// Non-git URLs are returned unchanged after whitespace trimming and .git suffix removal. +// This function is safe for concurrent use. func NormalizeRepoURL(raw string) string { if raw == "" { return "" @@ -83,9 +160,32 @@ var repoURLKeys = []string{"Source", "Repository", "Code", "Homepage"} // ExtractRepoURL finds GitHub/GitLab owner and repo from package URLs. // It searches through urls using standard keys (Source, Repository, Code, Homepage) -// and falls back to homepage if no match is found. The re parameter should match -// URLs and capture owner (group 1) and repo name (group 2). -// Returns ok=false if no valid repository URL is found. +// and falls back to homepage if no match is found. 
+// +// The re parameter should match URLs and capture: +// - Group 1: owner/organization name +// - Group 2: repository name +// +// Examples: +// +// re := regexp.MustCompile(`https?://github\.com/([^/]+)/([^/]+)`) +// owner, repo, ok := ExtractRepoURL(re, pkg.ProjectURLs, pkg.HomePage) +// +// URLs containing "/sponsors/" are automatically skipped to avoid false positives. +// The .git suffix is trimmed from the repository name if present. +// +// Parameters: +// - re: Regular expression with exactly 2 capture groups (must not be nil) +// - urls: Map of URL keys to URL values (may be nil or empty) +// - homepage: Fallback homepage URL (may be empty) +// +// Returns: +// - owner: The repository owner/organization (empty if not found) +// - repo: The repository name without .git suffix (empty if not found) +// - ok: true if a valid match was found, false otherwise +// +// This function is safe for concurrent use if re is not mutated. +// Panics if re is nil. func ExtractRepoURL(re *regexp.Regexp, urls map[string]string, homepage string) (owner, repo string, ok bool) { match := func(u string) bool { if strings.Contains(u, "/sponsors/") { @@ -118,4 +218,8 @@ func ExtractRepoURL(re *regexp.Regexp, urls map[string]string, homepage string) // URLEncode percent-encodes a string for use in URLs. // This is a convenience wrapper around [url.QueryEscape]. +// +// Spaces are encoded as "+", and special characters as "%XX" hex sequences. +// An empty string returns an empty string. +// This function is safe for concurrent use. func URLEncode(s string) string { return url.QueryEscape(s) } diff --git a/pkg/integrations/crates/client.go b/pkg/integrations/crates/client.go index 5c52526..9f861d5 100644 --- a/pkg/integrations/crates/client.go +++ b/pkg/integrations/crates/client.go @@ -10,27 +10,46 @@ import ( ) // CrateInfo holds metadata for a Rust crate from crates.io. +// +// The Version field contains the max_version (latest stable or highest version). 
+// Dependencies include only "normal" (non-dev, non-optional) dependencies. +// +// Zero values: All string fields are empty, Dependencies is nil, Downloads is 0. +// A Downloads value of 0 is valid for newly published crates. +// This struct is safe for concurrent reads after construction. type CrateInfo struct { - Name string - Version string - Dependencies []string - Repository string - HomePage string - Description string - License string - Downloads int + Name string // Crate name (e.g., "serde", never empty in valid info) + Version string // Latest version (e.g., "1.0.193", never empty in valid info) + Dependencies []string // Normal dependency crate names (nil or empty if none) + Repository string // Repository URL (may be empty) + HomePage string // Homepage URL (may be empty) + Description string // Crate description (may be empty) + License string // License identifier(s) (may be empty or "MIT OR Apache-2.0") + Downloads int // Total download count across all versions (0 for new crates) } // Client provides access to the crates.io package registry API. // It handles HTTP requests with caching and automatic retries. +// +// All methods are safe for concurrent use by multiple goroutines. +// +// Note: crates.io requires a User-Agent header; this client sets one automatically. type Client struct { *integrations.Client baseURL string } // NewClient creates a crates.io client with the specified cache TTL. +// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// The client includes a User-Agent header as required by crates.io API policy. +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. 
func NewClient(cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("crates:", cacheTTL) if err != nil { return nil, err } @@ -44,9 +63,26 @@ func NewClient(cacheTTL time.Duration) (*Client, error) { } // FetchCrate retrieves metadata for a Rust crate from crates.io. -// If refresh is true, cached data is bypassed. +// +// The crate parameter is case-sensitive and must match the published crate name exactly. +// Crate name cannot be empty; an empty string will result in an API error. +// +// If refresh is true, the cache is bypassed and a fresh API call is made. +// If refresh is false, cached data is returned if available and not expired. +// +// Dependency fetching failures are silently ignored; Dependencies will be empty/nil +// if the secondary API call fails. This is not considered an error. +// +// Returns: +// - CrateInfo populated with metadata on success +// - [integrations.ErrNotFound] if the crate doesn't exist +// - [integrations.ErrNetwork] for HTTP failures (timeout, 5xx, etc.) +// - Other errors for JSON decoding failures +// +// The returned CrateInfo pointer is never nil if err is nil. +// This method is safe for concurrent use. func (c *Client) FetchCrate(ctx context.Context, crate string, refresh bool) (*CrateInfo, error) { - key := "crates:" + crate + key := crate var info CrateInfo err := c.Cached(ctx, key, refresh, &info, func() error { diff --git a/pkg/integrations/github/client.go b/pkg/integrations/github/client.go index 7cccf50..9ef24ca 100644 --- a/pkg/integrations/github/client.go +++ b/pkg/integrations/github/client.go @@ -14,15 +14,31 @@ var repoURLPattern = regexp.MustCompile(`https?://github\.com/([^/]+)/([^/]+?)(? // Client provides access to the GitHub API for repository metadata enrichment. // It handles HTTP requests with caching, automatic retries, and optional authentication. 
+// +// All methods are safe for concurrent use by multiple goroutines. type Client struct { *integrations.Client baseURL string } // NewClient creates a GitHub API client with optional authentication. -// Pass an empty string for token to use unauthenticated requests (lower rate limits). +// +// The token parameter is a GitHub personal access token for authentication. +// Pass an empty string to use unauthenticated requests. +// +// Rate limits: +// - Unauthenticated: 60 requests/hour per IP +// - Authenticated: 5,000 requests/hour per token +// +// Authentication is strongly recommended for production use to avoid rate limiting. +// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. func NewClient(token string, cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("github:", cacheTTL) if err != nil { return nil, err } @@ -39,9 +55,30 @@ func NewClient(token string, cacheTTL time.Duration) (*Client, error) { } // Fetch retrieves repository metrics (stars, maintainers, activity) from GitHub. -// If refresh is true, cached data is bypassed. +// +// Parameters: +// - owner: Repository owner username (e.g., "pallets") +// - repo: Repository name (e.g., "flask") +// - refresh: If true, bypass cache and fetch fresh data +// +// The method performs up to 3 API calls: +// 1. Repository metadata (required) +// 2. Latest release data (optional, silently ignored if no releases) +// 3. Contributors list (optional, top 5, silently ignored on failure) +// +// If refresh is true, the cache is bypassed and a fresh API call is made. +// If refresh is false, cached data is returned if available and not expired. 
+// +// Returns: +// - RepoMetrics populated with repository data on success +// - [integrations.ErrNotFound] if the repository doesn't exist +// - [integrations.ErrNetwork] for HTTP failures (timeout, 5xx, rate limits, etc.) +// - Other errors for JSON decoding failures +// +// The returned RepoMetrics pointer is never nil if err is nil. +// This method is safe for concurrent use. func (c *Client) Fetch(ctx context.Context, owner, repo string, refresh bool) (*integrations.RepoMetrics, error) { - key := "github:" + owner + "/" + repo + key := owner + "/" + repo var m integrations.RepoMetrics err := c.Cached(ctx, key, refresh, &m, func() error { @@ -121,8 +158,27 @@ func (c *Client) fetchContributors(ctx context.Context, owner, repo string) ([]i return result, nil } +// SearchPackageRepo searches GitHub code for a manifest file containing a package name. +// +// This is useful for finding repository URLs when package metadata doesn't include them. +// +// Parameters: +// - pkgName: Package name to search for (exact match in manifest file) +// - manifestFile: Manifest filename to search in (e.g., "package.json", "Gemfile") +// +// Example: +// +// owner, repo, ok := client.SearchPackageRepo(ctx, "fastapi", "pyproject.toml") +// +// Returns: +// - owner: Repository owner username (empty if not found) +// - repo: Repository name (empty if not found) +// - ok: true if a match was found, false otherwise +// +// Search results are always cached (refresh=false) to conserve GitHub API quota. +// This method is safe for concurrent use. 
func (c *Client) SearchPackageRepo(ctx context.Context, pkgName, manifestFile string) (owner, repo string, ok bool) { - key := fmt.Sprintf("github:search:%s:%s", manifestFile, pkgName) + key := fmt.Sprintf("search:%s:%s", manifestFile, pkgName) var result searchResult _ = c.Cached(ctx, key, false, &result, func() error { @@ -144,6 +200,21 @@ func (c *Client) doSearch(ctx context.Context, pkgName, manifestFile string) (ow return item.Repository.Owner.Login, item.Repository.Name, true } +// ExtractURL extracts GitHub repository owner and name from package URLs. +// +// This function searches through urls map and homepage for GitHub URLs. +// It looks for patterns like "https://github.com/owner/repo". +// +// Parameters: +// - urls: Map of URL keys to URL values from package metadata (may be nil) +// - homepage: Fallback homepage URL (may be empty) +// +// Returns: +// - owner: Repository owner username (empty if not found) +// - repo: Repository name (empty if not found) +// - ok: true if a GitHub URL was found, false otherwise +// +// This function is safe for concurrent use. func ExtractURL(urls map[string]string, homepage string) (owner, repo string, ok bool) { return integrations.ExtractRepoURL(repoURLPattern, urls, homepage) } diff --git a/pkg/integrations/gitlab/client.go b/pkg/integrations/gitlab/client.go index 50910fe..6e5493e 100644 --- a/pkg/integrations/gitlab/client.go +++ b/pkg/integrations/gitlab/client.go @@ -11,14 +11,27 @@ var repoURLPattern = regexp.MustCompile(`https?://gitlab\.com/([^/]+)/([^/]+)`) // Client provides access to the GitLab API for repository metadata enrichment. // It handles HTTP requests with caching, automatic retries, and optional authentication. +// +// All methods are safe for concurrent use by multiple goroutines. +// +// Note: Full metrics fetching (stars, contributors, etc.) is not yet implemented. +// Currently, this client focuses on URL extraction. Use [ExtractURL] to identify GitLab-hosted packages. 
type Client struct { *integrations.Client } // NewClient creates a GitLab API client with optional authentication. -// Pass an empty string for token to use unauthenticated requests (lower rate limits). +// +// The token parameter is a GitLab personal access token for authentication. +// Pass an empty string to use unauthenticated requests (public repositories only). +// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. func NewClient(token string, cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("gitlab:", cacheTTL) if err != nil { return nil, err } @@ -31,6 +44,21 @@ func NewClient(token string, cacheTTL time.Duration) (*Client, error) { return &Client{integrations.NewClient(cache, headers)}, nil } +// ExtractURL extracts GitLab repository owner and name from package URLs. +// +// This function searches through urls map and homepage for GitLab URLs. +// It looks for patterns like "https://gitlab.com/owner/repo". +// +// Parameters: +// - urls: Map of URL keys to URL values from package metadata (may be nil) +// - homepage: Fallback homepage URL (may be empty) +// +// Returns: +// - owner: Repository owner username (empty if not found) +// - repo: Repository name (empty if not found) +// - ok: true if a GitLab URL was found, false otherwise +// +// This function is safe for concurrent use. 
func ExtractURL(urls map[string]string, homepage string) (owner, repo string, ok bool) { return integrations.ExtractRepoURL(repoURLPattern, urls, homepage) } diff --git a/pkg/integrations/goproxy/client.go b/pkg/integrations/goproxy/client.go index b66b7c8..4baea75 100644 --- a/pkg/integrations/goproxy/client.go +++ b/pkg/integrations/goproxy/client.go @@ -13,22 +13,36 @@ import ( ) // ModuleInfo holds metadata for a Go module from the Go module proxy. +// +// Dependencies include only direct dependencies; indirect dependencies (marked with "// indirect") are excluded. +// Some modules (pre-modules or minimal modules) may not have a go.mod file; Dependencies will be nil/empty. +// +// Zero values: All string fields are empty, Dependencies is nil. +// This struct is safe for concurrent reads after construction. type ModuleInfo struct { - Path string - Version string - Dependencies []string + Path string // Module path (e.g., "github.com/spf13/cobra", never empty in valid info) + Version string // Latest version from @latest endpoint (e.g., "v1.8.0", never empty in valid info) + Dependencies []string // Direct dependency module paths (nil or empty if none or no go.mod) } // Client provides access to the Go module proxy API. // It handles HTTP requests with caching and automatic retries. +// +// All methods are safe for concurrent use by multiple goroutines. type Client struct { *integrations.Client baseURL string } // NewClient creates a Go module proxy client with the specified cache TTL. +// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. 
func NewClient(cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("goproxy:", cacheTTL) if err != nil { return nil, err } @@ -39,10 +53,32 @@ func NewClient(cacheTTL time.Duration) (*Client, error) { } // FetchModule retrieves metadata for a Go module from the module proxy. -// If refresh is true, cached data is bypassed. +// +// The mod parameter should be a full module path (e.g., "github.com/user/repo"). +// Module paths with uppercase letters are escaped per the Go module proxy protocol. +// Module path cannot be empty; an empty string will result in an API error. +// +// If refresh is true, the cache is bypassed and a fresh API call is made. +// If refresh is false, cached data is returned if available and not expired. +// +// This method performs two API calls: +// 1. @latest endpoint to get the latest version +// 2. .mod endpoint to fetch and parse go.mod for dependencies +// +// go.mod fetch failures are silently ignored; Dependencies will be nil/empty if it fails. +// This is normal for pre-module packages or minimal modules without dependencies. +// +// Returns: +// - ModuleInfo populated with metadata on success +// - [integrations.ErrNotFound] if the module doesn't exist +// - [integrations.ErrNetwork] for HTTP failures (timeout, 5xx, etc.) +// - Other errors for JSON decoding failures +// +// The returned ModuleInfo pointer is never nil if err is nil. +// This method is safe for concurrent use. 
func (c *Client) FetchModule(ctx context.Context, mod string, refresh bool) (*ModuleInfo, error) { mod = normalizePath(mod) - key := "goproxy:" + mod + key := mod var info ModuleInfo err := c.Cached(ctx, key, refresh, &info, func() error { diff --git a/pkg/integrations/maven/client.go b/pkg/integrations/maven/client.go index a17b76b..75035b2 100644 --- a/pkg/integrations/maven/client.go +++ b/pkg/integrations/maven/client.go @@ -14,29 +14,46 @@ import ( ) // ArtifactInfo holds metadata for a Java artifact from Maven Central. +// +// Artifacts are identified by "groupId:artifactId" coordinates. +// Dependencies include only compile-scope dependencies; test, provided, and optional deps are excluded. +// Dependencies with unresolved Maven properties (${...}) are skipped. +// +// Zero values: All string fields are empty, Dependencies is nil. +// This struct is safe for concurrent reads after construction. type ArtifactInfo struct { - GroupID string - ArtifactID string - Version string - Dependencies []string - Description string - URL string + GroupID string // Maven groupId (e.g., "com.google.guava", never empty in valid info) + ArtifactID string // Maven artifactId (e.g., "guava", never empty in valid info) + Version string // Latest version (e.g., "32.1.3-jre", never empty in valid info) + Dependencies []string // Compile-scope dependency coordinates (nil or empty if none or POM fetch failed) + Description string // Artifact description from POM (may be empty) + URL string // URL to the POM file on Maven Central (never empty in valid info) } +// Coordinate returns the Maven coordinate string "groupId:artifactId". +// Example: "com.google.guava:guava" func (a *ArtifactInfo) Coordinate() string { return a.GroupID + ":" + a.ArtifactID } // Client provides access to the Maven Central repository API. // It handles HTTP requests with caching and automatic retries. +// +// All methods are safe for concurrent use by multiple goroutines. 
type Client struct { *integrations.Client baseURL string } // NewClient creates a Maven Central client with the specified cache TTL. +// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. func NewClient(cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("maven:", cacheTTL) if err != nil { return nil, err } @@ -47,15 +64,36 @@ func NewClient(cacheTTL time.Duration) (*Client, error) { } // FetchArtifact retrieves metadata for a Java artifact from Maven Central. -// The coordinate should be in the format "groupId:artifactId". -// If refresh is true, cached data is bypassed. +// +// The coordinate parameter must be in the format "groupId:artifactId". +// Examples: "com.google.guava:guava", "org.apache.commons:commons-lang3" +// Coordinate cannot be empty or missing the colon separator. +// +// If refresh is true, the cache is bypassed and a fresh API call is made. +// If refresh is false, cached data is returned if available and not expired. +// +// This method performs two API calls: +// 1. Maven Central Search API to find the latest version +// 2. Direct POM fetch to extract dependencies +// +// POM fetch failures are silently ignored; Dependencies will be empty/nil if it fails. +// +// Returns: +// - ArtifactInfo populated with metadata on success +// - [integrations.ErrNotFound] if the artifact doesn't exist +// - [integrations.ErrNetwork] for HTTP failures (timeout, 5xx, etc.) +// - Error if coordinate format is invalid +// - Other errors for JSON decoding failures +// +// The returned ArtifactInfo pointer is never nil if err is nil. +// This method is safe for concurrent use. 
func (c *Client) FetchArtifact(ctx context.Context, coordinate string, refresh bool) (*ArtifactInfo, error) { groupID, artifactID, err := parseCoordinate(coordinate) if err != nil { return nil, err } - key := "maven:" + coordinate + key := coordinate var info ArtifactInfo err = c.Cached(ctx, key, refresh, &info, func() error { diff --git a/pkg/integrations/npm/client.go b/pkg/integrations/npm/client.go index f5eaa32..1dddfce 100644 --- a/pkg/integrations/npm/client.go +++ b/pkg/integrations/npm/client.go @@ -13,27 +13,41 @@ import ( ) // PackageInfo holds metadata for a JavaScript/TypeScript package from npm. +// +// The Version field always contains the "latest" dist-tag version. +// Dependencies include only runtime "dependencies", not devDependencies or peerDependencies. +// +// Zero values: All string fields are empty, Dependencies is nil. +// This struct is safe for concurrent reads after construction. type PackageInfo struct { - Name string - Version string - Dependencies []string - Repository string - HomePage string - Description string - License string - Author string + Name string // Package name as published (e.g., "@scope/package", never empty in valid info) + Version string // Latest version tag (e.g., "4.18.2", never empty in valid info) + Dependencies []string // Runtime dependency names (nil or empty if none) + Repository string // Normalized repository URL (empty if not provided) + HomePage string // Homepage URL (may be empty) + Description string // Package description (may be empty) + License string // License identifier (e.g., "MIT", may be empty) + Author string // Author name (may be empty) } // Client provides access to the npm package registry API. // It handles HTTP requests with caching and automatic retries. +// +// All methods are safe for concurrent use by multiple goroutines. type Client struct { *integrations.Client baseURL string } // NewClient creates an npm client with the specified cache TTL. 
+// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. func NewClient(cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("npm:", cacheTTL) if err != nil { return nil, err } @@ -44,10 +58,25 @@ func NewClient(cacheTTL time.Duration) (*Client, error) { } // FetchPackage retrieves metadata for a JavaScript/TypeScript package from npm. -// If refresh is true, cached data is bypassed. +// +// The pkg parameter is normalized to lowercase with whitespace trimmed. +// Supports scoped packages (e.g., "@types/node"). +// Package name cannot be empty; an empty string will result in an API error. +// +// If refresh is true, the cache is bypassed and a fresh API call is made. +// If refresh is false, cached data is returned if available and not expired. +// +// Returns: +// - PackageInfo populated with metadata for the "latest" dist-tag version +// - [integrations.ErrNotFound] if the package doesn't exist +// - [integrations.ErrNetwork] for HTTP failures (timeout, 5xx, etc.) +// - Other errors for JSON decoding failures or missing "latest" version +// +// The returned PackageInfo pointer is never nil if err is nil. +// This method is safe for concurrent use. 
func (c *Client) FetchPackage(ctx context.Context, pkg string, refresh bool) (*PackageInfo, error) { pkg = strings.ToLower(strings.TrimSpace(pkg)) - key := "npm:" + pkg + key := pkg var info PackageInfo err := c.Cached(ctx, key, refresh, &info, func() error { diff --git a/pkg/integrations/packagist/client.go b/pkg/integrations/packagist/client.go index b2f7aba..0c0823a 100644 --- a/pkg/integrations/packagist/client.go +++ b/pkg/integrations/packagist/client.go @@ -14,27 +14,42 @@ import ( ) // PackageInfo holds metadata for a PHP package from Packagist. +// +// Package names follow Composer conventions (vendor/package format). +// Version is the latest stable version; dev versions are skipped. +// Dependencies exclude PHP, extensions (ext-*), libraries (lib-*), and Composer platform packages. +// +// Zero values: All string fields are empty, Dependencies is nil. +// This struct is safe for concurrent reads after construction. type PackageInfo struct { - Name string - Version string - Dependencies []string - Repository string - HomePage string - Description string - License string - Author string + Name string // Package name (e.g., "symfony/console", never empty in valid info) + Version string // Latest stable version (e.g., "6.3.0", never empty in valid info) + Dependencies []string // Composer require dependencies, filtered (nil or empty if none) + Repository string // Normalized repository URL (empty if not provided) + HomePage string // Homepage URL (may be empty) + Description string // Package description (may be empty) + License string // License identifier (may be empty, only first license if multiple) + Author string // First author name (may be empty) } // Client provides access to the Packagist package registry API. // It handles HTTP requests with caching and automatic retries. +// +// All methods are safe for concurrent use by multiple goroutines. 
type Client struct { *integrations.Client baseURL string } // NewClient creates a Packagist client with the specified cache TTL. +// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. func NewClient(cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("packagist:", cacheTTL) if err != nil { return nil, err } @@ -45,10 +60,28 @@ func NewClient(cacheTTL time.Duration) (*Client, error) { } // FetchPackage retrieves metadata for a PHP package from Packagist. -// If refresh is true, cached data is bypassed. +// +// The pkg parameter must be in "vendor/package" format (e.g., "symfony/console"). +// Package name is normalized to lowercase with whitespace trimmed. +// Package name cannot be empty; an empty string will result in an API error. +// +// If refresh is true, the cache is bypassed and a fresh API call is made. +// If refresh is false, cached data is returned if available and not expired. +// +// Version selection: The latest stable version is selected, skipping dev versions. +// If no stable version exists, the first version in the list is used. +// +// Returns: +// - PackageInfo populated with metadata on success +// - [integrations.ErrNotFound] if the package doesn't exist +// - [integrations.ErrNetwork] for HTTP failures (timeout, 5xx, etc.) +// - Other errors for JSON decoding failures or missing version data +// +// The returned PackageInfo pointer is never nil if err is nil. +// This method is safe for concurrent use. 
func (c *Client) FetchPackage(ctx context.Context, pkg string, refresh bool) (*PackageInfo, error) { pkg = strings.ToLower(strings.TrimSpace(pkg)) - key := "packagist:" + pkg + key := pkg var info PackageInfo err := c.Cached(ctx, key, refresh, &info, func() error { diff --git a/pkg/integrations/pypi/client.go b/pkg/integrations/pypi/client.go index d23e97b..5d697a7 100644 --- a/pkg/integrations/pypi/client.go +++ b/pkg/integrations/pypi/client.go @@ -17,27 +17,42 @@ var ( ) // PackageInfo holds metadata for a Python package from PyPI. +// +// Package names are normalized following PEP 503 (lowercase, underscores→hyphens). +// Dependencies list only runtime dependencies; extras, dev, and test deps are excluded. +// +// Zero values: All string fields are empty, Dependencies is nil. +// A nil Dependencies slice is valid and indicates no dependencies or failed dependency fetch. +// This struct is safe for concurrent reads after construction. type PackageInfo struct { - Name string - Version string - Dependencies []string - ProjectURLs map[string]string - HomePage string - Summary string - License string - Author string + Name string // Normalized package name (e.g., "fastapi", never empty in valid info) + Version string // Version string (e.g., "0.104.1", never empty in valid info) + Dependencies []string // Direct runtime dependencies, normalized names (nil or empty if none) + ProjectURLs map[string]string // Project URLs from metadata (e.g., "Homepage", "Repository", may be nil) + HomePage string // Homepage URL (may be empty) + Summary string // Short package description (may be empty) + License string // License name or expression (may be empty) + Author string // Author name (may be empty) } // Client provides access to the PyPI package registry API. // It handles HTTP requests with caching and automatic retries. +// +// All methods are safe for concurrent use by multiple goroutines. 
type Client struct { *integrations.Client baseURL string } // NewClient creates a PyPI client with the specified cache TTL. +// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. func NewClient(cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("pypi:", cacheTTL) if err != nil { return nil, err } @@ -48,10 +63,24 @@ func NewClient(cacheTTL time.Duration) (*Client, error) { } // FetchPackage retrieves metadata for a Python package from PyPI. -// If refresh is true, cached data is bypassed. +// +// The pkg parameter is normalized automatically (case-insensitive, underscores→hyphens). +// Package name cannot be empty; an empty string will result in an API error. +// +// If refresh is true, the cache is bypassed and a fresh API call is made. +// If refresh is false, cached data is returned if available and not expired. +// +// Returns: +// - PackageInfo populated with metadata on success +// - [integrations.ErrNotFound] if the package doesn't exist +// - [integrations.ErrNetwork] for HTTP failures (timeout, 5xx, etc.) +// - Other errors for JSON decoding failures +// +// The returned PackageInfo pointer is never nil if err is nil. +// This method is safe for concurrent use. 
func (c *Client) FetchPackage(ctx context.Context, pkg string, refresh bool) (*PackageInfo, error) { pkg = integrations.NormalizePkgName(pkg) - key := "pypi:" + pkg + key := pkg var info PackageInfo err := c.Cached(ctx, key, refresh, &info, func() error { diff --git a/pkg/integrations/rubygems/client.go b/pkg/integrations/rubygems/client.go index e684721..8f4ec50 100644 --- a/pkg/integrations/rubygems/client.go +++ b/pkg/integrations/rubygems/client.go @@ -11,28 +11,43 @@ import ( ) // GemInfo holds metadata for a Ruby gem from RubyGems. +// +// Gem names are normalized to lowercase. +// Dependencies include only runtime dependencies; development dependencies are excluded. +// +// Zero values: All string fields are empty, Dependencies is nil, Downloads is 0. +// A Downloads value of 0 is valid for newly published gems. +// This struct is safe for concurrent reads after construction. type GemInfo struct { - Name string - Version string - Dependencies []string - SourceCodeURI string - HomepageURI string - Description string - License string - Downloads int - Authors string + Name string // Gem name, normalized lowercase (e.g., "rails", never empty in valid info) + Version string // Current version (e.g., "7.1.2", never empty in valid info) + Dependencies []string // Runtime dependency gem names, normalized (nil or empty if none) + SourceCodeURI string // Source code repository URL (may be empty) + HomepageURI string // Homepage URL (may be empty) + Description string // Gem description/info (may be empty) + License string // License(s), comma-separated if multiple (may be empty) + Downloads int // Total download count (0 for new gems) + Authors string // Author name(s) (may be empty) } // Client provides access to the RubyGems package registry API. // It handles HTTP requests with caching and automatic retries. +// +// All methods are safe for concurrent use by multiple goroutines. 
type Client struct { *integrations.Client baseURL string } // NewClient creates a RubyGems client with the specified cache TTL. +// +// The cacheTTL parameter sets how long responses are cached. +// Typical values: 1-24 hours for production, 0 for testing (no cache). +// +// Returns an error if the cache directory cannot be created or accessed. +// The returned Client is safe for concurrent use. func NewClient(cacheTTL time.Duration) (*Client, error) { - cache, err := integrations.NewCache(cacheTTL) + cache, err := integrations.NewCacheWithNamespace("rubygems:", cacheTTL) if err != nil { return nil, err } @@ -43,10 +58,24 @@ func NewClient(cacheTTL time.Duration) (*Client, error) { } // FetchGem retrieves metadata for a Ruby gem from RubyGems. -// If refresh is true, cached data is bypassed. +// +// The gem parameter is normalized to lowercase with whitespace trimmed. +// Gem name cannot be empty; an empty string will result in an API error. +// +// If refresh is true, the cache is bypassed and a fresh API call is made. +// If refresh is false, cached data is returned if available and not expired. +// +// Returns: +// - GemInfo populated with metadata on success +// - [integrations.ErrNotFound] if the gem doesn't exist +// - [integrations.ErrNetwork] for HTTP failures (timeout, 5xx, etc.) +// - Other errors for JSON decoding failures +// +// The returned GemInfo pointer is never nil if err is nil. +// This method is safe for concurrent use. func (c *Client) FetchGem(ctx context.Context, gem string, refresh bool) (*GemInfo, error) { gem = strings.ToLower(strings.TrimSpace(gem)) - key := "rubygems:" + gem + key := gem var info GemInfo err := c.Cached(ctx, key, refresh, &info, func() error { diff --git a/pkg/io/doc.go b/pkg/io/doc.go index 126701d..6f9b753 100644 --- a/pkg/io/doc.go +++ b/pkg/io/doc.go @@ -1,13 +1,14 @@ -// Package io provides JSON import and export for dependency graphs. 
+// Package io provides JSON import and export for directed acyclic graphs (DAGs). // // # Overview // -// Stacktower uses a simple JSON format as its interchange format. This allows: +// This package enables serialization of dependency graphs to and from a simple +// JSON format. The format is designed for: // // - Visualization of any directed graph, not just package dependencies // - Integration with external tools that produce or consume graph data // - Caching of parsed dependency data for faster re-rendering -// - Round-trip preservation of layout decisions and render options +// - Round-trip preservation: import, render, export, and re-import identically // // # JSON Format // @@ -58,21 +59,37 @@ // log.Fatal(err) // } // +// Both functions validate the JSON structure and DAG constraints (no cycles, +// no duplicate node IDs). Errors are wrapped with context about which node or +// edge caused the problem. +// // # Export // // Use [ExportJSON] to write a graph to a file, or [WriteJSON] to write to any // io.Writer: // // err := io.ExportJSON(g, "output.json") +// if err != nil { +// log.Fatal(err) +// } // // The export includes all node and edge data, including synthetic nodes -// (subdividers, auxiliaries) and their metadata. This enables round-trip: -// import a graph, render it, export the result, and re-render identically. +// (subdividers, auxiliaries) and their metadata. Row assignments, node kinds, +// and all metadata are preserved. This enables full round-trip fidelity: +// import a graph, transform it, export the result, and re-import identically. +// +// # Concurrency +// +// All functions in this package are safe to call concurrently with other +// readers of the same DAG, but not with concurrent modifications to the DAG. +// The [ReadJSON] and [ImportJSON] functions create independent DAG instances +// that can be used and modified freely after import. 
// // # Layout Export // -// For external tools that need computed positions, use the JSON sink in -// [render/tower/sink] which exports the complete [layout.Layout] including +// This package exports the logical graph structure only (nodes, edges, metadata). +// For external tools that need computed layout positions, use the JSON sink in +// [render/tower/sink], which exports the complete [layout.Layout] including // block coordinates, row orderings, and all render options. // // [render/tower/sink]: github.com/matzehuels/stacktower/pkg/render/tower/sink diff --git a/pkg/io/example_test.go b/pkg/io/example_test.go index 87165f7..19133f3 100644 --- a/pkg/io/example_test.go +++ b/pkg/io/example_test.go @@ -78,6 +78,31 @@ func ExampleReadJSON() { // Children of app: [lib] } +func ExampleExportJSON() { + // Build a simple graph + g := dag.New(nil) + _ = g.AddNode(dag.Node{ID: "server"}) + _ = g.AddNode(dag.Node{ID: "database", Row: 1}) + _ = g.AddEdge(dag.Edge{From: "server", To: "database"}) + + // Export to a file + tmpDir := os.TempDir() + path := filepath.Join(tmpDir, "exported-graph.json") + defer os.Remove(path) + + if err := io.ExportJSON(g, path); err != nil { + fmt.Println("Error:", err) + return + } + + // Verify the file was created + if _, err := os.Stat(path); err == nil { + fmt.Println("Graph exported successfully") + } + // Output: + // Graph exported successfully +} + func ExampleImportJSON() { // Create a temporary JSON file tmpDir := os.TempDir() diff --git a/pkg/io/export.go b/pkg/io/export.go index eda42b0..78a15fc 100644 --- a/pkg/io/export.go +++ b/pkg/io/export.go @@ -5,6 +5,7 @@ import ( "fmt" "io" "os" + "slices" "github.com/matzehuels/stacktower/pkg/dag" ) @@ -32,15 +33,44 @@ type edge struct { } // WriteJSON encodes a DAG as JSON and writes it to w. -// The output includes all nodes (with metadata and kind) and edges. -// This format can be re-imported with [ReadJSON] for round-trip processing. 
+// +// The output is a JSON object with "nodes" and "edges" arrays, formatted with +// 2-space indentation. All nodes are written in their original order with: +// - id: always present +// - row: included only if non-zero +// - kind: included only for non-default kinds (subdivider, auxiliary) +// - meta: included if non-empty +// +// Edges are written as {from, to} pairs. +// +// The output can be read back with [ReadJSON] to produce an identical DAG, +// preserving all metadata, node kinds, and assigned row numbers. +// +// WriteJSON returns an error if encoding fails or if writing to w fails. +// It does not validate the DAG structure; malformed graphs will be encoded +// as-is and may fail validation on import. +// +// This function is safe to call concurrently with other readers of g, +// but not with concurrent writes to g. func WriteJSON(g *dag.DAG, w io.Writer) error { + nodes := g.Nodes() + // Sort nodes by ID for deterministic output + slices.SortFunc(nodes, func(a, b *dag.Node) int { + if a.ID < b.ID { + return -1 + } + if a.ID > b.ID { + return 1 + } + return 0 + }) + out := graph{ - Nodes: make([]node, len(g.Nodes())), + Nodes: make([]node, len(nodes)), Edges: make([]edge, len(g.Edges())), } - for i, n := range g.Nodes() { + for i, n := range nodes { nd := node{ID: n.ID, Meta: n.Meta} if n.Row != 0 { row := n.Row @@ -64,7 +94,17 @@ func WriteJSON(g *dag.DAG, w io.Writer) error { } // ExportJSON writes a DAG to a JSON file at path. -// This is a convenience wrapper around [WriteJSON] for file-based output. +// +// ExportJSON creates (or truncates) the file at path and writes the JSON +// representation of g using [WriteJSON]. The file is created with 0644 +// permissions. +// +// If the file cannot be created, or if writing fails, ExportJSON returns +// an error describing the failure. The error wraps the underlying cause +// with the file path for context. 
+// +// This function is safe to call concurrently with other readers of g, +// but not with concurrent writes to g. func ExportJSON(g *dag.DAG, path string) error { f, err := os.Create(path) if err != nil { diff --git a/pkg/io/import.go b/pkg/io/import.go index 9ef152b..0352a32 100644 --- a/pkg/io/import.go +++ b/pkg/io/import.go @@ -15,8 +15,32 @@ var kindFromString = map[string]dag.NodeKind{ } // ReadJSON decodes a JSON graph from r into a DAG. -// The JSON must have "nodes" and "edges" arrays. Node "kind" and "row" -// fields are optional; missing values use defaults. +// +// The input must be a JSON object with "nodes" and "edges" arrays: +// +// { +// "nodes": [{"id": "a"}, {"id": "b"}], +// "edges": [{"from": "a", "to": "b"}] +// } +// +// Each node must have an "id" field. Optional fields: +// - row: integer layer assignment (defaults to 0) +// - kind: "subdivider" or "auxiliary" (defaults to normal node) +// - meta: object with arbitrary key-value pairs +// +// Each edge must have "from" and "to" fields that reference node IDs. +// +// ReadJSON returns an error if: +// - The JSON is malformed or invalid +// - A node has a duplicate ID +// - An edge references an unknown node ID +// - Adding a node or edge violates DAG constraints (e.g., creates a cycle) +// +// Errors are wrapped with context describing which node or edge caused +// the problem. Use errors.Is or errors.As to check for specific DAG errors. +// +// The returned DAG is independent of r and can be modified safely after +// ReadJSON returns. ReadJSON does not close r. func ReadJSON(r io.Reader) (*dag.DAG, error) { var data graph if err := json.NewDecoder(r).Decode(&data); err != nil { @@ -46,7 +70,14 @@ func ReadJSON(r io.Reader) (*dag.DAG, error) { } // ImportJSON reads a JSON file at path and returns the decoded DAG. -// This is a convenience wrapper around [ReadJSON] for file-based input. +// +// ImportJSON opens the file, decodes it using [ReadJSON], and closes the +// file. 
If the file cannot be opened, or if decoding fails, ImportJSON +// returns an error describing the failure. The error wraps the underlying +// cause with the file path for context. +// +// ImportJSON returns the same validation errors as [ReadJSON] for malformed +// graphs or DAG constraint violations. func ImportJSON(path string) (*dag.DAG, error) { f, err := os.Open(path) if err != nil { diff --git a/pkg/render/tower/doc.go b/pkg/render/tower/doc.go new file mode 100644 index 0000000..ba63d16 --- /dev/null +++ b/pkg/render/tower/doc.go @@ -0,0 +1,46 @@ +// Package tower provides the physical tower visualization engine. +// +// # Overview +// +// Stacktower's primary visualization is a "tower" of blocks, where each block +// represents a package and rests on the blocks it depends on. This package +// implements the multi-stage pipeline required to transform a DAG into a +// 2D tower layout: +// +// 1. Ordering ([ordering]): Determine horizontal sequence of blocks in each row to minimize crossings. +// 2. Layout ([layout]): Compute (x, y) coordinates and dimensions (w, h) for every block. +// 3. Styles ([styles]): Define the visual appearance (simple, hand-drawn, colors, text). +// 4. Sink ([sink]): Export the final layout to various formats (SVG, JSON, PNG, PDF). +// +// # Rendering Pipeline +// +// The rendering process typically follows these steps: +// +// g := dag.New(...) +// // ... populate graph ... +// +// // 1. Transform the graph into a row-based structure +// transform.Normalize(g) +// +// // 2. Compute the physical layout +// l := layout.Build(g, width, height, layout.WithOrderer(ordering.Barycentric{})) +// +// // 3. Render to a specific format +// svg := sink.RenderSVG(l, sink.WithStyle(styles.NewSimple())) +// +// # Subpackages +// +// - [layout]: The core layout engine that positions blocks based on row orderings. +// - [ordering]: Algorithms for determining the best horizontal arrangement of blocks. 
+// - [sink]: Final output generators for different file formats. +// - [styles]: Visual themes and drawing primitives. +// - [transform]: Graph transformations specific to tower visualizations (e.g., merging subdividers). +// - [feature]: High-level visualization features like Nebraska ranking and brittle detection. +// +// [layout]: github.com/matzehuels/stacktower/pkg/render/tower/layout +// [ordering]: github.com/matzehuels/stacktower/pkg/render/tower/ordering +// [sink]: github.com/matzehuels/stacktower/pkg/render/tower/sink +// [styles]: github.com/matzehuels/stacktower/pkg/render/tower/styles +// [transform]: github.com/matzehuels/stacktower/pkg/render/tower/transform +// [feature]: github.com/matzehuels/stacktower/pkg/render/tower/feature +package tower diff --git a/pkg/render/tower/feature/brittle.go b/pkg/render/tower/feature/brittle.go index d7a5254..af7826f 100644 --- a/pkg/render/tower/feature/brittle.go +++ b/pkg/render/tower/feature/brittle.go @@ -13,6 +13,9 @@ const ( minMaintainerCount = 2 ) +// IsBrittle returns true if a node represents a package that is potentially +// unmaintained or risky to depend on. It checks for archived repositories, +// long periods of inactivity, and low maintainer counts. func IsBrittle(n *dag.Node) bool { if n == nil || n.Meta == nil { return false diff --git a/pkg/render/tower/feature/nebraska.go b/pkg/render/tower/feature/nebraska.go index 7065859..5a59bd6 100644 --- a/pkg/render/tower/feature/nebraska.go +++ b/pkg/render/tower/feature/nebraska.go @@ -34,6 +34,10 @@ const ( maintainerWeight = 1.0 ) +// RankNebraska identifies the most influential maintainers in the dependency +// graph using the Nebraska ranking algorithm. Maintainers are scored based +// on the "depth" of their packages in the tower (i.e., how many things +// depend on them). 
func RankNebraska(g *dag.DAG, topN int) []NebraskaRanking { scores := make(map[string]float64) packages := make(map[string][]PackageRole) diff --git a/pkg/render/tower/layout/block.go b/pkg/render/tower/layout/block.go index ba18b1c..7032de5 100644 --- a/pkg/render/tower/layout/block.go +++ b/pkg/render/tower/layout/block.go @@ -1,12 +1,21 @@ package layout +// Block represents a single rectangular element in the tower layout. +// All coordinates are in user units (typically pixels in SVG). type Block struct { NodeID string Left, Right float64 Bottom, Top float64 } -func (b Block) Width() float64 { return b.Right - b.Left } -func (b Block) Height() float64 { return b.Top - b.Bottom } +// Width returns the horizontal span of the block. +func (b Block) Width() float64 { return b.Right - b.Left } + +// Height returns the vertical span of the block. +func (b Block) Height() float64 { return b.Top - b.Bottom } + +// CenterX returns the horizontal center point of the block. func (b Block) CenterX() float64 { return (b.Left + b.Right) / 2 } + +// CenterY returns the vertical center point of the block. func (b Block) CenterY() float64 { return (b.Bottom + b.Top) / 2 } diff --git a/pkg/render/tower/layout/layout.go b/pkg/render/tower/layout/layout.go index 4303f11..14168f9 100644 --- a/pkg/render/tower/layout/layout.go +++ b/pkg/render/tower/layout/layout.go @@ -12,6 +12,8 @@ const ( defaultMarginRatio = 0.05 ) +// Layout represents the computed physical positions and dimensions of all +// blocks in a tower visualization. type Layout struct { FrameWidth float64 FrameHeight float64 @@ -21,6 +23,7 @@ type Layout struct { MarginY float64 } +// Option configures the layout generation process. type Option func(*config) type config struct { @@ -30,22 +33,34 @@ type config struct { topDownFlow bool } +// WithOrderer sets the algorithm used to determine the horizontal ordering +// of blocks in each row. Defaults to [ordering.Barycentric]. 
func WithOrderer(o ordering.Orderer) Option { return func(c *config) { c.orderer = o } } +// WithAuxiliaryRatio sets the height of auxiliary rows (separator beams) +// relative to regular rows. Defaults to 0.2. func WithAuxiliaryRatio(r float64) Option { return func(c *config) { c.auxRatio = r } } +// WithMarginRatio sets the outer margin of the tower relative to the total +// frame size. Defaults to 0.05. func WithMarginRatio(r float64) Option { return func(c *config) { c.marginRatio = r } } +// WithTopDownWidths configures width computation to flow from parents to +// children (top-down). The default is bottom-up, where blocks are sized +// to support what is above them. func WithTopDownWidths() Option { return func(c *config) { c.topDownFlow = true } } +// Build computes a physical layout for the given DAG within the specified +// width and height constraints. It applies row ordering, width computation, +// and coordinate assignment. func Build(g *dag.DAG, width, height float64, opts ...Option) Layout { cfg := config{ orderer: ordering.Barycentric{}, diff --git a/pkg/render/tower/layout/width.go b/pkg/render/tower/layout/width.go index cdf4fc2..d8ca772 100644 --- a/pkg/render/tower/layout/width.go +++ b/pkg/render/tower/layout/width.go @@ -8,6 +8,9 @@ import ( const eps = 1e-9 +// ComputeWidths assigns horizontal widths to nodes by distributing the +// frame width among top-level nodes and propagating that width down to +// children. This results in "top-heavy" towers where the root nodes are wide. func ComputeWidths(g *dag.DAG, orders map[int][]string, frameWidth float64) map[string]float64 { rows := g.RowIDs() if len(rows) == 0 { @@ -59,6 +62,10 @@ func ComputeWidths(g *dag.DAG, orders map[int][]string, frameWidth float64) map[ return widths } +// ComputeWidthsBottomUp assigns horizontal widths to nodes by distributing +// the frame width among bottom-level nodes and propagating that width up +// to parents. 
This results in "bottom-heavy" towers where the leaf nodes +// provide a wide base. func ComputeWidthsBottomUp(g *dag.DAG, orders map[int][]string, frameWidth float64) map[string]float64 { rows := g.RowIDs() if len(rows) == 0 { diff --git a/pkg/render/tower/ordering/barycentric.go b/pkg/render/tower/ordering/barycentric.go index 15e6624..ff4dc76 100644 --- a/pkg/render/tower/ordering/barycentric.go +++ b/pkg/render/tower/ordering/barycentric.go @@ -9,10 +9,14 @@ import ( const defaultPasses = 24 +// Barycentric implements a fast heuristic for edge crossing minimization +// based on the Sugiyama framework. It iteratively reorders rows by the +// average position (barycenter) of their neighbors in adjacent rows. type Barycentric struct { Passes int } +// OrderRows implements the [Orderer] interface using the barycentric heuristic. func (b Barycentric) OrderRows(g *dag.DAG) map[int][]string { rows := g.RowIDs() if len(rows) == 0 { diff --git a/pkg/render/tower/ordering/optimal.go b/pkg/render/tower/ordering/optimal.go index e2accd2..738ef50 100644 --- a/pkg/render/tower/ordering/optimal.go +++ b/pkg/render/tower/ordering/optimal.go @@ -15,24 +15,31 @@ import ( const maxCandidatesBase = 10000 +// OptimalSearch implements a branch-and-bound search algorithm to find the +// mathematically optimal horizontal ordering (minimum crossings). It uses +// PQ-trees to prune the search space to only include orderings that satisfy +// structural constraints (Consecutive Ones Property). type OptimalSearch struct { Progress func(explored, pruned, best int) Timeout time.Duration Debug func(info DebugInfo) } +// DebugInfo contains diagnostic information about the optimal search process. type DebugInfo struct { Rows []RowDebugInfo MaxDepth int TotalRows int } +// RowDebugInfo contains diagnostic information for a single row during search. type RowDebugInfo struct { Row int NodeCount int Candidates int } +// OrderRows implements the [Orderer] interface by performing an optimal search. 
func (o OptimalSearch) OrderRows(g *dag.DAG) map[int][]string { rows := g.RowIDs() if len(rows) == 0 { diff --git a/pkg/render/tower/ordering/ordering.go b/pkg/render/tower/ordering/ordering.go index 6193df2..d56dd39 100644 --- a/pkg/render/tower/ordering/ordering.go +++ b/pkg/render/tower/ordering/ordering.go @@ -7,15 +7,21 @@ import ( "github.com/matzehuels/stacktower/pkg/dag" ) +// Orderer is an interface for horizontal row ordering algorithms. +// An orderer determines the horizontal sequence of nodes in each row +// to minimize edge crossings. type Orderer interface { OrderRows(g *dag.DAG) map[int][]string } +// ContextOrderer is an Orderer that supports cancellation and timeouts +// via a context. type ContextOrderer interface { Orderer OrderRowsContext(ctx context.Context, g *dag.DAG) map[int][]string } +// Quality represents the desired trade-off between ordering speed and quality. type Quality int const ( diff --git a/pkg/render/tower/sink/json.go b/pkg/render/tower/sink/json.go index b6eb6c0..1103550 100644 --- a/pkg/render/tower/sink/json.go +++ b/pkg/render/tower/sink/json.go @@ -8,6 +8,7 @@ import ( "github.com/matzehuels/stacktower/pkg/render/tower/layout" ) +// JSONOption configures JSON rendering via [RenderJSON]. type JSONOption func(*jsonRenderer) type jsonRenderer struct { @@ -19,12 +20,26 @@ type jsonRenderer struct { nebraska []feature.NebraskaRanking } +// WithJSONGraph attaches the DAG for metadata enrichment (URLs, brittle flags, +// auxiliary/synthetic flags). Without this, blocks will have minimal metadata. func WithJSONGraph(g *dag.DAG) JSONOption { return func(r *jsonRenderer) { r.graph = g } } -func WithJSONMerged() JSONOption { return func(r *jsonRenderer) { r.merged = true } } + +// WithJSONMerged marks that the layout uses merged subdividers. This ensures the +// JSON correctly represents subdivider relationships. 
+func WithJSONMerged() JSONOption { return func(r *jsonRenderer) { r.merged = true } } + +// WithJSONRandomize records the randomization seed in the JSON output, enabling +// reproducible re-rendering with the same visual jitter. func WithJSONRandomize(seed uint64) JSONOption { return func(r *jsonRenderer) { r.randomize = true; r.seed = seed } } + +// WithJSONStyle records the style name (e.g., "simple", "handdrawn") in the JSON output +// for documentation or round-trip rendering. func WithJSONStyle(s string) JSONOption { return func(r *jsonRenderer) { r.style = s } } + +// WithJSONNebraska includes Nebraska ranking data in the JSON output. Rankings should +// come from [feature.RankNebraska]. func WithJSONNebraska(rankings []feature.NebraskaRanking) JSONOption { return func(r *jsonRenderer) { r.nebraska = rankings } } @@ -79,10 +94,27 @@ type jsonNebraska struct { type jsonNebPackage struct { Package string `json:"package"` - Role string `json:"role"` + Role string `json:"role"` // "owner", "lead", or "maintainer" URL string `json:"url,omitempty"` } +// RenderJSON exports the layout and associated metadata as a pretty-printed JSON document. +// This is the primary data interchange format for Stacktower, enabling: +// +// - Integration with external visualization tools +// - Caching computed layouts for fast re-rendering +// - Round-trip rendering (re-import and render identically) +// +// The JSON includes: +// - Block positions and dimensions +// - Row orderings (for reconstructing the layout) +// - Metadata (URLs, stars, dates, auxiliary/synthetic flags) +// - Optional Nebraska rankings +// - Render options (style, seed, merged flag) for reproducibility +// +// RenderJSON returns an error only if JSON marshaling fails (should not happen +// with well-formed layouts). It does not modify l or the DAG, and is safe to call +// concurrently. 
func RenderJSON(l layout.Layout, opts ...JSONOption) ([]byte, error) { r := jsonRenderer{} for _, opt := range opts { diff --git a/pkg/render/tower/styles/handdrawn/handdrawn.go b/pkg/render/tower/styles/handdrawn/handdrawn.go index 265f246..5aedee6 100644 --- a/pkg/render/tower/styles/handdrawn/handdrawn.go +++ b/pkg/render/tower/styles/handdrawn/handdrawn.go @@ -27,8 +27,12 @@ const ( fontFamily = `'Patrick Hand', 'Comic Sans MS', 'Bradley Hand', 'Segoe Script', sans-serif` ) +// HandDrawn implements a casual, hand-drawn visual style with wobbly +// lines and "Patrick Hand" typography. type HandDrawn struct{ seed uint64 } +// New creates a new HandDrawn style with the given seed forReproducible +// line wobbling. func New(seed uint64) *HandDrawn { return &HandDrawn{seed: seed} } func (h *HandDrawn) RenderDefs(buf *bytes.Buffer) { diff --git a/pkg/render/tower/transform/merge.go b/pkg/render/tower/transform/merge.go index a129efe..0d348fe 100644 --- a/pkg/render/tower/transform/merge.go +++ b/pkg/render/tower/transform/merge.go @@ -21,11 +21,14 @@ func MergeSubdividers(l layout.Layout, g *dag.DAG) layout.Layout { blocks := make(map[string]layout.Block) for master, members := range groupByMaster(g) { - subgroups := groupByPosition(l, members) + subgroups := groupByPosition(l, g, members) for _, group := range subgroups { - b := merge(group, master) + b := merge(group.blocks, master) key := master - if len(subgroups) > 1 { + // Only add @position suffix for subdivider-only groups when there are + // multiple groups. The group containing the master keeps the master's ID + // to match RowOrders. 
+ if len(subgroups) > 1 && !group.containsMaster { key = fmt.Sprintf("%s@%.0f", master, b.Left) } blocks[key] = b @@ -50,20 +53,32 @@ func groupByMaster(g *dag.DAG) map[string][]string { return groups } -func groupByPosition(l layout.Layout, members []string) [][]layout.Block { +type positionGroup struct { + blocks []layout.Block + containsMaster bool +} + +func groupByPosition(l layout.Layout, g *dag.DAG, members []string) []positionGroup { type pos struct{ l, r int } - groups := make(map[pos][]layout.Block) + groups := make(map[pos]*positionGroup) for _, id := range members { if b, ok := l.Blocks[id]; ok { key := pos{int(b.Left + 0.5), int(b.Right + 0.5)} - groups[key] = append(groups[key], b) + if groups[key] == nil { + groups[key] = &positionGroup{} + } + groups[key].blocks = append(groups[key].blocks, b) + // Check if this member is the master node itself (not a subdivider) + if n, ok := g.Node(id); ok && !n.IsSubdivider() { + groups[key].containsMaster = true + } } } - result := make([][]layout.Block, 0, len(groups)) - for _, g := range groups { - result = append(result, g) + result := make([]positionGroup, 0, len(groups)) + for _, grp := range groups { + result = append(result, *grp) } return result } diff --git a/pkg/render/tower/transform/randomize.go b/pkg/render/tower/transform/randomize.go index 09ad310..2621738 100644 --- a/pkg/render/tower/transform/randomize.go +++ b/pkg/render/tower/transform/randomize.go @@ -56,7 +56,7 @@ func Randomize(l layout.Layout, g *dag.DAG, seed uint64, opts *Options) layout.L rng := rand.New(rand.NewPCG(seed, seed^0xdeadbeef)) shrinkCheckerboard(l.RowOrders, blocks, rows, rng, opts) - ensureMinimumOverlap(g, blocks, opts.MinOverlap) + ensureMinimumOverlap(g, blocks, l.RowOrders, opts.MinOverlap) l.Blocks = blocks return l @@ -89,28 +89,79 @@ func sortedRows(orders map[int][]string) []int { return rows } -func ensureMinimumOverlap(g *dag.DAG, blocks map[string]layout.Block, minOverlap float64) { +func ensureMinimumOverlap(g 
*dag.DAG, blocks map[string]layout.Block, rowOrders map[int][]string, minOverlap float64) { edges := g.Edges() + // Build a map of block ID to row for collision checking + blockRow := make(map[string]int) + for row, ids := range rowOrders { + for _, id := range ids { + blockRow[id] = row + } + } + for range 10 { changed := false for _, edge := range edges { parent, okP := blocks[edge.From] child, okC := blocks[edge.To] - if !okP || !okC || calcOverlap(parent.Left, parent.Right, child.Left, child.Right) >= minOverlap { + if !okP || !okC { + continue + } + + currentOverlap := calcOverlap(parent.Left, parent.Right, child.Left, child.Right) + if currentOverlap >= minOverlap { continue } - changed = true - if (parent.Left+parent.Right)/2 < (child.Left+child.Right)/2 { - parent.Right = max(parent.Right, child.Left+minOverlap) - child.Left = min(child.Left, parent.Right-minOverlap) + // Calculate proposed expansions for both parent and child + newParent, newChild := parent, child + parentCenter := (parent.Left + parent.Right) / 2 + childCenter := (child.Left + child.Right) / 2 + + if parentCenter < childCenter { + // Parent is left of child: parent expands right, child expands left + newParent.Right = max(parent.Right, child.Left+minOverlap) + newChild.Left = min(child.Left, parent.Right-minOverlap) } else { - parent.Left = min(parent.Left, child.Right-minOverlap) - child.Right = max(child.Right, parent.Left+minOverlap) + // Parent is right of child: parent expands left, child expands right + newParent.Left = min(parent.Left, child.Right-minOverlap) + newChild.Right = max(child.Right, parent.Left+minOverlap) } - blocks[edge.From] = parent - blocks[edge.To] = child + + // Check collisions independently and apply what we can + parentCollides := wouldCollide(edge.From, newParent, blockRow, rowOrders, blocks) + childCollides := wouldCollide(edge.To, newChild, blockRow, rowOrders, blocks) + + if !parentCollides && !childCollides { + // Both can expand + blocks[edge.From] = 
newParent + blocks[edge.To] = newChild + changed = true + } else if !parentCollides { + // Only parent can expand - make parent cover the child + if parentCenter < childCenter { + newParent.Right = child.Right + minOverlap + } else { + newParent.Left = child.Left - minOverlap + } + if !wouldCollide(edge.From, newParent, blockRow, rowOrders, blocks) { + blocks[edge.From] = newParent + changed = true + } + } else if !childCollides { + // Only child can expand - make child reach the parent + if parentCenter < childCenter { + newChild.Left = parent.Left - minOverlap + } else { + newChild.Right = parent.Right + minOverlap + } + if !wouldCollide(edge.To, newChild, blockRow, rowOrders, blocks) { + blocks[edge.To] = newChild + changed = true + } + } + // If both collide, skip this edge } if !changed { break @@ -118,6 +169,30 @@ func ensureMinimumOverlap(g *dag.DAG, blocks map[string]layout.Block, minOverlap } } +// wouldCollide checks if expanding a block to newBounds would collide with +// other blocks in the same row. 
+func wouldCollide(id string, newBounds layout.Block, blockRow map[string]int, rowOrders map[int][]string, blocks map[string]layout.Block) bool { + row, ok := blockRow[id] + if !ok { + return false + } + + for _, neighborID := range rowOrders[row] { + if neighborID == id { + continue + } + neighbor, ok := blocks[neighborID] + if !ok { + continue + } + // Check for overlap (with small tolerance for floating point) + if newBounds.Right > neighbor.Left+1 && newBounds.Left < neighbor.Right-1 { + return true + } + } + return false +} + func calcOverlap(a1, a2, b1, b2 float64) float64 { return max(0, min(a2, b2)-max(a1, b1)) } diff --git a/scripts/test_e2e.sh b/scripts/test_e2e.sh index 0ca3470..159d2fe 100755 --- a/scripts/test_e2e.sh +++ b/scripts/test_e2e.sh @@ -73,7 +73,7 @@ run_parse_tests() { test_parse javascript yargs test_parse ruby rspec test_parse php symfony/console - test_parse java com.google.guava_guava + test_parse java com.google.guava:guava test_parse go github.com/spf13/cobra echo "" @@ -320,7 +320,10 @@ test_parse() { local depth=${3:-$DEFAULT_MAX_DEPTH} local nodes=${4:-$DEFAULT_MAX_NODES} local refresh=${REFRESH:-true} - local output="$EXAMPLES_DIR/real/${pkg##*/}.json" + # Extract basename and replace colons with underscores (colons not allowed in filenames) + local basename="${pkg##*/}" + basename="${basename//:/_}" + local output="$EXAMPLES_DIR/real/${basename}.json" echo -n " $lang/$pkg... "