diff --git a/.claude/settings.json b/.claude/settings.json
index ebdc6b2..4ea3d06 100644
--- a/.claude/settings.json
+++ b/.claude/settings.json
@@ -6,7 +6,7 @@
"hooks": [
{
"type": "command",
- "command": "printf '\\n๐ฆ Agent Friendly Code โ current release: 0.3.0\\n โข Read AGENTS.md for conventions, CONTRIBUTING.md for the PR workflow.\\n โข Roadmap: 0.4.0 (quick wins โ history-aware signals + PR score-diff + Claude Code skill) โ 0.5.0 (auto-refresh + smarter matching) โ 0.6.0 (maintainer ownership + at-scale discovery) โ 1.0.0 (production cut โ Postgres + at-scale indexing + benchmark harness).\\n โข Changelog rule: user-facing capabilities only. Codebase hygiene (CI / linter / tests / CONTRIBUTING) does NOT go in lib/changelog.ts.\\n'"
+ "command": "printf '\\n๐ฆ Agent Friendly Code โ current release: 0.4.0\\n โข Read AGENTS.md for conventions, CONTRIBUTING.md for the PR workflow.\\n โข Roadmap: 0.5.0 (quick wins โ history-aware signals + PR score-diff + Claude Code skill) โ 0.6.0 (auto-refresh + smarter matching โ webhook rescoring + alternatives v2) โ 0.7.0 (maintainer ownership + at-scale discovery โ OAuth opt-out + package overlay at scale) โ 1.0.0 (production cut โ Postgres + at-scale indexing + benchmark harness).\\n โข Changelog rule: user-facing capabilities only. Codebase hygiene (CI / linter / tests / CONTRIBUTING) does NOT go in lib/changelog.ts.\\n'"
}
]
}
diff --git a/.claude/skills/code-review/SKILL.md b/.claude/skills/code-review/SKILL.md
index 81cd9eb..c400ac6 100644
--- a/.claude/skills/code-review/SKILL.md
+++ b/.claude/skills/code-review/SKILL.md
@@ -54,7 +54,7 @@ Only `@phosphor-icons/react`. Block Lucide, Heroicons, React Icons, inline SVG,
## Security
- Parameterised SQL only.
-- `dangerouslySetInnerHTML` is allowed only for the existing server-built JSON-LD scripts (`app/layout.tsx`, `app/page.tsx`, `app/repo/[id]/page.tsx`, `app/package/[registry]/[name]/page.tsx`) with the `<` → `\u003c` escape preserved. Reject any new use.
+- `dangerouslySetInnerHTML` is allowed only for the existing server-built JSON-LD scripts (`app/layout.tsx`, `app/page.tsx`, `app/methodology/page.tsx`, `app/repo/[id]/page.tsx`, `app/package/[registry]/[name]/page.tsx`) with the `<` → `\u003c` escape preserved. Reject any new use.
- External links include `rel="noopener noreferrer"`.
- Never execute code from a cloned repo.
diff --git a/.claude/skills/quality-check/SKILL.md b/.claude/skills/quality-check/SKILL.md
index 2b795c9..b4999ff 100644
--- a/.claude/skills/quality-check/SKILL.md
+++ b/.claude/skills/quality-check/SKILL.md
@@ -39,7 +39,7 @@ Run the four checks below on any diff affecting UI or I/O. Report findings group
## Security
- **SQL parameterisation**: every query uses `?` placeholders. No string concatenation.
-- **`dangerouslySetInnerHTML`** is allowed only for server-built JSON-LD (`app/layout.tsx`, `app/page.tsx`, `app/repo/[id]/page.tsx`, `app/package/[registry]/[name]/page.tsx`) and must keep the `<` → `\u003c` escape. Any other use must be rejected.
+- **`dangerouslySetInnerHTML`** is allowed only for server-built JSON-LD (`app/layout.tsx`, `app/page.tsx`, `app/methodology/page.tsx`, `app/repo/[id]/page.tsx`, `app/package/[registry]/[name]/page.tsx`) and must keep the `<` → `\u003c` escape. Any other use must be rejected.
- **External URLs** in `<a>` always include `rel="noopener noreferrer"`.
- **User input at every boundary** is validated: `parseRepoUrl` for repo URLs, `Number.isFinite` for numeric params, length caps on search strings.
- **Clone safety**: `git clone --depth 1 --single-branch`; never execute code from a clone (no `bun install`, no `npm install`, no post-clone scripts).
diff --git a/AGENTS.md b/AGENTS.md
index b80f45d..9abfb4c 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -43,14 +43,17 @@ app/
page.tsx # leaderboard
repo/[id]/page.tsx # repo detail with per-model suggestions (includes generateMetadata)
methodology/page.tsx # how the static scoring works
+ about/page.tsx # who built this and why (footer-linked, E-E-A-T)
roadmap/page.tsx # upcoming versions (from lib/roadmap.ts)
changelog/page.tsx # what's in this build (from lib/changelog.ts)
- robots.ts # /robots.txt โ allows "/", disallows "/api/"
- sitemap.ts # /sitemap.xml โ static routes + every repo detail page
+ robots.ts # /robots.txt — wildcard + explicit AI-crawler allows
+ sitemap.ts # /sitemap.xml — static routes + every repo detail page (priority scaled by score)
+ llms.txt/route.ts # /llms.txt — markdown manifest for LLM crawlers (Perplexity, Claude, ChatGPT search)
api/repos/route.ts
api/repo/[id]/route.ts
api/badge/[host]/[owner]/[name]/route.ts # SVG badge for README embeds (?model= for per-model)
api/package/[registry]/[name]/route.ts # npm/PyPI/Cargo lookup โ source-repo score
+ repo/[id]/opengraph-image.tsx # next/og convention — per-repo OG image (auto-wired)
package/page.tsx # explainer + try-it examples
package/[registry]/[name]/page.tsx # scored | not_scored | unresolved states
globals.css # Tailwind import + @theme tokens (no custom utilities)
@@ -81,7 +84,7 @@ lib/
changelog.ts # typed ChangelogEntry[]
roadmap.ts # typed RoadmapVersion[]
scripts/
- init-db.ts, score.ts, seed.ts, seed-list.ts
+ init-db.ts, score.ts, seed.ts, seed-list.ts, seed-packages.ts (auto-runs after seed.ts)
tests/
_helpers.ts # makeFixture / removeFixture build synthetic trees under os.tmpdir()
format.test.ts # compactStars, relativeTime, hostLabel
@@ -93,9 +96,10 @@ tasks/
0.1.0/ # released โ shipped record
0.2.0/ # released โ dogfood complete (tests, self-score, row-click)
0.3.0/ # released โ embeddable scores + broader coverage (badge, more agents, alternatives, package lookup)
- 0.4.0/ # planned โ quick wins (history-aware signals + PR score-diff action + Claude Code skill)
- 0.5.0/ # planned โ auto-refresh + smarter matching (webhook rescoring + alternatives v2)
- 0.6.0/ # planned โ maintainer ownership + at-scale discovery (OAuth opt-out + package overlay at scale)
+ 0.4.0/ # released — credible scores + discoverability (docs-cited rationales + agent-specific signals + About/llms.txt/OG)
+ 0.5.0/ # planned — quick wins (history-aware signals + PR score-diff action + Claude Code skill)
+ 0.6.0/ # planned — auto-refresh + smarter matching (webhook rescoring + alternatives v2)
+ 0.7.0/ # planned — maintainer ownership + at-scale discovery (OAuth opt-out + package overlay at scale)
1.0.0/ # planned โ production cut (Postgres + at-scale indexing + benchmark harness)
.claude/
settings.json # SessionStart + Stop hooks (Stop โ hooks/stop-guard.sh)
@@ -176,9 +180,9 @@ Hooks docs: .
- We `git clone --depth 1 --single-branch` arbitrary URLs โ safe by default. We never run post-clone scripts, never `npm install`, never execute code from the clone.
- SQL: all queries parameterised. No interpolation.
-- HTML: React auto-escapes. The only `dangerouslySetInnerHTML` is server-built JSON-LD with `<` escaped to `\u003c` (`app/layout.tsx`, `app/page.tsx`, `app/repo/[id]/page.tsx`, `app/package/[registry]/[name]/page.tsx`); never feed user-controlled strings into it.
+- HTML: React auto-escapes. The only `dangerouslySetInnerHTML` is server-built JSON-LD with `<` escaped to `\u003c` (`app/layout.tsx`, `app/page.tsx`, `app/methodology/page.tsx`, `app/repo/[id]/page.tsx`, `app/package/[registry]/[name]/page.tsx`); never feed user-controlled strings into it.
- Local-path mode reads files; never writes outside `data/` and the clone workspace passed to `shallowClone`.
-- No auth yet (read-only dashboard). When auth lands (`tasks/0.6.0/01-opt-out-claim-flow.md`), do it via OAuth and gate DB writes per user.
+- No auth yet (read-only dashboard). When auth lands (`tasks/0.7.0/01-opt-out-claim-flow.md`), do it via OAuth and gate DB writes per user.
**Operational concerns** (not code-level security) worth flagging before public launch:
@@ -188,7 +192,7 @@ Hooks docs: .
## Things to leave alone
-- Per-model weights are illustrative. Don't tune without `tasks/1.0.0/03-benchmark-harness.md`.
+- Per-model rationales are derived from each agent's published documentation (see `MODELS[].sources` in `lib/scoring/weights.ts`); the weights themselves are still pre-benchmark. Do not tune individual values without re-running the docs audit (see `tasks/0.4.0/01-sourced-agent-rationales.md`) or shipping the v1.0.0 benchmark harness.
- SQLite schema is intentionally simple. Flag before restructuring.
- The I/O boundary. Scoring stays pure; DB stays in `lib/db.ts`.
- `APP_VERSION` โ don't bump without a release.
diff --git a/README.md b/README.md
index c10fa5f..aa78440 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# Agent Friendly Code
-[](./lib/changelog.ts)
+[](./lib/changelog.ts)
[](./LICENSE)
[](https://nextjs.org)
[](https://nodejs.org)
@@ -9,7 +9,7 @@
**A public dashboard that ranks open-source repos by how friendly they are for AI coding agents โ per model.**
-Next.js 16 + SQLite (`better-sqlite3`), styled with Tailwind CSS 4. Spans GitHub, GitLab, and Bitbucket out of the box. Current release: **0.3.0**.
+Next.js 16 + SQLite (`better-sqlite3`), styled with Tailwind CSS 4. Spans GitHub, GitLab, and Bitbucket out of the box. Current release: **0.4.0**.

@@ -60,9 +60,9 @@ Two audiences:
Not pretending the idea is free of risk:
-- **Per-model scoring is the hardest part and the easiest to fake.** Today the weights are illustrative. Real "Claude ranks this higher than GPT-5" requires actually running each agent on each repo. That's `tasks/1.0.0/03-benchmark-harness.md`.
+- **Per-model scoring is the hardest part and the easiest to fake.** Per-model rationales are now sourced from each agent's published docs (see `MODELS[].sources` in `lib/scoring/weights.ts`), but the weight values themselves are still pre-benchmark. Real "Claude ranks this higher than GPT-5" requires actually running each agent on each repo. That's `tasks/1.0.0/03-benchmark-harness.md`.
- **Factory.ai is already in this space.** Differentiation has to stay sharp.
-- **Public-shaming risk.** Ranking #47,823 without consent invites angry maintainers. Planned via `tasks/0.6.0/01-opt-out-claim-flow.md`.
+- **Public-shaming risk.** Ranking #47,823 without consent invites angry maintainers. Planned via `tasks/0.7.0/01-opt-out-claim-flow.md`.
- **Score gaming.** Once public, people add boilerplate `AGENTS.md` to pass the rubric without being useful. Dynamic (actually-run-an-agent) checks are the counter โ see benchmark harness.
- **Freshness.** Scores decay with every push. Webhook-driven rescoring is roadmap.
@@ -84,14 +84,14 @@ Short answer: **low risk**. The app:
- Rate limiting the public API.
- Sandbox the cloner in a container (future-proofing against hypothetical git CVEs).
-Auth and per-maintainer controls land with the opt-out / claim flow in v0.6.0.
+Auth and per-maintainer controls land with the opt-out / claim flow in v0.7.0.
## Quickstart
```bash
bun install
bun run prepare-hooks # once โ installs lefthook pre-commit (Biome + tsc + test + file-length)
-bun run seed # score the curated set across GH / GL / BB
+bun run seed # score the curated set across GH / GL / BB + cache popular package aliases
bun run dev # http://localhost:3000
```
@@ -110,7 +110,7 @@ Run the unit tests with `bun run test` (uses `node --test` + `tsx`; requires Nod
## Versioning
-`lib/version.ts` and `package.json` carry the current release number (currently **0.3.0**). Bumps happen only when we actually cut a release โ never when merging intermediate work. The version pill in the header surfaces the number directly; `/changelog` lists what each release shipped.
+`lib/version.ts` and `package.json` carry the current release number (currently **0.4.0**). Bumps happen only when we actually cut a release — never when merging intermediate work. The version pill in the header surfaces the number directly; `/changelog` lists what each release shipped.
## Stack & rationale
@@ -172,9 +172,9 @@ See `/roadmap` in the running app or the per-version `tasks/` folders for the fu
Versions are sequenced cheap-first so the highest-impact small additions don't get gated on heavy infra:
-- **0.4.0 โ quick wins**: history-aware signals (maintenance recency, commit velocity, contributor activity) + a GitHub Action that comments the score delta on every PR + a Claude Code skill (with public `/api/score` lookup) that recommends a model for the active repo. No new infra.
-- **0.5.0 โ auto-refresh + smarter matching**: webhook-driven rescoring (keep scores fresh on every push) + alternatives via README embeddings (cross-language matches the v0.3.0 SQL heuristic misses).
-- **0.6.0 โ maintainer ownership + at-scale discovery**: OAuth opt-out / claim flow for maintainers + at-scale package overlay (per-registry leaderboards + userscript that renders the badge inline on npmjs.com / PyPI / crates.io).
+- **0.5.0 — quick wins**: history-aware signals (maintenance recency, commit velocity, contributor activity) + a GitHub Action that comments the score delta on every PR + a Claude Code skill (with public `/api/score` lookup) that recommends a model for the active repo. No new infra.
+- **0.6.0 — auto-refresh + smarter matching**: webhook-driven rescoring (keep scores fresh on every push) + alternatives via README embeddings (cross-language matches the v0.3.0 SQL heuristic misses).
+- **0.7.0 — maintainer ownership + at-scale discovery**: OAuth opt-out / claim flow for maintainers + at-scale package overlay (per-registry leaderboards + userscript that renders the badge inline on npmjs.com / PyPI / crates.io).
- **1.0.0 โ production cut**: Postgres migration for concurrent writers + auto-discovered crawl (target 10k repos) + benchmark harness that derives per-model weights from measured agent success. From here on, breaking API changes require a MAJOR bump.
## Defensibility
diff --git a/app/about/page.tsx b/app/about/page.tsx
new file mode 100644
index 0000000..3b3c7d3
--- /dev/null
+++ b/app/about/page.tsx
@@ -0,0 +1,116 @@
+import type { Metadata } from "next";
+import Link from "next/link";
+
+import { Panel, PanelHeading } from "@/components/Panel";
+import { APP_NAME, REPO_URL } from "@/lib/version";
+
+export const metadata: Metadata = {
+ title: "About",
+ alternates: { canonical: "/about" },
+ twitter: { title: `About — ${APP_NAME}` },
+ openGraph: { title: `About — ${APP_NAME}`, url: "/about" },
+ description: `Who built ${APP_NAME}, why it exists, and what it isn't. Independent, MIT-licensed, no affiliation with any AI agent vendor.`,
+};
+
+export default function AboutPage() {
+ return (
+ <>
+
+ About
+
+ Who built {APP_NAME}, why it exists, and what it deliberately isn't.
+
+
+
+
+ Who
+
+ Built and maintained by{" "}
+
+ Himanshu Singh
+
+ . Independent project — no affiliation with Anthropic, OpenAI, Google, Cognition, Anysphere, or any of the
+ agent vendors ranked here.
+
+
+
+
+
+ Why this exists
+
+ The gap between “repo with a README” and “repo that actually helps an AI coding agent ship
+ code” keeps widening, and there's no public way to tell who's doing the work. {APP_NAME}{" "}
+ tries to make that visible — per model, because the agents aren't interchangeable. Claude Code wants an
+ AGENTS.md and a fast test loop; Cursor wants strong types and a skim-readable README; Devin wants a runnable
+ dev environment with declared deps and tests. The same repository can score very differently across them,
+ and a single overall number would hide that.
+
+
+
+
+
+
+ What it isn't
+
+ This is not a benchmark of agent performance. Today every score is derived from{" "}
+ static signals โ file existence and content-length checks on the
+ cloned tree. No agent is actually run. Per-model rationales are derived from each agent's published
+ documentation (sources are linked on the methodology page), but the weight values themselves are still
+ pre-benchmark — not yet calibrated against measured agent success. Read the{" "}
+
+ methodology
+ {" "}
+ for the full picture, including the production-cut plan to replace pre-benchmark weights with measured ones.
+
+
+
+
+
+
+ Open source
+
+ MIT-licensed. The signal definitions, weight profiles, scoring code, seed list, and every score in the
+ database are all in the{" "}
+
+ source repository
+
+ . If a repo's score looks wrong, file an issue with a link and the rubric to revisit; if a signal is
+ missing, propose one.
+
+
+
+
+
+
+ Contact
+
+
+ Best signal: open an issue or discussion on{" "}
+
+ GitHub
+
+ .
+
+
+
+ >
+ );
+}
diff --git a/app/layout.tsx b/app/layout.tsx
index b6a36a7..2bff538 100644
--- a/app/layout.tsx
+++ b/app/layout.tsx
@@ -69,6 +69,19 @@ const JSON_LD = {
"query-input": "required name=search_term_string",
},
},
+ {
+ "@type": "WebApplication",
+ "@id": `${APP_URL}/#app`,
+ url: APP_URL,
+ name: APP_NAME,
+ operatingSystem: "Any",
+ isAccessibleForFree: true,
+ description: APP_DESCRIPTION,
+ publisher: { "@id": `${APP_URL}/#org` },
+ applicationCategory: "DeveloperApplication",
+ offers: { "@type": "Offer", price: "0", priceCurrency: "USD" },
+ browserRequirements: "Requires JavaScript-enabled modern browser",
+ },
],
};
@@ -88,10 +101,11 @@ const NAV_LINKS = [
const FOOTER_LINKS = [
{ href: "/", label: "Home" },
- { href: "/methodology", label: "Methodology" },
- { href: "/roadmap", label: "Roadmap" },
+ { href: "/about", label: "About" },
{ href: "/changelog", label: "Changelog" },
+ { href: "/methodology", label: "Methodology" },
{ href: "/package", label: "Packages" },
+ { href: "/roadmap", label: "Roadmap" },
];
export default function RootLayout({ children }: { children: React.ReactNode }) {
@@ -139,15 +153,19 @@ export default function RootLayout({ children }: { children: React.ReactNode })
{children}
-
+
{FOOTER_LINKS.map((l) => (
-
+
{l.label}
))}
- Signals are static heuristics โ no agent is actually run. Per-model weights are illustrative, not yet
- empirically derived.
+ Signals are static heuristics — no agent is actually run. Per-model rationales are docs-cited; the weight
+ values themselves are still pre-benchmark.
diff --git a/app/llms.txt/route.ts b/app/llms.txt/route.ts
new file mode 100644
index 0000000..cd3ca18
--- /dev/null
+++ b/app/llms.txt/route.ts
@@ -0,0 +1,44 @@
+import { MODELS } from "@/lib/scoring/weights";
+import { APP_DESCRIPTION, APP_NAME, APP_URL, REPO_URL } from "@/lib/version";
+
+export const dynamic = "force-static";
+
+const HEADERS = {
+ "Content-Type": "text/markdown; charset=utf-8",
+ "Cache-Control": "public, max-age=3600, s-maxage=86400",
+};
+
+export function GET(): Response {
+ const body = `# ${APP_NAME}
+
+> ${APP_DESCRIPTION}
+
+The scoring engine evaluates sixteen static signals per repository — twelve cross-agent (AGENTS.md / CLAUDE.md, CI configuration, test suite, README, linter / formatter, type config, license, contributing guide, reproducible dev environment, pre-commit hooks, dependency manifest, codebase size) plus four agent-specific instruction files (\`.cursor/rules/*.mdc\` for Cursor, \`GEMINI.md\` for Gemini CLI, \`.openhands/setup.sh\` for OpenHands, \`.aider.conf.yml\` for Aider). Each AI coding agent has its own weight profile across those signals, so the same repository can score differently for different agents.
+
+## Models evaluated
+
+${MODELS.map((m) => `- ${m.label} — ${m.rationale}`).join("\n")}
+
+## Key pages
+
+- [Leaderboard](${APP_URL}/): Per-model leaderboard across GitHub, GitLab, and Bitbucket
+- [Methodology](${APP_URL}/methodology): How scores are computed; signals, weights, and limitations
+- [Roadmap](${APP_URL}/roadmap): Upcoming versions
+- [Changelog](${APP_URL}/changelog): What shipped per release
+- [Sitemap](${APP_URL}/sitemap.xml): Every indexed URL
+
+## Public API
+
+- [\`GET /api/repos\`](${APP_URL}/api/repos): JSON dump of the leaderboard (id, owner, name, host, stars, overall_score, per-model scores)
+- [\`GET /api/repo/{id}\`](${APP_URL}/api/repo/1): Per-repo detail — signals, model scores, top improvements
+- [\`GET /api/badge/{host}/{owner}/{name}.svg\`](${APP_URL}/api/badge/github/vercel/next.js.svg): Embeddable SVG badge (\`?model=\` for per-model)
+- [\`GET /api/package/{registry}/{name}\`](${APP_URL}/api/package/npm/next): Resolve npm / PyPI / Cargo package → source-repo score
+
+## Source
+
+- [Repository](${REPO_URL}): MIT-licensed Next.js app
+- [License](https://opensource.org/licenses/MIT): MIT
+`;
+
+ return new Response(body, { headers: HEADERS });
+}
diff --git a/app/methodology/page.tsx b/app/methodology/page.tsx
index d786112..aabed66 100644
--- a/app/methodology/page.tsx
+++ b/app/methodology/page.tsx
@@ -14,9 +14,61 @@ export const metadata: Metadata = {
"How scores are computed today: the signals checked, the per-model weight profiles, the scoring formula, and what the static-heuristic approach deliberately doesn't measure yet.",
};
+const FAQ = [
+ {
+ q: "How is the agent-friendliness score computed?",
+ a: "Each repository is shallow-cloned and evaluated against sixteen static signals — twelve cross-agent (AGENTS.md / CLAUDE.md, CI, tests, README, linter, type config, license, contributing guide, reproducible dev environment, pre-commit hooks, dependency manifest, codebase size) plus four agent-specific instruction files (`.cursor/rules/*.mdc`, `GEMINI.md`, `.openhands/setup.sh`, `.aider.conf.yml`). Per-model score = Σ(signal.pass × model.weight[signal]) / Σ(model.weight) × 100. Overall score = mean of per-model scores.",
+ },
+ {
+ q: "Why score per model instead of giving one overall number?",
+ a: "Different agents lean on different repository properties — and we know which because each vendor documents it. Claude Code loads CLAUDE.md at the start of every conversation, so AGENTS.md and tests carry the most weight. GPT-5 Codex reads AGENTS.md before doing any work, so AGENTS.md is the strongest single signal for it. Devin runs in a sandboxed VM and needs an explicit dev-env setup (deps, secrets, lint/test commands), so dev-environment beats CI. Cursor cites `.cursor/rules/` and AGENTS.md as its canonical instruction surface. The same repository can score very differently across models, and a single overall number would hide that.",
+ },
+ {
+ q: "Which AI coding agents are evaluated?",
+ a: "Claude Code, Cursor, Devin, GPT-5 Codex, Gemini CLI, Aider, OpenHands, and Pi. Each has its own weight profile encoded in lib/scoring/weights.ts.",
+ },
+ {
+ q: "Is this a benchmark of agent performance?",
+ a: "No. Today every score is derived from static signals — file existence and content-length checks on the cloned tree. No agent is actually run. Per-model rationales are now derived from each agent's published documentation (see the Sources panel below for the URLs), but the weights themselves are still pre-benchmark — they're not yet calibrated against measured agent success. Treat the numbers as a directional signal, not a verdict.",
+ },
+ {
+ q: "How can I improve my repository's score?",
+ a: "Add an AGENTS.md or CLAUDE.md file describing the project for agents, configure CI, ensure tests run, write a substantive README, add a linter and type config, include a license and CONTRIBUTING guide, and provide a reproducible dev environment (devcontainer or Dockerfile). The repo detail page lists the highest-impact gaps for each model.",
+ },
+ {
+ q: "What is AGENTS.md or CLAUDE.md?",
+ a: "A markdown file at the root of a repository that gives an AI coding agent a quick orientation: what the project is, how to build and test it, key conventions, and where to look. It is the highest-weighted signal for Pi, tied with the test suite as the top weight for Claude Code, and meaningfully helps every other agent.",
+ },
+ {
+ q: "How often is the data refreshed?",
+ a: "Manually for now — repositories are re-scored when the seed list changes or the rubric is updated. Automated periodic refresh is planned for v0.6.0.",
+ },
+ {
+ q: "Which forges are supported?",
+ a: "GitHub, GitLab, and Bitbucket. Cross-forge support is built into the cloning and scoring pipeline so the leaderboard can compare repositories regardless of host.",
+ },
+];
+
+const FAQ_JSON_LD = {
+ "@context": "https://schema.org",
+ "@type": "FAQPage",
+ mainEntity: FAQ.map((entry) => ({
+ "@type": "Question",
+ name: entry.q,
+ acceptedAnswer: { "@type": "Answer", text: entry.a },
+ })),
+};
+
export default function MethodologyPage() {
return (
<>
+
Methodology
@@ -27,19 +79,20 @@ export default function MethodologyPage() {
- Status: static parameters
+ Status: documented rationales, pre-benchmark weights
Today every score is derived from static signals โ file existence and
- content-length checks on the cloned tree. No agent is actually run. Per-model weights are{" "}
- illustrative , not yet derived from measured agent success. This is
- enough to produce meaningfully different rankings and to show how the UX of per-model scoring feels, but it
- should not be read as a benchmark.
+ content-length checks on the cloned tree. No agent is actually run. Per-model rationales are{" "}
+ derived from each agent's published documentation — see the Sources
+ links under every model below. The weight values themselves are still pre-benchmark; they aren't yet
+ calibrated against measured agent success. The combination is enough to produce meaningfully different
+ rankings and to show how the UX of per-model scoring feels, but it should not be read as a benchmark.
- The plan to replace illustrative weights with measured ones is part of the v1.0.0 production cut on the{" "}
+ The plan to replace pre-benchmark weights with measured ones is part of the v1.0.0 production cut on the{" "}
roadmap
{" "}
@@ -104,6 +157,32 @@ improvement = closing a gap unlocks (1 - pass) ร weight / ฮฃweight ร 100
{m.label}
{m.rationale}
+ {m.sources.length > 0 && (
+
+
Sources:
+
+ {m.sources.map((url) => {
+ const parsed = new URL(url);
+ const lastSeg = parsed.pathname.replace(/\/+$/, "").split("/").filter(Boolean).pop() ?? "";
+ const host = parsed.hostname.replace(/^www\./, "");
+ const label = lastSeg ? `${host}/${lastSeg}` : host;
+
+ return (
+
+ {label}
+
+ );
+ })}
+
+ )}
+
Weights
@@ -134,7 +213,7 @@ improvement = closing a gap unlocks (1 - pass) ร weight / ฮฃweight ร 100
--depth 1 --single-branch
which fetches the whole working tree at HEAD of the default branch, but no history. Closing this gap is
- planned as v0.4.0 on the{" "}
+ planned as v0.5.0 on the{" "}
roadmap
diff --git a/app/package/page.tsx b/app/package/page.tsx
index 2fed505..c7518ec 100644
--- a/app/package/page.tsx
+++ b/app/package/page.tsx
@@ -3,6 +3,7 @@ import Link from "next/link";
import { PackageLookupForm } from "@/components/PackageLookupForm";
import { Panel, PanelHeading } from "@/components/Panel";
+import { getTopPackagesByRegistry } from "@/lib/db";
export const metadata: Metadata = {
title: "Packages",
@@ -20,7 +21,14 @@ const EXAMPLES = [
{ registry: "pypi", name: "requests" },
] as const;
+const TOP_REGISTRIES = ["npm", "pypi", "cargo"] as const;
+const TOP_LIMIT_PER_REGISTRY = 6;
+
export default function PackageIndexPage() {
+ const topRows = TOP_REGISTRIES.flatMap((registry) =>
+ getTopPackagesByRegistry(registry, TOP_LIMIT_PER_REGISTRY).map((row) => ({ registry, ...row })),
+ ).sort((a, b) => b.score - a.score);
+
return (
<>
@@ -50,7 +58,7 @@ export default function PackageIndexPage() {
{registry}
/
@@ -73,6 +81,34 @@ export default function PackageIndexPage() {
+
+ {topRows.length > 0 && (
+ <>
+
+
+
+ Top scored packages by registry
+
+
+ {topRows.map((row) => (
+
+
+
+ {row.registry}
+ /
+ {row.name}
+
+ {row.score.toFixed(1)}
+
+
+ ))}
+
+
+ >
+ )}
>
);
}
diff --git a/app/page.tsx b/app/page.tsx
index b128b77..7186a83 100644
--- a/app/page.tsx
+++ b/app/page.tsx
@@ -119,17 +119,56 @@ export default async function Page({ searchParams }: { searchParams: Promise m.id === selected)?.rationale ?? "");
const allOverall = listLeaderboardOverall();
+ const lastModified =
+ stats.lastScoredAt != null ? new Date(stats.lastScoredAt * 1000).toISOString() : new Date().toISOString();
+
const itemListJsonLd = {
- "@type": "ItemList",
"@context": "https://schema.org",
- numberOfItems: allOverall.length,
- name: "Agent-friendliness leaderboard",
- itemListElement: allOverall.map((row, idx) => ({
- "@type": "ListItem",
- position: idx + 1,
- name: `${row.owner}/${row.name}`,
- url: `${APP_URL}/repo/${row.id}`,
- })),
+ "@graph": [
+ {
+ "@type": "ItemList",
+ "@id": `${APP_URL}/#leaderboard`,
+ numberOfItems: allOverall.length,
+ name: "Agent-friendliness leaderboard",
+ itemListElement: allOverall.map((row, idx) => ({
+ "@type": "ListItem",
+ position: idx + 1,
+ name: `${row.owner}/${row.name}`,
+ url: `${APP_URL}/repo/${row.id}`,
+ })),
+ },
+ {
+ "@type": "Dataset",
+ "@id": `${APP_URL}/#dataset`,
+ name: "Agent Friendly Code — public repository scoring dataset",
+ description:
+ "Per-model agent-friendliness scores for public repositories on GitHub, GitLab, and Bitbucket, evaluated against sixteen static signals — twelve cross-agent (AGENTS.md, CI, tests, README, linter, type config, license, contributing, dev env, pre-commit, deps manifest, size) plus four agent-specific instruction files (.cursor/rules/, GEMINI.md, .openhands/setup.sh, .aider.conf.yml) — for Claude Code, Cursor, Devin, GPT-5 Codex, Gemini CLI, Aider, OpenHands, and Pi.",
+ url: APP_URL,
+ isAccessibleForFree: true,
+ dateModified: lastModified,
+ creator: { "@id": `${APP_URL}/#org` },
+ license: "https://opensource.org/licenses/MIT",
+ mainEntity: { "@id": `${APP_URL}/#leaderboard` },
+ variableMeasured: [
+ "License",
+ "Test suite",
+ "Codebase size",
+ "README quality",
+ "CI configuration",
+ "Contributing guide",
+ "Type configuration",
+ "Dependency manifest",
+ "Pre-commit / git hooks",
+ "Linter / formatter config",
+ "Reproducible dev environment",
+ "AGENTS.md / CLAUDE.md presence",
+ "Aider config (.aider.conf.yml)",
+ "Cursor rules (.cursor/rules/*.mdc)",
+ "Gemini CLI instructions (GEMINI.md)",
+ "OpenHands setup script (.openhands/setup.sh)",
+ ],
+ },
+ ],
};
return (
diff --git a/app/repo/[id]/opengraph-image.tsx b/app/repo/[id]/opengraph-image.tsx
new file mode 100644
index 0000000..777ffd1
--- /dev/null
+++ b/app/repo/[id]/opengraph-image.tsx
@@ -0,0 +1,109 @@
+import { ImageResponse } from "next/og";
+
+import { getRepo } from "@/lib/db";
+import { hostLabel } from "@/lib/utils/format";
+import { APP_NAME } from "@/lib/version";
+
+export const contentType = "image/png";
+export const alt = "Agent-friendliness score";
+export const size = { width: 1200, height: 630 };
+
+export default async function Image({ params }: { params: Promise<{ id: string }> }) {
+ const { id: idStr } = await params;
+ const id = Number(idStr);
+
+ const repo = Number.isFinite(id) ? getRepo(id) : null;
+ const slug = repo ? `${repo.owner}/${repo.name}` : "Unknown repo";
+ const score = repo?.overall_score != null ? repo.overall_score.toFixed(1) : "—";
+ const host = repo ? hostLabel(repo.host) : "";
+
+ return new ImageResponse(
+
+
+
+
+
+ {slug}
+
+
+ Agent-friendliness score{host ? ` · ${host}` : ""}
+ {repo?.language ? ` · ${repo.language}` : ""}
+
+
+
+
+
+
+ Claude Code · Cursor · Devin · Codex · Gemini · Aider · OpenHands · Pi
+
+
,
+ { ...size },
+ );
+}
diff --git a/app/repo/[id]/page.tsx b/app/repo/[id]/page.tsx
index cd9d9ca..1d4227f 100644
--- a/app/repo/[id]/page.tsx
+++ b/app/repo/[id]/page.tsx
@@ -34,7 +34,7 @@ export async function generateMetadata({ params }: { params: Promise<{ id: strin
const score = repo.overall_score != null ? repo.overall_score.toFixed(1) : "unranked";
const title = `${slug} โ ${score} / 100`;
- const description = `Agent-friendliness score for ${slug} across Claude Code, Cursor, Devin, and GPT-5 Codex โ with the top improvements ranked by score-gain.`;
+ const description = `Agent-friendliness score for ${slug} across Claude Code, Cursor, Devin, GPT-5 Codex, Gemini CLI, Aider, OpenHands, and Pi — with the top improvements ranked by score-gain.`;
const repoKeywords = [
slug,
@@ -117,7 +117,17 @@ export default async function Page({
codeRepository: repo.url,
url: `${APP_URL}/repo/${id}`,
...(repo.language ? { programmingLanguage: repo.language } : {}),
+ ...(repo.last_scored_at != null ? { dateModified: new Date(repo.last_scored_at * 1000).toISOString() } : {}),
+ keywords: [slug, repo.name, repo.owner, repo.language, "AGENTS.md", "AI coding agent"]
+ .filter(Boolean)
+ .join(", "),
description: `Agent-friendliness score for ${slug} across Claude Code, Cursor, Devin, GPT-5 Codex, Gemini CLI, Aider, OpenHands, and Pi.`,
+ additionalProperty: signals.map((s) => ({
+ "@type": "PropertyValue",
+ name: s.label,
+ value: s.pass,
+ ...(s.detail ? { description: s.detail } : {}),
+ })),
},
],
};
diff --git a/app/robots.ts b/app/robots.ts
index 26d5ce5..c7fc56f 100644
--- a/app/robots.ts
+++ b/app/robots.ts
@@ -1,9 +1,33 @@
import type { MetadataRoute } from "next";
import { APP_URL } from "@/lib/version";
+const AI_CRAWLERS = [ // user-agent names that robots() lists in their own explicit rule
+ "CCBot",
+ "GPTBot",
+ "YouBot",
+ "Diffbot",
+ "Amazonbot",
+ "ClaudeBot",
+ "Cohere-AI",
+ "Bytespider",
+ "Claude-Web",
+ "anthropic-ai",
+ "ChatGPT-User",
+ "DuckAssistBot",
+ "OAI-SearchBot",
+ "PerplexityBot",
+ "Google-Extended",
+ "Perplexity-User",
+ "Applebot-Extended",
+ "Meta-ExternalAgent",
+];
+
export default function robots(): MetadataRoute.Robots {
return {
sitemap: `${APP_URL}/sitemap.xml`,
- rules: [{ userAgent: "*", allow: "/", disallow: "/api/" }],
+ rules: [
+ { userAgent: "*", allow: "/", disallow: "/api/" }, // default policy: everything except /api/
+ { userAgent: AI_CRAWLERS, allow: "/", disallow: "/api/" }, // same allow/disallow values, but addressed to the listed crawlers by name
+ ],
};
}
diff --git a/app/sitemap.ts b/app/sitemap.ts
index a848fa6..f2ceb45 100644
--- a/app/sitemap.ts
+++ b/app/sitemap.ts
@@ -10,42 +10,48 @@ export default function sitemap(): MetadataRoute.Sitemap {
const staticRoutes: MetadataRoute.Sitemap = [
{
- url: `${APP_URL}/`,
priority: 1,
+ url: `${APP_URL}/`,
lastModified: lastScored,
changeFrequency: "daily",
},
{
- url: `${APP_URL}/package`,
priority: 0.8,
+ url: `${APP_URL}/package`,
lastModified: lastScored,
changeFrequency: "weekly",
},
{
- url: `${APP_URL}/methodology`,
priority: 0.7,
lastModified: now,
changeFrequency: "monthly",
+ url: `${APP_URL}/methodology`,
+ },
+ {
+ priority: 0.5,
+ lastModified: now,
+ changeFrequency: "monthly",
+ url: `${APP_URL}/about`,
},
{
- url: `${APP_URL}/roadmap`,
priority: 0.6,
lastModified: now,
changeFrequency: "weekly",
+ url: `${APP_URL}/roadmap`,
},
{
- url: `${APP_URL}/changelog`,
priority: 0.6,
lastModified: now,
changeFrequency: "weekly",
+ url: `${APP_URL}/changelog`,
},
];
const repoRoutes: MetadataRoute.Sitemap = listLeaderboardOverall().map((r) => ({
- url: `${APP_URL}/repo/${r.id}`,
- priority: 0.6,
changeFrequency: "weekly",
+ url: `${APP_URL}/repo/${r.id}`,
lastModified: r.last_scored_at != null ? new Date(r.last_scored_at * 1000) : now,
+ priority: r.score != null ? Math.round((0.3 + (r.score / 100) * 0.6) * 10) / 10 : 0.4,
}));
return [...staticRoutes, ...repoRoutes];
diff --git a/components/AlternativesStrip.tsx b/components/AlternativesStrip.tsx
index c7390b5..d2e9342 100644
--- a/components/AlternativesStrip.tsx
+++ b/components/AlternativesStrip.tsx
@@ -23,7 +23,7 @@ export function AlternativesStrip({ language, alternatives, selectedModelLabel }
Same-language repos scored for {selectedModelLabel} . Heuristic v1
- (same language + same host); cross-language matches are refined in v0.5.0.
+ (same language + same host); cross-language matches are refined in v0.6.0.
diff --git a/components/PackageLookupForm.tsx b/components/PackageLookupForm.tsx
index 274c7b0..ac75b88 100644
--- a/components/PackageLookupForm.tsx
+++ b/components/PackageLookupForm.tsx
@@ -28,10 +28,10 @@ export function PackageLookupForm() {
Registry
setRegistry(e.target.value as Registry)}
- className="h-9 w-full appearance-none rounded-md border border-line bg-surface-2 pl-3 pr-8 text-[13.5px] text-ink hover:bg-surface-hover sm:w-auto"
+ className="h-9 w-full appearance-none rounded-md border border-line bg-surface-2 pl-3 pr-8 text-[13.5px] text-ink cursor-pointer sm:w-auto"
>
{REGISTRIES.map((r) => (
@@ -51,9 +51,9 @@ export function PackageLookupForm() {
Package name
{
+ const m = firstExisting(repo, CANDIDATES);
+
+ if (m) {
+ return {
+ pass: 1,
+ label: LABEL,
+ matchedPath: m,
+ id: "aider_conf",
+ detail: "Aider config present",
+ };
+ }
+
+ return {
+ pass: 0,
+ label: LABEL,
+ id: "aider_conf",
+ detail: "No .aider.conf.yml at repo root",
+ };
+ },
+};
diff --git a/lib/scoring/signals/cursor-rules.ts b/lib/scoring/signals/cursor-rules.ts
new file mode 100644
index 0000000..da62ada
--- /dev/null
+++ b/lib/scoring/signals/cursor-rules.ts
@@ -0,0 +1,53 @@
+import { existsSync, readdirSync, statSync } from "node:fs";
+import { join } from "node:path";
+
+import type { Signal } from "./types";
+
+const LABEL = "Cursor rules (.cursor/rules)";
+
+export const cursorRules: Signal = { // pass is 1 for .cursor/rules/*.mdc, 0.5 for a legacy .cursorrules file, otherwise 0
+ label: LABEL,
+ id: "cursor_rules",
+ description: "Cursor's canonical instruction surface โ `.cursor/rules/*.mdc` (modern) or `.cursorrules` (legacy).",
+ improveSuggestion:
+ "Add `.cursor/rules/*.mdc` files describing how Cursor should work in this repo (architecture, conventions, naming). The legacy `.cursorrules` file is still read but is deprecated.",
+ check: (repo) => { // repo: directory path that the candidate paths are joined onto
+ const dir = join(repo, ".cursor", "rules");
+
+ if (existsSync(dir)) {
+ try {
+ if (statSync(dir).isDirectory()) {
+ const mdc = readdirSync(dir).filter((f) => f.endsWith(".mdc")); // direct children only; subdirectories are not walked
+
+ if (mdc.length > 0) {
+ return {
+ pass: 1,
+ label: LABEL,
+ id: "cursor_rules",
+ matchedPath: `.cursor/rules/${mdc[0]}`, // first match in readdirSync order, reported relative to repo
+ detail: `${mdc.length} .mdc file${mdc.length === 1 ? "" : "s"} in .cursor/rules/`,
+ };
+ }
+ }
+ } catch {} // stat/readdir errors are ignored so the legacy check below still runs
+ }
+
+ const legacy = join(repo, ".cursorrules");
+ if (existsSync(legacy)) {
+ return {
+ pass: 0.5, // partial credit for the legacy file
+ label: LABEL,
+ id: "cursor_rules",
+ matchedPath: ".cursorrules",
+ detail: "Legacy .cursorrules โ Cursor still reads it, but `.cursor/rules/*.mdc` is preferred",
+ };
+ }
+
+ return {
+ pass: 0,
+ label: LABEL,
+ id: "cursor_rules",
+ detail: "No .cursor/rules/*.mdc or .cursorrules",
+ };
+ },
+};
diff --git a/lib/scoring/signals/gemini-md.ts b/lib/scoring/signals/gemini-md.ts
new file mode 100644
index 0000000..c5994a0
--- /dev/null
+++ b/lib/scoring/signals/gemini-md.ts
@@ -0,0 +1,84 @@
+import { readdirSync } from "node:fs";
+import { join } from "node:path";
+
+import { readSafe } from "./helpers";
+import type { Signal } from "./types";
+
+const LABEL = "GEMINI.md";
+
+function findGeminiMd(repo: string): string | null { // returns repo joined with the first top-level entry named gemini.md (any letter case), or null
+ let entries: string[] = [];
+
+ try {
+ entries = readdirSync(repo);
+ } catch {
+ return null; // a repo directory that cannot be listed is treated as "not found"
+ }
+
+ for (const e of entries) {
+ if (e.toLowerCase() === "gemini.md") {
+ return join(repo, e); // keeps the on-disk casing of the entry
+ }
+ }
+
+ return null;
+}
+
+export const geminiMd: Signal = { // grades GEMINI.md by trimmed length: 0 missing, 0.2 empty, 0.5 under 200, 0.8 under 800, otherwise 1
+ label: LABEL,
+ id: "gemini_md",
+ description: "Gemini CLI's canonical hierarchical instructions file โ read at every prompt.",
+ improveSuggestion:
+ "Add a GEMINI.md at the repo root covering project goals, layout, setup commands, and conventions. Aim for 800+ chars of real guidance (not boilerplate).",
+ check: (repo) => {
+ const matched = findGeminiMd(repo);
+
+ if (!matched) {
+ return {
+ pass: 0,
+ label: LABEL,
+ id: "gemini_md",
+ detail: "No GEMINI.md at repo root",
+ };
+ }
+
+ const len = readSafe(matched).trim().length; // presumably readSafe yields "" when the read fails; confirm in ./helpers
+ if (len === 0) {
+ return {
+ pass: 0.2,
+ label: LABEL,
+ id: "gemini_md",
+ matchedPath: matched, // path as returned by findGeminiMd (repo-joined)
+ detail: "GEMINI.md exists but empty",
+ };
+ }
+
+ if (len < 200) {
+ return {
+ pass: 0.5,
+ label: LABEL,
+ id: "gemini_md",
+ matchedPath: matched,
+ detail: `GEMINI.md exists (${len} chars) โ thin`,
+ };
+ }
+
+ if (len < 800) {
+ return {
+ pass: 0.8,
+ label: LABEL,
+ id: "gemini_md",
+ matchedPath: matched,
+ detail: `GEMINI.md exists (${len} chars)`,
+ };
+ }
+
+ return {
+ pass: 1, // 800 or more trimmed characters
+ label: LABEL,
+ id: "gemini_md",
+ matchedPath: matched,
+ detail: `Substantive GEMINI.md (${len} chars)`,
+ };
+ },
+};
diff --git a/lib/scoring/signals/index.ts b/lib/scoring/signals/index.ts
index 96997f1..2783e77 100644
--- a/lib/scoring/signals/index.ts
+++ b/lib/scoring/signals/index.ts
@@ -1,10 +1,14 @@
import { agentsMd } from "./agents-md";
+import { aiderConf } from "./aider-conf";
import { ci } from "./ci";
import { contributing } from "./contributing";
+import { cursorRules } from "./cursor-rules";
import { depsManifest } from "./deps-manifest";
import { devEnv } from "./dev-env";
+import { geminiMd } from "./gemini-md";
import { license } from "./license";
import { linter } from "./linter";
+import { openhandsSetup } from "./openhands-setup";
import { preCommit } from "./pre-commit";
import { readme } from "./readme";
import { size } from "./size";
@@ -16,6 +20,10 @@ export type { Signal, SignalResult };
export const SIGNALS: Signal[] = [
agentsMd,
+ cursorRules,
+ geminiMd,
+ openhandsSetup,
+ aiderConf,
readme,
tests,
ci,
diff --git a/lib/scoring/signals/openhands-setup.ts b/lib/scoring/signals/openhands-setup.ts
new file mode 100644
index 0000000..712c418
--- /dev/null
+++ b/lib/scoring/signals/openhands-setup.ts
@@ -0,0 +1,47 @@
+import { existsSync } from "node:fs";
+import { join } from "node:path";
+
+import { readSafe } from "./helpers";
+import type { Signal } from "./types";
+
+const LABEL = ".openhands/setup.sh";
+const REL = ".openhands/setup.sh";
+
+export const openhandsSetup: Signal = { // pass is 0 when .openhands/setup.sh is missing, 0.2 when it is empty after trimming, otherwise 1
+ label: LABEL,
+ id: "openhands_setup",
+ description: "OpenHands runs `.openhands/setup.sh` at session start to bootstrap the repo's dev environment.",
+ improveSuggestion:
+ "Add a `.openhands/setup.sh` that installs dependencies and prepares the project so OpenHands can run tests and lints out of the box.",
+ check: (repo) => {
+ const abs = join(repo, REL);
+
+ if (!existsSync(abs)) {
+ return {
+ pass: 0,
+ label: LABEL,
+ id: "openhands_setup",
+ detail: "No .openhands/setup.sh",
+ };
+ }
+
+ const len = readSafe(abs).trim().length; // presumably readSafe yields "" when the read fails; confirm in ./helpers
+ if (len === 0) {
+ return {
+ pass: 0.2,
+ label: LABEL,
+ matchedPath: abs, // NOTE(review): repo-joined path, while cursorRules reports a repo-relative one; confirm which form consumers expect
+ id: "openhands_setup",
+ detail: "Empty .openhands/setup.sh",
+ };
+ }
+
+ return {
+ pass: 1, // any non-whitespace content earns full credit; the script itself is not inspected
+ label: LABEL,
+ matchedPath: abs,
+ id: "openhands_setup",
+ detail: `Setup script present (${len} chars)`,
+ };
+ },
+};
diff --git a/lib/scoring/weights.ts b/lib/scoring/weights.ts
index 90d5555..ec88d58 100644
--- a/lib/scoring/weights.ts
+++ b/lib/scoring/weights.ts
@@ -4,6 +4,7 @@ export type ModelProfile = {
id: ModelId;
label: string;
rationale: string;
+ sources: string[];
weights: Record;
};
@@ -12,7 +13,8 @@ export const MODELS: ModelProfile[] = [
id: "claude-code",
label: "Claude Code",
rationale:
- "Weights AGENTS.md and tests heavily โ Claude Code leans on an instructions file and a fast feedback loop.",
+ "Loads CLAUDE.md at the start of every conversation per Anthropic's memory docs, so AGENTS.md / CLAUDE.md and a fast test loop carry the most weight.",
+ sources: ["https://code.claude.com/docs/en/memory"],
weights: {
ci: 0.5,
size: 0.5,
@@ -21,57 +23,73 @@ export const MODELS: ModelProfile[] = [
linter: 0.6,
dev_env: 0.9,
license: 0.3,
+ gemini_md: 0,
+ aider_conf: 0,
agents_md: 1.0,
+ cursor_rules: 0,
pre_commit: 0.4,
type_config: 0.6,
contributing: 0.4,
deps_manifest: 0.7,
+ openhands_setup: 0,
},
},
{
id: "cursor",
label: "Cursor",
rationale:
- "Weights type config and a detailed README highly โ Cursor's inline edits benefit from static types and skim-readable docs.",
+ "Per Cursor's Rules docs, reads `.cursor/rules/*.mdc` and AGENTS.md as the canonical repo-side input. Type config and a clean README still aid the codebase index but aren't the docs-cited signal.",
+ sources: ["https://cursor.com/docs/context/rules"],
weights: {
- agents_md: 0.6,
- readme: 1.0,
- tests: 0.7,
ci: 0.4,
+ size: 0.4,
+ tests: 0.7,
linter: 0.8,
- deps_manifest: 0.8,
+ readme: 1.0,
dev_env: 0.5,
- type_config: 1.0,
+ gemini_md: 0,
license: 0.3,
- contributing: 0.3,
+ aider_conf: 0,
+ agents_md: 0.8,
pre_commit: 0.3,
- size: 0.4,
+ type_config: 1.0,
+ contributing: 0.3,
+ cursor_rules: 1.0,
+ deps_manifest: 0.8,
+ openhands_setup: 0,
},
},
{
id: "devin",
label: "Devin",
rationale:
- "Weights CI and reproducible envs highly โ Devin runs in a sandboxed VM and needs end-to-end automation.",
+ "Operates from a sandboxed Ubuntu VM and runs an 8-step machine setup (deps, secrets, language versions, lint/test commands) per Cognition's repo-setup docs. CI config files alone aren't what the docs ask for โ a runnable dev environment is.",
+ sources: ["https://docs.devin.ai/onboard-devin/repo-setup"],
weights: {
- agents_md: 0.6,
- readme: 0.7,
+ ci: 0.7,
+ size: 0.6,
tests: 0.9,
- ci: 1.0,
linter: 0.5,
- deps_manifest: 0.9,
+ readme: 0.7,
dev_env: 1.0,
- type_config: 0.5,
license: 0.3,
- contributing: 0.5,
+ gemini_md: 0,
+ aider_conf: 0,
+ agents_md: 0.6,
+ cursor_rules: 0,
pre_commit: 0.5,
- size: 0.6,
+ type_config: 0.5,
+ contributing: 0.5,
+ deps_manifest: 0.9,
+ openhands_setup: 0,
},
},
{
id: "gpt-5-codex",
label: "GPT-5 Codex",
- rationale: "Balanced profile as a reference point.",
+ rationale:
+ "Reads AGENTS.md before doing any work per OpenAI's Codex docs โ the strictest AGENTS.md adherent of any agent here. Hierarchical (per-directory) AGENTS.md and AGENTS.override.md are first-class.",
+ sources: ["https://developers.openai.com/codex/guides/agents-md"],
weights: {
ci: 0.7,
size: 0.5,
@@ -80,18 +98,23 @@ export const MODELS: ModelProfile[] = [
readme: 0.8,
dev_env: 0.7,
license: 0.3,
- agents_md: 0.7,
+ gemini_md: 0,
+ aider_conf: 0,
+ agents_md: 0.9,
+ cursor_rules: 0,
pre_commit: 0.4,
type_config: 0.7,
contributing: 0.4,
deps_manifest: 0.7,
+ openhands_setup: 0,
},
},
{
id: "gemini-cli",
label: "Gemini CLI",
rationale:
- "Weights long-form README and strict type configs โ Gemini's long context favors docs-heavy codebases that spell out contracts.",
+ "Reads hierarchical `GEMINI.md` (global โ workspace โ component-level) at every prompt per Gemini CLI's docs. The long-context advantage favors repos that split context per directory rather than docs-heavy in general.",
+ sources: ["https://geminicli.com/docs/cli/gemini-md/"],
weights: {
ci: 0.6,
size: 0.5,
@@ -100,18 +123,23 @@ export const MODELS: ModelProfile[] = [
readme: 0.9,
dev_env: 0.7,
license: 0.3,
+ aider_conf: 0,
agents_md: 0.7,
+ gemini_md: 1.0,
+ cursor_rules: 0,
pre_commit: 0.4,
type_config: 0.9,
contributing: 0.4,
deps_manifest: 0.8,
+ openhands_setup: 0,
},
},
{
id: "aider",
label: "Aider",
rationale:
- "Weights tests and linter highest โ Aider runs both per-edit as its feedback loop, so a green bar translates directly into successful commits.",
+ "Auto-lints on every edit by default; runs the configured test command after edits when `--test-cmd` is set (per Aider's lint/test docs). A green linter and a declared test command translate directly into successful commits.",
+ sources: ["https://aider.chat/docs/usage/lint-test.html"],
weights: {
ci: 0.3,
size: 0.4,
@@ -120,18 +148,26 @@ export const MODELS: ModelProfile[] = [
readme: 0.6,
dev_env: 0.5,
license: 0.2,
+ gemini_md: 0,
agents_md: 0.8,
+ aider_conf: 0.8,
+ cursor_rules: 0,
pre_commit: 0.3,
type_config: 0.5,
contributing: 0.3,
deps_manifest: 0.7,
+ openhands_setup: 0,
},
},
{
id: "openhands",
label: "OpenHands",
rationale:
- "Weights reproducible dev-env and CI highest โ OpenHands operates from a sandboxed container, so a working Dockerfile / Makefile is near-mandatory.",
+ "Runs in a sandboxed container and executes `.openhands/setup.sh` at session start per OpenHands' repo-customization docs. Root AGENTS.md is now the preferred always-on instruction surface (microagents are deprecated in favor of it).",
+ sources: [
+ "https://docs.openhands.dev/usage/prompting/repository",
+ "https://docs.openhands.dev/usage/prompting/microagents-overview",
+ ],
weights: {
ci: 1.0,
size: 0.7,
@@ -140,18 +176,23 @@ export const MODELS: ModelProfile[] = [
readme: 0.7,
dev_env: 1.0,
license: 0.4,
+ gemini_md: 0,
+ aider_conf: 0,
agents_md: 0.5,
+ cursor_rules: 0,
pre_commit: 0.6,
type_config: 0.5,
contributing: 0.7,
deps_manifest: 1.0,
+ openhands_setup: 1.0,
},
},
{
id: "pi",
label: "Pi",
rationale:
- "Weights AGENTS.md heavily and rewards a fast test/lint loop โ Pi's minimal terminal harness reads AGENTS.md explicitly and defers sandboxing to user-installed extensions.",
+ "Minimal terminal coding harness. Loads `AGENTS.md` (or `CLAUDE.md`) at startup โ global, parent dirs, then cwd โ per the Pi coding-agent README. Sandboxing is deferred to user-installed extensions.",
+ sources: ["https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/README.md"],
weights: {
ci: 0.4,
size: 0.5,
@@ -160,11 +201,15 @@ export const MODELS: ModelProfile[] = [
readme: 0.7,
dev_env: 0.6,
license: 0.2,
+ gemini_md: 0,
+ aider_conf: 0,
agents_md: 1.0,
+ cursor_rules: 0,
pre_commit: 0.4,
type_config: 0.6,
contributing: 0.3,
deps_manifest: 0.7,
+ openhands_setup: 0,
},
},
];
diff --git a/lib/version.ts b/lib/version.ts
index 65ff37c..88e6bb4 100644
--- a/lib/version.ts
+++ b/lib/version.ts
@@ -1,4 +1,4 @@
-export const APP_VERSION = "0.3.0";
+export const APP_VERSION = "0.4.0";
export const APP_NAME = "Agent Friendly Code";
export const IS_PRE_RELEASE = APP_VERSION.startsWith("0.0.");
@@ -6,7 +6,7 @@ export const REPO_URL = "https://github.com/hsnice16/agent-friendly-code";
export const APP_URL = process.env.NEXT_PUBLIC_APP_URL ?? "https://agent-friendly-code.vercel.app";
export const APP_DESCRIPTION =
- "Public dashboard ranking open-source repos by how friendly they are to AI coding agents (Claude Code, Cursor, Devin, GPT-5 Codex) โ per model, across GitHub, GitLab, and Bitbucket.";
+ "Public dashboard ranking open-source repos by how friendly they are to AI coding agents (Claude Code, Cursor, Devin, GPT-5 Codex, Gemini CLI, Aider, OpenHands, Pi) โ per model, across GitHub, GitLab, and Bitbucket.";
export const APP_KEYWORDS = [
"ai",
@@ -27,8 +27,10 @@ export const APP_KEYWORDS = [
"ai code agents",
"ai coding agent",
"ai coding agents",
+ "ai friendly code",
"coding assistant",
"agent friendly code",
+ "agent friendly repo",
"ai pair programming",
"ai software engineer",
"pi",
@@ -47,4 +49,50 @@ export const APP_KEYWORDS = [
"developer tools",
"agent ready repo",
"gemini code assist",
+ "v0",
+ "amp",
+ "cline",
+ "codeium",
+ "copilot",
+ "lovable",
+ "tabnine",
+ "bolt.new",
+ "roo code",
+ "windsurf",
+ "continue.dev",
+ "replit agent",
+ "copilot agent",
+ "github copilot",
+ "sourcegraph amp",
+ "copilot agent mode",
+ "MCP",
+ "spec kit",
+ "claude agent sdk",
+ "model context protocol",
+ "spec-driven development",
+ "agentic",
+ "agentic ai",
+ "agentic ide",
+ "agentic coding",
+ "ai code editor",
+ "ai code review",
+ "ai code generation",
+ "ai developer tools",
+ "ai pair programmer",
+ "swe agent",
+ "vibe coding",
+ "background agent",
+ "ai software development",
+ "autonomous coding agent",
+ "autonomous developer agent",
+ "agent eval",
+ "ai readiness",
+ "ai ready repo",
+ "agent benchmark",
+ "agent leaderboard",
+ "ai ready codebase",
+ "agent friendliness",
+ "ai agent benchmark",
+ "agent compatibility",
+ "ai agent leaderboard",
];
diff --git a/package.json b/package.json
index 99212b8..bdcba98 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "agent-friendly-code",
- "version": "0.3.0",
+ "version": "0.4.0",
"private": true,
"license": "MIT",
"author": "Himanshu Singh (https://github.com/hsnice16)",
@@ -13,6 +13,7 @@
"start": "next start -p 3000",
"score": "tsx scripts/score.ts score",
"seed": "tsx scripts/seed.ts",
+ "seed-packages": "tsx scripts/seed-packages.ts",
"init-db": "tsx scripts/init-db.ts",
"prepare-hooks": "lefthook install",
"test": "node --import tsx --test 'tests/**/*.test.ts'"
diff --git a/scripts/seed-packages.ts b/scripts/seed-packages.ts
new file mode 100644
index 0000000..1b6c003
--- /dev/null
+++ b/scripts/seed-packages.ts
@@ -0,0 +1,47 @@
+import type { Registry } from "../lib/clients/registries";
+import { lookupPackage } from "../lib/package-lookup";
+
+const PACKAGES: Array<{ registry: Registry; name: string }> = [ // fixed registry/name pairs that seedPackages() looks up, in this order
+ { registry: "npm", name: "next" },
+ { registry: "npm", name: "axios" },
+ { registry: "npm", name: "react" },
+ { registry: "npm", name: "eslint" },
+ { registry: "npm", name: "prettier" },
+ { registry: "pypi", name: "fastapi" },
+ { registry: "pypi", name: "requests" },
+ { registry: "cargo", name: "axum" },
+ { registry: "cargo", name: "serde" },
+ { registry: "cargo", name: "tokio" },
+ { registry: "cargo", name: "ripgrep" },
+];
+
+export async function seedPackages() { // looks up every PACKAGES entry one at a time and logs a scored/other tally
+ console.log(`\nโโโ seeding ${PACKAGES.length} popular packages for /package chips โโโ`);
+
+ let other = 0; // any status other than "scored", plus lookups that threw
+ let scored = 0;
+
+ for (const { registry, name } of PACKAGES) {
+ try {
+ const result = await lookupPackage(registry, name); // awaited inside the loop, so lookups run sequentially
+ console.log(` ${registry}/${name} โ ${result.status}`);
+
+ if (result.status === "scored") {
+ scored++;
+ } else {
+ other++;
+ }
+ } catch (e) { // a failed lookup is logged and counted; the loop continues with the next package
+ console.error(` ${registry}/${name} โ error: ${(e as Error).message}`);
+ other++;
+ }
+ }
+
+ console.log(`package seed done โ ${scored} scored / ${other} other`);
+}
+
+// Run when invoked as a top-level script (e.g. `bun run seed-packages`),
+// but stay inert when imported by scripts/seed.ts so we don't double-execute.
+if (process.argv[1]?.endsWith("seed-packages.ts")) { // matches on the file-name suffix of the script path
+ void seedPackages(); // `void` marks the returned promise as intentionally not awaited
+}
diff --git a/scripts/seed.ts b/scripts/seed.ts
index 077780c..08ca599 100644
--- a/scripts/seed.ts
+++ b/scripts/seed.ts
@@ -1,22 +1,32 @@
import { spawnSync } from "node:child_process";
+
import { SEEDS } from "./seed-list";
+import { seedPackages } from "./seed-packages";
-let ok = 0,
- failed = 0;
+async function main() { // scores every SEEDS url through a child `bun run score`, then seeds the package list
+ let ok = 0,
+ failed = 0;
-for (const s of SEEDS) {
- console.log(`\nโโโ seeding ${s.url}${s.note ? ` โ ${s.note}` : ""} โโโ`);
+ for (const s of SEEDS) {
+ console.log(`\nโโโ seeding ${s.url}${s.note ? ` โ ${s.note}` : ""} โโโ`);
- const r = spawnSync("bun", ["run", "score", s.url], {
- stdio: "inherit",
- });
+ const r = spawnSync("bun", ["run", "score", s.url], { // synchronous: each seed finishes before the next starts
+ stdio: "inherit",
+ });
- if (r.status === 0) {
- ok++;
- } else {
- failed++;
- console.error(` (failed with status ${r.status}, continuing)`);
+ if (r.status === 0) {
+ ok++;
+ } else {
+ failed++; // a non-zero (or null) exit status is counted and the loop moves on
+ console.error(` (failed with status ${r.status}, continuing)`);
+ }
}
+
+ console.log(`\nseed done โ ${ok} ok / ${failed} failed.`);
+
+ await seedPackages(); // runs after the repo seeds regardless of how many failed
+
+ console.log(`\nrun \`bun run dev\` and open http://localhost:3000`);
}
-console.log(`\nseed done โ ${ok} ok / ${failed} failed. Run \`bun run dev\` and open http://localhost:3000`);
+void main();
diff --git a/tasks/0.3.0/04-alternatives-v1.md b/tasks/0.3.0/04-alternatives-v1.md
index 92067fe..8ad94fb 100644
--- a/tasks/0.3.0/04-alternatives-v1.md
+++ b/tasks/0.3.0/04-alternatives-v1.md
@@ -12,7 +12,7 @@ On the repo detail page, show a short "alternatives" strip: _"Repos that look si
- `lib/clients/github.ts` — `fetchRepoMeta` now returns `language` alongside `defaultBranch` and `stars`. GitHub + Bitbucket expose it directly; GitLab needs a separate endpoint, so it stays `null` there for v1.
- `scripts/score.ts` — pipes the language through to `saveScoredRepo`; `saveScoredRepo` uses `COALESCE` on conflict so a rescore without language (e.g. offline) doesn't blow away an existing value.
- `lib/db.ts` — new `getAlternatives(repoId, modelId | null, limit)` query. Filters: same host + same language + `id != self`. Orders by the selected model's `model_score` when a model is given, else `overall_score`.
-- `components/AlternativesStrip.tsx` — up to 3 cards in a grid (single column on mobile, 3-col at `sm:`), each linking to the candidate's repo page. Panel is suppressed entirely when the query returns zero rows. UI flags itself as "heuristic v1" and points at v0.5.0 for refinement.
+- `components/AlternativesStrip.tsx` — up to 3 cards in a grid (single column on mobile, 3-col at `sm:`), each linking to the candidate's repo page. Panel is suppressed entirely when the query returns zero rows. UI flags itself as "heuristic v1" and points at v0.6.0 for refinement.
- `app/repo/[id]/page.tsx` โ renders the strip between `PerModelScores` and `Signal breakdown`.
## Acceptance
@@ -22,9 +22,9 @@ On the repo detail page, show a short "alternatives" strip: _"Repos that look si
- ✓ Query stays in `lib/db.ts`; no SQL leaks into `app/` or `components/`.
- ~ One test in `tests/db.test.ts` — not added in this pass; follow-up.
-## Known limitations (improve in v2 — `tasks/0.5.0/`)
+## Known limitations (improve in v2 — `tasks/0.6.0/`)
-- **Cross-language alternatives get missed** — `axios` (JS) vs `requests` (Python) are semantic alternatives but different languages. v1 will never surface these; v2 in 0.5.0 lifts this with README-embedding similarity.
+- **Cross-language alternatives get missed** — `axios` (JS) vs `requests` (Python) are semantic alternatives but different languages. v1 will never surface these; v2 in 0.6.0 lifts this with README-embedding similarity.
- **Language alone is coarse** — a Rust CLI tool and a Rust ORM share a language but aren't alternatives. The UI flags itself as "heuristic v1" so users know the confidence level.
- **No topic clustering** — `react` and `nextjs` appear as "alternatives" because both are top-scoring JS repos, even though they're complementary. Acceptable noise for v1.
- **Back-fill**: repos scored before this change have `language = NULL` until rescored. The heuristic excludes `NULL`-language rows, so the strip renders only on rescored repos until the next full re-seed.
diff --git a/tasks/0.3.0/05-package-registry-overlay.md b/tasks/0.3.0/05-package-registry-overlay.md
index d88f431..6c2bb7e 100644
--- a/tasks/0.3.0/05-package-registry-overlay.md
+++ b/tasks/0.3.0/05-package-registry-overlay.md
@@ -23,7 +23,7 @@
- ✓ `/package`, `/package/npm/react`, `/package/npm/lodash`, `/package/npm/` all render the expected state.
- ✓ Nav link visible; homepage callout visible.
-## Out of scope (stays in v0.6.0)
+## Out of scope (stays in v0.7.0)
- Per-registry leaderboards ("top 100 npm packages by agent-friendliness").
- Browser userscript for inline badges on registry pages.
@@ -34,7 +34,7 @@
Developers pick dependencies in registry UIs (npmjs.com, PyPI, crates.io) — not on GitHub. This task adds the lookup half of the package-registry overlay: given a package name, resolve it to its source repo and surface the score (or invite scoring via a pre-filled GitHub issue).
-The at-scale side — per-registry leaderboards + browser userscript for inline badges — stays in v0.6.0 as a follow-up.
+The at-scale side — per-registry leaderboards + browser userscript for inline badges — stays in v0.7.0 as a follow-up.
## Public surface
@@ -65,7 +65,7 @@ The at-scale side โ per-registry leaderboards + browser userscript for inline
- `lib/db.ts` — `package_alias(registry, name, repo_url, resolved_at)` cache table. Additive migration pattern matching `language`.
- `lib/utils/contact.ts` — builds a `REPO_URL/issues/new?title=...&body=...` link with a pre-filled template naming the package + what we resolved.
-## Out of scope (v0.6.0 follow-up)
+## Out of scope (v0.7.0 follow-up)
- Per-registry leaderboards ("top 100 npm packages").
- Browser userscript for inline badges on registry pages.
diff --git a/tasks/0.3.0/README.md b/tasks/0.3.0/README.md
index eab0d9b..e0b776c 100644
--- a/tasks/0.3.0/README.md
+++ b/tasks/0.3.0/README.md
@@ -9,5 +9,5 @@ Make the current scores usable outside the dashboard (badge SVG for READMEs) and
- [01-badge-endpoint.md](./01-badge-endpoint.md) — `/badge/:host/:owner/:name.svg` so repos can embed their score in a README. **Done.**
- [02-expand-agent-coverage.md](./02-expand-agent-coverage.md) — add Gemini CLI and the next tier of active coding agents to `MODELS` on illustrative weights, tagged clearly on `/methodology`. **Done.**
- [03-animate-score-bar.md](./03-animate-score-bar.md) — animate the `ScoreBar` fill width on leaderboard prev/next instead of remounting. **Done.**
-- [04-alternatives-v1.md](./04-alternatives-v1.md) — v1 SQL heuristic for "alternative repos" on the repo detail page. Upgraded to embedding-similarity in 0.5.0. **Done.**
-- [05-package-registry-overlay.md](./05-package-registry-overlay.md) — npm / PyPI / Cargo lookup: `/package/:registry/:name` resolves to source repo and surfaces its score, with a pre-filled GitHub issue for anything unscored. Per-registry leaderboards + browser userscript stay in 0.6.0.
+- [04-alternatives-v1.md](./04-alternatives-v1.md) — v1 SQL heuristic for "alternative repos" on the repo detail page. Upgraded to embedding-similarity in 0.6.0. **Done.**
+- [05-package-registry-overlay.md](./05-package-registry-overlay.md) — npm / PyPI / Cargo lookup: `/package/:registry/:name` resolves to source repo and surfaces its score, with a pre-filled GitHub issue for anything unscored. Per-registry leaderboards + browser userscript stay in 0.7.0. **Done.**
diff --git a/tasks/0.4.0/01-sourced-agent-rationales.md b/tasks/0.4.0/01-sourced-agent-rationales.md
new file mode 100644
index 0000000..475524a
--- /dev/null
+++ b/tasks/0.4.0/01-sourced-agent-rationales.md
@@ -0,0 +1,85 @@
+# 01 · Documentation-cited agent rationales
+
+**Status**: done
+
+## Goal
+
+Make every per-model rationale defensible against an "okay, where does it say that?" challenge. Today the rationales in `lib/scoring/weights.ts` are claims like "Cursor weighs type configs and a skim-readable README highly." Half of them are unsupported once you read the official docs (Cursor docs don't mention type configs at all; Gemini CLI docs don't mention README weighting; OpenHands user docs don't require a Dockerfile). This task closes that gap and lets `/methodology` drop the "illustrative" hedge that has shipped since 0.3.0.
+
+## Verification findings (do not re-research)
+
+A docs audit run during this conversation produced the per-agent verdicts below. Treat these as the input โ the task is to encode them into the codebase, not to redo the research.
+
+| Agent | Verdict | Action |
+|---|---|---|
+| Claude Code | ✅ supported | Cite `https://code.claude.com/docs/en/memory` — confirms CLAUDE.md is loaded at start of every conversation. |
+| Cursor | ⚠️ unsupported | Drop the "type config + README" claim. Real signals are `.cursor/rules/*.mdc` and AGENTS.md. Source: `https://cursor.com/docs/context/rules`. |
+| Devin | ⚠️ partial | VM/sandbox is correct; CI emphasis is not. Docs prioritize dev-env setup commands. Source: `https://docs.devin.ai/onboard-devin/repo-setup`. Reweight `ci` from 1.0 → 0.7. |
+| GPT-5 Codex | ✅ supported (under-weighted) | Codex is the strictest AGENTS.md adherent — "Codex reads AGENTS.md files before doing any work." Source: `https://developers.openai.com/codex/guides/agents-md`. Lift `agents_md` from 0.7 → 0.9. |
+| Gemini CLI | ⚠️ unsupported | "README + types + tests" not in docs. Real signal is hierarchical `GEMINI.md`. Source: `https://geminicli.com/docs/cli/gemini-md/`. |
+| Aider | ✅ supported | Auto-lint default-on, auto-test when `--test-cmd` set. Source: `https://aider.chat/docs/usage/lint-test.html`. |
+| OpenHands | ⚠️ unsupported | Dockerfile/Makefile not required by user docs. Real signal is `.openhands/setup.sh` + AGENTS.md. Sources: `https://docs.openhands.dev/usage/prompting/repository`, `https://docs.openhands.dev/usage/prompting/microagents-overview`. |
+| Pi | ✅ supported | Real tool, AGENTS.md/CLAUDE.md loaded explicitly. Source: `https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/README.md`. |
+
+## Pieces
+
+### 1. New `sources` field on `ModelProfile`
+
+Extend `ModelProfile` in `lib/scoring/weights.ts`:
+
+```ts
+export type ModelProfile = {
+ id: ModelId;
+ label: string;
+ rationale: string;
+ sources: string[]; // URLs the rationale was derived from
+ weights: Record<string, number>; // signal id → weight
+};
+```
+
+Every agent gets at least one URL. Multiple URLs allowed where the claim spans pages (Devin VM + repo-setup; OpenHands microagents + repository docs).
+
+### 2. Rewrite each rationale string
+
+Three rationales are unsupported and must be rewritten (Cursor, Gemini CLI, OpenHands). Two need reweighting (Devin, Codex). Three are accurate but should still cite their source URL (Claude Code, Aider, Pi).
+
+### 3. Reweights
+
+- **Devin** — `ci`: 1.0 → 0.7. Docs emphasize an 8-step machine-setup sequence (deps, secrets, language versions, lint/test commands), not `.github/workflows` files.
+- **Cursor** — `agents_md`: 0.6 → 0.8 (Cursor docs explicitly cite AGENTS.md). Weight the still-to-be-added `cursor_rules` signal at 1.0 when task `02-agent-specific-signals.md` lands.
+- **GPT-5 Codex** — `agents_md`: 0.7 → 0.9. Codex docs say it reads AGENTS.md *before doing any work*; this is the strongest AGENTS.md commitment of any agent.
+- **Gemini CLI** — once the `gemini_md` signal lands (task 02), weight it at 1.0 on Gemini, 0 elsewhere.
+
+Don't over-tune the others; the verification didn't surface evidence to justify changing them.
+
+### 4. Methodology page surface
+
+`app/methodology/page.tsx`:
+
+- Replace the "Status: static parameters" warning panel's "illustrative" framing. New copy: "Per-model weights are **derived from each agent's published documentation** — see the Sources panel below for the URLs each claim was sourced from. They are not yet derived from measured agent success; that's the v1.0.0 benchmark harness." Adjust the FAQ entry that uses "illustrative" the same way.
+- Render each `ModelProfile.sources` array under the model's existing rationale block as a list of external links (`rel="noopener noreferrer"` per project security rule).
+- The "Models & weight profiles" section already iterates `MODELS`; weights/sources/rationale come along for free.
+
+### 5. Surrounding language
+
+Three other places say "illustrative":
+
+- `AGENTS.md` "Things to leave alone": "Per-model weights are illustrative." โ "Per-model weights are derived from each agent's published docs; do not tune individual values without re-running the audit (see `tasks/0.4.0/01-sourced-agent-rationales.md`) or shipping the v1.0.0 benchmark harness."
+- `lib/changelog.ts` 0.3.0 entry: "added to the per-model leaderboard with illustrative weights, flagged as such on `/methodology`." Past-tense; leave the historical claim accurate, but the next changelog entry will note the upgrade.
+- `tasks/0.3.0/README.md` and `tasks/0.3.0/02-expand-agent-coverage.md`: same โ historical, leave alone.
+
+## Acceptance
+
+- `lib/scoring/weights.ts` has a `sources: string[]` field on every model with at least one URL.
+- Each agent's rationale string makes a claim that can be checked against the cited URL (no unsourced "we think Cursor likes types").
+- Devin's `ci` weight is 0.7; Codex's `agents_md` is 0.9; Cursor's `agents_md` is 0.8.
+- `/methodology` no longer says "illustrative"; the Sources panel renders one link list per model.
+- `AGENTS.md` "Things to leave alone" line updated.
+- `bun run test` and `bun x tsc --noEmit` pass.
+- Re-scoring with `bun run seed` does not change scores beyond the expected delta from the four reweighted entries (sanity check — flag anything bigger than ±2 points overall on the dogfood repo).
+
+## Out of scope
+
+- Adding new signals for `.cursor/rules/`, `GEMINI.md`, `.openhands/setup.sh`, `.aider.conf.yml` โ that's task `02-agent-specific-signals.md`. Lift the model weights for those signals when that task lands.
+- Re-running the docs audit. The verification table above is the source of truth for this release.
+- Measured-success calibration โ still v1.0.0.
diff --git a/tasks/0.4.0/02-agent-specific-signals.md b/tasks/0.4.0/02-agent-specific-signals.md
new file mode 100644
index 0000000..9785d6e
--- /dev/null
+++ b/tasks/0.4.0/02-agent-specific-signals.md
@@ -0,0 +1,66 @@
+# 02 · Agent-specific instruction-file signals
+
+**Status**: done
+
+## Goal
+
+Detect each agent's *canonical* instruction file beyond AGENTS.md. The docs verification surfaced four files we currently miss but that vendor docs explicitly cite as the agent's primary repo-side input:
+
+| Signal ID | File | Agent | Doc citation |
+|---|---|---|---|
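+| `cursor_rules` | `.cursor/rules/*.mdc` | Cursor | `https://cursor.com/docs/context/rules` |
+| `gemini_md` | `GEMINI.md` (any case-insensitive variant) | Gemini CLI | `https://geminicli.com/docs/cli/gemini-md/` |
+| `openhands_setup` | `.openhands/setup.sh` | OpenHands | `https://docs.openhands.dev/usage/prompting/repository` |
+| `aider_conf` | `.aider.conf.yml` (or `.aider.conf.yaml`) | Aider | `https://aider.chat/docs/config/aider_conf.html` |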
+
+Each is a per-agent boost — present on the model that reads it, irrelevant on the others. So each signal's weight is high on the relevant model and 0 on every other model (an explicit 0, deliberate rather than absent — the convention in `weights.ts`).
+
+## Pieces
+
+### 1. New signal files
+
+One file per signal under `lib/scoring/signals/`, each implementing the existing `Signal` interface (id, label, description, improveSuggestion, evaluate).
+
+- **`cursor-rules.ts`** — pass = `.cursor/rules/` directory exists and contains ≥ 1 `.mdc` file. Partial credit (0.5) for a single `.cursorrules` legacy file (deprecated by Cursor but still read).
+- **`gemini-md.ts`** โ pass = case-insensitive `GEMINI.md` at repo root. Length-graded the same way `agents-md.ts` already grades AGENTS.md so a one-line stub doesn't get a free pass.
+- **`openhands-setup.ts`** โ pass = `.openhands/setup.sh` exists and is non-empty. No need to validate executability (we never run it; agent does, with its own permissions).
+- **`aider-conf.ts`** โ pass = `.aider.conf.yml` or `.aider.conf.yaml` exists at repo root.
+
+### 2. Register signals
+
+Add each to the `SIGNALS` array in `lib/scoring/signals/index.ts`. Order alphabetically with the existing entries to avoid churn.
+
+### 3. Weight matrix
+
+In `lib/scoring/weights.ts`, add a weight entry for each new signal on **every** model (the `Adding a signal` rule in `AGENTS.md`). The non-target models get explicit `0` so the intent is recorded rather than implicit.
+
+| Signal | Cursor | Gemini CLI | OpenHands | Aider | Others |
+|---|---|---|---|---|---|
+| `cursor_rules` | 1.0 | 0 | 0 | 0 | 0 |
+| `gemini_md` | 0 | 1.0 | 0 | 0 | 0 |
+| `openhands_setup` | 0 | 0 | 1.0 | 0 | 0 |
+| `aider_conf` | 0 | 0 | 0 | 0.8 | 0 |
+
+(`aider_conf` at 0.8 because Aider can also be configured via CLI flags โ the file is the strongest signal but not strictly required.)
+
+### 4. Tests
+
+One `tests/signals/<signal-name>.test.ts` per new signal (e.g. `tests/signals/cursor-rules.test.ts`), mirroring the existing pattern. At minimum: present-and-substantive case, present-but-empty case, absent case.
+
+### 5. Methodology page
+
+The methodology page already iterates `SIGNALS` for the rendered list โ new signals appear automatically. No surface change needed beyond the `improveSuggestion` text on each.
+
+## Acceptance
+
+- Four new files under `lib/scoring/signals/`. All registered in `SIGNALS`.
+- Every model in `weights.ts` has an entry for each new signal (zeros where intentional).
+- Tests for each new signal pass.
+- Re-scoring the dogfood repo: Cursor's score drops slightly (we don't ship `.cursor/rules/`), Gemini's score drops similarly, etc. โ expected because we don't currently meet our own new bar. Acceptable; the bump-our-own-score work isn't in this task.
+- `/methodology` shows the new signals in the Signals list with sensible improve suggestions.
+
+## Out of scope
+
+- Updating the rationale strings โ that's task 01. Land 01 first, then this task adds the matching weight rows.
+- Hierarchical / nested AGENTS.md detection (Codex feature, valuable but bigger). Park as a follow-up.
+- A `cursorrules_modern_only` partial-credit nuance beyond what's listed above.
diff --git a/tasks/0.4.0/03-discoverability.md b/tasks/0.4.0/03-discoverability.md
new file mode 100644
index 0000000..24c1847
--- /dev/null
+++ b/tasks/0.4.0/03-discoverability.md
@@ -0,0 +1,48 @@
+# 03 · LLM + SEO discoverability surface
+
+**Status**: done
+
+## Goal
+
+Lift the dashboard's discoverability for both human visitors and LLM crawlers. The pieces here are independently small but reinforce each other: an About page gives the methodology a human face (E-E-A-T), `/llms.txt` lets Perplexity / Claude / ChatGPT-search crawlers ingest the dataset cleanly, and per-repo Open Graph images make `/repo/:id` URLs render with a real preview when shared.
+
+Most of this work is already on this branch as uncommitted changes; this task captures scope and acceptance for the release.
+
+## Pieces
+
+### 1. About page (`app/about/page.tsx`)
+
+- Who built this, why, what's in it, what isn't, and how scoring works at a high level (link to `/methodology` for the formula).
+- Linked from the footer's secondary nav (`/about` after `/changelog`).
+- Static page; same Panel + Tailwind tokens as the rest of the app.
+
+### 2. `/llms.txt` (`app/llms.txt/route.ts`)
+
+- Markdown manifest at the root path `/llms.txt` per the proposed [llms.txt convention](https://llmstxt.org/).
+- Lists what the site is, the public API endpoints with one-line descriptions, the methodology page link, and a representative subset of repos (top 20 by overall score).
+- `Cache-Control: public, max-age=3600` โ same cadence as `/api/repos`.
+
+### 3. Per-repo Open Graph images (`app/repo/[id]/opengraph-image.tsx`)
+
+- `next/og` route convention: every `/repo/:id` page auto-gets an OG image at `/repo/:id/opengraph-image`.
+- Image renders the repo's `owner/name`, the overall score, and the score's tier (e.g. "Excellent for AI agents"). 1200×630.
+- Wired into Next's metadata pipeline by file location; no manual `metadata.openGraph.images` change needed.
+
+### 4. Robots + sitemap
+
+- `app/robots.ts` โ explicit allows for known LLM crawlers (GPTBot, ClaudeBot, PerplexityBot, etc.), still blocking `/api/`.
+- `app/sitemap.ts` โ include `/about` and `/llms.txt`. Repo entries already there.
+
+## Acceptance
+
+- `/about` reachable from the footer. Renders the same theme tokens as the rest of the app.
+- `curl localhost:3000/llms.txt` returns a markdown manifest with at least: site description, primary endpoints, methodology link, top-20 repo list.
+- Sharing a `/repo/:id` URL on a chat surface that does OG previews shows a 1200×630 image with the repo + score, not the default `/og.png` site card.
+- `app/robots.ts` lists at least 3 known LLM-crawler user agents in `allow:`.
+- Type-check + tests pass.
+
+## Out of scope
+
+- A submission form for new packages โ keep the existing GitHub-issue contact link.
+- Crawler authentication / rate-limit policies for `/api/*` โ still blocked at the robots level for LLM crawlers; rate-limiting is operational work for v1.0.0.
+- Automatic OG-image regeneration on score updates โ Next handles caching; revalidation falls out of normal page revalidation.
diff --git a/tasks/0.4.0/README.md b/tasks/0.4.0/README.md
index 4f8776f..2387e70 100644
--- a/tasks/0.4.0/README.md
+++ b/tasks/0.4.0/README.md
@@ -1,11 +1,13 @@
-# 0.4.0 โ quick wins
+# 0.4.0 โ credible scores + better discoverability
-**Status**: planned
+**Status**: done
-The cheapest items on the roadmap, paired so they can ship in one cut. History-aware signals extend the existing host-API clients with ~3 new signal files; the PR-diff GitHub Action is a thin wrapper that calls the badge / API endpoints we already ship; the Claude Code skill bundles a small lookup endpoint, a `SKILL.md` shipped from this repo, and a UI integration page. No new infra, no new deps of consequence โ high-impact additions that don't need a heavy release.
+The credibility cut. Two related items make the per-model scores defensible: rationale strings sourced from each agent's official docs (instead of the "illustrative" hedge that has shipped since 0.3.0), and a new family of agent-specific instruction-file signals discovered during the docs verification (`.cursor/rules/`, `GEMINI.md`, `.openhands/setup.sh`, `.aider.conf.yml`). The third item lifts the dashboard's discoverability โ an About page (E-E-A-T), `/llms.txt` for LLM crawlers, and per-repo Open Graph images โ capturing the SEO / discoverability work that landed alongside the rationale audit.
+
+The release does not add measurement (that's still v1.0.0's benchmark harness). It removes a credibility tax: today's per-model rationales are claims like "Cursor weighs type configs and READMEs highly" with no source โ half of them turn out to be unsupported once you read the official docs. After this release, every claim cites a URL.
## Tasks
-- [01-history-aware-signals.md](./01-history-aware-signals.md) โ extend the scorer with maintenance recency, commit velocity, and contributor activity. Hybrid fetch (shallow clone for files, host API for history) โ degrades gracefully without a token.
-- [02-score-diff-on-pr.md](./02-score-diff-on-pr.md) โ GitHub Action that comments the score delta on every PR using the existing `/api/repo/:id` and `/badge/...svg` endpoints.
-- [03-claude-code-skill.md](./03-claude-code-skill.md) โ Claude Code skill (installable via `npx skills add`) that fetches the active repo's score and recommends a model, bundled with the public `/api/score` lookup endpoint it depends on and a UI integration page with the SessionStart hook snippet.
+- [01-sourced-agent-rationales.md](./01-sourced-agent-rationales.md) — rewrite each agent's `rationale` in `lib/scoring/weights.ts` with a docs-cited claim, add a `sources` field per model, render the URLs in a "Sources" panel on `/methodology`, and replace the page's "illustrative" language with "documented preferences." Reweight Devin (CI 1.0 → 0.7, dev_env stays at 1.0), GPT-5 Codex (`agents_md` 0.7 → 0.9), and Cursor (lift `agents_md` and add `cursor_rules` weight) per the docs verification.
+- [02-agent-specific-signals.md](./02-agent-specific-signals.md) โ new signals for each agent's canonical instruction file beyond AGENTS.md: `cursor_rules` (`.cursor/rules/*.mdc`), `gemini_md` (`GEMINI.md`), `openhands_setup` (`.openhands/setup.sh`), `aider_conf` (`.aider.conf.yml`). Each weighted on the relevant model only; other models default to 0 (deliberate, not absent).
+- [03-discoverability.md](./03-discoverability.md) โ `/about` page (who built this and why โ SEO E-E-A-T), `/llms.txt` markdown manifest for Perplexity / Claude / ChatGPT search, and per-repo Open Graph images via the `next/og` convention (`app/repo/[id]/opengraph-image.tsx`). Mostly already in flight on this branch โ task captures scope and acceptance.
diff --git a/tasks/0.4.0/01-history-aware-signals.md b/tasks/0.5.0/01-history-aware-signals.md
similarity index 97%
rename from tasks/0.4.0/01-history-aware-signals.md
rename to tasks/0.5.0/01-history-aware-signals.md
index 2898d9a..46cab11 100644
--- a/tasks/0.4.0/01-history-aware-signals.md
+++ b/tasks/0.5.0/01-history-aware-signals.md
@@ -8,7 +8,7 @@ Score signals that need git history or repo activity data, which the current `--
## Context
-Today's scorer reads file contents from the working tree at HEAD of the default branch. This is correct for the 12 static signals (presence of `AGENTS.md`, `tests/`, CI configs, etc.) but misses anything time-derived. Methodology's "What isn't measured yet" section acknowledges this; v0.4.0 closes the gap.
+Today's scorer reads file contents from the working tree at HEAD of the default branch. This is correct for the 12 static signals (presence of `AGENTS.md`, `tests/`, CI configs, etc.) but misses anything time-derived. Methodology's "What isn't measured yet" section acknowledges this; v0.5.0 closes the gap.
## Candidate signals (pick โฅ 3 to ship)
diff --git a/tasks/0.4.0/02-score-diff-on-pr.md b/tasks/0.5.0/02-score-diff-on-pr.md
similarity index 100%
rename from tasks/0.4.0/02-score-diff-on-pr.md
rename to tasks/0.5.0/02-score-diff-on-pr.md
diff --git a/tasks/0.4.0/03-claude-code-skill.md b/tasks/0.5.0/03-claude-code-skill.md
similarity index 97%
rename from tasks/0.4.0/03-claude-code-skill.md
rename to tasks/0.5.0/03-claude-code-skill.md
index 5c39ccb..f239695 100644
--- a/tasks/0.4.0/03-claude-code-skill.md
+++ b/tasks/0.5.0/03-claude-code-skill.md
@@ -4,7 +4,7 @@
## Goal
-Ship a Claude Code skill that fetches the active repo's score and recommends a model, plus the public lookup endpoint it depends on and a UI page that explains the integration. Bundled because the endpoint has no other consumer in 0.4.0 and the three pieces only make sense together.
+Ship a Claude Code skill that fetches the active repo's score and recommends a model, plus the public lookup endpoint it depends on and a UI page that explains the integration. Bundled because the endpoint has no other consumer in 0.5.0 and the three pieces only make sense together.
Installable via `npx skills add hsnice16/agent-friendly-code` (the [vercel-labs/skills](https://github.com/vercel-labs/skills) CLI auto-discovers a top-level `skills/` directory).
diff --git a/tasks/0.5.0/README.md b/tasks/0.5.0/README.md
index 01007f2..e82c9d8 100644
--- a/tasks/0.5.0/README.md
+++ b/tasks/0.5.0/README.md
@@ -1,10 +1,11 @@
-# 0.5.0 โ auto-refresh + smarter matching
+# 0.5.0 โ quick wins
**Status**: planned
-Two moderate-effort items that need real infra (a webhook receiver + queue, an embedding model and a vector store) but don't yet need user accounts or registry crawls. Together they shift the product from a manual-seed snapshot to a self-updating dataset with smarter matching.
+The cheapest items on the roadmap, paired so they can ship in one cut. History-aware signals extend the existing host-API clients with ~3 new signal files; the PR-diff GitHub Action is a thin wrapper that calls the badge / API endpoints we already ship; the Claude Code skill bundles a small lookup endpoint, a `SKILL.md` shipped from this repo, and a UI integration page. No new infra, no new deps of consequence โ high-impact additions that don't need a heavy release.
## Tasks
-- [01-webhook-rescoring.md](./01-webhook-rescoring.md) โ keep scores fresh on every push; detect regressions. Webhook receiver + signature verification + rescore queue.
-- [02-alternatives-v2-embeddings.md](./02-alternatives-v2-embeddings.md) โ sentence-transformer embeddings on the README; cosine-similar neighbors = alternatives. Lifts the v1 same-language SQL heuristic so cross-language alternatives surface correctly (e.g. `axios` โ `requests`).
+- [01-history-aware-signals.md](./01-history-aware-signals.md) โ extend the scorer with maintenance recency, commit velocity, and contributor activity. Hybrid fetch (shallow clone for files, host API for history) โ degrades gracefully without a token.
+- [02-score-diff-on-pr.md](./02-score-diff-on-pr.md) โ GitHub Action that comments the score delta on every PR using the existing `/api/repo/:id` and `/badge/...svg` endpoints.
+- [03-claude-code-skill.md](./03-claude-code-skill.md) โ Claude Code skill (installable via `npx skills add`) that fetches the active repo's score and recommends a model, bundled with the public `/api/score` lookup endpoint it depends on and a UI integration page with the SessionStart hook snippet.
diff --git a/tasks/0.5.0/01-webhook-rescoring.md b/tasks/0.6.0/01-webhook-rescoring.md
similarity index 100%
rename from tasks/0.5.0/01-webhook-rescoring.md
rename to tasks/0.6.0/01-webhook-rescoring.md
diff --git a/tasks/0.5.0/02-alternatives-v2-embeddings.md b/tasks/0.6.0/02-alternatives-v2-embeddings.md
similarity index 100%
rename from tasks/0.5.0/02-alternatives-v2-embeddings.md
rename to tasks/0.6.0/02-alternatives-v2-embeddings.md
diff --git a/tasks/0.6.0/README.md b/tasks/0.6.0/README.md
index a86c56b..0555daf 100644
--- a/tasks/0.6.0/README.md
+++ b/tasks/0.6.0/README.md
@@ -1,10 +1,10 @@
-# 0.6.0 โ maintainer ownership + at-scale discovery
+# 0.6.0 โ auto-refresh + smarter matching
**Status**: planned
-Two heavier items that depend on real surface-area additions: an OAuth flow with per-user DB writes, and a registry-side discovery surface (per-registry leaderboards + a browser userscript). Bundled because both require new external touchpoints โ auth provider sessions, browser extension distribution, registry-page DOM probes โ that warrant a single release cut.
+Two moderate-effort items that need real infra (a webhook receiver + queue, an embedding model and a vector store) but don't yet need user accounts or registry crawls. Together they shift the product from a manual-seed snapshot to a self-updating dataset with smarter matching.
## Tasks
-- [01-opt-out-claim-flow.md](./01-opt-out-claim-flow.md) โ OAuth so maintainers can claim or opt out of their listing. First touchpoint that writes to the DB on behalf of a user.
-- [02-package-registry-overlay.md](./02-package-registry-overlay.md) โ at-scale package overlay: per-registry leaderboards on the dashboard + a browser userscript that renders the badge inline on npmjs.com / PyPI / crates.io. Builds on the v0.3.0 lookup endpoint.
+- [01-webhook-rescoring.md](./01-webhook-rescoring.md) โ keep scores fresh on every push; detect regressions. Webhook receiver + signature verification + rescore queue.
+- [02-alternatives-v2-embeddings.md](./02-alternatives-v2-embeddings.md) โ sentence-transformer embeddings on the README; cosine-similar neighbors = alternatives. Lifts the v1 same-language SQL heuristic so cross-language alternatives surface correctly (e.g. `axios` โ `requests`).
diff --git a/tasks/0.6.0/01-opt-out-claim-flow.md b/tasks/0.7.0/01-opt-out-claim-flow.md
similarity index 100%
rename from tasks/0.6.0/01-opt-out-claim-flow.md
rename to tasks/0.7.0/01-opt-out-claim-flow.md
diff --git a/tasks/0.6.0/02-package-registry-overlay.md b/tasks/0.7.0/02-package-registry-overlay.md
similarity index 96%
rename from tasks/0.6.0/02-package-registry-overlay.md
rename to tasks/0.7.0/02-package-registry-overlay.md
index 7774288..e56f379 100644
--- a/tasks/0.6.0/02-package-registry-overlay.md
+++ b/tasks/0.7.0/02-package-registry-overlay.md
@@ -4,7 +4,7 @@
## Goal
-The v0.3.0 lookup (`tasks/0.3.0/05-package-registry-overlay.md`) answers "is this specific package scored?" on demand. v0.6.0 turns that into a proactive dependency-choice signal: per-registry leaderboards and a browser-side overlay that renders our badge inline on npmjs.com / PyPI / crates.io.
+The v0.3.0 lookup (`tasks/0.3.0/05-package-registry-overlay.md`) answers "is this specific package scored?" on demand. v0.7.0 turns that into a proactive dependency-choice signal: per-registry leaderboards and a browser-side overlay that renders our badge inline on npmjs.com / PyPI / crates.io.
## Scope
diff --git a/tasks/0.7.0/README.md b/tasks/0.7.0/README.md
new file mode 100644
index 0000000..372a1c7
--- /dev/null
+++ b/tasks/0.7.0/README.md
@@ -0,0 +1,10 @@
+# 0.7.0 โ maintainer ownership + at-scale discovery
+
+**Status**: planned
+
+Two heavier items that depend on real surface-area additions: an OAuth flow with per-user DB writes, and a registry-side discovery surface (per-registry leaderboards + a browser userscript). Bundled because both require new external touchpoints โ auth provider sessions, browser extension distribution, registry-page DOM probes โ that warrant a single release cut.
+
+## Tasks
+
+- [01-opt-out-claim-flow.md](./01-opt-out-claim-flow.md) โ OAuth so maintainers can claim or opt out of their listing. First touchpoint that writes to the DB on behalf of a user.
+- [02-package-registry-overlay.md](./02-package-registry-overlay.md) โ at-scale package overlay: per-registry leaderboards on the dashboard + a browser userscript that renders the badge inline on npmjs.com / PyPI / crates.io. Builds on the v0.3.0 lookup endpoint.
diff --git a/tasks/1.0.0/02-at-scale-indexing.md b/tasks/1.0.0/02-at-scale-indexing.md
index 2ade25d..f2c61c8 100644
--- a/tasks/1.0.0/02-at-scale-indexing.md
+++ b/tasks/1.0.0/02-at-scale-indexing.md
@@ -28,7 +28,7 @@ These two tasks ship together as "production at scale": Postgres unblocks the cr
- **GitHub rate limit**: 5000 req/hr per token. Token pool + round-robin to scale. Respect `X-RateLimit-Remaining` + `Retry-After` headers.
- **Clone bandwidth**: `--depth 1 --single-branch` per repo (unchanged). At 50 MB median, 20k repos/day โ 1 TB/day. Realistic only on a cloud host with bandwidth-included pricing (Hetzner, Fly, Vercel functions with a worker).
- **Storage**: Postgres row count grows linearly with indexed repos ร models ร signals. Index-heavy schema; partition by host or by last-scored-at month after ~100k rows.
-- **Freshness**: webhook-driven rescoring (v0.4.0) handles pushes on already-indexed repos. The crawler fills in repos we haven't seen yet and re-checks long-dormant ones.
+- **Freshness**: webhook-driven rescoring (v0.6.0) handles pushes on already-indexed repos. The crawler fills in repos we haven't seen yet and re-checks long-dormant ones.
## Scope guard
diff --git a/tasks/1.0.0/README.md b/tasks/1.0.0/README.md
index c92090d..2a01eff 100644
--- a/tasks/1.0.0/README.md
+++ b/tasks/1.0.0/README.md
@@ -4,7 +4,7 @@
The big-rebase release: storage that handles concurrent writers, a dataset that reflects the ecosystem rather than a curated sample, and per-model weights derived from measured agent success instead of illustrative guesses. Bundled because each item changes the product's foundations โ storage shape, dataset shape, weight shape โ and shipping them together draws a single "1.0" line that downstream API consumers can pin against. From 1.0.0 forward, breaking changes require a MAJOR bump.
-Postgres lands first (the others assume concurrent writes the SQLite file can't handle). Benchmark harness sequences last because it needs the full signal set (history-aware signals from 0.4.0) to regress against.
+Postgres lands first (the others assume concurrent writes the SQLite file can't handle). Benchmark harness sequences last because it needs the full signal set (history-aware signals from 0.5.0) to regress against.
## Tasks
diff --git a/tests/scorer.test.ts b/tests/scorer.test.ts
index 4c6107d..5591678 100644
--- a/tests/scorer.test.ts
+++ b/tests/scorer.test.ts
@@ -1,5 +1,6 @@
import { strict as assert } from "node:assert";
import { afterEach, describe, test } from "node:test";
+
import { scoreRepo, topImprovements } from "../lib/scoring/scorer";
import { SIGNALS } from "../lib/scoring/signals";
import type { SignalResult } from "../lib/scoring/signals/types";
@@ -19,11 +20,15 @@ function richFixtureFiles(): FixtureFiles {
"CONTRIBUTING.md": "contrib",
"AGENTS.md": "x".repeat(1500),
"README.md": "y".repeat(1500),
+ "GEMINI.md": "g".repeat(1500),
"docker-compose.yml": "services: {}",
".github/workflows/ci.yml": ciYaml(),
"src/index.ts": "export const x = 1;",
"tests/smoke.test.ts": "// placeholder",
+ ".aider.conf.yml": "test-cmd: bun run test",
"lefthook.yml": "pre-commit:\n commands: {}",
+ ".openhands/setup.sh": "#!/usr/bin/env bash\nbun install",
+ ".cursor/rules/style.mdc": "---\nname: style\n---\nUse Tailwind.",
"package.json": JSON.stringify({
name: "demo",
scripts: { dev: "d", build: "b", test: "t" },
diff --git a/tests/signals/aider-conf.test.ts b/tests/signals/aider-conf.test.ts
new file mode 100644
index 0000000..416f824
--- /dev/null
+++ b/tests/signals/aider-conf.test.ts
@@ -0,0 +1,39 @@
+import { strict as assert } from "node:assert";
+import { afterEach, describe, test } from "node:test";
+
+import { aiderConf } from "../../lib/scoring/signals/aider-conf";
+import { makeFixture, removeFixture } from "../_helpers";
+
+describe("aiderConf signal", () => { // Covers aiderConf.check() for three cases: no config file, .aider.conf.yml, and .aider.conf.yaml.
+ let fixture = ""; // value returned by makeFixture for the current test (presumably a temp-dir path — confirm in tests/_helpers); "" means nothing to clean up
+
+ afterEach(() => { // remove the fixture the finished test created, then reset the handle
+ if (fixture) {
+ removeFixture(fixture);
+ fixture = "";
+ }
+ });
+
+ test("pass=0 when neither .aider.conf.yml nor .aider.conf.yaml exists", () => {
+ fixture = makeFixture({ "README.md": "irrelevant" }); // fixture holds only an unrelated file
+ assert.equal(aiderConf.check(fixture).pass, 0);
+ });
+
+ test("pass=1 for .aider.conf.yml", () => {
+ fixture = makeFixture({
+ ".aider.conf.yml": "test-cmd: bun run test\nlint-cmd: bun run lint",
+ });
+ const r = aiderConf.check(fixture);
+
+ assert.equal(r.pass, 1);
+ assert.match(r.matchedPath ?? "", /\.aider\.conf\.yml$/); // ?? "" substitutes an empty string for a null/undefined matchedPath, which cannot satisfy the regex
+ });
+
+ test("pass=1 for .aider.conf.yaml (alternate extension)", () => {
+ fixture = makeFixture({ ".aider.conf.yaml": "model: gpt-4" });
+ const r = aiderConf.check(fixture);
+
+ assert.equal(r.pass, 1);
+ assert.match(r.matchedPath ?? "", /\.aider\.conf\.yaml$/); // only the path suffix is checked, so any leading directory is accepted
+ });
+});
diff --git a/tests/signals/cursor-rules.test.ts b/tests/signals/cursor-rules.test.ts
new file mode 100644
index 0000000..587ddc2
--- /dev/null
+++ b/tests/signals/cursor-rules.test.ts
@@ -0,0 +1,56 @@
+import { strict as assert } from "node:assert";
+import { afterEach, describe, test } from "node:test";
+
+import { cursorRules } from "../../lib/scoring/signals/cursor-rules";
+import { makeFixture, removeFixture } from "../_helpers";
+
+describe("cursorRules signal", () => { // Suite for cursorRules.check(): absent, modern .cursor/rules/*.mdc, and legacy .cursorrules.
+ let fixture = ""; // Value returned by makeFixture for the running test; "" means nothing to clean up.
+
+ afterEach(() => { // After every test, pass any fixture the test created to removeFixture.
+ if (fixture) {
+ removeFixture(fixture);
+ fixture = ""; // Reset so a following test that never assigns does not trigger a second removal.
+ }
+ });
+
+ test("pass=0 when neither modern nor legacy file exists", () => {
+ fixture = makeFixture({ "README.md": "irrelevant" });
+ const r = cursorRules.check(fixture);
+
+ assert.equal(r.pass, 0);
+ assert.equal(r.matchedPath, undefined); // Strict assert: matchedPath must be exactly undefined, not null or "".
+ });
+
+ test("pass=1 when .cursor/rules/ contains at least one .mdc file", () => {
+ fixture = makeFixture({
+ ".cursor/rules/style.mdc": "---\nname: style\n---\nUse Tailwind.",
+ });
+ const r = cursorRules.check(fixture);
+
+ assert.equal(r.pass, 1);
+ assert.match(r.matchedPath ?? "", /\.cursor\/rules\/style\.mdc$/); // Suffix match only; "" stands in when matchedPath is nullish.
+ });
+
+ test("pass=0 when .cursor/rules/ exists but contains no .mdc files", () => {
+ fixture = makeFixture({ ".cursor/rules/notes.txt": "not an mdc" }); // Directory is present but holds only a .txt file.
+ assert.equal(cursorRules.check(fixture).pass, 0);
+ });
+
+ test("pass=0.5 for legacy .cursorrules file", () => {
+ fixture = makeFixture({ ".cursorrules": "Use Tailwind. Prefer RSC." });
+ const r = cursorRules.check(fixture);
+
+ assert.equal(r.pass, 0.5);
+ assert.equal(r.matchedPath, ".cursorrules"); // Exact comparison here, unlike the suffix regex used for the .mdc case.
+ });
+
+ test("modern .cursor/rules/*.mdc takes precedence over legacy .cursorrules", () => {
+ fixture = makeFixture({
+ ".cursorrules": "legacy content",
+ ".cursor/rules/style.mdc": "---\nname: style\n---\nmodern",
+ });
+
+ assert.equal(cursorRules.check(fixture).pass, 1); // Only the score is asserted; matchedPath is not checked in this case.
+ });
+});
diff --git a/tests/signals/gemini-md.test.ts b/tests/signals/gemini-md.test.ts
new file mode 100644
index 0000000..ba582f9
--- /dev/null
+++ b/tests/signals/gemini-md.test.ts
@@ -0,0 +1,49 @@
+import { strict as assert } from "node:assert";
+import { afterEach, describe, test } from "node:test";
+
+import { geminiMd } from "../../lib/scoring/signals/gemini-md";
+import { makeFixture, removeFixture } from "../_helpers";
+
+describe("geminiMd signal", () => { // Suite for geminiMd.check(): score tiers by GEMINI.md length, plus filename case handling.
+ let fixture = ""; // Value returned by makeFixture for the running test; "" means nothing to clean up.
+
+ afterEach(() => { // After every test, pass any fixture the test created to removeFixture.
+ if (fixture) {
+ removeFixture(fixture);
+ fixture = ""; // Reset so a following test that never assigns does not trigger a second removal.
+ }
+ });
+
+ test("pass=0 when no GEMINI.md exists", () => {
+ fixture = makeFixture({ "README.md": "irrelevant" });
+ assert.equal(geminiMd.check(fixture).pass, 0);
+ });
+
+ test("pass=0.2 for empty GEMINI.md", () => {
+ fixture = makeFixture({ "GEMINI.md": "" }); // File is present but has zero characters.
+ assert.equal(geminiMd.check(fixture).pass, 0.2);
+ });
+
+ test("pass=0.5 for thin file (<200 chars)", () => {
+ fixture = makeFixture({ "GEMINI.md": "Short notes." }); // 12 characters, well under the 200 named in the title.
+ assert.equal(geminiMd.check(fixture).pass, 0.5);
+ });
+
+ test("pass=0.8 for moderate file (200 ≤ len < 800)", () => {
+ fixture = makeFixture({ "GEMINI.md": "x".repeat(500) }); // Mid-range sample; the exact 200 and 800 boundaries are not exercised.
+ assert.equal(geminiMd.check(fixture).pass, 0.8);
+ });
+
+ test("pass=1 for substantive file (≥800 chars)", () => {
+ fixture = makeFixture({ "GEMINI.md": "x".repeat(1200) });
+ const r = geminiMd.check(fixture);
+
+ assert.equal(r.pass, 1);
+ assert.match(r.matchedPath ?? "", /GEMINI\.md$/i); // Case-insensitive suffix match; "" stands in when matchedPath is nullish.
+ });
+
+ test("matches case-insensitively", () => {
+ fixture = makeFixture({ "Gemini.md": "x".repeat(1200) }); // Mixed-case filename with the same 1200-char body as above.
+ assert.equal(geminiMd.check(fixture).pass, 1);
+ });
+});
diff --git a/tests/signals/openhands-setup.test.ts b/tests/signals/openhands-setup.test.ts
new file mode 100644
index 0000000..8cab60c
--- /dev/null
+++ b/tests/signals/openhands-setup.test.ts
@@ -0,0 +1,36 @@
+import { strict as assert } from "node:assert";
+import { afterEach, describe, test } from "node:test";
+
+import { openhandsSetup } from "../../lib/scoring/signals/openhands-setup";
+import { makeFixture, removeFixture } from "../_helpers";
+
+describe("openhandsSetup signal", () => { // Suite for openhandsSetup.check(): missing, empty, and populated .openhands/setup.sh.
+ let fixture = ""; // Value returned by makeFixture for the running test; "" means nothing to clean up.
+
+ afterEach(() => { // After every test, pass any fixture the test created to removeFixture.
+ if (fixture) {
+ removeFixture(fixture);
+ fixture = ""; // Reset so a following test that never assigns does not trigger a second removal.
+ }
+ });
+
+ test("pass=0 when .openhands/setup.sh is missing", () => {
+ fixture = makeFixture({ "README.md": "irrelevant" });
+ assert.equal(openhandsSetup.check(fixture).pass, 0);
+ });
+
+ test("pass=0.2 when setup.sh is empty", () => {
+ fixture = makeFixture({ ".openhands/setup.sh": "" }); // File is present but has zero characters.
+ assert.equal(openhandsSetup.check(fixture).pass, 0.2);
+ });
+
+ test("pass=1 when setup.sh has content", () => {
+ fixture = makeFixture({
+ ".openhands/setup.sh": "#!/usr/bin/env bash\nbun install\nbun run init-db",
+ });
+ const r = openhandsSetup.check(fixture);
+
+ assert.equal(r.pass, 1);
+ assert.match(r.matchedPath ?? "", /\.openhands\/setup\.sh$/); // Suffix match only; "" stands in when matchedPath is nullish.
+ });
+});