Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 10 additions & 6 deletions deepdive.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@

/** Extract the first JSON object from a model response (tolerates code fences). */
export function extractJsonObject(rawText) {
let text = (rawText || '').trim().replace(/^```(?:json)?\s*/i, '').replace(/\s*```$/, '');

Check warning on line 22 in deepdive.js

View workflow job for this annotation

GitHub Actions / test

'text' is never reassigned. Use 'const' instead
const start = text.indexOf('{');
const end = text.lastIndexOf('}');
if (start === -1 || end === -1) throw new Error('No JSON object found in response');
Expand All @@ -28,8 +28,12 @@

// ─── Stage 0: fetch source (GitHub-first; others degrade to README only) ──────

async function ghJson(url) {
const r = await fetch(url, { headers: { Accept: 'application/vnd.github+json' } });
/**
 * GET a GitHub API URL and return the parsed JSON body.
 *
 * @param {string} url - URL to request.
 * @param {{ githubToken?: string } | null} [opts] - optional settings; when
 *   `githubToken` is set it is sent as a Bearer Authorization header. A
 *   missing or null `opts` means an anonymous request.
 * @returns {Promise<any>} the parsed JSON response body.
 * @throws {Error} `GitHub <status> for <url>` when the response is not ok.
 */
async function ghJson(url, opts = {}) {
  const headers = { Accept: 'application/vnd.github+json' };
  // Optional auth (MCP only; the extension passes no token). Lifts the 60/hr
  // anon GitHub limit that blueprint/deep-dive would otherwise trip mid-scan.
  // `?.` because the `= {}` default only covers undefined, not an explicit null.
  const token = opts?.githubToken;
  if (token) headers.Authorization = `Bearer ${token}`;
  const r = await fetch(url, { headers });
  if (!r.ok) throw new Error(`GitHub ${r.status} for ${url}`);
  return r.json();
}
Expand Down Expand Up @@ -58,13 +62,13 @@
* Returns { tree: string[], files: [{path, content}], degraded: boolean }.
* Only GitHub fetches real source; other platforms return a degraded result.
*/
export async function fetchSource(platform, repoId) {
export async function fetchSource(platform, repoId, opts = {}) {
if (platform !== 'github') return { tree: [], files: [], degraded: true };

const meta = await ghJson(`https://api.github.com/repos/${repoId}`);
const meta = await ghJson(`https://api.github.com/repos/${repoId}`, opts);
const branch = meta.default_branch || 'main';
const treeRes = await ghJson(
`https://api.github.com/repos/${repoId}/git/trees/${branch}?recursive=1`
`https://api.github.com/repos/${repoId}/git/trees/${branch}?recursive=1`, opts
);
const allPaths = (treeRes.tree || []).filter(e => e.type === 'blob').map(e => e.path);
const tree = allPaths.slice(0, MAX_TREE_PATHS);
Expand All @@ -73,7 +77,7 @@
const files = [];
for (const path of keyPaths) {
try {
const data = await ghJson(`https://api.github.com/repos/${repoId}/contents/${encodeURIComponent(path).replace(/%2F/g, '/')}`);
const data = await ghJson(`https://api.github.com/repos/${repoId}/contents/${encodeURIComponent(path).replace(/%2F/g, '/')}`, opts);
if (data.encoding === 'base64' && data.content) {
const content = atob(data.content.replace(/\n/g, '')).slice(0, MAX_FILE_CHARS);
files.push({ path, content });
Expand Down
24 changes: 16 additions & 8 deletions fetcher.js
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
async function fetchJson(url) {
const r = await fetch(url);
/**
 * Fetch a URL and return its parsed JSON body.
 *
 * @param {string} url - URL to request.
 * @param {object} [headers] - request headers; when falsy, no init object is
 *   passed to fetch at all.
 * @returns {Promise<any>} the parsed JSON response body.
 * @throws {Error} `HTTP <status> for <url>` when the response is not ok.
 */
async function fetchJson(url, headers) {
  // Only build an init object when there are headers to send.
  const init = headers ? { headers } : undefined;
  const response = await fetch(url, init);
  if (response.ok) {
    return response.json();
  }
  throw new Error(`HTTP ${response.status} for ${url}`);
}

export async function fetchRepoData(platform, repoId) {
if (platform === 'github') return fetchGitHub(repoId);
// Builds the GitHub Authorization header from an MCP caller's token. Returns
// null when no token is supplied, so the anonymous path (extension use) sends
// exactly the same request as before.
function ghHeaders(opts) {
  const token = opts?.githubToken;
  if (!token) return null;
  return { Authorization: `Bearer ${token}` };
}

export async function fetchRepoData(platform, repoId, opts = {}) {
if (platform === 'github') return fetchGitHub(repoId, opts);
if (platform === 'gitlab') return fetchGitLab(repoId);
if (platform === 'npm') return fetchNpm(repoId);
if (platform === 'pypi') return fetchPyPI(repoId);
Expand All @@ -22,11 +28,13 @@ function bytesToComposition(langs) {
.map(([name, bytes]) => ({ name, pct: Math.round((bytes / total) * 100) }));
}

async function fetchGitHub(repoId) {
async function fetchGitHub(repoId, opts = {}) {
const headers = ghHeaders(opts);
const init = headers ? { headers } : undefined;
const [meta, readmeRes, langRes] = await Promise.all([
fetchJson(`https://api.github.com/repos/${repoId}`),
fetch(`https://api.github.com/repos/${repoId}/readme`).catch(() => ({ ok: false })),
fetch(`https://api.github.com/repos/${repoId}/languages`).catch(() => ({ ok: false })),
fetchJson(`https://api.github.com/repos/${repoId}`, headers),
fetch(`https://api.github.com/repos/${repoId}/readme`, init).catch(() => ({ ok: false })),
fetch(`https://api.github.com/repos/${repoId}/languages`, init).catch(() => ({ ok: false })),
]);
let readme = '';
if (readmeRes.ok) {
Expand Down
73 changes: 55 additions & 18 deletions mcp/README.md
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
# RepoLens MCP server

A local [MCP](https://modelcontextprotocol.io) server that exposes RepoLens's repo
analysis as a tool. An LLM client (Claude Desktop, Cursor, etc.) calls `scan_repo`
and gets RepoLens's verdict-first JSON back — ready to render as components.
analysis as tools. An LLM client (Claude Desktop, Cursor, etc.) calls a tool and
gets RepoLens's JSON back — ready to render as components.

This is a **thin proof**: one tool, GitHub-only, Anthropic-only. It reuses the
extension's own pipeline (`fetcher.js` → `prompt.js` → `parser.js`); only the
provider call is MCP-specific.
GitHub-only, Anthropic-only, **three tools** (`scan_repo`, `blueprint_scene`,
`deep_dive`). Each reuses the extension's own pipeline modules verbatim
(`fetcher.js`, `prompt.js`, `parser.js`, `deepdive.js`, `blueprint-adapter.js`);
only the provider call (`anthropic.js`) is MCP-specific.

## What stays true

Expand All @@ -28,7 +29,8 @@ provider call is MCP-specific.
"license": "MIT",
"stars": 21000,
"description": "Small, fast web framework for the edges.",
"fit": "strong",
"fit": { "level": "strong", "label": "Strong fit", "why": "Health 92 · 0 flags · 4 pros / 1 cons" },
"bottom_line": "A lean, fast framework worth adopting for edge runtimes.",
"health": { "score": 92 },
"pros": ["..."],
"cons": ["..."],
Expand All @@ -37,32 +39,65 @@ provider call is MCP-specific.
}
```

`fit` is derived deterministically from the health score, red-flag count, and
pros/cons balance — `level` is one of `strong | solid | care | risky`.

### `blueprint_scene({ repo })`

Maps how the repo is built and returns a laid-out graph — `nodes` (key parts) and
Maps how the repo is built and returns a laid-out scene — `nodes` (key parts) and
`edges` (how they relate), with positions — ready for a `<DependencyGraph>`-style
component. Heavier than `scan_repo`: it reads source and makes two model calls
(atoms, then lineage).
(atoms, then lineage). Edges are engine-shaped (`{ id, from, to, rel }`), not
`{ source, target }`.

```json
{
"id": "repo:...", "scope": "blueprint", "repoId": "honojs/hono",
"nodes": [{ "id": "app", "label": "Hono app", "kind": "entrypoint", "x": 120, "y": 40 }],
"edges": [{ "source": "app", "target": "router", "label": "depends-on" }],
"nodes": [{ "id": "app", "label": "Hono app", "kind": "entrypoint", "x": 120, "y": 40,
"layer": "entrypoint", "ref": { "root": true, "purpose": "...", "files": ["src/hono.ts"] } }],
"edges": [{ "id": "e123", "from": "app", "to": "router", "rel": "depends-on" }],
"camera": { "x": 0, "y": 0, "zoom": 1 }
}
```

### `deep_dive({ repo })`

Explains how the repo actually works in plain language, with the weak spots named.
Returns a from-scratch `explanation`, the `gaps` and `assumptions` behind it,
self-test `questions`, per-claim `confidence`, plus the underlying `atoms` and
`lineage`. **Heaviest tool** — reads source and makes three model calls
(atoms → lineage → Feynman).

```json
{
"repoId": "honojs/hono",
"degraded": false,
"explanation": "Hono is a small web framework that ...",
"gaps": ["..."],
"assumptions": ["..."],
"questions": [{ "q": "What runs a request?", "a": "..." }],
"confidence": [{ "claim": "...", "level": "high", "note": "..." }],
"atoms": [{ "id": "router", "name": "Router", "kind": "subsystem", "purpose": "..." }],
"lineage": { "links": [{ "from": "app", "to": "router", "relation": "depends-on" }], "roots": ["app"], "leaves": [] }
}
```

## Setup

```bash
cd mcp
npm install
export ANTHROPIC_API_KEY=sk-ant-... # required
export ANTHROPIC_MODEL=claude-sonnet-4-6 # optional override
node server.js # speaks MCP over stdio
export ANTHROPIC_API_KEY=sk-ant-... # required
export ANTHROPIC_MODEL=claude-sonnet-4-6 # optional override
export ANTHROPIC_TIMEOUT_MS=60000 # optional; hard per-call timeout (default 60s)
export GITHUB_TOKEN=ghp_... # optional; lifts GitHub 60/hr → 5000/hr
node server.js # speaks MCP over stdio
```

A `GITHUB_TOKEN` is **strongly recommended** for `blueprint_scene` and `deep_dive`:
each makes 10+ GitHub calls per run and will hit the 60 req/hr anonymous limit
(surfacing as a mid-scan `GitHub 403`) without one.

### Add to Claude Desktop

In `claude_desktop_config.json`:
Expand All @@ -73,15 +108,17 @@ In `claude_desktop_config.json`:
"repolens": {
"command": "node",
"args": ["/absolute/path/to/repolens/mcp/server.js"],
"env": { "ANTHROPIC_API_KEY": "sk-ant-..." }
"env": { "ANTHROPIC_API_KEY": "sk-ant-...", "GITHUB_TOKEN": "ghp_..." }
}
}
}
```

## Notes

- Unauthenticated GitHub requests are rate-limited. For heavier use, a `GITHUB_TOKEN`
pass-through is a follow-up.
- Next (follow-ups): `deep_dive` (the plain-English layer), multi-provider, and
npm / PyPI / GitLab support; a `GITHUB_TOKEN` pass-through for higher rate limits.
- `deep_dive` and `blueprint_scene` are GitHub-deep but README-shallow elsewhere:
only GitHub exposes a file tree, so on other platforms they degrade (`deep_dive`
sets `degraded: true`).
- Next (follow-ups): multi-provider (reuse the extension's `providers.js` registry),
npm / PyPI / GitLab inputs for `scan_repo` (the fetcher already supports them —
only the input parser is GitHub-only), and a `tools/list` structural smoke test.
43 changes: 30 additions & 13 deletions mcp/anthropic.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

const ENDPOINT = 'https://api.anthropic.com/v1/messages';
const DEFAULT_MODEL = 'claude-sonnet-4-6';
const DEFAULT_TIMEOUT_MS = 60_000;

/**
* @param {string} prompt - the fully-assembled analysis prompt.
Expand All @@ -14,20 +15,36 @@ export async function callAnthropic(prompt) {
const key = process.env.ANTHROPIC_API_KEY;
if (!key) throw new Error('ANTHROPIC_API_KEY is not set in the environment');
const model = process.env.ANTHROPIC_MODEL || DEFAULT_MODEL;
const timeoutMs = Number(process.env.ANTHROPIC_TIMEOUT_MS) || DEFAULT_TIMEOUT_MS;

const res = await fetch(ENDPOINT, {
method: 'POST',
headers: {
'content-type': 'application/json',
'anthropic-version': '2023-06-01',
'x-api-key': key,
},
body: JSON.stringify({
model,
max_tokens: 4096,
messages: [{ role: 'user', content: prompt }],
}),
});
// Hard timeout so a stalled connection can't hang the tool call forever
// (mirrors the extension's per-provider timeout in background.js).
const controller = new AbortController();
const timer = setTimeout(() => controller.abort(), timeoutMs);
let res;
try {
res = await fetch(ENDPOINT, {
method: 'POST',
headers: {
'content-type': 'application/json',
'anthropic-version': '2023-06-01',
'x-api-key': key,
},
body: JSON.stringify({
model,
max_tokens: 4096,
messages: [{ role: 'user', content: prompt }],
}),
signal: controller.signal,
});
} catch (err) {
if (err && err.name === 'AbortError') {
throw new Error(`Anthropic request timed out after ${timeoutMs}ms`);
}
throw err;
} finally {
clearTimeout(timer);
}

if (!res.ok) {
const detail = await res.text().catch(() => '');
Expand Down
41 changes: 33 additions & 8 deletions mcp/blueprint-scene.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import { fetchSource, buildAtomsPrompt, parseAtoms, buildLineagePrompt, parseLin
import { buildBlueprintScene } from '../blueprint-adapter.js';
import { parseRepoInput } from './repo-input.js';
import { callAnthropic } from './anthropic.js';
import { ghOpts } from './github-auth.js';

export const BLUEPRINT_TOOL = {
name: 'blueprint_scene',
Expand All @@ -25,6 +26,8 @@ export const BLUEPRINT_TOOL = {
required: ['repo'],
additionalProperties: false,
},
// Mirrors the real scene object returned by buildBlueprintScene (scene.js +
// repair-graph.js): engine-shaped nodes/edges, not a {source,target} graph.
outputSchema: {
type: 'object',
properties: {
Expand All @@ -39,33 +42,55 @@ export const BLUEPRINT_TOOL = {
properties: {
id: { type: 'string' },
label: { type: 'string' },
kind: { type: 'string' },
kind: { type: 'string', description: 'subsystem|module|concept|entrypoint|data' },
layer: { type: ['string', 'null'] },
x: { type: 'number' },
y: { type: 'number' },
pinned: { type: 'boolean' },
ref: {
type: 'object',
description: 'root = lineage root (load-bearing); plus purpose + files',
properties: {
root: { type: 'boolean' },
purpose: { type: ['string', 'null'] },
files: { type: 'array', items: { type: 'string' } },
},
},
},
required: ['id', 'label', 'kind', 'x', 'y'],
},
},
edges: {
type: 'array',
items: {
type: 'object',
properties: {
source: { type: 'string' },
target: { type: 'string' },
label: { type: 'string' },
id: { type: 'string' },
from: { type: 'string' },
to: { type: 'string' },
rel: { type: 'string', description: 'depends-on|enables|triggers|derives-from' },
note: { type: ['string', 'null'] },
userDrawn: { type: 'boolean' },
},
required: ['id', 'from', 'to', 'rel'],
},
},
camera: { type: 'object' },
annotations: { type: 'array' },
camera: {
type: 'object',
properties: { x: { type: 'number' }, y: { type: 'number' }, zoom: { type: 'number' } },
},
source: { type: 'object', description: 'lens + timestamps' },
},
required: ['nodes', 'edges'],
required: ['id', 'nodes', 'edges'],
},
};

export async function runBlueprintScene(args) {
const { platform, repoId } = parseRepoInput(args?.repo);
const repoData = await fetchRepoData(platform, repoId);
const source = await fetchSource(platform, repoId);
const opts = ghOpts();
const repoData = await fetchRepoData(platform, repoId, opts);
const source = await fetchSource(platform, repoId, opts);
const { atoms } = parseAtoms(await callAnthropic(buildAtomsPrompt(repoData, source, null)));
const lineage = parseLineage(await callAnthropic(buildLineagePrompt(atoms)));
return buildBlueprintScene({ deepDive: { atoms, lineage }, repoId, title: repoId });
Expand Down
Loading
Loading