From 5316bbdc3ebb76629d9972045bef34932b8272f7 Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 8 Apr 2026 13:33:34 -0700 Subject: [PATCH 1/5] Add initial config persistence design to agentServerSessions.md --- ts/docs/architecture/agentServerSessions.md | 118 +++++++++++++++++++- 1 file changed, 113 insertions(+), 5 deletions(-) diff --git a/ts/docs/architecture/agentServerSessions.md b/ts/docs/architecture/agentServerSessions.md index 3cd04a822..e4bdae9c7 100644 --- a/ts/docs/architecture/agentServerSessions.md +++ b/ts/docs/architecture/agentServerSessions.md @@ -41,6 +41,33 @@ The dispatcher already has the scaffolding for session persistence: However, this is **transparent to clients**: there is no protocol-level API to list, choose, or delete sessions. The server always resumes whatever was last active. +### Instance Storage vs. Session Storage + +The dispatcher exposes two storage scopes to agents via `SessionContext`: + +- **`instanceStorage`** — scoped to `persistDir` (the instance root). Intended for configuration and data that should **survive across dispatcher sessions** (e.g. agent auth tokens, user preferences, learned config). Agents write here and expect to read it back regardless of which session the user is in. +- **`sessionStorage`** — scoped to `persistDir/sessions/<sessionId>/`. Intended for ephemeral, session-local data (e.g. caches, in-progress state) that is discarded when the user creates a new session. + +In `sessionContext.ts`, the mapping is explicit: + +```typescript +const storage = storageProvider.getStorage(name, sessionDirPath); // sessionStorage +const instanceStorage = storageProvider.getStorage(name, context.persistDir); // instanceStorage +``` + +This contract — `instanceStorage` survives, `sessionStorage` is ephemeral — holds today in both the standalone Shell and the CLI. 
+ +### The Problem with Scoping `persistDir` per Server Session + +Naively scoping each server-session's `persistDir` to `server-sessions/<server-session-id>/` breaks this contract: + +``` +server-sessions/<server-session-id>/ ← persistDir → instanceStorage root +server-sessions/<server-session-id>/sessions/<session-id>/ ← sessionStorage +``` + +**Every time a new server session is created, both `instanceStorage` and `sessionStorage` start fresh.** Agent configuration data (auth tokens, user preferences, learned state) is silently discarded whenever the user connects to a new server session. The fix is a split storage root described in Section 4. + ### One Shared Context for All Clients A critical detail: `createSharedDispatcher()` calls `initializeCommandHandlerContext()` **once** at startup, producing a single `context`. Every subsequent `join()` call creates a `Dispatcher` via `createDispatcherFromContext(context, connectionId, ...)` — all clients share the same underlying session context. Chat history, conversation memory, and session config are fully shared state. The `connectionId` only isolates `ClientIO` routing (display output reaches the right client), not the conversation itself. @@ -77,7 +104,7 @@ Each session is identified by: ### 2. Session Metadata -A `sessions.json` file lives at `persistDir/server-sessions/sessions.json` and is the authoritative registry: +A `sessions.json` file lives at `instanceDir/server-sessions/sessions.json` and is the authoritative registry: ```json { @@ -91,7 +118,7 @@ A `sessions.json` file lives at `persistDir/server-sessions/sessions.json` and i } ``` -Each session's full data (chat history, conversation memory, display log) is stored in `persistDir/server-sessions/<sessionId>/` — the same layout that exists today, but keyed on UUID. +Each session's ephemeral data (chat history, conversation memory, display log, session config) is stored in `instanceDir/server-sessions/<sessionId>/`. 
Agent `instanceStorage` (config, auth tokens, learned state) is stored directly under `instanceDir//`, **shared across all server sessions**. > **Note:** `clientCount` is a runtime-only field — it is **never written to `sessions.json`**. It is populated at query time by inspecting the live dispatcher pool. @@ -166,7 +193,85 @@ AgentServer └── SharedDispatcher ← client 2 (connected to session B) ``` -Each session's `SharedDispatcher` is created lazily on first `joinSession()` and calls `initializeCommandHandlerContext()` with a `persistDir` scoped to `server-sessions//`, giving it fully isolated chat history, conversation memory, display log, and session config. Clients connecting to the same session share one dispatcher instance and its routing `ClientIO` table, consistent with how the current single dispatcher works today. +#### Storage Split: `instanceDir` vs. `persistDir` + +To preserve the `instanceStorage` / `sessionStorage` contract across server sessions, the dispatcher must be initialized with **two distinct root directories** rather than one: + +| Directory | Purpose | Lifetime | +| ------------- | --------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | +| `instanceDir` | Global instance root — maps to `instanceStorage` for all agents. Contains agent config, auth tokens, user preferences, embedding cache. | Lives for the lifetime of the agentServer process (or the user profile). Never scoped per server session. | +| `persistDir` | Per-server-session root — maps to `sessionStorage` and holds chat history, conversation memory, display log, and session config. | Scoped to `instanceDir/server-sessions//`. Discarded with the session. 
| + +**Concrete paths:** + +``` +~/.typeagent/profiles/dev/ ← instanceDir (global) +~/.typeagent/profiles/dev/server-sessions// ← persistDir (per session) +~/.typeagent/profiles/dev/server-sessions//sessions// ← sessionStorage +~/.typeagent/profiles/dev// ← instanceStorage (global) +``` + +#### `DispatcherOptions` changes + +`initializeCommandHandlerContext()` today accepts a single `persistDir`. To support the split, a new optional `instanceDir` field is added: + +```typescript +type DispatcherOptions = { + // ...existing fields... + persistDir?: string; // per-server-session directory (chat history, memory, config) + instanceDir?: string; // global instance directory for cross-session agent storage + // ... +}; +``` + +When `instanceDir` is provided, `instanceStorage` is rooted there instead of at `persistDir`. When `instanceDir` is omitted (standalone Shell, CLI, tests), behavior is unchanged — `instanceStorage` falls back to `persistDir`, preserving full backward compatibility. + +#### `SessionContext` wiring + +In `sessionContext.ts`, the `instanceStorage` base changes from `context.persistDir` to the new `context.instanceDir` (falling back to `context.persistDir` when `instanceDir` is absent): + +```typescript +const instanceStorage = + (context.instanceDir ?? context.persistDir) + ? storageProvider!.getStorage( + name, + context.instanceDir ?? context.persistDir!, + ) + : undefined; +``` + +This is the only change needed in the storage wiring — no changes to the `Storage` interface or agent code. + +#### Server initialization + +When the agentServer starts up, it resolves both directories once and passes them to every per-session dispatcher: + +```typescript +const instanceDir = getProfilePath("dev"); // e.g. 
~/.typeagent/profiles/dev +const persistDir = path.join(instanceDir, "server-sessions", sessionId); // per-session subdirectory + +initializeCommandHandlerContext("agentServer", { + instanceDir, // global — never changes between sessions + persistDir, // scoped to this server session + persistSession: true, + // ... +}); +``` + +#### `CommandHandlerContext` changes + +A new `instanceDir` field is added alongside the existing `persistDir`: + +```typescript +export type CommandHandlerContext = { + // ...existing fields... + readonly persistDir: string | undefined; // per-server-session root (chat, memory, config) + readonly instanceDir: string | undefined; // global instance root (agent config, auth tokens) + // ... +}; +``` + +Each session's `SharedDispatcher` is created lazily on first `joinSession()` and calls `initializeCommandHandlerContext()` with a `persistDir` scoped to `server-sessions//` and a shared `instanceDir`, giving it fully isolated chat history and session config while preserving agent configuration across session boundaries. Clients connecting to the same session share one dispatcher instance and its routing `ClientIO` table, consistent with how the current single dispatcher works today. `SharedDispatcher.join()` calls `createDispatcherFromContext(context, connectionId, ...)` per client — producing a lightweight `Dispatcher` handle bound to a unique `connectionId` but sharing the same underlying context. Output routing is per-client via `connectionId`; conversation state is shared across all clients in the session. @@ -185,12 +290,14 @@ Each session uses namespaced WebSocket channels to allow multiple sessions over Client calls joinSession({ sessionId?, clientType, filter }) │ ├─ sessionId provided? 
- │ ├─ Yes → look up sessions.json + │ ├─ Yes → look up instanceDir/server-sessions/sessions.json │ │ ├─ Found → load SharedDispatcher for this session (lazy init if not in memory pool) │ │ └─ Not found → return error: "Session not found" │ └─ No → connect to the default session │ ├─ Session named "default" exists → use it │ └─ No sessions exist → auto-create session named "default" + │ ├─ Create instanceDir/server-sessions// ← persistDir + │ └─ Init dispatcher with instanceDir (global) + persistDir (session-scoped) │ ├─ Register client in session's SharedDispatcher routing table └─ Return JoinSessionResult { connectionId, sessionId } @@ -224,7 +331,7 @@ SessionInfo[] 1. Close all active client dispatcher handles for the session. 2. Shut down and evict the session's `SharedDispatcher` from the in-memory pool. -3. Remove `persistDir/server-sessions//` from disk (recursive delete, best-effort). +3. Remove `instanceDir/server-sessions//` from disk (recursive delete of the `persistDir` subtree only, best-effort). **Agent `instanceStorage` under `instanceDir//` is not touched.** 4. Remove the entry from `sessions.json`. > **Note:** Any connected client can call `deleteSession` on any session, including sessions they are not currently connected to. The calling client's session-namespaced channels are cleaned up immediately; other clients connected to the deleted session have their dispatcher handles closed when `SharedDispatcher.close()` is called. Server-side authorization is out of scope for v1 (see Open Questions). @@ -314,6 +421,7 @@ This design adds explicit session management to the agentServer without fundamen - `listSessions(name?)` with optional substring filtering as the primary session discovery mechanism. - Session-namespaced WebSocket channels (`dispatcher:`, `clientio:`) enabling multiple concurrent sessions over a single connection. - Idle dispatcher eviction after 5 minutes to free memory for inactive sessions. 
+- **A split storage root**: `instanceDir` (global, shared across all server sessions) and `persistDir` (per-server-session, discarded with the session). `instanceStorage` is rooted at `instanceDir`, preserving agent configuration and auth tokens across session boundaries. `sessionStorage` and all ephemeral dispatcher data (chat history, memory, display log) remain scoped to `persistDir`. A new `instanceDir` field is added to `DispatcherOptions` and `CommandHandlerContext`; when absent, behavior falls back to `persistDir` for full backward compatibility with the standalone Shell, CLI, and tests. The server enforces no policy on who can join or delete a session — `clientCount` gives clients the signal to make that decision themselves. From 7691a5d7f273443a0cc27ba220d53403d291f33b Mon Sep 17 00:00:00 2001 From: George Ng Date: Wed, 8 Apr 2026 14:56:46 -0700 Subject: [PATCH 2/5] Add excalidraw agent for converting documents to Excalidraw diagrams Implements a new TypeAgent agent (excalidraw-agent) that uses an LLM to convert source materials (markdown, plain text, Visio XML, Mermaid, and architecture descriptions) into valid Excalidraw JSON diagrams. Includes createDiagram and exportDiagram actions, with output defaulting to the user's Documents folder. 
Co-Authored-By: Claude Sonnet 4.6 --- ts/packages/agents/excalidraw/package.json | 57 +++ .../excalidraw/src/excalidrawActionHandler.ts | 374 ++++++++++++++++++ .../excalidraw/src/excalidrawActionSchema.ts | 41 ++ .../excalidraw/src/excalidrawManifest.json | 10 + .../agents/excalidraw/src/tsconfig.json | 12 + ts/packages/agents/excalidraw/tsconfig.json | 11 + .../data/config.agent.json | 4 + .../defaultAgentProvider/data/config.all.json | 4 + .../defaultAgentProvider/data/config.json | 4 + ts/packages/defaultAgentProvider/package.json | 1 + ts/pnpm-lock.yaml | 34 ++ 11 files changed, 552 insertions(+) create mode 100644 ts/packages/agents/excalidraw/package.json create mode 100644 ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts create mode 100644 ts/packages/agents/excalidraw/src/excalidrawActionSchema.ts create mode 100644 ts/packages/agents/excalidraw/src/excalidrawManifest.json create mode 100644 ts/packages/agents/excalidraw/src/tsconfig.json create mode 100644 ts/packages/agents/excalidraw/tsconfig.json diff --git a/ts/packages/agents/excalidraw/package.json b/ts/packages/agents/excalidraw/package.json new file mode 100644 index 000000000..99d6cee4f --- /dev/null +++ b/ts/packages/agents/excalidraw/package.json @@ -0,0 +1,57 @@ +{ + "name": "excalidraw-agent", + "version": "0.0.1", + "private": true, + "description": "Excalidraw diagram generation agent - converts documents and descriptions into Excalidraw diagrams", + "homepage": "https://github.com/microsoft/TypeAgent#readme", + "repository": { + "type": "git", + "url": "https://github.com/microsoft/TypeAgent.git", + "directory": "ts/packages/agents/excalidraw" + }, + "license": "MIT", + "author": "Microsoft", + "type": "module", + "exports": { + "./agent/manifest": "./src/excalidrawManifest.json", + "./agent/handlers": "./dist/excalidrawActionHandler.js" + }, + "scripts": { + "asc": "asc -i ./src/excalidrawActionSchema.ts -o ./dist/excalidrawSchema.pas.json -t ExcalidrawAction", + "build": 
"concurrently npm:tsc npm:asc", + "clean": "rimraf --glob dist *.tsbuildinfo *.done.build.log", + "prettier": "prettier --check . --ignore-path ../../../.prettierignore", + "prettier:fix": "prettier --write . --ignore-path ../../../.prettierignore", + "tsc": "tsc -b" + }, + "dependencies": { + "@typeagent/agent-sdk": "workspace:*", + "aiclient": "workspace:*", + "telemetry": "workspace:*", + "typechat-utils": "workspace:*" + }, + "devDependencies": { + "@typeagent/action-schema-compiler": "workspace:*", + "concurrently": "^9.1.2", + "prettier": "^3.5.3", + "rimraf": "^6.0.1", + "typescript": "~5.4.5" + }, + "fluidBuild": { + "tasks": { + "asc": { + "dependsOn": [ + "@typeagent/action-schema-compiler#tsc" + ], + "files": { + "inputGlobs": [ + "src/excalidrawActionSchema.ts" + ], + "outputGlobs": [ + "dist/excalidrawSchema.pas.json" + ] + } + } + } + } +} diff --git a/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts b/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts new file mode 100644 index 000000000..b800a1040 --- /dev/null +++ b/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts @@ -0,0 +1,374 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. 
+ +import { + ActionContext, + AppAction, + AppAgent, + ActionResult, +} from "@typeagent/agent-sdk"; +import { + createActionResult, + createActionResultFromTextDisplay, + createActionResultFromError, +} from "@typeagent/agent-sdk/helpers/action"; +import { openai } from "aiclient"; +import { + CreateDiagramAction, + ExcalidrawAction, + ExportDiagramAction, +} from "./excalidrawActionSchema.js"; + +import fs from "node:fs"; +import path from "node:path"; +import os from "node:os"; + +export function instantiate(): AppAgent { + return { + executeAction: executeExcalidrawAction, + }; +} + +type ExcalidrawActionContext = { + store: undefined; +}; + +async function executeExcalidrawAction( + action: AppAction, + context: ActionContext, +): Promise { + return handleExcalidrawAction(action as ExcalidrawAction, context); +} + +async function handleExcalidrawAction( + action: ExcalidrawAction, + context: ActionContext, +): Promise { + switch (action.actionName) { + case "createDiagram": + return handleCreateDiagram(action as CreateDiagramAction, context); + case "exportDiagram": + return handleExportDiagram(action as ExportDiagramAction); + default: + throw new Error(`Unknown action: ${(action as any).actionName}`); + } +} + +function getDefaultOutputDir(): string { + // Use Documents folder, works on Windows (%USERPROFILE%\Documents) and Unix (~Documents) + const documentsDir = path.join(os.homedir(), "Documents"); + if (!fs.existsSync(documentsDir)) { + fs.mkdirSync(documentsDir, { recursive: true }); + } + return documentsDir; +} + +function sanitizeFilename(name: string): string { + return name + .replace(/[<>:"/\\|?*]/g, "_") + .replace(/\s+/g, "_") + .substring(0, 100); +} + +function resolveOutputPath( + outputPath: string | undefined, + diagramTitle: string | undefined, +): string { + if (outputPath) { + // Ensure it has .excalidraw extension + if (!outputPath.endsWith(".excalidraw")) { + outputPath += ".excalidraw"; + } + return path.resolve(outputPath); + } + + 
const title = diagramTitle ?? "diagram"; + const filename = `${sanitizeFilename(title)}_${Date.now()}.excalidraw`; + return path.join(getDefaultOutputDir(), filename); +} + +function buildSystemPrompt(sourceType: string): string { + return `You are an expert diagram generator. Your task is to convert the provided content into a valid Excalidraw JSON diagram. + +OUTPUT FORMAT: +You MUST output ONLY valid JSON in Excalidraw format. Do NOT include any markdown code fences, explanations, or text outside the JSON. + +The JSON must have this top-level structure: +{ + "type": "excalidraw", + "version": 2, + "source": "typeagent-excalidraw", + "elements": [...], + "appState": { + "gridSize": null, + "viewBackgroundColor": "#ffffff" + }, + "files": {} +} + +ELEMENT TYPES you can use: +1. "rectangle" - for boxes/containers/nodes +2. "ellipse" - for circular/oval nodes +3. "diamond" - for decision points +4. "text" - for labels (can be standalone or bound to shapes) +5. "arrow" - for connections/relationships between elements +6. 
"line" - for non-directional connections + +ELEMENT STRUCTURE (required fields for each element): +{ + "id": "", + "type": "", + "x": , + "y": , + "width": , + "height": , + "angle": 0, + "strokeColor": "#1e1e1e", + "backgroundColor": "", + "fillStyle": "solid", + "strokeWidth": 2, + "strokeStyle": "solid", + "roughness": 1, + "opacity": 100, + "seed": , + "version": 1, + "versionNonce": , + "isDeleted": false, + "boundElements": null, + "updated": 1, + "link": null, + "locked": false, + "groupIds": [], + "frameId": null, + "roundness": { "type": 3 } +} + +For TEXT elements, also include: + "text": "", + "fontSize": 20, + "fontFamily": 1, + "textAlign": "center", + "verticalAlign": "middle", + "baseline": 18, + "containerId": null (or the id of the shape it's bound to) + +For ARROW elements, also include: + "points": [[0, 0], [, ]], + "startBinding": { "elementId": "", "focus": 0, "gap": 8 }, + "endBinding": { "elementId": "", "focus": 0, "gap": 8 }, + "startArrowhead": null, + "endArrowhead": "arrow" + +To bind text to a shape: +- On the shape element, set "boundElements": [{"id": "", "type": "text"}] +- On the text element, set "containerId": "" + +LAYOUT GUIDELINES: +- Space elements at least 200px apart horizontally and 150px vertically +- Use a left-to-right or top-to-bottom flow layout +- Group related items visually +- Use colors to distinguish different categories: + - "#a5d8ff" (light blue) for primary components + - "#b2f2bb" (light green) for data/storage + - "#ffd8a8" (light orange) for external services + - "#d0bfff" (light purple) for processing/logic + - "#ffc9c9" (light red) for errors/alerts + - "#fff3bf" (light yellow) for notes/annotations + +SOURCE CONTENT TYPE: ${sourceType} +- If "markdown": Parse headings as major nodes, bullet points as sub-nodes, and create hierarchy +- If "text": Identify key concepts, entities, and relationships to create a concept diagram +- If "visio-xml": Parse the XML structure to recreate the diagram layout with 
shapes and connectors +- If "mermaid": Parse Mermaid syntax (flowchart, sequence, etc.) and convert to Excalidraw elements +- If "architecture": Create an architecture diagram with components, layers, and data flow arrows + +Generate a clean, well-organized diagram that visually represents the content. Every shape that has a label MUST have a bound text element.`; +} + +async function handleCreateDiagram( + action: CreateDiagramAction, + context: ActionContext, +): Promise { + const { sourceContent, sourceType, diagramTitle, outputPath } = + action.parameters; + + // Show loading indicator + context.actionIO.setDisplay({ + type: "html", + content: ` +
+
+
Generating Excalidraw diagram...
+
`, + }); + + try { + const chatModel = openai.createJsonChatModel(); + const systemPrompt = buildSystemPrompt(sourceType); + const userPrompt = `Convert the following ${sourceType} content into an Excalidraw diagram:\n\n${sourceContent}`; + + const response = await chatModel.complete([ + { role: "system", content: systemPrompt }, + { role: "user", content: userPrompt }, + ]); + + // Clear loading display + context.actionIO.setDisplay({ + type: "html", + content: "", + }); + + if (!response.success) { + return createActionResultFromError( + `Failed to generate diagram: ${response.message}`, + ); + } + + // Parse and validate the Excalidraw JSON + let excalidrawData: ExcalidrawDocument; + try { + excalidrawData = JSON.parse(response.data); + } catch { + return createActionResultFromError( + "The AI returned invalid JSON. Please try again with clearer input.", + ); + } + + // Ensure required top-level fields + if (!excalidrawData.type) { + excalidrawData.type = "excalidraw"; + } + if (!excalidrawData.version) { + excalidrawData.version = 2; + } + if (!excalidrawData.source) { + excalidrawData.source = "typeagent-excalidraw"; + } + if (!excalidrawData.elements) { + excalidrawData.elements = []; + } + if (!excalidrawData.appState) { + excalidrawData.appState = { + gridSize: null, + viewBackgroundColor: "#ffffff", + }; + } + if (!excalidrawData.files) { + excalidrawData.files = {}; + } + + // Write the file + const resolvedPath = resolveOutputPath(outputPath, diagramTitle); + const outputDir = path.dirname(resolvedPath); + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); + } + + const jsonOutput = JSON.stringify(excalidrawData, null, 2); + fs.writeFileSync(resolvedPath, jsonOutput, "utf-8"); + + const elementCount = excalidrawData.elements.length; + const displayTitle = diagramTitle ?? 
"Excalidraw diagram"; + + const result = createActionResultFromTextDisplay( + `📐 ${displayTitle} created successfully!\n\n` + + `📁 Saved to: ${resolvedPath}\n` + + `📊 Elements: ${elementCount}\n` + + `📄 Source type: ${sourceType}\n\n` + + `Open the .excalidraw file in Excalidraw (https://excalidraw.com) to view and edit.`, + `Created Excalidraw diagram "${displayTitle}" with ${elementCount} elements at ${resolvedPath}`, + ); + + result.entities.push({ + name: path.basename(resolvedPath), + type: ["file", "diagram", "excalidraw"], + }); + + return result; + } catch (error) { + // Clear loading display on error + context.actionIO.setDisplay({ + type: "html", + content: "", + }); + + const errorMessage = + error instanceof Error ? error.message : String(error); + return createActionResultFromError( + `Failed to create diagram: ${errorMessage}`, + ); + } +} + +function handleExportDiagram(action: ExportDiagramAction): ActionResult { + const { excalidrawJson, outputPath } = action.parameters; + + try { + // Validate JSON + let parsed: ExcalidrawDocument; + try { + parsed = JSON.parse(excalidrawJson); + } catch { + return createActionResultFromError( + "The provided Excalidraw JSON is not valid JSON.", + ); + } + + // Ensure required top-level fields + if (!parsed.type) { + parsed.type = "excalidraw"; + } + if (!parsed.version) { + parsed.version = 2; + } + + const resolvedPath = resolveOutputPath(outputPath, undefined); + const outputDir = path.dirname(resolvedPath); + if (!fs.existsSync(outputDir)) { + fs.mkdirSync(outputDir, { recursive: true }); + } + + const jsonOutput = JSON.stringify(parsed, null, 2); + fs.writeFileSync(resolvedPath, jsonOutput, "utf-8"); + + const result = createActionResult( + `Excalidraw diagram exported to: ${resolvedPath}`, + ); + + result.entities.push({ + name: path.basename(resolvedPath), + type: ["file", "diagram", "excalidraw"], + }); + + return result; + } catch (error) { + const errorMessage = + error instanceof Error ? 
error.message : String(error); + return createActionResultFromError( + `Failed to export diagram: ${errorMessage}`, + ); + } +} + +// Type definition for the Excalidraw document format +interface ExcalidrawDocument { + type?: string; + version?: number; + source?: string; + elements: ExcalidrawElement[]; + appState?: { + gridSize: number | null; + viewBackgroundColor: string; + }; + files?: Record; +} + +interface ExcalidrawElement { + id: string; + type: string; + x: number; + y: number; + width: number; + height: number; + [key: string]: unknown; +} diff --git a/ts/packages/agents/excalidraw/src/excalidrawActionSchema.ts b/ts/packages/agents/excalidraw/src/excalidrawActionSchema.ts new file mode 100644 index 000000000..c206d87fe --- /dev/null +++ b/ts/packages/agents/excalidraw/src/excalidrawActionSchema.ts @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. +// Licensed under the MIT License. + +export type ExcalidrawAction = CreateDiagramAction | ExportDiagramAction; + +// Creates an Excalidraw diagram from source content such as documentation, +// architecture descriptions, Visio XML exports, Mermaid markdown, or plain text. +// The agent uses AI to interpret the content and generate a visual diagram. +export type CreateDiagramAction = { + actionName: "createDiagram"; + parameters: { + // the original request from the user + originalRequest: string; + // the source content to convert into a diagram (text, markdown, Visio XML, etc.) + sourceContent: string; + // the type of the source content + sourceType: + | "markdown" + | "text" + | "visio-xml" + | "mermaid" + | "architecture"; + // optional title for the diagram + diagramTitle?: string; + // optional output file path; defaults to ~/Documents/.excalidraw + outputPath?: string; + }; +}; + +// Exports a previously generated or provided Excalidraw JSON to a file at the specified path. 
+export type ExportDiagramAction = { + actionName: "exportDiagram"; + parameters: { + // the original request from the user + originalRequest: string; + // the Excalidraw JSON content to export (as a string) + excalidrawJson: string; + // the output file path for the .excalidraw file + outputPath: string; + }; +}; diff --git a/ts/packages/agents/excalidraw/src/excalidrawManifest.json b/ts/packages/agents/excalidraw/src/excalidrawManifest.json new file mode 100644 index 000000000..a22268193 --- /dev/null +++ b/ts/packages/agents/excalidraw/src/excalidrawManifest.json @@ -0,0 +1,10 @@ +{ + "emojiChar": "📐", + "description": "Agent to create Excalidraw diagrams from documents, descriptions, and structured content", + "schema": { + "description": "Excalidraw agent that generates diagrams from text, markdown, Visio XML, Mermaid, and architecture descriptions using AI.", + "originalSchemaFile": "./excalidrawActionSchema.ts", + "schemaFile": "../dist/excalidrawSchema.pas.json", + "schemaType": "ExcalidrawAction" + } +} diff --git a/ts/packages/agents/excalidraw/src/tsconfig.json b/ts/packages/agents/excalidraw/src/tsconfig.json new file mode 100644 index 000000000..85efcd566 --- /dev/null +++ b/ts/packages/agents/excalidraw/src/tsconfig.json @@ -0,0 +1,12 @@ +{ + "extends": "../../../../tsconfig.base.json", + "compilerOptions": { + "composite": true, + "rootDir": ".", + "outDir": "../dist" + }, + "include": ["./**/*"], + "ts-node": { + "esm": true + } +} diff --git a/ts/packages/agents/excalidraw/tsconfig.json b/ts/packages/agents/excalidraw/tsconfig.json new file mode 100644 index 000000000..acb9cb4a9 --- /dev/null +++ b/ts/packages/agents/excalidraw/tsconfig.json @@ -0,0 +1,11 @@ +{ + "extends": "../../../tsconfig.base.json", + "compilerOptions": { + "composite": true + }, + "include": [], + "references": [{ "path": "./src" }], + "ts-node": { + "esm": true + } +} diff --git a/ts/packages/defaultAgentProvider/data/config.agent.json 
b/ts/packages/defaultAgentProvider/data/config.agent.json index 2a2bc48f6..614690fee 100644 --- a/ts/packages/defaultAgentProvider/data/config.agent.json +++ b/ts/packages/defaultAgentProvider/data/config.agent.json @@ -53,6 +53,10 @@ "weather": { "name": "weather-agent", "path": "../agents/weather" + }, + "excalidraw": { + "name": "excalidraw-agent", + "execMode": "dispatcher" } }, "mcpServers": { diff --git a/ts/packages/defaultAgentProvider/data/config.all.json b/ts/packages/defaultAgentProvider/data/config.all.json index 7a1799200..570a2f634 100644 --- a/ts/packages/defaultAgentProvider/data/config.all.json +++ b/ts/packages/defaultAgentProvider/data/config.all.json @@ -65,6 +65,10 @@ "weather": { "name": "weather-agent", "path": "../agents/weather" + }, + "excalidraw": { + "name": "excalidraw-agent", + "execMode": "dispatcher" } }, "mcpServers": { diff --git a/ts/packages/defaultAgentProvider/data/config.json b/ts/packages/defaultAgentProvider/data/config.json index e9125deb5..bdd3cb756 100644 --- a/ts/packages/defaultAgentProvider/data/config.json +++ b/ts/packages/defaultAgentProvider/data/config.json @@ -66,6 +66,10 @@ }, "utility": { "name": "utility-typeagent" + }, + "excalidraw": { + "name": "excalidraw-agent", + "execMode": "dispatcher" } }, "mcpServers": { diff --git a/ts/packages/defaultAgentProvider/package.json b/ts/packages/defaultAgentProvider/package.json index f626c72c8..974c9f7f0 100644 --- a/ts/packages/defaultAgentProvider/package.json +++ b/ts/packages/defaultAgentProvider/package.json @@ -55,6 +55,7 @@ "desktop-automation": "workspace:*", "dispatcher-node-providers": "workspace:*", "email": "workspace:*", + "excalidraw-agent": "workspace:*", "exifreader": "^4.30.1", "file-size": "^1.0.0", "glob": "^13.0.0", diff --git a/ts/pnpm-lock.yaml b/ts/pnpm-lock.yaml index a827bc469..74b4c5e4c 100644 --- a/ts/pnpm-lock.yaml +++ b/ts/pnpm-lock.yaml @@ -1882,6 +1882,37 @@ importers: specifier: ~5.4.5 version: 5.4.5 + packages/agents/excalidraw: + 
dependencies: + '@typeagent/agent-sdk': + specifier: workspace:* + version: link:../../agentSdk + aiclient: + specifier: workspace:* + version: link:../../aiclient + telemetry: + specifier: workspace:* + version: link:../../telemetry + typechat-utils: + specifier: workspace:* + version: link:../../utils/typechatUtils + devDependencies: + '@typeagent/action-schema-compiler': + specifier: workspace:* + version: link:../../actionSchemaCompiler + concurrently: + specifier: ^9.1.2 + version: 9.1.2 + prettier: + specifier: ^3.5.3 + version: 3.5.3 + rimraf: + specifier: ^6.0.1 + version: 6.0.1 + typescript: + specifier: ~5.4.5 + version: 5.4.5 + packages/agents/greeting: dependencies: '@typeagent/agent-sdk': @@ -3319,6 +3350,9 @@ importers: email: specifier: workspace:* version: link:../agents/email + excalidraw-agent: + specifier: workspace:* + version: link:../agents/excalidraw exifreader: specifier: ^4.30.1 version: 4.30.1 From c661b54d2f05f43e2c591719f6bf89fc23c9f13d Mon Sep 17 00:00:00 2001 From: George Ng <georgeng@microsoft.com> Date: Wed, 8 Apr 2026 15:21:00 -0700 Subject: [PATCH 3/5] Undo polluted agentServerSessions.md --- ts/docs/architecture/agentServerSessions.md | 118 +------------------- 1 file changed, 5 insertions(+), 113 deletions(-) diff --git a/ts/docs/architecture/agentServerSessions.md b/ts/docs/architecture/agentServerSessions.md index e4bdae9c7..3cd04a822 100644 --- a/ts/docs/architecture/agentServerSessions.md +++ b/ts/docs/architecture/agentServerSessions.md @@ -41,33 +41,6 @@ The dispatcher already has the scaffolding for session persistence: However, this is **transparent to clients**: there is no protocol-level API to list, choose, or delete sessions. The server always resumes whatever was last active. -### Instance Storage vs. Session Storage - -The dispatcher exposes two storage scopes to agents via `SessionContext`: - -- **`instanceStorage`** — scoped to `persistDir` (the instance root). 
Intended for configuration and data that should **survive across dispatcher sessions** (e.g. agent auth tokens, user preferences, learned config). Agents write here and expect to read it back regardless of which session the user is in. -- **`sessionStorage`** — scoped to `persistDir/sessions/<sessionId>/`. Intended for ephemeral, session-local data (e.g. caches, in-progress state) that is discarded when the user creates a new session. - -In `sessionContext.ts`, the mapping is explicit: - -```typescript -const storage = storageProvider.getStorage(name, sessionDirPath); // sessionStorage -const instanceStorage = storageProvider.getStorage(name, context.persistDir); // instanceStorage -``` - -This contract — `instanceStorage` survives, `sessionStorage` is ephemeral — holds today in both the standalone Shell and the CLI. - -### The Problem with Scoping `persistDir` per Server Session - -Naively scoping each server-session's `persistDir` to `server-sessions/<server-session-id>/` breaks this contract: - -``` -server-sessions/<server-session-id>/ ← persistDir → instanceStorage root -server-sessions/<server-session-id>/sessions/<session-id>/ ← sessionStorage -``` - -**Every time a new server session is created, both `instanceStorage` and `sessionStorage` start fresh.** Agent configuration data (auth tokens, user preferences, learned state) is silently discarded whenever the user connects to a new server session. The fix is a split storage root described in Section 4. - ### One Shared Context for All Clients A critical detail: `createSharedDispatcher()` calls `initializeCommandHandlerContext()` **once** at startup, producing a single `context`. Every subsequent `join()` call creates a `Dispatcher` via `createDispatcherFromContext(context, connectionId, ...)` — all clients share the same underlying session context. Chat history, conversation memory, and session config are fully shared state. 
The `connectionId` only isolates `ClientIO` routing (display output reaches the right client), not the conversation itself. @@ -104,7 +77,7 @@ Each session is identified by: ### 2. Session Metadata -A `sessions.json` file lives at `instanceDir/server-sessions/sessions.json` and is the authoritative registry: +A `sessions.json` file lives at `persistDir/server-sessions/sessions.json` and is the authoritative registry: ```json { @@ -118,7 +91,7 @@ A `sessions.json` file lives at `instanceDir/server-sessions/sessions.json` and } ``` -Each session's ephemeral data (chat history, conversation memory, display log, session config) is stored in `instanceDir/server-sessions/<sessionId>/`. Agent `instanceStorage` (config, auth tokens, learned state) is stored directly under `instanceDir/<agentName>/`, **shared across all server sessions**. +Each session's full data (chat history, conversation memory, display log) is stored in `persistDir/server-sessions/<sessionId>/` — the same layout that exists today, but keyed on UUID. > **Note:** `clientCount` is a runtime-only field — it is **never written to `sessions.json`**. It is populated at query time by inspecting the live dispatcher pool. @@ -193,85 +166,7 @@ AgentServer └── SharedDispatcher ← client 2 (connected to session B) ``` -#### Storage Split: `instanceDir` vs. `persistDir` - -To preserve the `instanceStorage` / `sessionStorage` contract across server sessions, the dispatcher must be initialized with **two distinct root directories** rather than one: - -| Directory | Purpose | Lifetime | -| ------------- | --------------------------------------------------------------------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------- | -| `instanceDir` | Global instance root — maps to `instanceStorage` for all agents. Contains agent config, auth tokens, user preferences, embedding cache. 
| Lives for the lifetime of the agentServer process (or the user profile). Never scoped per server session. | -| `persistDir` | Per-server-session root — maps to `sessionStorage` and holds chat history, conversation memory, display log, and session config. | Scoped to `instanceDir/server-sessions/<sessionId>/`. Discarded with the session. | - -**Concrete paths:** - -``` -~/.typeagent/profiles/dev/ ← instanceDir (global) -~/.typeagent/profiles/dev/server-sessions/<sessionId>/ ← persistDir (per session) -~/.typeagent/profiles/dev/server-sessions/<sessionId>/sessions/<id>/ ← sessionStorage -~/.typeagent/profiles/dev/<agentName>/ ← instanceStorage (global) -``` - -#### `DispatcherOptions` changes - -`initializeCommandHandlerContext()` today accepts a single `persistDir`. To support the split, a new optional `instanceDir` field is added: - -```typescript -type DispatcherOptions = { - // ...existing fields... - persistDir?: string; // per-server-session directory (chat history, memory, config) - instanceDir?: string; // global instance directory for cross-session agent storage - // ... -}; -``` - -When `instanceDir` is provided, `instanceStorage` is rooted there instead of at `persistDir`. When `instanceDir` is omitted (standalone Shell, CLI, tests), behavior is unchanged — `instanceStorage` falls back to `persistDir`, preserving full backward compatibility. - -#### `SessionContext` wiring - -In `sessionContext.ts`, the `instanceStorage` base changes from `context.persistDir` to the new `context.instanceDir` (falling back to `context.persistDir` when `instanceDir` is absent): - -```typescript -const instanceStorage = - (context.instanceDir ?? context.persistDir) - ? storageProvider!.getStorage( - name, - context.instanceDir ?? context.persistDir!, - ) - : undefined; -``` - -This is the only change needed in the storage wiring — no changes to the `Storage` interface or agent code. 
- -#### Server initialization - -When the agentServer starts up, it resolves both directories once and passes them to every per-session dispatcher: - -```typescript -const instanceDir = getProfilePath("dev"); // e.g. ~/.typeagent/profiles/dev -const persistDir = path.join(instanceDir, "server-sessions", sessionId); // per-session subdirectory - -initializeCommandHandlerContext("agentServer", { - instanceDir, // global — never changes between sessions - persistDir, // scoped to this server session - persistSession: true, - // ... -}); -``` - -#### `CommandHandlerContext` changes - -A new `instanceDir` field is added alongside the existing `persistDir`: - -```typescript -export type CommandHandlerContext = { - // ...existing fields... - readonly persistDir: string | undefined; // per-server-session root (chat, memory, config) - readonly instanceDir: string | undefined; // global instance root (agent config, auth tokens) - // ... -}; -``` - -Each session's `SharedDispatcher` is created lazily on first `joinSession()` and calls `initializeCommandHandlerContext()` with a `persistDir` scoped to `server-sessions/<sessionId>/` and a shared `instanceDir`, giving it fully isolated chat history and session config while preserving agent configuration across session boundaries. Clients connecting to the same session share one dispatcher instance and its routing `ClientIO` table, consistent with how the current single dispatcher works today. +Each session's `SharedDispatcher` is created lazily on first `joinSession()` and calls `initializeCommandHandlerContext()` with a `persistDir` scoped to `server-sessions/<sessionId>/`, giving it fully isolated chat history, conversation memory, display log, and session config. Clients connecting to the same session share one dispatcher instance and its routing `ClientIO` table, consistent with how the current single dispatcher works today. 
`SharedDispatcher.join()` calls `createDispatcherFromContext(context, connectionId, ...)` per client — producing a lightweight `Dispatcher` handle bound to a unique `connectionId` but sharing the same underlying context. Output routing is per-client via `connectionId`; conversation state is shared across all clients in the session. @@ -290,14 +185,12 @@ Each session uses namespaced WebSocket channels to allow multiple sessions over Client calls joinSession({ sessionId?, clientType, filter }) │ ├─ sessionId provided? - │ ├─ Yes → look up instanceDir/server-sessions/sessions.json + │ ├─ Yes → look up sessions.json │ │ ├─ Found → load SharedDispatcher for this session (lazy init if not in memory pool) │ │ └─ Not found → return error: "Session not found" │ └─ No → connect to the default session │ ├─ Session named "default" exists → use it │ └─ No sessions exist → auto-create session named "default" - │ ├─ Create instanceDir/server-sessions/<sessionId>/ ← persistDir - │ └─ Init dispatcher with instanceDir (global) + persistDir (session-scoped) │ ├─ Register client in session's SharedDispatcher routing table └─ Return JoinSessionResult { connectionId, sessionId } @@ -331,7 +224,7 @@ SessionInfo[] 1. Close all active client dispatcher handles for the session. 2. Shut down and evict the session's `SharedDispatcher` from the in-memory pool. -3. Remove `instanceDir/server-sessions/<sessionId>/` from disk (recursive delete of the `persistDir` subtree only, best-effort). **Agent `instanceStorage` under `instanceDir/<agentName>/` is not touched.** +3. Remove `persistDir/server-sessions/<sessionId>/` from disk (recursive delete, best-effort). 4. Remove the entry from `sessions.json`. > **Note:** Any connected client can call `deleteSession` on any session, including sessions they are not currently connected to. 
The calling client's session-namespaced channels are cleaned up immediately; other clients connected to the deleted session have their dispatcher handles closed when `SharedDispatcher.close()` is called. Server-side authorization is out of scope for v1 (see Open Questions). @@ -421,7 +314,6 @@ This design adds explicit session management to the agentServer without fundamen - `listSessions(name?)` with optional substring filtering as the primary session discovery mechanism. - Session-namespaced WebSocket channels (`dispatcher:<id>`, `clientio:<id>`) enabling multiple concurrent sessions over a single connection. - Idle dispatcher eviction after 5 minutes to free memory for inactive sessions. -- **A split storage root**: `instanceDir` (global, shared across all server sessions) and `persistDir` (per-server-session, discarded with the session). `instanceStorage` is rooted at `instanceDir`, preserving agent configuration and auth tokens across session boundaries. `sessionStorage` and all ephemeral dispatcher data (chat history, memory, display log) remain scoped to `persistDir`. A new `instanceDir` field is added to `DispatcherOptions` and `CommandHandlerContext`; when absent, behavior falls back to `persistDir` for full backward compatibility with the standalone Shell, CLI, and tests. The server enforces no policy on who can join or delete a session — `clientCount` gives clients the signal to make that decision themselves. 
From 02921106b3ba31d33f3e8dc58cad18577683bd07 Mon Sep 17 00:00:00 2001 From: George Ng <georgeng@microsoft.com> Date: Wed, 8 Apr 2026 21:14:55 -0700 Subject: [PATCH 4/5] Add file reading capabilities and fix arrow connections --- .../excalidraw/src/excalidrawActionHandler.ts | 299 +++++++++++++++++- .../excalidraw/src/excalidrawActionSchema.ts | 3 +- 2 files changed, 294 insertions(+), 8 deletions(-) diff --git a/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts b/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts index b800a1040..95a49646a 100644 --- a/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts +++ b/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts @@ -152,6 +152,15 @@ For TEXT elements, also include: "baseline": 18, "containerId": null (or the id of the shape it's bound to) +SIZING SHAPES TO FIT TEXT: +- Estimate text width: each character is approximately 12px wide at fontSize 20, with a minimum of 100px +- Estimate text height: each line of text is approximately 28px tall at fontSize 20 +- For multi-line text, count the lines and multiply +- Add padding of at least 24px on each horizontal side and 16px on each vertical side +- So for text "Hello World" (11 chars × 12px = 132px), the shape width should be at least 132 + 48 = 180px +- For shapes with longer text, increase width proportionally; never let text overflow its container +- Minimum shape dimensions: width 120px, height 60px + For ARROW elements, also include: "points": [[0, 0], [<dx>, <dy>]], "startBinding": { "elementId": "<source-id>", "focus": 0, "gap": 8 }, @@ -159,13 +168,39 @@ For ARROW elements, also include: "startArrowhead": null, "endArrowhead": "arrow" +ARROW BINDING RULES (critical — broken arrows ruin the diagram): +- Every arrow MUST have both "startBinding" and "endBinding" set (never null unless intentionally floating) +- "startBinding.elementId" must match the "id" of an existing shape element in the diagram +- "endBinding.elementId" 
must match the "id" of an existing shape element in the diagram +- On each shape that an arrow connects to, add the arrow's id to the shape's "boundElements" array: + "boundElements": [{"id": "<text-id>", "type": "text"}, {"id": "<arrow-id>", "type": "arrow"}] +- Arrow endpoints must land on the EDGE of their connected shapes, NOT at the center: + - Compute the centre of the source shape (sx = x + w/2, sy = y + h/2) and the centre of the target shape (tx = x + w/2, ty = y + h/2) + - The start point of the arrow is where the ray from source centre toward target centre exits the source rectangle + - The end point of the arrow is where that same ray enters the target rectangle + - Set the arrow's "x","y" to the start edge point + - Set points[0] to [0, 0] and points[1] to [endX - startX, endY - startY] +- Always verify that every elementId in startBinding/endBinding refers to a real shape id in the elements array + To bind text to a shape: - On the shape element, set "boundElements": [{"id": "<text-id>", "type": "text"}] - On the text element, set "containerId": "<shape-id>" +- The text element's x, y, width, and height MUST exactly match the container shape: + - "x": same as container x + - "y": same as container y + - "width": same as container width + - "height": same as container height + Excalidraw requires these to match for correct initial placement — do NOT use offsets or center-point coordinates + +ARROW LABEL TEXT: +- If an arrow needs a label, create a text element with "containerId": "<arrow-id>" and add {"id": "<label-id>", "type": "text"} to the arrow's "boundElements" +- Size the label text element to fit its content using the same character-width estimate above LAYOUT GUIDELINES: -- Space elements at least 200px apart horizontally and 150px vertically +- For "large concept" elements (primary nodes, major components, top-level headings): place them at least 96px (roughly 1 inch at 96 DPI) apart edge-to-edge, preferably 150–200px +- For smaller 
sub-elements or annotations: at least 60px apart edge-to-edge - Use a left-to-right or top-to-bottom flow layout +- Avoid overlapping elements; account for element width and height when computing positions, not just (x, y) origins - Group related items visually - Use colors to distinguish different categories: - "#a5d8ff" (light blue) for primary components @@ -182,15 +217,234 @@ SOURCE CONTENT TYPE: ${sourceType} - If "mermaid": Parse Mermaid syntax (flowchart, sequence, etc.) and convert to Excalidraw elements - If "architecture": Create an architecture diagram with components, layers, and data flow arrows -Generate a clean, well-organized diagram that visually represents the content. Every shape that has a label MUST have a bound text element.`; +SELF-REVIEW CHECKLIST (apply before outputting): +1. Every shape with a label has a bound text element (boundElements contains the text id, text has containerId set) +2. Every arrow has both startBinding and endBinding referencing real element ids in the elements array +3. Every shape connected by an arrow lists that arrow id in its boundElements array +4. No two elements overlap (check x, y, width, height for all pairs) +5. All text fits within its container shape (shape width/height is large enough per the sizing rules above) +6. Arrow labels (if any) have containerId pointing to the arrow id +7. No dangling element ids — every id referenced anywhere exists as an element in the array + +Generate a clean, well-organized diagram that visually represents the content. If anything in the source content is ambiguous (e.g. unclear direction of a relationship, unclear grouping), ask the user for clarification rather than guessing.`; +} + +/** + * Returns the point on the border of a rectangle (cx±w/2, cy±h/2) that is + * closest to the given external point (tx, ty). The rectangle is axis-aligned. 
/**
 * Returns the point where the ray from the centre of an axis-aligned
 * rectangle toward the target point (tx, ty) crosses the rectangle's border.
 *
 * Note this is the ray/border intersection, not the border point nearest to
 * the target; the two differ whenever the target is not directly in line
 * with the centre along an axis or diagonal.
 *
 * @param rx - x of the rectangle's origin corner
 * @param ry - y of the rectangle's origin corner
 * @param rw - rectangle width; a negative value is treated as the same
 *             rectangle extending in the opposite direction from rx
 * @param rh - rectangle height; a negative value is handled like rw
 * @param tx - x of the point the ray is aimed at
 * @param ty - y of the point the ray is aimed at
 * @returns [x, y] on the rectangle border. When the target coincides with
 *          the centre there is no direction to follow, so the top-centre of
 *          the rectangle is returned. When the target lies inside the
 *          rectangle the ray is extended past it until it reaches the border.
 *          A zero-size rectangle yields its own centre.
 */
function edgePoint(
    rx: number,
    ry: number,
    rw: number,
    rh: number,
    tx: number,
    ty: number,
): [number, number] {
    const cx = rx + rw / 2;
    const cy = ry + rh / 2;
    const dx = tx - cx;
    const dy = ty - cy;

    // No direction when the target sits exactly on the centre: fall back to
    // top-centre. Math.min picks the visually upper edge even if rh < 0, and
    // yields exactly `ry` for the usual non-negative height.
    if (dx === 0 && dy === 0) {
        return [cx, Math.min(ry, ry + rh)];
    }

    // Half-extents must be non-negative; otherwise a negative width/height
    // produces a negative scale and the point lands on the opposite side.
    const halfW = Math.abs(rw) / 2;
    const halfH = Math.abs(rh) / 2;

    // Factor by which (dx, dy) must be scaled to touch each pair of edges;
    // an axis with no displacement can never be the limiting one.
    const scaleX = dx !== 0 ? halfW / Math.abs(dx) : Infinity;
    const scaleY = dy !== 0 ? halfH / Math.abs(dy) : Infinity;

    // The smaller factor is the first edge the ray reaches.
    const scale = Math.min(scaleX, scaleY);

    return [cx + dx * scale, cy + dy * scale];
}
+ */ +function repairExcalidrawDiagram(doc: ExcalidrawDocument): string[] { + const warnings: string[] = []; + const elementIds = new Set(doc.elements.map((e) => e.id)); + const elementById = new Map(doc.elements.map((e) => [e.id, e])); + + // Fix arrows + const validElements: ExcalidrawElement[] = []; + for (const el of doc.elements) { + if (el.type === "arrow" || el.type === "line") { + const startId = (el.startBinding as any)?.elementId; + const endId = (el.endBinding as any)?.elementId; + + // Flag arrows with broken bindings with a visible error label + const brokenReasons: string[] = []; + if (startId && !elementIds.has(startId)) { + brokenReasons.push(`startBinding refs missing "${startId}"`); + el.startBinding = null; + } + if (endId && !elementIds.has(endId)) { + brokenReasons.push(`endBinding refs missing "${endId}"`); + el.endBinding = null; + } + if (brokenReasons.length > 0) { + const labelId = `error-label-${el.id}`; + const errorLabel: ExcalidrawElement = { + id: labelId, + type: "text", + x: el.x, + y: el.y - 30, + width: 300, + height: 40, + angle: 0, + strokeColor: "#e03131", + backgroundColor: "transparent", + fillStyle: "solid", + strokeWidth: 1, + strokeStyle: "solid", + roughness: 0, + opacity: 100, + seed: Math.floor(Math.random() * 1000000), + version: 1, + versionNonce: Math.floor(Math.random() * 1000000), + isDeleted: false, + boundElements: null, + updated: 1, + link: null, + locked: false, + groupIds: [], + frameId: null, + roundness: null, + text: "ERROR: FIX ME", + fontSize: 20, + fontFamily: 1, + textAlign: "left", + verticalAlign: "top", + baseline: 18, + containerId: null, + }; + validElements.push(errorLabel); + elementIds.add(labelId); + warnings.push( + `Arrow "${el.id}" has broken bindings (${brokenReasons.join("; ")}); added ERROR label`, + ); + } + + // Ensure connected shapes list this arrow in their boundElements + for (const connectedId of [startId, endId]) { + if (!connectedId) continue; + const shape = 
elementById.get(connectedId); + if (!shape) continue; + if (!Array.isArray(shape.boundElements)) { + shape.boundElements = []; + } + const already = (shape.boundElements as any[]).some( + (b: any) => b.id === el.id, + ); + if (!already) { + (shape.boundElements as any[]).push({ + id: el.id, + type: el.type, + }); + warnings.push( + `Added arrow "${el.id}" to boundElements of shape "${connectedId}"`, + ); + } + } + + // Recompute arrow geometry so endpoints land on shape edges, not centers. + // Re-read startId/endId after the broken-binding pass (they may have been nulled). + const fixedStartId = (el.startBinding as any)?.elementId as + | string + | undefined; + const fixedEndId = (el.endBinding as any)?.elementId as + | string + | undefined; + const startShape = fixedStartId + ? elementById.get(fixedStartId) + : undefined; + const endShape = fixedEndId + ? elementById.get(fixedEndId) + : undefined; + + if (startShape && endShape) { + const startCx = startShape.x + startShape.width / 2; + const startCy = startShape.y + startShape.height / 2; + const endCx = endShape.x + endShape.width / 2; + const endCy = endShape.y + endShape.height / 2; + + const [sx, sy] = edgePoint( + startShape.x, + startShape.y, + startShape.width, + startShape.height, + endCx, + endCy, + ); + const [ex, ey] = edgePoint( + endShape.x, + endShape.y, + endShape.width, + endShape.height, + startCx, + startCy, + ); + + // Arrow origin is the start edge point; points are relative to that origin + el.x = sx; + el.y = sy; + el.width = Math.abs(ex - sx); + el.height = Math.abs(ey - sy); + el.points = [ + [0, 0], + [ex - sx, ey - sy], + ]; + } + } + + // Fix text elements whose containerId is broken + if (el.type === "text") { + const cid = el.containerId as string | null | undefined; + if (cid && !elementIds.has(cid)) { + warnings.push( + `Cleared containerId on text "${el.id}": referenced missing element "${cid}"`, + ); + el.containerId = null; + } else if (cid) { + // Snap text geometry to match 
its container exactly + const container = elementById.get(cid); + if (container) { + if ( + el.x !== container.x || + el.y !== container.y || + el.width !== container.width || + el.height !== container.height + ) { + el.x = container.x; + el.y = container.y; + el.width = container.width; + el.height = container.height; + warnings.push( + `Snapped text "${el.id}" geometry to match container "${cid}"`, + ); + } + } + } + } + + validElements.push(el); + } + + doc.elements = validElements; + return warnings; } async function handleCreateDiagram( action: CreateDiagramAction, context: ActionContext<ExcalidrawActionContext>, ): Promise<ActionResult> { - const { sourceContent, sourceType, diagramTitle, outputPath } = - action.parameters; + const { sourceContent, diagramTitle, outputPath } = action.parameters; + let { sourceType } = action.parameters; // Show loading indicator context.actionIO.setDisplay({ @@ -204,8 +458,29 @@ async function handleCreateDiagram( try { const chatModel = openai.createJsonChatModel(); + + // If sourceContent looks like a file path and that file exists, read it + let resolvedContent = sourceContent; + const trimmed = sourceContent.trim(); + const looksLikePath = + /^[a-zA-Z]:[/\\]/.test(trimmed) || // Windows absolute: C:\... or C:/... 
+ trimmed.startsWith("/") || // Unix absolute + trimmed.startsWith("./") || // relative + trimmed.startsWith("../") || // relative parent + (trimmed.length < 512 && /\.(md|txt|xml|json)$/i.test(trimmed)); // short string ending in a known extension + if (looksLikePath) { + const candidate = path.resolve(trimmed); + if (fs.existsSync(candidate) && fs.statSync(candidate).isFile()) { + resolvedContent = fs.readFileSync(candidate, "utf-8"); + // Auto-detect sourceType from extension if not already specific + if (sourceType === "text" && /\.md$/i.test(candidate)) { + sourceType = "markdown"; + } + } + } + const systemPrompt = buildSystemPrompt(sourceType); - const userPrompt = `Convert the following ${sourceType} content into an Excalidraw diagram:\n\n${sourceContent}`; + const userPrompt = `Convert the following ${sourceType} content into an Excalidraw diagram:\n\n${resolvedContent}`; const response = await chatModel.complete([ { role: "system", content: systemPrompt }, @@ -257,6 +532,9 @@ async function handleCreateDiagram( excalidrawData.files = {}; } + // Repair common LLM output issues (broken arrow bindings, missing boundElements, etc.) + const repairWarnings = repairExcalidrawDiagram(excalidrawData); + // Write the file const resolvedPath = resolveOutputPath(outputPath, diagramTitle); const outputDir = path.dirname(resolvedPath); @@ -270,12 +548,19 @@ async function handleCreateDiagram( const elementCount = excalidrawData.elements.length; const displayTitle = diagramTitle ?? "Excalidraw diagram"; + const warningNote = + repairWarnings.length > 0 + ? 
`\n\n⚠️ Auto-repaired ${repairWarnings.length} issue(s):\n` + + repairWarnings.map((w) => ` • ${w}`).join("\n") + : ""; + const result = createActionResultFromTextDisplay( `📐 ${displayTitle} created successfully!\n\n` + `📁 Saved to: ${resolvedPath}\n` + `📊 Elements: ${elementCount}\n` + - `📄 Source type: ${sourceType}\n\n` + - `Open the .excalidraw file in Excalidraw (https://excalidraw.com) to view and edit.`, + `📄 Source type: ${sourceType}` + + warningNote + + `\n\nOpen the .excalidraw file in Excalidraw (https://excalidraw.com) to view and edit.`, `Created Excalidraw diagram "${displayTitle}" with ${elementCount} elements at ${resolvedPath}`, ); diff --git a/ts/packages/agents/excalidraw/src/excalidrawActionSchema.ts b/ts/packages/agents/excalidraw/src/excalidrawActionSchema.ts index c206d87fe..e016af569 100644 --- a/ts/packages/agents/excalidraw/src/excalidrawActionSchema.ts +++ b/ts/packages/agents/excalidraw/src/excalidrawActionSchema.ts @@ -11,7 +11,8 @@ export type CreateDiagramAction = { parameters: { // the original request from the user originalRequest: string; - // the source content to convert into a diagram (text, markdown, Visio XML, etc.) 
+ // the source content to convert into a diagram — either raw text/markdown/XML, + // or a file path (absolute or relative) whose contents will be read automatically sourceContent: string; // the type of the source content sourceType: From 2fdb1cba5294b7bb8dc35fd5a471c1f84aa41d64 Mon Sep 17 00:00:00 2001 From: George Ng <georgeng@microsoft.com> Date: Wed, 8 Apr 2026 21:32:09 -0700 Subject: [PATCH 5/5] Switch to 2 phase implementation for interpreting content and generating a diagram --- .../excalidraw/src/excalidrawActionHandler.ts | 254 +++++++++--------- 1 file changed, 125 insertions(+), 129 deletions(-) diff --git a/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts b/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts index 95a49646a..1a1aaa99d 100644 --- a/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts +++ b/ts/packages/agents/excalidraw/src/excalidrawActionHandler.ts @@ -87,146 +87,114 @@ function resolveOutputPath( return path.join(getDefaultOutputDir(), filename); } -function buildSystemPrompt(sourceType: string): string { - return `You are an expert diagram generator. Your task is to convert the provided content into a valid Excalidraw JSON diagram. +function buildMermaidSystemPrompt(sourceType: string): string { + return `You are an expert at reading documents and extracting their structure as a complete Mermaid flowchart. + +Your ONLY output is a valid Mermaid flowchart — no explanations, no markdown fences, just the raw Mermaid syntax starting with "flowchart TD" or "flowchart LR". + +RULES: +- Capture EVERY node, relationship, and label present in the source — do not simplify or omit anything +- Use quoted labels on nodes and edges so spaces and special characters are safe: A["My Label"] +- Use --> for directed edges, -- label --> for labelled edges +- Use subgraph ... 
end to represent groups or layers +- Prefer top-down (TD) layout unless the content is clearly horizontal + +SOURCE CONTENT TYPE: ${sourceType} +- If "markdown": headings become top-level nodes, bullet points become child nodes, nested bullets become sub-children +- If "text": identify all named concepts and their relationships; model causality/dependency as directed edges +- If "visio-xml": faithfully reproduce the shapes and connectors from the XML +- If "mermaid": output it unchanged (it is already Mermaid) +- If "architecture": represent every component, layer, and data-flow arrow`; +} + +function buildExcalidrawSystemPrompt(): string { + return `You are a mechanical converter from Mermaid flowchart syntax to Excalidraw JSON. +You receive a complete Mermaid flowchart and must produce a valid Excalidraw JSON file that faithfully represents every node and edge. Do not omit anything. OUTPUT FORMAT: -You MUST output ONLY valid JSON in Excalidraw format. Do NOT include any markdown code fences, explanations, or text outside the JSON. +Output ONLY valid JSON — no markdown fences, no explanation. -The JSON must have this top-level structure: +Top-level structure: { "type": "excalidraw", "version": 2, "source": "typeagent-excalidraw", "elements": [...], - "appState": { - "gridSize": null, - "viewBackgroundColor": "#ffffff" - }, + "appState": { "gridSize": null, "viewBackgroundColor": "#ffffff" }, "files": {} } -ELEMENT TYPES you can use: -1. "rectangle" - for boxes/containers/nodes -2. "ellipse" - for circular/oval nodes -3. "diamond" - for decision points -4. "text" - for labels (can be standalone or bound to shapes) -5. "arrow" - for connections/relationships between elements -6. 
"line" - for non-directional connections +ELEMENT TYPES: +- "rectangle" — regular nodes +- "diamond" — decision/condition nodes (Mermaid {braces}) +- "ellipse" — terminal/start/end nodes (Mermaid stadium or circle) +- "text" — bound label for a shape (containerId set) or standalone text +- "arrow" — directed edge -ELEMENT STRUCTURE (required fields for each element): +ELEMENT STRUCTURE (every field required): { - "id": "<unique-string-id>", - "type": "<element-type>", - "x": <number>, - "y": <number>, - "width": <number>, - "height": <number>, + "id": "<unique-string>", + "type": "<type>", + "x": <number>, "y": <number>, "width": <number>, "height": <number>, "angle": 0, "strokeColor": "#1e1e1e", "backgroundColor": "<color>", - "fillStyle": "solid", - "strokeWidth": 2, - "strokeStyle": "solid", - "roughness": 1, - "opacity": 100, - "seed": <random-integer>, - "version": 1, - "versionNonce": <random-integer>, - "isDeleted": false, - "boundElements": null, - "updated": 1, - "link": null, - "locked": false, - "groupIds": [], - "frameId": null, + "fillStyle": "solid", "strokeWidth": 2, "strokeStyle": "solid", + "roughness": 1, "opacity": 100, + "seed": <random-int>, "version": 1, "versionNonce": <random-int>, + "isDeleted": false, "boundElements": [], "updated": 1, + "link": null, "locked": false, "groupIds": [], "frameId": null, "roundness": { "type": 3 } } -For TEXT elements, also include: - "text": "<the text>", - "fontSize": 20, - "fontFamily": 1, - "textAlign": "center", - "verticalAlign": "middle", - "baseline": 18, - "containerId": null (or the id of the shape it's bound to) +TEXT elements also need: + "text": "<label>", "fontSize": 20, "fontFamily": 1, + "textAlign": "center", "verticalAlign": "middle", "baseline": 18, + "containerId": "<shape-id> or null" -SIZING SHAPES TO FIT TEXT: -- Estimate text width: each character is approximately 12px wide at fontSize 20, with a minimum of 100px -- Estimate text height: each line of text is approximately 28px tall at 
fontSize 20 -- For multi-line text, count the lines and multiply -- Add padding of at least 24px on each horizontal side and 16px on each vertical side -- So for text "Hello World" (11 chars × 12px = 132px), the shape width should be at least 132 + 48 = 180px -- For shapes with longer text, increase width proportionally; never let text overflow its container -- Minimum shape dimensions: width 120px, height 60px - -For ARROW elements, also include: - "points": [[0, 0], [<dx>, <dy>]], - "startBinding": { "elementId": "<source-id>", "focus": 0, "gap": 8 }, - "endBinding": { "elementId": "<target-id>", "focus": 0, "gap": 8 }, - "startArrowhead": null, - "endArrowhead": "arrow" - -ARROW BINDING RULES (critical — broken arrows ruin the diagram): -- Every arrow MUST have both "startBinding" and "endBinding" set (never null unless intentionally floating) -- "startBinding.elementId" must match the "id" of an existing shape element in the diagram -- "endBinding.elementId" must match the "id" of an existing shape element in the diagram -- On each shape that an arrow connects to, add the arrow's id to the shape's "boundElements" array: - "boundElements": [{"id": "<text-id>", "type": "text"}, {"id": "<arrow-id>", "type": "arrow"}] -- Arrow endpoints must land on the EDGE of their connected shapes, NOT at the center: - - Compute the centre of the source shape (sx = x + w/2, sy = y + h/2) and the centre of the target shape (tx = x + w/2, ty = y + h/2) - - The start point of the arrow is where the ray from source centre toward target centre exits the source rectangle - - The end point of the arrow is where that same ray enters the target rectangle - - Set the arrow's "x","y" to the start edge point - - Set points[0] to [0, 0] and points[1] to [endX - startX, endY - startY] -- Always verify that every elementId in startBinding/endBinding refers to a real shape id in the elements array - -To bind text to a shape: -- On the shape element, set "boundElements": [{"id": "<text-id>", 
"type": "text"}] -- On the text element, set "containerId": "<shape-id>" -- The text element's x, y, width, and height MUST exactly match the container shape: - - "x": same as container x - - "y": same as container y - - "width": same as container width - - "height": same as container height - Excalidraw requires these to match for correct initial placement — do NOT use offsets or center-point coordinates - -ARROW LABEL TEXT: -- If an arrow needs a label, create a text element with "containerId": "<arrow-id>" and add {"id": "<label-id>", "type": "text"} to the arrow's "boundElements" -- Size the label text element to fit its content using the same character-width estimate above - -LAYOUT GUIDELINES: -- For "large concept" elements (primary nodes, major components, top-level headings): place them at least 96px (roughly 1 inch at 96 DPI) apart edge-to-edge, preferably 150–200px -- For smaller sub-elements or annotations: at least 60px apart edge-to-edge -- Use a left-to-right or top-to-bottom flow layout -- Avoid overlapping elements; account for element width and height when computing positions, not just (x, y) origins -- Group related items visually -- Use colors to distinguish different categories: - - "#a5d8ff" (light blue) for primary components - - "#b2f2bb" (light green) for data/storage - - "#ffd8a8" (light orange) for external services - - "#d0bfff" (light purple) for processing/logic - - "#ffc9c9" (light red) for errors/alerts - - "#fff3bf" (light yellow) for notes/annotations +ARROW elements also need: + "points": [[0,0],[dx,dy]], + "startBinding": { "elementId": "<id>", "focus": 0, "gap": 8 }, + "endBinding": { "elementId": "<id>", "focus": 0, "gap": 8 }, + "startArrowhead": null, "endArrowhead": "arrow" -SOURCE CONTENT TYPE: ${sourceType} -- If "markdown": Parse headings as major nodes, bullet points as sub-nodes, and create hierarchy -- If "text": Identify key concepts, entities, and relationships to create a concept diagram -- If "visio-xml": Parse the 
XML structure to recreate the diagram layout with shapes and connectors -- If "mermaid": Parse Mermaid syntax (flowchart, sequence, etc.) and convert to Excalidraw elements -- If "architecture": Create an architecture diagram with components, layers, and data flow arrows - -SELF-REVIEW CHECKLIST (apply before outputting): -1. Every shape with a label has a bound text element (boundElements contains the text id, text has containerId set) -2. Every arrow has both startBinding and endBinding referencing real element ids in the elements array -3. Every shape connected by an arrow lists that arrow id in its boundElements array -4. No two elements overlap (check x, y, width, height for all pairs) -5. All text fits within its container shape (shape width/height is large enough per the sizing rules above) -6. Arrow labels (if any) have containerId pointing to the arrow id -7. No dangling element ids — every id referenced anywhere exists as an element in the array - -Generate a clean, well-organized diagram that visually represents the content. If anything in the source content is ambiguous (e.g. 
unclear direction of a relationship, unclear grouping), ask the user for clarification rather than guessing.`; +SIZING SHAPES TO FIT TEXT: +- Estimate ~12px per character at fontSize 20 +- Add 48px horizontal padding (24px each side) and 32px vertical padding (16px each side) +- Minimum size: 120 × 60px +- Example: "Hello World" (11 chars) → width = max(11×12+48, 120) = 180, height = 60 + +BOUND TEXT GEOMETRY — CRITICAL: +- Every shape with a label needs a paired text element +- The text element's x, y, width, height must EXACTLY match the container shape +- Set "containerId" on the text to the shape's id +- Add {"id": "<text-id>", "type": "text"} to the shape's "boundElements" + +ARROW GEOMETRY — CRITICAL: +- Endpoints must land on the EDGE of shapes, not the center +- For an arrow from shape A to shape B: + - Compute centre of A: (A.x + A.w/2, A.y + A.h/2), centre of B similarly + - Start point = where the ray from A-centre toward B-centre exits A's rectangle + - End point = where that ray enters B's rectangle + - Arrow x,y = start point; points = [[0,0],[endX-startX, endY-startY]] +- Add the arrow id to boundElements of both A and B: {"id":"<arrow-id>","type":"arrow"} +- Both startBinding.elementId and endBinding.elementId must be real ids in the elements array + +LAYOUT: +- Top-down or left-to-right flow matching the Mermaid layout direction +- Large concept nodes at least 150px apart edge-to-edge +- Account for shape width+height when placing nodes — no overlaps +- Use subgraph boundaries as visual grouping (add a lightly-filled rectangle behind the group) +- Colors: + "#a5d8ff" primary components · "#b2f2bb" data/storage · "#ffd8a8" external services + "#d0bfff" processing/logic · "#ffc9c9" errors/alerts · "#fff3bf" notes/annotations + +SELF-REVIEW before outputting: +1. Every node in the Mermaid has a corresponding shape + bound text element +2. Every edge in the Mermaid has a corresponding arrow with valid startBinding and endBinding +3. 
No two shapes overlap +4. All text fits its container (check width) +5. Every id referenced in a binding or containerId exists in the elements array`; } /** @@ -457,8 +425,6 @@ async function handleCreateDiagram( }); try { - const chatModel = openai.createJsonChatModel(); - // If sourceContent looks like a file path and that file exists, read it let resolvedContent = sourceContent; const trimmed = sourceContent.trim(); @@ -479,23 +445,53 @@ async function handleCreateDiagram( } } - const systemPrompt = buildSystemPrompt(sourceType); - const userPrompt = `Convert the following ${sourceType} content into an Excalidraw diagram:\n\n${resolvedContent}`; + // --- Pass 1: extract full structure as Mermaid (cheap tokens, complete content) --- + context.actionIO.setDisplay({ + type: "html", + content: `<div class="generating">Step 1/2: Extracting diagram structure...</div>`, + }); - const response = await chatModel.complete([ - { role: "system", content: systemPrompt }, - { role: "user", content: userPrompt }, + const mermaidModel = openai.createChatModel(); + const mermaidResponse = await mermaidModel.complete([ + { + role: "system", + content: buildMermaidSystemPrompt(sourceType), + }, + { + role: "user", + content: `Convert the following ${sourceType} content into a complete Mermaid flowchart:\n\n${resolvedContent}`, + }, ]); - // Clear loading display + if (!mermaidResponse.success) { + return createActionResultFromError( + `Failed to extract diagram structure: ${mermaidResponse.message}`, + ); + } + + const mermaidDiagram = mermaidResponse.data.trim(); + + // --- Pass 2: convert Mermaid → Excalidraw JSON (mechanical translation) --- context.actionIO.setDisplay({ type: "html", - content: "", + content: `<div class="generating">Step 2/2: Generating Excalidraw diagram...</div>`, }); + const excalidrawModel = openai.createJsonChatModel(); + const response = await excalidrawModel.complete([ + { role: "system", content: buildExcalidrawSystemPrompt() }, + { + role: "user", + 
content: `Convert this Mermaid flowchart to Excalidraw JSON:\n\n${mermaidDiagram}`, + }, + ]); + + // Clear loading display + context.actionIO.setDisplay({ type: "html", content: "" }); + if (!response.success) { return createActionResultFromError( - `Failed to generate diagram: ${response.message}`, + `Failed to generate Excalidraw JSON: ${response.message}`, ); }