diff --git a/apps/web/src/components/cloud-agent-next/SessionsList.tsx b/apps/web/src/components/cloud-agent-next/SessionsList.tsx index 46fc9bdd63..3f783d5ee8 100644 --- a/apps/web/src/components/cloud-agent-next/SessionsList.tsx +++ b/apps/web/src/components/cloud-agent-next/SessionsList.tsx @@ -1,7 +1,7 @@ 'use client'; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'; -import { Bot, Clock, Cloud, GitBranch, Puzzle, Terminal } from 'lucide-react'; +import { Bot, Clock, Cloud, GitBranch, Puzzle, Terminal, Workflow } from 'lucide-react'; import type { StoredSession } from './types'; import { TimeAgo } from '@/components/shared/TimeAgo'; import Link from 'next/link'; @@ -76,6 +76,13 @@ export function SessionsList({ sessions, organizationId, onSessionClick }: Sessi Slack ); + } else if (platform === 'gastown') { + badge = ( + + + Gastown + + ); } else { // Default to Extension badge for unknown, vscode, etc. badge = ( diff --git a/apps/web/src/components/cloud-agent/SessionsList.tsx b/apps/web/src/components/cloud-agent/SessionsList.tsx index a34f51f871..8652e5321a 100644 --- a/apps/web/src/components/cloud-agent/SessionsList.tsx +++ b/apps/web/src/components/cloud-agent/SessionsList.tsx @@ -1,7 +1,7 @@ 'use client'; import { Card, CardContent, CardDescription, CardHeader, CardTitle } from '@/components/ui/card'; -import { Bot, Clock, Cloud, GitBranch, Puzzle, Terminal } from 'lucide-react'; +import { Bot, Clock, Cloud, GitBranch, Puzzle, Terminal, Workflow } from 'lucide-react'; import type { StoredSession } from './types'; import { formatDistanceToNow } from 'date-fns'; import Link from 'next/link'; @@ -78,6 +78,13 @@ export function SessionsList({ Slack ); + } else if (platform === 'gastown') { + badge = ( + + + Gastown + + ); } else { // Default to Extension badge for unknown, vscode, etc. badge = ( diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index e758f3b7b6..641fb5d303 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1721,7 +1721,7 @@ importers: version: 7.0.0-dev.20260514.1 jest: specifier: 30.3.0 - version: 30.3.0(@types/node@24.12.4)(esbuild-register@3.6.0(esbuild@0.27.4)) + version: 30.3.0(@types/node@25.5.2)(esbuild-register@3.6.0(esbuild@0.27.4)) typescript: specifier: 'catalog:' version: 5.9.3 @@ -1851,8 +1851,8 @@ importers: specifier: 7.2.52 version: 7.2.52 '@kilocode/sdk': - specifier: 7.2.14 - version: 7.2.14 + specifier: 7.3.1 + version: 7.3.1 hono: specifier: 4.12.18 version: 4.12.18 @@ -4838,12 +4838,12 @@ packages: '@opentui/solid': optional: true - '@kilocode/sdk@7.2.14': - resolution: {integrity: sha512-Naz83lFrsbavuDp6UwxRuglOaSNvRBsZfcRNvb7RpWYAwbuJP0dBdhpXj6uO3ta5qxeQ2JzxKNC9Ffz+LCLLDg==} - '@kilocode/sdk@7.2.52': resolution: {integrity: sha512-j8w6ewvo7dyu/qxjJAg0bcjHGUGGvIZ4F2f5tJnpMwLzPTAu26DJoO/08aoxf1BhfuZLzNS9tA2q+ZPdzPT8Jg==} + '@kilocode/sdk@7.3.1': + resolution: {integrity: sha512-UFsCx+Nman7J0jBTr1mZxt6IQkpxkxt3Lqa+gb7/1lSjo0psRgO61H9sUfgLDvAFeaJsQy7k7KMq1eigsbd4rQ==} + '@lexical/clipboard@0.35.0': resolution: {integrity: sha512-ko7xSIIiayvDiqjNDX6fgH9RlcM6r9vrrvJYTcfGVBor5httx16lhIi0QJZ4+RNPvGtTjyFv4bwRmsixRRwImg==} @@ -7128,6 +7128,7 @@ packages: '@rocicorp/resolver@1.0.2': resolution: {integrity: sha512-TfjMTQp9cNNqNtHFfa+XHEGdA7NnmDRu+ZJH4YF3dso0Xk/b9DMhg/sl+b6CR4ThFZArXXDsG1j8Mwl34wcOZQ==} engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} + deprecated: Use Promise.withResolvers instead '@rolldown/binding-android-arm64@1.0.0-rc.17': resolution: {integrity: sha512-s70pVGhw4zqGeFnXWvAzJDlvxhlRollagdCCKRgOsgUOH3N1l0LIxf83AtGzmb5SiVM4Hjl5HyarMRfdfj3DaQ==} @@ -18478,7 +18479,7 @@ snapshots: cjs-module-lexer: 1.4.3 esbuild: 0.27.4 miniflare: 4.20260508.0(bufferutil@4.1.0)(utf-8-validate@6.0.6) - vitest: 4.1.6(@opentelemetry/api@1.9.1)(@types/node@24.12.4)(@vitest/coverage-v8@4.1.6)(@vitest/ui@4.1.6)(esbuild@0.27.4)(jiti@2.6.1)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.4) + vitest: 4.1.6(@opentelemetry/api@1.9.1)(@types/node@25.5.2)(@vitest/coverage-v8@4.1.6)(@vitest/ui@4.1.6)(esbuild@0.27.4)(jiti@2.6.1)(terser@5.46.0)(tsx@4.21.0)(yaml@2.8.4) wrangler: 4.90.1(@cloudflare/workers-types@4.20260511.1)(bufferutil@4.1.0)(utf-8-validate@6.0.6) zod: 3.25.76 transitivePeerDependencies: @@ -19955,11 +19956,11 @@ snapshots: effect: 4.0.0-beta.57 zod: 4.1.8 - '@kilocode/sdk@7.2.14': + '@kilocode/sdk@7.2.52': dependencies: cross-spawn: 7.0.6 - '@kilocode/sdk@7.2.52': + '@kilocode/sdk@7.3.1': dependencies: cross-spawn: 7.0.6 @@ -28690,6 +28691,25 @@ snapshots: - supports-color - ts-node + jest-cli@30.3.0(@types/node@25.5.2)(esbuild-register@3.6.0(esbuild@0.27.4)): + dependencies: + '@jest/core': 30.3.0(esbuild-register@3.6.0(esbuild@0.27.4)) + '@jest/test-result': 30.3.0 + '@jest/types': 30.3.0 + chalk: 4.1.2 + exit-x: 0.2.2 + import-local: 3.2.0 + jest-config: 30.3.0(@types/node@25.5.2)(esbuild-register@3.6.0(esbuild@0.27.4)) + jest-util: 30.3.0 + jest-validate: 30.3.0 + yargs: 17.7.2 + transitivePeerDependencies: + - '@types/node' + - babel-plugin-macros + - esbuild-register + - supports-color + - ts-node + jest-config@29.7.0(@types/node@24.12.4): dependencies: '@babel/core': 7.29.0 @@ -28782,6 +28802,38 @@ snapshots: - babel-plugin-macros - supports-color + jest-config@30.3.0(@types/node@25.5.2)(esbuild-register@3.6.0(esbuild@0.27.4)): + dependencies: + '@babel/core': 7.29.0 + '@jest/get-type': 30.1.0 + '@jest/pattern': 30.0.1 + '@jest/test-sequencer': 30.3.0 + '@jest/types': 30.3.0 + babel-jest: 30.3.0(@babel/core@7.29.0) + chalk: 4.1.2 + ci-info: 4.4.0 + deepmerge: 4.3.1 + glob: 13.0.6 + graceful-fs: 4.2.11 + jest-circus: 30.3.0 + jest-docblock: 30.2.0 + jest-environment-node: 30.3.0 + jest-regex-util: 30.0.1 + jest-resolve: 30.3.0 + jest-runner: 30.3.0 + jest-util: 30.3.0 + jest-validate: 30.3.0 + parse-json: 5.2.0 + pretty-format: 30.3.0 + slash: 3.0.0 + strip-json-comments: 3.1.1 + optionalDependencies: + '@types/node': 25.5.2 + esbuild-register: 3.6.0(esbuild@0.27.4) + transitivePeerDependencies: + - babel-plugin-macros + - supports-color + jest-diff@29.7.0: dependencies: chalk: 4.1.2 @@ -29309,6 +29361,19 @@ snapshots: - supports-color - ts-node + jest@30.3.0(@types/node@25.5.2)(esbuild-register@3.6.0(esbuild@0.27.4)): + dependencies: + '@jest/core': 30.3.0(esbuild-register@3.6.0(esbuild@0.27.4)) + '@jest/types': 30.3.0 + import-local: 3.2.0 + jest-cli: 30.3.0(@types/node@25.5.2)(esbuild-register@3.6.0(esbuild@0.27.4)) + transitivePeerDependencies: + - '@types/node' + - babel-plugin-macros + - esbuild-register + - supports-color + - ts-node + jimp-compact@0.16.1: {} jiti@2.6.1: {} diff --git a/services/gastown/container/Dockerfile b/services/gastown/container/Dockerfile index a0db8ddc00..0f1864ae01 100644 --- a/services/gastown/container/Dockerfile +++ b/services/gastown/container/Dockerfile @@ -72,7 +72,7 @@ RUN git lfs install --system # Install both glibc and musl variants — the CLI's binary resolver may # pick either depending on the detected libc. # Also install pnpm — many projects use it as their package manager. -RUN npm install -g @kilocode/cli@7.2.14 @kilocode/cli-linux-x64@7.2.14 @kilocode/cli-linux-x64-musl@7.2.14 @kilocode/plugin@7.2.14 pnpm && \ +RUN npm install -g @kilocode/cli@7.3.1 @kilocode/cli-linux-x64@7.3.1 @kilocode/cli-linux-x64-musl@7.3.1 @kilocode/plugin@7.3.1 pnpm && \ ln -s "$(which kilo)" /usr/local/bin/opencode # Create non-root user for defense-in-depth diff --git a/services/gastown/container/Dockerfile.dev b/services/gastown/container/Dockerfile.dev index 0b5ecf53ff..ab3ada9df0 100644 --- a/services/gastown/container/Dockerfile.dev +++ b/services/gastown/container/Dockerfile.dev @@ -71,7 +71,7 @@ RUN git lfs install --system # pick either depending on the detected libc. bun:1-slim is Debian (glibc) # but the resolver sometimes misdetects; installing both is safe. # Also install pnpm — many projects use it as their package manager. -RUN npm install -g @kilocode/cli@7.2.14 @kilocode/cli-linux-arm64@7.2.14 @kilocode/cli-linux-arm64-musl@7.2.14 @kilocode/plugin@7.2.14 pnpm && \ +RUN npm install -g @kilocode/cli@7.3.1 @kilocode/cli-linux-arm64@7.3.1 @kilocode/cli-linux-arm64-musl@7.3.1 @kilocode/plugin@7.3.1 pnpm && \ ln -s "$(which kilo)" /usr/local/bin/opencode # Create non-root user for defense-in-depth diff --git a/services/gastown/container/package.json b/services/gastown/container/package.json index d413c6091d..b2508545d2 100644 --- a/services/gastown/container/package.json +++ b/services/gastown/container/package.json @@ -13,7 +13,7 @@ }, "dependencies": { "@kilocode/plugin": "7.2.52", - "@kilocode/sdk": "7.2.14", + "@kilocode/sdk": "7.3.1", "hono": "catalog:", "zod": "catalog:" }, diff --git a/services/gastown/container/src/agent-runner.test.ts b/services/gastown/container/src/agent-runner.test.ts index da33166d16..7123c1d7e2 100644 --- a/services/gastown/container/src/agent-runner.test.ts +++ b/services/gastown/container/src/agent-runner.test.ts @@ -18,7 +18,7 @@ vi.mock('./logger', () => ({ log: { info: vi.fn() }, })); -import { buildAgentEnv, buildKiloConfigContent } from './agent-runner'; +import { buildAgentEnv, buildKiloAuthEnv, buildKiloConfigContent } from './agent-runner'; import type { StartAgentRequest } from './types'; function baseRequest(overrides: Partial = {}): StartAgentRequest { @@ -117,3 +117,44 @@ describe('buildKiloConfigContent', () => { expect(parsed.provider.kilo.options.kilocodeOrganizationId).toBeUndefined(); }); }); + +describe('buildKiloAuthEnv', () => { + it('sets KILO_PLATFORM to gastown', () => { + const env = buildKiloAuthEnv(undefined, undefined); + expect(env.KILO_PLATFORM).toBe('gastown'); + }); + + it('sets KILO_AUTH_CONTENT when kilocodeToken is provided', () => { + const env = buildKiloAuthEnv('tok-abc', undefined); + expect(env.KILO_AUTH_CONTENT).toBe(JSON.stringify({ kilo: { type: 'api', key: 'tok-abc' } })); + }); + + it('omits KILO_AUTH_CONTENT when kilocodeToken is absent', () => { + const env = buildKiloAuthEnv(undefined, undefined); + expect(env.KILO_AUTH_CONTENT).toBeUndefined(); + }); + + it('sets KILO_ORG_ID when organizationId is provided', () => { + const env = buildKiloAuthEnv('tok', 'org-123'); + expect(env.KILO_ORG_ID).toBe('org-123'); + }); + + it('omits KILO_ORG_ID when organizationId is null', () => { + const env = buildKiloAuthEnv('tok', null); + expect(env.KILO_ORG_ID).toBeUndefined(); + }); + + it('omits KILO_ORG_ID when organizationId is undefined', () => { + const env = buildKiloAuthEnv('tok', undefined); + expect(env.KILO_ORG_ID).toBeUndefined(); + }); + + it('sets all three env vars when both inputs are provided', () => { + const env = buildKiloAuthEnv('tok-full', 'org-full'); + expect(env).toEqual({ + KILO_PLATFORM: 'gastown', + KILO_AUTH_CONTENT: JSON.stringify({ kilo: { type: 'api', key: 'tok-full' } }), + KILO_ORG_ID: 'org-full', + }); + }); +}); diff --git a/services/gastown/container/src/agent-runner.ts b/services/gastown/container/src/agent-runner.ts index 30ac6330ca..17d226f1a8 100644 --- a/services/gastown/container/src/agent-runner.ts +++ b/services/gastown/container/src/agent-runner.ts @@ -85,6 +85,22 @@ export function buildKiloConfigContent( } satisfies Config); } +export function buildKiloAuthEnv( + kilocodeToken: string | undefined, + organizationId: string | undefined | null +): Record { + const env: Record = { + KILO_PLATFORM: 'gastown', + }; + if (kilocodeToken) { + env.KILO_AUTH_CONTENT = JSON.stringify({ kilo: { type: 'api', key: kilocodeToken } }); + } + if (organizationId) { + env.KILO_ORG_ID = organizationId; + } + return env; +} + export function buildAgentEnv(request: StartAgentRequest): Record { // Custom git identity: when GASTOWN_GIT_AUTHOR_NAME is set, the user becomes // the primary author and the AI agent name is used for co-authorship trailers. @@ -173,30 +189,11 @@ GASTOWN_TOWN_ID="${env.GASTOWN_TOWN_ID}"`); console.log( `[buildAgentEnv] KILO_CONFIG_CONTENT set (model=${request.model}, smallModel=${request.smallModel ?? '(default)'})` ); - - // Set KILO_AUTH_CONTENT so the kilo CLI's session-ingest path can - // authenticate. The CLI's Auth.all() reads this env var before - // falling back to the auth.json file. Without it, session deltas - // get "session bootstrap skipped: no client" and never reach - // cli_sessions_v2. - env.KILO_AUTH_CONTENT = JSON.stringify({ - kilo: { type: 'api', key: kilocodeToken }, - }); } else { console.warn('[buildAgentEnv] No KILOCODE_TOKEN available — KILO_CONFIG_CONTENT not set'); } - // Set KILO_PLATFORM so session-ingest writes created_on_platform = - // 'gastown'. The /cloud/sessions page has a "Gastown" filter that - // matches this value. - env.KILO_PLATFORM = 'gastown'; - - // Set KILO_ORG_ID so session-ingest populates organization_id for - // org-scoped filtering. Falls back to the auth file's accountId - // inside the CLI if not set. - if (request.organizationId) { - env.KILO_ORG_ID = request.organizationId; - } + Object.assign(env, buildKiloAuthEnv(kilocodeToken, request.organizationId)); // Authenticate the gh CLI via GH_TOKEN. Prefer the user's GitHub CLI PAT // (which makes PRs/issues appear under their identity) over the integration @@ -518,58 +515,21 @@ export async function runAgent(originalRequest: StartAgentRequest): Promise { - const envVars = await resolveGitCredentials({ - envVars: baseEnvVars, - platformIntegrationId: rig.platformIntegrationId, - }); - const hasGitToken = !!(envVars.GIT_TOKEN || envVars.GITHUB_TOKEN || envVars.GITLAB_TOKEN); - console.log( - `[runAgent] setting up browse worktree: rig=${rig.rigId} gitUrl=${rig.gitUrl} hasGitToken=${hasGitToken}` - ); - await setupRigBrowseWorktree({ - rigId: rig.rigId, - gitUrl: rig.gitUrl, - defaultBranch: rig.defaultBranch, - envVars, - }); - return rig.rigId; - }) - ); - - const failures: Array<{ rigId: string; error: unknown }> = []; - for (let i = 0; i < rigSetupResults.length; i++) { - const r = rigSetupResults[i]; - if (r.status === 'rejected') { - const reason: unknown = r.reason; - failures.push({ rigId: request.rigs[i].rigId, error: reason }); - } - } - - if (failures.length > 0) { - for (const f of failures) { - const msg = f.error instanceof Error ? f.error.message : String(f.error); - const stack = f.error instanceof Error ? f.error.stack : undefined; - console.error( - `[runAgent] browse worktree setup FAILED for rig=${f.rigId}: ${msg}`, - stack ? `\n${stack}` : '' - ); - } - console.error( - `[runAgent] mayor rig setup: ${failures.length}/${request.rigs.length} rigs failed. ` + - `Mayor will start but may not be able to browse these codebases.` - ); - } + void setupMayorBrowseWorktrees(request).catch(err => { + console.error('[runAgent] background mayor browse worktree setup failed:', err); + }); } // Write the system prompt to AGENTS.md so the mayor AND its built-in @@ -577,7 +537,13 @@ export async function runAgent(originalRequest: StartAgentRequest): Promise { + if (!request.rigs?.length) return; + + const setupStart = Date.now(); + const baseEnvVars = request.envVars ?? {}; + const rigSetupResults = await Promise.allSettled( + request.rigs.map(async rig => { + const envVars = await resolveGitCredentials({ + envVars: baseEnvVars, + platformIntegrationId: rig.platformIntegrationId, + }); + const hasGitToken = !!(envVars.GIT_TOKEN || envVars.GITHUB_TOKEN || envVars.GITLAB_TOKEN); + console.log( + `[runAgent] setting up browse worktree: rig=${rig.rigId} gitUrl=${rig.gitUrl} hasGitToken=${hasGitToken}` + ); + await setupRigBrowseWorktree({ + rigId: rig.rigId, + gitUrl: rig.gitUrl, + defaultBranch: rig.defaultBranch, + envVars, + }); + return rig.rigId; + }) + ); + + const failures: Array<{ rigId: string; error: unknown }> = []; + for (let i = 0; i < rigSetupResults.length; i++) { + const result = rigSetupResults[i]; + if (result.status === 'rejected') { + failures.push({ rigId: request.rigs[i].rigId, error: result.reason }); + } + } + + if (failures.length > 0) { + for (const failure of failures) { + const msg = failure.error instanceof Error ? failure.error.message : String(failure.error); + const stack = failure.error instanceof Error ? failure.error.stack : undefined; + console.error( + `[runAgent] browse worktree setup FAILED for rig=${failure.rigId}: ${msg}`, + stack ? `\n${stack}` : '' + ); + } + console.error( + `[runAgent] mayor rig setup: ${failures.length}/${request.rigs.length} rigs failed. ` + + `Mayor will start but may not be able to browse these codebases.` + ); + } + + const setupDurationMs = Date.now() - setupStart; + log.info('mayor.browse_worktree_setup_ms', { + agentId: request.agentId, + townId: request.townId, + durationMs: setupDurationMs, + rigCount: request.rigs.length, + failureCount: failures.length, + }); +} diff --git a/services/gastown/container/src/control-server.ts b/services/gastown/container/src/control-server.ts index 761feefbaa..67d5adf694 100644 --- a/services/gastown/container/src/control-server.ts +++ b/services/gastown/container/src/control-server.ts @@ -41,6 +41,11 @@ import type { const MAX_TICKETS = 1000; const streamTickets = new Map(); +const RefreshTokenRequest = z.object({ + token: z.string().min(1), + townId: z.string().optional(), +}); + // Minimal Zod schema for the town config delivered via X-Town-Config header. // Uses z.record() so any string-keyed object is accepted and future keys are preserved. const TownConfigHeader = z.record(z.string(), z.unknown()); @@ -95,6 +100,8 @@ function syncTownConfigToProcessEnv(): void { if (!cfg) return; const CONFIG_ENV_MAP: Array<[string, string]> = [ + ['town_id', 'GASTOWN_TOWN_ID'], + ['gastown_api_url', 'GASTOWN_API_URL'], ['github_cli_pat', 'GITHUB_CLI_PAT'], ['git_author_name', 'GASTOWN_GIT_AUTHOR_NAME'], ['git_author_email', 'GASTOWN_GIT_AUTHOR_EMAIL'], @@ -176,6 +183,7 @@ app.use('*', async (c, next) => { const result = TownConfigHeader.safeParse(raw); if (result.success) { lastKnownTownConfig = result.data; + syncTownConfigToProcessEnv(); const hasToken = typeof result.data.kilocode_token === 'string' && result.data.kilocode_token.length > 0; console.log( @@ -260,11 +268,12 @@ app.post('/dashboard-context', async c => { // server — matching the model hot-swap path. app.post('/refresh-token', async c => { const body: unknown = await c.req.json().catch(() => null); - if (!body || typeof body !== 'object' || !('token' in body) || typeof body.token !== 'string') { - return c.json({ error: 'Missing or invalid token field' }, 400); + const parsed = RefreshTokenRequest.safeParse(body); + if (!parsed.success) { + return c.json({ error: 'Invalid request body', issues: parsed.error.issues }, 400); } // Capture the new token into a local so it survives the await below. - const newToken = body.token; + const newToken = parsed.data.token; // Wait for boot hydration to release the global sdkServerLock before // we mutate process.env or serialise N agent restarts through it. @@ -276,6 +285,9 @@ app.post('/refresh-token', async c => { // Now safe to assign: hydration is done, no concurrent env readers. process.env.GASTOWN_CONTAINER_TOKEN = newToken; + if (parsed.data.townId) { + process.env.GASTOWN_TOWN_ID = parsed.data.townId; + } const activeAgents = listAgents().filter(a => a.status === 'running' || a.status === 'starting'); log.info('refresh_token.received', { @@ -303,9 +315,8 @@ app.post('/refresh-token', async c => { // POST /sync-config // Push config-derived env vars from X-Town-Config into process.env on -// the running container. Called by TownDO.syncConfigToContainer() after -// persisting env vars to DO storage, so the live process picks up -// changes (e.g. refreshed KILOCODE_TOKEN) without a container restart. +// the running container, so the live process picks up changes (e.g. +// refreshed KILOCODE_TOKEN) without a container restart. app.post('/sync-config', async c => { syncTownConfigToProcessEnv(); return c.json({ synced: true }); @@ -338,6 +349,10 @@ app.post('/agents/start', async c => { // config rebuilds (e.g. model hot-swap). The env var is the primary // source of truth; KILO_CONFIG_CONTENT extraction is the fallback. process.env.GASTOWN_ORGANIZATION_ID = parsed.data.organizationId ?? ''; + process.env.GASTOWN_TOWN_ID = parsed.data.townId; + if (parsed.data.envVars?.GASTOWN_CONTAINER_TOKEN) { + process.env.GASTOWN_CONTAINER_TOKEN = parsed.data.envVars.GASTOWN_CONTAINER_TOKEN; + } console.log( `[control-server] /agents/start: role=${parsed.data.role} name=${parsed.data.name} rigId=${parsed.data.rigId} agentId=${parsed.data.agentId}` @@ -671,10 +686,14 @@ app.post('/git/merge', async c => { // Called by the process-manager when the agent goes idle. app.get('/agents/:agentId/pending-nudges', async c => { const { agentId } = c.req.param(); - const apiUrl = process.env.GASTOWN_API_URL; - const token = process.env.GASTOWN_CONTAINER_TOKEN ?? process.env.GASTOWN_SESSION_TOKEN; - const townId = process.env.GASTOWN_TOWN_ID; - const rigId = process.env.GASTOWN_RIG_ID; + const agent = getAgentStatus(agentId); + const apiUrl = agent?.gastownApiUrl ?? process.env.GASTOWN_API_URL; + const token = + process.env.GASTOWN_CONTAINER_TOKEN ?? + agent?.gastownContainerToken ?? + agent?.gastownSessionToken; + const townId = agent?.townId ?? process.env.GASTOWN_TOWN_ID; + const rigId = agent?.rigId; if (!apiUrl || !token || !townId || !rigId) { return c.json({ error: 'Missing gastown configuration' }, 503); @@ -704,10 +723,14 @@ app.get('/agents/:agentId/pending-nudges', async c => { // Body: { nudge_id: string } app.post('/agents/:agentId/nudge-delivered', async c => { const { agentId } = c.req.param(); - const apiUrl = process.env.GASTOWN_API_URL; - const token = process.env.GASTOWN_CONTAINER_TOKEN ?? process.env.GASTOWN_SESSION_TOKEN; - const townId = process.env.GASTOWN_TOWN_ID; - const rigId = process.env.GASTOWN_RIG_ID; + const agent = getAgentStatus(agentId); + const apiUrl = agent?.gastownApiUrl ?? process.env.GASTOWN_API_URL; + const token = + process.env.GASTOWN_CONTAINER_TOKEN ?? + agent?.gastownContainerToken ?? + agent?.gastownSessionToken; + const townId = agent?.townId ?? process.env.GASTOWN_TOWN_ID; + const rigId = agent?.rigId; if (!apiUrl || !token || !townId || !rigId) { return c.json({ error: 'Missing gastown configuration' }, 503); diff --git a/services/gastown/container/src/main.ts b/services/gastown/container/src/main.ts index 4abff0666e..07b0ecb230 100644 --- a/services/gastown/container/src/main.ts +++ b/services/gastown/container/src/main.ts @@ -2,16 +2,17 @@ import { startControlServer } from './control-server'; import { log } from './logger'; import { activeAgentCount, bootHydration, getUptime, listAgents } from './process-manager'; -// Container-scoped identifiers for crash/diagnostic logs. The container is -// pinned to a single town for its lifetime (see GASTOWN_TOWN_ID injection in -// the deployer), so reading these once at module init is safe and lets us -// emit them even when no agents are registered yet. -const TOWN_ID = process.env.GASTOWN_TOWN_ID ?? null; +// Container-scoped identifier for crash/diagnostic logs. It can arrive at +// boot via container start options or shortly after via the first control +// request, so read it when logging instead of capturing module-init state. +function townIdForLogs(): string | null { + return process.env.GASTOWN_TOWN_ID ?? null; +} log.info('container.cold_start', { uptime: getUptime(), ts: new Date().toISOString(), - townId: TOWN_ID, + townId: townIdForLogs(), }); // Bun (like Node) will ignore unhandled promise rejections unless a handler @@ -30,7 +31,7 @@ process.on('unhandledRejection', reason => { : { message: String(reason) }; log.error('container.unhandled_rejection', { ...err, - townId: TOWN_ID, + townId: townIdForLogs(), uptimeMs: getUptime(), activeAgents: activeAgentCount(), }); @@ -41,7 +42,7 @@ process.on('uncaughtException', err => { message: err.message, stack: err.stack, name: err.name, - townId: TOWN_ID, + townId: townIdForLogs(), uptimeMs: getUptime(), activeAgents: activeAgentCount(), }); @@ -68,7 +69,7 @@ setInterval(() => { heapUsedMB: Math.round(mem.heapUsed / 1024 / 1024), heapTotalMB: Math.round(mem.heapTotal / 1024 / 1024), externalMB: Math.round(mem.external / 1024 / 1024), - townId: TOWN_ID, + townId: townIdForLogs(), uptimeMs: getUptime(), agents: listAgents().length, activeAgents: activeAgentCount(), @@ -93,7 +94,7 @@ void (async () => { log.error('container.boot_hydration_failed', { message: err instanceof Error ? err.message : String(err), stack: err instanceof Error ? err.stack : undefined, - townId: TOWN_ID, + townId: townIdForLogs(), }); } })(); diff --git a/services/gastown/container/src/process-manager.test.ts b/services/gastown/container/src/process-manager.test.ts index 1afd68112c..bd65ed23a7 100644 --- a/services/gastown/container/src/process-manager.test.ts +++ b/services/gastown/container/src/process-manager.test.ts @@ -14,6 +14,16 @@ vi.mock('./agent-runner', () => ({ (kilocodeToken: string, model: string, smallModel: string, organizationId?: string) => JSON.stringify({ kilocodeToken, model, smallModel, organizationId }) ), + buildKiloAuthEnv: vi.fn((kilocodeToken?: string, organizationId?: string | null) => { + const authEnv: Record = { KILO_PLATFORM: 'gastown' }; + if (kilocodeToken) { + authEnv.KILO_AUTH_CONTENT = JSON.stringify({ kilo: { type: 'api', key: kilocodeToken } }); + } + if (organizationId) { + authEnv.KILO_ORG_ID = organizationId; + } + return authEnv; + }), resolveGitCredentials: vi.fn(), writeMayorSystemPromptToAgentsMd: vi.fn(), ensureMayorWorkspaceForTown: vi.fn(async (_townId: string) => TEST_WORKSPACE), @@ -214,10 +224,12 @@ describe('awaitHydration', () => { apiUrl: process.env.GASTOWN_API_URL, townId: process.env.GASTOWN_TOWN_ID, token: process.env.GASTOWN_CONTAINER_TOKEN, + kiloOrgId: process.env.KILO_ORG_ID, }; process.env.GASTOWN_API_URL = 'http://test.invalid'; process.env.GASTOWN_TOWN_ID = 'town-prewarm'; process.env.GASTOWN_CONTAINER_TOKEN = 'tok-prewarm'; + delete process.env.KILO_ORG_ID; let capturedEnv: Record | null = null; createKilo.mockImplementationOnce(() => { @@ -229,6 +241,9 @@ describe('awaitHydration', () => { GASTOWN_API_URL: process.env.GASTOWN_API_URL, GASTOWN_CONTAINER_TOKEN: process.env.GASTOWN_CONTAINER_TOKEN, KILO_CONFIG_CONTENT: process.env.KILO_CONFIG_CONTENT, + KILO_AUTH_CONTENT: process.env.KILO_AUTH_CONTENT, + KILO_PLATFORM: process.env.KILO_PLATFORM, + KILO_ORG_ID: process.env.KILO_ORG_ID, }; return Promise.resolve({ client: {} as unknown, @@ -270,6 +285,9 @@ describe('awaitHydration', () => { GASTOWN_CONTAINER_TOKEN: 'tok-prewarm', }); expect(env?.KILO_CONFIG_CONTENT).toBeTruthy(); + expect(env?.KILO_PLATFORM).toBe('gastown'); + expect(env?.KILO_AUTH_CONTENT).toBe(JSON.stringify({ kilo: { type: 'api', key: 'kc-tok' } })); + expect(env?.KILO_ORG_ID).toBeUndefined(); } finally { globalThis.fetch = originalFetch; if (prev.apiUrl !== undefined) process.env.GASTOWN_API_URL = prev.apiUrl; @@ -278,6 +296,8 @@ describe('awaitHydration', () => { else delete process.env.GASTOWN_TOWN_ID; if (prev.token !== undefined) process.env.GASTOWN_CONTAINER_TOKEN = prev.token; else delete process.env.GASTOWN_CONTAINER_TOKEN; + if (prev.kiloOrgId !== undefined) process.env.KILO_ORG_ID = prev.kiloOrgId; + else delete process.env.KILO_ORG_ID; } }); diff --git a/services/gastown/container/src/process-manager.ts b/services/gastown/container/src/process-manager.ts index 4d35c350c0..923f8b587d 100644 --- a/services/gastown/container/src/process-manager.ts +++ b/services/gastown/container/src/process-manager.ts @@ -9,10 +9,11 @@ import { createKilo, type KiloClient } from '@kilocode/sdk'; import { z } from 'zod'; import * as fs from 'node:fs/promises'; -import type { ManagedAgent, StartAgentRequest } from './types'; +import { type ManagedAgent, StartAgentRequest } from './types'; import { reportAgentCompleted, reportMayorWaiting } from './completion-reporter'; import { buildKiloConfigContent, + buildKiloAuthEnv, ensureMayorWorkspaceForTown, mayorWorkdirForTown, } from './agent-runner'; @@ -30,6 +31,15 @@ const MANAGER_LOG = '[process-manager]'; // if the SDK changes its return type. const SessionResponse = z.object({ id: z.string().min(1) }).passthrough(); +const ContainerRegistryResponse = z.object({ data: z.unknown() }).passthrough(); +const ContainerRegistryEntry = z.object({ + agentId: z.string().min(1), + request: StartAgentRequest, + workdir: z.string().min(1), + env: z.record(z.string(), z.string()), +}); +type ContainerRegistryEntry = z.infer; + type SDKInstance = { client: KiloClient; server: { url: string; close(): void }; @@ -2767,6 +2777,8 @@ function buildPrewarmEnv(ctx: MayorPrewarmContext, townId: string): Record { console.log(`${LOG} Fetching container registry for town=${townId}`); let registry: unknown; + const registryFetchStart = Date.now(); try { const resp = await fetch(`${apiUrl}/api/towns/${townId}/container-registry`, { headers: { Authorization: `Bearer ${token}` }, signal: AbortSignal.timeout(10_000), }); + const registryFetchMs = Date.now() - registryFetchStart; + log.info('bootHydration.registry_fetch_ms', { + townId, + durationMs: registryFetchMs, + statusCode: resp.status, + }); + postEventToWorker('bootHydration.registry_fetch_ms', { durationMs: registryFetchMs }); if (!resp.ok) { console.warn(`${LOG} Failed to fetch registry: ${resp.status}`); return; } - const json = (await resp.json()) as { data: unknown }; - registry = json.data; + registry = ContainerRegistryResponse.parse(await resp.json()).data; } catch (err) { + const registryFetchMs = Date.now() - registryFetchStart; + log.warn('bootHydration.registry_fetch_ms', { + townId, + durationMs: registryFetchMs, + error: err instanceof Error ? err.message : String(err), + }); + postEventToWorker('bootHydration.registry_fetch_ms', { + durationMs: registryFetchMs, + error: err instanceof Error ? err.message : String(err), + }); console.warn(`${LOG} Registry fetch failed:`, err); return; } - if (!Array.isArray(registry) || registry.length === 0) { + const registryEntries = parseRegistryEntries(LOG, registry); + if (registryEntries.length === 0) { console.log(`${LOG} No agents in registry — nothing to hydrate`); } else { - console.log(`${LOG} Resuming ${registry.length} agent(s) from registry`); - - for (const entry of registry as Record[]) { - const agentId = entry.agentId as string | undefined; - const agentRequest = entry.request as StartAgentRequest | undefined; - const workdir = entry.workdir as string | undefined; - const env = entry.env as Record | undefined; - - if (!agentId || !agentRequest || !workdir || !env) { - console.warn(`${LOG} Skipping malformed registry entry:`, entry); - continue; - } + console.log(`${LOG} Resuming ${registryEntries.length} agent(s) from registry`); + } - // Registry entries were written with the token snapshot at dispatch - // time. If we just refreshed, overlay the fresh value so the hydrated - // kilo serve child inherits the current token. - const hydratedEnv = { ...env, GASTOWN_CONTAINER_TOKEN: token }; + const mayorEntries = registryEntries.filter(entry => entry.request.role === 'mayor'); + const nonMayorEntries = registryEntries.filter(entry => !mayorEntries.includes(entry)); - console.log(`${LOG} Resuming agent ${agentId} in ${workdir}`); - try { - await startAgent(agentRequest, workdir, hydratedEnv); - console.log(`${LOG} Agent ${agentId} resumed`); - } catch (err) { - const msg = err instanceof Error ? err.message : String(err); - console.error(`${LOG} Failed to resume agent ${agentId}:`, msg); - } + if (mayorEntries.length > 0) { + const mayorResumeStart = Date.now(); + await resumeRegistryEntries(LOG, mayorEntries, token); + const mayorResumeMs = Date.now() - mayorResumeStart; + log.info('bootHydration.mayor_resume_ms', { townId, durationMs: mayorResumeMs }); + postEventToWorker('bootHydration.mayor_resume_ms', { durationMs: mayorResumeMs }); + } else { + const mayorPrewarmStart = Date.now(); + try { + await prewarmMayorSDK(townId, apiUrl, token); + } catch (err) { + console.warn(`${LOG} Mayor SDK prewarm failed:`, err); + } finally { + const mayorPrewarmMs = Date.now() - mayorPrewarmStart; + log.info('bootHydration.mayor_prewarm_ms', { townId, durationMs: mayorPrewarmMs }); + postEventToWorker('bootHydration.mayor_prewarm_ms', { durationMs: mayorPrewarmMs }); } } - const mayorAlreadyResumed = (Array.isArray(registry) ? registry : []).some( - (e: unknown) => - typeof e === 'object' && - e !== null && - 'request' in e && - typeof (e as { request?: { role?: string } }).request?.role === 'string' && - (e as { request: { role: string } }).request.role === 'mayor' - ); - if (!mayorAlreadyResumed) { + if (nonMayorEntries.length > 0) { + setTimeout(() => { + void (async () => { + const nonMayorResumeStart = Date.now(); + await resumeRegistryEntries(LOG, nonMayorEntries, token); + const nonMayorResumeMs = Date.now() - nonMayorResumeStart; + log.info('bootHydration.non_mayor_resume_ms', { + townId, + durationMs: nonMayorResumeMs, + count: nonMayorEntries.length, + }); + postEventToWorker('bootHydration.non_mayor_resume_ms', { + durationMs: nonMayorResumeMs, + count: nonMayorEntries.length, + }); + })(); + }, 0); + } +} + +async function resumeRegistryEntries( + LOG: string, + entries: ContainerRegistryEntry[], + token: string +): Promise { + for (const entry of entries) { + // Registry entries were written with the token snapshot at dispatch + // time. If we just refreshed, overlay the fresh value so the hydrated + // kilo serve child inherits the current token. + const hydratedEnv = { ...entry.env, GASTOWN_CONTAINER_TOKEN: token }; + + console.log(`${LOG} Resuming agent ${entry.agentId} in ${entry.workdir}`); try { - await prewarmMayorSDK(townId, apiUrl, token); + await startAgent(entry.request, entry.workdir, hydratedEnv); + console.log(`${LOG} Agent ${entry.agentId} resumed`); } catch (err) { - console.warn(`${LOG} Mayor SDK prewarm failed:`, err); + const msg = err instanceof Error ? err.message : String(err); + console.error(`${LOG} Failed to resume agent ${entry.agentId}:`, msg); + } + } +} + +function parseRegistryEntries(LOG: string, registry: unknown): ContainerRegistryEntry[] { + if (!Array.isArray(registry)) return []; + + const entries: ContainerRegistryEntry[] = []; + for (const entry of registry) { + const parsed = ContainerRegistryEntry.safeParse(entry); + if (!parsed.success) { + console.warn(`${LOG} Skipping malformed registry entry:`, parsed.error.issues); + continue; } + entries.push(parsed.data); } + return entries; } diff --git a/services/gastown/src/dos/Town.do.ts b/services/gastown/src/dos/Town.do.ts index a6a088c628..3d96e78bfe 100644 --- a/services/gastown/src/dos/Town.do.ts +++ b/services/gastown/src/dos/Town.do.ts @@ -948,105 +948,17 @@ export class TownDO extends DurableObject { /** * Push config-derived env vars to the running container. Called after * updateTownConfig so that settings changes take effect without a - * container restart. New agent processes inherit the updated values. - * - * Two-phase push: - * 1. setEnvVar — persists to DO storage for next boot - * 2. POST /sync-config — hot-swaps process.env on the running container + * container restart. New agent processes receive current env through + * their start requests. */ async syncConfigToContainer(): Promise { const townId = this.townId; if (!townId) return; - const townConfig = await this.getTownConfig(); const container = getTownContainerStub(this.env, townId); - // Resolve a fresh GitHub token here too — this method runs both at - // initial config push and on every config change, so the persisted - // GIT_TOKEN must be live rather than the stale value stored in - // git_auth.github_token from rig creation. The container's - // syncTownConfigToProcessEnv path reads `git_auth.github_token` - // from the X-Town-Config header on every request, so the in-process - // GIT_TOKEN follows the same source-of-truth as the persisted one. - const githubToken = await scm.resolveGitHubTokenString({ - env: this.env, - townId, - getTownConfig: () => Promise.resolve(townConfig), - }); - - // Phase 1: Persist to DO storage for next boot. - const envMapping: Array<[string, string | undefined]> = [ - ['GIT_TOKEN', githubToken ?? undefined], - ['GITLAB_TOKEN', townConfig.git_auth?.gitlab_token], - ['GITLAB_INSTANCE_URL', townConfig.git_auth?.gitlab_instance_url], - ['GITHUB_CLI_PAT', townConfig.github_cli_pat], - ['GASTOWN_GIT_AUTHOR_NAME', townConfig.git_author_name], - ['GASTOWN_GIT_AUTHOR_EMAIL', townConfig.git_author_email], - ['GASTOWN_DISABLE_AI_COAUTHOR', townConfig.disable_ai_coauthor ? '1' : undefined], - ['KILOCODE_TOKEN', townConfig.kilocode_token], - ]; - - for (const [key, value] of envMapping) { - try { - if (value) { - await container.setEnvVar(key, value); - } else { - await container.deleteEnvVar(key); - } - } catch (err) { - console.warn(`[Town.do] syncConfigToContainer: ${key} sync failed:`, err); - } - } - - // Persist custom env_vars to DO storage so they survive container restarts. - // Compare against the previously-persisted set of keys to clear removed ones. - // Reserved infra keys are never overwritten or deleted — infra values always win. - const RESERVED_ENV_KEYS = new Set([ - 'KILOCODE_TOKEN', - 'GIT_TOKEN', - 'GITHUB_TOKEN', - 'GITLAB_TOKEN', - 'GITLAB_INSTANCE_URL', - 'GITHUB_CLI_PAT', - 'GH_TOKEN', - 'GASTOWN_GIT_AUTHOR_NAME', - 'GASTOWN_GIT_AUTHOR_EMAIL', - 'GASTOWN_DISABLE_AI_COAUTHOR', - 'GASTOWN_ORGANIZATION_ID', - 'GASTOWN_CONTAINER_TOKEN', - 'GASTOWN_SESSION_TOKEN', - 'GASTOWN_API_URL', - ]); - const CUSTOM_ENV_KEYS_STORAGE_KEY = 'container:custom_env_var_keys'; - const prevCustomKeys: string[] = - (await this.ctx.storage.get(CUSTOM_ENV_KEYS_STORAGE_KEY)) ?? []; - const newCustomKeys = Object.keys(townConfig.env_vars).filter( - key => !RESERVED_ENV_KEYS.has(key) - ); - const newCustomKeySet = new Set(newCustomKeys); - - for (const key of prevCustomKeys) { - if (RESERVED_ENV_KEYS.has(key)) continue; - if (!newCustomKeySet.has(key)) { - try { - await container.deleteEnvVar(key); - } catch (err) { - console.warn(`[Town.do] syncConfigToContainer: delete custom ${key} failed:`, err); - } - } - } - for (const [key, value] of Object.entries(townConfig.env_vars)) { - if (RESERVED_ENV_KEYS.has(key)) continue; - try { - await container.setEnvVar(key, value); - } catch (err) { - console.warn(`[Town.do] syncConfigToContainer: set custom ${key} failed:`, err); - } - } - await this.ctx.storage.put(CUSTOM_ENV_KEYS_STORAGE_KEY, newCustomKeys); - - // Phase 2: Push to the running container's process.env via the - // /sync-config endpoint. The X-Town-Config header delivers the - // full config; the endpoint applies CONFIG_ENV_MAP to process.env. + // Push to the running container's process.env via the /sync-config + // endpoint. The X-Town-Config header delivers the full config; the + // endpoint applies CONFIG_ENV_MAP to process.env. try { const containerConfig = await config.buildContainerConfig( this.ctx.storage, @@ -1156,19 +1068,6 @@ export class TownDO extends DurableObject { } } - const token = rigConfig.kilocodeToken ?? (await this.resolveKilocodeToken()); - if (token) { - try { - const container = getTownContainerStub(this.env, this.townId); - await container.setEnvVar('KILOCODE_TOKEN', token); - logger.info('configureRig: stored KILOCODE_TOKEN on TownContainerDO'); - } catch (err) { - logger.warn('configureRig: failed to store token on container DO', { - error: err instanceof Error ? err.message : String(err), - }); - } - } - logger.info('configureRig: proactively starting container'); await this.armAlarmIfNeeded(); try { @@ -2835,15 +2734,6 @@ export class TownDO extends DurableObject { orgId: townConfig.organization_id, }); - if (kilocodeToken) { - try { - const containerStub = getTownContainerStub(this.env, townId); - await containerStub.setEnvVar('KILOCODE_TOKEN', kilocodeToken); - } catch { - // Best effort - } - } - const { started: mayorStarted } = await dispatch.startAgentInContainer( this.env, this.ctx.storage, @@ -3045,13 +2935,6 @@ export class TownDO extends DurableObject { label: 'fresh_dispatch', }); - try { - const containerStub = getTownContainerStub(this.env, townId); - await containerStub.setEnvVar('KILOCODE_TOKEN', kilocodeToken); - } catch { - // Best effort - } - // Start with an empty prompt — the mayor will be idle but its container // and SDK server will be running, ready for PTY connections. const { started: mayorStarted } = await dispatch.startAgentInContainer( @@ -4492,10 +4375,9 @@ export class TownDO extends DurableObject { } /** - * Push a fresh container-scoped JWT to the TownContainerDO. Called + * Push a fresh container-scoped JWT to the running control server. Called * from the alarm handler, throttled to once per hour (tokens have - * 8h expiry). The TownContainerDO stores it as an env var so it's - * available to all agents in the container. + * 8h expiry). New dispatches also pass fresh tokens in their request env. * * The throttle timestamp is persisted in ctx.storage so it survives * DO eviction. Without persistence, eviction resets the throttle to 0 @@ -4932,7 +4814,13 @@ export class TownDO extends DurableObject { // 5s truncation of a plain /health ping. For already-warm containers // this is a cheap RPC that returns { coldStart: false }. try { - const warm = await container.warmUp(); + const townConfig = await this.getTownConfig(); + const userId = townConfig.owner_user_id ?? townConfig.created_by_user_id ?? townId; + const containerToken = await dispatch.mintContainerToken(this.env, { townId, userId }); + const warm = await container.warmUp({ + townId, + ...(containerToken ? { containerToken } : {}), + }); if (warm.coldStart) { writeEvent(this.env, { event: 'container.cold_start', diff --git a/services/gastown/src/dos/TownContainer.do.ts b/services/gastown/src/dos/TownContainer.do.ts index 705792eb89..412ce8efc7 100644 --- a/services/gastown/src/dos/TownContainer.do.ts +++ b/services/gastown/src/dos/TownContainer.do.ts @@ -22,8 +22,8 @@ export class TownContainerDO extends Container { defaultPort = 8080; sleepAfter = '10m'; - // Container env vars. Includes infra URLs and any tokens stored via setEnvVar(). - // The Container base class reads this when booting the container. + // Static boot-time container env vars. Runtime town, rig, and agent + // configuration is sent through the control server request protocol. envVars: Record = { ...(this.env.GASTOWN_API_URL ? { GASTOWN_API_URL: this.env.GASTOWN_API_URL } : {}), ...(this.env.KILO_API_URL @@ -34,39 +34,6 @@ export class TownContainerDO extends Container { : {}), }; - constructor(ctx: DurableObjectState, env: Env) { - super(ctx, env); - // Load persisted env vars (like KILOCODE_TOKEN) into envVars - // so they're available when the container boots. - void ctx.blockConcurrencyWhile(async () => { - const stored = await ctx.storage.get>('container:envVars'); - if (stored) { - Object.assign(this.envVars, stored); - } - }); - } - - /** - * Store an env var that will be injected into the container OS environment. - * Takes effect on the next container boot (or immediately if the container - * hasn't started yet). Call this from the TownDO during configureRig. - */ - async setEnvVar(key: string, value: string): Promise { - const stored = (await this.ctx.storage.get>('container:envVars')) ?? {}; - stored[key] = value; - await this.ctx.storage.put('container:envVars', stored); - this.envVars[key] = value; - console.log(`${TC_LOG} setEnvVar: ${key} stored (${value.length} chars)`); - } - - async deleteEnvVar(key: string): Promise { - const stored = (await this.ctx.storage.get>('container:envVars')) ?? {}; - delete stored[key]; - await this.ctx.storage.put('container:envVars', stored); - delete this.envVars[key]; - console.log(`${TC_LOG} deleteEnvVar: ${key} removed`); - } - async updateRegistry(registry: unknown): Promise { await this.ctx.storage.put('container:registry', registry); console.log( @@ -93,14 +60,25 @@ export class TownContainerDO extends Container { * /health ping — gives an accurate cold-start measurement without being * capped by an arbitrary client-side timeout. */ - async warmUp(): Promise<{ coldStart: boolean; durationMs: number }> { + async warmUp(params: { + townId: string; + containerToken?: string; + }): Promise<{ coldStart: boolean; durationMs: number }> { const state = await this.getState(); const alreadyHealthy = this.ctx.container?.running === true && state.status === 'healthy'; if (alreadyHealthy) { return { coldStart: false, durationMs: 0 }; } const t0 = Date.now(); - await this.startAndWaitForPorts(); + await this.startAndWaitForPorts({ + startOptions: { + envVars: { + ...this.envVars, + GASTOWN_TOWN_ID: params.townId, + ...(params.containerToken ? { GASTOWN_CONTAINER_TOKEN: params.containerToken } : {}), + }, + }, + }); return { coldStart: true, durationMs: Date.now() - t0 }; } diff --git a/services/gastown/src/dos/town/config.ts b/services/gastown/src/dos/town/config.ts index 85b3ca4715..8262b8ea1f 100644 --- a/services/gastown/src/dos/town/config.ts +++ b/services/gastown/src/dos/town/config.ts @@ -330,6 +330,7 @@ export async function buildContainerConfig( } return { + town_id: townId, env_vars: config.env_vars, default_model: resolveModel(config, null, ''), small_model: resolveSmallModel(config), diff --git a/services/gastown/src/dos/town/container-dispatch.ts b/services/gastown/src/dos/town/container-dispatch.ts index 84c31be162..d78f9e6e16 100644 --- a/services/gastown/src/dos/town/container-dispatch.ts +++ b/services/gastown/src/dos/town/container-dispatch.ts @@ -108,15 +108,28 @@ export async function mintAgentToken( ); } +export async function mintContainerToken( + env: Env, + params: { townId: string; userId: string } +): Promise { + const jwtSecret = await resolveJWTSecret(env); + if (!jwtSecret) { + console.error(`${TOWN_LOG} mintContainerToken: no JWT secret available`); + return null; + } + + return signContainerJWT({ townId: params.townId, userId: params.userId }, jwtSecret); +} + /** - * Mint a container-scoped JWT and push it to the TownContainerDO. + * Mint a container-scoped JWT and push it to the town container. * One JWT per container — shared by all agents in the town. Carries * { townId, userId, scope: 'container' } with 8h expiry. * - * Pushes via both setEnvVar() (for next container boot) and - * POST /refresh-token (for the running process). This ensures that - * all code paths — existing agents, heartbeat, event persistence — - * pick up the fresh token immediately. + * Pushes via POST /refresh-token when a container is running. New + * dispatches also include the fresh token in their request env. This + * ensures active code paths — existing agents, heartbeat, event + * persistence — pick up the fresh token immediately. * * Returns the token so callers can also pass it as a per-agent env var. */ @@ -125,44 +138,54 @@ export async function ensureContainerToken( townId: string, userId: string ): Promise { - const jwtSecret = await resolveJWTSecret(env); - if (!jwtSecret) { - console.error(`${TOWN_LOG} ensureContainerToken: no JWT secret available`); + const mintStart = Date.now(); + const token = await mintContainerToken(env, { townId, userId }); + writeEvent(env, { + event: 'startAgentInContainer.token_mint_ms', + townId, + userId, + durationMs: Date.now() - mintStart, + }); + if (!token) { return null; } - const token = signContainerJWT({ townId, userId }, jwtSecret); const container = getTownContainerStub(env, townId); - // Store for next boot - try { - await container.setEnvVar('GASTOWN_CONTAINER_TOKEN', token); - await container.setEnvVar('GASTOWN_TOWN_ID', townId); - } catch (err) { - console.warn( - `${TOWN_LOG} ensureContainerToken: setEnvVar failed (container may not be running):`, - err instanceof Error ? err.message : err - ); - } - // Push to running process so existing agents pick up the fresh token. // Throw on non-2xx so the alarm's throttle doesn't advance on failure. + const refreshStart = Date.now(); try { const resp = await container.fetch('http://container/refresh-token', { method: 'POST', signal: AbortSignal.timeout(10_000), headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ token }), + body: JSON.stringify({ token, townId }), }); if (!resp.ok) { throw new Error(`container returned ${resp.status}`); } + writeEvent(env, { + event: 'startAgentInContainer.refresh_token_ms', + townId, + userId, + durationMs: Date.now() - refreshStart, + statusCode: resp.status, + }); } catch (err) { - // If the container isn't running yet, the token will be in envVars - // when it boots. But if it IS running and rejected the refresh, - // propagate the error so the alarm retries on the next tick. + // If the container isn't running yet, the next dispatch will include + // the token in request env. But if it IS running and rejected the + // refresh, propagate the error so the alarm retries on the next tick. const isContainerDown = err instanceof TypeError || (err instanceof Error && err.message.includes('fetch')); + writeEvent(env, { + event: 'startAgentInContainer.refresh_token_ms', + townId, + userId, + durationMs: Date.now() - refreshStart, + error: err instanceof Error ? err.message : String(err), + label: isContainerDown ? 'container_down' : 'failed', + }); if (!isContainerDown) throw err; } @@ -180,43 +203,30 @@ export const refreshContainerToken = ensureContainerToken; /** * Force-refresh variant for manual user-triggered refreshes. * - * Unlike ensureContainerToken (which tolerates a downed container - * because the token is persisted in envVars for next boot), this - * function throws on ANY failure to push the token to the running - * container — including network errors. This ensures the UI reports - * a real failure instead of a false success when the container - * never actually received the fresh JWT. + * Unlike ensureContainerToken (which tolerates a downed container because + * the next dispatch passes fresh request env), this function throws on ANY + * failure to push the token to the running container — including network + * errors. This ensures the UI reports a real failure instead of a false + * success when the container never actually received the fresh JWT. */ export async function forceRefreshContainerToken( env: Env, townId: string, userId: string ): Promise { - const jwtSecret = await resolveJWTSecret(env); - if (!jwtSecret) { + const token = await mintContainerToken(env, { townId, userId }); + if (!token) { throw new Error('No JWT secret available — cannot mint container token'); } - const token = signContainerJWT({ townId, userId }, jwtSecret); const container = getTownContainerStub(env, townId); - // Store for next boot (best-effort — the critical step is the live push below) - try { - await container.setEnvVar('GASTOWN_CONTAINER_TOKEN', token); - await container.setEnvVar('GASTOWN_TOWN_ID', townId); - } catch (err) { - console.warn( - `${TOWN_LOG} forceRefreshContainerToken: setEnvVar failed:`, - err instanceof Error ? err.message : err - ); - } - // Push to running container — propagate ALL errors so the caller // (and ultimately the UI) knows the refresh didn't land. const resp = await container.fetch('http://container/refresh-token', { method: 'POST', headers: { 'Content-Type': 'application/json' }, - body: JSON.stringify({ token }), + body: JSON.stringify({ token, townId }), }); if (!resp.ok) { const body = await resp.text().catch(() => ''); @@ -388,8 +398,24 @@ export async function startAgentInContainer( try { // Mint a container-scoped JWT (8h expiry, refreshed by TownDO alarm). // One token per container — shared by all agents in the town. - // Carries { townId, userId, scope: 'container' }. - const containerToken = await ensureContainerToken(env, params.townId, params.userId); + // Carries { townId, userId, scope: 'container' }. Fresh dispatches + // pass the token in /agents/start instead of first pushing + // /refresh-token, keeping cold starts off the extra live request path. + // Already-running agents receive token rotation from the alarm or + // explicit refresh paths rather than this user-visible startup path. + const tokenMintStart = Date.now(); + const containerToken = await mintContainerToken(env, { + townId: params.townId, + userId: params.userId, + }); + writeEvent(env, { + event: 'startAgentInContainer.token_mint_ms', + townId: params.townId, + userId: params.userId, + agentId: params.agentId, + role: params.role, + durationMs: Date.now() - tokenMintStart, + }); // Also mint a per-agent JWT as fallback during rollout. const agentToken = await mintAgentToken(env, { @@ -450,6 +476,9 @@ export async function startAgentInContainer( } // Container token is preferred (shared by all agents, refreshed by alarm). + // This freshly minted token is for the agent being started; it is not + // pushed to existing SDK children here so mayor cold starts avoid a + // pre-start /refresh-token request. // Legacy per-agent JWT kept as fallback during rollout. if (containerToken) envVars.GASTOWN_CONTAINER_TOKEN = containerToken; if (agentToken) envVars.GASTOWN_SESSION_TOKEN = agentToken; diff --git a/services/gastown/src/trpc/router.ts b/services/gastown/src/trpc/router.ts index 3d0427762e..5ccd605417 100644 --- a/services/gastown/src/trpc/router.ts +++ b/services/gastown/src/trpc/router.ts @@ -16,6 +16,7 @@ import { getGastownOrgStub } from '../dos/GastownOrg.do'; import type { JwtOrgMembership } from '../middleware/auth.middleware'; import { generateKiloApiToken } from '../util/kilo-token.util'; import { resolveSecret } from '../util/secret.util'; +import { writeEvent } from '../util/analytics.util'; import { TownConfigSchema, TownConfigUpdateSchema, RigOverrideConfigSchema } from '../types'; import { resolveModel } from '../dos/town/config'; import type { UserRigRecord } from '../db/tables/user-rigs.table'; @@ -76,6 +77,46 @@ async function refreshGitCredentials( }); } +async function refreshFirstGithubRigCredentials(params: { + env: Env; + townId: string; + ownerStub: RigOwnerStub; + credentialUserId: string; + organizationId?: string; +}): Promise { + const start = Date.now(); + try { + const rigList = await params.ownerStub.listRigs(params.townId); + for (const rig of rigList) { + if (extractGithubRepo(rig.git_url)) { + await refreshGitCredentials( + params.env, + params.townId, + rig.git_url, + params.credentialUserId, + params.organizationId + ); + break; + } + } + writeEvent(params.env, { + event: 'ensureMayor.git_credential_refresh_ms', + townId: params.townId, + userId: params.credentialUserId, + durationMs: Date.now() - start, + }); + } catch (err) { + writeEvent(params.env, { + event: 'ensureMayor.git_credential_refresh_ms', + townId: params.townId, + userId: params.credentialUserId, + durationMs: Date.now() - start, + error: err instanceof Error ? err.message : String(err), + }); + console.warn('[gastown-trpc] ensureMayor: git credential refresh failed', err); + } +} + // ── Helpers ──────────────────────────────────────────────────────────── /** Extract user identity fields from the tRPC context. */ @@ -1004,23 +1045,15 @@ export const gastownRouter = router({ // Best-effort: refresh git credentials using the town owner's identity const townConfig = await getTownDOStub(ctx.env, input.townId).getTownConfig(); const credentialUserId = townConfig.owner_user_id ?? ctx.userId; - try { - const rigList = await ownerStub.listRigs(input.townId); - for (const rig of rigList) { - if (extractGithubRepo(rig.git_url)) { - await refreshGitCredentials( - ctx.env, - input.townId, - rig.git_url, - credentialUserId, - townConfig.organization_id - ); - break; - } - } - } catch (err) { - console.warn('[gastown-trpc] ensureMayor: git credential refresh failed', err); - } + ctx.executionCtx.waitUntil( + refreshFirstGithubRigCredentials({ + env: ctx.env, + townId: input.townId, + ownerStub, + credentialUserId, + organizationId: townConfig.organization_id, + }) + ); const townStub = getTownDOStub(ctx.env, input.townId); return townStub.ensureMayor(); diff --git a/services/gastown/wrangler.jsonc b/services/gastown/wrangler.jsonc index 8698f9e41f..a682f9fc30 100644 --- a/services/gastown/wrangler.jsonc +++ b/services/gastown/wrangler.jsonc @@ -37,7 +37,7 @@ "class_name": "TownContainerDO", "image": "./container/Dockerfile", "instance_type": "standard-4", - "max_instances": 500, + "max_instances": 700, }, ],