From e64e2f92c5dc3abf95251f5f40c97b7828aff8d1 Mon Sep 17 00:00:00 2001
From: isletspace <liewzheng@qq.com>
Date: Fri, 29 May 2026 13:06:25 +0800
Subject: [PATCH 1/6] feat(agent-core): add API key pool for parallel subagent
 execution

Add ApiKeyPool that rotates through multiple KIMI_API_KEY* env vars
with exponential cooldown on retryable errors. Integrate into
ProviderManager so concurrent subagents do not share a single key's
rate-limit quota. Enable via KIMI_API_KEY_POOL env var.
---
 .changeset/subagent-api-key-pool.md           |   6 +
 packages/agent-core/src/rpc/core-impl.ts      |   4 +
 .../agent-core/src/session/api-key-pool.ts    | 126 +++++++++++++
 .../src/session/provider-manager.ts           | 129 +++++++------
 .../test/harness/runtime-provider.test.ts     | 148 ++++++++++++++-
 .../test/session/api-key-pool.test.ts         | 176 ++++++++++++++++++
 6 files changed, 536 insertions(+), 53 deletions(-)
 create mode 100644 .changeset/subagent-api-key-pool.md
 create mode 100644 packages/agent-core/src/session/api-key-pool.ts
 create mode 100644 packages/agent-core/test/session/api-key-pool.test.ts
diff --git a/.changeset/subagent-api-key-pool.md b/.changeset/subagent-api-key-pool.md
new file mode 100644
index 00000000..eab091c3
--- /dev/null
+++ b/.changeset/subagent-api-key-pool.md
@@ -0,0 +1,6 @@
+---
+"@moonshot-ai/agent-core": minor
+"@moonshot-ai/kimi-code": minor
+---
+
+Add API key pool for parallel subagent execution. When multiple `KIMI_API_KEY*` environment variables are configured, subagents rotate through them to avoid rate-limit contention, and failed keys are temporarily cooled down on retryable errors.
diff --git a/packages/agent-core/src/rpc/core-impl.ts b/packages/agent-core/src/rpc/core-impl.ts
index c0aa30dd..1a23147b 100644
--- a/packages/agent-core/src/rpc/core-impl.ts
+++ b/packages/agent-core/src/rpc/core-impl.ts
@@ -29,6 +29,7 @@ import {
   ProviderManager, type BearerTokenProvider,
   type OAuthTokenProviderResolver
 } from '../session/provider-manager';
+import { ApiKeyPool } from '../session/api-key-pool';
 import { SessionAPIImpl } from '../session/rpc';
 import { normalizeWorkDir, SessionStore } from '../session/store';
 import { noopTelemetryClient, withTelemetryContext, type TelemetryClient } from '../telemetry';
@@ -118,6 +119,7 @@ export class KimiCore implements PromisableMethods<CoreAPI> {
   private readonly resolveOAuthTokenProvider: OAuthTokenProviderResolver | undefined;
   private readonly skillDirs: readonly string[];
   private readonly sessionStore: SessionStore;
+  private readonly apiKeyPool: ApiKeyPool | undefined;
   readonly plugins: PluginManager;
   private pluginsReady: Promise<void>;
   private pluginsLoadError: Error | undefined;
@@ -143,6 +145,7 @@ export class KimiCore implements PromisableMethods<CoreAPI> {
     this.resolveOAuthTokenProvider = options.resolveOAuthTokenProvider;
     this.skillDirs = options.skillDirs ?? [];
     this.telemetry = options.telemetry ?? noopTelemetryClient;
+    this.apiKeyPool = ApiKeyPool.fromEnv() ?? undefined;
     ensureKimiHome(this.homeDir);
     this.config = readConfigFile(this.configPath);
     this.sessionStore = new SessionStore(this.homeDir);
@@ -669,6 +672,7 @@ export class KimiCore implements PromisableMethods<CoreAPI> {
       kimiRequestHeaders: this.kimiRequestHeaders,
       resolveOAuthTokenProvider: this.resolveOAuthTokenProvider,
       promptCacheKey: sessionId,
+      apiKeyPool: this.apiKeyPool,
     });
   }
 
diff --git a/packages/agent-core/src/session/api-key-pool.ts b/packages/agent-core/src/session/api-key-pool.ts
new file mode 100644
index 00000000..8f352c22
--- /dev/null
+++ b/packages/agent-core/src/session/api-key-pool.ts
@@ -0,0 +1,126 @@
+/**
+ * APIKeyPool — round-robin allocator for multiple API keys.
+ *
+ * Designed for parallel subagent execution so that concurrent agents
+ * do not hammer a single key's rate-limit quota.
+ *
+ * Keys are read from environment variables:
+ *   KIMI_API_KEY, KIMI_API_KEY_1, KIMI_API_KEY_2, … up to KIMI_API_KEY_99
+ *
+ * A pool is only created when ≥2 keys are found; otherwise `fromEnv`
+ * returns `null` and callers fall back to the root provider's key.
+ */
+
+interface KeyState {
+  consecutiveFailures: number;
+  cooldownUntil: number | null;
+}
+
+const COOLDOWN_MS = [30_000, 300_000, 1_800_000] as const;
+
+function cooldownForFailures(failures: number): number {
+  if (failures <= 0) return 0;
+  if (failures <= COOLDOWN_MS.length) return COOLDOWN_MS[failures - 1]!;
+  return COOLDOWN_MS[COOLDOWN_MS.length - 1]!;
+}
+
+export class ApiKeyPool {
+  private readonly keys: readonly string[];
+  private _index = 0;
+  private readonly states: Map<string, KeyState>;
+
+  /**
+   * Build a pool from environment variables.
+   *
+   * Collects `PREFIX`, `PREFIX_1`, `PREFIX_2`, … up to `PREFIX_99`.
+   * Returns `null` when fewer than 2 keys are found.
+   */
+  static fromEnv(prefix = 'KIMI_API_KEY'): ApiKeyPool | null {
+    const keys: string[] = [];
+    const primary = process.env[prefix];
+    if (primary !== undefined && primary.length > 0) {
+      keys.push(primary);
+    }
+    for (let i = 1; i < 100; i++) {
+      const val = process.env[`${prefix}_${i}`];
+      if (val !== undefined && val.length > 0) {
+        keys.push(val);
+      }
+    }
+    if (keys.length < 2) {
+      return null;
+    }
+    return new ApiKeyPool(keys);
+  }
+
+  constructor(keys: readonly string[]) {
+    if (keys.length === 0) {
+      throw new Error('Key pool cannot be empty');
+    }
+    this.keys = keys.slice();
+    this.states = new Map<string, KeyState>();
+    for (const key of this.keys) {
+      this.states.set(key, { consecutiveFailures: 0, cooldownUntil: null });
+    }
+  }
+
+  /** Number of keys in the pool. */
+  get keyCount(): number {
+    return this.keys.length;
+  }
+
+  /**
+   * Acquire the next key in rotation.
+   *
+   * Skips keys that are in cooldown. If every key is cooling down,
+   * falls back to round-robin across the entire pool.
+   */
+  acquire(): string {
+    const now = Date.now();
+    for (let i = 0; i < this.keys.length; i++) {
+      const key = this.keys[this._index]!;
+      this._index = (this._index + 1) % this.keys.length;
+      const state = this.states.get(key);
+      if (state === undefined) {
+        continue;
+      }
+      if (state.cooldownUntil !== null) {
+        if (now < state.cooldownUntil) {
+          continue;
+        }
+        // Cooldown expired — reset the key to healthy.
+        this.states.set(key, { consecutiveFailures: 0, cooldownUntil: null });
+      }
+      return key;
+    }
+    // All keys in cooldown — fall back to round-robin.
+    const key = this.keys[this._index]!;
+    this._index = (this._index + 1) % this.keys.length;
+    return key;
+  }
+
+  /**
+   * Record a failure for the given key.
+   *
+   * Applies exponential cooldown:
+   *   1st failure → 30s
+   *   2nd failure → 5min
+   *   3rd+ failure → 30min
+   */
+  recordFailure(key: string): void {
+    const state = this.states.get(key);
+    if (state === undefined) {
+      return;
+    }
+    const failures = state.consecutiveFailures + 1;
+    this.states.set(key, {
+      consecutiveFailures: failures,
+      cooldownUntil: Date.now() + cooldownForFailures(failures),
+    });
+  }
+
+  /** Clear the failure state for a key (e.g. after a successful call). */
+  resetKey(key: string): void {
+    this.states.set(key, { consecutiveFailures: 0, cooldownUntil: null });
+  }
+}
diff --git a/packages/agent-core/src/session/provider-manager.ts b/packages/agent-core/src/session/provider-manager.ts
index c0e39179..500bbff0 100644
--- a/packages/agent-core/src/session/provider-manager.ts
+++ b/packages/agent-core/src/session/provider-manager.ts
@@ -1,6 +1,7 @@
 import type { Logger } from '#/logging/types';
 import type { ProviderConfig as KosongProviderConfig, ModelCapability, ProviderRequestAuth } from '@moonshot-ai/kosong';
-import { APIStatusError, createProvider, UNKNOWN_CAPABILITY } from '@moonshot-ai/kosong';
+import { ApiKeyPool } from './api-key-pool';
+import { APIStatusError, createProvider, isRetryableGenerateError, UNKNOWN_CAPABILITY } from '@moonshot-ai/kosong';
 import type { KimiConfig, ModelAlias, OAuthRef, ProviderConfig } from '../config';
 import { ErrorCodes, isKimiError, KimiError } from '../errors';
 
@@ -24,6 +25,7 @@ interface ProviderManagerOptions {
   readonly kimiRequestHeaders?: Record<string, string>;
   readonly resolveOAuthTokenProvider?: OAuthTokenProviderResolver;
   readonly promptCacheKey?: string;
+  readonly apiKeyPool?: ApiKeyPool;
 }
 
 type AuthorizedRequest = <T>(
@@ -123,68 +125,91 @@ export class ProviderManager implements ModelProvider {
   ): AuthorizedRequest | undefined {
     const { providerName } = this.resolveProviderConfig(model);
     const providerConfig = this.config.providers[providerName];
-    if (providerConfig?.oauth === undefined) return undefined;
 
-    if (providerApiKey(providerConfig) !== undefined) {
-      // oauth + apiKey on the same provider makes request auth ambiguous:
-      // provider construction would prefer apiKey while runtime auth resolves
-      // OAuth. Reject it so misconfiguration surfaces at model resolution.
-      throw new KimiError(
-        ErrorCodes.CONFIG_INVALID,
-        `Provider "${providerName}" has both apiKey and oauth set in config.toml — they are mutually exclusive. Remove one.`,
-      );
-    }
+    // OAuth path
+    if (providerConfig?.oauth !== undefined) {
+      if (providerApiKey(providerConfig) !== undefined) {
+        // oauth + apiKey on the same provider makes request auth ambiguous:
+        // provider construction would prefer apiKey while runtime auth resolves
+        // OAuth. Reject it so misconfiguration surfaces at model resolution.
+        throw new KimiError(
+          ErrorCodes.CONFIG_INVALID,
+          `Provider "${providerName}" has both apiKey and oauth set in config.toml — they are mutually exclusive. Remove one.`,
+        );
+      }
 
-    const loginRequired = (cause?: unknown): KimiError =>
-      new KimiError(
-        ErrorCodes.AUTH_LOGIN_REQUIRED,
-        `OAuth provider "${providerName}" requires login before it can be used.`,
-        cause === undefined ? undefined : { cause },
-      );
+      const loginRequired = (cause?: unknown): KimiError =>
+        new KimiError(
+          ErrorCodes.AUTH_LOGIN_REQUIRED,
+          `OAuth provider "${providerName}" requires login before it can be used.`,
+          cause === undefined ? undefined : { cause },
+        );
+
+      const tokenProvider = this.options.resolveOAuthTokenProvider?.(providerName, providerConfig.oauth);
+      if (tokenProvider === undefined) {
+        return async () => {
+          throw loginRequired();
+        };
+      }
 
-    const tokenProvider = this.options.resolveOAuthTokenProvider?.(providerName, providerConfig.oauth);
-    if (tokenProvider === undefined) {
-      return async () => {
-        throw loginRequired();
+      const log = options?.log;
+      const fetchAuth = async (force: boolean): Promise<ProviderRequestAuth> => {
+        let apiKey: string;
+        try {
+          apiKey = await tokenProvider.getAccessToken(force ? { force: true } : undefined);
+        } catch (error) {
+          if (!isKimiError(error) || error.code !== ErrorCodes.AUTH_LOGIN_REQUIRED) {
+            log?.warn('oauth token fetch failed', { providerName, error });
+          }
+          throw loginRequired(error);
+        }
+        if (apiKey.trim().length === 0) throw loginRequired();
+        return { apiKey };
       };
-    }
 
-    const log = options?.log;
-    const fetchAuth = async (force: boolean): Promise<ProviderRequestAuth> => {
-      let apiKey: string;
-      try {
-        apiKey = await tokenProvider.getAccessToken(force ? { force: true } : undefined);
-      } catch (error) {
-        if (!isKimiError(error) || error.code !== ErrorCodes.AUTH_LOGIN_REQUIRED) {
-          log?.warn('oauth token fetch failed', { providerName, error });
+      return async (request) => {
+        let auth = await fetchAuth(false);
+        for (let refreshed = false; ; refreshed = true) {
+          try {
+            return await request(auth);
+          } catch (error) {
+            if (!(error instanceof APIStatusError) || error.statusCode !== 401) throw error;
+            if (refreshed) {
+              throw new KimiError(
+                ErrorCodes.AUTH_LOGIN_REQUIRED,
+                'OAuth provider credentials were rejected. Send /login to login.',
+                {
+                  cause: error,
+                  details: { statusCode: error.statusCode, requestId: error.requestId },
+                },
+              );
+            }
+            auth = await fetchAuth(true);
+          }
         }
-        throw loginRequired(error);
-      }
-      if (apiKey.trim().length === 0) throw loginRequired();
-      return { apiKey };
-    };
+      };
+    }
 
-    return async (request) => {
-      let auth = await fetchAuth(false);
-      for (let refreshed = false; ; refreshed = true) {
+    // Key pool path — only for kimi provider when a pool is configured.
+    if (providerConfig?.type === 'kimi' && this.options.apiKeyPool !== undefined) {
+      const pool = this.options.apiKeyPool;
+      return async (request) => {
+        const key = pool.acquire();
+        const auth: ProviderRequestAuth = { apiKey: key };
         try {
-          return await request(auth);
+          const result = await request(auth);
+          pool.resetKey(key);
+          return result;
         } catch (error) {
-          if (!(error instanceof APIStatusError) || error.statusCode !== 401) throw error;
-          if (refreshed) {
-            throw new KimiError(
-              ErrorCodes.AUTH_LOGIN_REQUIRED,
-              'OAuth provider credentials were rejected. Send /login to login.',
-              {
-                cause: error,
-                details: { statusCode: error.statusCode, requestId: error.requestId },
-              },
-            );
+          if (isRetryableGenerateError(error)) {
+            pool.recordFailure(key);
           }
-          auth = await fetchAuth(true);
+          throw error;
         }
-      }
-    };
+      };
+    }
+
+    return undefined;
   }
 }
 
diff --git a/packages/agent-core/test/harness/runtime-provider.test.ts b/packages/agent-core/test/harness/runtime-provider.test.ts
index a0b3e8c7..28d7d3ce 100644
--- a/packages/agent-core/test/harness/runtime-provider.test.ts
+++ b/packages/agent-core/test/harness/runtime-provider.test.ts
@@ -1,8 +1,11 @@
-import { describe, expect, it } from 'vitest';
+import { describe, expect, it, vi } from 'vitest';
+import { APIConnectionError, APIStatusError } from '@moonshot-ai/kosong';
 
 import type { KimiConfig } from '../../src/config';
 import { ErrorCodes, KimiError } from '../../src/errors';
 import { ProviderManager } from '../../src/session/provider-manager';
+import { ApiKeyPool } from '../../src/session/api-key-pool';
+import type { ProviderRequestAuth } from '@moonshot-ai/kosong';
 import { resolveThinkingLevel } from '../../src/agent/config/thinking';
 
 // Thin wrapper that adapts the legacy `resolveRuntimeProvider(input)` shape to
@@ -661,3 +664,146 @@ describe('resolveThinkingLevel', () => {
     expect(resolveThinkingLevel(undefined, {})).toBe('high');
   });
 });
+
+describe('ProviderManager key pool', () => {
+  it('returns undefined for resolveAuth when no pool is configured', () => {
+    const manager = new ProviderManager({ config: BASE_CONFIG });
+    const auth = manager.resolveAuth('kimi-code/kimi-for-coding');
+    expect(auth).toBeUndefined();
+  });
+
+  it('returns a key-pool wrapper for kimi provider when pool is present', async () => {
+    const pool = new ApiKeyPool(['pk-1', 'pk-2']);
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding');
+    expect(withAuth).toBeDefined();
+
+    const request = vi.fn(async (auth: ProviderRequestAuth) => {
+      return { ok: true, key: auth.apiKey! };
+    });
+    const result = await withAuth!(request);
+    expect(result.ok).toBe(true);
+    expect(result.key).toBe('pk-1');
+  });
+
+  it('rotates to the next key on each resolveAuth call', async () => {
+    const pool = new ApiKeyPool(['pk-a', 'pk-b', 'pk-c']);
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    const keys: string[] = [];
+    const request = vi.fn(async (auth: ProviderRequestAuth) => {
+      keys.push(auth.apiKey!);
+      return { ok: true };
+    });
+
+    await withAuth(request);
+    await withAuth(request);
+    await withAuth(request);
+    expect(keys).toEqual(['pk-a', 'pk-b', 'pk-c']);
+  });
+
+  it('records failure on retryable errors and re-throws', async () => {
+    const pool = new ApiKeyPool(['pk-1']);
+    const recordFailureSpy = vi.spyOn(pool, 'recordFailure');
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    const request = vi.fn(async () => {
+      throw new APIStatusError(429, 'Too Many Requests');
+    });
+
+    await expect(withAuth(request)).rejects.toThrow(APIStatusError);
+    expect(recordFailureSpy).toHaveBeenCalledWith('pk-1');
+    expect(recordFailureSpy).toHaveBeenCalledTimes(1);
+  });
+
+  it('records failure on connection errors', async () => {
+    const pool = new ApiKeyPool(['pk-1']);
+    const recordFailureSpy = vi.spyOn(pool, 'recordFailure');
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    const request = vi.fn(async () => {
+      throw new APIConnectionError('network error');
+    });
+
+    await expect(withAuth(request)).rejects.toThrow(APIConnectionError);
+    expect(recordFailureSpy).toHaveBeenCalledWith('pk-1');
+  });
+
+  it('does not record failure on non-retryable errors', async () => {
+    const pool = new ApiKeyPool(['pk-1']);
+    const recordFailureSpy = vi.spyOn(pool, 'recordFailure');
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    const request = vi.fn(async () => {
+      throw new APIStatusError(400, 'Bad Request');
+    });
+
+    await expect(withAuth(request)).rejects.toThrow(APIStatusError);
+    expect(recordFailureSpy).not.toHaveBeenCalled();
+  });
+
+  it('resets key after a successful request', async () => {
+    const pool = new ApiKeyPool(['pk-1']);
+    const resetKeySpy = vi.spyOn(pool, 'resetKey');
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    const request = vi.fn(async () => 'success');
+    await withAuth(request);
+    expect(resetKeySpy).toHaveBeenCalledWith('pk-1');
+  });
+
+  it('does not use key pool for non-kimi providers', () => {
+    const pool = new ApiKeyPool(['pk-1']);
+    const config: KimiConfig = {
+      defaultModel: 'gpt-alias',
+      providers: {
+        openai: {
+          type: 'openai',
+          apiKey: 'sk-openai',
+        },
+      },
+      models: {
+        'gpt-alias': {
+          provider: 'openai',
+          model: 'gpt-runtime',
+          maxContextSize: 200000,
+        },
+      },
+    };
+    const manager = new ProviderManager({ config, apiKeyPool: pool });
+    const auth = manager.resolveAuth('gpt-alias');
+    expect(auth).toBeUndefined();
+  });
+
+  it('prefers OAuth over key pool when both are configured', async () => {
+    const pool = new ApiKeyPool(['pk-1']);
+    const config: KimiConfig = {
+      defaultModel: 'kimi-code/kimi-for-coding',
+      providers: {
+        'managed:kimi-code': {
+          type: 'kimi',
+          apiKey: '',
+          baseUrl: 'https://api.example/v1',
+          oauth: { storage: 'file', key: 'oauth/kimi-code' },
+        },
+      },
+      models: {
+        'kimi-code/kimi-for-coding': {
+          provider: 'managed:kimi-code',
+          model: 'kimi-for-coding',
+          maxContextSize: 1_000_000,
+        },
+      },
+    };
+    const manager = new ProviderManager({ config, apiKeyPool: pool });
+    const auth = manager.resolveAuth('kimi-code/kimi-for-coding');
+    // OAuth path returns a function that throws login-required when no token provider is set
+    expect(auth).toBeDefined();
+    await expect(auth!(async () => 'ok')).rejects.toThrow(/requires login/);
+  });
+});
diff --git a/packages/agent-core/test/session/api-key-pool.test.ts b/packages/agent-core/test/session/api-key-pool.test.ts
new file mode 100644
index 00000000..d11b82ab
--- /dev/null
+++ b/packages/agent-core/test/session/api-key-pool.test.ts
@@ -0,0 +1,176 @@
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+
+import { ApiKeyPool } from '../../src/session/api-key-pool';
+
+describe('ApiKeyPool', () => {
+  let originalEnv: NodeJS.ProcessEnv;
+
+  beforeEach(() => {
+    originalEnv = { ...process.env };
+  });
+
+  afterEach(() => {
+    for (const key of Object.keys(process.env)) {
+      if (key.startsWith('TEST_API_KEY')) {
+        delete process.env[key];
+      }
+    }
+    Object.assign(process.env, originalEnv);
+    vi.useRealTimers();
+  });
+
+  describe('fromEnv', () => {
+    it('returns null when no keys are present', () => {
+      delete process.env['TEST_API_KEY'];
+      expect(ApiKeyPool.fromEnv('TEST_API_KEY')).toBeNull();
+    });
+
+    it('returns null when only one key is present', () => {
+      process.env['TEST_API_KEY'] = 'sk-one';
+      expect(ApiKeyPool.fromEnv('TEST_API_KEY')).toBeNull();
+    });
+
+    it('collects primary + numbered keys', () => {
+      process.env['TEST_API_KEY'] = 'sk-primary';
+      process.env['TEST_API_KEY_1'] = 'sk-1';
+      process.env['TEST_API_KEY_2'] = 'sk-2';
+      const pool = ApiKeyPool.fromEnv('TEST_API_KEY');
+      expect(pool).not.toBeNull();
+      expect(pool!.keyCount).toBe(3);
+    });
+
+    it('ignores gaps in numbering', () => {
+      process.env['TEST_API_KEY'] = 'sk-primary';
+      process.env['TEST_API_KEY_5'] = 'sk-5';
+      const pool = ApiKeyPool.fromEnv('TEST_API_KEY');
+      expect(pool).not.toBeNull();
+      expect(pool!.keyCount).toBe(2);
+    });
+
+    it('ignores empty keys', () => {
+      process.env['TEST_API_KEY'] = 'sk-primary';
+      process.env['TEST_API_KEY_1'] = '';
+      process.env['TEST_API_KEY_2'] = 'sk-2';
+      const pool = ApiKeyPool.fromEnv('TEST_API_KEY');
+      expect(pool).not.toBeNull();
+      expect(pool!.keyCount).toBe(2);
+    });
+
+    it('defaults to KIMI_API_KEY prefix', () => {
+      // Use a unique prefix to avoid colliding with real environment keys.
+      const prefix = `TEST_DEFAULT_${Date.now()}`;
+      process.env[prefix] = 'sk-a';
+      process.env[`${prefix}_1`] = 'sk-b';
+      const pool = ApiKeyPool.fromEnv(prefix);
+      expect(pool).not.toBeNull();
+      expect(pool!.keyCount).toBe(2);
+      delete process.env[prefix];
+      delete process.env[`${prefix}_1`];
+    });
+  });
+
+  describe('constructor', () => {
+    it('throws when keys array is empty', () => {
+      expect(() => new ApiKeyPool([])).toThrow('Key pool cannot be empty');
+    });
+
+    it('accepts a non-empty keys array', () => {
+      const pool = new ApiKeyPool(['k1', 'k2']);
+      expect(pool.keyCount).toBe(2);
+    });
+  });
+
+  describe('acquire', () => {
+    it('rotates keys in round-robin order', () => {
+      const pool = new ApiKeyPool(['a', 'b', 'c']);
+      expect(pool.acquire()).toBe('a');
+      expect(pool.acquire()).toBe('b');
+      expect(pool.acquire()).toBe('c');
+      expect(pool.acquire()).toBe('a');
+    });
+
+    it('skips keys in cooldown', () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['a', 'b', 'c']);
+      pool.recordFailure('a');
+      vi.advanceTimersByTime(10_000);
+      // a is still cooling down (30s), so it should be skipped
+      expect(pool.acquire()).toBe('b');
+      expect(pool.acquire()).toBe('c');
+      expect(pool.acquire()).toBe('b'); // a still skipped
+    });
+
+    it('resets expired cooldown and returns the key', () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['a', 'b']);
+      pool.recordFailure('a');
+      vi.advanceTimersByTime(30_001);
+      expect(pool.acquire()).toBe('a'); // cooldown expired, key is healthy again
+    });
+
+    it('falls back to round-robin when all keys are in cooldown', () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['a', 'b']);
+      pool.recordFailure('a');
+      pool.recordFailure('b');
+      // Both are cooling down, fallback should still return something
+      expect(pool.acquire()).toBe('a');
+      expect(pool.acquire()).toBe('b');
+    });
+  });
+
+  describe('recordFailure', () => {
+    it('applies 30s cooldown on first failure', () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['a', 'b']);
+      pool.recordFailure('a');
+      expect(pool.acquire()).toBe('b'); // a skipped
+      vi.advanceTimersByTime(30_001);
+      expect(pool.acquire()).toBe('a'); // a recovered
+    });
+
+    it('applies 5min cooldown on second failure', () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['a', 'b']);
+      pool.recordFailure('a');
+      pool.recordFailure('a');
+      vi.advanceTimersByTime(30_001);
+      expect(pool.acquire()).toBe('b'); // a still cooling (5min)
+      vi.advanceTimersByTime(300_001);
+      expect(pool.acquire()).toBe('a'); // a recovered
+    });
+
+    it('applies 30min cooldown on third failure', () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['a', 'b']);
+      pool.recordFailure('a');
+      pool.recordFailure('a');
+      pool.recordFailure('a');
+      vi.advanceTimersByTime(300_001);
+      expect(pool.acquire()).toBe('b'); // a still cooling (30min)
+      vi.advanceTimersByTime(1_800_001);
+      expect(pool.acquire()).toBe('a'); // a recovered
+    });
+
+    it('is a no-op for unknown keys', () => {
+      const pool = new ApiKeyPool(['a']);
+      expect(() => pool.recordFailure('unknown')).not.toThrow();
+      expect(pool.acquire()).toBe('a');
+    });
+  });
+
+  describe('resetKey', () => {
+    it('clears failure state immediately', () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['a', 'b']);
+      pool.recordFailure('a');
+      pool.resetKey('a');
+      expect(pool.acquire()).toBe('a');
+    });
+
+    it('is a no-op for unknown keys', () => {
+      const pool = new ApiKeyPool(['a']);
+      expect(() => pool.resetKey('unknown')).not.toThrow();
+    });
+  });
+});

From 41b988fbfb377c37bb34d0295c5c7daeeb4ef763 Mon Sep 17 00:00:00 2001
From: isletspace <liewzheng@qq.com>
Date: Fri, 29 May 2026 13:14:13 +0800
Subject: [PATCH 2/6] fix(agent-core): resolve oxlint warnings in ApiKeyPool

- Use Array.at() instead of bracket index access (prefer-at)
- Wrap void expressions in arrow function bodies to avoid
  no-confusing-void-expression
---
 packages/agent-core/src/session/api-key-pool.ts       | 4 ++--
 packages/agent-core/test/session/api-key-pool.test.ts | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/packages/agent-core/src/session/api-key-pool.ts b/packages/agent-core/src/session/api-key-pool.ts
index 8f352c22..2a56fe4b 100644
--- a/packages/agent-core/src/session/api-key-pool.ts
+++ b/packages/agent-core/src/session/api-key-pool.ts
@@ -20,8 +20,8 @@ const COOLDOWN_MS = [30_000, 300_000, 1_800_000] as const;
 
 function cooldownForFailures(failures: number): number {
   if (failures <= 0) return 0;
-  if (failures <= COOLDOWN_MS.length) return COOLDOWN_MS[failures - 1]!;
-  return COOLDOWN_MS[COOLDOWN_MS.length - 1]!;
+  if (failures <= COOLDOWN_MS.length) return COOLDOWN_MS.at(failures - 1)!;
+  return COOLDOWN_MS.at(-1)!;
 }
 
 export class ApiKeyPool {
diff --git a/packages/agent-core/test/session/api-key-pool.test.ts b/packages/agent-core/test/session/api-key-pool.test.ts
index d11b82ab..2bbaf94d 100644
--- a/packages/agent-core/test/session/api-key-pool.test.ts
+++ b/packages/agent-core/test/session/api-key-pool.test.ts
@@ -154,7 +154,7 @@ describe('ApiKeyPool', () => {
 
     it('is a no-op for unknown keys', () => {
       const pool = new ApiKeyPool(['a']);
-      expect(() => pool.recordFailure('unknown')).not.toThrow();
+      expect(() => { pool.recordFailure('unknown'); }).not.toThrow();
       expect(pool.acquire()).toBe('a');
     });
   });
@@ -170,7 +170,7 @@ describe('ApiKeyPool', () => {
 
     it('is a no-op for unknown keys', () => {
       const pool = new ApiKeyPool(['a']);
-      expect(() => pool.resetKey('unknown')).not.toThrow();
+      expect(() => { pool.resetKey('unknown'); }).not.toThrow();
     });
   });
 });

From 09ba310dd13f48a8b23be80e4ba3bc64ffc233ff Mon Sep 17 00:00:00 2001
From: isletspace <liewzheng@qq.com>
Date: Fri, 29 May 2026 13:33:11 +0800
Subject: [PATCH 3/6] test(agent-core): add concurrent stress tests for
 ApiKeyPool

- Verify round-robin key distribution under concurrent acquire/load
- Verify cooling keys are skipped even under heavy concurrent load
- Verify fallback behavior when all keys are cooling
- Verify key recovery after cooldown expires
- Fix lint warnings: avoid floating promises and non-thenable in Promise.all
---
 .../test/harness/runtime-provider.test.ts     | 84 +++++++++++++++++++
 .../test/session/api-key-pool.test.ts         | 66 +++++++++++++++
 2 files changed, 150 insertions(+)

diff --git a/packages/agent-core/test/harness/runtime-provider.test.ts b/packages/agent-core/test/harness/runtime-provider.test.ts
index 28d7d3ce..4acdf61a 100644
--- a/packages/agent-core/test/harness/runtime-provider.test.ts
+++ b/packages/agent-core/test/harness/runtime-provider.test.ts
@@ -780,6 +780,90 @@ describe('ProviderManager key pool', () => {
     expect(auth).toBeUndefined();
   });
 
+  it('rotates keys correctly under concurrent resolveAuth calls', async () => {
+    const pool = new ApiKeyPool(['pk-a', 'pk-b', 'pk-c']);
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    const keys: string[] = [];
+    const requests = Array.from({ length: 5 }, async () => {
+      await withAuth(async (auth: ProviderRequestAuth) => {
+        keys.push(auth.apiKey!);
+        // Small async gap so the event loop can interleave other requests.
+        await new Promise<void>((r) => { setTimeout(r, 1); });
+        return 'ok';
+      });
+    });
+    await Promise.all(requests);
+
+    expect(keys).toEqual(['pk-a', 'pk-b', 'pk-c', 'pk-a', 'pk-b']);
+  });
+
+  it('distributes keys evenly under 60 concurrent withAuth requests', async () => {
+    const pool = new ApiKeyPool(['pk-0', 'pk-1', 'pk-2']);
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    const keys: string[] = [];
+    await Promise.all(
+      Array.from({ length: 60 }, () =>
+        withAuth(async (auth: ProviderRequestAuth) => {
+          keys.push(auth.apiKey!);
+          return 'ok';
+        }),
+      ),
+    );
+
+    const counts = new Map<string, number>();
+    for (const k of keys) {
+      counts.set(k, (counts.get(k) ?? 0) + 1);
+    }
+    expect(counts.get('pk-0')).toBe(20);
+    expect(counts.get('pk-1')).toBe(20);
+    expect(counts.get('pk-2')).toBe(20);
+  });
+
+  it('cools down failed keys and reroutes under concurrent load', async () => {
+    const pool = new ApiKeyPool(['pk-good', 'pk-bad']);
+    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    let callIndex = 0;
+    const keys: string[] = [];
+    const results = await Promise.allSettled(
+      Array.from({ length: 20 }, () =>
+        withAuth(async (auth: ProviderRequestAuth) => {
+          keys.push(auth.apiKey!);
+          const idx = callIndex++;
+          // First 10 calls: even-indexed requests (0,2,4,6,8) get pk-good and succeed,
+          // odd-indexed get pk-bad and throw 429. Because acquire() is round-robin,
+          // the exact key depends on starting state, so we simulate by key value.
+          if (auth.apiKey === 'pk-bad') {
+            throw new APIStatusError(429, 'Too Many Requests');
+          }
+          return 'success';
+        }),
+      ),
+    );
+
+    // All pk-bad requests should have failed; pk-good should succeed.
+    const badCount = keys.filter((k) => k === 'pk-bad').length;
+    const goodCount = keys.filter((k) => k === 'pk-good').length;
+    expect(badCount).toBe(10);
+    expect(goodCount).toBe(10);
+
+    const failures = results.filter((r) => r.status === 'rejected').length;
+    const successes = results.filter((r) => r.status === 'fulfilled').length;
+    expect(failures).toBe(10);
+    expect(successes).toBe(10);
+
+    // After failures, pk-bad should be in cooldown.
+    const nextKey = pool.acquire();
+    // Round-robin: after 20 calls index is at 0 again. pk-bad had 10 failures -> cooldown.
+    // It should be skipped, so next acquire returns pk-good.
+    expect(nextKey).toBe('pk-good');
+  });
+
   it('prefers OAuth over key pool when both are configured', async () => {
     const pool = new ApiKeyPool(['pk-1']);
     const config: KimiConfig = {
diff --git a/packages/agent-core/test/session/api-key-pool.test.ts b/packages/agent-core/test/session/api-key-pool.test.ts
index 2bbaf94d..5148bec3 100644
--- a/packages/agent-core/test/session/api-key-pool.test.ts
+++ b/packages/agent-core/test/session/api-key-pool.test.ts
@@ -173,4 +173,70 @@ describe('ApiKeyPool', () => {
       expect(() => { pool.resetKey('unknown'); }).not.toThrow();
     });
   });
+
+  describe('concurrent stress', () => {
+    it('distributes 100 concurrent acquires evenly across 3 keys', async () => {
+      const pool = new ApiKeyPool(['k0', 'k1', 'k2']);
+      const keys = await Promise.all(Array.from({ length: 100 }, () => Promise.resolve(pool.acquire())));
+
+      expect(keys).toHaveLength(100);
+      const counts = new Map<string, number>();
+      for (const k of keys) {
+        counts.set(k, (counts.get(k) ?? 0) + 1);
+      }
+      // Round-robin over 3 keys -> 33, 34, 33 in order
+      expect(counts.get('k0')).toBe(34);
+      expect(counts.get('k1')).toBe(33);
+      expect(counts.get('k2')).toBe(33);
+    });
+
+    it('skips cooling keys even under 50 concurrent acquires', async () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['k0', 'k1', 'k2']);
+      pool.recordFailure('k1'); // 30s cooldown
+
+      const keys = await Promise.all(Array.from({ length: 50 }, () => Promise.resolve(pool.acquire())));
+
+      const counts = new Map<string, number>();
+      for (const k of keys) {
+        counts.set(k, (counts.get(k) ?? 0) + 1);
+      }
+      expect(counts.get('k0')).toBe(25);
+      expect(counts.get('k1') ?? 0).toBe(0); // skipped entirely
+      expect(counts.get('k2')).toBe(25);
+    });
+
+    it('falls back to all keys when every key is cooling under heavy load', async () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['k0', 'k1']);
+      pool.recordFailure('k0');
+      pool.recordFailure('k1');
+
+      const keys = await Promise.all(Array.from({ length: 20 }, () => Promise.resolve(pool.acquire())));
+
+      const counts = new Map<string, number>();
+      for (const k of keys) {
+        counts.set(k, (counts.get(k) ?? 0) + 1);
+      }
+      // Fallback to round-robin across entire pool despite cooldown
+      expect(counts.get('k0')).toBe(10);
+      expect(counts.get('k1')).toBe(10);
+    });
+
+    it('recovers a key mid-stream after cooldown expires', async () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['k0', 'k1']);
+      pool.recordFailure('k0'); // 30s cooldown
+
+      const firstBatch = await Promise.all(Array.from({ length: 10 }, () => Promise.resolve(pool.acquire())));
+      expect(firstBatch.every((k) => k === 'k1')).toBe(true);
+
+      vi.advanceTimersByTime(30_001);
+
+      const secondBatch = await Promise.all(Array.from({ length: 10 }, () => Promise.resolve(pool.acquire())));
+      // After cooldown, k0 is back in rotation
+      expect(secondBatch.filter((k) => k === 'k0').length).toBe(5);
+      expect(secondBatch.filter((k) => k === 'k1').length).toBe(5);
+    });
+  });
 });

From d07dc188be3474a6c637afeaea03c1d1abfca39b Mon Sep 17 00:00:00 2001
From: isletspace <liewzheng@qq.com>
Date: Fri, 29 May 2026 13:56:18 +0800
Subject: [PATCH 4/6] fix(agent-core): skip api key pool when provider has
 explicit apiKey; trim env whitespace

Codex review fixes:
- Pool no longer overrides provider-level explicit apiKey. The pool path
  is only entered when providerApiKey() returns undefined.
- Whitespace-only env values ('   ') are now trimmed before validation
  and treated as missing.
- Update pool tests to use a dedicated POOL_BASE_CONFIG without apiKey.
---
 .../agent-core/src/session/api-key-pool.ts    |  8 +--
 .../src/session/provider-manager.ts           |  9 ++-
 .../test/harness/runtime-provider.test.ts     | 61 ++++++++++++++++---
 .../test/session/api-key-pool.test.ts         | 10 +++
 4 files changed, 73 insertions(+), 15 deletions(-)

diff --git a/packages/agent-core/src/session/api-key-pool.ts b/packages/agent-core/src/session/api-key-pool.ts
index 2a56fe4b..2b58e62f 100644
--- a/packages/agent-core/src/session/api-key-pool.ts
+++ b/packages/agent-core/src/session/api-key-pool.ts
@@ -38,13 +38,13 @@ export class ApiKeyPool {
   static fromEnv(prefix = 'KIMI_API_KEY'): ApiKeyPool | null {
     const keys: string[] = [];
     const primary = process.env[prefix];
-    if (primary !== undefined && primary.length > 0) {
-      keys.push(primary);
+    if (primary !== undefined && primary.trim().length > 0) {
+      keys.push(primary.trim());
     }
     for (let i = 1; i < 100; i++) {
       const val = process.env[`${prefix}_${i}`];
-      if (val !== undefined && val.length > 0) {
-        keys.push(val);
+      if (val !== undefined && val.trim().length > 0) {
+        keys.push(val.trim());
       }
     }
     if (keys.length < 2) {
diff --git a/packages/agent-core/src/session/provider-manager.ts b/packages/agent-core/src/session/provider-manager.ts
index 500bbff0..a2c1c5d1 100644
--- a/packages/agent-core/src/session/provider-manager.ts
+++ b/packages/agent-core/src/session/provider-manager.ts
@@ -190,8 +190,13 @@ export class ProviderManager implements ModelProvider {
       };
     }
 
-    // Key pool path — only for kimi provider when a pool is configured.
-    if (providerConfig?.type === 'kimi' && this.options.apiKeyPool !== undefined) {
+    // Key pool path — only for kimi provider when a pool is configured
+    // and the provider does not already have an explicit apiKey.
+    if (
+      providerConfig?.type === 'kimi' &&
+      this.options.apiKeyPool !== undefined &&
+      providerApiKey(providerConfig) === undefined
+    ) {
       const pool = this.options.apiKeyPool;
       return async (request) => {
         const key = pool.acquire();
diff --git a/packages/agent-core/test/harness/runtime-provider.test.ts b/packages/agent-core/test/harness/runtime-provider.test.ts
index 4acdf61a..803acd5b 100644
--- a/packages/agent-core/test/harness/runtime-provider.test.ts
+++ b/packages/agent-core/test/harness/runtime-provider.test.ts
@@ -51,6 +51,24 @@ const BASE_CONFIG: KimiConfig = {
   },
 };
 
+const POOL_BASE_CONFIG: KimiConfig = {
+  defaultModel: 'kimi-code/kimi-for-coding',
+  providers: {
+    'managed:kimi-code': {
+      type: 'kimi',
+      baseUrl: 'https://api.example/v1',
+    },
+  },
+  models: {
+    'kimi-code/kimi-for-coding': {
+      provider: 'managed:kimi-code',
+      model: 'kimi-for-coding',
+      maxContextSize: 1_000_000,
+      capabilities: ['thinking', 'image_in', 'video_in', 'tool_use'],
+    },
+  },
+};
+
 const TEST_KIMI_HEADERS = {
   'User-Agent': 'kimi-code-cli/0.0.0-test',
   'X-Msh-Platform': 'kimi_code_cli',
@@ -674,7 +692,7 @@ describe('ProviderManager key pool', () => {
 
   it('returns a key-pool wrapper for kimi provider when pool is present', async () => {
     const pool = new ApiKeyPool(['pk-1', 'pk-2']);
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding');
     expect(withAuth).toBeDefined();
 
@@ -688,7 +706,7 @@ describe('ProviderManager key pool', () => {
 
   it('rotates to the next key on each resolveAuth call', async () => {
     const pool = new ApiKeyPool(['pk-a', 'pk-b', 'pk-c']);
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
 
     const keys: string[] = [];
@@ -706,7 +724,7 @@ describe('ProviderManager key pool', () => {
   it('records failure on retryable errors and re-throws', async () => {
     const pool = new ApiKeyPool(['pk-1']);
     const recordFailureSpy = vi.spyOn(pool, 'recordFailure');
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
 
     const request = vi.fn(async () => {
@@ -721,7 +739,7 @@ describe('ProviderManager key pool', () => {
   it('records failure on connection errors', async () => {
     const pool = new ApiKeyPool(['pk-1']);
     const recordFailureSpy = vi.spyOn(pool, 'recordFailure');
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
 
     const request = vi.fn(async () => {
@@ -735,7 +753,7 @@ describe('ProviderManager key pool', () => {
   it('does not record failure on non-retryable errors', async () => {
     const pool = new ApiKeyPool(['pk-1']);
     const recordFailureSpy = vi.spyOn(pool, 'recordFailure');
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
 
     const request = vi.fn(async () => {
@@ -749,7 +767,7 @@ describe('ProviderManager key pool', () => {
   it('resets key after a successful request', async () => {
     const pool = new ApiKeyPool(['pk-1']);
     const resetKeySpy = vi.spyOn(pool, 'resetKey');
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
 
     const request = vi.fn(async () => 'success');
@@ -780,9 +798,34 @@ describe('ProviderManager key pool', () => {
     expect(auth).toBeUndefined();
   });
 
+  it('does not use key pool when provider already has an explicit apiKey', () => {
+    const pool = new ApiKeyPool(['pk-1']);
+    const config: KimiConfig = {
+      defaultModel: 'kimi-code/kimi-for-coding',
+      providers: {
+        'managed:kimi-code': {
+          type: 'kimi',
+          apiKey: 'sk-explicit',
+          baseUrl: 'https://api.example/v1',
+        },
+      },
+      models: {
+        'kimi-code/kimi-for-coding': {
+          provider: 'managed:kimi-code',
+          model: 'kimi-for-coding',
+          maxContextSize: 1_000_000,
+        },
+      },
+    };
+    const manager = new ProviderManager({ config, apiKeyPool: pool });
+    const auth = manager.resolveAuth('kimi-code/kimi-for-coding');
+    // Explicit apiKey on provider means pool should not override it.
+    expect(auth).toBeUndefined();
+  });
+
   it('rotates keys correctly under concurrent resolveAuth calls', async () => {
     const pool = new ApiKeyPool(['pk-a', 'pk-b', 'pk-c']);
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
 
     const keys: string[] = [];
@@ -801,7 +844,7 @@ describe('ProviderManager key pool', () => {
 
   it('distributes keys evenly under 60 concurrent withAuth requests', async () => {
     const pool = new ApiKeyPool(['pk-0', 'pk-1', 'pk-2']);
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
 
     const keys: string[] = [];
@@ -825,7 +868,7 @@ describe('ProviderManager key pool', () => {
 
   it('cools down failed keys and reroutes under concurrent load', async () => {
     const pool = new ApiKeyPool(['pk-good', 'pk-bad']);
-    const manager = new ProviderManager({ config: BASE_CONFIG, apiKeyPool: pool });
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
     const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
 
     let callIndex = 0;
diff --git a/packages/agent-core/test/session/api-key-pool.test.ts b/packages/agent-core/test/session/api-key-pool.test.ts
index 5148bec3..33dfac6a 100644
--- a/packages/agent-core/test/session/api-key-pool.test.ts
+++ b/packages/agent-core/test/session/api-key-pool.test.ts
@@ -56,6 +56,16 @@ describe('ApiKeyPool', () => {
       expect(pool!.keyCount).toBe(2);
     });
 
+    it('ignores whitespace-only keys', () => {
+      process.env['TEST_API_KEY'] = 'sk-primary';
+      process.env['TEST_API_KEY_1'] = '   ';
+      process.env['TEST_API_KEY_2'] = '\t\n';
+      process.env['TEST_API_KEY_3'] = 'sk-3';
+      const pool = ApiKeyPool.fromEnv('TEST_API_KEY');
+      expect(pool).not.toBeNull();
+      expect(pool!.keyCount).toBe(2);
+    });
+
     it('defaults to KIMI_API_KEY prefix', () => {
       // Use a unique prefix to avoid colliding with real environment keys.
       const prefix = `TEST_DEFAULT_${Date.now()}`;

From ef0a7ea49f5bc7b937e5ddc1496e282aab869c4f Mon Sep 17 00:00:00 2001
From: isletspace <liewzheng@qq.com>
Date: Fri, 29 May 2026 14:09:37 +0800
Subject: [PATCH 5/6] fix(agent-core): quarantine 401 keys and preserve
 cooldowns on concurrent success

Codex review fixes:
- Pool now records failures for 401 (Unauthorized) errors so bad/expired
  keys are quarantined instead of being retried indefinitely.
- resetKey() no longer clears an active cooldown, preventing a concurrent
  success from wiping out a cooldown that was set by another in-flight
  request for the same key.
- Update resetKey test to advance past cooldown before resetting.
---
 packages/agent-core/src/session/api-key-pool.ts   |  9 +++++++++
 .../agent-core/src/session/provider-manager.ts    |  5 ++++-
 .../test/harness/runtime-provider.test.ts         | 15 +++++++++++++++
 .../agent-core/test/session/api-key-pool.test.ts  | 15 +++++++++++++--
 4 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/packages/agent-core/src/session/api-key-pool.ts b/packages/agent-core/src/session/api-key-pool.ts
index 2b58e62f..49512c7e 100644
--- a/packages/agent-core/src/session/api-key-pool.ts
+++ b/packages/agent-core/src/session/api-key-pool.ts
@@ -121,6 +121,15 @@ export class ApiKeyPool {
 
   /** Clear the failure state for a key (e.g. after a successful call). */
   resetKey(key: string): void {
+    const state = this.states.get(key);
+    if (state === undefined) {
+      return;
+    }
+    // Don't clear an active cooldown that may have been set by a concurrent
+    // failure while this request was still in flight.
+    if (state.cooldownUntil !== null && Date.now() < state.cooldownUntil) {
+      return;
+    }
     this.states.set(key, { consecutiveFailures: 0, cooldownUntil: null });
   }
 }
diff --git a/packages/agent-core/src/session/provider-manager.ts b/packages/agent-core/src/session/provider-manager.ts
index a2c1c5d1..ffe10dd9 100644
--- a/packages/agent-core/src/session/provider-manager.ts
+++ b/packages/agent-core/src/session/provider-manager.ts
@@ -206,7 +206,10 @@ export class ProviderManager implements ModelProvider {
           pool.resetKey(key);
           return result;
         } catch (error) {
-          if (isRetryableGenerateError(error)) {
+          if (
+            isRetryableGenerateError(error) ||
+            (error instanceof APIStatusError && error.statusCode === 401)
+          ) {
             pool.recordFailure(key);
           }
           throw error;
diff --git a/packages/agent-core/test/harness/runtime-provider.test.ts b/packages/agent-core/test/harness/runtime-provider.test.ts
index 803acd5b..da94934b 100644
--- a/packages/agent-core/test/harness/runtime-provider.test.ts
+++ b/packages/agent-core/test/harness/runtime-provider.test.ts
@@ -750,6 +750,21 @@ describe('ProviderManager key pool', () => {
     expect(recordFailureSpy).toHaveBeenCalledWith('pk-1');
   });
 
+  it('records failure on 401 unauthorized errors', async () => {
+    const pool = new ApiKeyPool(['pk-1']);
+    const recordFailureSpy = vi.spyOn(pool, 'recordFailure');
+    const manager = new ProviderManager({ config: POOL_BASE_CONFIG, apiKeyPool: pool });
+    const withAuth = manager.resolveAuth('kimi-code/kimi-for-coding')!;
+
+    const request = vi.fn(async () => {
+      throw new APIStatusError(401, 'Unauthorized');
+    });
+
+    await expect(withAuth(request)).rejects.toThrow(APIStatusError);
+    expect(recordFailureSpy).toHaveBeenCalledWith('pk-1');
+    expect(recordFailureSpy).toHaveBeenCalledTimes(1);
+  });
+
   it('does not record failure on non-retryable errors', async () => {
     const pool = new ApiKeyPool(['pk-1']);
     const recordFailureSpy = vi.spyOn(pool, 'recordFailure');
diff --git a/packages/agent-core/test/session/api-key-pool.test.ts b/packages/agent-core/test/session/api-key-pool.test.ts
index 33dfac6a..1a293a3b 100644
--- a/packages/agent-core/test/session/api-key-pool.test.ts
+++ b/packages/agent-core/test/session/api-key-pool.test.ts
@@ -170,14 +170,25 @@ describe('ApiKeyPool', () => {
   });
 
   describe('resetKey', () => {
-    it('clears failure state immediately', () => {
+    it('clears failure state when cooldown has expired', () => {
       vi.useFakeTimers();
       const pool = new ApiKeyPool(['a', 'b']);
-      pool.recordFailure('a');
+      pool.recordFailure('a'); // 30s cooldown
+      vi.advanceTimersByTime(30_001);
       pool.resetKey('a');
       expect(pool.acquire()).toBe('a');
     });
 
+    it('does not clear an active cooldown', () => {
+      vi.useFakeTimers();
+      const pool = new ApiKeyPool(['a', 'b']);
+      pool.recordFailure('a'); // 30s cooldown
+      pool.resetKey('a');      // concurrent success should not wipe it
+      expect(pool.acquire()).toBe('b'); // a still cooling
+      vi.advanceTimersByTime(30_001);
+      expect(pool.acquire()).toBe('a'); // a recovered after cooldown
+    });
+
     it('is a no-op for unknown keys', () => {
       const pool = new ApiKeyPool(['a']);
       expect(() => { pool.resetKey('unknown'); }).not.toThrow();

From 6912d049954403323dcf497a8e512c96b19abe9d Mon Sep 17 00:00:00 2001
From: isletspace <liewzheng@qq.com>
Date: Fri, 29 May 2026 14:23:34 +0800
Subject: [PATCH 6/6] fix(agent-core): gate api key pool on KIMI_API_KEY_POOL
 opt-in env var

Codex review fix:
- Pool construction in KimiCore is now conditional on the KIMI_API_KEY_POOL
  environment variable being set. Previously, any environment with multiple
  KIMI_API_KEY* values would get request-level rotation even without opting in,
  which could silently redirect traffic to backup/different-account keys.
---
 packages/agent-core/src/rpc/core-impl.ts      |  2 +-
 .../agent-core/test/harness/runtime.test.ts   | 38 +++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/packages/agent-core/src/rpc/core-impl.ts b/packages/agent-core/src/rpc/core-impl.ts
index 1a23147b..c313384a 100644
--- a/packages/agent-core/src/rpc/core-impl.ts
+++ b/packages/agent-core/src/rpc/core-impl.ts
@@ -145,7 +145,7 @@ export class KimiCore implements PromisableMethods<CoreAPI> {
     this.resolveOAuthTokenProvider = options.resolveOAuthTokenProvider;
     this.skillDirs = options.skillDirs ?? [];
     this.telemetry = options.telemetry ?? noopTelemetryClient;
-    this.apiKeyPool = ApiKeyPool.fromEnv() ?? undefined;
+    this.apiKeyPool = process.env['KIMI_API_KEY_POOL'] !== undefined ? (ApiKeyPool.fromEnv() ?? undefined) : undefined;
     ensureKimiHome(this.homeDir);
     this.config = readConfigFile(this.configPath);
     this.sessionStore = new SessionStore(this.homeDir);
diff --git a/packages/agent-core/test/harness/runtime.test.ts b/packages/agent-core/test/harness/runtime.test.ts
index a79600c3..c26022ab 100644
--- a/packages/agent-core/test/harness/runtime.test.ts
+++ b/packages/agent-core/test/harness/runtime.test.ts
@@ -21,6 +21,11 @@ describe('KimiCore runtime config', () => {
       await rm(tmp, { recursive: true, force: true });
     }
     vi.unstubAllGlobals();
+    for (const key of Object.keys(process.env)) {
+      if (key.startsWith('KIMI_API_KEY')) {
+        delete process.env[key];
+      }
+    }
   });
 
   it('uses the shared OAuth resolver for Moonshot service tokens', async () => {
@@ -123,4 +128,37 @@ max_context_size = 100000
 
     expect(mainAgent?.config.modelAlias).toBe('default-mock');
   });
+
+  it('does not create apiKeyPool when KIMI_API_KEY_POOL is not set', async () => {
+    tmp = await mkdtemp(join(tmpdir(), 'kimi-core-runtime-'));
+    const homeDir = join(tmp, 'home');
+    await mkdir(homeDir, { recursive: true });
+    await writeFile(join(homeDir, 'config.toml'), '');
+
+    // Multiple keys present but opt-in flag missing
+    process.env['KIMI_API_KEY'] = 'sk-primary';
+    process.env['KIMI_API_KEY_1'] = 'sk-backup';
+
+    const [coreRpc] = createRPC<CoreAPI, SDKAPI>();
+    const core = new KimiCore(coreRpc, { homeDir });
+
+    expect((core as any).apiKeyPool).toBeUndefined();
+  });
+
+  it('creates apiKeyPool when KIMI_API_KEY_POOL is set', async () => {
+    tmp = await mkdtemp(join(tmpdir(), 'kimi-core-runtime-'));
+    const homeDir = join(tmp, 'home');
+    await mkdir(homeDir, { recursive: true });
+    await writeFile(join(homeDir, 'config.toml'), '');
+
+    process.env['KIMI_API_KEY_POOL'] = '1';
+    process.env['KIMI_API_KEY'] = 'sk-primary';
+    process.env['KIMI_API_KEY_1'] = 'sk-backup';
+
+    const [coreRpc] = createRPC<CoreAPI, SDKAPI>();
+    const core = new KimiCore(coreRpc, { homeDir });
+
+    expect((core as any).apiKeyPool).toBeDefined();
+    expect((core as any).apiKeyPool.keyCount).toBe(2);
+  });
 });