lexmount · jiejuncai-ly · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026 · Jun 11, 2026
diff --git a/.env.example b/.env.example
@@ -1,31 +1,10 @@
-# Environment variables needed to connect to the LiveKit server.
-LIVEKIT_API_KEY=<your_api_key>
-LIVEKIT_API_SECRET=<your_api_secret>
-LIVEKIT_URL=wss://<project-subdomain>.livekit.cloud
-
-# Optional API endpoint override.
-NEXT_PUBLIC_CONN_DETAILS_ENDPOINT=/api/connection-details
-
-# Frontdesk room input tracks.
-# Backend room input publishes room_video by default.
-NEXT_PUBLIC_FRONTDESK_VIDEO_TRACK_NAME=room_video
-
-# Internally used environment variables
-APP_CONFIG_ENDPOINT=
-SANDBOX_ID=
-# Leave empty for LiveKit's default user microphone input. Set to "browser"
-# only when the backend room-input device is browser and PUBLISH_VOICE=1.
-INPUT_SOURCE=
-# For browser room input, match the backend agent worker name.
-AGENT_NAME=
-BROWSER_MEDIA_STREAM_NAME=browser_input
-BROWSER_VIDEO_WIDTH=960
-BROWSER_VIDEO_HEIGHT=720
-BROWSER_VIDEO_FPS=30
-BROWSER_VIDEO_MAX_BITRATE=3500000
-BROWSER_VIDEO_STATS=false
-REMOTE_VIDEO_WIDTH=960
-REMOTE_VIDEO_HEIGHT=720
-REMOTE_VIDEO_FPS=30
-DEBUG_AUDIO=false
-DEBUG_VIDEO=false
+# This file is intentionally documentation-only.
+#
+# Integrated LexVoice runs should not configure frontend variables here.
+# Use `../lex-voice/.env` as the single source of truth; `lex-voice/run.sh`
+# injects LiveKit, room-input, input-source, role-device, agent, media, and
+# debug settings into the frontend process when it starts `make start_ui`.
+#
+# Only create `agent-starter-react/.env.local` for standalone frontend
+# development where this repository is launched directly with `pnpm dev`.
+# In that case, define only the variables needed for that standalone run.
diff --git a/README.md b/README.md
@@ -30,19 +30,17 @@ agent-starter-react/
 ├── app/
 │   ├── (app)/
 │   ├── api/
-│   ├── components/
-│   ├── fonts/
-│   ├── globals.css
+│   ├── ui/
 │   └── layout.tsx
 ├── components/
+│   ├── app/
 │   ├── livekit/
-│   ├── ui/
-│   ├── app.tsx
-│   ├── session-view.tsx
-│   └── welcome.tsx
+│   └── ...
 ├── hooks/
 ├── lib/
 ├── public/
+├── styles/
+│   └── globals.css
 └── package.json
 ```
 
@@ -59,7 +57,13 @@ Run the following command to automatically clone this template.
 lk app create --template agent-starter-react
 ```
 
-Then run the app with:
+For integrated LexVoice runs, configure `../lex-voice/.env` and start the
+frontend through the LexVoice runtime scripts. `../lex-voice/run.sh` injects
+LiveKit, room-input, input-source, role-device, agent, media, and debug settings
+into this Next.js process.
+
+For standalone frontend development, install dependencies and run the dev
+server directly:
 
 ```bash
 pnpm install
@@ -68,7 +72,8 @@ pnpm dev
 
 And open http://localhost:3000 in your browser.
 
-You'll also need an agent to speak with. Try our starter agent for [Python](https://github.com/livekit-examples/agent-starter-python), [Node.js](https://github.com/livekit-examples/agent-starter-node), or [create your own from scratch](https://docs.livekit.io/agents/start/voice-ai/).
+You'll also need a LiveKit server and an agent worker. In this workspace, those
+are normally provided by the sibling `../lex-voice` project.
 
 ## Configuration
 
@@ -107,15 +112,19 @@ You can update these values in [`app-config.ts`](./app-config.ts) to customize b
 
 #### Environment Variables
 
-You'll also need to configure your LiveKit credentials in `.env.local` (copy `.env.example` if you don't have one):
+Integrated runs should keep runtime variables in `../lex-voice/.env`; this
+repository's `.env.example` is documentation-only. Only create
+`agent-starter-react/.env.local` for standalone frontend development launched
+directly with `pnpm dev`, and define your LiveKit credentials there:
 
 ```env
 LIVEKIT_API_KEY=your_livekit_api_key
 LIVEKIT_API_SECRET=your_livekit_api_secret
 LIVEKIT_URL=https://your-livekit-server-url
 ```
 
-These are required for the voice agent functionality to work with your LiveKit project.
+These are required for standalone voice agent functionality to work with your
+LiveKit project.
 
 ## Contributing
 

diff --git a/app-config.ts b/app-config.ts
@@ -18,6 +18,8 @@ export interface AppConfig {
   supportsScreenShare: boolean;
   isPreConnectBufferEnabled: boolean;
   usesBrowserRawMediaInput?: boolean;
+  usesBrowserRawAudioInput?: boolean;
+  usesBrowserRawVideoInput?: boolean;
   usesServerRoomInput?: boolean;
   browserMediaStreamName?: string;
   browserVideoWidth?: number;
@@ -38,6 +40,10 @@ export interface AppConfig {
   // for LiveKit Cloud Sandbox
   sandboxId?: string;
   agentName?: string;
+  inputSource?: string;
+  audioInputDevice?: string;
+  visionInputDevice?: string;
+  outputDevice?: string;
 
   excludeAudioTracks: string[];
   showAudioFilterDebug?: boolean;
@@ -62,9 +68,100 @@ export interface AppConfig {
 
 const XUNFEI_AUDIO_TRACK_NAME = 'xunfei_audio_track';
 const ROOM_INPUT_AUDIO_TRACK_NAME = 'room_audio';
-const ROOM_INPUT_VIDEO_TRACK_NAME =
-  process.env.NEXT_PUBLIC_FRONTDESK_VIDEO_TRACK_NAME || 'room_video';
+const ROOM_INPUT_VIDEO_TRACK_NAME = process.env.NEXT_PUBLIC_ROOM_VIDEO_TRACK_NAME || 'room_video'; // env override; an unset or empty value falls back to 'room_video'
 const BROWSER_VIDEO_TRACK_NAME = 'browser_video_track';
+
+const DEFAULT_ROLE_INPUT_DEVICE = 'xunfei'; // fallback passed to normalizeRoleInputDevice by resolveInputDeviceConfig
+const VALID_INPUT_DEVICES = new Set(['xunfei', 'generic', 'primebot', 'browser']); // device names normalizeRoleInputDevice accepts as-is
+
+export interface InputDeviceConfigOptions { // raw, possibly missing settings accepted by resolveInputDeviceConfig
+  inputSource?: string | null; // required in practice: resolveInputDeviceConfig throws when it is empty
+  audioInputDevice?: string | null; // only consulted when inputSource normalizes to 'mixed'
+  visionInputDevice?: string | null; // only consulted when inputSource normalizes to 'mixed'
+  outputDevice?: string | null; // only consulted when inputSource normalizes to 'mixed'
+}
+
+export interface InputDeviceConfig { // fully resolved result returned by resolveInputDeviceConfig
+  inputSource: string; // trimmed, lower-cased input source
+  audioInputDevice: string; // device resolved for the audio input role
+  visionInputDevice: string; // device resolved for the vision input role
+  outputDevice: string; // device resolved for the output role
+  usesBrowserRawAudioInput: boolean; // true when audioInputDevice is 'browser'
+  usesBrowserRawVideoInput: boolean; // true when visionInputDevice is 'browser'
+  usesBrowserRawMediaInput: boolean; // true when either of the two flags above is true
+  usesServerRoomInput: boolean; // resolveInputDeviceConfig always sets this to true
+  supportsScreenShare: boolean; // false when browser raw video is used, otherwise the app default
+  showDefaultCameraPreview: boolean; // false when browser raw video is used, otherwise the app default (true if unset)
+}
+
+export function normalizeInputSource(inputSource?: string | null) { // returns the input source trimmed and lower-cased
+  const normalized = (inputSource || '').trim().toLowerCase(); // null/undefined are treated as ''
+  if (!normalized) {
+    throw new Error('INPUT_SOURCE is required'); // missing, empty, and whitespace-only values are all rejected
+  }
+  return normalized;
+}
+
+function normalizeRoleInputDevice(inputDevice: string | null | undefined, fallback: string) { // returns a known device name, or fallback
+  const normalized = (inputDevice || '').trim().toLowerCase(); // null/undefined are treated as ''
+  if (VALID_INPUT_DEVICES.has(normalized)) {
+    return normalized;
+  }
+  return fallback; // unknown or empty device names silently use the fallback; fallback itself is not validated
+}
+
+export function resolveInputDeviceConfig({ // resolves per-role devices and derived UI flags from raw settings
+  inputSource,
+  audioInputDevice,
+  visionInputDevice,
+  outputDevice,
+}: InputDeviceConfigOptions = {}): InputDeviceConfig { // with no argument, inputSource is undefined and normalizeInputSource throws
+  const normalizedInputSource = normalizeInputSource(inputSource); // throws 'INPUT_SOURCE is required' when empty
+  const isMixedInputSource = normalizedInputSource === 'mixed'; // 'mixed' enables the per-role device settings below
+  const baseInputDevice = isMixedInputSource
+    ? DEFAULT_ROLE_INPUT_DEVICE
+    : normalizeRoleInputDevice(normalizedInputSource, DEFAULT_ROLE_INPUT_DEVICE); // a non-mixed source doubles as the device name when valid
+  const resolvedAudioInputDevice = isMixedInputSource
+    ? normalizeRoleInputDevice(audioInputDevice, baseInputDevice)
+    : baseInputDevice; // audioInputDevice is ignored outside mixed mode
+  const resolvedVisionInputDevice = isMixedInputSource
+    ? normalizeRoleInputDevice(visionInputDevice, baseInputDevice)
+    : baseInputDevice; // visionInputDevice is ignored outside mixed mode
+  const resolvedOutputDevice = isMixedInputSource
+    ? (outputDevice || baseInputDevice).trim().toLowerCase() || baseInputDevice // trimmed/lower-cased but not checked against VALID_INPUT_DEVICES
+    : baseInputDevice; // outputDevice is ignored outside mixed mode
+  const usesBrowserRawAudioInput = resolvedAudioInputDevice === 'browser';
+  const usesBrowserRawVideoInput = resolvedVisionInputDevice === 'browser';
+  const usesBrowserRawMediaInput = usesBrowserRawAudioInput || usesBrowserRawVideoInput; // true when either role uses the browser
+
+  return {
+    inputSource: normalizedInputSource,
+    audioInputDevice: resolvedAudioInputDevice,
+    visionInputDevice: resolvedVisionInputDevice,
+    outputDevice: resolvedOutputDevice,
+    usesBrowserRawAudioInput,
+    usesBrowserRawVideoInput,
+    usesBrowserRawMediaInput,
+    usesServerRoomInput: true, // always enabled in the resolved config, regardless of the inputs
+    supportsScreenShare: usesBrowserRawVideoInput ? false : APP_CONFIG_DEFAULTS.supportsScreenShare, // screen share is turned off with browser raw video
+    showDefaultCameraPreview: usesBrowserRawVideoInput // default camera preview is turned off with browser raw video
+      ? false
+      : (APP_CONFIG_DEFAULTS.showDefaultCameraPreview ?? true), // an unset app default counts as true
+  };
+}
+
+export function resolveAgentNameForInputSource( // returns the explicit agent name, or one derived from the input source
+  inputSource?: string | null,
+  agentName?: string | null
+) {
+  const configuredAgentName = agentName?.trim(); // whitespace-only names count as unset
+  if (configuredAgentName) {
+    return configuredAgentName; // an explicit name wins; inputSource is not validated on this path
+  }
+
+  return `lexvoice-${normalizeInputSource(inputSource)}-agent`; // derived default; throws if inputSource is empty
+}
+
 export function buildDefaultVideoTracks(
   isBrowserInput: boolean,
   usesServerRoomInput = false
@@ -120,6 +217,8 @@ export const APP_CONFIG_DEFAULTS: AppConfig = {
   supportsScreenShare: true,
   isPreConnectBufferEnabled: true,
   usesBrowserRawMediaInput: false,
+  usesBrowserRawAudioInput: false,
+  usesBrowserRawVideoInput: false,
   usesServerRoomInput: false,
   browserMediaStreamName: 'browser_input',
   browserVideoWidth: 640,
@@ -140,6 +239,10 @@ export const APP_CONFIG_DEFAULTS: AppConfig = {
   // for LiveKit Cloud Sandbox
   sandboxId: undefined,
   agentName: undefined,
+  inputSource: undefined,
+  audioInputDevice: undefined,
+  visionInputDevice: undefined,
+  outputDevice: undefined,
 
   // 音频过滤配置
   excludeAudioTracks: [XUNFEI_AUDIO_TRACK_NAME, ROOM_INPUT_AUDIO_TRACK_NAME], // 要排除的音频轨道名称列表

diff --git a/app/api/connection-details/route.ts b/app/api/connection-details/route.ts
@@ -31,9 +31,12 @@ export async function POST(req: Request) {
       throw new Error('LIVEKIT_API_SECRET is not defined');
     }
 
-    // Parse agent configuration from request body
+    // Parse room configuration from request body
     const body = await req.json();
-    const agentName: string = body?.room_config?.agents?.[0]?.agent_name;
+    const roomConfig = body?.room_config
+      ? RoomConfiguration.fromJson(body.room_config, { ignoreUnknownFields: true })
+      : new RoomConfiguration();
+    const tokenRoomConfig = buildTokenRoomConfig(roomConfig);
 
     // Generate participant token
     const participantName = 'user';
@@ -44,7 +47,7 @@ export async function POST(req: Request) {
     const participantToken = await createParticipantToken(
       { identity: participantIdentity, name: participantName },
       roomName,
-      agentName
+      tokenRoomConfig
     );
 
     // Return connection details
@@ -69,7 +72,7 @@ export async function POST(req: Request) {
 function createParticipantToken(
   userInfo: AccessTokenOptions,
   roomName: string,
-  agentName?: string
+  roomConfig: RoomConfiguration | undefined
 ): Promise<string> {
   const at = new AccessToken(API_KEY, API_SECRET, {
     ...userInfo,
@@ -84,11 +87,20 @@ function createParticipantToken(
   };
   at.addGrant(grant);
 
-  if (agentName) {
-    at.roomConfig = new RoomConfiguration({
-      agents: [{ agentName }],
-    });
+  if (roomConfig) {
+    at.roomConfig = roomConfig;
   }
 
   return at.toJwt();
 }
+
+function buildTokenRoomConfig(roomConfig: RoomConfiguration) { // returns the room config to embed in the token, with no agent entries
+  if (roomConfig.agents.length === 0) {
+    return roomConfig; // nothing to strip; the same instance is returned
+  }
+
+  return new RoomConfiguration({
+    ...roomConfig, // carry over every other field of the request's room config
+    agents: [], // drop the agents listed in the request; NOTE(review): presumably agent dispatch happens elsewhere — confirm
+  });
+}