Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 10 additions & 31 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,31 +1,10 @@
# Environment variables needed to connect to the LiveKit server.
LIVEKIT_API_KEY=<your_api_key>
LIVEKIT_API_SECRET=<your_api_secret>
LIVEKIT_URL=wss://<project-subdomain>.livekit.cloud

# Optional API endpoint override.
NEXT_PUBLIC_CONN_DETAILS_ENDPOINT=/api/connection-details

# Frontdesk room input tracks.
# Backend room input publishes room_video by default.
NEXT_PUBLIC_FRONTDESK_VIDEO_TRACK_NAME=room_video

# Internally used environment variables
APP_CONFIG_ENDPOINT=
SANDBOX_ID=
# Leave empty for LiveKit's default user microphone input. Set to "browser"
# only when the backend room-input device is browser and PUBLISH_VOICE=1.
INPUT_SOURCE=
# For browser room input, match the backend agent worker name.
AGENT_NAME=
BROWSER_MEDIA_STREAM_NAME=browser_input
BROWSER_VIDEO_WIDTH=960
BROWSER_VIDEO_HEIGHT=720
BROWSER_VIDEO_FPS=30
BROWSER_VIDEO_MAX_BITRATE=3500000
BROWSER_VIDEO_STATS=false
REMOTE_VIDEO_WIDTH=960
REMOTE_VIDEO_HEIGHT=720
REMOTE_VIDEO_FPS=30
DEBUG_AUDIO=false
DEBUG_VIDEO=false
# This file is intentionally documentation-only.
#
# Integrated LexVoice runs should not configure frontend variables here.
# Use `../lex-voice/.env` as the single source of truth; `lex-voice/run.sh`
# injects LiveKit, room-input, input-source, role-device, agent, media, and
# debug settings into the frontend process when it starts `make start_ui`.
#
# Only create `agent-starter-react/.env.local` for standalone frontend
# development where this repository is launched directly with `pnpm dev`.
# In that case, define only the variables needed for that standalone run.
31 changes: 20 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,19 +30,17 @@ agent-starter-react/
├── app/
│ ├── (app)/
│ ├── api/
│ ├── components/
│ ├── fonts/
│ ├── globals.css
│ ├── ui/
│ └── layout.tsx
├── components/
│ ├── app/
│ ├── livekit/
│ ├── ui/
│ ├── app.tsx
│ ├── session-view.tsx
│ └── welcome.tsx
│ └── ...
├── hooks/
├── lib/
├── public/
├── styles/
│ └── globals.css
└── package.json
```

Expand All @@ -59,7 +57,13 @@ Run the following command to automatically clone this template.
lk app create --template agent-starter-react
```

Then run the app with:
For integrated LexVoice runs, configure `../lex-voice/.env` and start the
frontend through the LexVoice runtime scripts. `../lex-voice/run.sh` injects
LiveKit, room-input, input-source, role-device, agent, media, and debug settings
into this Next.js process.

For standalone frontend development, install dependencies and run the dev
server directly:

```bash
pnpm install
Expand All @@ -68,7 +72,8 @@ pnpm dev

Then open http://localhost:3000 in your browser.

You'll also need an agent to speak with. Try our starter agent for [Python](https://github.com/livekit-examples/agent-starter-python), [Node.js](https://github.com/livekit-examples/agent-starter-node), or [create your own from scratch](https://docs.livekit.io/agents/start/voice-ai/).
You'll also need a LiveKit server and an agent worker. In this workspace, those
are normally provided by the sibling `../lex-voice` project.

## Configuration

Expand Down Expand Up @@ -107,15 +112,19 @@ You can update these values in [`app-config.ts`](./app-config.ts) to customize b

#### Environment Variables

You'll also need to configure your LiveKit credentials in `.env.local` (copy `.env.example` if you don't have one):
Integrated runs should keep runtime variables in `../lex-voice/.env`; this
repository's `.env.example` is documentation-only. Only create
`agent-starter-react/.env.local` for standalone frontend development launched
directly with `pnpm dev`.

```env
LIVEKIT_API_KEY=your_livekit_api_key
LIVEKIT_API_SECRET=your_livekit_api_secret
LIVEKIT_URL=https://your-livekit-server-url
```

These are required for the voice agent functionality to work with your LiveKit project.
These are required for standalone voice agent functionality to work with your
LiveKit project.

## Contributing

Expand Down
107 changes: 105 additions & 2 deletions app-config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ export interface AppConfig {
supportsScreenShare: boolean;
isPreConnectBufferEnabled: boolean;
usesBrowserRawMediaInput?: boolean;
usesBrowserRawAudioInput?: boolean;
usesBrowserRawVideoInput?: boolean;
usesServerRoomInput?: boolean;
browserMediaStreamName?: string;
browserVideoWidth?: number;
Expand All @@ -38,6 +40,10 @@ export interface AppConfig {
// for LiveKit Cloud Sandbox
sandboxId?: string;
agentName?: string;
inputSource?: string;
audioInputDevice?: string;
visionInputDevice?: string;
outputDevice?: string;

excludeAudioTracks: string[];
showAudioFilterDebug?: boolean;
Expand All @@ -62,9 +68,100 @@ export interface AppConfig {

const XUNFEI_AUDIO_TRACK_NAME = 'xunfei_audio_track';
const ROOM_INPUT_AUDIO_TRACK_NAME = 'room_audio';
const ROOM_INPUT_VIDEO_TRACK_NAME =
process.env.NEXT_PUBLIC_FRONTDESK_VIDEO_TRACK_NAME || 'room_video';
const ROOM_INPUT_VIDEO_TRACK_NAME = process.env.NEXT_PUBLIC_ROOM_VIDEO_TRACK_NAME || 'room_video';
const BROWSER_VIDEO_TRACK_NAME = 'browser_video_track';

// Device assigned to a role when the configured value is missing or not a known device.
const DEFAULT_ROLE_INPUT_DEVICE = 'xunfei';
// Device names accepted by normalizeRoleInputDevice; any other value falls back.
const VALID_INPUT_DEVICES = new Set(['xunfei', 'generic', 'primebot', 'browser']);

/**
 * Raw, un-normalized inputs accepted by resolveInputDeviceConfig.
 * Every field may be omitted or null.
 */
export interface InputDeviceConfigOptions {
// Input source name; resolveInputDeviceConfig throws when this is missing or blank.
inputSource?: string | null;
// The per-role overrides below are only consulted when the input source is 'mixed'.
audioInputDevice?: string | null;
visionInputDevice?: string | null;
outputDevice?: string | null;
}

/**
 * Resolved device selection plus the flags derived from it.
 * The field notes describe the values as populated by resolveInputDeviceConfig.
 */
export interface InputDeviceConfig {
// Trimmed, lower-cased input source.
inputSource: string;
audioInputDevice: string;
visionInputDevice: string;
outputDevice: string;
// True when the resolved audio input device is 'browser'.
usesBrowserRawAudioInput: boolean;
// True when the resolved vision input device is 'browser'.
usesBrowserRawVideoInput: boolean;
// True when either of the two browser flags above is true.
usesBrowserRawMediaInput: boolean;
usesServerRoomInput: boolean;
supportsScreenShare: boolean;
showDefaultCameraPreview: boolean;
}

/**
 * Canonicalizes an input source name by trimming whitespace and lower-casing it.
 *
 * @param inputSource - Raw input source; may be undefined, null, or blank.
 * @returns The trimmed, lower-cased name.
 * @throws Error when nothing is left after trimming.
 */
export function normalizeInputSource(inputSource?: string | null) {
  const cleaned = (inputSource ? inputSource : '').trim().toLowerCase();
  if (cleaned) {
    return cleaned;
  }
  // Missing, null, and whitespace-only values all end up here.
  throw new Error('INPUT_SOURCE is required');
}

/**
 * Canonicalizes a per-role device name, substituting a fallback for anything unrecognized.
 *
 * @param inputDevice - Raw device name; may be undefined, null, or blank.
 * @param fallback - Returned unchanged when the cleaned name is not in VALID_INPUT_DEVICES.
 * @returns The trimmed, lower-cased device name if it is a known device, otherwise `fallback`.
 */
function normalizeRoleInputDevice(inputDevice: string | null | undefined, fallback: string) {
  const candidate = (inputDevice ?? '').trim().toLowerCase();
  return VALID_INPUT_DEVICES.has(candidate) ? candidate : fallback;
}

/**
 * Resolves which device serves each role (audio input, vision input, output) and derives
 * the browser-input and UI flags that follow from that choice.
 *
 * For any input source other than 'mixed', all three roles share one device: the source
 * itself when it is a known device, otherwise the default role device. For 'mixed', audio
 * and vision each use their own override when it names a known device, and output uses any
 * non-blank override (it is not checked against the known-device set).
 *
 * @param options - Raw input source and optional per-role overrides.
 * @returns The resolved devices plus derived flags.
 * @throws Error when the input source is missing or blank.
 */
export function resolveInputDeviceConfig({
  inputSource,
  audioInputDevice,
  visionInputDevice,
  outputDevice,
}: InputDeviceConfigOptions = {}): InputDeviceConfig {
  const source = normalizeInputSource(inputSource);

  let audioDevice: string;
  let visionDevice: string;
  let outputRoleDevice: string;
  if (source === 'mixed') {
    const fallback = DEFAULT_ROLE_INPUT_DEVICE;
    audioDevice = normalizeRoleInputDevice(audioInputDevice, fallback);
    visionDevice = normalizeRoleInputDevice(visionInputDevice, fallback);
    // The output override is only trimmed and lower-cased, never validated.
    outputRoleDevice = (outputDevice || fallback).trim().toLowerCase() || fallback;
  } else {
    const sharedDevice = normalizeRoleInputDevice(source, DEFAULT_ROLE_INPUT_DEVICE);
    audioDevice = sharedDevice;
    visionDevice = sharedDevice;
    outputRoleDevice = sharedDevice;
  }

  const browserAudio = audioDevice === 'browser';
  const browserVideo = visionDevice === 'browser';

  return {
    inputSource: source,
    audioInputDevice: audioDevice,
    visionInputDevice: visionDevice,
    outputDevice: outputRoleDevice,
    usesBrowserRawAudioInput: browserAudio,
    usesBrowserRawVideoInput: browserVideo,
    usesBrowserRawMediaInput: browserAudio || browserVideo,
    usesServerRoomInput: true,
    // Browser-sourced video turns off both screen share and the default camera preview.
    supportsScreenShare: !browserVideo && APP_CONFIG_DEFAULTS.supportsScreenShare,
    showDefaultCameraPreview:
      !browserVideo && (APP_CONFIG_DEFAULTS.showDefaultCameraPreview ?? true),
  };
}

/**
 * Picks the agent name to use for a given input source.
 *
 * @param inputSource - Raw input source; only consulted when no agent name is configured.
 * @param agentName - Explicitly configured agent name; wins when non-blank after trimming.
 * @returns The trimmed configured name, or `lexvoice-<normalized source>-agent` otherwise.
 * @throws Error when no agent name is configured and the input source is missing or blank.
 */
export function resolveAgentNameForInputSource(
  inputSource?: string | null,
  agentName?: string | null
) {
  const explicitName = agentName?.trim();
  return explicitName
    ? explicitName
    : `lexvoice-${normalizeInputSource(inputSource)}-agent`;
}

export function buildDefaultVideoTracks(
isBrowserInput: boolean,
usesServerRoomInput = false
Expand Down Expand Up @@ -120,6 +217,8 @@ export const APP_CONFIG_DEFAULTS: AppConfig = {
supportsScreenShare: true,
isPreConnectBufferEnabled: true,
usesBrowserRawMediaInput: false,
usesBrowserRawAudioInput: false,
usesBrowserRawVideoInput: false,
usesServerRoomInput: false,
browserMediaStreamName: 'browser_input',
browserVideoWidth: 640,
Expand All @@ -140,6 +239,10 @@ export const APP_CONFIG_DEFAULTS: AppConfig = {
// for LiveKit Cloud Sandbox
sandboxId: undefined,
agentName: undefined,
inputSource: undefined,
audioInputDevice: undefined,
visionInputDevice: undefined,
outputDevice: undefined,

// 音频过滤配置
excludeAudioTracks: [XUNFEI_AUDIO_TRACK_NAME, ROOM_INPUT_AUDIO_TRACK_NAME], // 要排除的音频轨道名称列表
Expand Down
28 changes: 20 additions & 8 deletions app/api/connection-details/route.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,12 @@ export async function POST(req: Request) {
throw new Error('LIVEKIT_API_SECRET is not defined');
}

// Parse agent configuration from request body
// Parse room configuration from request body
const body = await req.json();
const agentName: string = body?.room_config?.agents?.[0]?.agent_name;
const roomConfig = body?.room_config
? RoomConfiguration.fromJson(body.room_config, { ignoreUnknownFields: true })
: new RoomConfiguration();
const tokenRoomConfig = buildTokenRoomConfig(roomConfig);

// Generate participant token
const participantName = 'user';
Expand All @@ -44,7 +47,7 @@ export async function POST(req: Request) {
const participantToken = await createParticipantToken(
{ identity: participantIdentity, name: participantName },
roomName,
agentName
tokenRoomConfig
);

// Return connection details
Expand All @@ -69,7 +72,7 @@ export async function POST(req: Request) {
function createParticipantToken(
userInfo: AccessTokenOptions,
roomName: string,
agentName?: string
roomConfig: RoomConfiguration | undefined
): Promise<string> {
const at = new AccessToken(API_KEY, API_SECRET, {
...userInfo,
Expand All @@ -84,11 +87,20 @@ function createParticipantToken(
};
at.addGrant(grant);

if (agentName) {
at.roomConfig = new RoomConfiguration({
agents: [{ agentName }],
});
if (roomConfig) {
at.roomConfig = roomConfig;
}

return at.toJwt();
}

/**
 * Produces the room configuration to embed in the participant token.
 *
 * A configuration with no agents is returned as the same instance. Otherwise a new
 * RoomConfiguration is built from the original's fields with `agents` emptied, so the
 * returned configuration never carries agent entries.
 *
 * @param roomConfig - Room configuration parsed from the request.
 * @returns The configuration to attach to the token.
 */
function buildTokenRoomConfig(roomConfig: RoomConfiguration) {
  const hasAgents = roomConfig.agents.length !== 0;
  return hasAgents ? new RoomConfiguration({ ...roomConfig, agents: [] }) : roomConfig;
}
Loading
Loading