15 changes: 8 additions & 7 deletions livekit-agents/livekit/agents/voice/agent_activity.py
@@ -735,19 +735,20 @@ def push_audio(self, frame: rtc.AudioFrame) -> None:
         if not self._started:
             return

-        if (
+        should_discard = bool(
             self._current_speech
             and not self._current_speech.allow_interruptions
             and self._session.options.discard_audio_if_uninterruptible
-        ):
-            # discard the audio if the current speech is not interruptable
-            return
+        )

-        if self._rt_session is not None:
-            self._rt_session.push_audio(frame)
+        if not should_discard:
+            if self._rt_session is not None:
+                self._rt_session.push_audio(frame)

+        # Always forward to _audio_recognition for VAD, even when discarding STT/LLM
+        # VAD needs frames to detect speech end and update user state correctly
         if self._audio_recognition is not None:
-            self._audio_recognition.push_audio(frame)
+            self._audio_recognition.push_audio(frame, skip_stt=should_discard)

     def push_video(self, frame: rtc.VideoFrame) -> None:
         if not self._started:
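For context, here is a minimal, self-contained sketch of the merged push_audio() control flow. The Fake* classes and field names below are hypothetical stand-ins invented for illustration (the real agent reads the option from self._session.options and receives rtc.AudioFrame objects): while an uninterruptible speech is playing, the frame is withheld from the realtime session, but it is still forwarded to audio recognition with skip_stt=True so VAD keeps running.

# Hypothetical, simplified illustration of the new push_audio() logic;
# these classes are not the real livekit-agents types.
from dataclasses import dataclass


@dataclass
class FakeSpeech:
    allow_interruptions: bool


@dataclass
class FakeOptions:
    discard_audio_if_uninterruptible: bool = True


class FakeRecognition:
    def push_audio(self, frame, *, skip_stt: bool = False) -> None:
        print(f"audio recognition got frame (skip_stt={skip_stt})")


class FakeRtSession:
    def push_audio(self, frame) -> None:
        print("realtime session got frame")


class FakeActivity:
    def __init__(self, current_speech, options) -> None:
        self._started = True
        self._current_speech = current_speech
        self._options = options
        self._rt_session = FakeRtSession()
        self._audio_recognition = FakeRecognition()

    def push_audio(self, frame) -> None:
        if not self._started:
            return

        should_discard = bool(
            self._current_speech
            and not self._current_speech.allow_interruptions
            and self._options.discard_audio_if_uninterruptible
        )

        # Skip STT/LLM input while uninterruptible speech is playing...
        if not should_discard:
            if self._rt_session is not None:
                self._rt_session.push_audio(frame)

        # ...but VAD still needs every frame to detect speech end and keep
        # the user state correct, so only STT is skipped for this frame.
        if self._audio_recognition is not None:
            self._audio_recognition.push_audio(frame, skip_stt=should_discard)


activity = FakeActivity(FakeSpeech(allow_interruptions=False), FakeOptions())
activity.push_audio(b"\x00\x00")  # only the recognition line prints, with skip_stt=True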
4 changes: 2 additions & 2 deletions livekit-agents/livekit/agents/voice/audio_recognition.py
@@ -180,9 +180,9 @@ def stop(self) -> None:
         self.update_stt(None)
         self.update_vad(None)

-    def push_audio(self, frame: rtc.AudioFrame) -> None:
+    def push_audio(self, frame: rtc.AudioFrame, *, skip_stt: bool = False) -> None:
         self._sample_rate = frame.sample_rate
-        if self._stt_ch is not None:
+        if not skip_stt and self._stt_ch is not None:
             self._stt_ch.send_nowait(frame)

         if self._vad_ch is not None:
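And a rough sketch of what the new skip_stt flag does on the receiving side, using asyncio.Queue as a stand-in for the internal STT/VAD channels (this Recognizer class is an illustrative assumption, not the real AudioRecognition implementation): the frame always lands on the VAD channel, and only lands on the STT channel when skip_stt is false.

import asyncio


class Recognizer:
    """Illustrative stand-in for AudioRecognition's channel routing."""

    def __init__(self) -> None:
        # Stand-ins for the internal STT and VAD channels
        self._stt_ch = asyncio.Queue()
        self._vad_ch = asyncio.Queue()

    def push_audio(self, frame: bytes, *, skip_stt: bool = False) -> None:
        # STT only sees the frame when it is not being discarded
        if not skip_stt and self._stt_ch is not None:
            self._stt_ch.put_nowait(frame)

        # VAD always sees the frame so it can track speech start/end
        if self._vad_ch is not None:
            self._vad_ch.put_nowait(frame)


async def main() -> None:
    rec = Recognizer()
    rec.push_audio(b"\x01", skip_stt=True)   # reaches VAD only
    rec.push_audio(b"\x02", skip_stt=False)  # reaches both VAD and STT
    print(rec._stt_ch.qsize(), rec._vad_ch.qsize())  # -> 1 2


asyncio.run(main())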