livekit · longcw · Feb 13, 2026 · Feb 13, 2026 · devin-ai-integration · Feb 13, 2026
diff --git a/examples/voice_agents/basic_agent.py b/examples/voice_agents/basic_agent.py
@@ -40,8 +40,7 @@ def __init__(self) -> None:
     async def on_enter(self):
         # when the agent is added to the session, it'll generate a reply
         # according to its instructions
-        # Keep it uninterruptible so the client has time to calibrate AEC (Acoustic Echo Cancellation).
-        self.session.generate_reply(allow_interruptions=False)
+        self.session.generate_reply()
 
     # all functions annotated with @function_tool will be passed to the LLM when this
     # agent is active
@@ -102,6 +101,8 @@ async def entrypoint(ctx: JobContext):
         # when it's detected, you may resume the agent's speech
         resume_false_interruption=True,
         false_interruption_timeout=1.0,
+        # block interruptions for the first few seconds of agent speech so the client can calibrate AEC
+        echo_guard_duration=3.0,
     )
 
     # log metrics as they are emitted, and total usage after session is over

diff --git a/livekit-agents/livekit/agents/voice/agent_activity.py b/livekit-agents/livekit/agents/voice/agent_activity.py
@@ -781,10 +781,13 @@ def push_audio(self, frame: rtc.AudioFrame) -> None:
         if not self._started:
             return
 
-        should_discard = bool(
+        should_discard = (
             self._current_speech
             and not self._current_speech.allow_interruptions
             and self._session.options.discard_audio_if_uninterruptible
+        ) or (
+            self._session.agent_state == "speaking"
+            and self._session._echo_guard_remaining_duration > 0
         )
 
         if not should_discard:
@@ -1223,6 +1226,10 @@ def _on_generation_created(self, ev: llm.GenerationCreatedEvent) -> None:
         self._schedule_speech(handle, SpeechHandle.SPEECH_PRIORITY_NORMAL)
 
     def _interrupt_by_audio_activity(self) -> None:
+        if self._session._echo_guard_remaining_duration > 0:
+            # disable interruption from audio activity while echo guard is active
+            return
-        if self._session._echo_guard_remaining_duration > 0:
-            # disable interruption from audio activity while echo guard is active
-            return
+        if self._session.agent_state == "speaking" and self._session._echo_guard_remaining_duration > 0:
+            # disable interruption from audio activity while echo guard is active
+            return
+
-        if self._session._echo_guard_remaining_duration > 0:
-            # disable interruption from audio activity while echo guard is active
-            return
+        if self._session.agent_state == "speaking" and self._session._echo_guard_remaining_duration > 0:
+            # disable interruption from audio activity while echo guard is active
+            return
+
+
         opt = self._session.options
         use_pause = opt.resume_false_interruption and opt.false_interruption_timeout is not None
 

diff --git a/livekit-agents/livekit/agents/voice/agent_session.py b/livekit-agents/livekit/agents/voice/agent_session.py
@@ -89,6 +89,7 @@ class AgentSessionOptions:
     preemptive_generation: bool
     tts_text_transforms: Sequence[TextTransforms] | None
     ivr_detection: bool
+    echo_guard_duration: float | None
 
 
 Userdata_T = TypeVar("Userdata_T")
@@ -158,6 +159,7 @@ def __init__(
         use_tts_aligned_transcript: NotGivenOr[bool] = NOT_GIVEN,
         tts_text_transforms: NotGivenOr[Sequence[TextTransforms] | None] = NOT_GIVEN,
         preemptive_generation: bool = False,
+        echo_guard_duration: float | None = None,
         ivr_detection: bool = False,
         conn_options: NotGivenOr[SessionConnectOptions] = NOT_GIVEN,
         loop: asyncio.AbstractEventLoop | None = None,
@@ -246,6 +248,10 @@ def __init__(
                 can reduce response latency by overlapping model inference with user audio,
                 but may incur extra compute if the user interrupts or revises mid-utterance.
                 Defaults to ``False``.
+            echo_guard_duration (float, optional): Total agent speaking time, in seconds,
+                during which user audio is discarded and cannot interrupt the agent. The
+                countdown runs only while the agent is speaking. Useful to keep echo from
+                interrupting the agent before the client's AEC is calibrated. Default ``None``.
             ivr_detection (bool): Whether to detect if the agent is interacting with an IVR system.
                 Default ``False``.
             conn_options (SessionConnectOptions, optional): Connection options for
@@ -291,6 +297,7 @@ def __init__(
             use_tts_aligned_transcript=use_tts_aligned_transcript
             if is_given(use_tts_aligned_transcript)
             else None,
+            echo_guard_duration=echo_guard_duration,
         )
         self._conn_options = conn_options or SessionConnectOptions()
         self._started = False
@@ -316,6 +323,11 @@ def __init__(
         self._llm_error_counts = 0
         self._tts_error_counts = 0
 
+        # echo guard: disable interruptions while AEC warms up
+        self._echo_guard_remaining_duration = echo_guard_duration or 0.0
+        self._echo_guard_timer: asyncio.TimerHandle | None = None
+        self._echo_guard_speaking_start: float | None = None
+
         # configurable IO
         self._input = io.AgentInput(self._on_video_input_changed, self._on_audio_input_changed)
         self._output = io.AgentOutput(
@@ -787,6 +799,8 @@ async def _aclose_impl(
 
             self._closing = True
             self._cancel_user_away_timer()
+            self._cancel_echo_guard_timer()
+            self._on_echo_guard_expired()  # always clear echo guard when closing the session
 
             if self._activity is not None:
                 if not drain:
@@ -1192,6 +1206,26 @@ def _cancel_user_away_timer(self) -> None:
             self._user_away_timer.cancel()
             self._user_away_timer = None
 
+    def _on_echo_guard_expired(self) -> None:  # timer callback; also run on session close
+        if self._echo_guard_remaining_duration > 0:  # log only when a guard was still pending
+            logger.debug("echo guard expired, re-enabling interruptions")
+
+        self._echo_guard_remaining_duration = 0.0  # zero makes every `> 0` guard check fail
+        self._echo_guard_timer = None  # drops the handle only; does not cancel the timer
+        self._echo_guard_speaking_start = None  # stop measuring the current speaking interval
+
+    def _cancel_echo_guard_timer(self) -> None:  # pause the guard; unspent time is kept
+        if self._echo_guard_timer is not None:
+            self._echo_guard_timer.cancel()
+            self._echo_guard_timer = None
+
+        if self._echo_guard_speaking_start is not None:  # a speaking interval is being timed
+            elapsed = time.time() - self._echo_guard_speaking_start  # NOTE(review): not monotonic
+            self._echo_guard_remaining_duration = max(
+                0.0, self._echo_guard_remaining_duration - elapsed
+            )  # deduct time already spoken, clamped at zero
+            self._echo_guard_speaking_start = None  # so the interval is not deducted twice
+
     def _update_agent_state(
         self,
         state: AgentState,
@@ -1223,6 +1257,20 @@ def _update_agent_state(
             self._agent_speaking_span.end()
             self._agent_speaking_span = None
 
+        # echo guard: disable interruptions while AEC warms up
+        if state == "speaking" and self._echo_guard_remaining_duration > 0:
+            self._echo_guard_speaking_start = time.time()
+            self._echo_guard_timer = self._loop.call_later(
+                self._echo_guard_remaining_duration, self._on_echo_guard_expired
+            )
+            logger.debug(
+                "echo guard active, disabling interruptions for %.2fs",
+                self._echo_guard_remaining_duration,
+            )
+
+        if self._agent_state == "speaking" and state != "speaking":
+            self._cancel_echo_guard_timer()
+
         if state == "listening" and self._user_state == "listening":
             self._set_user_away_timer()
         else: