From b2230cf4f2e41d6fbe694986ae3a6ffc708b1388 Mon Sep 17 00:00:00 2001 From: sandikodev Date: Sat, 4 Apr 2026 20:48:06 +0700 Subject: [PATCH] fix(chat): prevent panic on UTF-8 boundary when rendering response When the AI response contains multi-byte characters (e.g. non-ASCII text adjacent to triple backticks), the byte offset accumulated via parsed.offset_from() can land in the middle of a multi-byte UTF-8 character instead of on a character boundary. The subsequent &buf[offset..] slice then panics at runtime. The same pattern existed in two places: - crates/chat-cli/src/cli/chat/mod.rs (streaming response loop) - crates/chat-cli/src/cli/chat/parse.rs (validate! test macro loop) Replace the direct slice with .get(offset..) which returns None instead of panicking, and break the loop gracefully. Add a regression test with Indonesian, Chinese, and emoji inputs adjacent to triple backticks to verify no panic occurs. Fixes #3715 --- crates/chat-cli/src/cli/chat/mod.rs | 6 +++- crates/chat-cli/src/cli/chat/parse.rs | 41 ++++++++++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/crates/chat-cli/src/cli/chat/mod.rs b/crates/chat-cli/src/cli/chat/mod.rs index 099c0c8761..2599ea9e61 100644 --- a/crates/chat-cli/src/cli/chat/mod.rs +++ b/crates/chat-cli/src/cli/chat/mod.rs @@ -3099,7 +3099,11 @@ impl ChatSession { // Print the response for normal cases loop { - let input = Partial::new(&buf[offset..]); + // Use `get` to avoid panicking if `offset` lands on a non-UTF-8 boundary, + // which can happen when the response contains multi-byte characters (e.g. + // non-ASCII text adjacent to triple backticks). See: #3715 + let Some(slice) = buf.get(offset..) 
else { break }; + let input = Partial::new(slice); if self.stdout.should_send_structured_event { match interpret_markdown(input, &mut temp_buf, &mut state) { Ok(parsed) => { diff --git a/crates/chat-cli/src/cli/chat/parse.rs b/crates/chat-cli/src/cli/chat/parse.rs index cf457b56f3..5a5013107b 100644 --- a/crates/chat-cli/src/cli/chat/parse.rs +++ b/crates/chat-cli/src/cli/chat/parse.rs @@ -672,7 +672,8 @@ mod tests { let mut offset = 0; loop { - let input = Partial::new(&input[offset..]); + let Some(slice) = input.get(offset..) else { break }; + let input = Partial::new(slice); match interpret_markdown(input, &mut presult, &mut state) { Ok(parsed) => { offset += parsed.offset_from(&input); @@ -824,4 +825,42 @@ mod tests { [style::Print("+ % @ . ?")], true ); + + /// Regression test for #3715: multi-byte UTF-8 characters adjacent to triple backticks + /// must not cause a panic from byte-index slicing. + #[test] + fn multibyte_utf8_adjacent_to_triple_backticks_does_not_panic() { + // Indonesian / non-ASCII text followed by a code fence — the combination that + // triggered "byte index N is out of bounds" in the wild. + let inputs = [ + "Benar. Ganti dengan deskripsi langsung. Gunakan ini:\n\n```\ncontoh kode\n```", + "移除 eagleeye-ec-databases 任務狀況確認\n```bash\necho ok\n```", + "emoji 🎉 before ``` fence ```", + ]; + + for raw in inputs { + let mut input = raw.to_owned(); + input.push_str(" "); // simulate incomplete stream sentinel + + let mut state = ParseState::new(Some(80), Some(false)); + let mut out = vec![]; + let mut offset = 0; + + loop { + let Some(slice) = input.get(offset..) else { break }; + let partial = Partial::new(slice); + match interpret_markdown(partial, &mut out, &mut state) { + Ok(parsed) => { + offset += parsed.offset_from(&partial); + state.newline = state.set_newline; + state.set_newline = false; + }, + Err(err) => match err.into_inner() { + Some(err) => panic!("parse error on input {:?}: {err}", raw), + None => break, + }, + } + } + } + } }