diff --git a/python/semantic_kernel/contents/chat_history.py b/python/semantic_kernel/contents/chat_history.py index e5daec19fef5..a773c232c7bc 100644 --- a/python/semantic_kernel/contents/chat_history.py +++ b/python/semantic_kernel/contents/chat_history.py @@ -353,6 +353,23 @@ def from_rendered_prompt(cls: type[_T], rendered_prompt: str) -> _T: elif item.tag == CHAT_HISTORY_TAG: for message in item: messages.append(ChatMessageContent.from_element(message)) + else: + # Unknown XML tags (e.g. HTML tags like

<p>, <div>

) are not SK + # template tags. Serialize them back to text and append to the + # previous message so that the original content is preserved. + saved_tail = item.tail + item.tail = None + raw = unescape(tostring(item, encoding="unicode", short_empty_elements=False)) + item.tail = saved_tail + if messages: + messages[-1].content = (messages[-1].content or "") + raw + else: + messages.append(ChatMessageContent(role=AuthorRole.USER, content=raw)) + # For unknown tags the tail is part of the surrounding text, + # so keep it in the same message instead of starting a new one. + if item.tail: + messages[-1].content = (messages[-1].content or "") + unescape(item.tail) + continue if item.tail and item.tail.strip(): messages.append(ChatMessageContent(role=AuthorRole.USER, content=unescape(item.tail.strip()))) if len(messages) == 1 and messages[0].role == AuthorRole.SYSTEM: diff --git a/python/tests/unit/contents/test_chat_history.py b/python/tests/unit/contents/test_chat_history.py index ac41949a7325..3c1b92945296 100644 --- a/python/tests/unit/contents/test_chat_history.py +++ b/python/tests/unit/contents/test_chat_history.py @@ -612,6 +612,48 @@ def test_to_from_file(chat_history: ChatHistory, tmp_path): assert chat_history_2.messages[4] == chat_history.messages[4] +def test_from_rendered_prompt_preserves_html_p_tag(): + """HTML

<p> tags in prompts should be preserved as text, not treated as template tags. + + Regression test for https://github.com/microsoft/semantic-kernel/issues/13632 + """ + rendered = ( + 'Translate following message from English language into the Spanish language - "

<p>What is your name?</p>

"' + ) + chat_history = ChatHistory.from_rendered_prompt(rendered) + assert len(chat_history.messages) == 1 + assert chat_history.messages[0].role == AuthorRole.USER + assert "

<p>What is your name?</p>

" in chat_history.messages[0].content + + +def test_from_rendered_prompt_preserves_multiple_html_tags(): + """Multiple HTML tags in prompts should be preserved as text.""" + rendered = "

<p>First paragraph</p><div>A div</div>
" + chat_history = ChatHistory.from_rendered_prompt(rendered) + assert len(chat_history.messages) == 1 + assert "

<p>First paragraph</p>

" in chat_history.messages[0].content + assert "
<div>A div</div>
" in chat_history.messages[0].content + + +def test_from_rendered_prompt_preserves_html_with_text_around(): + """HTML tags surrounded by plain text should preserve all content.""" + rendered = "Hello <b>world</b> today" + chat_history = ChatHistory.from_rendered_prompt(rendered) + assert len(chat_history.messages) == 1 + assert "Hello" in chat_history.messages[0].content + assert "world" in chat_history.messages[0].content + assert "today" in chat_history.messages[0].content + + +def test_from_rendered_prompt_sk_tags_still_work_with_html(): + """SK template tags should still be parsed correctly even when HTML tags are present.""" + rendered = '<message role="user">Tell me about <b>bold</b> text</message>' + chat_history = ChatHistory.from_rendered_prompt(rendered) + # The <b> tag inside a <message> is handled by ChatMessageContent.from_element + assert len(chat_history.messages) == 1 + assert chat_history.messages[0].role == AuthorRole.USER + + def test_chat_history_serialize(chat_history: ChatHistory): class CustomResultClass: def __init__(self, result):