diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py index 459eac6a6e52..c0ed3488fcf5 100644 --- a/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py +++ b/python/semantic_kernel/contents/history_reducer/chat_history_reducer_utils.py @@ -66,6 +66,7 @@ def locate_safe_reduction_index( target_count: int, threshold_count: int = 0, offset_count: int = 0, + has_system_message: bool = False, ) -> int | None: """Identify the index of the first message at or beyond the specified target_count. @@ -83,11 +84,27 @@ def locate_safe_reduction_index( threshold_count: The threshold beyond target_count required to trigger reduction. If total messages <= (target_count + threshold_count), no reduction occurs. offset_count: Optional number of messages to skip at the start (e.g. existing summary messages). + has_system_message: Whether the history contains a system message that will be preserved + separately. When True, the target_count is adjusted to account for the + system message being re-added after reduction. Returns: The index that identifies the starting point for a reduced history that does not orphan sensitive content. Returns None if reduction is not needed. """ + # Adjust target_count to account for the system message that will be preserved separately. + # This matches the .NET SDK behavior. + if has_system_message: + target_count -= 1 + if target_count <= 0: + logger.warning( + "target_count after accounting for system message is %d; reduction will keep only the system message.", + target_count, + ) + # Reduce to just the system message — return index past all non-system messages. + # The caller will prepend the system message to the empty/minimal tail. + return len(history) + total_count = len(history) threshold_index = total_count - (threshold_count or 0) - target_count if threshold_index <= offset_count: diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py index e01594f1e0e4..3b8d2db82acf 100644 --- a/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py +++ b/python/semantic_kernel/contents/history_reducer/chat_history_summarization_reducer.py @@ -26,6 +26,7 @@ locate_safe_reduction_index, locate_summarization_boundary, ) +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.exceptions.content_exceptions import ChatHistoryReducerException from semantic_kernel.utils.feature_stage_decorator import experimental @@ -89,6 +90,21 @@ async def reduce(self) -> Self | None: logger.info("Performing chat history summarization check...") + # Preserve system/developer messages so they are not lost during summarization. + # This matches the .NET SDK behavior and the truncation reducer. + # Only the first system/developer message is preserved; this mirrors .NET semantics. + # Exclude summary messages (which may have SYSTEM role) — they are generated content, + # not original system prompts. + system_message_index = next( + ( + i + for i, msg in enumerate(history) + if msg.role in (AuthorRole.SYSTEM, AuthorRole.DEVELOPER) and not msg.metadata.get(SUMMARY_METADATA_KEY) + ), + -1, + ) + system_message = history[system_message_index] if system_message_index >= 0 else None + # 1. Identify where existing summary messages end insertion_point = locate_summarization_boundary(history) if insertion_point == len(history): @@ -96,12 +112,19 @@ async def reduce(self) -> Self | None: logger.warning("All messages are summaries, forcing boundary to 0.") insertion_point = 0 + # Only adjust target_count if the system message would be truncated away. + # If the system message is already in the retained portion, no adjustment needed. + system_would_be_truncated = ( + system_message is not None and system_message_index < len(history) - self.target_count + ) + # 2. Locate the safe reduction index truncation_index = locate_safe_reduction_index( history, self.target_count, self.threshold_count, offset_count=insertion_point, + has_system_message=system_would_be_truncated, ) if truncation_index is None: logger.info("No valid truncation index found.") @@ -138,7 +161,13 @@ async def reduce(self) -> Self | None: keep_existing_summaries = history[:insertion_point] remainder = history[truncation_index:] + + # Prepend the system/developer message if it was summarized away. + # Use identity comparison to avoid false matches from value-equal messages. new_history = [*keep_existing_summaries, summary_msg, *remainder] + if system_message is not None and not any(m is system_message for m in new_history): + new_history = [system_message, *new_history] + self.messages = new_history return self @@ -151,8 +180,6 @@ async def reduce(self) -> Self | None: async def _summarize(self, messages: list[ChatMessageContent]) -> ChatMessageContent | None: """Use the ChatCompletion service to generate a single summary message.""" - from semantic_kernel.contents.utils.author_role import AuthorRole - chat_history = ChatHistory(messages=messages) execution_settings = self.execution_settings or self.service.get_prompt_execution_settings_from_settings( PromptExecutionSettings() diff --git a/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py index 32c848a098d7..0d0810feb8df 100644 --- a/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py +++ b/python/semantic_kernel/contents/history_reducer/chat_history_truncation_reducer.py @@ -15,9 +15,9 @@ from semantic_kernel.contents.history_reducer.chat_history_reducer import ChatHistoryReducer from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import ( - extract_range, locate_safe_reduction_index, ) +from semantic_kernel.contents.utils.author_role import AuthorRole from semantic_kernel.utils.feature_stage_decorator import experimental logger = logging.getLogger(__name__) @@ -45,7 +45,28 @@ async def reduce(self) -> Self | None: logger.info("Performing chat history truncation check...") - truncation_index = locate_safe_reduction_index(history, self.target_count, self.threshold_count) + # Preserve system/developer messages so they are not lost during truncation. + # This matches the .NET SDK behavior where system messages are always retained. + # Only the first system/developer message is preserved; this mirrors .NET semantics. + system_message_index = next( + (i for i, msg in enumerate(history) if msg.role in (AuthorRole.SYSTEM, AuthorRole.DEVELOPER)), + -1, + ) + system_message = history[system_message_index] if system_message_index >= 0 else None + + # Only adjust target_count if the system message would be truncated away + # (i.e., it falls before the naive tail). If the system message is already in the + # retained portion, no adjustment is needed — it naturally occupies a slot. + system_would_be_truncated = ( + system_message is not None and system_message_index < len(history) - self.target_count + ) + + truncation_index = locate_safe_reduction_index( + history, + self.target_count, + self.threshold_count, + has_system_message=system_would_be_truncated, + ) if truncation_index is None: logger.info( f"No truncation index found. Target count: {self.target_count}, Threshold: {self.threshold_count}" @@ -53,7 +74,13 @@ async def reduce(self) -> Self | None: return None logger.info(f"Truncating history to {truncation_index} messages.") - truncated_list = extract_range(history, start=truncation_index) + truncated_list = history[truncation_index:] + + # Prepend the system/developer message if it was truncated away. + # Use identity comparison (is) to avoid false matches from value-equal messages. + if system_message is not None and all(msg is not system_message for msg in truncated_list): + truncated_list = [system_message, *truncated_list] + self.messages = truncated_list return self diff --git a/python/tests/unit/contents/test_chat_history_summarization_reducer.py b/python/tests/unit/contents/test_chat_history_summarization_reducer.py index c61d044a9811..3914128ef4b5 100644 --- a/python/tests/unit/contents/test_chat_history_summarization_reducer.py +++ b/python/tests/unit/contents/test_chat_history_summarization_reducer.py @@ -226,3 +226,89 @@ async def test_summarization_reducer_private_summarize(mock_service): actual_summary = await reducer._summarize(chat_messages) assert actual_summary is not None, "We should get a summary message back." assert actual_summary.content == "Mock Summary", "We expect the mock summary content." + + +async def test_summarization_preserves_system_message(mock_service): + """Verify that the summarization reducer preserves system messages.""" + messages = [ + ChatMessageContent(role=AuthorRole.SYSTEM, content="Important system prompt"), + ChatMessageContent(role=AuthorRole.USER, content="User says hello"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ChatMessageContent(role=AuthorRole.USER, content="User says even more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds yet again"), + ] + + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0) + reducer.messages = messages + + summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Summary of conversation.") + mock_service.get_chat_message_content.return_value = summary_content + mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings() + + result = await reducer.reduce() + assert result is not None + # System message must be preserved + roles = [msg.role for msg in result.messages] + assert AuthorRole.SYSTEM in roles, "System message was lost during summarization" + # System message should be first + assert result.messages[0].role == AuthorRole.SYSTEM + assert result.messages[0].content == "Important system prompt" + + +async def test_summarization_does_not_preserve_summary_system_message(mock_service): + """A prior summary with SYSTEM role should NOT be treated as the system prompt to preserve.""" + summary_sys = ChatMessageContent( + role=AuthorRole.SYSTEM, content="Previous summary", metadata={SUMMARY_METADATA_KEY: True} + ) + messages = [ + summary_sys, + ChatMessageContent(role=AuthorRole.USER, content="User says hello"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ChatMessageContent(role=AuthorRole.USER, content="User says even more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds yet again"), + ] + + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0) + reducer.messages = messages + + summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="New summary.") + mock_service.get_chat_message_content.return_value = summary_content + mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings() + + result = await reducer.reduce() + assert result is not None + # The old summary-system message should NOT be preserved as a "system prompt" + # It should be replaced by the new summary + for msg in result.messages: + if msg.role == AuthorRole.SYSTEM: + assert msg.metadata.get(SUMMARY_METADATA_KEY) is not True or msg is not summary_sys + + +async def test_summarization_preserves_developer_message(mock_service): + """Verify that developer messages are preserved during summarization.""" + messages = [ + ChatMessageContent(role=AuthorRole.DEVELOPER, content="Developer instructions"), + ChatMessageContent(role=AuthorRole.USER, content="User says hello"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds"), + ChatMessageContent(role=AuthorRole.USER, content="User says more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds again"), + ChatMessageContent(role=AuthorRole.USER, content="User says even more"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant responds yet again"), + ] + + reducer = ChatHistorySummarizationReducer(service=mock_service, target_count=3, threshold_count=0) + reducer.messages = messages + + summary_content = ChatMessageContent(role=AuthorRole.ASSISTANT, content="Summary of conversation.") + mock_service.get_chat_message_content.return_value = summary_content + mock_service.get_prompt_execution_settings_from_settings.return_value = PromptExecutionSettings() + + result = await reducer.reduce() + assert result is not None + # Developer message must be preserved + assert result.messages[0].role == AuthorRole.DEVELOPER + assert result.messages[0].content == "Developer instructions" diff --git a/python/tests/unit/contents/test_chat_history_truncation_reducer.py b/python/tests/unit/contents/test_chat_history_truncation_reducer.py index 57b4f33ede48..de866708b686 100644 --- a/python/tests/unit/contents/test_chat_history_truncation_reducer.py +++ b/python/tests/unit/contents/test_chat_history_truncation_reducer.py @@ -1,10 +1,13 @@ # Copyright (c) Microsoft. All rights reserved. +import logging + import pytest from semantic_kernel.contents.chat_message_content import ChatMessageContent from semantic_kernel.contents.function_call_content import FunctionCallContent from semantic_kernel.contents.function_result_content import FunctionResultContent +from semantic_kernel.contents.history_reducer.chat_history_reducer_utils import locate_safe_reduction_index from semantic_kernel.contents.history_reducer.chat_history_truncation_reducer import ChatHistoryTruncationReducer from semantic_kernel.contents.utils.author_role import AuthorRole @@ -83,11 +86,13 @@ async def test_truncation_reducer_truncation(chat_messages): reducer = ChatHistoryTruncationReducer(target_count=2) reducer.messages = chat_messages result = await reducer.reduce() - # We expect only 2 messages remain after truncation + # We expect 2 messages: system message is preserved + 1 conversation message + # (target_count=2 includes the system message, matching .NET SDK behavior) assert result is not None assert len(result) == 2 - # They should be the last 4 messages while tool result is not orphaned - assert result[0] == chat_messages[-2] + # System message should be first, followed by the last conversation message + assert result[0] == chat_messages[0] # System message preserved + assert result[0].role == AuthorRole.SYSTEM assert result[1] == chat_messages[-1] @@ -96,12 +101,206 @@ async def test_truncation_reducer_truncation_with_tools(chat_messages_with_tools reducer = ChatHistoryTruncationReducer(target_count=3, threshold_count=0) reducer.messages = chat_messages_with_tools result = await reducer.reduce() - # We expect 4 messages remain after truncation - # Tool results are not orphaned, so we expect to keep them + # We expect 3 messages: system message + last 2 conversation messages + # (target_count=3 includes the system message, matching .NET SDK behavior) + assert result is not None + assert len(result) == 3 + # System message preserved, followed by last user-assistant pair + assert result[0] == chat_messages_with_tools[0] # System message + assert result[0].role == AuthorRole.SYSTEM + assert result[1] == chat_messages_with_tools[-2] # User message 2 + assert result[2] == chat_messages_with_tools[-1] # Assistant message 2 + + +async def test_truncation_preserves_system_message(): + """Verify that the system message is preserved after truncation (issue #12612).""" + reducer = ChatHistoryTruncationReducer( + target_count=2, + system_message="a system message", + ) + reducer.add_message(ChatMessageContent(role=AuthorRole.USER, content="user prompt 1")) + reducer.add_message(ChatMessageContent(role=AuthorRole.ASSISTANT, content="response 1")) + reducer.add_message(ChatMessageContent(role=AuthorRole.USER, content="user prompt 2")) + reducer.add_message(ChatMessageContent(role=AuthorRole.ASSISTANT, content="response 2")) + reducer.add_message(ChatMessageContent(role=AuthorRole.USER, content="user prompt 3")) + reducer.add_message(ChatMessageContent(role=AuthorRole.ASSISTANT, content="response 3")) + reducer.add_message(ChatMessageContent(role=AuthorRole.USER, content="user prompt 4")) + reducer.add_message(ChatMessageContent(role=AuthorRole.ASSISTANT, content="response 4")) + + result = await reducer.reduce() + assert result is not None + + # System message must be present after reduction + roles = [msg.role for msg in result.messages] + assert AuthorRole.SYSTEM in roles, "System message was deleted by the history reducer" + assert result.messages[0].role == AuthorRole.SYSTEM + assert result.messages[0].content == "a system message" + + +async def test_truncation_preserves_developer_message(): + """Verify that developer messages are preserved after truncation.""" + msgs = [ + ChatMessageContent(role=AuthorRole.DEVELOPER, content="Developer instructions."), + ChatMessageContent(role=AuthorRole.USER, content="User message 1"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 1"), + ChatMessageContent(role=AuthorRole.USER, content="User message 2"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 2"), + ] + reducer = ChatHistoryTruncationReducer(target_count=2) + reducer.messages = msgs + result = await reducer.reduce() + assert result is not None + assert len(result) == 2 + # Developer message should be first + assert result[0].role == AuthorRole.DEVELOPER + assert result[0].content == "Developer instructions." + + +async def test_truncation_target_count_1_with_system_message(): + """With target_count=1 and a system message, reduce to just the system message.""" + reducer = ChatHistoryTruncationReducer(target_count=1, system_message="System prompt") + reducer.add_message(ChatMessageContent(role=AuthorRole.USER, content="Hello")) + reducer.add_message(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Hi")) + reducer.add_message(ChatMessageContent(role=AuthorRole.USER, content="How are you?")) + reducer.add_message(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Good")) + + result = await reducer.reduce() + # Must reduce — history has 5 messages but target is 1 + assert result is not None + # Should contain only the system message + assert len(result.messages) == 1 + assert result.messages[0].role == AuthorRole.SYSTEM + assert result.messages[0].content == "System prompt" + + +async def test_truncation_without_system_message(): + """Verify truncation works correctly when there is no system message.""" + msgs = [ + ChatMessageContent(role=AuthorRole.USER, content="User message 1"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 1"), + ChatMessageContent(role=AuthorRole.USER, content="User message 2"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Assistant message 2"), + ] + reducer = ChatHistoryTruncationReducer(target_count=2) + reducer.messages = msgs + result = await reducer.reduce() + assert result is not None + assert len(result) == 2 + assert result[0] == msgs[-2] + assert result[1] == msgs[-1] + + +# --- Direct tests for locate_safe_reduction_index --- + + +def test_locate_safe_reduction_index_with_system_message(): + """Test locate_safe_reduction_index adjusts target_count for system message.""" + history = [ + ChatMessageContent(role=AuthorRole.SYSTEM, content="System"), + ChatMessageContent(role=AuthorRole.USER, content="User 1"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 1"), + ChatMessageContent(role=AuthorRole.USER, content="User 2"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 2"), + ] + # target_count=3 with system message → adjusted to 2, so truncation_index = 5 - 2 = 3 + index = locate_safe_reduction_index(history, target_count=3, has_system_message=True) + assert index is not None + assert index == 3 # Keep last 2 messages, system message re-added by caller + + +def test_locate_safe_reduction_index_without_system_message(): + """Test locate_safe_reduction_index without system message flag.""" + history = [ + ChatMessageContent(role=AuthorRole.USER, content="User 1"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 1"), + ChatMessageContent(role=AuthorRole.USER, content="User 2"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 2"), + ] + # target_count=2, no system message → truncation_index = 4 - 2 = 2 + index = locate_safe_reduction_index(history, target_count=2) + assert index is not None + assert index == 2 + + +def test_locate_safe_reduction_index_target_zero_returns_len(caplog): + """When target_count=1 + has_system_message → adjusted to 0 → returns len(history).""" + history = [ + ChatMessageContent(role=AuthorRole.SYSTEM, content="System"), + ChatMessageContent(role=AuthorRole.USER, content="User 1"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 1"), + ] + with caplog.at_level(logging.WARNING): + index = locate_safe_reduction_index(history, target_count=1, has_system_message=True) + assert index == len(history) + assert "target_count after accounting for system message is 0" in caplog.text + + +def test_locate_safe_reduction_index_no_reduction_needed(): + """When total <= target + threshold, returns None (no reduction needed).""" + history = [ + ChatMessageContent(role=AuthorRole.USER, content="User 1"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 1"), + ] + index = locate_safe_reduction_index(history, target_count=2, threshold_count=0) + assert index is None + + +# --- Multiple system/developer messages --- + + +async def test_truncation_multiple_system_messages(): + """Only the first system/developer message is preserved; others may be lost.""" + msgs = [ + ChatMessageContent(role=AuthorRole.SYSTEM, content="System prompt 1"), + ChatMessageContent(role=AuthorRole.DEVELOPER, content="Developer instructions"), + ChatMessageContent(role=AuthorRole.USER, content="User 1"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 1"), + ChatMessageContent(role=AuthorRole.USER, content="User 2"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 2"), + ] + reducer = ChatHistoryTruncationReducer(target_count=2) + reducer.messages = msgs + result = await reducer.reduce() + assert result is not None + # First system message is preserved + assert result[0].role == AuthorRole.SYSTEM + assert result[0].content == "System prompt 1" + + +# --- System message in retained tail (not truncated away) --- + + +async def test_truncation_system_message_in_retained_tail(): + """When system message falls within the retained portion, it is not duplicated.""" + msgs = [ + ChatMessageContent(role=AuthorRole.USER, content="User 1"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 1"), + ChatMessageContent(role=AuthorRole.USER, content="User 2"), + ChatMessageContent(role=AuthorRole.SYSTEM, content="Late system msg"), + ChatMessageContent(role=AuthorRole.ASSISTANT, content="Asst 2"), + ] + # target_count=3, system at index 3 is within the naive tail (indices 2..4). + # No target_count adjustment needed — system message stays in its natural position. + reducer = ChatHistoryTruncationReducer(target_count=3) + reducer.messages = msgs + result = await reducer.reduce() + assert result is not None + # Should have exactly target_count messages, with system message not duplicated + assert len(result.messages) == 3 + system_msgs = [m for m in result.messages if m.role == AuthorRole.SYSTEM] + assert len(system_msgs) == 1 + + +# --- Warning log test --- + + +async def test_truncation_target_1_logs_warning(caplog): + """Verify a warning is logged when target_count=1 with a system message.""" + reducer = ChatHistoryTruncationReducer(target_count=1, system_message="System") + reducer.add_message(ChatMessageContent(role=AuthorRole.USER, content="Hello")) + reducer.add_message(ChatMessageContent(role=AuthorRole.ASSISTANT, content="Hi")) + + with caplog.at_level(logging.WARNING): + result = await reducer.reduce() assert result is not None - assert len(result) == 4 - # They should be the last 4 messages - assert result[0] == chat_messages_with_tools[-4] - assert result[1] == chat_messages_with_tools[-3] - assert result[2] == chat_messages_with_tools[-2] - assert result[3] == chat_messages_with_tools[-1] + assert "target_count after accounting for system message is 0" in caplog.text