From 9d4e9af5af6e0ffe68e37e1132fde9ce6e9dbcf2 Mon Sep 17 00:00:00 2001
From: Josh Park <50765702+JoshParkSJ@users.noreply.github.com>
Date: Tue, 24 Feb 2026 14:27:54 -0500
Subject: [PATCH] feat: convert DeepRag citations to inline tags
for conversational agents
---
pyproject.toml | 2 +-
src/uipath_langchain/agent/react/agent.py | 12 +
.../agent/tools/context_tool.py | 1 +
src/uipath_langchain/runtime/_citations.py | 140 +++++-
src/uipath_langchain/runtime/messages.py | 7 +-
tests/runtime/chat_message_mapper.py | 72 +++
tests/runtime/test_citations.py | 432 ++++++++++++++++++
uv.lock | 2 +-
8 files changed, 642 insertions(+), 26 deletions(-)
diff --git a/pyproject.toml b/pyproject.toml
index 74c355f53..78c7f3b49 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "uipath-langchain"
-version = "0.7.2"
+version = "0.7.3"
description = "Python SDK that enables developers to build and deploy LangGraph agents to the UiPath Cloud Platform"
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.11"
diff --git a/src/uipath_langchain/agent/react/agent.py b/src/uipath_langchain/agent/react/agent.py
index 15b4e9019..96c8750f1 100644
--- a/src/uipath_langchain/agent/react/agent.py
+++ b/src/uipath_langchain/agent/react/agent.py
@@ -6,9 +6,12 @@
from langgraph.constants import END, START
from langgraph.graph import StateGraph
from pydantic import BaseModel
+from uipath.platform.context_grounding import DeepRagContent
from uipath.platform.guardrails import BaseGuardrail
+from ...runtime._citations import cas_deep_rag_citation_wrapper
from ..guardrails.actions import GuardrailAction
+from ..tools.structured_tool_with_output_type import StructuredToolWithOutputType
from .guardrails.guardrails_subgraph import (
create_agent_init_guardrails_subgraph,
create_agent_terminate_guardrails_subgraph,
@@ -74,6 +77,15 @@ def create_agent(
init_node = create_init_node(messages, input_schema, config.is_conversational)
tool_nodes = create_tool_node(agent_tools)
+
+ # for conversational agents we transform deeprag's citation format into cas's
+ if config.is_conversational:
+ for node in tool_nodes.values():
+ if isinstance(node.tool, StructuredToolWithOutputType) and issubclass(
+ node.tool.output_type, DeepRagContent
+ ):
+ node.awrapper = cas_deep_rag_citation_wrapper
+
tool_nodes_with_guardrails = create_tools_guardrails_subgraph(
tool_nodes, guardrails, input_schema=input_schema
)
diff --git a/src/uipath_langchain/agent/tools/context_tool.py b/src/uipath_langchain/agent/tools/context_tool.py
index a82ab81f7..d574a6f60 100644
--- a/src/uipath_langchain/agent/tools/context_tool.py
+++ b/src/uipath_langchain/agent/tools/context_tool.py
@@ -188,6 +188,7 @@ async def create_deep_rag():
index_name=index_name,
prompt=actual_prompt,
citation_mode=citation_mode,
+ index_folder_path=resource.folder_path,
)
return await create_deep_rag()
diff --git a/src/uipath_langchain/runtime/_citations.py b/src/uipath_langchain/runtime/_citations.py
index 412126f4f..7032687f3 100644
--- a/src/uipath_langchain/runtime/_citations.py
+++ b/src/uipath_langchain/runtime/_citations.py
@@ -1,11 +1,16 @@
from __future__ import annotations
+import json
import logging
import re
from dataclasses import dataclass
+from typing import Any
from uuid import uuid4
+from langchain_core.messages.tool import ToolCall
+from langchain_core.tools import BaseTool
from uipath.core.chat import (
+ UiPathConversationCitationData,
UiPathConversationCitationEndEvent,
UiPathConversationCitationEvent,
UiPathConversationCitationSourceMedia,
@@ -80,6 +85,37 @@ def _parse_citations(text: str) -> list[tuple[str, _ParsedCitation | None]]:
return segments
+def _make_source(
+    citation: _ParsedCitation,
+    source_numbers: dict[_ParsedCitation, int],
+    next_number: int,
+) -> tuple[
+    UiPathConversationCitationSourceUrl | UiPathConversationCitationSourceMedia, int
+]:
+    """Create the source object for one parsed citation.
+
+    A citation already present in ``source_numbers`` reuses its stored number;
+    an unseen citation is recorded under ``next_number`` and the counter is
+    advanced by one. ``source_numbers`` is updated in place.
+
+    Returns:
+        A ``(source, next_number)`` pair. The source is a URL source when the
+        citation has a ``url``; otherwise it is a media source built from the
+        citation's ``reference`` and ``page_number``. ``next_number`` is the
+        counter value the caller should pass to the following call.
+    """
+    try:
+        assigned = source_numbers[citation]
+    except KeyError:
+        # First sighting: claim the current counter value, then bump it.
+        assigned = source_numbers[citation] = next_number
+        next_number += 1
+
+    if citation.url is None:
+        media = UiPathConversationCitationSourceMedia(
+            title=citation.title,
+            number=assigned,
+            mime_type=None,
+            download_url=citation.reference,
+            page_number=citation.page_number,
+        )
+        return media, next_number
+
+    link = UiPathConversationCitationSourceUrl(
+        title=citation.title,
+        number=assigned,
+        url=citation.url,
+    )
+    return link, next_number
+
+
def _find_partial_tag_start(text: str) -> int:
_TAG_PREFIX = " list[UiPathConversationContentPartChunkEvent]:
self._buffer = ""
return self._process_segments(remaining)
+
+
+def extract_citations_from_text(
+    text: str,
+) -> tuple[str, list[UiPathConversationCitationData]]:
+    """Strip inline citation tags from ``text`` and describe them as structured data.
+
+    Returns:
+        ``(cleaned_text, citations)``. ``cleaned_text`` is the concatenation of
+        every parsed text segment. Each citation records the ``offset`` and
+        ``length`` (within ``cleaned_text``) of the segment that preceded its
+        tag. When parsing yields no segments, the input is returned unchanged
+        together with an empty list.
+    """
+    parsed = _parse_citations(text)
+    if not parsed:
+        return (text, [])
+
+    numbering: dict[_ParsedCitation, int] = {}
+    counter = 1
+    pieces: list[str] = []
+    collected: list[UiPathConversationCitationData] = []
+    cursor = 0
+
+    for piece, cite in parsed:
+        pieces.append(piece)
+        span = len(piece)
+        start = cursor
+        cursor += span
+
+        if cite is None:
+            continue
+
+        # Always number the source, even if the citation ends up unused,
+        # so numbering follows order of appearance in the text.
+        src, counter = _make_source(cite, numbering, counter)
+        if span > 0:
+            collected.append(
+                UiPathConversationCitationData(
+                    offset=start,
+                    length=span,
+                    sources=[src],
+                )
+            )
+        elif collected:
+            # A tag with no text of its own directly follows another tag:
+            # attach its source to the citation recorded just before it.
+            collected[-1].sources.append(src)
+        # Otherwise there is neither a text span nor an earlier citation to
+        # attach to, so the source is not reported.
+
+    return ("".join(pieces), collected)
+
+
+def _escape_attr(value: str) -> str:
+    """Escape only characters that would break XML attribute parsing.
+
+    The value is meant to be placed inside a double-quoted attribute, so the
+    only character rewritten is the double quote, which becomes the XML
+    entity ``&quot;``. All other characters are returned unchanged.
+    """
+    return value.replace('"', "&quot;")
+
+
+def convert_citations_to_inline_tags(content: dict[str, Any]) -> str:
+ """Replace [ordinal] references in DeepRag text with inline citation tags.
+
+ Reads ``content["text"]`` (default ``""``) and ``content["citations"]``
+ (default ``[]``). For every citation that carries an ``ordinal``, each
+ ``[<ordinal>]`` marker in the text is replaced by the tag built for that
+ citation. Returns the rewritten text.
+ """
+ text = content.get("text", "")
+ citations = content.get("citations", [])
+
+ # Index citations by ordinal. Entries without an ordinal are skipped, and a
+ # later entry with the same ordinal overwrites an earlier one.
+ citation_map: dict[int, dict[str, Any]] = {}
+ for c in citations:
+ ordinal = c.get("ordinal")
+ if ordinal is not None:
+ citation_map[ordinal] = c
+
+ for ordinal, c in citation_map.items():
+ title = _escape_attr(str(c.get("source", "")))
+ reference = _escape_attr(str(c.get("reference", "")))
+ # The page number is read from "pageNumber", falling back to "page_number".
+ page_number = _escape_attr(str(c.get("pageNumber", c.get("page_number", ""))))
+ # NOTE(review): the f-string below is empty in this copy of the patch, so
+ # title/reference/page_number are unused and markers are simply removed.
+ # The tag markup was presumably lost in extraction — confirm against the
+ # original commit.
+ tag = (
+ f''
+ )
+ # NOTE(review): replacements run one ordinal at a time over the text
+ # accumulated so far, so a tag inserted earlier is itself scanned for
+ # later "[n]" markers (e.g. a source title containing "[2]").
+ text = text.replace(f"[{ordinal}]", tag)
+
+ return text
+
+
+async def cas_deep_rag_citation_wrapper(tool: BaseTool, call: ToolCall):
+    """Invoke ``tool`` and rewrite a DeepRag result into CAS inline-tag form.
+
+    The tool result's ``content`` is parsed as JSON, passed through
+    ``convert_citations_to_inline_tags``, and replaced by a JSON object with a
+    single ``"text"`` key. This is best effort: if any step of the rewrite
+    fails, a warning is logged and the result is returned as the tool
+    produced it.
+    """
+    outcome = await tool.ainvoke(call)
+    try:
+        payload = json.loads(outcome.content)
+        inline_text = convert_citations_to_inline_tags(payload)
+        outcome.content = json.dumps({"text": inline_text})
+    except Exception:
+        # Deliberately broad: a malformed payload must not fail the tool call.
+        logger.warning(
+            "Failed to transform DeepRag citations, returning raw result", exc_info=True
+        )
+    return outcome
diff --git a/src/uipath_langchain/runtime/messages.py b/src/uipath_langchain/runtime/messages.py
index 53dce14cc..ed44f1ee8 100644
--- a/src/uipath_langchain/runtime/messages.py
+++ b/src/uipath_langchain/runtime/messages.py
@@ -39,7 +39,7 @@
)
from uipath.runtime import UiPathRuntimeStorageProtocol
-from ._citations import CitationStreamProcessor
+from ._citations import CitationStreamProcessor, extract_citations_from_text
logger = logging.getLogger(__name__)
@@ -626,11 +626,12 @@ def _map_langchain_ai_message_to_uipath_message_data(
content_parts: list[UiPathConversationContentPartData] = []
text_content = UiPathChatMessagesMapper._extract_text(message.content)
if text_content:
+ cleaned_text, citations = extract_citations_from_text(text_content)
content_parts.append(
UiPathConversationContentPartData(
mime_type="text/markdown",
- data=UiPathInlineValue(inline=text_content),
- citations=[], # TODO: Citations
+ data=UiPathInlineValue(inline=cleaned_text),
+ citations=citations,
)
)
diff --git a/tests/runtime/chat_message_mapper.py b/tests/runtime/chat_message_mapper.py
index 2f3c7f332..6c75f8770 100644
--- a/tests/runtime/chat_message_mapper.py
+++ b/tests/runtime/chat_message_mapper.py
@@ -12,6 +12,8 @@
ToolMessage,
)
from uipath.core.chat import (
+ UiPathConversationCitationSourceMedia,
+ UiPathConversationCitationSourceUrl,
UiPathConversationContentPart,
UiPathConversationMessage,
UiPathExternalValue,
@@ -1618,3 +1620,73 @@ def test_extracts_text_from_content_blocks(self):
assert len(result[0].content_parts) == 1
assert isinstance(result[0].content_parts[0].data, UiPathInlineValue)
assert result[0].content_parts[0].data.inline == "first part second part"
+
+
+class TestMapLangChainAIMessageCitations:
+ """Tests for citation extraction in _map_langchain_ai_message_to_uipath_message_data."""
+
+ # NOTE(review): in this copy of the patch the AIMessage string literals in the
+ # first and third tests contain no citation markup, although the assertions
+ # expect citations to be extracted. The inline tags were presumably stripped
+ # in extraction — confirm against the original commit.
+
+ def test_ai_message_with_citation_tags_populates_citations(self):
+ """AIMessage with inline citation tags should have citations populated and text cleaned."""
+ messages: list[AnyMessage] = [
+ AIMessage(
+ content='Some fact and more.'
+ )
+ ]
+
+ result = (
+ UiPathChatMessagesMapper.map_langchain_messages_to_uipath_message_data_list(
+ messages
+ )
+ )
+
+ assert len(result) == 1
+ part = result[0].content_parts[0]
+ assert isinstance(part.data, UiPathInlineValue)
+ # The tag itself must not appear in the text delivered to the client.
+ assert part.data.inline == "Some fact and more."
+ assert len(part.citations) == 1
+ # The citation spans the text that precedes the tag.
+ assert part.citations[0].offset == 0
+ assert part.citations[0].length == 9 # "Some fact"
+ source = part.citations[0].sources[0]
+ assert isinstance(source, UiPathConversationCitationSourceUrl)
+ assert source.url == "https://doc.com"
+ assert source.title == "Doc"
+
+ def test_ai_message_without_citation_tags_has_empty_citations(self):
+ """AIMessage without citation tags should have empty citations list."""
+ messages: list[AnyMessage] = [AIMessage(content="Plain text response")]
+
+ result = (
+ UiPathChatMessagesMapper.map_langchain_messages_to_uipath_message_data_list(
+ messages
+ )
+ )
+
+ assert len(result) == 1
+ part = result[0].content_parts[0]
+ assert isinstance(part.data, UiPathInlineValue)
+ # Text without tags passes through unchanged.
+ assert part.data.inline == "Plain text response"
+ assert part.citations == []
+
+ def test_ai_message_with_media_citation(self):
+ """AIMessage with reference/media citation tag should produce media source."""
+ messages: list[AnyMessage] = [
+ AIMessage(
+ content='A finding'
+ )
+ ]
+
+ result = (
+ UiPathChatMessagesMapper.map_langchain_messages_to_uipath_message_data_list(
+ messages
+ )
+ )
+
+ assert len(result) == 1
+ part = result[0].content_parts[0]
+ assert isinstance(part.data, UiPathInlineValue)
+ assert part.data.inline == "A finding"
+ assert len(part.citations) == 1
+ source = part.citations[0].sources[0]
+ # A citation without a url is expected to map to a media source.
+ assert isinstance(source, UiPathConversationCitationSourceMedia)
+ assert source.download_url == "https://r.com"
+ assert source.page_number == "3"
diff --git a/tests/runtime/test_citations.py b/tests/runtime/test_citations.py
index 3bb6cbcf7..99ea08f64 100644
--- a/tests/runtime/test_citations.py
+++ b/tests/runtime/test_citations.py
@@ -1,6 +1,11 @@
"""Tests for the CitationStreamProcessor and citation parsing utilities."""
# mypy: disable-error-code="union-attr,operator"
+import json
+from unittest.mock import AsyncMock
+
+import pytest
+from langchain_core.messages.tool import ToolCall, ToolMessage
from uipath.core.chat import (
UiPathConversationCitationSourceMedia,
UiPathConversationCitationSourceUrl,
@@ -9,6 +14,9 @@
from uipath_langchain.runtime._citations import (
CitationStreamProcessor,
_find_partial_tag_start,
+ cas_deep_rag_citation_wrapper,
+ convert_citations_to_inline_tags,
+ extract_citations_from_text,
)
@@ -533,3 +541,427 @@ def test_uip_prefix_followed_by_citation_single_chunk(self):
assert len(cited) == 1
assert cited[0].data == "'
+ ' and second'
+ )
+ cleaned, citations = extract_citations_from_text(text)
+ assert cleaned == "First and second"
+ assert len(citations) == 2
+ # First citation: "First" at offset 0, length 5
+ assert citations[0].offset == 0
+ assert citations[0].length == 5
+ assert citations[0].sources[0].url == "https://a.com"
+ # Second citation: " and second" at offset 5, length 11
+ assert citations[1].offset == 5
+ assert citations[1].length == 11
+ assert citations[1].sources[0].url == "https://b.com"
+
+ def test_duplicate_sources_same_number(self):
+ """Duplicate sources (same title+url) get the same number."""
+ # NOTE(review): the literals below hold only "A" and "B" in this copy of the
+ # patch; the citation tags that should follow each were presumably stripped
+ # in extraction — confirm against the original commit.
+ text = (
+ 'A'
+ 'B'
+ )
+ cleaned, citations = extract_citations_from_text(text)
+ assert cleaned == "AB"
+ assert len(citations) == 2
+ # Both citations point at the same source, so they share one number.
+ assert citations[0].sources[0].number == citations[1].sources[0].number
+
+ def test_back_to_back_citations_merged_into_previous(self):
+ """Back-to-back citations merge the second source into the previous citation."""
+ # NOTE(review): the literals below contain no citation markup in this copy
+ # of the patch (the second is empty); the tags were presumably stripped in
+ # extraction — confirm against the original commit.
+ text = (
+ 'Text'
+ ''
+ )
+ cleaned, citations = extract_citations_from_text(text)
+ assert cleaned == "Text"
+ # One citation covering "Text", carrying both sources in order.
+ assert len(citations) == 1
+ assert len(citations[0].sources) == 2
+ assert citations[0].sources[0].url == "https://a.com"
+ assert citations[0].sources[1].url == "https://b.com"
+
+ def test_three_back_to_back_citations(self):
+ """Three back-to-back citations all merge into one citation with three sources."""
+ # NOTE(review): the literals below contain no citation markup in this copy
+ # of the patch; the tags were presumably stripped in extraction — confirm
+ # against the original commit.
+ text = (
+ 'Answer'
+ ''
+ ''
+ )
+ cleaned, citations = extract_citations_from_text(text)
+ assert cleaned == "Answer"
+ assert len(citations) == 1
+ # Sources are expected in the order the tags appeared.
+ assert len(citations[0].sources) == 3
+ assert citations[0].sources[0].title == "A"
+ assert citations[0].sources[1].title == "B"
+ assert citations[0].sources[2].title == "C"
+
+ def test_back_to_back_citations_at_start_with_no_preceding_text(self):
+ """Back-to-back citations at the very start (no preceding text) are all dropped."""
+ # NOTE(review): both literals below are empty in this copy of the patch; the
+ # citation tags were presumably stripped in extraction — confirm against
+ # the original commit.
+ text = (
+ ''
+ ''
+ )
+ cleaned, citations = extract_citations_from_text(text)
+ assert cleaned == ""
+ # The first citation has no preceding text and no previous citation to
+ # merge into, so it is not recorded.
+ # The second citation also has no preceding text; because the first was not
+ # recorded, there is still nothing to merge into.
+ # Both end up dropped since neither has a text span.
+ assert len(citations) == 0
+
+    def test_empty_text(self):
+        """An empty input produces empty cleaned text and an empty citation list."""
+        stripped, found = extract_citations_from_text("")
+        assert stripped == ""
+        assert found == []
+
+ def test_text_with_trailing_content(self):
+ """Citation in middle of text, trailing text preserved."""
+ # NOTE(review): the literal below contains no citation markup in this copy
+ # of the patch; a tag presumably sat between "A fact" and " and more text."
+ # and was stripped in extraction — confirm against the original commit.
+ text = 'A fact and more text.'
+ cleaned, citations = extract_citations_from_text(text)
+ assert cleaned == "A fact and more text."
+ assert len(citations) == 1
+ # The citation covers only the text before the tag, not the trailing text.
+ assert citations[0].offset == 0
+ assert citations[0].length == 6 # len("A fact")
+
+ def test_different_sources_get_different_numbers(self):
+ """Different sources get incrementing numbers."""
+ # NOTE(review): the literals below hold only "A" and "B" in this copy of the
+ # patch; the citation tags were presumably stripped in extraction — confirm
+ # against the original commit.
+ text = (
+ 'A'
+ 'B'
+ )
+ cleaned, citations = extract_citations_from_text(text)
+ # Numbering starts at 1 and follows order of first appearance.
+ assert citations[0].sources[0].number == 1
+ assert citations[1].sources[0].number == 2
+
+
+class TestConvertCitationsToInlineTags:
+ """Test cases for convert_citations_to_inline_tags function."""
+
+ def test_basic_replacement(self):
+ """Test basic [1] and [2] replacement with tags."""
+ content = {
+ "text": "Fact A [1] and fact B [2].",
+ "citations": [
+ {
+ "ordinal": 1,
+ "pageNumber": 3,
+ "source": "Report.pdf",
+ "reference": "https://example.com/ref1",
+ },
+ {
+ "ordinal": 2,
+ "pageNumber": 7,
+ "source": "Manual.pdf",
+ "reference": "https://example.com/ref2",
+ },
+ ],
+ }
+
+ result = convert_citations_to_inline_tags(content)
+
+ # The bracketed ordinal markers must be gone from the output.
+ assert "[1]" not in result
+ assert "[2]" not in result
+ # NOTE(review): the expected-tag literals in the two assertions below are
+ # empty in this copy of the patch, which makes them trivially true. The tag
+ # markup was presumably stripped in extraction — confirm against the
+ # original commit.
+ assert (
+ ''
+ in result
+ )
+ assert (
+ ''
+ in result
+ )
+
+ def test_same_ordinal_used_twice(self):
+ """Test that the same ordinal appearing twice in text is replaced in both places."""
+ content = {
+ "text": "First mention [1] and second mention [1].",
+ "citations": [
+ {
+ "ordinal": 1,
+ "pageNumber": 5,
+ "source": "Doc.pdf",
+ "reference": "https://example.com/doc",
+ },
+ ],
+ }
+
+ result = convert_citations_to_inline_tags(content)
+
+ assert "[1]" not in result
+ assert result.count('' in result
+
+ def test_back_to_back_ordinals(self):
+ """Test [1][2] back-to-back at end of text are both converted."""
+ content = {
+ "text": "Answer [1][2]",
+ "citations": [
+ {
+ "ordinal": 1,
+ "pageNumber": 1,
+ "source": "A.pdf",
+ "reference": "https://a.com",
+ },
+ {
+ "ordinal": 2,
+ "pageNumber": 5,
+ "source": "B.pdf",
+ "reference": "https://b.com",
+ },
+ ],
+ }
+
+ result = convert_citations_to_inline_tags(content)
+
+ assert "[1]" not in result
+ assert "[2]" not in result
+ assert '