Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "uipath-langchain"
version = "0.7.2"
version = "0.7.3"
description = "Python SDK that enables developers to build and deploy LangGraph agents to the UiPath Cloud Platform"
readme = { file = "README.md", content-type = "text/markdown" }
requires-python = ">=3.11"
Expand Down
12 changes: 12 additions & 0 deletions src/uipath_langchain/agent/react/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
from langgraph.constants import END, START
from langgraph.graph import StateGraph
from pydantic import BaseModel
from uipath.platform.context_grounding import DeepRagContent
from uipath.platform.guardrails import BaseGuardrail

from ...runtime._citations import cas_deep_rag_citation_wrapper
from ..guardrails.actions import GuardrailAction
from ..tools.structured_tool_with_output_type import StructuredToolWithOutputType
from .guardrails.guardrails_subgraph import (
create_agent_init_guardrails_subgraph,
create_agent_terminate_guardrails_subgraph,
Expand Down Expand Up @@ -74,6 +77,15 @@ def create_agent(
init_node = create_init_node(messages, input_schema, config.is_conversational)

tool_nodes = create_tool_node(agent_tools)

# for conversational agents we transform deeprag's citation format into cas's
if config.is_conversational:
for node in tool_nodes.values():
if isinstance(node.tool, StructuredToolWithOutputType) and issubclass(
node.tool.output_type, DeepRagContent
):
node.awrapper = cas_deep_rag_citation_wrapper

tool_nodes_with_guardrails = create_tools_guardrails_subgraph(
tool_nodes, guardrails, input_schema=input_schema
)
Expand Down
1 change: 1 addition & 0 deletions src/uipath_langchain/agent/tools/context_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@ async def create_deep_rag():
index_name=index_name,
prompt=actual_prompt,
citation_mode=citation_mode,
index_folder_path=resource.folder_path,
)

return await create_deep_rag()
Expand Down
140 changes: 119 additions & 21 deletions src/uipath_langchain/runtime/_citations.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
from __future__ import annotations

import json
import logging
import re
from dataclasses import dataclass
from typing import Any
from uuid import uuid4

from langchain_core.messages.tool import ToolCall
from langchain_core.tools import BaseTool
from uipath.core.chat import (
UiPathConversationCitationData,
UiPathConversationCitationEndEvent,
UiPathConversationCitationEvent,
UiPathConversationCitationSourceMedia,
Expand Down Expand Up @@ -80,6 +85,37 @@ def _parse_citations(text: str) -> list[tuple[str, _ParsedCitation | None]]:
return segments


def _make_source(
    citation: _ParsedCitation,
    source_numbers: dict[_ParsedCitation, int],
    next_number: int,
) -> tuple[
    UiPathConversationCitationSourceUrl | UiPathConversationCitationSourceMedia, int
]:
    """Build the source object for a parsed citation and number it.

    A citation seen for the first time is registered in ``source_numbers``
    under ``next_number``; a citation that is already registered reuses its
    stored number, so identical citations share a single number.

    Args:
        citation: The parsed citation to convert.
        source_numbers: Registry of numbers already handed out. It is
            mutated in place when ``citation`` is new.
        next_number: The number to assign if ``citation`` is new.

    Returns:
        A ``(source, next_number)`` pair. ``source`` is a URL source when
        ``citation.url`` is set and a media source otherwise. The returned
        ``next_number`` is incremented only if a new number was assigned.
    """
    first_occurrence = citation not in source_numbers
    if first_occurrence:
        source_numbers[citation] = next_number
    assigned_number = source_numbers[citation]
    following_number = next_number + 1 if first_occurrence else next_number

    if citation.url is not None:
        url_source = UiPathConversationCitationSourceUrl(
            title=citation.title,
            number=assigned_number,
            url=citation.url,
        )
        return url_source, following_number

    # No URL: describe the source as media, addressed by its reference.
    media_source = UiPathConversationCitationSourceMedia(
        title=citation.title,
        number=assigned_number,
        mime_type=None,
        download_url=citation.reference,
        page_number=citation.page_number,
    )
    return media_source, following_number


def _find_partial_tag_start(text: str) -> int:
_TAG_PREFIX = "<uip:cite "

Expand Down Expand Up @@ -120,28 +156,9 @@ def _build_content_part_citation(
if citation is None:
return UiPathConversationContentPartChunkEvent(data=text)

if citation not in self._source_numbers:
self._source_numbers[citation] = self._next_number
self._next_number += 1
number = self._source_numbers[citation]

source: (
UiPathConversationCitationSourceUrl | UiPathConversationCitationSourceMedia
source, self._next_number = _make_source(
citation, self._source_numbers, self._next_number
)
if citation.url is not None:
source = UiPathConversationCitationSourceUrl(
title=citation.title,
number=number,
url=citation.url,
)
else:
source = UiPathConversationCitationSourceMedia(
title=citation.title,
number=number,
mime_type=None,
download_url=citation.reference,
page_number=citation.page_number,
)

return UiPathConversationContentPartChunkEvent(
data=text,
Expand Down Expand Up @@ -197,3 +214,84 @@ def finalize(self) -> list[UiPathConversationContentPartChunkEvent]:
self._buffer = ""

return self._process_segments(remaining)


def extract_citations_from_text(
    text: str,
) -> tuple[str, list[UiPathConversationCitationData]]:
    """Strip inline <uip:cite .../> tags from text and return structured citations.

    Args:
        text: Text that may contain inline citation tags.

    Returns:
        A ``(cleaned_text, citations)`` pair. ``cleaned_text`` is the
        concatenation of the parsed segment texts. Each citation records the
        offset and length, within ``cleaned_text``, of the segment text that
        its tag followed. When parsing yields no segments, the input text is
        returned unchanged with an empty citation list.
    """
    parsed_segments = _parse_citations(text)
    if not parsed_segments:
        return (text, [])

    numbering: dict[_ParsedCitation, int] = {}
    upcoming_number = 1
    pieces: list[str] = []
    collected: list[UiPathConversationCitationData] = []
    cursor = 0

    for piece, parsed_citation in parsed_segments:
        pieces.append(piece)
        span = len(piece)
        start = cursor
        cursor += span

        if parsed_citation is None:
            continue

        source, upcoming_number = _make_source(
            parsed_citation, numbering, upcoming_number
        )
        if span > 0:
            collected.append(
                UiPathConversationCitationData(
                    offset=start,
                    length=span,
                    sources=[source],
                )
            )
        elif collected:
            # A tag with no text of its own (back-to-back citations) is folded
            # into the previous citation, which then carries several sources.
            collected[-1].sources.append(source)
        # A tag with no text and no earlier citation is not recorded.

    return ("".join(pieces), collected)


def _escape_attr(value: str) -> str:
    """Escape only characters that would break XML attribute parsing.

    Attribute values are emitted between double quotes, so the double quote
    is the only character rewritten (to ``&quot;``).
    """
    return value.replace('"', "&quot;")


def convert_citations_to_inline_tags(content: dict[str, Any]) -> str:
    """Replace [ordinal] references in DeepRag text with <uip:cite/> tags.

    Args:
        content: DeepRag payload. ``content["text"]`` holds the answer text
            with ``[ordinal]`` markers and ``content["citations"]`` holds the
            citation entries (``ordinal``, ``source``, ``reference`` and
            ``pageNumber`` or ``page_number``). A missing or ``None`` value
            for either key is treated as empty.

    Returns:
        The text with every known ``[ordinal]`` marker replaced by a
        ``<uip:cite title=... reference=... page_number=... />`` tag.
        Markers without a matching citation are left untouched.
    """
    text = content.get("text") or ""
    citations = content.get("citations") or []

    # The last citation seen for a given ordinal wins.
    citation_map: dict[Any, dict[str, Any]] = {}
    for c in citations:
        ordinal = c.get("ordinal")
        if ordinal is not None:
            citation_map[ordinal] = c

    def _attr(value: Any) -> str:
        # Render a missing/None field as empty instead of the string "None".
        return _escape_attr("" if value is None else str(value))

    tags: dict[str, str] = {}
    for ordinal, c in citation_map.items():
        page_number = c.get("pageNumber")
        if page_number is None:
            # Fall back to the snake_case alias, also when pageNumber is null.
            page_number = c.get("page_number")
        tag = (
            f'<uip:cite title="{_attr(c.get("source"))}" '
            f'reference="{_attr(c.get("reference"))}" '
            f'page_number="{_attr(page_number)}" />'
        )
        tags.setdefault(f"[{ordinal}]", tag)

    if not text or not tags:
        return text

    # Substitute every marker in ONE pass over the original text. Chained
    # str.replace calls would rescan tags that were already inserted and
    # corrupt any title/reference that happens to contain "[n]".
    marker_pattern = re.compile(
        "|".join(re.escape(m) for m in sorted(tags, key=len, reverse=True))
    )
    return marker_pattern.sub(lambda match: tags[match.group(0)], text)


async def cas_deep_rag_citation_wrapper(tool: BaseTool, call: ToolCall):
    """Invoke a tool and rewrite its DeepRag result into inline <uip:cite/> tags.

    The tool is awaited with ``call``. The result's ``content`` is then parsed
    as JSON, converted with ``convert_citations_to_inline_tags`` and stored
    back as a JSON object with a single ``"text"`` key. The conversion is
    best-effort: if any step of it fails, a warning is logged and the result
    is returned with its content untouched.

    Args:
        tool: The tool to invoke.
        call: The tool call passed to ``tool.ainvoke``.

    Returns:
        The tool result, with transformed content when conversion succeeded.
    """
    tool_result = await tool.ainvoke(call)
    try:
        payload = json.loads(tool_result.content)
        inline_text = convert_citations_to_inline_tags(payload)
        rewritten = json.dumps({"text": inline_text})
        tool_result.content = rewritten
    except Exception:
        # Deliberate catch-all: a formatting problem must not fail the tool call.
        logger.warning(
            "Failed to transform DeepRag citations, returning raw result", exc_info=True
        )
    return tool_result
Copy link
Contributor Author

@JoshParkSJ JoshParkSJ Feb 24, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Going with a wrapper instead of passing is_conversational down the tool-creation pipeline (which would mean updating the parameters of 3 methods); this also follows the existing batch_transform wrapper pattern.

7 changes: 4 additions & 3 deletions src/uipath_langchain/runtime/messages.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
)
from uipath.runtime import UiPathRuntimeStorageProtocol

from ._citations import CitationStreamProcessor
from ._citations import CitationStreamProcessor, extract_citations_from_text

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -626,11 +626,12 @@ def _map_langchain_ai_message_to_uipath_message_data(
content_parts: list[UiPathConversationContentPartData] = []
text_content = UiPathChatMessagesMapper._extract_text(message.content)
if text_content:
cleaned_text, citations = extract_citations_from_text(text_content)
content_parts.append(
UiPathConversationContentPartData(
mime_type="text/markdown",
data=UiPathInlineValue(inline=text_content),
citations=[], # TODO: Citations
data=UiPathInlineValue(inline=cleaned_text),
citations=citations,
)
)

Expand Down
72 changes: 72 additions & 0 deletions tests/runtime/chat_message_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
ToolMessage,
)
from uipath.core.chat import (
UiPathConversationCitationSourceMedia,
UiPathConversationCitationSourceUrl,
UiPathConversationContentPart,
UiPathConversationMessage,
UiPathExternalValue,
Expand Down Expand Up @@ -1618,3 +1620,73 @@ def test_extracts_text_from_content_blocks(self):
assert len(result[0].content_parts) == 1
assert isinstance(result[0].content_parts[0].data, UiPathInlineValue)
assert result[0].content_parts[0].data.inline == "first part second part"


class TestMapLangChainAIMessageCitations:
    """Citation extraction in _map_langchain_ai_message_to_uipath_message_data."""

    @staticmethod
    def _first_part_of_mapped_ai_message(content: str):
        """Map a single AIMessage and return the first content part of the result."""
        messages: list[AnyMessage] = [AIMessage(content=content)]
        mapped = (
            UiPathChatMessagesMapper.map_langchain_messages_to_uipath_message_data_list(
                messages
            )
        )
        assert len(mapped) == 1
        return mapped[0].content_parts[0]

    def test_ai_message_with_citation_tags_populates_citations(self):
        """An inline URL citation tag is removed from the text and reported as a citation."""
        part = self._first_part_of_mapped_ai_message(
            'Some fact<uip:cite title="Doc" url="https://doc.com" /> and more.'
        )

        assert isinstance(part.data, UiPathInlineValue)
        assert part.data.inline == "Some fact and more."

        assert len(part.citations) == 1
        citation = part.citations[0]
        assert citation.offset == 0
        assert citation.length == 9  # "Some fact"

        source = citation.sources[0]
        assert isinstance(source, UiPathConversationCitationSourceUrl)
        assert source.url == "https://doc.com"
        assert source.title == "Doc"

    def test_ai_message_without_citation_tags_has_empty_citations(self):
        """Text without citation tags is passed through with an empty citations list."""
        part = self._first_part_of_mapped_ai_message("Plain text response")

        assert isinstance(part.data, UiPathInlineValue)
        assert part.data.inline == "Plain text response"
        assert part.citations == []

    def test_ai_message_with_media_citation(self):
        """A tag carrying reference/page_number yields a media source."""
        part = self._first_part_of_mapped_ai_message(
            'A finding<uip:cite title="Report.pdf" reference="https://r.com" page_number="3" />'
        )

        assert isinstance(part.data, UiPathInlineValue)
        assert part.data.inline == "A finding"

        assert len(part.citations) == 1
        source = part.citations[0].sources[0]
        assert isinstance(source, UiPathConversationCitationSourceMedia)
        assert source.download_url == "https://r.com"
        assert source.page_number == "3"
Loading