Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 35 additions & 20 deletions langfuse/_utils/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,21 @@ class Serializable: # type: ignore


class EventSerializer(JSONEncoder):
_MAX_DEPTH = 20

def __init__(self, *args: Any, **kwargs: Any) -> None:
    """Initialise the encoder and its per-encode bookkeeping.

    All positional and keyword arguments are forwarded unchanged to the
    parent ``JSONEncoder`` constructor.
    """
    super().__init__(*args, **kwargs)
    # Nesting counter; ``default`` bumps it on entry and restores it on exit.
    self._depth = 0
    # Track seen objects to detect circular references.
    self.seen: set[int] = set()

def default(self, obj: Any) -> Any:
    """Serialize ``obj`` while tracking how deep the current call is nested.

    The depth counter is raised for the duration of the delegated
    ``_default_inner`` call and always lowered again afterwards, even when
    that call raises.

    Args:
        obj: The value to convert into a JSON-compatible representation.

    Returns:
        Whatever ``_default_inner`` returns for ``obj``.
    """
    self._depth += 1
    try:
        serialized = self._default_inner(obj)
    finally:
        # Restore the counter on every exit path so it never drifts upward.
        self._depth -= 1
    return serialized

def _default_inner(self, obj: Any) -> Any:
try:
if isinstance(obj, (datetime)):
# Timezone-awareness check
Expand All @@ -66,7 +76,7 @@ def default(self, obj: Any) -> Any:
return "NaN"

if isinstance(obj, float) and math.isinf(obj):
return "Infinity"
return "-Infinity" if obj < 0 else "Infinity"

if isinstance(obj, (Exception, KeyboardInterrupt)):
return f"{type(obj).__name__}: {str(obj)}"
Expand All @@ -82,9 +92,6 @@ def default(self, obj: Any) -> Any:
if isinstance(obj, Queue):
return type(obj).__name__

if is_dataclass(obj):
return asdict(obj) # type: ignore

if isinstance(obj, UUID):
return str(obj)

Expand All @@ -97,22 +104,9 @@ def default(self, obj: Any) -> Any:
if isinstance(obj, (date)):
return obj.isoformat()

if isinstance(obj, BaseModel):
obj.model_rebuild()

# For LlamaIndex models, we need to rebuild the raw model as well if they include OpenAI models
if isinstance(raw := getattr(obj, "raw", None), BaseModel):
raw.model_rebuild()

return obj.model_dump()

if isinstance(obj, Path):
return str(obj)

# if langchain is not available, the Serializable type is NoneType
if Serializable is not type(None) and isinstance(obj, Serializable): # type: ignore
return obj.to_json()

# 64-bit integers might overflow the JavaScript safe integer range.
# Since Node.js is run on the server that handles the serialized value,
# we need to ensure that integers outside the safe range are converted to strings.
Expand All @@ -123,6 +117,25 @@ def default(self, obj: Any) -> Any:
if isinstance(obj, (str, float, type(None))):
return obj

if self._depth >= self._MAX_DEPTH:
return f"<{type(obj).__name__}>"

if is_dataclass(obj):
return asdict(obj) # type: ignore

if isinstance(obj, BaseModel):
obj.model_rebuild()

# For LlamaIndex models, we need to rebuild the raw model as well if they include OpenAI models
if isinstance(raw := getattr(obj, "raw", None), BaseModel):
raw.model_rebuild()

return obj.model_dump()

# if langchain is not available, the Serializable type is NoneType
if Serializable is not type(None) and isinstance(obj, Serializable): # type: ignore
return obj.to_json()

if isinstance(obj, (tuple, set, frozenset)):
return list(obj)
Comment on lines 139 to 140
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Depth limit bypassed for tuple/set/frozenset items

list(obj) is returned as-is — the items are not passed through self.default(). super().encode() then calls self.default() on each non-serializable item, but by that point _depth has been decremented back to 0. A structure like ((((custom_obj,),),),) triggers default() repeatedly from depth 0, so _MAX_DEPTH provides no protection for objects nested exclusively inside these collection types.

Prompt To Fix With AI
This is a comment left during a code review.
Path: langfuse/_utils/serializer.py
Line: 139-140

Comment:
**Depth limit bypassed for tuple/set/frozenset items**

`list(obj)` is returned as-is — the items are **not** passed through `self.default()`. `super().encode()` then calls `self.default()` on each non-serializable item, but by that point `_depth` has been decremented back to 0. A structure like `((((custom_obj,),),),)` triggers `default()` repeatedly from depth 0, so `_MAX_DEPTH` provides no protection for objects nested exclusively inside these collection types.

How can I resolve this? If you propose a fix, please make it concise.


Expand All @@ -138,9 +151,10 @@ def default(self, obj: Any) -> Any:
return [self.default(item) for item in obj]

if hasattr(obj, "__slots__"):
return self.default(
{slot: getattr(obj, slot, None) for slot in obj.__slots__}
)
return {
slot: self.default(getattr(obj, slot, None))
for slot in obj.__slots__
}
elif hasattr(obj, "__dict__"):
obj_id = id(obj)

Expand All @@ -167,6 +181,7 @@ def default(self, obj: Any) -> Any:

def encode(self, obj: Any) -> str:
self.seen.clear() # Clear seen objects before each encode call
self._depth = 0

try:
return super().encode(self.default(obj))
Expand Down
130 changes: 130 additions & 0 deletions tests/unit/test_serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path
from uuid import UUID

import pytest
from pydantic import BaseModel

from langfuse._utils.serializer import (
Expand Down Expand Up @@ -164,6 +165,12 @@ def test_none():
assert serializer.encode(None) == "null"


def test_infinity_floats():
    """Positive and negative infinity encode to distinct JSON strings."""
    serializer = EventSerializer()
    cases = (
        (float("inf"), '"Infinity"'),
        (float("-inf"), '"-Infinity"'),
    )
    for value, expected in cases:
        assert serializer.encode(value) == expected


def test_slots():
class SlotClass:
__slots__ = ["field"]
Expand All @@ -174,3 +181,126 @@ def __init__(self):
obj = SlotClass()
serializer = EventSerializer()
assert json.loads(serializer.encode(obj)) == {"field": "value"}


def test_deeply_nested_object_does_not_hang():
    """Encoding a multi-level object graph finishes and yields valid JSON.

    The graph mixes plain attributes, a list of objects, a dict, and a
    ``threading.Lock`` a few levels below the top-level payload.
    """

    class Inner:
        def __init__(self):
            self.lock = threading.Lock()
            self.value = "deep"

    class Connection:
        def __init__(self):
            self._inner = Inner()
            self._pool = [Inner() for _ in range(3)]

    class Client:
        def __init__(self):
            self._connection = Connection()
            self._config = {"key": "value"}

    class Platform:
        def __init__(self):
            self._client = Client()

    payload = {"args": (Platform(),), "kwargs": {}}
    encoded = EventSerializer().encode(payload)

    # Must complete without hanging and produce valid JSON
    decoded = json.loads(encoded)
    assert "args" in decoded


def test_max_depth_returns_type_name():
    """A chain longer than ``_MAX_DEPTH`` ends in a type-name placeholder."""

    class Level:
        def __init__(self, child=None):
            self.child = child

    # Build a chain deeper than _MAX_DEPTH
    chain = None
    for _ in range(EventSerializer._MAX_DEPTH + 10):
        chain = Level(child=chain)

    decoded = json.loads(EventSerializer().encode(chain))

    # Either spelling of the placeholder counts as a truncation marker.
    truncation_markers = ("Level", "<Level>")

    # Walk down the chain until a marker is found or the nesting runs out.
    found_truncation = False
    cursor = decoded
    while isinstance(cursor, dict) and "child" in cursor:
        child = cursor["child"]
        if child in truncation_markers:
            found_truncation = True
            break
        cursor = child

    assert found_truncation, "Expected depth limit to truncate deep nesting"


def test_deeply_nested_slots_object_is_truncated():
    """A deep chain of ``__slots__`` objects is cut off near ``_MAX_DEPTH``."""

    class SlotLevel:
        __slots__ = ["child"]

        def __init__(self, child=None):
            self.child = child

    chain = None
    for _ in range(EventSerializer._MAX_DEPTH + 10):
        chain = SlotLevel(child=chain)

    decoded = json.loads(EventSerializer().encode(chain))

    # Count how many nested dicts can be walked through via "child".
    levels = 0
    cursor = decoded
    while isinstance(cursor, dict):
        levels += 1
        if "child" not in cursor:
            break
        cursor = cursor["child"]

    lower_bound = EventSerializer._MAX_DEPTH - 2
    upper_bound = EventSerializer._MAX_DEPTH + 2
    assert lower_bound <= levels <= upper_bound, (
        f"Nesting depth {levels} not near _MAX_DEPTH ({EventSerializer._MAX_DEPTH}) — "
        "serializer truncated too early or too late"
    )


def test_deeply_nested_dict_preserves_keys_at_depth_boundary(monkeypatch):
    """With ``_MAX_DEPTH`` patched to 3, the innermost dict becomes ``"<dict>"``.

    Keys above the cut-off ("a" and "b") must still appear in the output.
    """
    monkeypatch.setattr(EventSerializer, "_MAX_DEPTH", 3)

    nested = {"a": {"b": {"c": "leaf"}}}
    encoded = EventSerializer().encode(nested)

    assert json.loads(encoded) == {"a": {"b": "<dict>"}}


class _Color(Enum):
    """Enum fixture with one string-valued and one integer-valued member."""

    # Member whose value is a string.
    RED = "red"
    # Member whose value is an integer.
    NUMERIC = 7


@pytest.mark.parametrize(
    "input_obj, expected",
    [
        pytest.param(
            {datetime(2024, 1, 1, tzinfo=timezone.utc): "v"},
            {"2024-01-01T00:00:00Z": "v"},
            id="datetime",
        ),
        pytest.param(
            {UUID("12345678-1234-5678-1234-567812345678"): "v"},
            {"12345678-1234-5678-1234-567812345678": "v"},
            id="uuid",
        ),
        pytest.param({_Color.RED: "v"}, {"red": "v"}, id="enum_str_value"),
        pytest.param({_Color.NUMERIC: "v"}, {"7": "v"}, id="enum_int_value"),
    ],
)
def test_dict_with_non_string_keys_is_serialized(input_obj, expected):
    """Dict keys that are not strings are emitted as string keys in the JSON.

    Each case pairs an input dict keyed by a datetime, UUID, or enum member
    with the decoded JSON object expected after a round trip.
    """
    encoded = EventSerializer().encode(input_obj)

    assert json.loads(encoded) == expected