diff --git a/langfuse/langchain/CallbackHandler.py b/langfuse/langchain/CallbackHandler.py
index 8d2c8db90..cddb1c63a 100644
--- a/langfuse/langchain/CallbackHandler.py
+++ b/langfuse/langchain/CallbackHandler.py
@@ -1256,8 +1256,25 @@ def _parse_usage_model(usage: Union[pydantic.BaseModel, dict]) -> Any:
                 if "output" in usage_model:
                     usage_model["output"] = max(0, usage_model["output"] - value)
 
-        # Vertex AI
+        # OpenAI / LiteLLM — prompt_tokens_details as dict
+        # e.g. {"cached_tokens": 12000}
         if "prompt_tokens_details" in usage_model and isinstance(
+            usage_model["prompt_tokens_details"], dict
+        ):
+            prompt_tokens_details = usage_model.pop("prompt_tokens_details")
+
+            for key, value in prompt_tokens_details.items():
+                if not isinstance(value, int):
+                    continue
+
+                usage_model[f"input_{key}"] = value
+
+                if "input" in usage_model:
+                    usage_model["input"] = max(0, usage_model["input"] - value)
+
+        # Vertex AI — prompt_tokens_details as list
+        # e.g. [{"modality": "text", "token_count": N}]
+        elif "prompt_tokens_details" in usage_model and isinstance(
             usage_model["prompt_tokens_details"], list
         ):
             prompt_tokens_details = usage_model.pop("prompt_tokens_details")
diff --git a/tests/test_parse_usage_model.py b/tests/test_parse_usage_model.py
index df441523c..764d6132b 100644
--- a/tests/test_parse_usage_model.py
+++ b/tests/test_parse_usage_model.py
@@ -16,6 +16,68 @@ def test_standard_tier_input_token_details():
     assert result["total"] == 14
 
 
+def test_prompt_tokens_details_dict_cached_tokens():
+    """OpenAI/LiteLLM: prompt_tokens_details as dict with cached_tokens."""
+    usage = {
+        "prompt_tokens": 15000,
+        "completion_tokens": 500,
+        "total_tokens": 15500,
+        "prompt_tokens_details": {"cached_tokens": 12000},
+    }
+    result = _parse_usage_model(usage)
+    assert result["input"] == 3000  # 15000 - 12000
+    assert result["output"] == 500
+    assert result["total"] == 15500
+    assert result["input_cached_tokens"] == 12000
+
+
+def test_prompt_tokens_details_dict_with_cache_creation():
+    """OpenAI/LiteLLM: prompt_tokens_details dict + top-level cache_creation."""
+    usage = {
+        "prompt_tokens": 15000,
+        "completion_tokens": 500,
+        "total_tokens": 15500,
+        "prompt_tokens_details": {"cached_tokens": 12000},
+        "cache_creation_input_tokens": 3000,
+    }
+    result = _parse_usage_model(usage)
+    assert result["input"] == 3000  # 15000 - 12000 (cached_tokens only subtracted here)
+    assert result["input_cached_tokens"] == 12000
+    assert result["cache_creation_input_tokens"] == 3000
+
+
+def test_prompt_tokens_details_list_vertex_ai():
+    """Vertex AI: prompt_tokens_details as list — existing behavior preserved."""
+    usage = {
+        "prompt_token_count": 1000,
+        "candidates_token_count": 200,
+        "total_token_count": 1200,
+        "prompt_tokens_details": [
+            {"modality": "text", "token_count": 800},
+            {"modality": "image", "token_count": 200},
+        ],
+    }
+    result = _parse_usage_model(usage)
+    assert result["input"] == 0  # 1000 - 800 - 200
+    assert result["output"] == 200
+    assert result["total"] == 1200
+    assert result["input_modality_text"] == 800
+    assert result["input_modality_image"] == 200
+
+
+def test_prompt_tokens_details_dict_empty():
+    """Empty dict prompt_tokens_details — no crash, input unchanged."""
+    usage = {
+        "prompt_tokens": 5000,
+        "completion_tokens": 100,
+        "total_tokens": 5100,
+        "prompt_tokens_details": {},
+    }
+    result = _parse_usage_model(usage)
+    assert result["input"] == 5000
+    assert result["output"] == 100
+
+
 def test_priority_tier_not_subtracted():
     """Priority tier: 'priority' and 'priority_*' keys must NOT be subtracted."""
     usage = {
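For reviewers who want to try the arithmetic without a checkout, here is a minimal standalone sketch of the new dict branch. The function name normalize_usage is hypothetical and for illustration only; it assumes the earlier key-renaming step of _parse_usage_model has already mapped prompt_tokens to "input", and it replicates only the logic added in this patch.

    from typing import Any, Dict

    def normalize_usage(usage_model: Dict[str, Any]) -> Dict[str, Any]:
        # Hypothetical stand-in for the dict branch added to _parse_usage_model.
        usage_model = dict(usage_model)

        # OpenAI / LiteLLM shape: prompt_tokens_details as a flat dict
        if isinstance(usage_model.get("prompt_tokens_details"), dict):
            prompt_tokens_details = usage_model.pop("prompt_tokens_details")

            for key, value in prompt_tokens_details.items():
                if not isinstance(value, int):
                    continue  # ignore non-integer detail values

                # Surface each detail as its own metric, e.g. input_cached_tokens
                usage_model[f"input_{key}"] = value

                # Subtract the detail from the aggregate so cached tokens
                # are not double-counted in "input"
                if "input" in usage_model:
                    usage_model["input"] = max(0, usage_model["input"] - value)

        return usage_model

    # 12000 of 15000 prompt tokens were served from cache
    print(normalize_usage({"input": 15000, "prompt_tokens_details": {"cached_tokens": 12000}}))
    # -> {'input': 3000, 'input_cached_tokens': 12000}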