@@ -14,7 +14,7 @@
 import tiktoken
 from botocore.config import Config
 from fastapi import HTTPException
-from langfuse.decorators import observe, langfuse_context
+from langfuse import observe, get_client
 from starlette.concurrency import run_in_threadpool
 
 from api.models.base import BaseChatModel, BaseEmbeddingsModel
@@ -54,6 +54,20 @@
 
 logger = logging.getLogger(__name__)
 
+# Initialize Langfuse client
+_langfuse_client = None
+
+def _get_langfuse_client():
+    """Get or create the Langfuse client singleton."""
+    global _langfuse_client
+    if _langfuse_client is None:
+        try:
+            _langfuse_client = get_client()
+        except Exception as e:
+            logger.warning(f"Failed to initialize Langfuse client: {e}")
+            _langfuse_client = None
+    return _langfuse_client
+
 config = Config(
     connect_timeout=60,  # Connection timeout: 60 seconds
     read_timeout=900,  # Read timeout: 15 minutes (suitable for long streaming responses)
@@ -253,18 +267,23 @@ async def _invoke_bedrock(self, chat_request: ChatRequest, stream=False):
         }
 
         # Update Langfuse generation with input metadata
-        langfuse_context.update_current_observation(
-            input=messages,
-            model=model_id,
-            model_parameters=model_parameters,
-            metadata={
-                'system': args_clone.get('system', []),
-                'toolConfig': args_clone.get('toolConfig', {}),
-                'stream': stream
-            }
-        )
-        if DEBUG:
-            logger.info(f"Langfuse: Updated observation with input - model={model_id}, stream={stream}, messages_count={len(messages)}")
+        langfuse_client = _get_langfuse_client()
+        if langfuse_client:
+            try:
+                langfuse_client.update_current_generation(
+                    input=messages,
+                    model=model_id,
+                    model_parameters=model_parameters,
+                    metadata={
+                        'system': args_clone.get('system', []),
+                        'toolConfig': args_clone.get('toolConfig', {}),
+                        'stream': stream
+                    }
+                )
+                if DEBUG:
+                    logger.info(f"Langfuse: Updated observation with input - model={model_id}, stream={stream}, messages_count={len(messages)}")
+            except Exception as e:
+                logger.warning(f"Failed to update Langfuse: {e}")
 
         try:
             if stream:
@@ -302,41 +321,61 @@ async def _invoke_bedrock(self, chat_request: ChatRequest, stream=False):
                 metadata["reasoning_content"] = reasoning_text
                 metadata["reasoning_tokens_estimate"] = len(reasoning_text) // 4
 
-            langfuse_context.update_current_observation(
-                output=output_message,
-                usage={
-                    "input": usage.get("inputTokens", 0),
-                    "output": usage.get("outputTokens", 0),
-                    "total": usage.get("totalTokens", 0)
-                },
-                metadata=metadata
-            )
-            if DEBUG:
-                logger.info(f"Langfuse: Updated observation with output - "
-                            f"input_tokens={usage.get('inputTokens', 0)}, "
-                            f"output_tokens={usage.get('outputTokens', 0)}, "
-                            f"has_reasoning={has_reasoning}, "
-                            f"stop_reason={response.get('stopReason')}")
+            langfuse_client = _get_langfuse_client()
+            if langfuse_client:
+                try:
+                    langfuse_client.update_current_generation(
+                        output=output_message,
+                        usage_details={
+                            "input": usage.get("inputTokens", 0),
+                            "output": usage.get("outputTokens", 0),
+                            "total": usage.get("totalTokens", 0)
+                        },
+                        metadata=metadata
+                    )
+                    if DEBUG:
+                        logger.info(f"Langfuse: Updated observation with output - "
+                                    f"input_tokens={usage.get('inputTokens', 0)}, "
+                                    f"output_tokens={usage.get('outputTokens', 0)}, "
+                                    f"has_reasoning={has_reasoning}, "
+                                    f"stop_reason={response.get('stopReason')}")
+                except Exception as e:
+                    logger.warning(f"Failed to update Langfuse: {e}")
         except bedrock_runtime.exceptions.ValidationException as e:
             error_message = f"Bedrock validation error for model {chat_request.model}: {str(e)}"
             logger.error(error_message)
-            langfuse_context.update_current_observation(level="ERROR", status_message=error_message)
-            if DEBUG:
-                logger.info(f"Langfuse: Updated observation with ValidationException error")
+            langfuse_client = _get_langfuse_client()
+            if langfuse_client:
+                try:
+                    langfuse_client.update_current_generation(level="ERROR", status_message=error_message)
+                    if DEBUG:
+                        logger.info(f"Langfuse: Updated observation with ValidationException error")
+                except Exception:
+                    pass
             raise HTTPException(status_code=400, detail=str(e))
         except bedrock_runtime.exceptions.ThrottlingException as e:
             error_message = f"Bedrock throttling for model {chat_request.model}: {str(e)}"
             logger.warning(error_message)
-            langfuse_context.update_current_observation(level="WARNING", status_message=error_message)
-            if DEBUG:
-                logger.info(f"Langfuse: Updated observation with ThrottlingException warning")
+            langfuse_client = _get_langfuse_client()
+            if langfuse_client:
+                try:
+                    langfuse_client.update_current_generation(level="WARNING", status_message=error_message)
+                    if DEBUG:
+                        logger.info(f"Langfuse: Updated observation with ThrottlingException warning")
+                except Exception:
+                    pass
             raise HTTPException(status_code=429, detail=str(e))
         except Exception as e:
             error_message = f"Bedrock invocation failed for model {chat_request.model}: {str(e)}"
             logger.error(error_message)
-            langfuse_context.update_current_observation(level="ERROR", status_message=error_message)
-            if DEBUG:
-                logger.info(f"Langfuse: Updated observation with generic Exception error")
+            langfuse_client = _get_langfuse_client()
+            if langfuse_client:
+                try:
+                    langfuse_client.update_current_generation(level="ERROR", status_message=error_message)
+                    if DEBUG:
+                        logger.info(f"Langfuse: Updated observation with generic Exception error")
+                except Exception:
+                    pass
             raise HTTPException(status_code=500, detail=str(e))
         return response
 
@@ -447,17 +486,21 @@ async def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
             if metadata:
                 update_params["metadata"] = metadata
 
-            langfuse_context.update_current_observation(**update_params)
-
-            if DEBUG:
-                output_length = len(accumulated_output)
-                logger.info(f"Langfuse: Updated observation with streaming output - "
-                            f"chunks_count={output_length}, "
-                            f"output_chars={len(final_output) if accumulated_output else 0}, "
-                            f"input_tokens={final_usage.prompt_tokens if final_usage else 'N/A'}, "
-                            f"output_tokens={final_usage.completion_tokens if final_usage else 'N/A'}, "
-                            f"has_reasoning={has_reasoning}, "
-                            f"finish_reason={finish_reason}")
+            langfuse_client = _get_langfuse_client()
+            if langfuse_client:
+                try:
+                    langfuse_client.update_current_generation(**update_params)
+                    if DEBUG:
+                        output_length = len(accumulated_output)
+                        logger.info(f"Langfuse: Updated observation with streaming output - "
+                                    f"chunks_count={output_length}, "
+                                    f"output_chars={len(final_output) if accumulated_output else 0}, "
+                                    f"input_tokens={final_usage.prompt_tokens if final_usage else 'N/A'}, "
+                                    f"output_tokens={final_usage.completion_tokens if final_usage else 'N/A'}, "
+                                    f"has_reasoning={has_reasoning}, "
+                                    f"finish_reason={finish_reason}")
+                except Exception as e:
+                    logger.warning(f"Failed to update Langfuse: {e}")
 
         # return an [DONE] message at the end.
         yield self.stream_response_to_bytes()
@@ -468,12 +511,17 @@ async def chat_stream(self, chat_request: ChatRequest) -> AsyncIterable[bytes]:
         except Exception as e:
             logger.error("Stream error for model %s: %s", chat_request.model, str(e))
             # Update Langfuse with error
-            langfuse_context.update_current_observation(
-                level="ERROR",
-                status_message=f"Stream error: {str(e)}"
-            )
-            if DEBUG:
-                logger.info(f"Langfuse: Updated observation with streaming error - error={str(e)[:100]}")
+            langfuse_client = _get_langfuse_client()
+            if langfuse_client:
+                try:
+                    langfuse_client.update_current_generation(
+                        level="ERROR",
+                        status_message=f"Stream error: {str(e)}"
+                    )
+                    if DEBUG:
+                        logger.info(f"Langfuse: Updated observation with streaming error - error={str(e)[:100]}")
+                except Exception:
+                    pass
             error_event = Error(error=ErrorMessage(message=str(e)))
             yield self.stream_response_to_bytes(error_event)
 
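For context, here is a minimal sketch of the Langfuse v3 pattern this change adopts: the `@observe(as_type="generation")` decorator opens the observation, and `get_client()` returns the shared client whose `update_current_generation()` can enrich it from anywhere inside the decorated call. The function name, model id, and token counts below are illustrative placeholders, not code from this PR.

```python
from langfuse import observe, get_client


@observe(as_type="generation")
def call_model(messages: list) -> str:
    # Hypothetical handler for illustration; only the Langfuse calls
    # mirror the pattern used in this PR.
    langfuse = get_client()

    # Attach the input side before invoking the model.
    langfuse.update_current_generation(
        input=messages,
        model="example-model-id",  # placeholder, not a real Bedrock model id
    )

    output = "Hello!"  # the actual model invocation would happen here

    # Attach the output and token usage (v3 takes `usage_details`,
    # not the v2 `usage` keyword).
    langfuse.update_current_generation(
        output=output,
        usage_details={"input": 12, "output": 3, "total": 15},
    )
    return output


call_model([{"role": "user", "content": "Hi"}])
get_client().flush()  # deliver buffered events before shutdown
```

Note that v3's `get_client()` is already designed to return a shared client instance, so the `_get_langfuse_client()` helper in this diff is less about caching and more about graceful degradation: if initialization fails it logs a warning and returns `None`, and every update site checks for that (and wraps the call in try/except) so that tracing problems never break a Bedrock request.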