From 00670ae8ff0cb77c33e27bd09d9b7fd602341d1b Mon Sep 17 00:00:00 2001
From: NoAmateur <1972025243@qq.com>
Date: Wed, 6 May 2026 21:19:19 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=BAOpenAI=20API=E8=B0=83=E7=94=A8?=
 =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=8C=87=E6=95=B0=E9=80=80=E9=81=BF=E9=87=8D?=
 =?UTF-8?q?=E8=AF=95=E6=9C=BA=E5=88=B6?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

在OpenAIClient.chat_completion()中增加重试逻辑，支持指数退避：
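- 最大尝试次数：3次（首次调用 + 最多2次重试）
- 可重试错误：异常信息（转为小写后）中包含 429、502、503、timeout 或 connection 子串
- 退避策略：2^attempt 秒（attempt 从 0 开始，实际等待为 1s、2s）；
  遇到不可重试的错误或第3次尝试仍失败时不再等待，直接返回
  "Error: Could not get response from LLM."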

重试循环内保留原有的 clean_reasoning_model_output() 调用，
继续清理deepseek-r1等推理模型的<think>输出标签，
避免思考过程污染最终返回内容。

涉及文件：
- memoryos-pypi/utils.py
- memoryos-playground/utils.py
- memoryos-mcp/memoryos/utils.py
- memoryos-chromadb/utils.py
---
 memoryos-chromadb/utils.py     | 38 +++++++++++++++++++------------
 memoryos-mcp/memoryos/utils.py | 41 +++++++++++++++++++++-------------
 memoryos-playground/utils.py   | 41 +++++++++++++++++++++-------------
 memoryos-pypi/utils.py         | 41 +++++++++++++++++++++-------------
 4 files changed, 99 insertions(+), 62 deletions(-)
diff --git a/memoryos-chromadb/utils.py b/memoryos-chromadb/utils.py
index 015bec5..1921715 100644
--- a/memoryos-chromadb/utils.py
+++ b/memoryos-chromadb/utils.py
@@ -39,20 +39,30 @@ def __init__(self, api_key, base_url=None, max_workers=5):
         self._lock = threading.Lock()
 
     def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000):
-        print(f"Calling OpenAI API. Model: {model}")
-        try:
-            response = self.client.chat.completions.create(
-                model=model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens
-            )
-            raw_content = response.choices[0].message.content.strip()
-            cleaned_content = clean_reasoning_model_output(raw_content)
-            return cleaned_content
-        except Exception as e:
-            print(f"Error calling OpenAI API: {e}")
-            return "Error: Could not get response from LLM."
+        max_retries = 3
+        for attempt in range(max_retries):
+            print(f"Calling OpenAI API. Model: {model} (attempt {attempt + 1}/{max_retries})")
+            try:
+                response = self.client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens
+                )
+                raw_content = response.choices[0].message.content.strip()
+                cleaned_content = clean_reasoning_model_output(raw_content)
+                return cleaned_content
+            except Exception as e:
+                error_str = str(e).lower()
+                is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"])
+                if is_retryable and attempt < max_retries - 1:
+                    wait_time = 2 ** attempt
+                    print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...")
+                    time.sleep(wait_time)
+                else:
+                    print(f"Error calling OpenAI API: {e}")
+                    return "Error: Could not get response from LLM."
+        return "Error: Could not get response from LLM."
 
     def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000):
         return self.executor.submit(self.chat_completion, model, messages, temperature, max_tokens)
diff --git a/memoryos-mcp/memoryos/utils.py b/memoryos-mcp/memoryos/utils.py
index 6983a44..93e6f0c 100644
--- a/memoryos-mcp/memoryos/utils.py
+++ b/memoryos-mcp/memoryos/utils.py
@@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5):
         self._lock = threading.Lock()
 
     def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000):
-        print(f"Calling OpenAI API. Model: {model}")
-        try:
-            response = self.client.chat.completions.create(
-                model=model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens
-            )
-            raw_content = response.choices[0].message.content.strip()
-            # 自动清理推理模型的<think>标签
-            cleaned_content = clean_reasoning_model_output(raw_content)
-            return cleaned_content
-        except Exception as e:
-            print(f"Error calling OpenAI API: {e}")
-            # Fallback or error handling
-            return "Error: Could not get response from LLM."
+        max_retries = 3
+        for attempt in range(max_retries):
+            print(f"Calling OpenAI API. Model: {model} (attempt {attempt + 1}/{max_retries})")
+            try:
+                response = self.client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens
+                )
+                raw_content = response.choices[0].message.content.strip()
+                # 自动清理推理模型的<think>标签
+                cleaned_content = clean_reasoning_model_output(raw_content)
+                return cleaned_content
+            except Exception as e:
+                error_str = str(e).lower()
+                is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"])
+                if is_retryable and attempt < max_retries - 1:
+                    wait_time = 2 ** attempt
+                    print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...")
+                    time.sleep(wait_time)
+                else:
+                    print(f"Error calling OpenAI API: {e}")
+                    return "Error: Could not get response from LLM."
+        return "Error: Could not get response from LLM."
 
     def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000):
         """异步版本的chat_completion"""
diff --git a/memoryos-playground/utils.py b/memoryos-playground/utils.py
index 40d95ff..974ed08 100644
--- a/memoryos-playground/utils.py
+++ b/memoryos-playground/utils.py
@@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5):
         self._lock = threading.Lock()
 
     def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000):
-        print(f"Calling OpenAI API. Model: {model}")
-        try:
-            response = self.client.chat.completions.create(
-                model=model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens
-            )
-            raw_content = response.choices[0].message.content.strip()
-            # 自动清理推理模型的<think>标签
-            cleaned_content = clean_reasoning_model_output(raw_content)
-            return cleaned_content
-        except Exception as e:
-            print(f"Error calling OpenAI API: {e}")
-            # Fallback or error handling
-            return "Error: Could not get response from LLM."
+        max_retries = 3
+        for attempt in range(max_retries):
+            print(f"Calling OpenAI API. Model: {model} (attempt {attempt + 1}/{max_retries})")
+            try:
+                response = self.client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens
+                )
+                raw_content = response.choices[0].message.content.strip()
+                # 自动清理推理模型的<think>标签
+                cleaned_content = clean_reasoning_model_output(raw_content)
+                return cleaned_content
+            except Exception as e:
+                error_str = str(e).lower()
+                is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"])
+                if is_retryable and attempt < max_retries - 1:
+                    wait_time = 2 ** attempt
+                    print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...")
+                    time.sleep(wait_time)
+                else:
+                    print(f"Error calling OpenAI API: {e}")
+                    return "Error: Could not get response from LLM."
+        return "Error: Could not get response from LLM."
 
     def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000):
         """异步版本的chat_completion"""
diff --git a/memoryos-pypi/utils.py b/memoryos-pypi/utils.py
index 40d95ff..974ed08 100644
--- a/memoryos-pypi/utils.py
+++ b/memoryos-pypi/utils.py
@@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5):
         self._lock = threading.Lock()
 
     def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000):
-        print(f"Calling OpenAI API. Model: {model}")
-        try:
-            response = self.client.chat.completions.create(
-                model=model,
-                messages=messages,
-                temperature=temperature,
-                max_tokens=max_tokens
-            )
-            raw_content = response.choices[0].message.content.strip()
-            # 自动清理推理模型的<think>标签
-            cleaned_content = clean_reasoning_model_output(raw_content)
-            return cleaned_content
-        except Exception as e:
-            print(f"Error calling OpenAI API: {e}")
-            # Fallback or error handling
-            return "Error: Could not get response from LLM."
+        max_retries = 3
+        for attempt in range(max_retries):
+            print(f"Calling OpenAI API. Model: {model} (attempt {attempt + 1}/{max_retries})")
+            try:
+                response = self.client.chat.completions.create(
+                    model=model,
+                    messages=messages,
+                    temperature=temperature,
+                    max_tokens=max_tokens
+                )
+                raw_content = response.choices[0].message.content.strip()
+                # 自动清理推理模型的<think>标签
+                cleaned_content = clean_reasoning_model_output(raw_content)
+                return cleaned_content
+            except Exception as e:
+                error_str = str(e).lower()
+                is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"])
+                if is_retryable and attempt < max_retries - 1:
+                    wait_time = 2 ** attempt
+                    print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...")
+                    time.sleep(wait_time)
+                else:
+                    print(f"Error calling OpenAI API: {e}")
+                    return "Error: Could not get response from LLM."
+        return "Error: Could not get response from LLM."
 
     def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000):
         """异步版本的chat_completion"""