From 00670ae8ff0cb77c33e27bd09d9b7fd602341d1b Mon Sep 17 00:00:00 2001 From: NoAmateur <1972025243@qq.com> Date: Wed, 6 May 2026 21:19:19 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=BAOpenAI=20API=E8=B0=83=E7=94=A8?= =?UTF-8?q?=E6=B7=BB=E5=8A=A0=E6=8C=87=E6=95=B0=E9=80=80=E9=81=BF=E9=87=8D?= =?UTF-8?q?=E8=AF=95=E6=9C=BA=E5=88=B6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 在OpenAIClient.chat_completion()中增加重试逻辑,支持指数退避: - 最大尝试次数:3次(首次调用 + 最多2次重试) - 可重试错误:429、502、503、timeout、connection - 退避策略:2^attempt 秒(1s, 2s) 同时保留原有的对deepseek-r1等推理模型输出标签的清理, 避免思考过程污染最终返回内容。 涉及文件: - memoryos-pypi/utils.py - memoryos-playground/utils.py - memoryos-mcp/memoryos/utils.py - memoryos-chromadb/utils.py --- memoryos-chromadb/utils.py | 38 +++++++++++++++++++------------ memoryos-mcp/memoryos/utils.py | 41 +++++++++++++++++++++------------- memoryos-playground/utils.py | 41 +++++++++++++++++++++------------- memoryos-pypi/utils.py | 41 +++++++++++++++++++++------------- 4 files changed, 99 insertions(+), 62 deletions(-) diff --git a/memoryos-chromadb/utils.py b/memoryos-chromadb/utils.py index 015bec5..1921715 100644 --- a/memoryos-chromadb/utils.py +++ b/memoryos-chromadb/utils.py @@ -39,20 +39,30 @@ def __init__(self, api_key, base_url=None, max_workers=5): self._lock = threading.Lock() def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000): - print(f"Calling OpenAI API. Model: {model}") - try: - response = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens - ) - raw_content = response.choices[0].message.content.strip() - cleaned_content = clean_reasoning_model_output(raw_content) - return cleaned_content - except Exception as e: - print(f"Error calling OpenAI API: {e}") - return "Error: Could not get response from LLM." + max_retries = 3 + for attempt in range(max_retries): + print(f"Calling OpenAI API. 
Model: {model} (attempt {attempt + 1}/{max_retries})") + try: + response = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens + ) + raw_content = response.choices[0].message.content.strip() + cleaned_content = clean_reasoning_model_output(raw_content) + return cleaned_content + except Exception as e: + error_str = str(e).lower() + is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"]) + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt + print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + print(f"Error calling OpenAI API: {e}") + return "Error: Could not get response from LLM." + return "Error: Could not get response from LLM." def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000): return self.executor.submit(self.chat_completion, model, messages, temperature, max_tokens) diff --git a/memoryos-mcp/memoryos/utils.py b/memoryos-mcp/memoryos/utils.py index 6983a44..93e6f0c 100644 --- a/memoryos-mcp/memoryos/utils.py +++ b/memoryos-mcp/memoryos/utils.py @@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5): self._lock = threading.Lock() def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000): - print(f"Calling OpenAI API. Model: {model}") - try: - response = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens - ) - raw_content = response.choices[0].message.content.strip() - # 自动清理推理模型的标签 - cleaned_content = clean_reasoning_model_output(raw_content) - return cleaned_content - except Exception as e: - print(f"Error calling OpenAI API: {e}") - # Fallback or error handling - return "Error: Could not get response from LLM." + max_retries = 3 + for attempt in range(max_retries): + print(f"Calling OpenAI API. 
Model: {model} (attempt {attempt + 1}/{max_retries})") + try: + response = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens + ) + raw_content = response.choices[0].message.content.strip() + # 自动清理推理模型的标签 + cleaned_content = clean_reasoning_model_output(raw_content) + return cleaned_content + except Exception as e: + error_str = str(e).lower() + is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"]) + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt + print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + print(f"Error calling OpenAI API: {e}") + return "Error: Could not get response from LLM." + return "Error: Could not get response from LLM." def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000): """异步版本的chat_completion""" diff --git a/memoryos-playground/utils.py b/memoryos-playground/utils.py index 40d95ff..974ed08 100644 --- a/memoryos-playground/utils.py +++ b/memoryos-playground/utils.py @@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5): self._lock = threading.Lock() def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000): - print(f"Calling OpenAI API. Model: {model}") - try: - response = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens - ) - raw_content = response.choices[0].message.content.strip() - # 自动清理推理模型的标签 - cleaned_content = clean_reasoning_model_output(raw_content) - return cleaned_content - except Exception as e: - print(f"Error calling OpenAI API: {e}") - # Fallback or error handling - return "Error: Could not get response from LLM." + max_retries = 3 + for attempt in range(max_retries): + print(f"Calling OpenAI API. 
Model: {model} (attempt {attempt + 1}/{max_retries})") + try: + response = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens + ) + raw_content = response.choices[0].message.content.strip() + # 自动清理推理模型的标签 + cleaned_content = clean_reasoning_model_output(raw_content) + return cleaned_content + except Exception as e: + error_str = str(e).lower() + is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"]) + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt + print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + print(f"Error calling OpenAI API: {e}") + return "Error: Could not get response from LLM." + return "Error: Could not get response from LLM." def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000): """异步版本的chat_completion""" diff --git a/memoryos-pypi/utils.py b/memoryos-pypi/utils.py index 40d95ff..974ed08 100644 --- a/memoryos-pypi/utils.py +++ b/memoryos-pypi/utils.py @@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5): self._lock = threading.Lock() def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000): - print(f"Calling OpenAI API. Model: {model}") - try: - response = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens - ) - raw_content = response.choices[0].message.content.strip() - # 自动清理推理模型的标签 - cleaned_content = clean_reasoning_model_output(raw_content) - return cleaned_content - except Exception as e: - print(f"Error calling OpenAI API: {e}") - # Fallback or error handling - return "Error: Could not get response from LLM." + max_retries = 3 + for attempt in range(max_retries): + print(f"Calling OpenAI API. 
Model: {model} (attempt {attempt + 1}/{max_retries})") + try: + response = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens + ) + raw_content = response.choices[0].message.content.strip() + # 自动清理推理模型的标签 + cleaned_content = clean_reasoning_model_output(raw_content) + return cleaned_content + except Exception as e: + error_str = str(e).lower() + is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"]) + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt + print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + print(f"Error calling OpenAI API: {e}") + return "Error: Could not get response from LLM." + return "Error: Could not get response from LLM." def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000): """异步版本的chat_completion"""