diff --git a/memoryos-chromadb/utils.py b/memoryos-chromadb/utils.py index 015bec5..1921715 100644 --- a/memoryos-chromadb/utils.py +++ b/memoryos-chromadb/utils.py @@ -39,20 +39,30 @@ def __init__(self, api_key, base_url=None, max_workers=5): self._lock = threading.Lock() def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000): - print(f"Calling OpenAI API. Model: {model}") - try: - response = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens - ) - raw_content = response.choices[0].message.content.strip() - cleaned_content = clean_reasoning_model_output(raw_content) - return cleaned_content - except Exception as e: - print(f"Error calling OpenAI API: {e}") - return "Error: Could not get response from LLM." + max_retries = 3 + for attempt in range(max_retries): + print(f"Calling OpenAI API. Model: {model} (attempt {attempt + 1}/{max_retries})") + try: + response = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens + ) + raw_content = response.choices[0].message.content.strip() + cleaned_content = clean_reasoning_model_output(raw_content) + return cleaned_content + except Exception as e: + error_str = str(e).lower() + is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"]) + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt + print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + print(f"Error calling OpenAI API: {e}") + return "Error: Could not get response from LLM." + return "Error: Could not get response from LLM." def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000): return self.executor.submit(self.chat_completion, model, messages, temperature, max_tokens) diff --git a/memoryos-mcp/memoryos/utils.py b/memoryos-mcp/memoryos/utils.py index 6983a44..93e6f0c 100644 --- a/memoryos-mcp/memoryos/utils.py +++ b/memoryos-mcp/memoryos/utils.py @@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5): self._lock = threading.Lock() def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000): - print(f"Calling OpenAI API. Model: {model}") - try: - response = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens - ) - raw_content = response.choices[0].message.content.strip() - # 自动清理推理模型的标签 - cleaned_content = clean_reasoning_model_output(raw_content) - return cleaned_content - except Exception as e: - print(f"Error calling OpenAI API: {e}") - # Fallback or error handling - return "Error: Could not get response from LLM." + max_retries = 3 + for attempt in range(max_retries): + print(f"Calling OpenAI API. Model: {model} (attempt {attempt + 1}/{max_retries})") + try: + response = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens + ) + raw_content = response.choices[0].message.content.strip() + # 自动清理推理模型的标签 + cleaned_content = clean_reasoning_model_output(raw_content) + return cleaned_content + except Exception as e: + error_str = str(e).lower() + is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"]) + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt + print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + print(f"Error calling OpenAI API: {e}") + return "Error: Could not get response from LLM." + return "Error: Could not get response from LLM." def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000): """异步版本的chat_completion""" diff --git a/memoryos-playground/utils.py b/memoryos-playground/utils.py index 40d95ff..974ed08 100644 --- a/memoryos-playground/utils.py +++ b/memoryos-playground/utils.py @@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5): self._lock = threading.Lock() def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000): - print(f"Calling OpenAI API. Model: {model}") - try: - response = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens - ) - raw_content = response.choices[0].message.content.strip() - # 自动清理推理模型的标签 - cleaned_content = clean_reasoning_model_output(raw_content) - return cleaned_content - except Exception as e: - print(f"Error calling OpenAI API: {e}") - # Fallback or error handling - return "Error: Could not get response from LLM." + max_retries = 3 + for attempt in range(max_retries): + print(f"Calling OpenAI API. Model: {model} (attempt {attempt + 1}/{max_retries})") + try: + response = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens + ) + raw_content = response.choices[0].message.content.strip() + # 自动清理推理模型的标签 + cleaned_content = clean_reasoning_model_output(raw_content) + return cleaned_content + except Exception as e: + error_str = str(e).lower() + is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"]) + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt + print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + print(f"Error calling OpenAI API: {e}") + return "Error: Could not get response from LLM." + return "Error: Could not get response from LLM." def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000): """异步版本的chat_completion""" diff --git a/memoryos-pypi/utils.py b/memoryos-pypi/utils.py index 40d95ff..974ed08 100644 --- a/memoryos-pypi/utils.py +++ b/memoryos-pypi/utils.py @@ -46,22 +46,31 @@ def __init__(self, api_key, base_url=None, max_workers=5): self._lock = threading.Lock() def chat_completion(self, model, messages, temperature=0.7, max_tokens=2000): - print(f"Calling OpenAI API. Model: {model}") - try: - response = self.client.chat.completions.create( - model=model, - messages=messages, - temperature=temperature, - max_tokens=max_tokens - ) - raw_content = response.choices[0].message.content.strip() - # 自动清理推理模型的标签 - cleaned_content = clean_reasoning_model_output(raw_content) - return cleaned_content - except Exception as e: - print(f"Error calling OpenAI API: {e}") - # Fallback or error handling - return "Error: Could not get response from LLM." + max_retries = 3 + for attempt in range(max_retries): + print(f"Calling OpenAI API. Model: {model} (attempt {attempt + 1}/{max_retries})") + try: + response = self.client.chat.completions.create( + model=model, + messages=messages, + temperature=temperature, + max_tokens=max_tokens + ) + raw_content = response.choices[0].message.content.strip() + # 自动清理推理模型的标签 + cleaned_content = clean_reasoning_model_output(raw_content) + return cleaned_content + except Exception as e: + error_str = str(e).lower() + is_retryable = any(code in error_str for code in ["429", "502", "503", "timeout", "connection"]) + if is_retryable and attempt < max_retries - 1: + wait_time = 2 ** attempt + print(f"Error calling OpenAI API: {e}. Retrying in {wait_time}s...") + time.sleep(wait_time) + else: + print(f"Error calling OpenAI API: {e}") + return "Error: Could not get response from LLM." + return "Error: Could not get response from LLM." def chat_completion_async(self, model, messages, temperature=0.7, max_tokens=2000): """异步版本的chat_completion"""