From 0cf13b0760aaebdebab2feae76dcca7757434016 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 15 Dec 2025 09:35:18 +0000
Subject: [PATCH 1/2] Initial plan

From 6d905558fd24523f81b1642a122b144a2dd8da2c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 15 Dec 2025 09:39:55 +0000
Subject: [PATCH 2/2] feat: make DEFAULT_SYSTEM_PROMPT configurable via environment variable

- Add `default_system_prompt` field to LLMConfig in src/config.py
- Update src/rag.py to use config.llm.default_system_prompt
- Update src/multi_tenant.py to use config value as default
- Add LLM_DEFAULT_SYSTEM_PROMPT to env.example with documentation

Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com>
---
 env.example         | 5 +++++
 src/config.py       | 4 ++++
 src/multi_tenant.py | 5 +++--
 src/rag.py          | 3 ++-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/env.example b/env.example
index db7a4f2..5a3bd38 100644
--- a/env.example
+++ b/env.example
@@ -20,6 +20,11 @@ LLM_PROVIDER=ark
 # VLM image understanding API timeout (seconds, default 120)
 LLM_VLM_TIMEOUT=120
 
+# --- Default system prompt ---
+# Controls the LLM answer style (can be overridden by per-tenant configuration)
+# The default value suppresses <think> tag output (returned by the Seed 1.6 model by default)
+# LLM_DEFAULT_SYSTEM_PROMPT="You are a helpful assistant. Provide direct answers without showing your reasoning process."
+
 # --- LLM rate limiting configuration ---
 # Conservatively set to 80% of the SiliconFlow L0 tier (RPM=1000, TPM=50000)
 # Avoids triggering 429 errors (TPM limit reached)
diff --git a/src/config.py b/src/config.py
index 0d1f8f5..c29b555 100644
--- a/src/config.py
+++ b/src/config.py
@@ -24,6 +24,10 @@ class LLMConfig(BaseSettings):
     model: str = Field(default="seed-1-6-250615", description="LLM Model Name")
     vlm_timeout: int = Field(default=120, description="VLM Image Understanding Timeout (seconds)")
     timeout: int = Field(default=60, description="General LLM Timeout (seconds)")
+    default_system_prompt: str = Field(
+        default="You are a helpful assistant. Provide direct answers without showing your reasoning process.",
+        description="Default system prompt for LLM calls"
+    )
 
     # Rate limiting
     requests_per_minute: int = Field(default=800, description="Maximum requests per minute")
diff --git a/src/multi_tenant.py b/src/multi_tenant.py
index d4cb0b1..094da17 100644
--- a/src/multi_tenant.py
+++ b/src/multi_tenant.py
@@ -36,10 +36,11 @@ class MultiTenantRAGManager:
     def __init__(
         self,
         max_instances: int = 50,  # Cache at most 50 tenant instances
-        default_system_prompt: str = "You are a helpful assistant. Provide direct answers without showing your reasoning process.",
+        default_system_prompt: str = None,
     ):
         self.max_instances = max_instances
-        self.default_system_prompt = default_system_prompt
+        # Use config value if not explicitly provided
+        self.default_system_prompt = default_system_prompt or config.llm.default_system_prompt
 
         # Tenant instance cache: tenant_id -> LightRAG
         self._instances: Dict[str, LightRAG] = {}
diff --git a/src/rag.py b/src/rag.py
index 2d7b2d7..4137b8a 100644
--- a/src/rag.py
+++ b/src/rag.py
@@ -24,7 +24,8 @@
 load_dotenv()
 
 # Seed 1.6 model returns <think> tags by default, breaking API responses
-DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. Provide direct answers without showing your reasoning process."
+# Now configurable via LLM_DEFAULT_SYSTEM_PROMPT environment variable
+DEFAULT_SYSTEM_PROMPT = config.llm.default_system_prompt
 
 # EC2 t3.small has 2 vCPUs. 4x oversubscription for I/O-bound LLM API calls.
 # Empirically tested: 8 gives best throughput without hitting rate limits.
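
Reviewer note: below is a minimal standalone sketch of how the new field is expected to resolve against the environment. It assumes pydantic v2 with pydantic-settings and an "LLM_" env prefix on LLMConfig (implied by LLM_DEFAULT_SYSTEM_PROMPT in env.example; the actual settings wiring is not shown in this patch).

# Standalone sketch, not part of the patch. Assumes pydantic v2 + pydantic-settings
# and an "LLM_" env prefix on LLMConfig, mirroring LLM_DEFAULT_SYSTEM_PROMPT in env.example.
import os

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class LLMConfig(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="LLM_")  # assumed prefix

    default_system_prompt: str = Field(
        default="You are a helpful assistant. Provide direct answers without showing your reasoning process.",
        description="Default system prompt for LLM calls",
    )


# With no environment override, the built-in default applies.
print(LLMConfig().default_system_prompt)

# Setting the variable before the settings object is constructed overrides the default,
# which is what the commented-out line in env.example enables.
os.environ["LLM_DEFAULT_SYSTEM_PROMPT"] = "You are a terse assistant. Answer in one sentence."
print(LLMConfig().default_system_prompt)

In multi_tenant.py the same config value then serves as the fallback whenever default_system_prompt is not passed explicitly, so per-tenant prompts still take precedence over the environment-level default.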