5 changes: 5 additions & 0 deletions env.example
@@ -20,6 +20,11 @@ LLM_PROVIDER=ark
# VLM image understanding API timeout (seconds; default 120)
LLM_VLM_TIMEOUT=120

# --- Default system prompt ---
# Controls the LLM answer style (can be overridden by per-tenant configuration)
# The default suppresses <think> tag output (which the Seed 1.6 model returns by default)
# LLM_DEFAULT_SYSTEM_PROMPT="You are a helpful assistant. Provide direct answers without showing your reasoning process."

# --- LLM rate limit configuration ---
# Conservative settings at 80% of the SiliconFlow L0 tier (RPM=1000, TPM=50000)
# Helps avoid 429 errors (TPM limit reached)
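A small sketch of how the commented-out variable takes effect once uncommented, assuming the standard python-dotenv flow already used in this repo (load_dotenv() appears in src/rag.py below); the prompt text itself is only an example.

from dotenv import load_dotenv
import os

load_dotenv()  # loads .env values into the process environment
print(os.getenv("LLM_DEFAULT_SYSTEM_PROMPT"))  # None while the line stays commented out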
4 changes: 4 additions & 0 deletions src/config.py
@@ -24,6 +24,10 @@ class LLMConfig(BaseSettings):
model: str = Field(default="seed-1-6-250615", description="LLM Model Name")
vlm_timeout: int = Field(default=120, description="VLM Image Understanding Timeout (seconds)")
timeout: int = Field(default=60, description="General LLM Timeout (seconds)")
default_system_prompt: str = Field(
default="You are a helpful assistant. Provide direct answers without showing your reasoning process.",
description="Default system prompt for LLM calls"
)

# Rate limiting
requests_per_minute: int = Field(default=800, description="Maximum requests per minute")
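A hedged sketch of how the new field would be overridden at runtime, assuming LLMConfig uses an "LLM_" env prefix (the settings boilerplate is outside this hunk) and is importable from src.config.

import os
from src.config import LLMConfig  # assumed import path

print(LLMConfig().default_system_prompt)  # Field default while LLM_DEFAULT_SYSTEM_PROMPT is unset

os.environ["LLM_DEFAULT_SYSTEM_PROMPT"] = "Answer directly; do not emit <think> blocks."
print(LLMConfig().default_system_prompt)  # env value wins once the variable is set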
5 changes: 3 additions & 2 deletions src/multi_tenant.py
@@ -36,10 +36,11 @@ class MultiTenantRAGManager:
def __init__(
self,
max_instances: int = 50,  # cache at most 50 tenant instances
default_system_prompt: str = "You are a helpful assistant. Provide direct answers without showing your reasoning process.",
default_system_prompt: Optional[str] = None,
):
self.max_instances = max_instances
self.default_system_prompt = default_system_prompt
# Use config value if not explicitly provided
self.default_system_prompt = default_system_prompt or config.llm.default_system_prompt

# Tenant instance cache: tenant_id -> LightRAG
self._instances: Dict[str, LightRAG] = {}
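A brief usage sketch of the new fallback, using only names visible in this hunk; other constructor behavior is assumed unchanged.

manager = MultiTenantRAGManager()  # no prompt passed -> falls back to config.llm.default_system_prompt
tenant_specific = MultiTenantRAGManager(
    default_system_prompt="Answer in English only.",  # explicit value takes precedence over the config default
)

One side effect of the `or` fallback: an empty string passed by a caller is also replaced by the config default, which may or may not be intended.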
3 changes: 2 additions & 1 deletion src/rag.py
@@ -24,7 +24,8 @@
load_dotenv()

# Seed 1.6 model returns <think> tags by default, breaking API responses
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. Provide direct answers without showing your reasoning process."
# Now configurable via LLM_DEFAULT_SYSTEM_PROMPT environment variable
DEFAULT_SYSTEM_PROMPT = config.llm.default_system_prompt

# EC2 t3.small has 2 vCPUs. 4x oversubscription for I/O-bound LLM API calls.
# Empirically tested: 8 gives best throughput without hitting rate limits.
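The call site that consumes DEFAULT_SYSTEM_PROMPT is not part of this diff; as a hedged illustration, the module-level constant would typically be injected as the system message so Seed 1.6 skips its <think> preamble.

messages = [
    {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},  # suppresses <think> output per the comments above
    {"role": "user", "content": "Summarize the key entities in the indexed documents."},
]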