From 0cf13b0760aaebdebab2feae76dcca7757434016 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 15 Dec 2025 09:35:18 +0000
Subject: [PATCH 1/2] Initial plan

From 6d905558fd24523f81b1642a122b144a2dd8da2c Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Mon, 15 Dec 2025 09:39:55 +0000
Subject: [PATCH 2/2] feat: make DEFAULT_SYSTEM_PROMPT configurable via environment variable

- Add `default_system_prompt` field to LLMConfig in src/config.py
- Update src/rag.py to use config.llm.default_system_prompt
- Update src/multi_tenant.py to use config value as default
- Add LLM_DEFAULT_SYSTEM_PROMPT to env.example with documentation

Co-authored-by: BukeLy <19304666+BukeLy@users.noreply.github.com>
---
 env.example         | 5 +++++
 src/config.py       | 4 ++++
 src/multi_tenant.py | 5 +++--
 src/rag.py          | 3 ++-
 4 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/env.example b/env.example
index db7a4f2..5a3bd38 100644
--- a/env.example
+++ b/env.example
@@ -20,6 +20,11 @@ LLM_PROVIDER=ark
 # VLM image understanding API timeout (seconds, default 120)
 LLM_VLM_TIMEOUT=120
 
+# --- Default system prompt ---
+# Controls the LLM answer style (can be overridden by per-tenant configuration)
+# The default value suppresses <think> tag output (returned by the Seed 1.6 model by default)
+# LLM_DEFAULT_SYSTEM_PROMPT="You are a helpful assistant. Provide direct answers without showing your reasoning process."
+
 # --- LLM rate limiting configuration ---
 # Conservatively set to 80% of the SiliconFlow L0 tier (RPM=1000, TPM=50000)
 # Avoids triggering 429 errors (TPM limit reached)
diff --git a/src/config.py b/src/config.py
index 0d1f8f5..c29b555 100644
--- a/src/config.py
+++ b/src/config.py
@@ -24,6 +24,10 @@ class LLMConfig(BaseSettings):
     model: str = Field(default="seed-1-6-250615", description="LLM Model Name")
     vlm_timeout: int = Field(default=120, description="VLM Image Understanding Timeout (seconds)")
     timeout: int = Field(default=60, description="General LLM Timeout (seconds)")
+    default_system_prompt: str = Field(
+        default="You are a helpful assistant. Provide direct answers without showing your reasoning process.",
+        description="Default system prompt for LLM calls"
+    )
 
     # Rate limiting
     requests_per_minute: int = Field(default=800, description="Maximum requests per minute")
diff --git a/src/multi_tenant.py b/src/multi_tenant.py
index d4cb0b1..094da17 100644
--- a/src/multi_tenant.py
+++ b/src/multi_tenant.py
@@ -36,10 +36,11 @@ class MultiTenantRAGManager:
     def __init__(
         self,
         max_instances: int = 50,  # Cache at most 50 tenant instances
-        default_system_prompt: str = "You are a helpful assistant. Provide direct answers without showing your reasoning process.",
+        default_system_prompt: str = None,
     ):
         self.max_instances = max_instances
-        self.default_system_prompt = default_system_prompt
+        # Use config value if not explicitly provided
+        self.default_system_prompt = default_system_prompt or config.llm.default_system_prompt
 
         # Tenant instance cache: tenant_id -> LightRAG
         self._instances: Dict[str, LightRAG] = {}
diff --git a/src/rag.py b/src/rag.py
index 2d7b2d7..4137b8a 100644
--- a/src/rag.py
+++ b/src/rag.py
@@ -24,7 +24,8 @@
 load_dotenv()
 
 # Seed 1.6 model returns <think> tags by default, breaking API responses
-DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. Provide direct answers without showing your reasoning process."
+# Now configurable via LLM_DEFAULT_SYSTEM_PROMPT environment variable
+DEFAULT_SYSTEM_PROMPT = config.llm.default_system_prompt
 
 # EC2 t3.small has 2 vCPUs. 4x oversubscription for I/O-bound LLM API calls.
 # Empirically tested: 8 gives best throughput without hitting rate limits.
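
Reviewer note: below is a minimal standalone sketch of how the new field is expected to resolve against the environment. It assumes pydantic v2 with pydantic-settings and an "LLM_" env prefix on LLMConfig (implied by LLM_DEFAULT_SYSTEM_PROMPT in env.example; the actual settings wiring is not shown in this patch).

# Standalone sketch, not part of the patch. Assumes pydantic v2 + pydantic-settings
# and an "LLM_" env prefix on LLMConfig, mirroring LLM_DEFAULT_SYSTEM_PROMPT in env.example.
import os

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class LLMConfig(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="LLM_")  # assumed prefix

    default_system_prompt: str = Field(
        default="You are a helpful assistant. Provide direct answers without showing your reasoning process.",
        description="Default system prompt for LLM calls",
    )


# With no environment override, the built-in default applies.
print(LLMConfig().default_system_prompt)

# Setting the variable before the settings object is constructed overrides the default,
# which is what the commented-out line in env.example enables.
os.environ["LLM_DEFAULT_SYSTEM_PROMPT"] = "You are a terse assistant. Answer in one sentence."
print(LLMConfig().default_system_prompt)

In multi_tenant.py the same config value then serves as the fallback whenever default_system_prompt is not passed explicitly, so per-tenant prompts still take precedence over the environment-level default.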