diff --git a/env.example b/env.example
index db7a4f2..5a3bd38 100644
--- a/env.example
+++ b/env.example
@@ -20,6 +20,11 @@ LLM_PROVIDER=ark
 # VLM image understanding API timeout (seconds, default 120)
 LLM_VLM_TIMEOUT=120
 
+# --- Default system prompt ---
+# Controls the LLM answer style (can be overridden by per-tenant configuration)
+# The default suppresses <think> tag output (which the Seed 1.6 model returns by default)
+# LLM_DEFAULT_SYSTEM_PROMPT="You are a helpful assistant. Provide direct answers without showing your reasoning process."
+
 # --- LLM rate limiting configuration ---
 # Conservatively set to 80% of the SiliconFlow L0 tier (RPM=1000, TPM=50000)
 # Avoids triggering 429 errors (TPM limit reached)
diff --git a/src/config.py b/src/config.py
index 0d1f8f5..c29b555 100644
--- a/src/config.py
+++ b/src/config.py
@@ -24,6 +24,10 @@ class LLMConfig(BaseSettings):
     model: str = Field(default="seed-1-6-250615", description="LLM Model Name")
     vlm_timeout: int = Field(default=120, description="VLM Image Understanding Timeout (seconds)")
     timeout: int = Field(default=60, description="General LLM Timeout (seconds)")
+    default_system_prompt: str = Field(
+        default="You are a helpful assistant. Provide direct answers without showing your reasoning process.",
+        description="Default system prompt for LLM calls"
+    )
 
     # Rate limiting
     requests_per_minute: int = Field(default=800, description="Maximum requests per minute")
diff --git a/src/multi_tenant.py b/src/multi_tenant.py
index d4cb0b1..094da17 100644
--- a/src/multi_tenant.py
+++ b/src/multi_tenant.py
@@ -36,10 +36,11 @@ class MultiTenantRAGManager:
     def __init__(
         self,
         max_instances: int = 50,  # Cache at most 50 tenant instances
-        default_system_prompt: str = "You are a helpful assistant. Provide direct answers without showing your reasoning process.",
+        default_system_prompt: str = None,
     ):
         self.max_instances = max_instances
-        self.default_system_prompt = default_system_prompt
+        # Use config value if not explicitly provided
+        self.default_system_prompt = default_system_prompt or config.llm.default_system_prompt
 
         # Tenant instance cache: tenant_id -> LightRAG
         self._instances: Dict[str, LightRAG] = {}
diff --git a/src/rag.py b/src/rag.py
index 2d7b2d7..4137b8a 100644
--- a/src/rag.py
+++ b/src/rag.py
@@ -24,7 +24,8 @@ load_dotenv()
 
 # Seed 1.6 model returns <think> tags by default, breaking API responses
-DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. Provide direct answers without showing your reasoning process."
+# Now configurable via LLM_DEFAULT_SYSTEM_PROMPT environment variable
+DEFAULT_SYSTEM_PROMPT = config.llm.default_system_prompt
 
 # EC2 t3.small has 2 vCPUs. 4x oversubscription for I/O-bound LLM API calls.
 # Empirically tested: 8 gives best throughput without hitting rate limits.
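
For reference, here is a minimal sketch of the override precedence this change introduces (explicit constructor argument > `LLM_DEFAULT_SYSTEM_PROMPT` environment variable > built-in default). It assumes a pydantic-settings `BaseSettings` class with an `LLM_` env prefix, as implied by the variable names in env.example; the classes and names below are illustrative stand-ins, not the repo's actual code.

```python
# Illustrative sketch only -- assumes pydantic-settings with an "LLM_" env prefix,
# as implied by LLM_DEFAULT_SYSTEM_PROMPT in env.example; not the repo's actual code.
import os
from typing import Optional

from pydantic import Field
from pydantic_settings import BaseSettings, SettingsConfigDict


class LLMConfig(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="LLM_")

    default_system_prompt: str = Field(
        default=(
            "You are a helpful assistant. "
            "Provide direct answers without showing your reasoning process."
        ),
        description="Default system prompt for LLM calls",
    )


class MultiTenantRAGManager:
    def __init__(self, default_system_prompt: Optional[str] = None):
        # Explicit argument wins; otherwise fall back to the configured default,
        # which itself can be overridden via LLM_DEFAULT_SYSTEM_PROMPT.
        self.default_system_prompt = (
            default_system_prompt or LLMConfig().default_system_prompt
        )


if __name__ == "__main__":
    print(MultiTenantRAGManager().default_system_prompt)           # built-in default

    os.environ["LLM_DEFAULT_SYSTEM_PROMPT"] = "Answer in one short sentence."
    print(MultiTenantRAGManager().default_system_prompt)           # env override

    print(MultiTenantRAGManager("Be verbose.").default_system_prompt)  # explicit override
```

One caveat worth noting about the fallback in the diff: because it uses `or`, an empty string passed as `default_system_prompt` also falls back to the config value, so "not provided" and "empty" are treated the same.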