5 changes: 5 additions & 0 deletions env.example
@@ -20,6 +20,11 @@ LLM_PROVIDER=ark
# VLM image understanding API timeout (seconds; default 120)
LLM_VLM_TIMEOUT=120

# --- Default system prompt ---
# Controls the LLM answer style (can be overridden by per-tenant configuration)
# The default suppresses <think> tag output (which the Seed 1.6 model returns by default)
# LLM_DEFAULT_SYSTEM_PROMPT="You are a helpful assistant. Provide direct answers without showing your reasoning process."

# --- LLM rate limit configuration ---
# Conservative settings at 80% of the SiliconFlow L0 tier (RPM=1000, TPM=50000)
# Helps avoid 429 errors (TPM limit reached)
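A small sketch of how the commented-out variable takes effect once uncommented, assuming the standard python-dotenv flow already used in this repo (load_dotenv() appears in src/rag.py below); the prompt text itself is only an example.

from dotenv import load_dotenv
import os

load_dotenv()  # loads .env values into the process environment
print(os.getenv("LLM_DEFAULT_SYSTEM_PROMPT"))  # None while the line stays commented out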
4 changes: 4 additions & 0 deletions src/config.py
@@ -24,6 +24,10 @@ class LLMConfig(BaseSettings):
model: str = Field(default="seed-1-6-250615", description="LLM Model Name")
vlm_timeout: int = Field(default=120, description="VLM Image Understanding Timeout (seconds)")
timeout: int = Field(default=60, description="General LLM Timeout (seconds)")
default_system_prompt: str = Field(
default="You are a helpful assistant. Provide direct answers without showing your reasoning process.",
description="Default system prompt for LLM calls"
)

# Rate limiting
requests_per_minute: int = Field(default=800, description="Maximum requests per minute")
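A hedged sketch of how the new field would be overridden at runtime, assuming LLMConfig uses an "LLM_" env prefix (the settings boilerplate is outside this hunk) and is importable from src.config.

import os
from src.config import LLMConfig  # assumed import path

print(LLMConfig().default_system_prompt)  # Field default while LLM_DEFAULT_SYSTEM_PROMPT is unset

os.environ["LLM_DEFAULT_SYSTEM_PROMPT"] = "Answer directly; do not emit <think> blocks."
print(LLMConfig().default_system_prompt)  # env value wins once the variable is set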
5 changes: 3 additions & 2 deletions src/multi_tenant.py
@@ -36,10 +36,11 @@ class MultiTenantRAGManager:
def __init__(
self,
max_instances: int = 50,  # cache at most 50 tenant instances
default_system_prompt: str = "You are a helpful assistant. Provide direct answers without showing your reasoning process.",
default_system_prompt: Optional[str] = None,
):
self.max_instances = max_instances
self.default_system_prompt = default_system_prompt
# Use config value if not explicitly provided
self.default_system_prompt = default_system_prompt or config.llm.default_system_prompt

# Tenant instance cache: tenant_id -> LightRAG
self._instances: Dict[str, LightRAG] = {}
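A brief usage sketch of the new fallback, using only names visible in this hunk; other constructor behavior is assumed unchanged.

manager = MultiTenantRAGManager()  # no prompt passed -> falls back to config.llm.default_system_prompt
tenant_specific = MultiTenantRAGManager(
    default_system_prompt="Answer in English only.",  # explicit value takes precedence over the config default
)

One side effect of the `or` fallback: an empty string passed by a caller is also replaced by the config default, which may or may not be intended.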
3 changes: 2 additions & 1 deletion src/rag.py
@@ -24,7 +24,8 @@
load_dotenv()

# Seed 1.6 model returns <think> tags by default, breaking API responses
DEFAULT_SYSTEM_PROMPT = "You are a helpful assistant. Provide direct answers without showing your reasoning process."
# Now configurable via LLM_DEFAULT_SYSTEM_PROMPT environment variable
DEFAULT_SYSTEM_PROMPT = config.llm.default_system_prompt

# EC2 t3.small has 2 vCPUs. 4x oversubscription for I/O-bound LLM API calls.
# Empirically tested: 8 gives best throughput without hitting rate limits.
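The call site that consumes DEFAULT_SYSTEM_PROMPT is not part of this diff; as a hedged illustration, the module-level constant would typically be injected as the system message so Seed 1.6 skips its <think> preamble.

messages = [
    {"role": "system", "content": DEFAULT_SYSTEM_PROMPT},  # suppresses <think> output per the comments above
    {"role": "user", "content": "Summarize the key entities in the indexed documents."},
]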