Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions backend/alembic/versions/increase_api_key_length.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Increase api_key_encrypted column length to support Minimax API keys.

Revision ID: increase_api_key_length
Revises: add_notification_agent_id
Create Date: 2026-03-22
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# Alembic revision identifiers: `revision` names this migration and
# `down_revision` links it into the version graph after the
# add_notification_agent_id migration.
revision: str = 'increase_api_key_length'
down_revision: Union[str, None] = 'add_notification_agent_id'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Widen llm_models.api_key_encrypted from VARCHAR(500) to VARCHAR(1024).

    Minimax API keys are very long and exceed the previous 500-char limit.
    Uses ``op.alter_column`` instead of raw ``ALTER TABLE ... ALTER COLUMN
    ... TYPE`` SQL: the raw form is PostgreSQL-specific, while
    ``alter_column`` lets Alembic emit the correct DDL per dialect.
    """
    op.alter_column(
        "llm_models",
        "api_key_encrypted",
        existing_type=sa.String(length=500),
        type_=sa.String(length=1024),
        existing_nullable=False,
    )


def downgrade() -> None:
    """Revert api_key_encrypted to VARCHAR(500).

    NOTE: this can fail at runtime if any stored encrypted key exceeds
    500 characters (e.g. Minimax keys) — the database will refuse the
    narrowing cast. Uses dialect-agnostic ``op.alter_column`` rather than
    raw PostgreSQL-specific SQL, mirroring ``upgrade``.
    """
    op.alter_column(
        "llm_models",
        "api_key_encrypted",
        existing_type=sa.String(length=1024),
        type_=sa.String(length=500),
        existing_nullable=False,
    )
97 changes: 13 additions & 84 deletions backend/app/api/feishu.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,90 +1022,19 @@ async def _download_post_images(agent_id, config, message_id, image_keys):

async def _call_agent_llm(db: AsyncSession, agent_id: uuid.UUID, user_text: str, history: list[dict] | None = None, user_id=None, on_chunk=None, on_thinking=None) -> str:
    """Call the agent's configured LLM model with conversation history.

    DEPRECATED: use ``app.services.llm.call_agent_llm`` instead.
    (The docstring previously pointed at ``app.services.llm_caller``,
    which does not match the module actually imported below.)

    This thin wrapper is kept only for backward compatibility with
    existing imports; it forwards every argument unchanged and returns
    the service's reply string.
    """
    # Function-scope import mirrors the original; presumably avoids a
    # circular import between feishu.py and the llm service — confirm.
    from app.services.llm import call_agent_llm

    return await call_agent_llm(
        db=db,
        agent_id=agent_id,
        user_text=user_text,
        history=history,
        user_id=user_id,
        on_chunk=on_chunk,
        on_thinking=on_thinking,
    )


2 changes: 1 addition & 1 deletion backend/app/api/gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ async def _send_to_agent_background(
"""
logger.info(f"[Gateway] _send_to_agent_background started: {source_agent_name} -> {target_agent_name}")
try:
from app.api.websocket import call_llm
from app.services.llm import call_llm
from app.services.agent_context import build_agent_context
from app.models.llm import LLMModel
from app.models.audit import ChatMessage
Expand Down
14 changes: 10 additions & 4 deletions backend/app/api/tenants.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,11 @@ async def get_tenant(
"""Get tenant details. Platform admins can view any; org_admins only their own."""
if current_user.role not in ("platform_admin", "org_admin"):
raise HTTPException(status_code=403, detail="Admin access required")
if current_user.role == "org_admin" and str(current_user.tenant_id) != str(tenant_id):
raise HTTPException(status_code=403, detail="Access denied")
if current_user.role == "org_admin":
if not current_user.tenant_id:
raise HTTPException(status_code=403, detail="Organization admin must belong to a company")
if current_user.tenant_id != tenant_id:
raise HTTPException(status_code=403, detail="Access denied")
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.scalar_one_or_none()
if not tenant:
Expand All @@ -226,8 +229,11 @@ async def update_tenant(
db: AsyncSession = Depends(get_db),
):
"""Update tenant settings. Platform admins can update any; org_admins only their own."""
if current_user.role == "org_admin" and str(current_user.tenant_id) != str(tenant_id):
raise HTTPException(status_code=403, detail="Can only update your own company")
if current_user.role == "org_admin":
if not current_user.tenant_id:
raise HTTPException(status_code=403, detail="Organization admin must belong to a company")
if current_user.tenant_id != tenant_id:
raise HTTPException(status_code=403, detail="Can only update your own company")
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.scalar_one_or_none()
if not tenant:
Expand Down
61 changes: 24 additions & 37 deletions backend/app/api/websocket.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from app.models.audit import ChatMessage
from app.models.llm import LLMModel
from app.models.user import User
from app.services.llm import call_llm, call_llm_with_failover

router = APIRouter(tags=["websocket"])

Expand Down Expand Up @@ -725,19 +726,27 @@ async def thinking_to_ws(text: str):

import asyncio as _aio

# Run call_llm as a cancellable task
llm_task = _aio.create_task(call_llm(
llm_model,
conversation[-ctx_size:],
agent_name,
role_description,
agent_id=agent_id,
user_id=user_id,
on_chunk=stream_to_ws,
on_tool_call=tool_call_to_ws,
on_thinking=thinking_to_ws,
supports_vision=getattr(llm_model, 'supports_vision', False),
))
# Run call_llm_with_failover as a cancellable task
async def _call_with_failover():
async def _on_failover(reason: str):
await websocket.send_json({"type": "info", "content": f"Primary model error, {reason}"})

return await call_llm_with_failover(
primary_model=llm_model,
fallback_model=fallback_llm_model,
messages=conversation[-ctx_size:],
agent_name=agent_name,
role_description=role_description,
agent_id=agent_id,
user_id=user_id,
on_chunk=stream_to_ws,
on_tool_call=tool_call_to_ws,
on_thinking=thinking_to_ws,
supports_vision=getattr(llm_model, 'supports_vision', False),
on_failover=_on_failover,
)

llm_task = _aio.create_task(_call_with_failover())

# Listen for abort while LLM is running
aborted = False
Expand Down Expand Up @@ -803,30 +812,7 @@ async def thinking_to_ws(text: str):
logger.error(f"[WS] LLM error: {e}")
import traceback
traceback.print_exc()
# Runtime fallback: primary model failed -> retry with fallback model
if fallback_llm_model:
logger.info(f"[WS] Primary model failed, retrying with fallback: {fallback_llm_model.model}")
try:
await websocket.send_json({"type": "info", "content": f"Primary model error, switching to fallback model ({fallback_llm_model.model})..."})
assistant_response = await call_llm(
fallback_llm_model,
conversation[-ctx_size:],
agent_name,
role_description,
agent_id=agent_id,
user_id=user_id,
on_chunk=stream_to_ws,
on_tool_call=tool_call_to_ws,
on_thinking=thinking_to_ws,
supports_vision=getattr(fallback_llm_model, 'supports_vision', False),
)
logger.info(f"[WS] Fallback LLM response: {assistant_response[:80]}")
except Exception as e2:
logger.error(f"[WS] Fallback LLM also failed: {e2}")
traceback.print_exc()
assistant_response = f"[LLM call error] Primary: {str(e)[:100]} | Fallback: {str(e2)[:100]}"
else:
assistant_response = f"[LLM call error] {str(e)[:200]}"
assistant_response = f"[LLM call error] {str(e)[:200]}"
else:
assistant_response = f"⚠️ {agent_name} has no LLM model configured. Please select a model in the agent's Settings tab."

Expand Down Expand Up @@ -893,3 +879,4 @@ async def thinking_to_ws(text: str):
await websocket.close(code=1011)
except Exception:
pass

2 changes: 1 addition & 1 deletion backend/app/models/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class LLMModel(Base):
tenant_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), ForeignKey("tenants.id"), nullable=True, index=True)
provider: Mapped[str] = mapped_column(String(50), nullable=False) # anthropic, openai, deepseek, etc.
model: Mapped[str] = mapped_column(String(100), nullable=False) # claude-opus-4-6, gpt-4o, etc.
api_key_encrypted: Mapped[str] = mapped_column(String(500), nullable=False)
api_key_encrypted: Mapped[str] = mapped_column(String(1024), nullable=False)
base_url: Mapped[str | None] = mapped_column(String(500))
label: Mapped[str] = mapped_column(String(200), nullable=False) # Display name
max_tokens_per_day: Mapped[int | None] = mapped_column(Integer)
Expand Down
55 changes: 55 additions & 0 deletions backend/app/services/llm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""LLM service module - unified LLM calling interface.

This module provides:
- call_llm: Basic LLM call with tool support
- call_llm_with_failover: LLM call with automatic failover
- call_agent_llm: Agent chat LLM call
- call_agent_llm_with_tools: Agent LLM call with tools for background tasks

Example:
from app.services.llm import call_llm, call_llm_with_failover

# Basic call
reply = await call_llm(model, messages, agent_name, role_description)

# With failover
reply = await call_llm_with_failover(
primary_model=primary,
fallback_model=fallback,
messages=messages,
...
)
"""

from .caller import (
call_llm,
call_llm_with_failover,
call_agent_llm,
call_agent_llm_with_tools,
FailoverGuard,
is_retryable_error,
)
from .client import LLMClient, LLMResponse, LLMError, LLMMessage
from .failover import classify_error, FailoverErrorType
from .utils import create_llm_client, get_max_tokens

# Explicit public API of app.services.llm: these names are re-exported
# from the submodules imported above and are what `from app.services.llm
# import *` (and API docs) should expose.
__all__ = [
    # Core caller functions
    "call_llm",
    "call_llm_with_failover",
    "call_agent_llm",
    "call_agent_llm_with_tools",
    # Failover utilities
    "FailoverGuard",
    "is_retryable_error",
    "classify_error",
    "FailoverErrorType",
    # Client classes
    "LLMClient",
    "LLMResponse",
    "LLMError",
    "LLMMessage",
    # Utilities
    "create_llm_client",
    "get_max_tokens",
]
Loading