Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions backend/alembic/versions/increase_api_key_length.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
"""Increase api_key_encrypted column length to support Minimax API keys.

Revision ID: increase_api_key_length
Revises: add_notification_agent_id
Create Date: 2026-03-22
"""
from typing import Sequence, Union

from alembic import op
import sqlalchemy as sa


# Alembic revision identifiers: `revision` names this migration and
# `down_revision` links it into the version graph after the
# add_notification_agent_id migration.
revision: str = 'increase_api_key_length'
down_revision: Union[str, None] = 'add_notification_agent_id'
branch_labels: Union[str, Sequence[str], None] = None
depends_on: Union[str, Sequence[str], None] = None


def upgrade() -> None:
    """Widen llm_models.api_key_encrypted from VARCHAR(500) to VARCHAR(1024).

    Minimax API keys are very long and exceed the previous 500-char limit.
    Uses ``op.alter_column`` instead of raw ``ALTER TABLE ... ALTER COLUMN
    ... TYPE`` SQL: the raw form is PostgreSQL-specific, while
    ``alter_column`` lets Alembic emit the correct DDL per dialect.
    """
    op.alter_column(
        "llm_models",
        "api_key_encrypted",
        existing_type=sa.String(length=500),
        type_=sa.String(length=1024),
        existing_nullable=False,
    )


def downgrade() -> None:
    """Revert api_key_encrypted to VARCHAR(500).

    NOTE: this can fail at runtime if any stored encrypted key exceeds
    500 characters (e.g. Minimax keys) — the database will refuse the
    narrowing cast. Uses dialect-agnostic ``op.alter_column`` rather than
    raw PostgreSQL-specific SQL, mirroring ``upgrade``.
    """
    op.alter_column(
        "llm_models",
        "api_key_encrypted",
        existing_type=sa.String(length=1024),
        type_=sa.String(length=500),
        existing_nullable=False,
    )
97 changes: 13 additions & 84 deletions backend/app/api/feishu.py
Original file line number Diff line number Diff line change
Expand Up @@ -1022,90 +1022,19 @@ async def _download_post_images(agent_id, config, message_id, image_keys):

async def _call_agent_llm(db: AsyncSession, agent_id: uuid.UUID, user_text: str, history: list[dict] | None = None, user_id=None, on_chunk=None, on_thinking=None) -> str:
    """Call the agent's configured LLM model with conversation history.

    DEPRECATED: use ``app.services.llm.call_agent_llm`` instead.
    (The docstring previously pointed at ``app.services.llm_caller``,
    which does not match the module actually imported below.)

    This thin wrapper is kept only for backward compatibility with
    existing imports; it forwards every argument unchanged and returns
    the service's reply string.
    """
    # Function-scope import mirrors the original; presumably avoids a
    # circular import between feishu.py and the llm service — confirm.
    from app.services.llm import call_agent_llm

    return await call_agent_llm(
        db=db,
        agent_id=agent_id,
        user_text=user_text,
        history=history,
        user_id=user_id,
        on_chunk=on_chunk,
        on_thinking=on_thinking,
    )


2 changes: 1 addition & 1 deletion backend/app/api/gateway.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,7 +343,7 @@ async def _send_to_agent_background(
"""
logger.info(f"[Gateway] _send_to_agent_background started: {source_agent_name} -> {target_agent_name}")
try:
from app.api.websocket import call_llm
from app.services.llm import call_llm
from app.services.agent_context import build_agent_context
from app.models.llm import LLMModel
from app.models.audit import ChatMessage
Expand Down
14 changes: 10 additions & 4 deletions backend/app/api/tenants.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,8 +209,11 @@ async def get_tenant(
"""Get tenant details. Platform admins can view any; org_admins only their own."""
if current_user.role not in ("platform_admin", "org_admin"):
raise HTTPException(status_code=403, detail="Admin access required")
if current_user.role == "org_admin" and str(current_user.tenant_id) != str(tenant_id):
raise HTTPException(status_code=403, detail="Access denied")
if current_user.role == "org_admin":
if not current_user.tenant_id:
raise HTTPException(status_code=403, detail="Organization admin must belong to a company")
if current_user.tenant_id != tenant_id:
raise HTTPException(status_code=403, detail="Access denied")
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.scalar_one_or_none()
if not tenant:
Expand All @@ -226,8 +229,11 @@ async def update_tenant(
db: AsyncSession = Depends(get_db),
):
"""Update tenant settings. Platform admins can update any; org_admins only their own."""
if current_user.role == "org_admin" and str(current_user.tenant_id) != str(tenant_id):
raise HTTPException(status_code=403, detail="Can only update your own company")
if current_user.role == "org_admin":
if not current_user.tenant_id:
raise HTTPException(status_code=403, detail="Organization admin must belong to a company")
if current_user.tenant_id != tenant_id:
raise HTTPException(status_code=403, detail="Can only update your own company")
result = await db.execute(select(Tenant).where(Tenant.id == tenant_id))
tenant = result.scalar_one_or_none()
if not tenant:
Expand Down
61 changes: 24 additions & 37 deletions backend/app/api/websocket.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from app.models.audit import ChatMessage
from app.models.llm import LLMModel
from app.models.user import User
from app.services.llm import call_llm, call_llm_with_failover

router = APIRouter(tags=["websocket"])

Expand Down Expand Up @@ -725,19 +726,27 @@ async def thinking_to_ws(text: str):

import asyncio as _aio

# Run call_llm as a cancellable task
llm_task = _aio.create_task(call_llm(
llm_model,
conversation[-ctx_size:],
agent_name,
role_description,
agent_id=agent_id,
user_id=user_id,
on_chunk=stream_to_ws,
on_tool_call=tool_call_to_ws,
on_thinking=thinking_to_ws,
supports_vision=getattr(llm_model, 'supports_vision', False),
))
# Run call_llm_with_failover as a cancellable task
async def _call_with_failover():
async def _on_failover(reason: str):
await websocket.send_json({"type": "info", "content": f"Primary model error, {reason}"})

return await call_llm_with_failover(
primary_model=llm_model,
fallback_model=fallback_llm_model,
messages=conversation[-ctx_size:],
agent_name=agent_name,
role_description=role_description,
agent_id=agent_id,
user_id=user_id,
on_chunk=stream_to_ws,
on_tool_call=tool_call_to_ws,
on_thinking=thinking_to_ws,
supports_vision=getattr(llm_model, 'supports_vision', False),
on_failover=_on_failover,
)

llm_task = _aio.create_task(_call_with_failover())

# Listen for abort while LLM is running
aborted = False
Expand Down Expand Up @@ -803,30 +812,7 @@ async def thinking_to_ws(text: str):
logger.error(f"[WS] LLM error: {e}")
import traceback
traceback.print_exc()
# Runtime fallback: primary model failed -> retry with fallback model
if fallback_llm_model:
logger.info(f"[WS] Primary model failed, retrying with fallback: {fallback_llm_model.model}")
try:
await websocket.send_json({"type": "info", "content": f"Primary model error, switching to fallback model ({fallback_llm_model.model})..."})
assistant_response = await call_llm(
fallback_llm_model,
conversation[-ctx_size:],
agent_name,
role_description,
agent_id=agent_id,
user_id=user_id,
on_chunk=stream_to_ws,
on_tool_call=tool_call_to_ws,
on_thinking=thinking_to_ws,
supports_vision=getattr(fallback_llm_model, 'supports_vision', False),
)
logger.info(f"[WS] Fallback LLM response: {assistant_response[:80]}")
except Exception as e2:
logger.error(f"[WS] Fallback LLM also failed: {e2}")
traceback.print_exc()
assistant_response = f"[LLM call error] Primary: {str(e)[:100]} | Fallback: {str(e2)[:100]}"
else:
assistant_response = f"[LLM call error] {str(e)[:200]}"
assistant_response = f"[LLM call error] {str(e)[:200]}"
else:
assistant_response = f"⚠️ {agent_name} has no LLM model configured. Please select a model in the agent's Settings tab."

Expand Down Expand Up @@ -893,3 +879,4 @@ async def thinking_to_ws(text: str):
await websocket.close(code=1011)
except Exception:
pass

2 changes: 1 addition & 1 deletion backend/app/models/llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class LLMModel(Base):
tenant_id: Mapped[uuid.UUID | None] = mapped_column(UUID(as_uuid=True), ForeignKey("tenants.id"), nullable=True, index=True)
provider: Mapped[str] = mapped_column(String(50), nullable=False) # anthropic, openai, deepseek, etc.
model: Mapped[str] = mapped_column(String(100), nullable=False) # claude-opus-4-6, gpt-4o, etc.
api_key_encrypted: Mapped[str] = mapped_column(String(500), nullable=False)
api_key_encrypted: Mapped[str] = mapped_column(String(1024), nullable=False)
base_url: Mapped[str | None] = mapped_column(String(500))
label: Mapped[str] = mapped_column(String(200), nullable=False) # Display name
max_tokens_per_day: Mapped[int | None] = mapped_column(Integer)
Expand Down
55 changes: 55 additions & 0 deletions backend/app/services/llm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
"""LLM service module - unified LLM calling interface.

This module provides:
- call_llm: Basic LLM call with tool support
- call_llm_with_failover: LLM call with automatic failover
- call_agent_llm: Agent chat LLM call
- call_agent_llm_with_tools: Agent LLM call with tools for background tasks

Example:
from app.services.llm import call_llm, call_llm_with_failover

# Basic call
reply = await call_llm(model, messages, agent_name, role_description)

# With failover
reply = await call_llm_with_failover(
primary_model=primary,
fallback_model=fallback,
messages=messages,
...
)
"""

from .caller import (
call_llm,
call_llm_with_failover,
call_agent_llm,
call_agent_llm_with_tools,
FailoverGuard,
is_retryable_error,
)
from .client import LLMClient, LLMResponse, LLMError, LLMMessage
from .failover import classify_error, FailoverErrorType
from .utils import create_llm_client, get_max_tokens

# Explicit public API of app.services.llm: these names are re-exported
# from the submodules imported above and are what `from app.services.llm
# import *` (and API docs) should expose.
__all__ = [
    # Core caller functions
    "call_llm",
    "call_llm_with_failover",
    "call_agent_llm",
    "call_agent_llm_with_tools",
    # Failover utilities
    "FailoverGuard",
    "is_retryable_error",
    "classify_error",
    "FailoverErrorType",
    # Client classes
    "LLMClient",
    "LLMResponse",
    "LLMError",
    "LLMMessage",
    # Utilities
    "create_llm_client",
    "get_max_tokens",
]
Loading