diff --git a/docs/strict_grounding_mode.md b/docs/strict_grounding_mode.md new file mode 100644 index 0000000..cd88dd1 --- /dev/null +++ b/docs/strict_grounding_mode.md @@ -0,0 +1,163 @@ +# 严格 Grounding 模式 + +## 问题背景 + +默认情况下,当知识库中的 chunks 不足以回答用户问题时,AI 可能会"强行编造"一个答案,即使这个答案没有依据。这是因为 LLM 有强大的生成能力,即使在没有足够上下文的情况下也会尝试给出"看起来合理"的回答。 + +## 解决方案 + +通过启用 **严格 Grounding 模式**,系统会使用增强版的 prompt,明确要求 AI 在信息不足时: + +1. **评估上下文充分性**:在生成答案前,先判断上下文是否足够回答问题 +2. **明确拒绝回答**:如果信息不足,使用标准化的拒绝回答格式 +3. **禁止编造内容**:严格禁止使用 AI 的通用知识来填补知识库的空白 + +## 使用方法 + +### 方式 1:环境变量(全局配置) + +在 `.env` 文件中添加: + +```bash +# 启用严格 Grounding 模式 +LIGHTRAG_STRICT_GROUNDING=true +``` + +### 方式 2:租户配置(租户级覆盖) + +通过 API 更新租户配置: + +```bash +curl -X PUT "http://localhost:8000/tenants/your_tenant/config" \ + -H "Content-Type: application/json" \ + -d '{ + "custom_prompts": { + "strict_grounding": "true" + } + }' +``` + +### 方式 3:完全自定义 Prompt + +如果需要更精细的控制,可以完全自定义 RAG 响应 prompt: + +```bash +# 环境变量方式 +LIGHTRAG_RAG_RESPONSE_PROMPT="你的自定义 prompt..." +LIGHTRAG_NAIVE_RAG_RESPONSE_PROMPT="你的自定义 naive 模式 prompt..." +``` + +或通过租户配置 API: + +```bash +curl -X PUT "http://localhost:8000/tenants/your_tenant/config" \ + -H "Content-Type: application/json" \ + -d '{ + "custom_prompts": { + "rag_response": "你的自定义 KG 模式 prompt...", + "naive_rag_response": "你的自定义 naive 模式 prompt..." + } + }' +``` + +## 配置优先级 + +从高到低: + +1. **租户配置的 `rag_response`/`naive_rag_response`**(完全自定义) +2. **环境变量 `LIGHTRAG_RAG_RESPONSE_PROMPT`/`LIGHTRAG_NAIVE_RAG_RESPONSE_PROMPT`** +3. **`strict_grounding=true`**(使用增强版默认 prompt) +4. **LightRAG 原生 prompt**(默认行为) + +## 增强版 Prompt 的关键指令 + +启用严格 Grounding 模式后,prompt 会包含以下关键指令: + +```markdown +---Critical Grounding Rules (MUST FOLLOW)--- + +⚠️ **ABSOLUTE REQUIREMENT**: You must ONLY use information explicitly stated in the **Context**. + +**Before generating any answer, you MUST evaluate:** +1. Does the Context contain information that DIRECTLY answers the user's question? +2. 
Is the information in the Context SUFFICIENT and RELEVANT to provide a complete answer? + +**If the answer is NO to either question, you MUST respond with:** +> 抱歉,根据当前知识库中的内容,我无法找到与您问题直接相关的信息。请尝试: +> - 重新表述您的问题 +> - 提供更多上下文信息 +> - 确认相关文档是否已上传到知识库 + +**DO NOT:** +- ❌ Make up or fabricate information not in the Context +- ❌ Use your general knowledge to fill gaps +- ❌ Provide speculative or assumed answers +- ❌ Say "based on my knowledge" or similar phrases +- ❌ Combine partial information to create misleading answers + +**DO:** +- ✅ Explicitly state when information is not available +- ✅ Only cite facts that appear in the Context +- ✅ Be honest about the limitations of the provided information +``` + +## 效果对比 + +### 未启用严格 Grounding 模式 + +用户问题:`公司的年度收入是多少?` + +(假设知识库中没有收入数据) + +AI 可能回答: +> 根据相关文档,该公司是一家成熟的企业...虽然具体年度收入数据未在文档中明确提及,但从其业务规模来看,估计年收入应该在... + +### 启用严格 Grounding 模式 + +同样的问题,AI 会回答: +> 抱歉,根据当前知识库中的内容,我无法找到与您问题直接相关的信息。请尝试: +> - 重新表述您的问题 +> - 提供更多上下文信息 +> - 确认相关文档是否已上传到知识库 + +## 刷新配置 + +修改配置后,需要刷新租户实例缓存: + +```bash +# 刷新特定租户 +curl -X POST "http://localhost:8000/tenants/your_tenant/config/refresh" + +# 或重启服务(全局生效) +docker compose restart rag-api +``` + +## Prompt 与查询模式的对应关系 + +LightRAG 有 5 种查询模式,但只使用 2 种响应 Prompt: + +| 查询模式 | 使用的 Prompt | 说明 | +|---------|--------------|------| +| `naive` | `naive_rag_response` | 纯向量搜索,不使用知识图谱 | +| `local` | `rag_response` | 局部知识图谱搜索 | +| `global` | `rag_response` | 全局知识图谱搜索 | +| `hybrid` | `rag_response` | 混合模式(local + global) | +| `mix` | `rag_response` | 全功能混合(KG + 向量) | + +因此,自定义 `rag_response` 会影响除 `naive` 以外的所有模式,而 `naive_rag_response` 仅影响 `naive` 模式。 + +## 相关配置 + +| 配置项 | 类型 | 描述 | +|-------|------|------| +| `LIGHTRAG_STRICT_GROUNDING` | 环境变量 | 全局启用严格 Grounding 模式 | +| `strict_grounding` | 租户配置 | 租户级启用严格 Grounding 模式 | +| `rag_response` | 租户配置 | 自定义 KG 模式响应 prompt(影响 local/global/hybrid/mix) | +| `naive_rag_response` | 租户配置 | 自定义 naive 模式响应 prompt(仅影响 naive) | + +## 注意事项 + +1. 
**语言适配**:当前默认拒绝回答消息是中文,如需英文或其他语言,请使用完全自定义 prompt +2. **性能影响**:严格 Grounding 模式不会影响性能,仅修改 prompt 内容 +3. **兼容性**:此功能与所有查询模式(naive、local、global、hybrid、mix)兼容 +4. **Prompt 复用**:LightRAG 的设计中,`rag_response` 被 local/global/hybrid/mix 四种模式共享 diff --git a/env.example b/env.example index adc5bf5..db7a4f2 100644 --- a/env.example +++ b/env.example @@ -254,6 +254,21 @@ RAG_CONTEXT_MODE=page # 最大上下文 tokens RAG_MAX_CONTEXT_TOKENS=3000 +# ====== LightRAG 响应增强配置 ====== +# 控制 LLM 回答问题时的行为,防止在信息不足时编造答案 + +# --- 严格 Grounding 模式 --- +# 启用后,当知识库中没有足够信息回答问题时,AI 会明确拒绝回答 +# 而不是强行编造答案 +# 可选值: true, false +# 默认:注释掉或设置为 false(使用 LightRAG 原生行为) +# LIGHTRAG_STRICT_GROUNDING=true # 取消注释以启用严格 Grounding + +# --- 自定义 RAG 响应 Prompt(可选)--- +# 完全自定义 RAG 响应 prompt,覆盖默认和增强版 +# LIGHTRAG_RAG_RESPONSE_PROMPT="你的自定义 prompt..." +# LIGHTRAG_NAIVE_RAG_RESPONSE_PROMPT="你的自定义 naive 模式 prompt..." + # ====== 存储后端配置 ====== # LightRAG 存储层配置(必须与 docker-compose 中的服务匹配) diff --git a/src/prompt_manager.py b/src/prompt_manager.py index c0b2d58..34b918a 100644 --- a/src/prompt_manager.py +++ b/src/prompt_manager.py @@ -4,22 +4,189 @@ Supports injecting custom prompts via: 1. Environment variables (global configuration) 2. Tenant configuration (tenant-specific override) + +Includes enhanced RAG response prompts that: +- Strictly refuse to answer when context is insufficient +- Require explicit "no information" responses +- Prevent AI from fabricating answers """ -import os import json -from typing import Dict, Any, Optional +import os +from typing import Any + from src.logger import logger +# Enhanced RAG response prompt with strict grounding requirements +ENHANCED_RAG_RESPONSE = """---Role--- + +You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. Your primary function is to answer user queries accurately by ONLY using the information within the provided **Context**. + +---Goal--- + +Generate a comprehensive, well-structured answer to the user query. 
+The answer must integrate relevant facts from the Knowledge Graph and Document Chunks found in the **Context**. +Consider the conversation history if provided to maintain conversational flow and avoid repeating information. + +---Critical Grounding Rules (MUST FOLLOW)--- + +⚠️ **ABSOLUTE REQUIREMENT**: You must ONLY use information explicitly stated in the **Context**. + +**Before generating any answer, you MUST evaluate:** +1. Does the Context contain information that DIRECTLY answers the user's question? +2. Is the information in the Context SUFFICIENT and RELEVANT to provide a complete answer? + +**If the answer is NO to either question, you MUST respond with:** +> 抱歉,根据当前知识库中的内容,我无法找到与您问题直接相关的信息。请尝试: +> - 重新表述您的问题 +> - 提供更多上下文信息 +> - 确认相关文档是否已上传到知识库 + +**DO NOT:** +- ❌ Make up or fabricate information not in the Context +- ❌ Use your general knowledge to fill gaps +- ❌ Provide speculative or assumed answers +- ❌ Say "based on my knowledge" or similar phrases +- ❌ Combine partial information to create misleading answers + +**DO:** +- ✅ Explicitly state when information is not available +- ✅ Only cite facts that appear in the Context +- ✅ Be honest about the limitations of the provided information + +---Instructions--- + +1. Step-by-Step Instruction: + - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need. + - Scrutinize both `Knowledge Graph Data` and `Document Chunks` in the **Context**. Identify and extract all pieces of information that are directly relevant to answering the user query. + - **CRITICAL**: If no relevant information is found, immediately respond with the "unable to answer" message above. Do NOT attempt to generate an answer. + - Weave the extracted facts into a coherent and logical response. Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information. 
+ - Track the reference_id of the document chunks which directly support the facts presented in the response. Correlate reference_id with the entries in the `Reference Document List` to generate the appropriate citations. + - Generate a references section at the end of the response. Each reference document must directly support the facts presented in the response. + - Do not generate anything after the reference section. + +2. Content & Grounding: + - Strictly adhere to the provided context from the **Context**; DO NOT invent, assume, or infer any information not explicitly stated. + - If the answer cannot be found in the **Context**, state that you do not have enough information to answer. Do not attempt to guess. + +3. Formatting & Language: + - The response MUST be in the same language as the user query. + - The response MUST utilize Markdown formatting for enhanced clarity and structure (e.g., headings, bold text, bullet points). + - The response should be presented in {response_type}. + +4. References Section Format: + - The References section should be under heading: `### References` + - Reference list entries should adhere to the format: `- [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`). + - The Document Title in the citation must retain its original language. + - Output each citation on an individual line + - Provide maximum of 5 most relevant citations. + - Do not generate footnotes section or any comment, summary, or explanation after the references. + +5. Reference Section Example: +``` +### References + +- [1] Document Title One +- [2] Document Title Two +- [3] Document Title Three +``` + +6. Additional Instructions: {user_prompt} + + +---Context--- + +{context_data} +""" + +# Enhanced Naive RAG response prompt with strict grounding requirements +ENHANCED_NAIVE_RAG_RESPONSE = """---Role--- + +You are an expert AI assistant specializing in synthesizing information from a provided knowledge base. 
Your primary function is to answer user queries accurately by ONLY using the information within the provided **Context**. + +---Goal--- + +Generate a comprehensive, well-structured answer to the user query. +The answer must integrate relevant facts from the Document Chunks found in the **Context**. +Consider the conversation history if provided to maintain conversational flow and avoid repeating information. + +---Critical Grounding Rules (MUST FOLLOW)--- + +⚠️ **ABSOLUTE REQUIREMENT**: You must ONLY use information explicitly stated in the **Context**. + +**Before generating any answer, you MUST evaluate:** +1. Does the Context contain information that DIRECTLY answers the user's question? +2. Is the information in the Context SUFFICIENT and RELEVANT to provide a complete answer? + +**If the answer is NO to either question, you MUST respond with:** +> 抱歉,根据当前知识库中的内容,我无法找到与您问题直接相关的信息。请尝试: +> - 重新表述您的问题 +> - 提供更多上下文信息 +> - 确认相关文档是否已上传到知识库 + +**DO NOT:** +- ❌ Make up or fabricate information not in the Context +- ❌ Use your general knowledge to fill gaps +- ❌ Provide speculative or assumed answers +- ❌ Say "based on my knowledge" or similar phrases +- ❌ Combine partial information to create misleading answers + +**DO:** +- ✅ Explicitly state when information is not available +- ✅ Only cite facts that appear in the Context +- ✅ Be honest about the limitations of the provided information + +---Instructions--- + +1. Step-by-Step Instruction: + - Carefully determine the user's query intent in the context of the conversation history to fully understand the user's information need. + - Scrutinize `Document Chunks` in the **Context**. Identify and extract all pieces of information that are directly relevant to answering the user query. + - **CRITICAL**: If no relevant information is found, immediately respond with the "unable to answer" message above. Do NOT attempt to generate an answer. + - Weave the extracted facts into a coherent and logical response. 
Your own knowledge must ONLY be used to formulate fluent sentences and connect ideas, NOT to introduce any external information. + - Track the reference_id of the document chunks which directly support the facts presented in the response. Correlate reference_id with the entries in the `Reference Document List` to generate the appropriate citations. + - Generate a **References** section at the end of the response. Each reference document must directly support the facts presented in the response. + - Do not generate anything after the reference section. + +2. Content & Grounding: + - Strictly adhere to the provided context from the **Context**; DO NOT invent, assume, or infer any information not explicitly stated. + - If the answer cannot be found in the **Context**, state that you do not have enough information to answer. Do not attempt to guess. + +3. Formatting & Language: + - The response MUST be in the same language as the user query. + - The response MUST utilize Markdown formatting for enhanced clarity and structure (e.g., headings, bold text, bullet points). + - The response should be presented in {response_type}. + +4. References Section Format: + - The References section should be under heading: `### References` + - Reference list entries should adhere to the format: `- [n] Document Title`. Do not include a caret (`^`) after opening square bracket (`[`). + - The Document Title in the citation must retain its original language. + - Output each citation on an individual line + - Provide maximum of 5 most relevant citations. + - Do not generate footnotes section or any comment, summary, or explanation after the references. + +5. Reference Section Example: +``` +### References + +- [1] Document Title One +- [2] Document Title Two +- [3] Document Title Three +``` + +6. 
Additional Instructions: {user_prompt} + + +---Context--- + +{content_data} +""" + def apply_custom_prompts( - tenant_id: Optional[str] = None, - tenant_custom_prompts: Optional[Dict[str, Any]] = None + tenant_id: str | None = None, + tenant_custom_prompts: dict[str, Any] | None = None ) -> None: """ Apply custom prompts to LightRAG global PROMPTS dictionary. - Priority: Tenant Config > Environment Variables > Default + Priority: Tenant Config > Environment Variables > Enhanced Defaults Args: tenant_id: Tenant ID (for logging) @@ -100,14 +267,43 @@ def apply_custom_prompts( except json.JSONDecodeError as e: logger.warning(f"Failed to parse entity_types JSON: {e}") + # Check if strict grounding is enabled (used by both rag_response and naive_rag_response) + use_strict_grounding = _is_strict_grounding_enabled(tenant_custom_prompts) + + # 6. RAG Response Prompt (for knowledge graph queries) + rag_response = _get_prompt_value( + env_key="LIGHTRAG_RAG_RESPONSE_PROMPT", + tenant_key="rag_response", + tenant_config=tenant_custom_prompts + ) + if rag_response: + PROMPTS["rag_response"] = rag_response + applied_prompts.append("rag_response") + elif use_strict_grounding: + PROMPTS["rag_response"] = ENHANCED_RAG_RESPONSE + applied_prompts.append("rag_response(strict)") + + # 7. 
Naive RAG Response Prompt (for vector-only queries) + naive_rag_response = _get_prompt_value( + env_key="LIGHTRAG_NAIVE_RAG_RESPONSE_PROMPT", + tenant_key="naive_rag_response", + tenant_config=tenant_custom_prompts + ) + if naive_rag_response: + PROMPTS["naive_rag_response"] = naive_rag_response + applied_prompts.append("naive_rag_response") + elif use_strict_grounding: + PROMPTS["naive_rag_response"] = ENHANCED_NAIVE_RAG_RESPONSE + applied_prompts.append("naive_rag_response(strict)") + if applied_prompts: tenant_info = f"[Tenant {tenant_id}]" if tenant_id else "[Global]" logger.info(f"{tenant_info} Applied custom prompts: {', '.join(applied_prompts)}") def get_custom_entity_types( - tenant_custom_prompts: Optional[Dict[str, Any]] = None -) -> Optional[list]: + tenant_custom_prompts: dict[str, Any] | None = None +) -> list | None: """ Get custom entity types from environment or tenant configuration. @@ -138,11 +334,32 @@ def get_custom_entity_types( return None +def _is_strict_grounding_enabled( + tenant_config: dict[str, Any] | None +) -> bool: + """ + Check if strict grounding mode is enabled. + + Args: + tenant_config: Tenant custom prompts dictionary + + Returns: + bool: True if strict grounding is enabled + """ + use_strict = _get_prompt_value( + env_key="LIGHTRAG_STRICT_GROUNDING", + tenant_key="strict_grounding", + tenant_config=tenant_config + ) + # Handle both string and boolean values from tenant config + return use_strict is not None and str(use_strict).lower() in ("true", "1", "yes", "on") + + def _get_prompt_value( env_key: str, tenant_key: str, - tenant_config: Optional[Dict[str, Any]] -) -> Optional[str]: + tenant_config: dict[str, Any] | None +) -> str | None: """ Get prompt value with priority: Tenant Config > Environment Variable. 
diff --git a/src/tenant_config.py b/src/tenant_config.py index f027f27..23a3f2f 100644 --- a/src/tenant_config.py +++ b/src/tenant_config.py @@ -81,7 +81,10 @@ class TenantConfigModel(BaseModel): "entity_extraction_user_prompt": "你的自定义 user prompt...", "entity_continue_extraction_user_prompt": "你的自定义 continue prompt...", "entity_extraction_examples": ["示例1", "示例2"], - "entity_types": ["product", "feature", "error_code", "configuration"] + "entity_types": ["product", "feature", "error_code", "configuration"], + "strict_grounding": "true", + "rag_response": "你的自定义 RAG 响应 prompt(可选)...", + "naive_rag_response": "你的自定义 Naive RAG 响应 prompt(可选)..." } )