From 6f9cea7e3e71c4f21332cc4f243528dfae86a3cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=8E=8B=E8=89=AF?= <841369634@qq.com> Date: Mon, 27 Apr 2026 17:06:14 +0800 Subject: [PATCH] fix: Fix the `.` dot in the regex not matching `\n` --- .../step_node/ai_chat_step_node/impl/base_chat_node.py | 2 +- .../application_node/impl/base_application_node.py | 5 ++--- .../flow/step_node/intent_node/impl/base_intent_node.py | 2 +- .../impl/base_parameter_extraction_node.py | 2 +- .../step_node/question_node/impl/base_question_node.py | 2 +- apps/application/long_term_memory/__init__.py | 2 +- apps/common/utils/common.py | 6 +++--- apps/knowledge/task/handler.py | 3 +-- .../component/operation-button/ChatOperationButton.vue | 6 +++--- .../component/operation-button/LogOperationButton.vue | 4 ++-- .../ai-chat/component/prologue-content/index.vue | 8 ++++---- ui/src/components/markdown/IframeRender.vue | 2 +- 12 files changed, 21 insertions(+), 23 deletions(-) diff --git a/apps/application/flow/step_node/ai_chat_step_node/impl/base_chat_node.py b/apps/application/flow/step_node/ai_chat_step_node/impl/base_chat_node.py index a87c9123e82..f32eb49e19b 100644 --- a/apps/application/flow/step_node/ai_chat_step_node/impl/base_chat_node.py +++ b/apps/application/flow/step_node/ai_chat_step_node/impl/base_chat_node.py @@ -383,7 +383,7 @@ def get_history_message(history_chat_record, dialogue_number, dialogue_type, run range(start_index if start_index > 0 else 0, len(history_chat_record))], []) for message in history_message: if isinstance(message.content, str): - message.content = re.sub('[\d\D]*?<\/form_rander>', '', message.content) + message.content = re.sub(r'.*?<\/form_rander>', '', message.content, flags=re.DOTALL) return history_message def generate_prompt_question(self, prompt): diff --git a/apps/application/flow/step_node/application_node/impl/base_application_node.py b/apps/application/flow/step_node/application_node/impl/base_application_node.py index 770a820a59d..7622de3a75f 100644 --- a/apps/application/flow/step_node/application_node/impl/base_application_node.py +++ b/apps/application/flow/step_node/application_node/impl/base_application_node.py @@ -132,15 +132,14 @@ def reset_application_node_dict(application_node_dict, runtime_node_id, node_dat application_node = application_node_dict[key] if application_node.get('runtime_node_id') == runtime_node_id: content: str = application_node.get('content') - match = re.search('.*?', content) + match = re.search(r'.*?<\/form_rander>', content, flags=re.DOTALL) if match: form_setting_str = match.group().replace('', '').replace('', '') form_setting = json.loads(form_setting_str) form_setting['is_submit'] = True form_setting['form_data'] = node_data value = f'{json.dumps(form_setting)}' - res = re.sub('.*?', - '${value}', content) + res = re.sub(r'.*?<\/form_rander>', '${value}', content, flags=re.DOTALL) application_node['content'] = res.replace('${value}', value) except Exception as e: maxkb_logger.warning(f'reset_application_node_dict error: {e}', exc_info=True) diff --git a/apps/application/flow/step_node/intent_node/impl/base_intent_node.py b/apps/application/flow/step_node/intent_node/impl/base_intent_node.py index 64822aa9a20..b3f1608acc2 100644 --- a/apps/application/flow/step_node/intent_node/impl/base_intent_node.py +++ b/apps/application/flow/step_node/intent_node/impl/base_intent_node.py @@ -133,7 +133,7 @@ def get_history_message(history_chat_record, dialogue_number): for message in history_message: if isinstance(message.content, str): - message.content = re.sub('[\d\D]*?<\/form_rander>', '', message.content) + message.content = re.sub(r'.*?<\/form_rander>', '', message.content, flags=re.DOTALL) return history_message def build_system_prompt(self) -> str: diff --git a/apps/application/flow/step_node/parameter_extraction_node/impl/base_parameter_extraction_node.py b/apps/application/flow/step_node/parameter_extraction_node/impl/base_parameter_extraction_node.py index ae13f7c6c77..1ffa54ca4c7 100644 --- a/apps/application/flow/step_node/parameter_extraction_node/impl/base_parameter_extraction_node.py +++ b/apps/application/flow/step_node/parameter_extraction_node/impl/base_parameter_extraction_node.py @@ -71,7 +71,7 @@ def json_loads(response, expected_fields): extraction_strategies = [ lambda: json.loads(cleaned), lambda: json.loads(re.search(r'```(?:json)?\s*(\{.*?\})\s*```', cleaned, re.DOTALL).group(1)), - lambda: json.loads(re.search(r'(\{[\s\S]*\})', cleaned).group(1)), + lambda: json.loads(re.search(r'(\{.*\})', cleaned, flags=re.DOTALL).group(1)), ] for strategy in extraction_strategies: try: diff --git a/apps/application/flow/step_node/question_node/impl/base_question_node.py b/apps/application/flow/step_node/question_node/impl/base_question_node.py index 3f622c5f2dc..34000542db3 100644 --- a/apps/application/flow/step_node/question_node/impl/base_question_node.py +++ b/apps/application/flow/step_node/question_node/impl/base_question_node.py @@ -130,7 +130,7 @@ def get_history_message(history_chat_record, dialogue_number): range(start_index if start_index > 0 else 0, len(history_chat_record))], []) for message in history_message: if isinstance(message.content, str): - message.content = re.sub('[\d\D]*?<\/form_rander>', '', message.content) + message.content = re.sub(r'.*?<\/form_rander>', '', message.content, flags=re.DOTALL) return history_message def generate_prompt_question(self, prompt): diff --git a/apps/application/long_term_memory/__init__.py b/apps/application/long_term_memory/__init__.py index e3fa5851e03..004c566a4a1 100644 --- a/apps/application/long_term_memory/__init__.py +++ b/apps/application/long_term_memory/__init__.py @@ -246,7 +246,7 @@ def _run_extract(workspace_id, application_id, chat_user_id, config, history_lim ]): content += chunk.content - content = re.sub(r'.*?', '', content, flags=re.DOTALL).strip() + content = re.sub(r'.*?<\/think>', '', content, flags=re.DOTALL).strip() if long_term_memory: long_term_memory.memory = content diff --git a/apps/common/utils/common.py b/apps/common/utils/common.py index f234ebeaf8c..a0f13364339 100644 --- a/apps/common/utils/common.py +++ b/apps/common/utils/common.py @@ -115,18 +115,18 @@ def markdown_to_plain_text(md: str) -> str: # 移除行内代码 `code` text = re.sub(r'`(.*?)`', r'\1', text) # 移除代码块 ```code``` - text = re.sub(r'```[\s\S]*?```', '', text) + text = re.sub(r'```.*?```', '', text, flags=re.DOTALL) # 移除多余的换行符 text = re.sub(r'\n{2,}', '\n', text) # 使用正则表达式去除所有 HTML 标签 text = re.sub(r'<[^>]+>', '', text) # 先移除特定媒体标签(优先级高于通用HTML标签移除) - text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>[\s\S]*?(?:)?', '', text, flags=re.IGNORECASE) + text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>.*?(?:)?', '', text, flags=re.DOTALL | re.IGNORECASE) text = re.sub(r']*>', '', text) # 匹配图片标签 # 去除多余的空白字符(包括换行符、制表符等) text = re.sub(r'\s+', ' ', text) # 去除表单渲染 - re.sub(r'[\s\S]*?<\/form_rander>', '', text) + text = re.sub(r'.*?<\/form_rander>', '', text, flags=re.DOTALL) # 去除首尾空格 text = text.strip() return text diff --git a/apps/knowledge/task/handler.py b/apps/knowledge/task/handler.py index f0a8a54853c..d25873edc9a 100644 --- a/apps/knowledge/task/handler.py +++ b/apps/knowledge/task/handler.py @@ -102,8 +102,7 @@ def save_problem(knowledge_id, document_id, paragraph_id, problem): # print(f"paragraph_id: {paragraph_id}") # print(f"problem: {problem}") problem = re.sub(r"^\d+\.\s*", "", problem) - pattern = r"(.*?)" - match = re.search(pattern, problem) + match = re.search(r"(.*?)<\/question>", problem, flags=re.DOTALL) problem = match.group(1) if match else None if problem is None or len(problem) == 0: return diff --git a/ui/src/components/ai-chat/component/operation-button/ChatOperationButton.vue b/ui/src/components/ai-chat/component/operation-button/ChatOperationButton.vue index bddd993d2d3..2a870e77707 100644 --- a/ui/src/components/ai-chat/component/operation-button/ChatOperationButton.vue +++ b/ui/src/components/ai-chat/component/operation-button/ChatOperationButton.vue @@ -262,9 +262,9 @@ function markdownToPlainText(md: string) { // 移除行内代码 `code` .replace(/`(.*?)`/g, '$1') // 移除代码块 ```code``` - .replace(/```[\s\S]*?```/g, '') + .replace(/```.*?```/gs, '') // 移除video标签 - .replace(/