From 9679f6dd2abb121e9afe9c48b951407b8db05ef7 Mon Sep 17 00:00:00 2001 From: Color2333 <1552429809@qq.com> Date: Fri, 20 Mar 2026 11:23:30 +0800 Subject: [PATCH 1/2] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E7=B2=97=E8=AF=BB?= =?UTF-8?q?=E6=8A=A5=E5=91=8A=20LLM=20=E8=BF=94=E5=9B=9E=E5=8D=A0=E4=BD=8D?= =?UTF-8?q?=E7=AC=A6=E5=81=87=E5=86=85=E5=AE=B9=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - prompts.py: 去掉 prompt 示例中的字面占位符值 - pipelines.py: 增加占位符检测,过滤 "创新点1" 等假内容 - fallback 到 one_liner 或 llm_text 原文 --- packages/ai/pipelines.py | 31 +++++++++++++++++++++++++++++++ packages/ai/prompts.py | 8 ++++---- 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/packages/ai/pipelines.py b/packages/ai/pipelines.py index b783395..153da83 100644 --- a/packages/ai/pipelines.py +++ b/packages/ai/pipelines.py @@ -420,8 +420,39 @@ def _build_skim_structured( score = 0.5 score = min(max(score, 0.0), 1.0) one_liner = str(parsed_json.get("one_liner", "")).strip() or llm_text[:140] + + # 过滤 LLM 返回的字面占位符 + PLACEHOLDER_KEYWORDS = { + "创新点", + "创新点1", + "创新点2", + "创新点3", + "keyword", + "keyword1", + } + FALLBACK_KEYWORDS = { + "中文标题", + "中文标题翻译", + "中文摘要", + "中文摘要翻译", + "一句话", + "一句话总结", + "一句话中文总结", + } + innovations = [ + x + for x in innovations + if x.strip() and not any(pk in x for pk in PLACEHOLDER_KEYWORDS) + ] if not innovations: innovations = [one_liner[:80]] + if not title_zh or any(fk in title_zh for fk in FALLBACK_KEYWORDS): + title_zh = "" + if not abstract_zh or any(fk in abstract_zh for fk in FALLBACK_KEYWORDS): + abstract_zh = "" + if not one_liner or any(fk in one_liner for fk in FALLBACK_KEYWORDS): + one_liner = llm_text[:140] + return SkimReport( one_liner=one_liner[:280], innovations=[str(x)[:180] for x in innovations[:5]], diff --git a/packages/ai/prompts.py b/packages/ai/prompts.py index 44c7e73..f2ef9c8 100644 --- a/packages/ai/prompts.py +++ b/packages/ai/prompts.py @@ -7,11 +7,11 @@ def build_skim_prompt(title: str, abstract: str) -> str: return ( "你是科研助手。请根据标题和摘要输出严格 JSON:\n" - '{"one_liner":"一句话中文总结", ' - '"innovations":["创新点1","创新点2","创新点3"], ' + '{"one_liner":"用一句话概括论文核心贡献", ' + '"innovations":["从摘要中提取的创新点1","从摘要中提取的创新点2","从摘要中提取的创新点3"], ' '"keywords":["keyword1","keyword2","keyword3","keyword4","keyword5"], ' - '"title_zh":"中文标题翻译", ' - '"abstract_zh":"中文摘要翻译(完整翻译,不要缩写)", ' + '"title_zh":"中文标题", ' + '"abstract_zh":"中文摘要", ' '"relevance_score":0.0}\n' "要求:\n" "- one_liner、innovations、title_zh、abstract_zh 必须使用中文\n" From 93e1b57e3e2f46f98d9b07ce94f538ac2422ae1b Mon Sep 17 00:00:00 2001 From: Color2333 <1552429809@qq.com> Date: Fri, 20 Mar 2026 11:37:24 +0800 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20=E6=97=A5=E6=8A=A5=E7=B2=BE=E8=AF=BB?= =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E7=9B=B4=E6=8E=A5=E6=B8=B2=E6=9F=93Markdown?= =?UTF-8?q?=20HTML=20+=20=E8=AE=BA=E6=96=87=E9=A1=B5=E7=B2=BE=E8=AF=BB?= =?UTF-8?q?=E7=8A=B6=E6=80=81=E5=88=B7=E6=96=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - brief_service.py: 修复_md_to_html()语法错误,精读部分改为 直接渲染deep_dive_md_html替代拆分的method/experiments字段 - PaperDetail.tsx: handleDeep()删除多余的状态操作,直接setSavedDeep --- frontend/src/pages/PaperDetail.tsx | 8 +-- packages/ai/brief_service.py | 80 ++++++++++++++++++++++++++---- 2 files changed, 72 insertions(+), 16 deletions(-) diff --git a/frontend/src/pages/PaperDetail.tsx b/frontend/src/pages/PaperDetail.tsx index 0590818..ad564c0 100644 --- a/frontend/src/pages/PaperDetail.tsx +++ b/frontend/src/pages/PaperDetail.tsx @@ -277,13 +277,7 @@ export default function PaperDetail() { setReportTab("deep"); try { const report = await pipelineApi.deep(id); - setDeepReport(report); - // 刷新论文信息,更新精读报告并清除旧缓存 - const updated = await paperApi.detail(id); - setPaper(updated); - if (updated.deep_report) setSavedDeep(updated.deep_report); - // 清除新生成的报告,优先显示 savedDeep(从后端加载的最新数据) - setDeepReport(null); + setSavedDeep(report); toast("success", "精读完成"); } catch { toast("error", "精读失败"); diff --git a/packages/ai/brief_service.py b/packages/ai/brief_service.py index 81eddec..4dff869 100644 --- a/packages/ai/brief_service.py +++ b/packages/ai/brief_service.py @@ -50,6 +50,65 @@ def _parse_deep_dive(md: str) -> dict: return sections +def _md_to_html(text: str) -> str: + """轻量 Markdown → HTML 转换(用于邮件模板)""" + if not text: + return "" + import re + + lines = text.split("\n") + html_lines: list[str] = [] + in_ul = False + for line in lines: + stripped = line.strip() + if not stripped: + if in_ul: + html_lines.append("") + in_ul = False + html_lines.append("") + continue + # 标题 + m = re.match(r"^#{1,3}\s+(.+)$", stripped) + if m: + if in_ul: + html_lines.append("") + in_ul = False + level = m.group(0).count("#") + tag = f"h{level + 2}" # h3/h4/h5 + inner = re.sub(r"\*\*(.+?)\*\*", r"\1", m.group(1)) + inner = re.sub(r"\*(.+?)\*", r"\1", inner) + html_lines.append(f"<{tag}>{inner}") + # 无序列表 + elif stripped.startswith("-"): + if not in_ul: + html_lines.append("") + in_ul = False + item = re.sub(r"^\d+\.\s+", "", stripped) + item = re.sub(r"\*\*(.+?)\*\*", r"\1", item) + item = re.sub(r"\*(.+?)\*", r"\1", item) + html_lines.append(f"
  • {item}
  • ") + # 段落 + else: + if in_ul: + html_lines.append("") + in_ul = False + para = re.sub(r"\*\*(.+?)\*\*", r"\1", stripped) + para = re.sub(r"\*(.+?)\*", r"\1", para) + html_lines.append(f"

    {para}

    ") + if in_ul: + html_lines.append("") + return "\n".join(html_lines) + + DAILY_TEMPLATE = Template("""\ @@ -89,6 +148,13 @@ def _parse_deep_dive(md: str) -> dict: .deep-section { margin-top: 12px; } .deep-section-label { font-size: 12px; font-weight: 700; color: #7c3aed; margin-bottom: 6px; display: flex; align-items: center; gap: 4px; text-transform: uppercase; letter-spacing: 0.05em; } .deep-text { font-size: 13px; color: #4b5563; line-height: 1.7; margin: 0; } + .deep-html { font-size: 13px; color: #374151; line-height: 1.8; } + .deep-html h3, .deep-html h4, .deep-html h5 { color: #7c3aed; font-weight: 700; margin: 12px 0 6px; } + .deep-html h3 { font-size: 15px; } .deep-html h4 { font-size: 14px; } .deep-html h5 { font-size: 13px; } + .deep-html p { margin: 0 0 8px; } + .deep-html ul, .deep-html ol { margin: 6px 0; padding-left: 20px; } + .deep-html li { margin-bottom: 4px; } + .deep-html strong { color: #1a1a2e; } .deep-html em { color: #4b5563; } .risk-list { margin: 6px 0 0 18px; padding: 0; font-size: 12px; color: #b45309; } .risk-list li { margin-bottom: 4px; line-height: 1.5; } @@ -200,16 +266,10 @@ def _parse_deep_dive(md: str) -> dict: {% endif %}
    arXiv: {{ d.arxiv_id }}
    - {% if d.method %} -
    -
    📐 方法
    -

    {{ d.method[:280] }}{% if d.method|length > 280 %}...{% endif %}

    -
    - {% endif %} - {% if d.experiments %} + {% if d.deep_dive_md_html %}
    -
    🧪 实验
    -

    {{ d.experiments[:280] }}{% if d.experiments|length > 280 %}...{% endif %}

    +
    📄 精读内容
    +
    {{ d.deep_dive_md_html|safe }}
    {% endif %} {% if d.risks %} @@ -400,6 +460,7 @@ def build_html(self, limit: int = 30) -> str: deep_read_highlights = [] for p, report in deep_read_papers[:5]: # 取前 5 篇 sections = _parse_deep_dive(report.deep_dive_md) + md_html = _md_to_html(report.deep_dive_md or "") deep_read_highlights.append( { "id": str(p.id), @@ -409,6 +470,7 @@ def build_html(self, limit: int = 30) -> str: "method": sections.get("method", ""), "experiments": sections.get("experiments", ""), "risks": (report.key_insights or {}).get("reviewer_risks", []), + "deep_dive_md_html": md_html, } )