From 9679f6dd2abb121e9afe9c48b951407b8db05ef7 Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Fri, 20 Mar 2026 11:23:30 +0800
Subject: [PATCH 1/2] =?UTF-8?q?fix:=20=E4=BF=AE=E5=A4=8D=E7=B2=97=E8=AF=BB?=
 =?UTF-8?q?=E6=8A=A5=E5=91=8A=20LLM=20=E8=BF=94=E5=9B=9E=E5=8D=A0=E4=BD=8D?=
 =?UTF-8?q?=E7=AC=A6=E5=81=87=E5=86=85=E5=AE=B9=E9=97=AE=E9=A2=98?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- prompts.py: 去掉 prompt 示例中的字面占位符值
- pipelines.py: 增加占位符检测，过滤 "创新点1" 等假内容
- fallback 到 one_liner 或 llm_text 原文
---
 packages/ai/pipelines.py | 31 +++++++++++++++++++++++++++++++
 packages/ai/prompts.py   |  8 ++++----
 2 files changed, 35 insertions(+), 4 deletions(-)

diff --git a/packages/ai/pipelines.py b/packages/ai/pipelines.py
index b783395..153da83 100644
--- a/packages/ai/pipelines.py
+++ b/packages/ai/pipelines.py
@@ -420,8 +420,39 @@ def _build_skim_structured(
                 score = 0.5
             score = min(max(score, 0.0), 1.0)
             one_liner = str(parsed_json.get("one_liner", "")).strip() or llm_text[:140]
+
+            # 过滤 LLM 返回的字面占位符
+            PLACEHOLDER_KEYWORDS = {
+                "创新点",
+                "创新点1",
+                "创新点2",
+                "创新点3",
+                "keyword",
+                "keyword1",
+            }
+            FALLBACK_KEYWORDS = {
+                "中文标题",
+                "中文标题翻译",
+                "中文摘要",
+                "中文摘要翻译",
+                "一句话",
+                "一句话总结",
+                "一句话中文总结",
+            }
+            innovations = [
+                x
+                for x in innovations
+                if x.strip() and not any(pk in x for pk in PLACEHOLDER_KEYWORDS)
+            ]
             if not innovations:
                 innovations = [one_liner[:80]]
+            if not title_zh or any(fk in title_zh for fk in FALLBACK_KEYWORDS):
+                title_zh = ""
+            if not abstract_zh or any(fk in abstract_zh for fk in FALLBACK_KEYWORDS):
+                abstract_zh = ""
+            if not one_liner or any(fk in one_liner for fk in FALLBACK_KEYWORDS):
+                one_liner = llm_text[:140]
+
             return SkimReport(
                 one_liner=one_liner[:280],
                 innovations=[str(x)[:180] for x in innovations[:5]],
diff --git a/packages/ai/prompts.py b/packages/ai/prompts.py
index 44c7e73..f2ef9c8 100644
--- a/packages/ai/prompts.py
+++ b/packages/ai/prompts.py
@@ -7,11 +7,11 @@
 def build_skim_prompt(title: str, abstract: str) -> str:
     return (
         "你是科研助手。请根据标题和摘要输出严格 JSON：\n"
-        '{"one_liner":"一句话中文总结", '
-        '"innovations":["创新点1","创新点2","创新点3"], '
+        '{"one_liner":"用一句话概括论文核心贡献", '
+        '"innovations":["从摘要中提取的创新点1","从摘要中提取的创新点2","从摘要中提取的创新点3"], '
         '"keywords":["keyword1","keyword2","keyword3","keyword4","keyword5"], '
-        '"title_zh":"中文标题翻译", '
-        '"abstract_zh":"中文摘要翻译（完整翻译，不要缩写）", '
+        '"title_zh":"中文标题", '
+        '"abstract_zh":"中文摘要", '
         '"relevance_score":0.0}\n'
         "要求：\n"
         "- one_liner、innovations、title_zh、abstract_zh 必须使用中文\n"

From 93e1b57e3e2f46f98d9b07ce94f538ac2422ae1b Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Fri, 20 Mar 2026 11:37:24 +0800
Subject: [PATCH 2/2] =?UTF-8?q?fix:=20=E6=97=A5=E6=8A=A5=E7=B2=BE=E8=AF=BB?=
 =?UTF-8?q?=E6=A8=A1=E6=9D=BF=E7=9B=B4=E6=8E=A5=E6=B8=B2=E6=9F=93Markdown?=
 =?UTF-8?q?=20HTML=20+=20=E8=AE=BA=E6=96=87=E9=A1=B5=E7=B2=BE=E8=AF=BB?=
 =?UTF-8?q?=E7=8A=B6=E6=80=81=E5=88=B7=E6=96=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- brief_service.py: 修复_md_to_html()语法错误，精读部分改为
  直接渲染deep_dive_md_html替代拆分的method/experiments字段
- PaperDetail.tsx: handleDeep()删除多余的状态操作，直接setSavedDeep
---
 frontend/src/pages/PaperDetail.tsx |  8 +--
 packages/ai/brief_service.py       | 80 ++++++++++++++++++++++++++----
 2 files changed, 72 insertions(+), 16 deletions(-)

diff --git a/frontend/src/pages/PaperDetail.tsx b/frontend/src/pages/PaperDetail.tsx
index 0590818..ad564c0 100644
--- a/frontend/src/pages/PaperDetail.tsx
+++ b/frontend/src/pages/PaperDetail.tsx
@@ -277,13 +277,7 @@ export default function PaperDetail() {
     setReportTab("deep");
     try {
       const report = await pipelineApi.deep(id);
-      setDeepReport(report);
-      // 刷新论文信息，更新精读报告并清除旧缓存
-      const updated = await paperApi.detail(id);
-      setPaper(updated);
-      if (updated.deep_report) setSavedDeep(updated.deep_report);
-      // 清除新生成的报告，优先显示 savedDeep（从后端加载的最新数据）
-      setDeepReport(null);
+      setSavedDeep(report);
       toast("success", "精读完成");
     } catch {
       toast("error", "精读失败");
diff --git a/packages/ai/brief_service.py b/packages/ai/brief_service.py
index 81eddec..4dff869 100644
--- a/packages/ai/brief_service.py
+++ b/packages/ai/brief_service.py
@@ -50,6 +50,65 @@ def _parse_deep_dive(md: str) -> dict:
     return sections
 
 
+def _md_to_html(text: str) -> str:
+    """Convert a small Markdown subset to HTML for the e-mail template.
+
+    Handles ``#`` headings (mapped to h3-h5), ``-``/``*`` bullets, ``1.``
+    numbered items, ``**bold**``/``*italic*`` and plain paragraphs.  A blank
+    line is kept as an empty output line and ends the current list.
+
+    The source text is HTML-escaped before any tag is added, because the
+    template renders the result with ``|safe``.
+
+    Returns "" for empty input, otherwise the HTML lines joined by newlines.
+    """
+    if not text:
+        return ""
+    import html
+    import re
+
+    def inline(raw: str) -> str:
+        # Escape first so only the tags generated below reach the output.
+        out = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", html.escape(raw))
+        return re.sub(r"\*(.+?)\*", r"<em>\1</em>", out)
+
+    html_lines: list[str] = []
+    open_list = ""  # "ul", "ol", or "" when no list is open
+
+    def switch_list(kind: str) -> None:
+        # Close the open list if it differs from ``kind``, then open ``kind``.
+        nonlocal open_list
+        if open_list != kind:
+            if open_list:
+                html_lines.append(f"</{open_list}>")
+            if kind:
+                html_lines.append(f"<{kind}>")
+            open_list = kind
+
+    for line in text.split("\n"):
+        stripped = line.strip()
+        heading = re.match(r"^(#{1,6})\s+(.+)$", stripped)
+        bullet = re.match(r"^[-*]\s+(.+)$", stripped)
+        numbered = re.match(r"^\d+\.\s+(.+)$", stripped)
+        if not stripped:
+            switch_list("")
+            html_lines.append("")
+        elif heading:
+            switch_list("")
+            # Level comes from the marker only; '#' in the title is ignored.
+            tag = f"h{min(len(heading.group(1)) + 2, 5)}"
+            html_lines.append(f"<{tag}>{inline(heading.group(2))}</{tag}>")
+        elif bullet or numbered:
+            switch_list("ul" if bullet else "ol")
+            item = (bullet or numbered).group(1)
+            html_lines.append(f"<li>{inline(item)}</li>")
+        else:
+            switch_list("")
+            html_lines.append(f"<p>{inline(stripped)}</p>")
+    switch_list("")
+    return "\n".join(html_lines)
+
+
 DAILY_TEMPLATE = Template("""\
 <!DOCTYPE html>
 <html lang="zh">
@@ -89,6 +148,13 @@ def _parse_deep_dive(md: str) -> dict:
   .deep-section { margin-top: 12px; }
   .deep-section-label { font-size: 12px; font-weight: 700; color: #7c3aed; margin-bottom: 6px; display: flex; align-items: center; gap: 4px; text-transform: uppercase; letter-spacing: 0.05em; }
   .deep-text { font-size: 13px; color: #4b5563; line-height: 1.7; margin: 0; }
+  .deep-html { font-size: 13px; color: #374151; line-height: 1.8; }
+  .deep-html h3, .deep-html h4, .deep-html h5 { color: #7c3aed; font-weight: 700; margin: 12px 0 6px; }
+  .deep-html h3 { font-size: 15px; } .deep-html h4 { font-size: 14px; } .deep-html h5 { font-size: 13px; }
+  .deep-html p { margin: 0 0 8px; }
+  .deep-html ul, .deep-html ol { margin: 6px 0; padding-left: 20px; }
+  .deep-html li { margin-bottom: 4px; }
+  .deep-html strong { color: #1a1a2e; } .deep-html em { color: #4b5563; }
   .risk-list { margin: 6px 0 0 18px; padding: 0; font-size: 12px; color: #b45309; }
   .risk-list li { margin-bottom: 4px; line-height: 1.5; }
 
@@ -200,16 +266,10 @@ def _parse_deep_dive(md: str) -> dict:
         {% endif %}
       </div>
       <div class="paper-id">arXiv: <a href="https://arxiv.org/abs/{{ d.arxiv_id }}" target="_blank">{{ d.arxiv_id }}</a></div>
-      {% if d.method %}
-      <div class="deep-section">
-        <div class="deep-section-label">📐 方法</div>
-        <p class="deep-text">{{ d.method[:280] }}{% if d.method|length > 280 %}...{% endif %}</p>
-      </div>
-      {% endif %}
-      {% if d.experiments %}
+      {% if d.deep_dive_md_html %}
       <div class="deep-section">
-        <div class="deep-section-label">🧪 实验</div>
-        <p class="deep-text">{{ d.experiments[:280] }}{% if d.experiments|length > 280 %}...{% endif %}</p>
+        <div class="deep-section-label">📄 精读内容</div>
+        <div class="deep-html">{{ d.deep_dive_md_html|safe }}</div>
       </div>
       {% endif %}
       {% if d.risks %}
@@ -400,6 +460,7 @@ def build_html(self, limit: int = 30) -> str:
             deep_read_highlights = []
             for p, report in deep_read_papers[:5]:  # 取前 5 篇
                 sections = _parse_deep_dive(report.deep_dive_md)
+                md_html = _md_to_html(report.deep_dive_md or "")
                 deep_read_highlights.append(
                     {
                         "id": str(p.id),
@@ -409,6 +470,7 @@ def build_html(self, limit: int = 30) -> str:
                         "method": sections.get("method", ""),
                         "experiments": sections.get("experiments", ""),
                         "risks": (report.key_insights or {}).get("reviewer_risks", []),
+                        "deep_dive_md_html": md_html,
                     }
                 )