Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 1 addition & 7 deletions frontend/src/pages/PaperDetail.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -277,13 +277,7 @@ export default function PaperDetail() {
setReportTab("deep");
try {
const report = await pipelineApi.deep(id);
setDeepReport(report);
// 刷新论文信息,更新精读报告并清除旧缓存
const updated = await paperApi.detail(id);
setPaper(updated);
if (updated.deep_report) setSavedDeep(updated.deep_report);
// 清除新生成的报告,优先显示 savedDeep(从后端加载的最新数据)
setDeepReport(null);
setSavedDeep(report);
toast("success", "精读完成");
} catch {
toast("error", "精读失败");
Expand Down
80 changes: 71 additions & 9 deletions packages/ai/brief_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,65 @@ def _parse_deep_dive(md: str) -> dict:
return sections


def _md_to_html(text: str) -> str:
"""轻量 Markdown → HTML 转换(用于邮件模板)"""
if not text:
return ""
import re

lines = text.split("\n")
html_lines: list[str] = []
in_ul = False
for line in lines:
stripped = line.strip()
if not stripped:
if in_ul:
html_lines.append("</ul>")
in_ul = False
html_lines.append("")
continue
# 标题
m = re.match(r"^#{1,3}\s+(.+)$", stripped)
if m:
if in_ul:
html_lines.append("</ul>")
in_ul = False
level = m.group(0).count("#")
tag = f"h{level + 2}" # h3/h4/h5
inner = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", m.group(1))
inner = re.sub(r"\*(.+?)\*", r"<em>\1</em>", inner)
html_lines.append(f"<{tag}>{inner}</{tag}>")
# 无序列表
elif stripped.startswith("-"):
if not in_ul:
html_lines.append("<ul>")
in_ul = True
item = re.sub(r"^-\s+", "", stripped)
item = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", item)
item = re.sub(r"\*(.+?)\*", r"<em>\1</em>", item)
html_lines.append(f"<li>{item}</li>")
# 有序列表
elif re.match(r"^\d+\.\s+", stripped):
if in_ul:
html_lines.append("</ul>")
in_ul = False
item = re.sub(r"^\d+\.\s+", "", stripped)
item = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", item)
item = re.sub(r"\*(.+?)\*", r"<em>\1</em>", item)
html_lines.append(f"<li>{item}</li>")
# 段落
else:
if in_ul:
html_lines.append("</ul>")
in_ul = False
para = re.sub(r"\*\*(.+?)\*\*", r"<strong>\1</strong>", stripped)
para = re.sub(r"\*(.+?)\*", r"<em>\1</em>", para)
html_lines.append(f"<p>{para}</p>")
if in_ul:
html_lines.append("</ul>")
return "\n".join(html_lines)


DAILY_TEMPLATE = Template("""\
<!DOCTYPE html>
<html lang="zh">
Expand Down Expand Up @@ -89,6 +148,13 @@ def _parse_deep_dive(md: str) -> dict:
.deep-section { margin-top: 12px; }
.deep-section-label { font-size: 12px; font-weight: 700; color: #7c3aed; margin-bottom: 6px; display: flex; align-items: center; gap: 4px; text-transform: uppercase; letter-spacing: 0.05em; }
.deep-text { font-size: 13px; color: #4b5563; line-height: 1.7; margin: 0; }
.deep-html { font-size: 13px; color: #374151; line-height: 1.8; }
.deep-html h3, .deep-html h4, .deep-html h5 { color: #7c3aed; font-weight: 700; margin: 12px 0 6px; }
.deep-html h3 { font-size: 15px; } .deep-html h4 { font-size: 14px; } .deep-html h5 { font-size: 13px; }
.deep-html p { margin: 0 0 8px; }
.deep-html ul, .deep-html ol { margin: 6px 0; padding-left: 20px; }
.deep-html li { margin-bottom: 4px; }
.deep-html strong { color: #1a1a2e; } .deep-html em { color: #4b5563; }
.risk-list { margin: 6px 0 0 18px; padding: 0; font-size: 12px; color: #b45309; }
.risk-list li { margin-bottom: 4px; line-height: 1.5; }

Expand Down Expand Up @@ -200,16 +266,10 @@ def _parse_deep_dive(md: str) -> dict:
{% endif %}
</div>
<div class="paper-id">arXiv: <a href="https://arxiv.org/abs/{{ d.arxiv_id }}" target="_blank">{{ d.arxiv_id }}</a></div>
{% if d.method %}
<div class="deep-section">
<div class="deep-section-label">📐 方法</div>
<p class="deep-text">{{ d.method[:280] }}{% if d.method|length > 280 %}...{% endif %}</p>
</div>
{% endif %}
{% if d.experiments %}
{% if d.deep_dive_md_html %}
<div class="deep-section">
<div class="deep-section-label">🧪 实验</div>
<p class="deep-text">{{ d.experiments[:280] }}{% if d.experiments|length > 280 %}...{% endif %}</p>
<div class="deep-section-label">📄 精读内容</div>
<div class="deep-html">{{ d.deep_dive_md_html|safe }}</div>
</div>
{% endif %}
{% if d.risks %}
Expand Down Expand Up @@ -400,6 +460,7 @@ def build_html(self, limit: int = 30) -> str:
deep_read_highlights = []
for p, report in deep_read_papers[:5]: # 取前 5 篇
sections = _parse_deep_dive(report.deep_dive_md)
md_html = _md_to_html(report.deep_dive_md or "")
deep_read_highlights.append(
{
"id": str(p.id),
Expand All @@ -409,6 +470,7 @@ def build_html(self, limit: int = 30) -> str:
"method": sections.get("method", ""),
"experiments": sections.get("experiments", ""),
"risks": (report.key_insights or {}).get("reviewer_risks", []),
"deep_dive_md_html": md_html,
}
)

Expand Down
31 changes: 31 additions & 0 deletions packages/ai/pipelines.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,8 +420,39 @@ def _build_skim_structured(
score = 0.5
score = min(max(score, 0.0), 1.0)
one_liner = str(parsed_json.get("one_liner", "")).strip() or llm_text[:140]

# 过滤 LLM 返回的字面占位符
PLACEHOLDER_KEYWORDS = {
"创新点",
"创新点1",
"创新点2",
"创新点3",
"keyword",
"keyword1",
}
FALLBACK_KEYWORDS = {
"中文标题",
"中文标题翻译",
"中文摘要",
"中文摘要翻译",
"一句话",
"一句话总结",
"一句话中文总结",
}
innovations = [
x
for x in innovations
if x.strip() and not any(pk in x for pk in PLACEHOLDER_KEYWORDS)
]
if not innovations:
innovations = [one_liner[:80]]
if not title_zh or any(fk in title_zh for fk in FALLBACK_KEYWORDS):
title_zh = ""
if not abstract_zh or any(fk in abstract_zh for fk in FALLBACK_KEYWORDS):
abstract_zh = ""
if not one_liner or any(fk in one_liner for fk in FALLBACK_KEYWORDS):
one_liner = llm_text[:140]

return SkimReport(
one_liner=one_liner[:280],
innovations=[str(x)[:180] for x in innovations[:5]],
Expand Down
8 changes: 4 additions & 4 deletions packages/ai/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
def build_skim_prompt(title: str, abstract: str) -> str:
return (
"你是科研助手。请根据标题和摘要输出严格 JSON:\n"
'{"one_liner":"一句话中文总结", '
'"innovations":["创新点1","创新点2","创新点3"], '
'{"one_liner":"用一句话概括论文核心贡献", '
'"innovations":["从摘要中提取的创新点1","从摘要中提取的创新点2","从摘要中提取的创新点3"], '
'"keywords":["keyword1","keyword2","keyword3","keyword4","keyword5"], '
'"title_zh":"中文标题翻译", '
'"abstract_zh":"中文摘要翻译(完整翻译,不要缩写)", '
'"title_zh":"中文标题", '
'"abstract_zh":"中文摘要", '
'"relevance_score":0.0}\n'
"要求:\n"
"- one_liner、innovations、title_zh、abstract_zh 必须使用中文\n"
Expand Down