-
-
-
- IEEE 配置提示
-
-
-
- - 需要在 .env 中设置 IEEE_API_KEY
- - 免费版限制:50 次 API 调用/天
- - IEEE PDF 暂不支持在线阅读
- - 建议在主题设置中配置独立配额
-
+
+ {isSelected && }
+
+
+ );
+ })}
+ )}
+
+ );
+ })}
+
+ {selectedChannels.includes('ieee') && (
+
+
+
+
+
+ IEEE 配置提示
+
+
+ - • 需在 .env 中设置 IEEE_API_KEY
+ - • 免费版限制:50 次/天
+ - • 建议在主题设置中配置独立配额
+
From 5b997747a1c94b3c066d98677fc55a8603843302 Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Mon, 23 Mar 2026 14:26:09 +0800
Subject: [PATCH 07/14] feat(search): add MultiSourceSearchBar with channel
suggestions
---
.../search/MultiSourceSearchBar.tsx | 166 ++++++++++++++++++
1 file changed, 166 insertions(+)
create mode 100644 frontend/src/components/search/MultiSourceSearchBar.tsx
diff --git a/frontend/src/components/search/MultiSourceSearchBar.tsx b/frontend/src/components/search/MultiSourceSearchBar.tsx
new file mode 100644
index 0000000..f6a9218
--- /dev/null
+++ b/frontend/src/components/search/MultiSourceSearchBar.tsx
@@ -0,0 +1,166 @@
+import React, { useState, useCallback } from 'react';
+import { Search, Loader2, Sparkles } from 'lucide-react';
+import { useChannels } from '@/contexts/ChannelContext';
+
+interface MultiSourceSearchBarProps {
+ onSearch: (query: string, channels: string[]) => void;
+ loading?: boolean;
+}
+
+interface ChannelSuggestion {
+ recommended: string[];
+ alternatives: string[];
+ reasoning: string;
+}
+
+export const MultiSourceSearchBar: React.FC
= ({
+ onSearch,
+ loading = false,
+}) => {
+ const [query, setQuery] = useState('');
+ const [selectedChannels, setSelectedChannels] = useState(['arxiv']);
+ const [suggestions, setSuggestions] = useState(null);
+ const { channels } = useChannels();
+
+ const fetchSuggestions = useCallback(async (q: string) => {
+ if (!q.trim()) {
+ setSuggestions(null);
+ return;
+ }
+ fetch(`/papers/suggest-channels?query=${encodeURIComponent(q)}`, {
+ headers: {
+ Authorization: `Bearer ${localStorage.getItem('auth_token') || ''}`,
+ },
+ })
+ .then((res) => res.ok && res.json())
+ .then((data) => data && setSuggestions(data))
+ .catch(() => {});
+ }, []);
+
+ const handleQueryChange = (e: React.ChangeEvent) => {
+ const val = e.target.value;
+ setQuery(val);
+ fetchSuggestions(val);
+ };
+
+ const handleChannelToggle = (channelId: string) => {
+ setSelectedChannels((prev) =>
+ prev.includes(channelId)
+ ? prev.filter((id) => id !== channelId)
+ : [...prev, channelId],
+ );
+ };
+
+ const handleSearch = () => {
+ if (!query.trim() || selectedChannels.length === 0) return;
+ onSearch(query, selectedChannels);
+ };
+
+ const handleKeyDown = (e: React.KeyboardEvent) => {
+ if (e.key === 'Enter') {
+ handleSearch();
+ }
+ };
+
+ const applyRecommendation = () => {
+ if (suggestions?.recommended) {
+ setSelectedChannels(suggestions.recommended);
+ }
+ };
+
+ return (
+
+
+
+
+
+
+
+
+
+ {suggestions && suggestions.recommended.length > 0 && (
+
+
+
+ 推荐渠道:
+
+
+ {suggestions.recommended.map((id) => {
+ const ch = channels.find((c) => c.id === id);
+ return ch ? (
+
+ {ch.name}
+
+ ) : null;
+ })}
+
+          {JSON.stringify([...suggestions.recommended].sort()) !== JSON.stringify([...selectedChannels].sort()) && (
+
+ )}
+
+ )}
+
+
+ 渠道:
+ {channels.map((channel) => {
+ const isSelected = selectedChannels.includes(channel.id);
+ return (
+
+ );
+ })}
+
+
+ );
+};
+
+export default MultiSourceSearchBar;
From c606cd73ebe7005910a12965329902b95cc12906 Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Mon, 23 Mar 2026 14:28:21 +0800
Subject: [PATCH 08/14] feat(search): add SearchResultsList with channel
filtering
---
.../components/search/SearchResultsList.tsx | 227 ++++++++++++++++++
1 file changed, 227 insertions(+)
create mode 100644 frontend/src/components/search/SearchResultsList.tsx
diff --git a/frontend/src/components/search/SearchResultsList.tsx b/frontend/src/components/search/SearchResultsList.tsx
new file mode 100644
index 0000000..14d388d
--- /dev/null
+++ b/frontend/src/components/search/SearchResultsList.tsx
@@ -0,0 +1,227 @@
+import { useState } from 'react';
+import { ChevronDown, ExternalLink, Star, AlertCircle } from 'lucide-react';
+
+export interface SearchPaperSource {
+ channel: string;
+ externalId: string;
+ citations?: number;
+ impactFactor?: number;
+ tldr?: string;
+ url?: string;
+}
+
+export interface SearchPaper {
+ id: string;
+ title: string;
+ authors: string[];
+ year?: number;
+ venue?: string;
+ abstract?: string;
+ citations?: number;
+ sources: SearchPaperSource[];
+}
+
+export interface ChannelStat {
+ total: number;
+ new: number;
+ duplicates: number;
+ error?: string;
+}
+
+interface SearchResultsListProps {
+ results: SearchPaper[];
+ channelStats: Record;
+ loading?: boolean;
+ onPaperClick?: (paper: SearchPaper) => void;
+ filterChannel: string | null;
+ onFilterChange: (channel: string | null) => void;
+}
+
+export function SearchResultsList({
+ results,
+ channelStats,
+ loading,
+ onPaperClick,
+ filterChannel,
+ onFilterChange,
+}: SearchResultsListProps) {
+ const [expandedPaper, setExpandedPaper] = useState(null);
+
+ const filtered = filterChannel
+ ? results.filter((p) => p.sources.some((s) => s.channel === filterChannel))
+ : results;
+
+ const totalResults = Object.values(channelStats).reduce(
+ (sum, s) => sum + s.total,
+ 0,
+ );
+
+ if (loading) {
+ return (
+
+ );
+ }
+
+ if (results.length === 0) {
+ return (
+
+ );
+ }
+
+ return (
+
+
+
+
+ 共 {totalResults} 篇,来自
+
+ {Object.entries(channelStats).map(([ch, stat]) => (
+
+ {ch}: {stat.total}
+ {stat.error && ` (${stat.error})`}
+
+ ))}
+
+
+
+
+
+
+ {filtered.map((paper) => {
+ const isExpanded = expandedPaper === paper.id;
+ const primarySource = paper.sources[0];
+
+ return (
+
+
+
+ {isExpanded && paper.sources.length > 1 && (
+
+
+
+
+ | 渠道 |
+ 外部ID |
+ 引用 |
+ 影响因子 |
+ 特殊 |
+
+
+
+ {paper.sources.map((source) => (
+
+ | {source.channel} |
+
+ {source.externalId.slice(0, 20)}...
+ |
+ {source.citations ?? '-'} |
+
+ {source.impactFactor ?? '-'}
+ |
+
+ {source.tldr && (
+ TL;DR
+ )}
+ |
+
+ ))}
+
+
+
+ )}
+
+ {isExpanded && primarySource?.url && (
+
+ )}
+
+ );
+ })}
+
+
+ );
+}
+
+export default SearchResultsList;
From 8307e9c3a9d41058c0588779cac42f045fcc3625 Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Mon, 23 Mar 2026 14:31:13 +0800
Subject: [PATCH 09/14] feat(api): add search-multi endpoint with
ResultAggregator
---
apps/api/routers/papers.py | 66 +++++++++++++++++++++++++++++
packages/integrations/__init__.py | 4 ++
packages/integrations/aggregator.py | 49 +++++++++++++++++++++
3 files changed, 119 insertions(+)
create mode 100644 packages/integrations/aggregator.py
diff --git a/apps/api/routers/papers.py b/apps/api/routers/papers.py
index 1feeeef..3f5300c 100644
--- a/apps/api/routers/papers.py
+++ b/apps/api/routers/papers.py
@@ -88,6 +88,72 @@ def recommended_papers(top_k: int = Query(default=10, ge=1, le=50)) -> dict:
return {"items": RecommendationService().recommend(top_k=top_k)}
+@router.post("/papers/search-multi")
+async def search_multi(
+ query: str,
+ channels: list[str] = Query(default=["arxiv"]),
+ max_results_per_channel: int = Query(default=50, ge=1, le=100),
+ topic_id: str | None = Query(default=None),
+) -> dict:
+ """多渠道并行搜索论文"""
+ import asyncio
+ import logging
+
+ from packages.integrations.aggregator import ResultAggregator
+ from packages.integrations.registry import ChannelRegistry
+
+ logger = logging.getLogger(__name__)
+
+ ChannelRegistry.register_default_channels()
+
+ async def fetch_channel(ch: str) -> tuple[str, list, dict]:
+ try:
+ channel = ChannelRegistry.get(ch)
+ if not channel:
+ return ch, [], {"error": "channel not found"}
+ papers = await asyncio.to_thread(channel.fetch, query, max_results_per_channel)
+ return ch, papers, {"total": len(papers)}
+ except Exception as exc: # noqa: BLE001
+ logger.warning("Channel %s failed: %s", ch, exc)
+ return ch, [], {"error": str(exc)}
+
+ tasks = [fetch_channel(ch) for ch in channels]
+ results = await asyncio.gather(*tasks, return_exceptions=True)
+
+ aggregator = ResultAggregator()
+ channel_stats: dict[str, dict[str, int | str]] = {}
+
+ for result in results:
+ if isinstance(result, Exception):
+ logger.error("Channel task failed: %s", result)
+ continue
+ ch, papers, meta = result
+ channel_stats[ch] = {"total": 0, "new": 0, "duplicates": 0}
+ if "error" in meta:
+ channel_stats[ch]["error"] = meta["error"]
+ else:
+ channel_stats[ch]["total"] = meta.get("total", 0)
+ aggregator.add_results(ch, papers, meta)
+
+ aggregated = aggregator.get_sorted_results()
+
+ return {
+ "papers": [
+ {
+ "id": f"temp-{i}",
+ "title": r.paper.title,
+ "authors": r.paper.authors or [],
+ "year": r.paper.publication_date.year if r.paper.publication_date else None,
+ "venue": r.paper.venue,
+ "abstract": r.paper.abstract,
+ "sources": r.sources,
+ }
+ for i, r in enumerate(aggregated)
+ ],
+ "channel_stats": channel_stats,
+ }
+
+
@router.get("/papers/proxy-arxiv-pdf/{arxiv_id:path}")
async def proxy_arxiv_pdf(arxiv_id: str):
"""代理访问 arXiv PDF(解决 CORS 问题)"""
diff --git a/packages/integrations/__init__.py b/packages/integrations/__init__.py
index cc09184..db02363 100644
--- a/packages/integrations/__init__.py
+++ b/packages/integrations/__init__.py
@@ -12,6 +12,8 @@
"""
# 渠道适配器(完整版新增)
+# 聚合器
+from packages.integrations.aggregator import ResultAggregator
from packages.integrations.arxiv_channel import ArxivChannel
# 原始客户端
@@ -35,6 +37,8 @@
from packages.integrations.semantic_scholar_search_client import SemanticScholarSearchClient
__all__ = [
+ # 聚合器
+ "ResultAggregator",
# 渠道适配器
"ChannelBase",
"ArxivChannel",
diff --git a/packages/integrations/aggregator.py b/packages/integrations/aggregator.py
new file mode 100644
index 0000000..b983fb4
--- /dev/null
+++ b/packages/integrations/aggregator.py
@@ -0,0 +1,49 @@
+from dataclasses import dataclass, field
+from typing import Any
+
+from packages.domain.schemas import PaperCreate
+
+
+@dataclass
+class AggregatedPaper:
+ paper: PaperCreate
+ sources: list[dict[str, Any]] = field(default_factory=list)
+
+
+class ResultAggregator:
+ def __init__(self):
+ self.results: list[AggregatedPaper] = []
+
+ def add_results(
+ self, channel: str, papers: list[PaperCreate], metadata: dict[str, Any]
+ ) -> None:
+ for paper in papers:
+ existing = self._find_existing(paper)
+ if existing:
+ existing.sources.append({"channel": channel, **metadata})
+ else:
+ self.results.append(
+ AggregatedPaper(
+ paper=paper,
+ sources=[{"channel": channel, **metadata}],
+ )
+ )
+
+ def _find_existing(self, paper: PaperCreate) -> AggregatedPaper | None:
+ for result in self.results:
+ if result.paper.doi and paper.doi and result.paper.doi == paper.doi:
+ return result
+ return None
+
+ def get_sorted_results(self) -> list[AggregatedPaper]:
+ return sorted(self.results, key=lambda r: len(r.sources), reverse=True)
+
+ def get_stats(self) -> dict[str, dict[str, int]]:
+ stats: dict[str, dict[str, int]] = {}
+ for result in self.results:
+ for source in result.sources:
+ ch = source.get("channel", "unknown")
+ if ch not in stats:
+ stats[ch] = {"total": 0, "new": 0, "duplicates": 0}
+ stats[ch]["total"] += 1
+ return stats
From 72f6b2918caed8eeafaf475acbf66d1e00336d86 Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Mon, 23 Mar 2026 14:32:59 +0800
Subject: [PATCH 10/14] feat(api): add suggest-channels endpoint with
SmartRouter
---
apps/api/routers/papers.py | 18 ++++++
packages/worker/smart_router.py | 98 +++++++++++++++++++++++++++++++++
2 files changed, 116 insertions(+)
create mode 100644 packages/worker/smart_router.py
diff --git a/apps/api/routers/papers.py b/apps/api/routers/papers.py
index 3f5300c..d1cb89e 100644
--- a/apps/api/routers/papers.py
+++ b/apps/api/routers/papers.py
@@ -154,6 +154,24 @@ async def fetch_channel(ch: str) -> tuple[str, list, dict]:
}
+@router.get("/papers/suggest-channels")
+def suggest_channels(query: str) -> dict:
+ """根据关键词推荐合适的渠道"""
+ from packages.integrations.registry import ChannelRegistry
+ from packages.worker.smart_router import suggest_channels as get_suggestion
+
+ ChannelRegistry.register_default_channels()
+ available = ChannelRegistry.list_channels()
+
+ recommended, alternatives, reasoning = get_suggestion(query, available)
+
+ return {
+ "recommended": recommended,
+ "alternatives": alternatives,
+ "reasoning": reasoning,
+ }
+
+
@router.get("/papers/proxy-arxiv-pdf/{arxiv_id:path}")
async def proxy_arxiv_pdf(arxiv_id: str):
"""代理访问 arXiv PDF(解决 CORS 问题)"""
diff --git a/packages/worker/smart_router.py b/packages/worker/smart_router.py
new file mode 100644
index 0000000..a285ab7
--- /dev/null
+++ b/packages/worker/smart_router.py
@@ -0,0 +1,98 @@
+CHANNEL_KEYWORDS = {
+ "arxiv": [
+ "ml",
+ "machine learning",
+ "deep learning",
+ "neural",
+ "transformer",
+ "nlp",
+ "cv",
+ "computer vision",
+ "artificial intelligence",
+ "reinforcement learning",
+ "supervised",
+ "unsupervised",
+ ],
+ "semantic_scholar": [
+ "ai",
+ "ml",
+ "citation",
+ "tldr",
+ "summary",
+ "impact",
+ "influential",
+ ],
+ "dblp": [
+ "nips",
+ "icml",
+ "cvpr",
+ "iccv",
+ "acl",
+ "emnlp",
+ "neurips",
+ "conference",
+ "paper",
+ ],
+ "ieee": [
+ "ieee",
+ "signal processing",
+ "wireless",
+ "5g",
+ "6g",
+ "iot",
+ "circuit",
+ "power",
+ ],
+ "biorxiv": [
+ "crispr",
+ "gene",
+ "protein",
+ "biology",
+ "bioinformatics",
+ "neuroscience",
+ "genome",
+ "cell",
+ "bio",
+ ],
+ "openalex": ["*"],
+}
+
+DEFAULT_CHANNELS = ["arxiv"]
+
+
+def suggest_channels(query: str, available_channels: list[str]) -> tuple[list[str], list[str], str]:
+ query_lower = query.lower()
+ recommended = []
+ alternatives = []
+ reasoning_parts = []
+
+ for channel, keywords in CHANNEL_KEYWORDS.items():
+ if channel not in available_channels:
+ continue
+
+ score = 0
+ for kw in keywords:
+ if kw == "*":
+ score += 1
+ continue
+ if kw in query_lower:
+ score += 1
+
+ if score > 0:
+ if score >= 2:
+ recommended.append(channel)
+ reasoning_parts.append(f"{channel} 匹配 {score} 个关键词")
+ else:
+ alternatives.append(channel)
+
+    if not recommended and available_channels:
+        recommended = [ch for ch in DEFAULT_CHANNELS if ch in available_channels]
+        if not recommended:
+            recommended = [available_channels[0]]
+        reasoning_parts.append("使用默认渠道")
+
+ return (
+ recommended,
+ alternatives,
+ "; ".join(reasoning_parts) if reasoning_parts else "无特定匹配",
+ )
From 2286460b6441487c9c9725bb7382ddf1b35a2e4f Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Mon, 23 Mar 2026 14:34:21 +0800
Subject: [PATCH 11/14] feat(worker): add ChannelWorkerPool for parallel
fetching
---
packages/worker/channel_pool.py | 69 +++++++++++++++++++++++++++++++++
1 file changed, 69 insertions(+)
create mode 100644 packages/worker/channel_pool.py
diff --git a/packages/worker/channel_pool.py b/packages/worker/channel_pool.py
new file mode 100644
index 0000000..405df19
--- /dev/null
+++ b/packages/worker/channel_pool.py
@@ -0,0 +1,69 @@
+import asyncio
+import logging
+from dataclasses import dataclass
+from typing import Any
+
+from packages.domain.schemas import PaperCreate
+from packages.integrations.aggregator import ResultAggregator
+from packages.integrations.registry import ChannelRegistry
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class ChannelResult:
+ channel: str
+ papers: list[PaperCreate]
+ metadata: dict[str, Any]
+ error: str | None = None
+
+
+class ChannelWorkerPool:
+ def __init__(self, max_concurrent: int = 3):
+ self.max_concurrent = max_concurrent
+ self.semaphore = asyncio.Semaphore(max_concurrent)
+
+ async def fetch_all(
+ self,
+ query: str,
+ channels: list[str],
+ max_per_channel: int = 50,
+ ) -> list[ChannelResult]:
+ ChannelRegistry.register_default_channels()
+
+ tasks = [self._fetch_channel(ch, query, max_per_channel) for ch in channels]
+ return await asyncio.gather(*tasks)
+
+ async def _fetch_channel(self, channel: str, query: str, max_results: int) -> ChannelResult:
+ async with self.semaphore:
+ try:
+ ch = ChannelRegistry.get(channel)
+ if not ch:
+ return ChannelResult(channel, [], {}, error="channel not found")
+
+ papers = await asyncio.to_thread(ch.fetch, query, max_results)
+ return ChannelResult(
+ channel=channel,
+ papers=papers,
+ metadata={"total": len(papers)},
+ )
+ except Exception as exc: # noqa: BLE001
+ logger.warning("Channel %s failed: %s", channel, exc)
+ return ChannelResult(channel, [], {}, error=str(exc))
+
+ async def fetch_and_aggregate(
+ self,
+ query: str,
+ channels: list[str],
+ max_per_channel: int = 50,
+ ) -> tuple[ResultAggregator, list[ChannelResult]]:
+ results = await self.fetch_all(query, channels, max_per_channel)
+
+ aggregator = ResultAggregator()
+ for result in results:
+ if result.error:
+ logger.warning("Channel %s failed: %s", result.channel, result.error)
+ continue
+ aggregator.add_results(result.channel, result.papers, result.metadata)
+
+ return aggregator, results
From 5dd527044baea5553f33b79da2040241b580cd98 Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Mon, 23 Mar 2026 14:35:53 +0800
Subject: [PATCH 12/14] feat(quota): add QuotaManager for IEEE API quota
management
---
packages/worker/quota_manager.py | 70 ++++++++++++++++++++++++++++++++
1 file changed, 70 insertions(+)
create mode 100644 packages/worker/quota_manager.py
diff --git a/packages/worker/quota_manager.py b/packages/worker/quota_manager.py
new file mode 100644
index 0000000..51a92ec
--- /dev/null
+++ b/packages/worker/quota_manager.py
@@ -0,0 +1,70 @@
+import logging
+
+from packages.storage.db import session_scope
+from packages.storage.models import TopicSubscription
+
+logger = logging.getLogger(__name__)
+
+
+class QuotaManager:
+ """IEEE API配额管理器"""
+
+ @staticmethod
+ def check_quota(topic_id: str, needed: int = 1) -> bool:
+ """检查主题是否有足够的IEEE配额"""
+ with session_scope() as session:
+ topic = session.get(TopicSubscription, topic_id)
+ if not topic:
+ return True
+
+ if "ieee" not in (topic.sources or []):
+ return True
+
+ if topic.ieee_daily_quota <= 0:
+ return False
+
+ remaining = topic.ieee_daily_quota
+ return remaining >= needed
+
+ @staticmethod
+ def reserve_quota(topic_id: str, count: int = 1) -> bool:
+ """预占配额(不实际消耗,只是检查是否足够)"""
+ return QuotaManager.check_quota(topic_id, count)
+
+ @staticmethod
+ def get_remaining(topic_id: str) -> int:
+ """获取主题剩余的IEEE配额"""
+ with session_scope() as session:
+ topic = session.get(TopicSubscription, topic_id)
+ if not topic:
+ return 0
+
+ if "ieee" not in (topic.sources or []):
+ return 0
+
+ return max(0, topic.ieee_daily_quota)
+
+ @staticmethod
+ def is_channel_enabled(topic_id: str, channel: str) -> bool:
+ """检查主题是否启用了某渠道"""
+ if channel != "ieee":
+ return True
+
+ with session_scope() as session:
+ topic = session.get(TopicSubscription, topic_id)
+ if not topic:
+ return True
+
+ sources = topic.sources or []
+ return channel in sources and topic.ieee_daily_quota > 0
+
+ @staticmethod
+ def filter_channels_by_quota(topic_id: str, channels: list[str]) -> list[str]:
+ """过滤掉没有配额的渠道"""
+ result = []
+ for ch in channels:
+ if ch == "ieee" and not QuotaManager.check_quota(topic_id):
+ logger.debug("IEEE quota exhausted for topic %s, skipping", topic_id)
+ continue
+ result.append(ch)
+ return result
From 549f4cffea54f4f2703b7620706fddbb2ab1413b Mon Sep 17 00:00:00 2001
From: Color2333 <1552429809@qq.com>
Date: Mon, 23 Mar 2026 14:59:11 +0800
Subject: [PATCH 13/14] fix: resolve IEEE config and PaperCreate schema issues
---
apps/api/routers/papers.py | 4 ++--
frontend/src/components/topics/index.ts | 14 +++-----------
2 files changed, 5 insertions(+), 13 deletions(-)
diff --git a/apps/api/routers/papers.py b/apps/api/routers/papers.py
index d1cb89e..0f21555 100644
--- a/apps/api/routers/papers.py
+++ b/apps/api/routers/papers.py
@@ -142,9 +142,9 @@ async def fetch_channel(ch: str) -> tuple[str, list, dict]:
{
"id": f"temp-{i}",
"title": r.paper.title,
- "authors": r.paper.authors or [],
+ "authors": r.paper.metadata.get("authors", []),
"year": r.paper.publication_date.year if r.paper.publication_date else None,
- "venue": r.paper.venue,
+ "venue": r.paper.metadata.get("venue"),
"abstract": r.paper.abstract,
"sources": r.sources,
}
diff --git a/frontend/src/components/topics/index.ts b/frontend/src/components/topics/index.ts
index 205d610..94bc505 100644
--- a/frontend/src/components/topics/index.ts
+++ b/frontend/src/components/topics/index.ts
@@ -1,15 +1,7 @@
-/**
- * IEEE 集成 - 前端组件导出
- * 完整版新增多渠道配置支持
- *
- * @author Color2333
- */
+export { TopicChannelSelector } from './TopicChannelSelector';
+export { IeeeQuotaConfig } from './IeeeQuotaConfig';
-export { TopicChannelSelector } from './topics/TopicChannelSelector';
-export { IeeeQuotaConfig } from './topics/IeeeQuotaConfig';
-
-// 类型导出
export type {
TopicChannelSelectorProps,
IeeeQuotaConfigProps,
-} from './topics/types';
+} from './types';
From 239aa2c6d48427fcdee4545840d04fa7789b6e38 Mon Sep 17 00:00:00 2001
From: "opencode-agent[bot]"
Date: Mon, 23 Mar 2026 07:44:11 +0000
Subject: [PATCH 14/14] =?UTF-8?q?7=20=E4=B8=AA=20review=20=E9=97=AE?=
=?UTF-8?q?=E9=A2=98=E5=B7=B2=E5=85=A8=E9=83=A8=E4=BF=AE=E5=A4=8D=E5=AE=8C?=
=?UTF-8?q?=E6=88=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: Color2333
---
frontend/src/contexts/ChannelContext.tsx | 33 ++++++-
.../versions/20260303_0009_ieee_mvp.py | 25 +++--
packages/domain/schemas.py | 8 ++
packages/integrations/aggregator.py | 13 +++
packages/integrations/ieee_client.py | 28 +++---
packages/worker/quota_manager.py | 92 +++++++++++++++++--
packages/worker/smart_router.py | 72 +++++++++++++++
7 files changed, 244 insertions(+), 27 deletions(-)
diff --git a/frontend/src/contexts/ChannelContext.tsx b/frontend/src/contexts/ChannelContext.tsx
index 25e492e..eb07c71 100644
--- a/frontend/src/contexts/ChannelContext.tsx
+++ b/frontend/src/contexts/ChannelContext.tsx
@@ -5,7 +5,7 @@
* @author Color2333
*/
-import { createContext, useContext, useState, useCallback, ReactNode } from 'react';
+import { createContext, useContext, useState, useCallback, ReactNode, useEffect } from 'react';
export interface Channel {
id: string;
@@ -21,9 +21,12 @@ export interface Channel {
interface ChannelContextValue {
channels: Channel[];
defaultChannels: string[];
+ loading: boolean;
+ error: string | null;
getChannel: (id: string) => Channel | undefined;
updateChannelStatus: (id: string, status: Channel['status']) => void;
setDefaultChannels: (channels: string[]) => void;
+ refreshChannels: () => Promise;
}
const ChannelContext = createContext(null);
@@ -31,6 +34,31 @@ const ChannelContext = createContext(null);
export function ChannelProvider({ children }: { children: ReactNode }) {
const [channels, setChannels] = useState(INITIAL_CHANNELS);
const [defaultChannels, setDefaultChannels] = useState(['arxiv']);
+ const [loading, setLoading] = useState(true);
+ const [error, setError] = useState(null);
+
+ const fetchChannels = useCallback(async () => {
+ try {
+ setLoading(true);
+ const response = await fetch('/api/papers/suggest-channels');
+ if (!response.ok) {
+ throw new Error(`HTTP ${response.status}`);
+ }
+ const data = await response.json();
+ setChannels(data.channels || INITIAL_CHANNELS);
+ setError(null);
+ } catch (err) {
+ setError(err instanceof Error ? err.message : '加载失败');
+ // 降级:使用默认渠道列表
+ setChannels(INITIAL_CHANNELS);
+ } finally {
+ setLoading(false);
+ }
+ }, []);
+
+ useEffect(() => {
+ fetchChannels();
+ }, [fetchChannels]);
const getChannel = useCallback(
(id: string) => channels.find((c) => c.id === id),
@@ -55,9 +83,12 @@ export function ChannelProvider({ children }: { children: ReactNode }) {
value={{
channels,
defaultChannels,
+ loading,
+ error,
getChannel,
updateChannelStatus,
setDefaultChannels: setDefault,
+ refreshChannels: fetchChannels,
}}
>
{children}
diff --git a/infra/migrations/versions/20260303_0009_ieee_mvp.py b/infra/migrations/versions/20260303_0009_ieee_mvp.py
index f357325..f065f21 100644
--- a/infra/migrations/versions/20260303_0009_ieee_mvp.py
+++ b/infra/migrations/versions/20260303_0009_ieee_mvp.py
@@ -47,15 +47,24 @@ def upgrade() -> None:
# 但为了安全,我们用更安全的方式:保留 arxiv_id 原样
# 5. 数据迁移:将现有 arxiv_id 复制到 source_id
- # 使用 SQLAlchemy 执行原生 SQL
+ # 分批更新,避免锁表(大数据量场景)
conn = op.get_bind()
- conn.execute(
- sa.text("""
- UPDATE papers
- SET source_id = arxiv_id, source = 'arxiv'
- WHERE source_id IS NULL AND arxiv_id IS NOT NULL
- """)
- )
+    batch_size = 10000
+    while True:
+        result = conn.execute(
+            sa.text("""
+                UPDATE papers
+                SET source_id = arxiv_id, source = 'arxiv'
+                WHERE rowid IN (
+                    SELECT rowid FROM papers
+                    WHERE source_id IS NULL AND arxiv_id IS NOT NULL
+                    LIMIT :batch_size
+                )
+            """),
+            {"batch_size": batch_size},
+        )
+        if result.rowcount == 0:
+            break
# 6. 设置 source 字段为 NOT NULL(所有记录都已设置默认值)
with op.batch_alter_table("papers", schema=None) as batch_op:
diff --git a/packages/domain/schemas.py b/packages/domain/schemas.py
index 736bf5e..a915336 100644
--- a/packages/domain/schemas.py
+++ b/packages/domain/schemas.py
@@ -13,6 +13,7 @@ class PaperCreate(BaseModel):
doi: str | None = None # DOI 号(可选,IEEE 论文常用)
# 保留字段(向后兼容)- ArXiv 特定
+ # @deprecated: 使用 source_id + source 字段代替
arxiv_id: str | None = None # ArXiv ID(可选,仅 ArXiv 渠道使用)
# 通用字段
@@ -21,6 +22,13 @@ class PaperCreate(BaseModel):
publication_date: date | None = None
metadata: dict = Field(default_factory=dict)
+ @property
+ def normalized_arxiv_id(self) -> str | None:
+ """归一化的 arxiv_id 获取方法"""
+ if self.source == "arxiv":
+ return self.source_id or self.arxiv_id
+ return self.arxiv_id
+
class SkimReport(BaseModel):
one_liner: str
diff --git a/packages/integrations/aggregator.py b/packages/integrations/aggregator.py
index b983fb4..fe2b0ee 100644
--- a/packages/integrations/aggregator.py
+++ b/packages/integrations/aggregator.py
@@ -1,3 +1,4 @@
+import re
from dataclasses import dataclass, field
from typing import Any
@@ -31,10 +32,22 @@ def add_results(
def _find_existing(self, paper: PaperCreate) -> AggregatedPaper | None:
for result in self.results:
+ # 优先匹配 DOI
if result.paper.doi and paper.doi and result.paper.doi == paper.doi:
return result
+            # 其次匹配标题(归一化后;空串不参与匹配,避免纯 CJK 标题全部归并为一篇)
+            if (norm := self._normalize_title(paper.title)) and norm == self._normalize_title(result.paper.title):
+                return result
+ # 最后匹配 arxiv_id
+ if (result.paper.normalized_arxiv_id and paper.normalized_arxiv_id and
+ result.paper.normalized_arxiv_id == paper.normalized_arxiv_id):
+ return result
return None
+ def _normalize_title(self, title: str) -> str:
+ """归一化标题:转小写、去空格、去标点"""
+ return re.sub(r'[^a-z0-9]', '', title.lower())
+
def get_sorted_results(self) -> list[AggregatedPaper]:
return sorted(self.results, key=lambda r: len(r.sources), reverse=True)
diff --git a/packages/integrations/ieee_client.py b/packages/integrations/ieee_client.py
index 16cf7b5..14b5402 100644
--- a/packages/integrations/ieee_client.py
+++ b/packages/integrations/ieee_client.py
@@ -11,6 +11,7 @@
import logging
import os
+import threading
import time
from dataclasses import dataclass
from datetime import date, datetime
@@ -73,6 +74,7 @@ def __init__(self, api_key: str | None = None) -> None:
"""
settings = get_settings()
self.api_key = api_key or os.getenv("IEEE_API_KEY")
+ self._lock = threading.Lock()
self._client: httpx.Client | None = None
if not self.api_key:
@@ -80,19 +82,21 @@ def __init__(self, api_key: str | None = None) -> None:
@property
def client(self) -> httpx.Client:
- """复用 httpx.Client 连接池"""
+ """复用 httpx.Client 连接池(线程安全)"""
if self._client is None or self._client.is_closed:
- headers = {}
- if self.api_key:
- headers["apikey"] = self.api_key
-
- self._client = httpx.Client(
- base_url=IEEE_API_BASE,
- timeout=20,
- headers=headers,
- follow_redirects=True,
- )
- logger.info("IEEE Client 初始化完成")
+ with self._lock:
+ if self._client is None or self._client.is_closed:
+ headers = {}
+ if self.api_key:
+ headers["apikey"] = self.api_key
+
+ self._client = httpx.Client(
+ base_url=IEEE_API_BASE,
+ timeout=20,
+ headers=headers,
+ follow_redirects=True,
+ )
+ logger.info("IEEE Client 初始化完成")
return self._client
def _get(self, path: str, params: dict | None = None) -> dict | None:
diff --git a/packages/worker/quota_manager.py b/packages/worker/quota_manager.py
index 51a92ec..5c831c9 100644
--- a/packages/worker/quota_manager.py
+++ b/packages/worker/quota_manager.py
@@ -1,7 +1,8 @@
import logging
+from datetime import date
from packages.storage.db import session_scope
-from packages.storage.models import TopicSubscription
+from packages.storage.models import IeeeApiQuota, TopicSubscription
logger = logging.getLogger(__name__)
@@ -11,7 +12,7 @@ class QuotaManager:
@staticmethod
def check_quota(topic_id: str, needed: int = 1) -> bool:
- """检查主题是否有足够的IEEE配额"""
+ """检查主题是否有足够的 IEEE 配额"""
with session_scope() as session:
topic = session.get(TopicSubscription, topic_id)
if not topic:
@@ -23,7 +24,17 @@ def check_quota(topic_id: str, needed: int = 1) -> bool:
if topic.ieee_daily_quota <= 0:
return False
- remaining = topic.ieee_daily_quota
+ today = date.today()
+ quota_record = (
+ session.query(IeeeApiQuota)
+ .filter_by(topic_id=topic_id, date=today)
+ .first()
+ )
+
+ if not quota_record:
+ return topic.ieee_daily_quota >= needed
+
+ remaining = quota_record.api_calls_limit - quota_record.api_calls_used
return remaining >= needed
@staticmethod
@@ -31,9 +42,42 @@ def reserve_quota(topic_id: str, count: int = 1) -> bool:
"""预占配额(不实际消耗,只是检查是否足够)"""
return QuotaManager.check_quota(topic_id, count)
+ @staticmethod
+ def consume_quota(topic_id: str, count: int = 1) -> bool:
+ """实际消耗配额"""
+ with session_scope() as session:
+ topic = session.get(TopicSubscription, topic_id)
+ if not topic:
+ return True
+
+ if "ieee" not in (topic.sources or []):
+ return True
+
+ today = date.today()
+ quota_record = (
+ session.query(IeeeApiQuota)
+ .filter_by(topic_id=topic_id, date=today)
+ .first()
+ )
+
+ if not quota_record:
+ quota_record = IeeeApiQuota(
+ topic_id=topic_id,
+ date=today,
+ api_calls_used=0,
+ api_calls_limit=topic.ieee_daily_quota,
+ )
+ session.add(quota_record)
+
+ if quota_record.api_calls_used + count > quota_record.api_calls_limit:
+ return False
+
+ quota_record.api_calls_used += count
+ return True
+
@staticmethod
def get_remaining(topic_id: str) -> int:
- """获取主题剩余的IEEE配额"""
+ """获取主题剩余的 IEEE 配额"""
with session_scope() as session:
topic = session.get(TopicSubscription, topic_id)
if not topic:
@@ -42,7 +86,17 @@ def get_remaining(topic_id: str) -> int:
if "ieee" not in (topic.sources or []):
return 0
- return max(0, topic.ieee_daily_quota)
+ today = date.today()
+ quota_record = (
+ session.query(IeeeApiQuota)
+ .filter_by(topic_id=topic_id, date=today)
+ .first()
+ )
+
+ if not quota_record:
+ return topic.ieee_daily_quota
+
+ return max(0, quota_record.api_calls_limit - quota_record.api_calls_used)
@staticmethod
def is_channel_enabled(topic_id: str, channel: str) -> bool:
@@ -56,7 +110,11 @@ def is_channel_enabled(topic_id: str, channel: str) -> bool:
return True
sources = topic.sources or []
- return channel in sources and topic.ieee_daily_quota > 0
+ if channel not in sources:
+ return True
+
+ remaining = QuotaManager.get_remaining(topic_id)
+ return remaining > 0
@staticmethod
def filter_channels_by_quota(topic_id: str, channels: list[str]) -> list[str]:
@@ -68,3 +126,25 @@ def filter_channels_by_quota(topic_id: str, channels: list[str]) -> list[str]:
continue
result.append(ch)
return result
+
+ @staticmethod
+ def reset_quota(topic_id: str) -> None:
+ """重置主题的 IEEE 配额(用于测试或手动重置)"""
+ with session_scope() as session:
+ topic = session.get(TopicSubscription, topic_id)
+ if not topic:
+ return
+
+ if "ieee" not in (topic.sources or []):
+ return
+
+ today = date.today()
+ quota_record = (
+ session.query(IeeeApiQuota)
+ .filter_by(topic_id=topic_id, date=today)
+ .first()
+ )
+
+ if quota_record:
+ quota_record.api_calls_used = 0
+ logger.info("IEEE quota reset for topic %s", topic_id)
diff --git a/packages/worker/smart_router.py b/packages/worker/smart_router.py
index a285ab7..d4ebb34 100644
--- a/packages/worker/smart_router.py
+++ b/packages/worker/smart_router.py
@@ -57,6 +57,12 @@
"openalex": ["*"],
}
+CHANNEL_NEGATIVE_KEYWORDS = {
+ "ieee": ["not ieee", "exclude ieee"],
+ "arxiv": ["not arxiv", "exclude arxiv"],
+ "biorxiv": ["not biology", "exclude biology"],
+}
+
DEFAULT_CHANNELS = ["arxiv"]
@@ -96,3 +102,69 @@ def suggest_channels(query: str, available_channels: list[str]) -> tuple[list[st
alternatives,
"; ".join(reasoning_parts) if reasoning_parts else "无特定匹配",
)
+
+
+def suggest_channels_with_intent(
+ query: str, available_channels: list[str], exclude_channels: list[str] | None = None
+) -> tuple[list[str], list[str], str]:
+ """
+ 基于意图的智能渠道推荐(支持否定关键词)
+
+ Args:
+ query: 用户查询
+ available_channels: 可用渠道列表
+ exclude_channels: 排除渠道列表(可选)
+
+ Returns:
+ tuple[list[str], list[str], str]: (推荐渠道,备选渠道,推荐理由)
+
+ 示例:
+ >>> suggest_channels_with_intent("5G wireless NOT IEEE", ["arxiv", "ieee"])
+ (['arxiv'], ['ieee'], 'arxiv 匹配 2 个关键词; ieee 被否定关键词排除')
+ """
+ if exclude_channels is None:
+ exclude_channels = []
+
+ query_lower = query.lower()
+ recommended = []
+ alternatives = []
+ reasoning_parts = []
+
+ for channel, keywords in CHANNEL_KEYWORDS.items():
+ if channel not in available_channels or channel in exclude_channels:
+ continue
+
+ # 检查否定关键词
+ negative_keywords = CHANNEL_NEGATIVE_KEYWORDS.get(channel, [])
+ is_excluded = any(neg_kw in query_lower for neg_kw in negative_keywords)
+
+ if is_excluded:
+ reasoning_parts.append(f"{channel} 被否定关键词排除")
+ continue
+
+ score = 0
+ for kw in keywords:
+ if kw == "*":
+ score += 1
+ continue
+ if kw in query_lower:
+ score += 1
+
+ if score > 0:
+ if score >= 2:
+ recommended.append(channel)
+ reasoning_parts.append(f"{channel} 匹配 {score} 个关键词")
+ else:
+ alternatives.append(channel)
+
+    if not recommended and available_channels:
+        recommended = [ch for ch in DEFAULT_CHANNELS if ch in available_channels]
+        if not recommended:
+            recommended = [available_channels[0]]
+        reasoning_parts.append("使用默认渠道")
+
+ return (
+ recommended,
+ alternatives,
+ "; ".join(reasoning_parts) if reasoning_parts else "无特定匹配",
+ )