Skip to content

Commit 73917e7

Browse files
committed
Merge branch 'main' into feat/add-feed-comment-count
2 parents 6a24b86 + 5429119 commit 73917e7

31 files changed

+1127
-153
lines changed

.github/workflows/deploy_feed-crawler.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,6 @@ jobs:
4040
echo "AI_RATE_LIMIT_COUNT=${{ vars.AI_RATE_LIMIT_COUNT }}" >> env/.env.prod
4141
4242
cd /var/web05-Denamu
43-
docker-compose -f docker-compose/docker-compose.prod.yml up --build --no-deps -d feed-crawler
43+
docker-compose -f docker-compose/docker-compose.prod.yml up --build --no-deps --force-recreate -d feed-crawler
4444
docker image prune -f
4545
docker builder prune -f

.github/workflows/deploy_server.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,6 @@ jobs:
4949
echo "GITHUB_CLIENT_SECRET=${{secrets.GIT_CLIENT_SECRET}}" >> env/.env.prod
5050
5151
cd /var/web05-Denamu
52-
docker-compose -f docker-compose/docker-compose.prod.yml up --build --no-deps -d app
52+
docker-compose -f docker-compose/docker-compose.prod.yml up --build --no-deps --force-recreate -d app
5353
docker image prune -f
5454
docker builder prune -f

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ certificate.crt
3333
!.vscode/extensions.json
3434
!.vscode/*.code-snippets
3535

36+
### static files
37+
**/objects
38+
3639
# Local History for Visual Studio Code
3740
.history/
3841

docker-compose/docker-compose.prod.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ services:
2121
condition: service_healthy
2222
volumes:
2323
- ../server/logs:/var/web05-Denamu/server/logs
24+
- /var/web05-Denamu/objects:/var/web05-Denamu/objects
2425
environment:
2526
NODE_ENV: "PROD"
2627
TZ: "Asia/Seoul"
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import { XMLParser } from 'fast-xml-parser';
2+
import { FeedDetail, RssObj } from '../types';
3+
import {
4+
FEED_AI_SUMMARY_IN_PROGRESS_MESSAGE,
5+
ONE_MINUTE,
6+
TIME_INTERVAL,
7+
} from '../constant';
8+
import { ParserUtil } from './utils/parser-util';
9+
10+
/**
 * Format-neutral shape of one feed entry, as produced by a concrete
 * parser's `extractRawFeeds` before any filtering or enrichment.
 */
export interface RawFeed {
  /** Entry title as taken from the feed. */
  title: string;

  /** URL of the entry. */
  link: string;

  /** Publication timestamp in whatever textual form the feed supplied. */
  pubDate: string;

  /** Entry body or summary; may still contain HTML markup. */
  description: string;
}
16+
17+
/**
 * Template for feed parsers: a concrete subclass supplies format detection
 * (`canParse`) and entry extraction (`extractRawFeeds`); this base class
 * filters the entries by publication time and converts them to `FeedDetail`.
 */
export abstract class BaseFeedParser {
  protected readonly xmlParser = new XMLParser({
    ignoreAttributes: false,
    attributeNamePrefix: '@_',
    parseAttributeValue: true,
    trimValues: true,
  });
  protected readonly parserUtil: ParserUtil;

  constructor(parserUtil: ParserUtil) {
    this.parserUtil = parserUtil;
  }

  /**
   * Parses a feed document into the entries that fall inside the current
   * crawl window.
   *
   * @param rssObj Blog the feed belongs to; its id, name and platform are
   *   copied onto every returned entry.
   * @param xmlData Raw XML text of the feed.
   * @returns The converted entries; empty when nothing matches the window.
   */
  async parseFeed(rssObj: RssObj, xmlData: string): Promise<FeedDetail[]> {
    // Each format (Atom 1.0, RSS 2.0, ...) provides its own extraction.
    const rawFeeds = this.extractRawFeeds(xmlData);
    const timeMatchedFeeds = this.filterByTime(rawFeeds);
    const detailedFeeds = await this.convertToFeedDetails(
      rssObj,
      timeMatchedFeeds,
    );

    return detailedFeeds;
  }

  /** Reports whether this parser recognises the given XML document. */
  abstract canParse(xmlData: string): boolean;

  /** Extracts every entry of the document in format-neutral form. */
  protected abstract extractRawFeeds(xmlData: string): RawFeed[];

  /**
   * Keeps entries whose publication time, truncated to the minute, lies in
   * the half-open window `[now - ONE_MINUTE * TIME_INTERVAL, now]`.
   * Entries with an unparsable date yield NaN and are therefore dropped.
   */
  private filterByTime(rawFeeds: RawFeed[]): RawFeed[] {
    const now = new Date().setSeconds(0, 0);
    return rawFeeds.filter((item) => {
      const pubDate = new Date(item.pubDate).setSeconds(0, 0);
      const timeDiff = (now - pubDate) / (ONE_MINUTE * TIME_INTERVAL);
      return timeDiff >= 0 && timeDiff < 1;
    });
  }

  /**
   * Enriches raw entries with a thumbnail URL, a `YYYY-MM-DD HH:mm:ss` (UTC)
   * timestamp and plain-text content. Entries are processed concurrently.
   */
  private async convertToFeedDetails(
    rssObj: RssObj,
    rawFeeds: RawFeed[],
  ): Promise<FeedDetail[]> {
    return Promise.all(
      rawFeeds.map(async (feed) => {
        const imageUrl = await this.parserUtil.getThumbnailUrl(feed.link);
        const date = new Date(feed.pubDate);
        const formattedDate = date.toISOString().slice(0, 19).replace('T', ' ');

        return {
          id: null,
          blogId: rssObj.id,
          blogName: rssObj.blogName,
          blogPlatform: rssObj.blogPlatform,
          pubDate: formattedDate,
          title: feed.title,
          link: this.decodeLink(feed.link),
          imageUrl: imageUrl,
          content: this.toPlainText(feed.description || ''),
          summary: FEED_AI_SUMMARY_IN_PROGRESS_MESSAGE,
          deathCount: 0,
        } as FeedDetail;
      }),
    );
  }

  /**
   * Strips tags and character references from an HTML fragment and
   * collapses whitespace runs into single spaces.
   */
  private toPlainText(html: string): string {
    return (
      html
        .replace(/<[^>]*>/g, '')
        .replace(/&nbsp;|&#160;/g, ' ')
        // Only real entity syntax is removed; a bare '&' followed later by a
        // ';' ("Tom & Jerry; friends") must not swallow the text in between.
        .replace(/&(?:#\d+|#x[\da-f]+|[a-z][a-z\d]*);/gi, '')
        .replace(/\s+/g, ' ')
        .trim()
    );
  }

  /**
   * Percent-decodes a link. A malformed escape sequence makes
   * `decodeURIComponent` throw `URIError`; in that case the link is returned
   * unchanged so one bad URL does not reject the whole batch.
   */
  private decodeLink(link: string): string {
    try {
      return decodeURIComponent(link);
    } catch {
      return link;
    }
  }
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import { inject, injectable } from 'tsyringe';
2+
import { FeedDetail, RssObj } from '../types';
3+
import { Rss20Parser } from './formats/rss20-parser';
4+
import { Atom10Parser } from './formats/atom10-parser';
5+
import { BaseFeedParser } from './base-feed-parser';
6+
import { DEPENDENCY_SYMBOLS } from '../../types/dependency-symbols';
7+
import logger from '../logger';
8+
9+
/**
 * Downloads a blog's feed and hands it to the first registered parser that
 * recognises the document format.
 */
@injectable()
export class FeedParserManager {
  private readonly parsers: BaseFeedParser[];

  constructor(
    @inject(DEPENDENCY_SYMBOLS.Rss20Parser) rss20Parser: Rss20Parser,
    @inject(DEPENDENCY_SYMBOLS.Atom10Parser) atom10Parser: Atom10Parser,
  ) {
    // Order matters: parsers are probed in this sequence.
    this.parsers = [rss20Parser, atom10Parser];
  }

  /**
   * Fetches `rssObj.rssUrl` and parses the response body.
   *
   * Any failure (non-OK response, unsupported format, parser error) is
   * logged as a warning and reported as an empty list; nothing is rethrown.
   *
   * @param rssObj Blog whose feed should be fetched.
   * @returns Entries produced by the matching parser, or `[]` on failure.
   */
  async fetchAndParse(rssObj: RssObj): Promise<FeedDetail[]> {
    const requestHeaders = {
      Accept:
        'application/rss+xml, application/xml, text/xml, application/atom+xml',
    };

    try {
      const response = await fetch(rssObj.rssUrl, { headers: requestHeaders });
      if (!response.ok) {
        throw new Error(`${rssObj.rssUrl}에서 피드 데이터 가져오기 실패`);
      }

      const xmlData = await response.text();
      const matchedParser = this.findSuitableParser(xmlData);
      if (matchedParser === null) {
        throw new Error(`지원하지 않는 피드 형식: ${rssObj.rssUrl} / `);
      }
      logger.info(`${rssObj.blogName}: ${matchedParser.constructor.name} 사용`);

      // Awaited here so that a rejection is handled by the catch below.
      const feedDetails = await matchedParser.parseFeed(rssObj, xmlData);
      return feedDetails;
    } catch (error) {
      logger.warn(`[${rssObj.rssUrl}] 피드 파싱 중 오류 발생: ${error}`);
      return [];
    }
  }

  /** Returns the first parser whose `canParse` accepts the XML, else null. */
  private findSuitableParser(xmlData: string): BaseFeedParser | null {
    return (
      this.parsers.find((candidate) => candidate.canParse(xmlData)) ?? null
    );
  }
}
Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
import { inject, injectable } from 'tsyringe';
2+
import { BaseFeedParser, RawFeed } from '../base-feed-parser';
3+
import { ParserUtil } from '../utils/parser-util';
4+
import { DEPENDENCY_SYMBOLS } from '../../../types/dependency-symbols';
5+
6+
/**
 * Parser for Atom 1.0 documents (`<feed>` root containing `<entry>` items).
 */
@injectable()
export class Atom10Parser extends BaseFeedParser {
  constructor(@inject(DEPENDENCY_SYMBOLS.ParserUtil) parserUtil: ParserUtil) {
    super(parserUtil);
  }

  /**
   * Reports whether the document parses as XML and has at least one
   * `feed.entry`. Parser exceptions are treated as "cannot parse".
   */
  canParse(xmlData: string): boolean {
    try {
      const parsed = this.xmlParser.parse(xmlData);
      return !!parsed.feed?.entry;
    } catch {
      return false;
    }
  }

  /**
   * Maps every `<entry>` to a `RawFeed`.
   *
   * - `pubDate` prefers `<published>` and falls back to `<updated>`.
   * - `description` prefers `<summary>` and falls back to `<content>`.
   *
   * @returns The extracted entries; empty when the feed has no entries.
   */
  protected extractRawFeeds(xmlData: string): RawFeed[] {
    const parsed = this.xmlParser.parse(xmlData);

    const rawEntries = parsed.feed?.entry;
    if (!rawEntries) {
      return [];
    }
    // A single <entry> is parsed as an object rather than a one-item array.
    const entries = Array.isArray(rawEntries) ? rawEntries : [rawEntries];

    return entries.map((entry: any) => ({
      title: this.parserUtil.customUnescape(this.extractText(entry.title)),
      link: this.extractLink(entry.link),
      pubDate: entry.published || entry.updated,
      description:
        this.extractText(entry.summary) || this.extractText(entry.content),
    }));
  }

  /**
   * Returns the text of an Atom text construct.
   *
   * Because attributes are kept (`ignoreAttributes: false`), an element such
   * as `<title type="html">` is parsed into an object whose text sits under
   * the parser's `#text` key; plain elements stay primitive values.
   */
  private extractText(node: unknown): string {
    if (typeof node === 'string') {
      return node;
    }
    if (typeof node === 'number' || typeof node === 'boolean') {
      // The XML parser may coerce purely numeric/boolean text.
      return String(node);
    }
    if (node !== null && typeof node === 'object' && '#text' in node) {
      return this.extractText((node as Record<string, unknown>)['#text']);
    }
    return '';
  }

  /**
   * Resolves the entry URL from the parsed `<link>` value(s).
   *
   * @returns The chosen href, or `''` when none is available.
   */
  private extractLink(linkData: any): string {
    // Entry without any <link> element.
    if (linkData == null) {
      return '';
    }

    // <link> without attributes, i.e. the URL is the element text.
    if (typeof linkData === 'string') {
      return linkData;
    }

    // Multiple <link> elements: prefer rel="alternate"; a link without a rel
    // attribute counts as alternate per RFC 4287; otherwise take the first.
    if (Array.isArray(linkData)) {
      const chosen =
        linkData.find((l) => l?.['@_rel'] === 'alternate') ??
        linkData.find((l) => l != null && l['@_rel'] === undefined) ??
        linkData[0];
      return Array.isArray(chosen) ? '' : this.extractLink(chosen);
    }

    return linkData['@_href'] || linkData.href || '';
  }
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
import { inject, injectable } from 'tsyringe';
2+
import { BaseFeedParser, RawFeed } from '../base-feed-parser';
3+
import { ParserUtil } from '../utils/parser-util';
4+
import { DEPENDENCY_SYMBOLS } from '../../../types/dependency-symbols';
5+
6+
/**
 * Parser for RSS 2.0 documents (`<rss><channel><item>` structure).
 */
@injectable()
export class Rss20Parser extends BaseFeedParser {
  constructor(@inject(DEPENDENCY_SYMBOLS.ParserUtil) parserUtil: ParserUtil) {
    super(parserUtil);
  }

  /**
   * Reports whether the document parses as XML and exposes
   * `rss.channel.item`. Parser exceptions are treated as "cannot parse".
   */
  canParse(xmlData: string): boolean {
    let document;
    try {
      document = this.xmlParser.parse(xmlData);
    } catch {
      return false;
    }
    return Boolean(document.rss?.channel?.item);
  }

  /**
   * Maps every `<item>` of the channel to a `RawFeed`, passing the title
   * through `customUnescape` and copying link, pubDate and description as-is.
   */
  protected extractRawFeeds(xmlData: string): RawFeed[] {
    const channelItems = this.xmlParser.parse(xmlData).rss.channel.item;

    // A single <item> is parsed as an object rather than a one-item array.
    const items = Array.isArray(channelItems) ? channelItems : [channelItems];

    return items.map((item: any) => {
      const rawFeed = {
        title: this.parserUtil.customUnescape(item.title),
        link: item.link,
        pubDate: item.pubDate,
        description: item.description,
      };
      return rawFeed;
    });
  }
}

feed-crawler/src/common/rss-parser.ts renamed to feed-crawler/src/common/parser/utils/parser-util.ts

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,14 @@
1-
import logger from "./logger";
2-
import { parse } from "node-html-parser";
3-
import { unescape } from "html-escaper";
1+
import { injectable } from 'tsyringe';
2+
import logger from '../../logger';
3+
import { parse } from 'node-html-parser';
4+
import { unescape } from 'html-escaper';
45

5-
export class RssParser {
6+
@injectable()
7+
export class ParserUtil {
68
async getThumbnailUrl(feedUrl: string) {
79
const response = await fetch(feedUrl, {
810
headers: {
9-
Accept: "text/html",
11+
Accept: 'text/html',
1012
},
1113
});
1214
if (!response.ok) {
@@ -16,9 +18,9 @@ export class RssParser {
1618
const htmlData = await response.text();
1719
const htmlRootElement = parse(htmlData);
1820
const metaImage = htmlRootElement.querySelector(
19-
'meta[property="og:image"]'
21+
'meta[property="og:image"]',
2022
);
21-
let thumbnailUrl = metaImage?.getAttribute("content") ?? "";
23+
let thumbnailUrl = metaImage?.getAttribute('content') ?? '';
2224

2325
if (!thumbnailUrl.length) {
2426
logger.warn(`${feedUrl}에서 썸네일 추출 실패`);
@@ -42,12 +44,12 @@ export class RssParser {
4244

4345
customUnescape(feedTitle: string): string {
4446
const escapeEntity = {
45-
"&middot;": "·",
46-
"&nbsp;": " ",
47+
'&middot;': '·',
48+
'&nbsp;': ' ',
4749
};
4850
Object.keys(escapeEntity).forEach((escapeKey) => {
4951
const value = escapeEntity[escapeKey];
50-
const regex = new RegExp(escapeKey, "g");
52+
const regex = new RegExp(escapeKey, 'g');
5153
feedTitle = feedTitle.replace(regex, value);
5254
});
5355

feed-crawler/src/container.ts

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@ import { RssRepository } from './repository/rss.repository';
66
import { FeedRepository } from './repository/feed.repository';
77
import { RedisConnection } from './common/redis-access';
88
import { TagMapRepository } from './repository/tag-map.repository';
9-
import { RssParser } from './common/rss-parser';
9+
import { ParserUtil } from './common/parser/utils/parser-util';
1010
import { ClaudeService } from './claude.service';
11+
import { FeedParserManager } from './common/parser/feed-parser-manager';
12+
import { Rss20Parser } from './common/parser/formats/rss20-parser';
13+
import { Atom10Parser } from './common/parser/formats/atom10-parser';
14+
import { FeedCrawler } from './feed-crawler';
1115

1216
container.registerSingleton<DatabaseConnection>(
1317
DEPENDENCY_SYMBOLS.DatabaseConnection,
@@ -39,6 +43,29 @@ container.registerSingleton<ClaudeService>(
3943
ClaudeService,
4044
);
4145

42-
container.registerSingleton<RssParser>(DEPENDENCY_SYMBOLS.RssParser, RssParser);
46+
container.registerSingleton<ParserUtil>(
47+
DEPENDENCY_SYMBOLS.ParserUtil,
48+
ParserUtil,
49+
);
50+
51+
container.registerSingleton<Rss20Parser>(
52+
DEPENDENCY_SYMBOLS.Rss20Parser,
53+
Rss20Parser,
54+
);
55+
56+
container.registerSingleton<Atom10Parser>(
57+
DEPENDENCY_SYMBOLS.Atom10Parser,
58+
Atom10Parser,
59+
);
60+
61+
container.registerSingleton<FeedParserManager>(
62+
DEPENDENCY_SYMBOLS.FeedParserManager,
63+
FeedParserManager,
64+
);
65+
66+
container.registerSingleton<FeedCrawler>(
67+
DEPENDENCY_SYMBOLS.FeedCrawler,
68+
FeedCrawler,
69+
);
4370

4471
export { container };

0 commit comments

Comments
 (0)