Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
import com.linglevel.api.crawling.repository.CrawlingDslRepository;
import com.linglevel.api.crawling.service.CrawlingService;
import com.rometools.rome.feed.synd.SyndEntry;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.DisplayName;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -37,86 +35,6 @@ void setUp() {
filter = new ContentCrawlabilityFilter(crawlingDslRepository, crawlingService);
}

/**
 * Runs {@code filter.filter(...)} for a real Medium article URL with a stubbed
 * Medium {@code CrawlingDsl} and expects the result to pass.
 *
 * <p>The diagnostic report is printed BEFORE the assertion. With the assertion
 * first, a failing result aborted the test before the failure reason could be
 * printed, so the "Reason" branch was unreachable.
 *
 * <p>NOTE(review): the URL is a live address and nothing here stubs the page
 * fetch, so this test presumably depends on network access — confirm.
 */
@Test
@DisplayName("실제 Medium 글에서 콘텐츠 크롤링 성공 - 100자 이상")
void testMediumArticleCrawlability() {
    // given: a real Medium URL
    String mediumUrl = "https://medium.com/@haleemakashimri111/32-7-trending-javascript-libraries-fe427f00a3f4";

    // Medium DSL configuration (valid syntax)
    CrawlingDsl mediumDsl = CrawlingDsl.builder()
            .domain("medium.com")
            .contentDsl("D'''article p.pw-post-body-paragraph'''>#")
            .build();

    when(crawlingService.extractDomain(mediumUrl)).thenReturn("medium.com");
    when(crawlingDslRepository.findByDomain("medium.com"))
            .thenReturn(Optional.of(mediumDsl));

    // Mocked feed entry whose link points at the article
    SyndEntry entry = mock(SyndEntry.class);
    when(entry.getLink()).thenReturn(mediumUrl);

    FeedSource feedSource = mock(FeedSource.class);

    // when: run the filter
    FeedFilterResult result = filter.filter(entry, feedSource);

    // Report first, so the failure reason is visible even when the assertion below fails.
    System.out.println("===========================================");
    System.out.println("=== Medium Article Crawlability Test ===");
    System.out.println("===========================================");
    System.out.println("URL: " + mediumUrl);
    System.out.println("DSL: " + mediumDsl.getContentDsl());
    System.out.println("Result: " + (result.isPassed() ? "✅ PASS" : "❌ FAIL"));
    if (!result.isPassed()) {
        System.out.println("Reason: " + result.getReason());
    }
    System.out.println("===========================================");

    // then: the filter must pass (the message states at least 100 characters are expected)
    assertTrue(result.isPassed(), "Medium 글은 100자 이상 콘텐츠가 추출되어야 함");
}

/**
 * Fetches a Medium article with Jsoup, runs the content DSL through
 * {@code CrawlerDsl.executeAsString}, and checks that the extracted text is
 * non-null, non-blank after trimming, and at least 100 characters long.
 * Prints the trimmed length and a preview of up to 500 characters.
 *
 * @throws Exception if fetching or DSL execution fails
 */
@Test
@DisplayName("실제 Medium 글 크롤링 상세 검증")
void testMediumArticleDetailedCrawling() throws Exception {
    // given: the Medium URL and its DSL (valid syntax)
    final String articleUrl = "https://medium.com/@haleemakashimri111/32-7-trending-javascript-libraries-fe427f00a3f4";
    final String contentDsl = "D'''article p.pw-post-body-paragraph'''>#";
    final String banner = "===========================================";
    final String rule = "-------------------------------------------";

    // when: crawl the page directly
    System.out.println(banner);
    System.out.println("=== Medium Article Detailed Crawling ===");
    System.out.println(banner);
    System.out.println("URL: " + articleUrl);
    System.out.println("DSL: " + contentDsl);
    System.out.println();

    Document page = Jsoup.connect(articleUrl)
            .timeout(10000)
            .userAgent("Mozilla/5.0")
            .get();

    String extractedContent =
            new com.linglevel.api.crawling.dsl.CrawlerDsl(page).executeAsString(contentDsl);

    // then: verify
    assertNotNull(extractedContent, "추출된 콘텐츠는 null이 아니어야 함");

    // Trim once after the null check; every later check uses the trimmed form.
    String trimmed = extractedContent.trim();
    int trimmedLength = trimmed.length();

    assertFalse(trimmed.isEmpty(), "추출된 콘텐츠는 비어있지 않아야 함");
    assertTrue(trimmedLength >= 100,
            "추출된 콘텐츠는 100자 이상이어야 함 (실제: " + trimmedLength + "자)");

    // The preview is cut from the untrimmed content, capped at 500 characters.
    int previewEnd = Math.min(500, extractedContent.length());

    System.out.println("Extracted Content Length: " + trimmedLength + " chars");
    System.out.println();
    System.out.println("Extracted Content Preview (first 500 chars):");
    System.out.println(rule);
    System.out.println(extractedContent.substring(0, previewEnd));
    System.out.println(rule);
    System.out.println();
    System.out.println("✅ Test Result: PASS (100자 이상 추출 성공)");
    System.out.println(banner);
}

@Test
@DisplayName("CrawlingDsl이 없는 도메인은 불통과")
void testNoCrawlingDsl() {
Expand Down Expand Up @@ -242,4 +160,4 @@ void testFilterOrder() {
// then
assertEquals(100, order, "HTTP 요청이 필요하므로 가장 나중에 실행되어야 함");
}
}
}

This file was deleted.

Loading
Loading