diff --git a/pittapi/news.py b/pittapi/news.py index ae1462c..76fbdea 100644 --- a/pittapi/news.py +++ b/pittapi/news.py @@ -19,50 +19,20 @@ from __future__ import annotations +from functools import cache import math from requests_html import Element, HTMLResponse, HTMLSession -from typing import Literal, NamedTuple +from typing import NamedTuple NUM_ARTICLES_PER_PAGE = 20 -NEWS_BY_CATEGORY_URL = ( - "https://www.pitt.edu/pittwire/news/{category}?field_topics_target_id={topic_id}&field_article_date_value={year}" +PITT_BASE_URL = "https://www.pitt.edu" +PITTWIRE_URL = PITT_BASE_URL + "/pittwire" +FEATURES_ARTICLES_URL = PITTWIRE_URL + "/news/features-articles" +NEWS_BY_CATEGORY_URL = PITTWIRE_URL + ( + "/news/{category}?field_topics_target_id={topic_id}&field_article_date_value={year}" "&title={query}&field_category_target_id=All&page={page_num}" ) -PITT_BASE_URL = "https://www.pitt.edu" - -Category = Literal["features-articles", "accolades-honors", "ones-to-watch", "announcements-and-updates"] -Topic = Literal[ - "university-news", - "health-and-wellness", - "technology-and-science", - "arts-and-humanities", - "community-impact", - "innovation-and-research", - "global", - "diversity-equity-and-inclusion", - "our-city-our-campus", - "teaching-and-learning", - "space", - "ukraine", - "sustainability", -] - -TOPIC_ID_MAP: dict[Topic, int] = { - "university-news": 432, - "health-and-wellness": 2, - "technology-and-science": 391, - "arts-and-humanities": 4, - "community-impact": 6, - "innovation-and-research": 1, - "global": 9, - "diversity-equity-and-inclusion": 8, - "our-city-our-campus": 12, - "teaching-and-learning": 7, - "space": 440, - "ukraine": 441, - "sustainability": 470, -} sess = HTMLSession() @@ -87,18 +57,51 @@ def from_html(cls, article_html: Element) -> Article: return cls(title=article_title, description=article_description, url=article_url, tags=article_tags) -def _get_page_articles( - topic: Topic, - category: Category, - query: str, - year: int | None, - page_num: int, -) -> list[Article]: +@cache +def _scrape_categories() -> dict[str, str]: + response: HTMLResponse = sess.get(PITTWIRE_URL) + category_menu: Element = response.html.find("div#block-views-block-category-menu-category-menu", first=True) + category_list: list[Element] = category_menu.find("ul.hamburger-menu-list li") + category_map: dict[str, str] = {} + for category in category_list: + category_link: Element = category.find("a", first=True) + category_url_name = category_link.attrs["href"].split("/")[-1] + category_map[category.text.strip()] = category_url_name + if not category_map: + raise RuntimeError("No categories found, please open a GitHub issue") + return category_map + + +@cache +def _scrape_topics() -> dict[str, int]: + response: HTMLResponse = sess.get(FEATURES_ARTICLES_URL) + main_content: Element = response.html.xpath("/html/body/div/main/div/section", first=True) + topic_fieldset: Element = main_content.find("fieldset.form-item-field-topics-target-id", first=True) + topic_options: list[Element] = topic_fieldset.find("option") + topic_map: dict[str, int] = {} + for topic_option in topic_options: + if (topic_id := topic_option.attrs["value"].strip()) == "All": # Skip placeholder "Topics" option + continue + topic_name = topic_option.text.strip() + topic_map[topic_name] = int(topic_id) + if not topic_map: + raise RuntimeError("No topics found, please open a GitHub issue") + return topic_map + + +def _get_page_articles(topic: str, category: str, query: str, year: int | None, page_num: int) -> list[Article]: + topic_id_map = _scrape_topics() + category_url_name_map = _scrape_categories() year_str = str(year) if year else "" page_num_str = str(page_num) if page_num else "" + response: HTMLResponse = sess.get( NEWS_BY_CATEGORY_URL.format( - category=category, topic_id=TOPIC_ID_MAP[topic], year=year_str, query=query, page_num=page_num_str + category=category_url_name_map[category], + topic_id=topic_id_map[topic], + year=year_str, + query=query, + page_num=page_num_str, ) ) main_content: Element = response.html.xpath("/html/body/div/main/div/section", first=True) @@ -107,13 +110,33 @@ def _get_page_articles( return page_articles +@cache +def get_categories() -> list[str]: + category_url_name_map = _scrape_categories() + return list(category_url_name_map.keys()) + + +@cache +def get_topics() -> list[str]: + topic_id_map = _scrape_topics() + return list(topic_id_map.keys()) + + def get_articles_by_topic( - topic: Topic, - category: Category = "features-articles", + topic: str, + category: str = "Features & Articles", query: str = "", year: int | None = None, max_num_results: int = NUM_ARTICLES_PER_PAGE, ) -> list[Article]: + topic_id_map = _scrape_topics() + if topic not in topic_id_map: + raise ValueError(f"'{topic}' is not a valid topic, must be one of the following: {get_topics()}") + + category_url_name_map = _scrape_categories() + if category not in category_url_name_map: + raise ValueError(f"'{category}' is not a valid category, must be one of the following: {get_categories()}") + num_pages = math.ceil(max_num_results / NUM_ARTICLES_PER_PAGE) # Get articles sequentially and synchronously (i.e., not using grequests) because the news pages must stay in order diff --git a/tests/news_test.py b/tests/news_test.py index 203b684..44de365 100644 --- a/tests/news_test.py +++ b/tests/news_test.py @@ -30,6 +30,14 @@ class NewsTest(unittest.TestCase): def __init__(self, *args, **kwargs): unittest.TestCase.__init__(self, *args, **kwargs) + with (SAMPLE_PATH / "news_pittwire.html").open() as f: + self.pittwire = f.read() + with (SAMPLE_PATH / "news_pittwire_no_categories.html").open() as f: + self.pittwire_no_categories = f.read() + with (SAMPLE_PATH / "news_features_articles.html").open() as f: + self.features_articles = f.read() + with (SAMPLE_PATH / "news_features_articles_no_topics.html").open() as f: + self.features_articles_no_topics = f.read() with (SAMPLE_PATH / "news_university_news_features_articles_page_0.html").open() as f: self.university_news_features_articles_page_0 = f.read() with (SAMPLE_PATH / "news_university_news_features_articles_page_1.html").open() as f: @@ -39,8 +47,69 @@ def __init__(self, *args, **kwargs): with (SAMPLE_PATH / "news_university_news_features_articles_2020.html").open() as f: self.university_news_features_articles_2020 = f.read() + @responses.activate + def test_get_categories(self): + news.get_categories.cache_clear() + news._scrape_categories.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire) + + categories = news.get_categories() + + self.assertCountEqual( + categories, ["Features & Articles", "Accolades & Honors", "Ones to Watch", "Announcements and Updates"] + ) + + @responses.activate + def test_get_categories_missing(self): + news.get_categories.cache_clear() + news._scrape_categories.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire_no_categories) + + self.assertRaises(RuntimeError, news.get_categories) + + @responses.activate + def test_get_topics(self): + news.get_topics.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles) + + topics = news.get_topics() + + self.assertCountEqual( + topics, + [ + "University News", + "Health and Wellness", + "Technology & Science", + "Arts and Humanities", + "Community Impact", + "Innovation and Research", + "Global", + "Diversity, Equity, and Inclusion", + "Our City/Our Campus", + "Teaching & Learning", + "Space", + "Ukraine", + "Sustainability", + ], + ) + + @responses.activate + def test_get_topics_missing(self): + news.get_topics.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles_no_topics) + + self.assertRaises(RuntimeError, news.get_topics) + @responses.activate def test_get_articles_by_topic(self): + news.get_categories.cache_clear() + news.get_topics.cache_clear() + news._scrape_categories.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire) + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles) responses.add( responses.GET, "https://www.pitt.edu/pittwire/news/features-articles?field_topics_target_id=432&field_article_date_value=&title=" @@ -48,7 +117,7 @@ def test_get_articles_by_topic(self): body=self.university_news_features_articles_page_0, ) - university_news_articles = news.get_articles_by_topic("university-news") + university_news_articles = news.get_articles_by_topic("University News") self.assertEqual(len(university_news_articles), news.NUM_ARTICLES_PER_PAGE) self.assertEqual( @@ -75,6 +144,12 @@ def test_get_articles_by_topic(self): @responses.activate def test_get_articles_by_topic_query(self): query = "fulbright" + news.get_categories.cache_clear() + news.get_topics.cache_clear() + news._scrape_categories.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire) + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles) responses.add( responses.GET, "https://www.pitt.edu/pittwire/news/features-articles?field_topics_target_id=432&field_article_date_value=" @@ -82,7 +157,7 @@ def test_get_articles_by_topic_query(self): body=self.university_news_features_articles_fulbright, ) - university_news_articles = news.get_articles_by_topic("university-news", query=query) + university_news_articles = news.get_articles_by_topic("University News", query=query) self.assertEqual(len(university_news_articles), 3) self.assertEqual( @@ -115,6 +190,12 @@ def test_get_articles_by_topic_query(self): @responses.activate def test_get_articles_by_topic_year(self): year = 2020 + news.get_categories.cache_clear() + news.get_topics.cache_clear() + news._scrape_categories.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire) + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles) responses.add( responses.GET, f"https://www.pitt.edu/pittwire/news/features-articles?field_topics_target_id=432&field_article_date_value={year}" @@ -122,7 +203,7 @@ def test_get_articles_by_topic_year(self): body=self.university_news_features_articles_2020, ) - university_news_articles = news.get_articles_by_topic("university-news", year=year) + university_news_articles = news.get_articles_by_topic("University News", year=year) self.assertEqual(len(university_news_articles), 5) self.assertEqual( @@ -152,6 +233,12 @@ def test_get_articles_by_topic_year(self): @responses.activate def test_get_articles_by_topic_less_than_one_page(self): num_results = 5 + news.get_categories.cache_clear() + news.get_topics.cache_clear() + news._scrape_categories.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire) + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles) responses.add( responses.GET, "https://www.pitt.edu/pittwire/news/features-articles?field_topics_target_id=432&field_article_date_value=&title=" @@ -159,7 +246,7 @@ def test_get_articles_by_topic_less_than_one_page(self): body=self.university_news_features_articles_page_0, ) - university_news_articles = news.get_articles_by_topic("university-news", max_num_results=num_results) + university_news_articles = news.get_articles_by_topic("University News", max_num_results=num_results) self.assertEqual(len(university_news_articles), num_results) self.assertEqual( @@ -186,6 +273,12 @@ def test_get_articles_by_topic_less_than_one_page(self): @responses.activate def test_get_articles_by_topic_multiple_pages(self): num_results = news.NUM_ARTICLES_PER_PAGE + 5 + news.get_categories.cache_clear() + news.get_topics.cache_clear() + news._scrape_categories.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire) + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles) responses.add( responses.GET, "https://www.pitt.edu/pittwire/news/features-articles?field_topics_target_id=432&field_article_date_value=&title=" @@ -199,7 +292,7 @@ def test_get_articles_by_topic_multiple_pages(self): body=self.university_news_features_articles_page_1, ) - university_news_articles = news.get_articles_by_topic("university-news", max_num_results=num_results) + university_news_articles = news.get_articles_by_topic("University News", max_num_results=num_results) self.assertEqual(len(university_news_articles), num_results) self.assertEqual( @@ -227,3 +320,25 @@ def test_get_articles_by_topic_multiple_pages(self): ], ), ) + + @responses.activate + def test_get_articles_by_topic_invalid_category(self): + news.get_categories.cache_clear() + news.get_topics.cache_clear() + news._scrape_categories.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire) + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles) + + self.assertRaises(ValueError, news.get_articles_by_topic, "University News", "Invalid Category") + + @responses.activate + def test_get_articles_by_topic_invalid_topic(self): + news.get_categories.cache_clear() + news.get_topics.cache_clear() + news._scrape_categories.cache_clear() + news._scrape_topics.cache_clear() + responses.add(responses.GET, news.PITTWIRE_URL, body=self.pittwire) + responses.add(responses.GET, news.FEATURES_ARTICLES_URL, body=self.features_articles) + + self.assertRaises(ValueError, news.get_articles_by_topic, "Invalid Topic") diff --git a/tests/samples/news_features_articles.html b/tests/samples/news_features_articles.html new file mode 100644 index 0000000..7dce0c4 --- /dev/null +++ b/tests/samples/news_features_articles.html @@ -0,0 +1,1839 @@ + + + + + + + + + + + + + + + + Pittwire News | University of Pittsburgh + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + +
+
+
+
+
+ +
+
+ + +
+
+
+
+
+ + + + + + + + + + + +
+ +
+ +
+
+
+ + +
+
+ +
+
+
+

Filter By

+
+
+ +
+ + +
+ + + + + +
+
+ + +
+ +
+ +
+
+
+ +
+
+ + +
+
+ + Baseball player throwing a pitch in a crowded stadium. + + + + + +
+

This Pittsburgh Pirates pitcher is taking a swing at raising mental health awareness +

+

+ A family trauma almost toppled Pitt alum Isaac Mattson’s dream. A career roadblock may have saved it. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Community Impact +
  • +
+ +
+ + +
+
+ + Pitt-Greensburg graduates pose for a portrait during the campus' first-ever winter commencement celebration. + + + + + +
+

Pitt-Greensburg held its first winter commencement ceremony +

+

+ President Robert Gregerson and other campus leaders recognized the 39 students who completed their degree requirements in the summer and fall semesters. +

+ +
    +
  • + University News +
  • +
  • + Community Impact +
  • +
  • + Pitt-Greensburg +
  • +
  • + Cultivate student success +
  • +
  • + Commencement +
  • +
+ +
+ + +
+
+ + Confetti falls over winter commencement graduates and attendees. + + + + + +
+

Pitt celebrated its newest Class of 2024 graduates at winter commencement +

+

+ See a gallery of the ceremony, which included speeches by Holden Thorp and Mihika Shah. +

+ +
    +
  • + University News +
  • +
  • + Cultivate student success +
  • +
  • + Commencement +
  • +
+ +
+ + +
+
+ + Charles Rinaldo poses for a portrait in the lab with a colleague. + + + + + +
+

40 years later, the Pitt Men’s Study is still breaking ground in the fight against AIDS +

+

+ We spoke with principal investigator Charles Rinaldo and longtime participant and volunteer Marc Wagner about how the study has impacted their lives over the decades. +

+ +
    +
  • + Community Impact +
  • +
  • + Innovation and Research +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + Promote accountability and trust +
  • +
  • + School of Medicine +
  • +
  • + School of Public Health +
  • +
+ +
+ + +
+
+ + MLS participants smile and engage during the program's December retreat. + + + + + +
+

Nearly half of new moms in STEM leave their full-time positions. This Pitt program wants to change that. +

+

+ Mothers Leading Science is helping health sciences faculty find a supportive community, strategies for work-life integration and renewed passion for their research. +

+ +
    +
  • + Innovation and Research +
  • +
  • + Diversity, Equity, and Inclusion +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + Be welcoming and engaged +
  • +
  • + School of Medicine +
  • +
+ +
+ + +
+
+ + Students in Jennifer Hirsch’s Social Psychology of Reality TV course deliberate. + + + + + +
+

This Pitt professor designed a ‘Survivor’-style game to teach social psychology lessons +

+

+ Jennifer Hirsch’s unique course lets students get in the heads of reality stars — and learn some lessons while they’re in there. +

+ +
    +
  • + Arts and Humanities +
  • +
  • + Teaching & Learning +
  • +
  • + Cultivate student success +
  • +
  • + Kenneth P. Dietrich School of Arts and Sciences +
  • +
+ +
+ + +
+
+ + The Cathedral of Learning + + + + + +
+

5 Pitt students received Gilman Scholarships +

+

+ The undergraduates will travel to South America, Taiwan and more through the program, which supports Federal Pell Grant recipients with up to $5,000 during their study abroad experience. +

+ +
    +
  • + University News +
  • +
  • + Global +
  • +
+ +
+ + +
+
+ + A person in blue doctoral regalia hugs someone in a grey shawl + + + + + +
+

A guest’s guide to commencement at the University of Pittsburgh +

+

+ Everything you need to know about parking, pictures, accessibility and more for your time on the Pittsburgh campus. +

+ +
    +
  • + University News +
  • +
  • + Pittsburgh Campus +
  • +
  • + Commencement +
  • +
+ +
+ + +
+
+ + Portrait of Charles “Chas” Bonasorte at The Pittsburgh Stop Inc. + + + + + +
+

Chas Bonasorte, Pitt football’s ‘Kamikaze Kid’ and owner of famed Pitt apparel kiosk, died at 70 +

+

+ After his career on the field, Bonasorte became a fixture on the Pittsburgh campus with his clothing kiosk at Forbes and Bigelow. +

+ +
    +
  • + Community Impact +
  • +
  • + Alumni +
  • +
  • + Be welcoming and engaged +
  • +
+ +
+ + +
+
+ + Students embrace on the field of the Acrisure Stadium during a Homecoming Football game. + + + + + +
+

2024 at Pitt, in photos +

+

+ Our photographers shared their 10 favorite images of innovative researchers, major developments and more from the year. +

+ +
    +
  • + Technology & Science +
  • +
  • + Community Impact +
  • +
  • + Innovation and Research +
  • +
  • + Promote accountability and trust +
  • +
+ +
+ + +
+
+ + Man with dark hair sits in a wooded area and smiles for camera. + + + + + +
+

Live from New York, it’s Ben Asciutto +

+

+ This Pitt alum’s childhood aspirations of working in the entertainment industry are coming true on the set of ‘Saturday Night Live.’ +

+ +
    +
  • + Arts and Humanities +
  • +
  • + Cultivate student success +
  • +
  • + Kenneth P. Dietrich School of Arts and Sciences +
  • +
+ +
+ + +
+
+ + Three researchers in a lab + + + + + +
+

Chronic pain treatments can be dangerous and ineffective. These Pitt researchers are working on a solution. +

+

+ Supported by NIH funding, the Vanish Therapeutics team is working to bring a bioabsorbable nerve stimulator to market. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Innovation and Research +
  • +
+ +
+ + +
+
+ + A branded Pitt flag with the University shield is framed by fall leaves. + + + + + +
+

ICYMI: Pitt contributed $6.6 billion to Pennsylvania in FY23 +

+

+ The latest Economic Impact Report showed the University supported nearly 49,000 jobs and contributed $356.2 million in state and local taxes. +

+ +
    +
  • + University News +
  • +
  • + Community Impact +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + Promote accountability and trust +
  • +
+ +
+ + +
+
+ + Fang on a staircase in the Honors College + + + + + +
+

This Pitt senior and cancer researcher is one to keep watching +

+

+ Here’s what’s next for Richard Su Fang, a Goldwater scholar who has already received interview invitations from 17 MD/PhD programs. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Community Impact +
  • +
  • + Innovation and Research +
  • +
  • + Cultivate student success +
  • +
  • + David C. Frederick Honors College +
  • +
+ +
+ + +
+
+ + An adult and child volunteer help prepare meals for Christmas Day at Pitt. + + + + + +
+

Volunteers will spread cheer, presents and thousands of meals for Christmas Day at Pitt +

+

+ 156 people will serve at the University’s 19th annual celebration on Dec. 25. +

+ +
    +
  • + Community Impact +
  • +
  • + Our City/Our Campus +
  • +
  • + Be welcoming and engaged +
  • +
+ +
+ + +
+
+ + The Nonprofit Capacity Building Program cohort poses for a group photo at Café Momentum. + + + + + +
+

Nonprofits are scaling up their regional impact with support from Pitt +

+

+ The Nonprofit Capacity Building Program connects local organizations working to improve economic stability with University training and resources. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Community Impact +
  • +
  • + Innovation and Research +
  • +
  • + Diversity, Equity, and Inclusion +
  • +
  • + Our City/Our Campus +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
+ +
+ + +
+
+ + People stand in front of a Pitt health sciences backdrop + + + + + +
+

A new Pitt center will use AI to accelerate women’s health research globally +

+

+ The Vijayalakshmi Innovation Center is funded by a gift from siblings and health care entrepreneurs Vishnu Vardhan and Harsha Vardhini, along with a significant investment from the School of Medicine. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Innovation and Research +
  • +
  • + Global +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + It's Possible at Pitt +
  • +
  • + School of Medicine +
  • +
+ +
+ + +
+
+ + People in yellow vests tour a construction site + + + + + +
+

Pitt’s new building at Fifth and Halket will support health sciences, medicine and online learning +

+

+ The University’s Property and Facilities Committee and Board of Trustees approved interior fit out projects for the Department of Computational and Systems Biology and Pitt EDGE on Dec. 5. +

+ +
    +
  • + University News +
  • +
  • + Our City/Our Campus +
  • +
  • + School of Health and Rehabilitation Sciences +
  • +
  • + School of Medicine +
  • +
+ +
+ + +
+
+ + Alexander Deiters and Jason Lohmueller in the lab + + + + + +
+

How this Pitt duo’s startup plans to attack the ‘tricky beast’ that is cancer +

+

+ A platform developed by Jason Lohmueller and Alex Dieters could allow immunotherapies to be delivered to tumors with more flexibility and precision. +

+ +
    +
  • + Innovation and Research +
  • +
  • + Innovation Institute +
  • +
  • + It's Possible at Pitt +
  • +
  • + Kenneth P. Dietrich School of Arts and Sciences +
  • +
  • + School of Medicine +
  • +
+ +
+ + +
+
+ + Applied Physiology Lab researchers conduct a spaceflight hibernation study on a volunteer. + + + + + +
+

A NASA-funded Pitt team is exploring the benefits of sleeping in space +

+

+ Kate Flickinger’s research on lower metabolic rates could help astronauts safely undergo long-duration spaceflights one day. It could also help ICU patients here on Earth. +

+ +
    +
  • + Technology & Science +
  • +
  • + Innovation and Research +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + School of Medicine +
  • +
+ +
+ +
+
+ + + + +
+
+ +
+ +
+ +
+ +
+
+ + +
+
+
+ +
+ + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/samples/news_features_articles_no_topics.html b/tests/samples/news_features_articles_no_topics.html new file mode 100644 index 0000000..bb7410e --- /dev/null +++ b/tests/samples/news_features_articles_no_topics.html @@ -0,0 +1,1838 @@ + + + + + + + + + + + + + + + + Pittwire News | University of Pittsburgh + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + +
+ + + +
+
+
+
+
+ +
+
+ + +
+
+
+
+
+ + + + + + + + + + + +
+ +
+ +
+
+
+ + +
+
+ +
+
+
+

Filter By

+
+
+ +
+ +
+ + + + + +
+
+ + +
+ +
+ +
+
+
+ +
+
+ + +
+
+ + Baseball player throwing a pitch in a crowded stadium. + + + + + +
+

This Pittsburgh Pirates pitcher is taking a swing at raising mental health awareness +

+

+ A family trauma almost toppled Pitt alum Isaac Mattson’s dream. A career roadblock may have saved it. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Community Impact +
  • +
+ +
+ + +
+
+ + Pitt-Greensburg graduates pose for a portrait during the campus' first-ever winter commencement celebration. + + + + + +
+

Pitt-Greensburg held its first winter commencement ceremony +

+

+ President Robert Gregerson and other campus leaders recognized the 39 students who completed their degree requirements in the summer and fall semesters. +

+ +
    +
  • + University News +
  • +
  • + Community Impact +
  • +
  • + Pitt-Greensburg +
  • +
  • + Cultivate student success +
  • +
  • + Commencement +
  • +
+ +
+ + +
+
+ + Confetti falls over winter commencement graduates and attendees. + + + + + +
+

Pitt celebrated its newest Class of 2024 graduates at winter commencement +

+

+ See a gallery of the ceremony, which included speeches by Holden Thorp and Mihika Shah. +

+ +
    +
  • + University News +
  • +
  • + Cultivate student success +
  • +
  • + Commencement +
  • +
+ +
+ + +
+
+ + Charles Rinaldo poses for a portrait in the lab with a colleague. + + + + + +
+

40 years later, the Pitt Men’s Study is still breaking ground in the fight against AIDS +

+

+ We spoke with principal investigator Charles Rinaldo and longtime participant and volunteer Marc Wagner about how the study has impacted their lives over the decades. +

+ +
    +
  • + Community Impact +
  • +
  • + Innovation and Research +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + Promote accountability and trust +
  • +
  • + School of Medicine +
  • +
  • + School of Public Health +
  • +
+ +
+ + +
+
+ + MLS participants smile and engage during the program's December retreat. + + + + + +
+

Nearly half of new moms in STEM leave their full-time positions. This Pitt program wants to change that. +

+

+ Mothers Leading Science is helping health sciences faculty find a supportive community, strategies for work-life integration and renewed passion for their research. +

+ +
    +
  • + Innovation and Research +
  • +
  • + Diversity, Equity, and Inclusion +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + Be welcoming and engaged +
  • +
  • + School of Medicine +
  • +
+ +
+ + +
+
+ + Students in Jennifer Hirsch’s Social Psychology of Reality TV course deliberate. + + + + + +
+

This Pitt professor designed a ‘Survivor’-style game to teach social psychology lessons +

+

+ Jennifer Hirsch’s unique course lets students get in the heads of reality stars — and learn some lessons while they’re in there. +

+ +
    +
  • + Arts and Humanities +
  • +
  • + Teaching & Learning +
  • +
  • + Cultivate student success +
  • +
  • + Kenneth P. Dietrich School of Arts and Sciences +
  • +
+ +
+ + +
+
+ + The Cathedral of Learning + + + + + +
+

5 Pitt students received Gilman Scholarships +

+

+ The undergraduates will travel to South America, Taiwan and more through the program, which supports Federal Pell Grant recipients with up to $5,000 during their study abroad experience. +

+ +
    +
  • + University News +
  • +
  • + Global +
  • +
+ +
+ + +
+
+ + A person in blue doctoral regalia hugs someone in a grey shawl + + + + + +
+

A guest’s guide to commencement at the University of Pittsburgh +

+

+ Everything you need to know about parking, pictures, accessibility and more for your time on the Pittsburgh campus. +

+ +
    +
  • + University News +
  • +
  • + Pittsburgh Campus +
  • +
  • + Commencement +
  • +
+ +
+ + +
+
+ + Portrait of Charles “Chas” Bonasorte at The Pittsburgh Stop Inc. + + + + + +
+

Chas Bonasorte, Pitt football’s ‘Kamikaze Kid’ and owner of famed Pitt apparel kiosk, died at 70 +

+

+ After his career on the field, Bonasorte became a fixture on the Pittsburgh campus with his clothing kiosk at Forbes and Bigelow. +

+ +
    +
  • + Community Impact +
  • +
  • + Alumni +
  • +
  • + Be welcoming and engaged +
  • +
+ +
+ + +
+
+ + Students embrace on the field of the Acrisure Stadium during a Homecoming Football game. + + + + + +
+

2024 at Pitt, in photos +

+

+ Our photographers shared their 10 favorite images of innovative researchers, major developments and more from the year. +

+ +
    +
  • + Technology & Science +
  • +
  • + Community Impact +
  • +
  • + Innovation and Research +
  • +
  • + Promote accountability and trust +
  • +
+ +
+ + +
+
+ + Man with dark hair sits in a wooded area and smiles for camera. + + + + + +
+

Live from New York, it’s Ben Asciutto +

+

+ This Pitt alum’s childhood aspirations of working in the entertainment industry are coming true on the set of ‘Saturday Night Live.’ +

+ +
    +
  • + Arts and Humanities +
  • +
  • + Cultivate student success +
  • +
  • + Kenneth P. Dietrich School of Arts and Sciences +
  • +
+ +
+ + +
+
+ + Three researchers in a lab + + + + + +
+

Chronic pain treatments can be dangerous and ineffective. These Pitt researchers are working on a solution. +

+

+ Supported by NIH funding, the Vanish Therapeutics team is working to bring a bioabsorbable nerve stimulator to market. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Innovation and Research +
  • +
+ +
+ + +
+
+ + A branded Pitt flag with the University shield is framed by fall leaves. + + + + + +
+

ICYMI: Pitt contributed $6.6 billion to Pennsylvania in FY23 +

+

+ The latest Economic Impact Report showed the University supported nearly 49,000 jobs and contributed $356.2 million in state and local taxes. +

+ +
    +
  • + University News +
  • +
  • + Community Impact +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + Promote accountability and trust +
  • +
+ +
+ + +
+
+ + Fang on a staircase in the Honors College + + + + + +
+

This Pitt senior and cancer researcher is one to keep watching +

+

+ Here’s what’s next for Richard Su Fang, a Goldwater scholar who has already received interview invitations from 17 MD/PhD programs. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Community Impact +
  • +
  • + Innovation and Research +
  • +
  • + Cultivate student success +
  • +
  • + David C. Frederick Honors College +
  • +
+ +
+ + +
+
+ + An adult and child volunteer help prepare meals for Christmas Day at Pitt. + + + + + +
+

Volunteers will spread cheer, presents and thousands of meals for Christmas Day at Pitt +

+

+ 156 people will serve at the University’s 19th annual celebration on Dec. 25. +

+ +
    +
  • + Community Impact +
  • +
  • + Our City/Our Campus +
  • +
  • + Be welcoming and engaged +
  • +
+ +
+ + +
+
+ + The Nonprofit Capacity Building Program cohort poses for a group photo at Café Momentum. + + + + + +
+

Nonprofits are scaling up their regional impact with support from Pitt +

+

+ The Nonprofit Capacity Building Program connects local organizations working to improve economic stability with University training and resources. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Community Impact +
  • +
  • + Innovation and Research +
  • +
  • + Diversity, Equity, and Inclusion +
  • +
  • + Our City/Our Campus +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
+ +
+ + +
+
+ + People stand in front of a Pitt health sciences backdrop + + + + + +
+

A new Pitt center will use AI to accelerate women’s health research globally +

+

+ The Vijayalakshmi Innovation Center is funded by a gift from siblings and health care entrepreneurs Vishnu Vardhan and Harsha Vardhini, along with a significant investment from the School of Medicine. +

+ +
    +
  • + Health and Wellness +
  • +
  • + Innovation and Research +
  • +
  • + Global +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + It's Possible at Pitt +
  • +
  • + School of Medicine +
  • +
+ +
+ + +
+
+ + People in yellow vests tour a construction site + + + + + +
+

Pitt’s new building at Fifth and Halket will support health sciences, medicine and online learning +

+

+ The University’s Property and Facilities Committee and Board of Trustees approved interior fit out projects for the Department of Computational and Systems Biology and Pitt EDGE on Dec. 5. +

+ +
    +
  • + University News +
  • +
  • + Our City/Our Campus +
  • +
  • + School of Health and Rehabilitation Sciences +
  • +
  • + School of Medicine +
  • +
+ +
+ + +
+
+ + Alexander Deiters and Jason Lohmueller in the lab + + + + + +
+

How this Pitt duo’s startup plans to attack the ‘tricky beast’ that is cancer +

+

+ A platform developed by Jason Lohmueller and Alex Dieters could allow immunotherapies to be delivered to tumors with more flexibility and precision. +

+ +
    +
  • + Innovation and Research +
  • +
  • + Innovation Institute +
  • +
  • + It's Possible at Pitt +
  • +
  • + Kenneth P. Dietrich School of Arts and Sciences +
  • +
  • + School of Medicine +
  • +
+ +
+ + +
+
+ + Applied Physiology Lab researchers conduct a spaceflight hibernation study on a volunteer. + + + + + +
+

A NASA-funded Pitt team is exploring the benefits of sleeping in space +

+

+ Kate Flickinger’s research on lower metabolic rates could help astronauts safely undergo long-duration spaceflights one day. It could also help ICU patients here on Earth. +

+ +
    +
  • + Technology & Science +
  • +
  • + Innovation and Research +
  • +
  • + Propel scholarship, creativity and innovation +
  • +
  • + School of Medicine +
  • +
+ +
+ +
+
+ + + + +
+
+ +
+ +
+ +
+ +
+
+ + +
+
+
+ +
+ + + + + + + +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/samples/news_pittwire.html b/tests/samples/news_pittwire.html new file mode 100644 index 0000000..74dc749 --- /dev/null +++ b/tests/samples/news_pittwire.html @@ -0,0 +1,1942 @@ + + + + + + + + + + + + + + + + Pittwire | University of Pittsburgh + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + + +
+
+
+
+
+ +
+
+ + +
+
+
+
+
+ + + + + + + + + + + +
+ + +
+ + + + +
+
+ + + + + + +
+ + +
+ +
+
+ +
Explore Sections
+ + + + + + + + + + + + + +
+
+ +
+ + + + +
+
Features & Articles +
+ + + +
+ + + +
+
+
+

This Pittsburgh Pirates pitcher is taking a swing at raising mental health awareness +

+

+ A family trauma almost toppled Pitt alum Isaac Mattson’s dream. A career roadblock may have saved it. +

+ + +
+
+
+
+

Pitt celebrated its newest Class of 2024 graduates at winter commencement +

+

+ See a gallery of the ceremony, which included speeches by Holden Thorp and Mihika Shah. +

+ + +
+
+
+
+

40 years later, the Pitt Men’s Study is still breaking ground in the fight against AIDS +

+

+ We spoke with principal investigator Charles Rinaldo and longtime participant and volunteer Marc Wagner about how the study has impacted their lives over the decades. +

+ + +
+
+
+
+

Pitt-Greensburg held its first winter commencement ceremony +

+

+ President Robert Gregerson and other campus leaders recognized the 39 students who completed their degree requirements in the summer and fall semesters. +

+ + +
+
+ +
+ +
+
+ + +
+ + View All Articles + +
+
+ + + +
+
Announcements and Updates +
+ + + + + + +
+ + View All Articles + +
+
+ + + +
+
Accolades & Honors +
+ + + + + + +
+ + View All Articles + +
+
+ + + +
+ + +
+ +
+
+ + + +
+ +
+ + +
+ +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/tests/samples/news_pittwire_no_categories.html b/tests/samples/news_pittwire_no_categories.html new file mode 100644 index 0000000..aed5fa4 --- /dev/null +++ b/tests/samples/news_pittwire_no_categories.html @@ -0,0 +1,1935 @@ + + + + + + + + + + + + + + + + Pittwire | University of Pittsburgh + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+ + + +
+
+
+
+
+ +
+
+ + +
+
+
+
+
+ + + + + + + + + + + +
+ + +
+ + + + +
+
+ + + + + + +
+ + +
+ +
+
+ +
Explore Sections
+ + + + + + + + + + + + + +
+
+ +
+ + + + +
+
Features & Articles +
+ + + +
+ + + +
+
+
+

This Pittsburgh Pirates pitcher is taking a swing at raising mental health awareness +

+

+ A family trauma almost toppled Pitt alum Isaac Mattson’s dream. A career roadblock may have saved it. +

+ + +
+
+
+
+

Pitt celebrated its newest Class of 2024 graduates at winter commencement +

+

+ See a gallery of the ceremony, which included speeches by Holden Thorp and Mihika Shah. +

+ + +
+
+
+
+

40 years later, the Pitt Men’s Study is still breaking ground in the fight against AIDS +

+

+ We spoke with principal investigator Charles Rinaldo and longtime participant and volunteer Marc Wagner about how the study has impacted their lives over the decades. +

+ + +
+
+
+
+

Pitt-Greensburg held its first winter commencement ceremony +

+

+ President Robert Gregerson and other campus leaders recognized the 39 students who completed their degree requirements in the summer and fall semesters. +

+ + +
+
+ +
+ +
+
+ + +
+ + View All Articles + +
+
+ + + +
+
Announcements and Updates +
+ + + + + + +
+ + View All Articles + +
+
+ + + +
+
Accolades & Honors +
+ + + + + + +
+ + View All Articles + +
+
+ + + +
+ + +
+ +
+
+ + + +
+ +
+ + +
+ +
+ +
+ + + + + + + + + +
+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +