
Commit d2017c3: Add Headlines of Readme to Search Entries (#142)

1 parent: 3d33809

File tree: components/entrytypes.py, components/search.py

2 files changed: +74 / -9 lines

components/entrytypes.py

Lines changed: 44 additions & 0 deletions
@@ -2,6 +2,7 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import ClassVar, List, Optional
+from urllib.parse import urljoin

 from telegram import InlineKeyboardMarkup
 from thefuzz import fuzz
@@ -65,6 +66,49 @@ def inline_keyboard(self) -> Optional[InlineKeyboardMarkup]:
         return None


+class ReadmeSection(BaseEntry):
+    """A section of the readme.
+
+    Args:
+        name: The name of the section
+        anchor: the URL anchor of the section
+    """
+
+    def __init__(self, name: str, anchor: str):
+        self.name = name
+        self.anchor = anchor
+
+    @property
+    def url(self) -> str:
+        return urljoin(DOCS_URL, self.anchor)
+
+    @property
+    def display_name(self) -> str:
+        return f"Readme {ARROW_CHARACTER} {self.name}"
+
+    @property
+    def short_name(self) -> str:
+        return self.name
+
+    @property
+    def description(self) -> str:
+        return "Readme of python-telegram-bot"
+
+    def html_markup(self, search_query: str = None) -> str:
+        return (
+            f"Readme of <i>python-telegram-bot</i>\n" f"{self.html_insertion_markup(search_query)}"
+        )
+
+    def html_insertion_markup(self, search_query: str = None) -> str:
+        return f'<a href="{self.url}">{self.short_name}</a>'
+
+    def html_reply_markup(self, search_query: str = None) -> str:
+        return f'<a href="{self.url}">Readme Section: {self.short_name}</a>'
+
+    def compare_to_query(self, search_query: str) -> float:
+        return fuzz.token_set_ratio(f"readme {self.name}", search_query)
+
+
 class Example(BaseEntry):
     """An example in the examples directory.
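For reference, a rough standalone sketch of what the new entry type's url and compare_to_query logic boil down to; the DOCS_URL value and the example section name, anchor, and query are illustrative assumptions, not taken from the commit:

# Standalone sketch only; the real DOCS_URL constant lives in components/const.py
from urllib.parse import urljoin
from thefuzz import fuzz

DOCS_URL = "https://docs.python-telegram-bot.org/"  # assumed value for illustration
name, anchor = "Installing", "#installing"

# ReadmeSection.url joins the readme URL with the section's anchor fragment
print(urljoin(DOCS_URL, anchor))  # https://docs.python-telegram-bot.org/#installing

# ReadmeSection.compare_to_query prefixes the section name with "readme" and
# returns a 0-100 fuzzy similarity score against the search query
print(fuzz.token_set_ratio(f"readme {name}", "how to install"))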

components/search.py

Lines changed: 30 additions & 9 deletions
@@ -33,6 +33,7 @@
     FAQEntry,
     FRDPEntry,
     ParamDocEntry,
+    ReadmeSection,
     WikiPage,
 )
 from .github import GitHub
@@ -43,13 +44,14 @@ class Search:
     def __init__(self, github_auth: str, github_user_agent: str = USER_AGENT) -> None:
         self.__lock = asyncio.Lock()
         self._docs: List[DocEntry] = []
+        self._readme: List[ReadmeSection] = []
         self._official: Dict[str, str] = {}
         self._wiki: List[WikiPage] = []
         self._snippets: List[CodeSnippet] = []
         self._faq: List[FAQEntry] = []
         self._design_patterns: List[FRDPEntry] = []
         self.github = GitHub(auth=github_auth, user_agent=github_user_agent)
-        self._httpx_client = httpx.AsyncClient()
+        self._httpx_client = httpx.AsyncClient(headers=DEFAULT_HEADERS)

     async def initialize(
         self, application: Application[Any, Any, Any, Any, Any, JobQueue]
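Moving DEFAULT_HEADERS onto the httpx.AsyncClient itself is what lets the per-request headers= arguments be dropped in the hunks below: client-level headers are sent with every request made through that client. A minimal httpx sketch of that behaviour, with an assumed header value standing in for the bot's DEFAULT_HEADERS:

# Minimal sketch; the header value is an assumption, not the bot's actual DEFAULT_HEADERS
import asyncio

import httpx

DEFAULT_HEADERS = {"User-Agent": "rules-bot-sketch"}


async def main() -> None:
    async with httpx.AsyncClient(headers=DEFAULT_HEADERS) as client:
        # No headers= here; the client-level defaults are applied automatically
        response = await client.get("https://docs.python-telegram-bot.org/", follow_redirects=True)
        print(response.request.headers["User-Agent"], response.status_code)


asyncio.run(main())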
@@ -76,6 +78,7 @@ async def update_job(self, context: ContextTypes.DEFAULT_TYPE) -> None:
             )
         async with self.__lock:
             await asyncio.gather(
+                context.application.create_task(self.update_readme()),
                 context.application.create_task(self.update_docs()),
                 context.application.create_task(self.update_wiki()),
                 context.application.create_task(self.update_wiki_code_snippets()),
@@ -108,7 +111,7 @@ async def update_job(self, context: ContextTypes.DEFAULT_TYPE) -> None:
         self.multi_search_combinations.cache_clear()  # pylint:disable=no-member

     async def _update_official_docs(self) -> None:
-        response = await self._httpx_client.get(url=OFFICIAL_URL, headers=DEFAULT_HEADERS)
+        response = await self._httpx_client.get(url=OFFICIAL_URL)
         official_soup = BeautifulSoup(response.content, "html.parser")
         for anchor in official_soup.select("a.anchor"):
             if "-" not in anchor["href"]:
@@ -173,8 +176,26 @@ async def update_docs(self) -> None:
                 )
             )

+    async def update_readme(self) -> None:
+        response = await self._httpx_client.get(url=DOCS_URL, follow_redirects=True)
+        readme_soup = BeautifulSoup(response.content, "html.parser")
+        self._readme = []
+
+        # parse section headers from readme
+        for tag in ["h1", "h2", "h3", "h4", "h5"]:
+            for headline in readme_soup.select(tag):
+                # check if element is inside a hidden div - special casing for the
+                # "Hidden Headline" we include for furo
+                if headline.find_parent("div", attrs={"style": "display: none"}):
+                    continue
+                self._readme.append(
+                    ReadmeSection(
+                        name=str(headline.contents[0]).strip(), anchor=headline.find("a")["href"]
+                    )
+                )
+
     async def update_wiki(self) -> None:
-        response = await self._httpx_client.get(url=WIKI_URL, headers=DEFAULT_HEADERS)
+        response = await self._httpx_client.get(url=WIKI_URL)
         wiki_soup = BeautifulSoup(response.content, "html.parser")
         self._wiki = []
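To illustrate what update_readme extracts, here is a self-contained sketch of the same h1-h5 scraping run against a small hand-written HTML snippet; the sample markup, including the hidden furo helper headline, is an assumption mimicking the rendered readme:

# Self-contained sketch of the headline scraping above; the sample HTML is made up
from bs4 import BeautifulSoup

html = """
<h1>python-telegram-bot<a class="headerlink" href="#python-telegram-bot">¶</a></h1>
<div style="display: none"><h2>Hidden Headline<a href="#hidden-headline">¶</a></h2></div>
<h2>Installing<a class="headerlink" href="#installing">¶</a></h2>
"""
soup = BeautifulSoup(html, "html.parser")

sections = []
for tag in ["h1", "h2", "h3", "h4", "h5"]:
    for headline in soup.select(tag):
        # skip headlines inside hidden divs, as the commit does for the furo theme helper
        if headline.find_parent("div", attrs={"style": "display: none"}):
            continue
        sections.append((str(headline.contents[0]).strip(), headline.find("a")["href"]))

print(sections)  # [('python-telegram-bot', '#python-telegram-bot'), ('Installing', '#installing')]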

@@ -195,9 +216,7 @@ async def update_wiki(self) -> None:
         self._wiki.append(WikiPage(category="Code Resources", name="Examples", url=EXAMPLES_URL))

     async def update_wiki_code_snippets(self) -> None:
-        response = await self._httpx_client.get(
-            url=WIKI_CODE_SNIPPETS_URL, headers=DEFAULT_HEADERS
-        )
+        response = await self._httpx_client.get(url=WIKI_CODE_SNIPPETS_URL)
         code_snippet_soup = BeautifulSoup(response.content, "html.parser")
         self._snippets = []
         for headline in code_snippet_soup.select(
@@ -211,7 +230,7 @@ async def update_wiki_code_snippets(self) -> None:
             )

     async def update_wiki_faq(self) -> None:
-        response = await self._httpx_client.get(url=WIKI_FAQ_URL, headers=DEFAULT_HEADERS)
+        response = await self._httpx_client.get(url=WIKI_FAQ_URL)
         faq_soup = BeautifulSoup(response.content, "html.parser")
         self._faq = []
         for headline in faq_soup.select("div#wiki-body h3"):
@@ -223,7 +242,7 @@ async def update_wiki_faq(self) -> None:
             )

     async def update_wiki_design_patterns(self) -> None:
-        response = await self._httpx_client.get(url=WIKI_FRDP_URL, headers=DEFAULT_HEADERS)
+        response = await self._httpx_client.get(url=WIKI_FRDP_URL)
         frdp_soup = BeautifulSoup(response.content, "html.parser")
         self._design_patterns = []
         for headline in frdp_soup.select("div#wiki-body h3,div#wiki-body h2"):
@@ -244,9 +263,10 @@ async def search(
     ) -> Optional[List[BaseEntry]]:
         """Searches all available entries for appropriate results. This includes:

+        * readme sections
         * wiki pages
         * FAQ entries
-        * Design Pattern entries entries
+        * Design Pattern entries
         * Code snippets
         * examples
         * documentation
@@ -312,6 +332,7 @@ async def search(
         async with self.__lock:
             if not search_entries:
                 search_entries = itertools.chain(
+                    self._readme,
                     self._wiki,
                     self.github.all_examples,
                     self._faq,
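Once chained in, readme sections are scored like every other entry type: each entry's compare_to_query result is compared against the query. A rough illustration of that idea (a simplification with made-up entries, not the bot's actual search code):

# Simplified illustration only - not the bot's actual ranking implementation
import itertools

from thefuzz import fuzz


class SketchEntry:
    """Stand-in for the BaseEntry subclasses; the names below are made up."""

    def __init__(self, name: str) -> None:
        self.name = name

    def compare_to_query(self, search_query: str) -> float:
        return fuzz.token_set_ratio(self.name, search_query)


readme = [SketchEntry("readme Installing")]
wiki = [SketchEntry("Frequently Asked Questions"), SketchEntry("Code Resources")]

query = "installing python-telegram-bot"
ranked = sorted(
    itertools.chain(readme, wiki),
    key=lambda entry: entry.compare_to_query(query),
    reverse=True,
)
print([entry.name for entry in ranked])  # entries ordered best fuzzy match first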

0 commit comments
