Skip to content

Commit 0489b20

Browse files
Replace broken links within documentation
1 parent 3ceab9b commit 0489b20

File tree

2 files changed

+243
-130
lines changed

2 files changed

+243
-130
lines changed

meta/scrape_tags.py

Lines changed: 114 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,68 @@
3232
"""
3333

3434

35+
MDN_BASE = "https://developer.mozilla.org/en-US/docs/Web/"
"""
Absolute URL of the English MDN "Web" documentation root

Note that the value ends with a trailing slash.
"""
39+
40+
41+
def htmlElementReplace(lookup: str, presentation: Optional[str] = None) -> str:
    """
    Build a Markdown link to the MDN reference page of an HTML element

    `lookup` is appended unchanged to the MDN HTML element URL.

    `presentation` is the link text. When it is `None`, the link text is the
    lower-cased `lookup` wrapped in angle brackets and backticks, e.g.
    `` `<div>` ``. An empty string is used as-is.
    """
    link_text = f"`<{lookup.lower()}>`" if presentation is None else presentation
    target = f"https://developer.mozilla.org/en-US/docs/Web/HTML/Element/{lookup}"
    return f"[{link_text}]({target})"
51+
52+
53+
def glossaryReplace(lookup: str, presentation: Optional[str] = None) -> str:
    """
    Build a Markdown link to an MDN glossary entry

    `lookup` is the glossary term. Runs of whitespace in it are turned into
    single underscores when building the URL, because a literal space would
    end the Markdown link destination early and MDN page slugs use
    underscores in place of spaces.

    `presentation` is the link text; when it is `None` the term itself is
    shown (with its original spacing).
    """
    if presentation is None:
        presentation = lookup

    # "void element" -> "void_element"; also drops stray outer whitespace
    slug = "_".join(lookup.split())
    url = f"https://developer.mozilla.org/en-US/docs/Glossary/{slug}"

    return f"[{presentation}]({url})"
63+
64+
65+
def cssXrefReplace(lookup: str, presentation: Optional[str] = None) -> str:
    """
    Build a Markdown link to the MDN page of a CSS cross reference

    `lookup` is appended unchanged to the MDN CSS URL.

    `presentation` is the link text; when it is `None`, `lookup` itself is
    shown. An empty string is used as-is.
    """
    link_text = lookup if presentation is None else presentation
    target = f"https://developer.mozilla.org/en-US/docs/Web/CSS/{lookup}"
    return f"[{link_text}]({target})"
75+
76+
77+
def domXrefReplace(lookup: str, presentation: Optional[str] = None) -> str:
    """
    Build a Markdown link to the MDN page of a DOM (Web API) cross reference

    `lookup` is the API name as written in the MDN macro, for example
    `HTMLCanvasElement` or `Document.querySelector()`. MDN publishes members
    under `Interface/member`, so for the URL the `()` call marker is dropped,
    dots become slashes and whitespace becomes underscores.

    `presentation` is the link text; when it is `None` the API name is shown
    exactly as given (including any `()`).
    """
    if presentation is None:
        presentation = lookup

    # "Document.querySelector()" -> "Document/querySelector"
    path = "_".join(lookup.split()).replace("()", "").replace(".", "/")
    url = f"https://developer.mozilla.org/en-US/docs/Web/API/{path}"

    return f"[{presentation}]({url})"
87+
88+
89+
DESCRIPTION_LOOKUPS = {
    "htmlelement": htmlElementReplace,
    "glossary": glossaryReplace,
    "cssxref": cssXrefReplace,
    "domxref": domXrefReplace,
}
"""
Dispatch table from a lower-cased MDN macro name to the function that
renders that macro as a Markdown link

Each function is called as `fn(lookup, presentation)`.
"""
95+
96+
3597
TagMdnInfo = tuple[str, str]
"""Pair of strings holding the info scraped from the MDN docs for one tag"""
3799

@@ -126,6 +188,56 @@ def handle_header_elements(description) -> list[TagMdnInfo]:
126188
return tags
127189

128190

191+
def format_description(description: str, ele: str) -> str:
    """
    Replace parts of the description to fix broken links

    Two kinds of rewriting are applied:

    * Site-relative Markdown links starting with `](/en-US/docs/Web` are made
      absolute using `MDN_BASE`.
    * Macros of the form `{{key("lookup")}}` or
      `{{key("lookup", "presentation")}}` are replaced by the Markdown link
      produced by the matching entry of `DESCRIPTION_LOOKUPS` (the key is
      matched case-insensitively). Arguments after the second are ignored.

    `ele` is the name of the element being processed. It is only used in the
    error message, for debugging purposes.

    If a macro name is not in `DESCRIPTION_LOOKUPS`, a message is printed to
    stderr and the program exits with status 1. Text following an opening
    `{{` that has no closing `}}` is left untouched.
    """
    # Local import: only this function needs it
    import csv

    # Manual links. MDN_BASE already ends in "/", so drop it here to avoid
    # producing "...docs/Web//HTML/...".
    description = description.replace(
        "](/en-US/docs/Web",
        f"]({MDN_BASE.rstrip('/')}",
    )

    # Macros - walk forwards, collecting the untouched text and replacements
    pieces: list[str] = []
    cursor = 0
    while (start := description.find("{{", cursor)) != -1:
        end = description.find("}}", start + 2)
        if end == -1:
            # Unterminated macro: nothing after this point can be complete
            break

        # In format key("arg1", "arg2"). Partition on the first "(" only, so
        # that arguments such as "Document.write()" survive intact.
        key, _, raw_args = description[start + 2:end].strip().partition("(")
        key = key.strip()
        # Remove only the macro's own closing parenthesis
        raw_args = raw_args.strip().removesuffix(")")

        # The csv reader understands double-quoted arguments, including ones
        # that contain commas
        row = next(csv.reader([raw_args], skipinitialspace=True), [])
        fields = [field.strip() for field in row]

        lookup = fields[0] if fields else ""
        # A missing or empty second argument means "use the default text"
        presentation = fields[1] if len(fields) > 1 and fields[1] else None

        # Check for missing keys
        replace = DESCRIPTION_LOOKUPS.get(key.lower())
        if replace is None:
            print(f"lookup '{key}' not in table", file=sys.stderr)
            print(f"Element: {ele}", file=sys.stderr)
            sys.exit(1)

        # Replace elements
        pieces.append(description[cursor:start])
        pieces.append(replace(lookup, presentation))
        cursor = end + 2

    pieces.append(description[cursor:])
    return "".join(pieces)
239+
240+
129241
def parse_markdown_table(lines: Iterator[str]) -> list[TagMdnInfo]:
130242
"""
131243
Parse table from markdown
@@ -141,7 +253,6 @@ def parse_markdown_table(lines: Iterator[str]) -> list[TagMdnInfo]:
141253
_, tag_base, description, _ = line.split('|')
142254

143255
tag_base = tag_base.strip()
144-
description = description.strip()
145256

146257
# Header elements are yucky - we should handle them separately
147258
if tag_base.startswith("[`<h1>`]"):
@@ -152,6 +263,8 @@ def parse_markdown_table(lines: Iterator[str]) -> list[TagMdnInfo]:
152263
.removeprefix('{{HTMLElement("')\
153264
.removesuffix('")}}')
154265

266+
description = format_description(description.strip(), tag_name)
267+
155268
# Element name should be of the format `{{HTMLElement("<name>")}}``
156269
# Grab the actual name
157270
if not tag_base.startswith('{{HTMLElement("'):

0 commit comments

Comments
 (0)