Skip to content

Commit 8cda1d2

Browse files
Addendum
1 parent 40e1450 commit 8cda1d2

File tree

1 file changed

+12
-5
lines changed

1 file changed

+12
-5
lines changed

migrate/oldwiki/scrape.py

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -267,13 +267,20 @@ def parse_notes(content_div):
267267
"text": text
268268
})
269269

270-
# Additional hack: find section 'Remarks' and extract content into an info note
270+
# Additional hack: find section 'Remarks' and extract content into info notes
271271
remarks_header = content_div.find("span", id="Remarks")
272272
if remarks_header:
273-
remarks_paragraph = remarks_header.find_next("p")
274-
if remarks_paragraph:
275-
remarks_text = remarks_paragraph.get_text(" ", strip=True)
276-
if remarks_text:
273+
# collect the text of every <p> sibling that follows the Remarks heading, stopping at the next h2/h3 heading
274+
current = remarks_header.find_parent(["h2", "h3"]).find_next_sibling()
275+
remarks_texts = []
276+
while current:
277+
if current.name in ["h2", "h3"]:
278+
break
279+
if current.name == "p":
280+
remarks_texts.append(current.get_text(" ", strip=True))
281+
current = current.find_next_sibling()
282+
if remarks_texts:
283+
for remarks_text in remarks_texts:
277284
note_boxes.append({
278285
"type": "note",
279286
"text": remarks_text

0 commit comments

Comments
 (0)