Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,21 @@ This is a paragraph with **bold** and *italic* text.
"""
post.from_markdown(markdown_content, api=api)

# Markdown footnotes are supported too. References become inline anchors and
# definitions become footnote blocks, numbered by order of first appearance.
# Labels can be numbers or names (e.g. [^1] or [^source]).
footnote_markdown = """
A claim that needs support.[^1] Another, with a named label.[^source]

[^1]: The supporting detail, with a [link](https://example.com).
[^source]: Author, *Title* (2025).
"""
post.from_markdown(footnote_markdown, api=api)

# Or build footnotes manually:
post.paragraph(content=[{"content": "Some claim."}]).footnote_anchor(1)
post.footnote(1, "The note text, with **formatting** allowed.")

draft = api.post_draft(post.get_draft())

# set section (can only be done after first posting the draft)
Expand Down
202 changes: 202 additions & 0 deletions substack/post.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@

from substack.exceptions import SectionNotExistsException

# Markdown footnote syntax recognised by this module:
#   * ``text.[^label]``        -- an inline reference; group 1 captures the label.
#   * ``[^label]: definition`` -- a definition line; group 1 captures the label
#     and group 2 the text after the colon (one optional whitespace character
#     after the colon is skipped).
# The definition pattern is anchored with ``^``/``$`` and compiled without
# re.MULTILINE, so it is intended to be matched against one line at a time.
FOOTNOTE_REFERENCE_PATTERN = re.compile(r"\[\^([^\]]+)\]")
FOOTNOTE_DEFINITION_PATTERN = re.compile(r"^\[\^([^\]]+)\]:\s?(.*)$")


def tokens_to_text_nodes(tokens: List[Dict]) -> List[Dict]:
"""Convert parse_inline() tokens to ProseMirror text nodes.
Expand Down Expand Up @@ -543,6 +547,186 @@ def code_block(self, content, attrs=None):

return self

def footnote_anchor(self, number: int):
    """
    Attach a footnote reference marker to the most recently added block.

    The marker is a ``footnoteAnchor`` inline node appended after whatever
    inline content the last block in ``draft_body`` already holds.

    Args:
        number: The footnote number this anchor points to.

    Returns:
        Self for method chaining.

    """
    target_block = self.draft_body["content"][-1]
    anchor_node = {"type": "footnoteAnchor", "attrs": {"number": number}}
    # Blocks created without inline content have no "content" key yet.
    inline_nodes = target_block.get("content", [])
    inline_nodes.append(anchor_node)
    target_block["content"] = inline_nodes
    return self

def footnote(self, number: int, content=None):
    """
    Append a footnote block (the note rendered at the foot of the post).

    Args:
        number: The footnote number, matching a footnote_anchor.
        content: One of:
            - a string, parsed for inline Markdown; blank lines split it into
              separate paragraphs;
            - a list of parse_inline() tokens, converted to text nodes and
              stored as a single paragraph;
            - a list whose first item has ``"type": "text"``, used as-is as
              the text nodes of a single paragraph.
            Anything else (including None) yields one empty paragraph.

    Returns:
        Self for method chaining.

    """

    def as_paragraph(inline_nodes):
        return {"type": "paragraph", "content": inline_nodes}

    if isinstance(content, str):
        # Blank lines separate paragraphs; whitespace-only chunks are dropped.
        pieces = (piece.strip() for piece in re.split(r"\n\s*\n", content))
        body = [
            as_paragraph(tokens_to_text_nodes(parse_inline(piece)))
            for piece in pieces
            if piece
        ]
    elif isinstance(content, list):
        # A leading {"type": "text"} item means the caller passed ready nodes.
        already_nodes = bool(content) and content[0].get("type") == "text"
        inline_nodes = content if already_nodes else tokens_to_text_nodes(content)
        body = [as_paragraph(inline_nodes)]
    else:
        body = []

    # A footnote node always carries at least one (possibly empty) paragraph.
    if not body:
        body = [as_paragraph([])]

    footnote_node = {
        "type": "footnote",
        "attrs": {"number": number},
        "content": body,
    }
    existing_blocks = self.draft_body.get("content", [])
    self.draft_body["content"] = existing_blocks + [footnote_node]
    return self

@staticmethod
def _extract_footnote_definitions(markdown_content: str):
    """
    Split ``[^label]: definition`` blocks away from the rest of the Markdown.

    A definition starts on a line matching FOOTNOTE_DEFINITION_PATTERN and
    continues over following lines that begin with a space or tab. A blank
    line keeps the definition open only when the very next line is non-empty
    and indented, in which case it starts a new paragraph of the footnote.
    Lines inside fenced code blocks (delimited by lines starting with three
    backticks) are never treated as definitions.

    Returns:
        A ``(body, definitions)`` tuple: the Markdown with definition lines
        removed, and a ``{label: text}`` dict where the continuation lines of
        each paragraph are joined with single spaces and paragraphs are
        separated by a blank line.

    """

    def is_indented(text):
        return text.startswith((" ", "\t"))

    source = markdown_content.split("\n")
    total = len(source)
    kept = []
    found = {}
    fenced = False
    pos = 0
    while pos < total:
        raw = source[pos]

        # Fence delimiters toggle code mode and always stay in the body.
        if raw.lstrip().startswith("```"):
            fenced = not fenced
            kept.append(raw)
            pos += 1
            continue

        hit = None if fenced else FOOTNOTE_DEFINITION_PATTERN.match(raw)
        if hit is None:
            kept.append(raw)
            pos += 1
            continue

        label = hit.group(1)
        opening = hit.group(2).strip()
        finished = []  # completed paragraphs of this footnote
        pending = [opening] if opening else []  # lines of the open paragraph
        pos += 1
        while pos < total:
            candidate = source[pos]
            if candidate.strip():
                # Non-blank: only indented lines continue the definition.
                if not is_indented(candidate):
                    break
                pending.append(candidate.strip())
                pos += 1
                continue
            # Blank: peek one line ahead to decide whether the footnote goes on.
            follower = source[pos + 1] if pos + 1 < total else ""
            if not (follower.strip() and is_indented(follower)):
                break  # the blank line itself is left for the body
            if pending:
                finished.append(" ".join(pending))
                pending = []
            pos += 1
        if pending:
            finished.append(" ".join(pending))
        found[label] = "\n\n".join(finished)

    return "\n".join(kept), found

@staticmethod
def _number_footnotes(markdown_content: str, definitions: Dict[str, str]):
    """
    Number footnotes by order of first inline reference in the body.

    Only references that will actually be rendered as anchors are counted:
    occurrences inside fenced code blocks (lines between ``` delimiters) and
    inside inline code spans are ignored, so a ``[^label]`` that merely
    appears in code cannot shift the numbering of the visible anchors.

    Args:
        markdown_content: The Markdown body, with definition lines already
            removed.
        definitions: ``{label: definition_text}`` mapping; only labels present
            here receive a number.

    Returns:
        A ``{label: number}`` dict with numbers starting at 1. Referenced
        labels come first in order of first reference; defined-but-unreferenced
        labels follow in definition order.

    """
    # A dict is used as an insertion-ordered set for O(1) membership checks.
    order: Dict[str, None] = {}
    in_code_fence = False
    for line in markdown_content.split("\n"):
        if line.lstrip().startswith("```"):
            in_code_fence = not in_code_fence
            continue
        if in_code_fence:
            continue
        # Blank out inline code spans (any backtick run closed by an equal
        # run). Replacing with a space keeps text on either side of the span
        # from fusing into a spurious reference.
        visible = re.sub(r"(`+).*?\1", " ", line)
        for match in FOOTNOTE_REFERENCE_PATTERN.finditer(visible):
            label = match.group(1)
            if label in definitions:
                order.setdefault(label, None)
    # Defined-but-unreferenced footnotes go last, in definition order.
    for label in definitions:
        order.setdefault(label, None)
    return {label: index + 1 for index, label in enumerate(order)}

def _inject_footnote_anchors(self, node: Dict, numbers_by_label: Dict[str, int]):
    """
    Replace ``[^label]`` markers in text nodes with footnoteAnchor nodes.

    Walks ``node`` recursively and rewrites its ``content`` list in place.
    Code blocks and text nodes carrying a ``code`` mark are left untouched,
    and markers whose label is not in ``numbers_by_label`` stay as literal
    text. Text on either side of a replaced marker keeps the marks of the
    text node it came from.

    Args:
        node: A document node (dict) whose ``content`` may hold child nodes.
        numbers_by_label: ``{label: number}`` mapping of known footnotes.

    """
    # Never rewrite the contents of a code block.
    if node.get("type") == "codeBlock":
        return
    children = node.get("content")
    if not isinstance(children, list):
        return

    def make_text(fragment, marks):
        piece = {"type": "text", "text": fragment}
        if marks:
            piece["marks"] = marks
        return piece

    rebuilt = []
    for child in children:
        text = child.get("text", "")
        is_code = any(
            mark.get("type") == "code" for mark in (child.get("marks") or [])
        )
        splittable = (
            child.get("type") == "text"
            and not is_code
            and FOOTNOTE_REFERENCE_PATTERN.search(text)
        )
        if not splittable:
            # Not a rewritable text node: descend into it and keep it as-is.
            self._inject_footnote_anchors(child, numbers_by_label)
            rebuilt.append(child)
            continue

        marks = child.get("marks")
        cursor = 0  # end of the last marker that was replaced
        for ref in FOOTNOTE_REFERENCE_PATTERN.finditer(text):
            label = ref.group(1)
            if label not in numbers_by_label:
                # Unknown label: the literal text is carried by a later segment.
                continue
            if ref.start() > cursor:
                rebuilt.append(make_text(text[cursor:ref.start()], marks))
            rebuilt.append(
                {"type": "footnoteAnchor", "attrs": {"number": numbers_by_label[label]}}
            )
            cursor = ref.end()
        if cursor < len(text):
            rebuilt.append(make_text(text[cursor:], marks))

    node["content"] = rebuilt

def from_markdown(self, markdown_content: str, api=None):
"""
Parse Markdown content and add it to the post.
Expand All @@ -559,6 +743,10 @@ def from_markdown(self, markdown_content: str, api=None):
- Ordered lists: Lines starting with '1.', '2.', etc.
- Horizontal rules: Lines with ---, ***, or ___
- Inline formatting: **bold**, *italic*, ***bold+italic***, `code`, ~~strikethrough~~
- Footnotes: ``text.[^label]`` references plus ``[^label]: definition``
lines. References become inline anchors and definitions become
footnote blocks, numbered by order of first appearance. Labels may be
numbers or names (e.g. ``[^1]`` or ``[^agi-book]``).

Args:
markdown_content: Markdown string to parse and add to the post.
Expand All @@ -572,6 +760,13 @@ def from_markdown(self, markdown_content: str, api=None):
>>> post = Post("Title", "Subtitle", user_id)
>>> post.from_markdown("# Heading\\n\\nThis is **bold** text with [a link](https://example.com).")
"""
# Footnotes: extract ``[^label]: ...`` definitions and number them by
# order of first reference before parsing the rest of the body.
markdown_content, footnote_definitions = self._extract_footnote_definitions(
markdown_content
)
footnote_numbers = self._number_footnotes(markdown_content, footnote_definitions)

lines = markdown_content.split("\n")
blocks = []
current_block: List[str] = []
Expand Down Expand Up @@ -844,4 +1039,11 @@ def flush_ordered():
tokens = parse_inline(text_content)
self.add({"type": "paragraph", "content": tokens})

# Footnotes: turn ``[^label]`` references into inline anchors, then append
# the footnote blocks in numbered order.
if footnote_numbers:
self._inject_footnote_anchors(self.draft_body, footnote_numbers)
for label, number in sorted(footnote_numbers.items(), key=lambda item: item[1]):
self.footnote(number, footnote_definitions[label])

return self
Loading