ma2za
diff --git a/README.md b/README.md
Lines changed: 11 additions & 0 deletions
diff --git a/pyproject.toml b/pyproject.toml
Lines changed: 2 additions & 0 deletions
diff --git a/substack/mdrender.py b/substack/mdrender.py
Lines changed: 183 additions & 0 deletions
diff --git a/substack/nodes.py b/substack/nodes.py
Lines changed: 121 additions & 0 deletions
@@ -156,6 +156,17 @@ This is a paragraph with **bold** and *italic* text.
 """
 post.from_markdown(markdown_content, api=api)
 
+# Markdown footnotes are supported. References become inline anchors and
+# definitions (which may span multiple paragraphs) become footnote blocks,
+# numbered by order of first reference. Labels can be numbers or names.
+footnote_markdown = """
+A claim that needs support.[^1] And another.[^source]
+
+[^1]: The supporting detail, with a [link](https://example.com).
+[^source]: Author, *Title* (2025).
+"""
+post.from_markdown(footnote_markdown, api=api)
+
 draft = api.post_draft(post.get_draft())
 
 # set section (can only be done after first posting the draft)
 
@@ -21,6 +21,8 @@ python = "<4.0,>=3.10"
 requests = "^2.32.0"
 python-dotenv = "^1.2.1"
 PyYAML = "^6.0"
+markdown-it-py = "^3.0"
+mdit-py-plugins = "^0.4"
 
 [tool.poetry.group.dev.dependencies]
 
 
@@ -0,0 +1,183 @@
+"""PROTOTYPE: Markdown -> Substack ProseMirror via markdown-it-py.
+
+This replaces the hand-rolled parser in Post.from_markdown() with a real
+CommonMark parser (markdown-it-py) plus the standard footnote plugin, and a
+small renderer that walks the syntax tree into Substack's node schema.
+
+Node construction goes through ``substack.nodes`` so the (undocumented) schema
+lives in exactly one place.
+
+Not wired for production; imported by Post.from_markdown() when available so the
+existing test-suite can be run against it for evaluation.
+"""
+
+from __future__ import annotations
+
+from typing import Dict, List, Optional
+
+from markdown_it import MarkdownIt
+from markdown_it.tree import SyntaxTreeNode
+from mdit_py_plugins.footnote import footnote_plugin
+
+from substack import nodes
+from substack.nodes import MarkType, NodeType
+
+# Maps the syntax-tree type of an inline formatting container ("strong", "em",
+# "s") to the mark dict that _render_inline adds to everything nested inside it.
+_MARK_FOR = {
+    "strong": {"type": MarkType.STRONG},
+    "em": {"type": MarkType.EM},
+    "s": {"type": MarkType.STRIKETHROUGH},
+}
+
+
+def _make_parser() -> MarkdownIt:
+    """Build the parser: CommonMark preset, footnote plugin, strikethrough on."""
+    parser = MarkdownIt("commonmark")
+    parser.use(footnote_plugin)
+    parser.enable("strikethrough")
+    return parser
+
+
+def _coalesce(out_nodes: List[Dict]) -> List[Dict]:
+    """Join runs of neighbouring text nodes whose marks are equal.
+
+    The first node of each run absorbs the text of the ones that follow it
+    (it is updated in place); every other node is passed through unchanged.
+    """
+    result: List[Dict] = []
+    for current in out_nodes:
+        previous = result[-1] if result else None
+        mergeable = (
+            previous is not None
+            and current.get("type") == NodeType.TEXT
+            and previous.get("type") == NodeType.TEXT
+            and current.get("marks") == previous.get("marks")
+        )
+        if not mergeable:
+            result.append(current)
+            continue
+        previous["text"] += current["text"]
+    return result
+
+
+def _plain_text(node: SyntaxTreeNode) -> str:
+    """Flatten an inline subtree to unformatted text (used for image alt text)."""
+    if node.type in ("text", "code_inline"):
+        return node.content
+    if node.type in ("softbreak", "hardbreak"):
+        return " "
+    return "".join(_plain_text(child) for child in node.children)
+
+
+def _render_inline(node: SyntaxTreeNode, marks: List[Dict]) -> List[Dict]:
+    """Render an inline subtree into a flat list of text / anchor nodes.
+
+    Args:
+        node: Inline node whose children are rendered.
+        marks: Marks inherited from enclosing formatting; applied to every
+            text node produced at this level and extended for nested
+            formatting. The list itself is never mutated.
+
+    Returns:
+        Text and footnote-anchor nodes, with adjacent text nodes that carry
+        equal marks merged. Child types not handled below are dropped.
+    """
+    out: List[Dict] = []
+    for child in node.children:
+        t = child.type
+        if t == "text":
+            if child.content:
+                out.append(nodes.text(child.content, marks))
+        elif t == "code_inline":
+            out.append(nodes.text(child.content, marks + [nodes.code_mark()]))
+        elif t in _MARK_FOR:
+            out.extend(_render_inline(child, marks + [_MARK_FOR[t]]))
+        elif t == "link":
+            href = child.attrs.get("href", "")
+            out.extend(_render_inline(child, marks + [nodes.link_mark(href)]))
+        elif t in ("softbreak", "hardbreak"):
+            # Both kinds of line break collapse to a single space.
+            out.append(nodes.text(" ", marks))
+        elif t == "footnote_ref":
+            # The anchor number is the reference id offset by one.
+            out.append(nodes.footnote_anchor(child.meta["id"] + 1))
+        elif t == "image":
+            # Inline images are rare in this schema; fall back to alt text.
+            # The alt text is gathered recursively so that formatting inside
+            # it (e.g. ``![*a* b](x)``) does not lose the formatted words.
+            alt = child.attrs.get("alt") or _plain_text(child)
+            if alt:
+                out.append(nodes.text(alt, marks))
+    return _coalesce(out)
+
+
+def _only_image(inline: SyntaxTreeNode) -> Optional[SyntaxTreeNode]:
+    """Return the image when ``inline`` holds nothing else, otherwise ``None``.
+
+    Softbreaks are ignored. The image may sit directly in ``inline`` or be the
+    sole content of a single link; in the latter case the link target is
+    stashed on the returned node as ``_link_href``.
+    """
+    significant = [c for c in inline.children if c.type != "softbreak"]
+    if len(significant) != 1:
+        return None
+
+    sole = significant[0]
+    if sole.type == "image":
+        return sole
+    if sole.type != "link":
+        return None
+
+    wrapped = [c for c in sole.children if c.type != "softbreak"]
+    if len(wrapped) == 1 and wrapped[0].type == "image":
+        image = wrapped[0]
+        image._link_href = sole.attrs.get("href")  # type: ignore[attr-defined]
+        return image
+    return None
+
+
+def _captioned_image(img: SyntaxTreeNode, api) -> Dict:
+    """Build a captioned-image node from a markdown-it image node.
+
+    Args:
+        img: The image node. When ``_only_image`` found it wrapped in a link,
+            it carries the link target in a ``_link_href`` attribute.
+        api: Optional API client. When given and the source does not start
+            with ``http``, ``api.get_image(src)`` is asked for a hosted URL.
+
+    Returns:
+        The node built by ``nodes.captioned_image``.
+    """
+    import logging  # function-scope import keeps this block self-contained
+
+    src = img.attrs.get("src", "")
+    if src.startswith("/"):
+        src = src[1:]
+    if api is not None and not src.startswith("http"):
+        # Best-effort: a failed lookup keeps the original path instead of
+        # aborting the whole conversion, but it is no longer silent.
+        try:
+            hosted_url = api.get_image(src).get("url")
+        except Exception:
+            logging.getLogger(__name__).warning(
+                "Could not resolve image %r through the API; keeping the path",
+                src,
+                exc_info=True,
+            )
+        else:
+            # A response without a "url" must not replace src with None.
+            if hosted_url:
+                src = hosted_url
+    # markdown-it stores the image alt text as the node's content, not in attrs.
+    alt = img.content or img.attrs.get("alt") or None
+    return nodes.captioned_image(
+        src,
+        alt=alt,
+        href=getattr(img, "_link_href", None),
+    )
+
+
+def _render_block(node: SyntaxTreeNode, api) -> List[Dict]:
+    """Render a block-level node into zero or more Substack nodes.
+
+    Args:
+        node: A block-level node of the markdown-it syntax tree.
+        api: Optional API client, passed through to image handling; may be
+            ``None``.
+
+    Returns:
+        The Substack nodes for ``node``. A ``footnote_block`` expands to one
+        footnote node per definition; block types not handled here yield an
+        empty list and are therefore dropped from the output.
+    """
+    t = node.type
+
+    if t == "paragraph":
+        inline = node.children[0]
+        # A paragraph holding nothing but an image becomes an image block.
+        img = _only_image(inline)
+        if img is not None:
+            return [_captioned_image(img, api)]
+        return [nodes.paragraph(_render_inline(inline, []))]
+
+    if t == "heading":
+        # The tag is "h1".."h6"; its second character is the level.
+        level = int(node.tag[1])
+        return [nodes.heading(_render_inline(node.children[0], []), level=level)]
+
+    if t == "hr":
+        return [nodes.horizontal_rule()]
+
+    if t in ("fence", "code_block"):
+        # Only the first word of a fence's info string names the language
+        # (CommonMark); anything after it is free-form metadata such as
+        # ``title="x"``. An empty info string means no language.
+        info_words = node.info.split()
+        return [
+            nodes.code_block(
+                node.content.rstrip("\n"),
+                language=info_words[0] if info_words else None,
+            )
+        ]
+
+    if t == "blockquote":
+        paras: List[Dict] = []
+        for child in node.children:
+            paras.extend(_render_block(child, api))
+        return [nodes.blockquote(paras)]
+
+    if t == "bullet_list":
+        return [nodes.bullet_list(_render_list_items(node, api))]
+
+    if t == "ordered_list":
+        return [nodes.ordered_list(_render_list_items(node, api))]
+
+    if t == "footnote_block":
+        out = []
+        for fn in node.children:
+            # Same numbering as the inline anchors: definition id plus one.
+            number = fn.meta["id"] + 1
+            # Only paragraph children of a definition are rendered; any other
+            # block type inside a footnote is skipped.
+            paras = [
+                nodes.paragraph(_render_inline(child.children[0], []))
+                for child in fn.children
+                if child.type == "paragraph"
+            ]
+            out.append(nodes.footnote(number, paras))
+        return out
+
+    return []
+
+
+def _render_list_items(list_node: SyntaxTreeNode, api) -> List[Dict]:
+    """Render every item of a bullet or ordered list into a list-item node.
+
+    Args:
+        list_node: The list node whose children are the items.
+        api: Optional API client, passed through to block rendering.
+
+    Returns:
+        One ``list_item`` dict per child of ``list_node``.
+    """
+    items = []
+    for li in list_node.children:
+        # A list_item built by nodes.list_item wraps inline content in a single
+        # paragraph; here items may already contain block nodes, so build directly.
+        content: List[Dict] = []
+        for child in li.children:
+            content.extend(_render_block(child, api))
+        if not content:
+            # An empty item ("- ") would otherwise have no children at all;
+            # give it an empty paragraph, as nodes.list_item and
+            # nodes.footnote do for their own content.
+            content = [nodes.paragraph()]
+        items.append({"type": NodeType.LIST_ITEM, "content": content})
+    return items
+
+
+def markdown_to_doc(markdown_content: str, api=None) -> List[Dict]:
+    """Parse Markdown and return the Substack block nodes it renders to.
+
+    ``api`` is optional and is handed to the block renderer, which uses it
+    for image handling.
+    """
+    tokens = _make_parser().parse(markdown_content)
+    root = SyntaxTreeNode(tokens)
+    return [
+        rendered
+        for block in root.children
+        for rendered in _render_block(block, api)
+    ]
@@ -0,0 +1,121 @@
+"""ProseMirror node builders for Substack documents.
+
+PROTOTYPE: this module centralises the (undocumented) Substack ProseMirror
+schema in one place. Today the node-type strings ("paragraph", "footnoteAnchor",
+"image2", ...) and their shapes are scattered across post.py as inline dict
+literals. Pulling them here gives:
+
+  * one source of truth for node shapes (so a schema change is a one-line fix),
+  * discoverable, typed constructors instead of bare dict literals,
+  * a natural seam for validation.
+
+The builders intentionally return plain dicts so they stay 100% compatible with
+the existing draft_body structure.
+"""
+
+from __future__ import annotations
+
+from typing import Dict, List, Optional
+
+
+class NodeType:
+    """Values of the ``type`` key of the document nodes built in this module."""
+
+    # NOTE(review): the values mix camelCase and snake_case; presumably these
+    # are the exact names Substack expects -- confirm before normalising.
+    DOC = "doc"
+    PARAGRAPH = "paragraph"
+    HEADING = "heading"
+    TEXT = "text"
+    BLOCKQUOTE = "blockquote"
+    CODE_BLOCK = "codeBlock"
+    HORIZONTAL_RULE = "horizontal_rule"
+    BULLET_LIST = "bullet_list"
+    ORDERED_LIST = "ordered_list"
+    LIST_ITEM = "list_item"
+    FOOTNOTE = "footnote"
+    FOOTNOTE_ANCHOR = "footnoteAnchor"
+    CAPTIONED_IMAGE = "captionedImage"
+
+
+class MarkType:
+    """Values of the ``type`` key of the marks attached to text nodes."""
+
+    STRONG = "strong"
+    EM = "em"
+    CODE = "code"
+    STRIKETHROUGH = "strikethrough"
+    LINK = "link"
+
+
+def code_mark() -> Dict:
+    """Return a fresh inline-code mark."""
+    return dict(type=MarkType.CODE)
+
+
+def text(value: str, marks: Optional[List[Dict]] = None) -> Dict:
+    """Build a text node; ``marks`` is attached only when it is non-empty."""
+    if not marks:
+        return {"type": NodeType.TEXT, "text": value}
+    return {"type": NodeType.TEXT, "text": value, "marks": marks}
+
+
+def link_mark(href: str) -> Dict:
+    """Build a link mark pointing at ``href``."""
+    attrs = {"href": href}
+    return {"type": MarkType.LINK, "attrs": attrs}
+
+
+def paragraph(content: Optional[List[Dict]] = None) -> Dict:
+    """Build a paragraph node; missing or empty content becomes ``[]``."""
+    children = content if content else []
+    return {"type": NodeType.PARAGRAPH, "content": children}
+
+
+def heading(content: List[Dict], level: int = 1) -> Dict:
+    """Build a heading node with the given inline content and level."""
+    node: Dict = {"type": NodeType.HEADING}
+    node["content"] = content
+    node["attrs"] = {"level": level}
+    return node
+
+
+def horizontal_rule() -> Dict:
+    """Build a horizontal-rule node (it has no content or attributes)."""
+    return dict(type=NodeType.HORIZONTAL_RULE)
+
+
+def blockquote(paragraphs: List[Dict]) -> Dict:
+    """Build a blockquote node; the content key is left out when empty."""
+    if not paragraphs:
+        return {"type": NodeType.BLOCKQUOTE}
+    return {"type": NodeType.BLOCKQUOTE, "content": paragraphs}
+
+
+def list_item(content_nodes: List[Dict]) -> Dict:
+    """Build a list item whose inline content sits in a single paragraph."""
+    wrapper = paragraph(content_nodes)
+    return {"type": NodeType.LIST_ITEM, "content": [wrapper]}
+
+
+def bullet_list(items: List[Dict]) -> Dict:
+    """Build an unordered list node from already-built list items."""
+    return dict(type=NodeType.BULLET_LIST, content=items)
+
+
+def ordered_list(items: List[Dict]) -> Dict:
+    """Build an ordered list node from already-built list items."""
+    return dict(type=NodeType.ORDERED_LIST, content=items)
+
+
+def code_block(code: str, language: Optional[str] = None) -> Dict:
+    """Build a code-block node.
+
+    Args:
+        code: The block's source text.
+        language: Optional language name; stored under ``attrs`` when given.
+
+    Returns:
+        The node dict. Empty ``code`` produces an empty content list rather
+        than a zero-length text node, which ProseMirror does not allow.
+    """
+    node: Dict = {
+        "type": NodeType.CODE_BLOCK,
+        "content": [text(code)] if code else [],
+    }
+    if language:
+        node["attrs"] = {"language": language}
+    return node
+
+
+def captioned_image(
+    src: str, alt: Optional[str] = None, href: Optional[str] = None
+) -> Dict:
+    """Build a captioned-image node; ``alt`` and ``href`` are added only if set."""
+    node: Dict = {"type": NodeType.CAPTIONED_IMAGE, "src": src}
+    for key, value in (("alt", alt), ("href", href)):
+        if value:
+            node[key] = value
+    return node
+
+
+def footnote_anchor(number: int) -> Dict:
+    """Build the inline anchor that refers to footnote ``number``."""
+    attrs = {"number": number}
+    return {"type": NodeType.FOOTNOTE_ANCHOR, "attrs": attrs}
+
+
+def footnote(number: int, paragraphs: List[Dict]) -> Dict:
+    """Build footnote ``number``; an empty body becomes one empty paragraph."""
+    body = paragraphs if paragraphs else [paragraph()]
+    node: Dict = {"type": NodeType.FOOTNOTE}
+    node["attrs"] = {"number": number}
+    node["content"] = body
+    return node