From 881808eb73c749a82fe278b96b4fcb96e7f60abf Mon Sep 17 00:00:00 2001
From: Matthias Splieth
Date: Fri, 27 Mar 2026 13:15:57 +0100
Subject: [PATCH 1/2] Include draw.io diagrams in exports
---
src/confluence2md/cli.py | 8 +-
src/confluence2md/client.py | 40 +++++++++
src/confluence2md/renderer.py | 124 +++++++++++++++++++++++--
tests/test_renderer.py | 165 +++++++++++++++++++++++++++++++++-
uv.lock | 2 +-
5 files changed, 326 insertions(+), 13 deletions(-)
diff --git a/src/confluence2md/cli.py b/src/confluence2md/cli.py
index 6b41400..e2d30e0 100644
--- a/src/confluence2md/cli.py
+++ b/src/confluence2md/cli.py
@@ -80,14 +80,14 @@ def _handle_export(args: argparse.Namespace, config: Config) -> None:
if args.page_id:
page = fetch_page(confluence, args.page_id)
- path = export_page(page, config.output)
+ path = export_page(page, config.output, confluence)
print(f"Exported: {path}")
include_children = args.include_children or config.output.include_children
if include_children:
children = fetch_child_pages(confluence, args.page_id)
if children:
- child_paths = export_pages(children, config.output)
+ child_paths = export_pages(children, config.output, confluence)
for p in child_paths:
print(f"Exported: {p}")
total = 1 + len(child_paths)
@@ -99,7 +99,7 @@ def _handle_export(args: argparse.Namespace, config: Config) -> None:
if not pages:
print("No pages found.")
return
- paths = export_pages(pages, config.output)
+ paths = export_pages(pages, config.output, confluence)
for p in paths:
print(f"Exported: {p}")
print(f"\n{len(paths)} page(s) exported to {config.output.directory}")
@@ -108,7 +108,7 @@ def _handle_export(args: argparse.Namespace, config: Config) -> None:
if not pages:
print("No pages found.")
return
- paths = export_pages(pages, config.output)
+ paths = export_pages(pages, config.output, confluence)
for p in paths:
print(f"Exported: {p}")
print(f"\n{len(paths)} page(s) exported to {config.output.directory}")
diff --git a/src/confluence2md/client.py b/src/confluence2md/client.py
index 2fa6b8b..6226bb7 100644
--- a/src/confluence2md/client.py
+++ b/src/confluence2md/client.py
@@ -1,4 +1,5 @@
from dataclasses import dataclass, field
+from pathlib import Path
from typing import Any
from atlassian import Confluence
@@ -18,6 +19,14 @@ class Page:
parent_title: str = ""
+@dataclass
+class Attachment:
+ id: str
+ title: str
+ media_type: str
+ download_url: str
+
+
def connect(config: ConfluenceConfig) -> Confluence:
"""Create an authenticated Confluence client."""
if config.username:
@@ -151,3 +160,34 @@ def _build_page_url(raw: dict[str, Any], confluence: Confluence) -> str:
base = links.get("base", confluence.url.rstrip("/"))
return f"{base}{webui}"
return ""
+
+
+def fetch_attachments(confluence: Confluence, page_id: str) -> list[Attachment]:
+ """Fetch all attachments for a page."""
+ results = confluence.get_attachments_from_content(page_id) # type: ignore[no-untyped-call]
+ attachments = []
+ for item in results.get("results", []):
+ download_url = item.get("_links", {}).get("download", "")
+ attachments.append(
+ Attachment(
+ id=str(item.get("id", "")),
+ title=item.get("title", ""),
+ media_type=item.get("metadata", {}).get("mediaType", ""),
+ download_url=download_url,
+ )
+ )
+ return attachments
+
+
+def download_attachment(
+ confluence: Confluence, attachment: Attachment, dest: Path
+) -> Path:
+ """Download an attachment to the given directory. Returns the file path."""
+ dest.mkdir(parents=True, exist_ok=True)
+ filepath = dest / attachment.title
+ response = confluence.request(
+ method="GET",
+ path=attachment.download_url,
+ )
+ filepath.write_bytes(response.content)
+ return filepath
diff --git a/src/confluence2md/renderer.py b/src/confluence2md/renderer.py
index 3870cbb..dd65f60 100644
--- a/src/confluence2md/renderer.py
+++ b/src/confluence2md/renderer.py
@@ -1,9 +1,16 @@
import re
from pathlib import Path
+from typing import Optional
+from atlassian import Confluence
from markdownify import markdownify
-from .client import Page
+from .client import (
+ Attachment,
+ Page,
+ download_attachment,
+ fetch_attachments,
+)
from .config import OutputConfig
@@ -42,20 +49,46 @@ def render_page(page: Page, config: OutputConfig) -> str:
return "\n".join(lines)
-def export_page(page: Page, config: OutputConfig) -> Path:
+def export_page(
+ page: Page,
+ config: OutputConfig,
+ confluence: Optional[Confluence] = None,
+) -> Path:
"""Export a single page to a Markdown file. Returns the file path."""
- content = render_page(page, config)
- filename = _safe_filename(config.filename_pattern.format(title=page.title)) + ".md"
output_dir = Path(config.directory)
output_dir.mkdir(parents=True, exist_ok=True)
+
+ body = page.body
+ if confluence:
+ body = _process_drawio_macros(body, page, output_dir, confluence)
+
+ content = render_page(
+ Page(
+ id=page.id,
+ title=page.title,
+ space_key=page.space_key,
+ body=body,
+ labels=page.labels,
+ url=page.url,
+ version=page.version,
+ parent_title=page.parent_title,
+ ),
+ config,
+ )
+
+ filename = _safe_filename(config.filename_pattern.format(title=page.title)) + ".md"
filepath = output_dir / filename
filepath.write_text(content, encoding="utf-8")
return filepath
-def export_pages(pages: list[Page], config: OutputConfig) -> list[Path]:
+def export_pages(
+ pages: list[Page],
+ config: OutputConfig,
+ confluence: Optional[Confluence] = None,
+) -> list[Path]:
"""Export multiple pages to Markdown files."""
- return [export_page(page, config) for page in pages]
+ return [export_page(page, config, confluence) for page in pages]
def _convert_body(html: str) -> str:
@@ -63,6 +96,85 @@ def _convert_body(html: str) -> str:
return markdownify(html, heading_style="ATX", strip=["style"])
+def _extract_drawio_diagram_names(html: str) -> list[str]:
+ """Extract draw.io diagram names from Confluence storage format HTML."""
+ names: list[str] = []
+ # Confluence macros use ac: namespace prefixes which aren't valid XML
+ # without namespace declarations, so we use regex to extract them.
+ macro_pattern = re.compile(
+ r'<ac:structured-macro[^>]*ac:name=["\']drawio["\'][^>]*>'
+ r"(.*?)</ac:structured-macro>",
+ re.DOTALL,
+ )
+ param_pattern = re.compile(
+ r'<ac:parameter[^>]*ac:name=["\']diagramName["\'][^>]*>'
+ r"(.*?)</ac:parameter>",
+ re.DOTALL,
+ )
+ for macro_match in macro_pattern.finditer(html):
+ macro_body = macro_match.group(1)
+ param_match = param_pattern.search(macro_body)
+ if param_match:
+ names.append(param_match.group(1).strip())
+ return names
+
+
+def _find_drawio_png(
+ diagram_name: str, attachments: list[Attachment]
+) -> Optional[Attachment]:
+ """Find the PNG attachment for a draw.io diagram."""
+ # draw.io stores previews with various naming conventions
+ candidates = [
+ f"{diagram_name}.png",
+ f"{diagram_name}.drawio.png",
+ ]
+ for att in attachments:
+ if att.title in candidates:
+ return att
+ return None
+
+
+def _process_drawio_macros(
+ html: str,
+ page: Page,
+ output_dir: Path,
+ confluence: Confluence,
+) -> str:
+ """Replace draw.io macros with image references and download PNGs."""
+ diagram_names = _extract_drawio_diagram_names(html)
+ if not diagram_names:
+ return html
+
+ attachments = fetch_attachments(confluence, page.id)
+
+ macro_pattern = re.compile(
+ r'<ac:structured-macro[^>]*ac:name=["\']drawio["\'][^>]*>'
+ r"(.*?)</ac:structured-macro>",
+ re.DOTALL,
+ )
+ param_pattern = re.compile(
+ r'<ac:parameter[^>]*ac:name=["\']diagramName["\'][^>]*>'
+ r"(.*?)</ac:parameter>",
+ re.DOTALL,
+ )
+
+ def _replace_macro(match: re.Match[str]) -> str:
+ macro_body = match.group(1)
+ param_match = param_pattern.search(macro_body)
+ if not param_match:
+ return match.group(0)
+
+ diagram_name = param_match.group(1).strip()
+ png_attachment = _find_drawio_png(diagram_name, attachments)
+ if not png_attachment:
+ return match.group(0)
+
+ download_attachment(confluence, png_attachment, output_dir)
+ return f'<img src="{png_attachment.title}" alt="{diagram_name}" />'
+
+ return macro_pattern.sub(_replace_macro, html)
+
+
def _safe_filename(name: str) -> str:
"""Sanitize a string for use as a filename."""
# Replace characters that are problematic in filenames
diff --git a/tests/test_renderer.py b/tests/test_renderer.py
index 9223a15..efa0b3a 100644
--- a/tests/test_renderer.py
+++ b/tests/test_renderer.py
@@ -1,6 +1,15 @@
-from confluence2md.client import Page
+from unittest.mock import MagicMock, patch
+
+from confluence2md.client import Attachment, Page
from confluence2md.config import OutputConfig
-from confluence2md.renderer import _safe_filename, render_page
+from confluence2md.renderer import (
+ _extract_drawio_diagram_names,
+ _find_drawio_png,
+ _process_drawio_macros,
+ _safe_filename,
+ export_page,
+ render_page,
+)
def test_render_basic_page():
@@ -148,3 +157,155 @@ def test_render_page_labels_hidden():
# then
assert "| Labels |" not in md
assert "| Labels | draft |" not in md
+
+
+# --- draw.io support ---
+
+DRAWIO_MACRO = (
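+ '<ac:structured-macro ac:name="drawio">'
+ '<ac:parameter ac:name="diagramName">Architecture</ac:parameter>'
+ '<ac:parameter ac:name="width">800</ac:parameter>'
+ "</ac:structured-macro>"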
+)
+
+DRAWIO_MACRO_TWO = (
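+ '<ac:structured-macro ac:name="drawio">'
+ '<ac:parameter ac:name="diagramName">Flow</ac:parameter>'
+ "</ac:structured-macro>"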
+)
+
+
+def test_extract_drawio_diagram_names_single():
+ html = f"<p>Before</p>{DRAWIO_MACRO}<p>After</p>"
+ assert _extract_drawio_diagram_names(html) == ["Architecture"]
+
+
+def test_extract_drawio_diagram_names_multiple():
+ html = f"{DRAWIO_MACRO}<p>text</p>{DRAWIO_MACRO_TWO}"
+ assert _extract_drawio_diagram_names(html) == ["Architecture", "Flow"]
+
+
+def test_extract_drawio_diagram_names_none():
+ html = "<p>No diagrams here</p>"
+ assert _extract_drawio_diagram_names(html) == []
+
+
+def test_find_drawio_png_direct_match():
+ attachments = [
+ Attachment(
+ id="1",
+ title="Architecture.png",
+ media_type="image/png",
+ download_url="/download/1",
+ ),
+ Attachment(
+ id="2",
+ title="other.pdf",
+ media_type="application/pdf",
+ download_url="/download/2",
+ ),
+ ]
+ result = _find_drawio_png("Architecture", attachments)
+ assert result is not None
+ assert result.title == "Architecture.png"
+
+
+def test_find_drawio_png_drawio_suffix():
+ attachments = [
+ Attachment(
+ id="1",
+ title="Architecture.drawio.png",
+ media_type="image/png",
+ download_url="/download/1",
+ ),
+ ]
+ result = _find_drawio_png("Architecture", attachments)
+ assert result is not None
+ assert result.title == "Architecture.drawio.png"
+
+
+def test_find_drawio_png_no_match():
+ attachments = [
+ Attachment(
+ id="1",
+ title="unrelated.png",
+ media_type="image/png",
+ download_url="/download/1",
+ ),
+ ]
+ result = _find_drawio_png("Architecture", attachments)
+ assert result is None
+
+
+@patch("confluence2md.renderer.download_attachment")
+@patch("confluence2md.renderer.fetch_attachments")
+def test_process_drawio_macros(mock_fetch, mock_download):
+ # given
+ mock_fetch.return_value = [
+ Attachment(
+ id="1",
+ title="Architecture.png",
+ media_type="image/png",
+ download_url="/download/1",
+ ),
+ ]
+ page = Page(id="100", title="Test", space_key="DEV")
+ html = f"<p>Before</p>{DRAWIO_MACRO}<p>After</p>"
+ confluence = MagicMock()
+
+ # when
+ result = _process_drawio_macros(html, page, MagicMock(), confluence)
+
+ # then
+ assert '<img src="Architecture.png" alt="Architecture" />' in result
+ assert "<p>Before</p>" in result
+ assert "<p>After</p>" in result
+ mock_download.assert_called_once()
+
+
+@patch("confluence2md.renderer.download_attachment")
+@patch("confluence2md.renderer.fetch_attachments")
+def test_process_drawio_macros_no_png_leaves_macro(mock_fetch, mock_download):
+ # given
+ mock_fetch.return_value = []
+ page = Page(id="100", title="Test", space_key="DEV")
+ html = f"<p>Before</p>{DRAWIO_MACRO}<p>After</p>"
+ confluence = MagicMock()
+
+ # when
+ result = _process_drawio_macros(html, page, MagicMock(), confluence)
+
+ # then
+ assert "Intro{DRAWIO_MACRO}",
+ )
+ config = OutputConfig(directory=str(tmp_path), include_metadata=False)
+ confluence = MagicMock()
+
+ # when
+ path = export_page(page, config, confluence)
+
+ # then
+ content = path.read_text()
+ assert "" in content
+ mock_download.assert_called_once()
diff --git a/uv.lock b/uv.lock
index 0892441..cfead97 100644
--- a/uv.lock
+++ b/uv.lock
@@ -126,7 +126,7 @@ wheels = [
[[package]]
name = "confluence2md"
-version = "0.1.0"
+version = "0.2.0"
source = { editable = "." }
dependencies = [
{ name = "atlassian-python-api" },
From a5bda6554c04b6d6223e889ea6a6e1fd850f9e60 Mon Sep 17 00:00:00 2001
From: Matthias Splieth
Date: Fri, 27 Mar 2026 13:17:24 +0100
Subject: [PATCH 2/2] Set README for release purposes
---
pyproject.toml | 1 +
1 file changed, 1 insertion(+)
diff --git a/pyproject.toml b/pyproject.toml
index e0dd5c9..fa91e1a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -2,6 +2,7 @@
name = "confluence2md"
version = "0.2.0"
description = "Export Confluence pages to Markdown files"
+readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"atlassian-python-api>=3.41",