Simplify how we build on MarkupSafe.

davepeck · davepeck · commit fb204c146416 · 2025-09-08T09:03:33.000-07:00
diff --git a/html_tstring/nodes.py b/html_tstring/nodes.py
@@ -1,7 +1,6 @@
-import typing as t
 from dataclasses import dataclass, field
-from functools import cached_property
-from html import escape
+
+from markupsafe import escape
 
 # See https://developer.mozilla.org/en-US/docs/Glossary/Void_element
 VOID_ELEMENTS = frozenset(
@@ -28,16 +27,8 @@
 RCDATA_CONTENT_ELEMENTS = frozenset(["textarea", "title"])
 CONTENT_ELEMENTS = CDATA_CONTENT_ELEMENTS | RCDATA_CONTENT_ELEMENTS
 
-# TODO: add a pretty-printer for nodes for debugging
-# TODO: consider how significant whitespace is handled from t-string to nodes
-
-
-@t.runtime_checkable
-class HasHTMLDunder(t.Protocol):
-    def __html__(self) -> str: ...
-
-
-type HTMLDunder = t.Callable[[], str]
+# FUTURE: add a pretty-printer to nodes for debugging
+# FUTURE: make nodes frozen (and have the parser work with mutable builders)
 
 
 @dataclass(slots=True)
@@ -50,24 +41,11 @@ def __html__(self) -> str:
 
 @dataclass(slots=False)
 class Text(Node):
-    # Django's `SafeString` and Markupsafe/Jinja2's `Markup` both inherit
-    # from `str`, but that is not a requirement for the `__html__` dunder.
-    text: str | HasHTMLDunder
-
-    @cached_property
-    def _cached_str(self) -> str:
-        if isinstance(self.text, HasHTMLDunder):
-            return self.text.__html__()
-        return escape(t.cast(str, self.text), quote=False)
-
-    def _as_unescaped(self) -> str:
-        """Return the text as-is, without escaping. For internal use only."""
-        if isinstance(self.text, HasHTMLDunder):
-            return self.text.__html__()
-        return self.text
+    text: str
 
     def __str__(self) -> str:
-        return self._cached_str
+        # Use markupsafe's escape to handle HTML escaping
+        return escape(self.text)
 
 
 @dataclass(slots=True)
@@ -113,20 +91,26 @@ def __post_init__(self):
     def is_void(self) -> bool:
         return self.tag in VOID_ELEMENTS
 
+    @property
+    def is_content(self) -> bool:
+        return self.tag in CONTENT_ELEMENTS
+
     def __str__(self) -> str:
-        # TODO: CONSIDER: should values in attrs support the __html__ dunder?
+        # We use markupsafe's escape to handle HTML escaping of attribute values
+        # which means it's possible to mark them as safe if needed.
         attrs_str = "".join(
-            f" {key}" if value is None else f' {key}="{escape(value, quote=True)}"'
+            f" {key}" if value is None else f' {key}="{escape(value)}"'
             for key, value in self.attrs.items()
         )
         if self.is_void:
             return f"<{self.tag}{attrs_str} />"
         if not self.children:
             return f"<{self.tag}{attrs_str}></{self.tag}>"
-        if self.tag in CONTENT_ELEMENTS:
-            # Content elements should not escape their content
+        if self.is_content:
+            # Content elements should *not* escape their content when
+            # rendering to HTML. Sheesh, HTML is weird.
             children_str = "".join(
-                child._as_unescaped() if isinstance(child, Text) else str(child)
+                child.text if isinstance(child, Text) else str(child)
                 for child in self.children
             )
         else:
diff --git a/html_tstring/nodes_test.py b/html_tstring/nodes_test.py
@@ -35,7 +35,7 @@ def test_text():
 
 def test_text_escaping():
     text = Text("<script>alert('XSS')</script>")
-    assert str(text) == "&lt;script&gt;alert('XSS')&lt;/script&gt;"
+    assert str(text) == "&lt;script&gt;alert(&#39;XSS&#39;)&lt;/script&gt;"
 
 
 def test_text_safe():
@@ -215,9 +215,9 @@ def test_dunder_html_method():
 
 def test_escaping_of_text_content():
     div = Element("div", children=[Text("<script>alert('XSS')</script>")])
-    assert str(div) == "<div>&lt;script&gt;alert('XSS')&lt;/script&gt;</div>"
+    assert str(div) == "<div>&lt;script&gt;alert(&#39;XSS&#39;)&lt;/script&gt;</div>"
 
 
 def test_escaping_of_attribute_values():
     div = Element("div", attrs={"class": '">XSS<'})
-    assert str(div) == '<div class="&quot;&gt;XSS&lt;"></div>'
+    assert str(div) == '<div class="&#34;&gt;XSS&lt;"></div>'
diff --git a/html_tstring/parser.py b/html_tstring/parser.py
@@ -110,23 +110,18 @@ def get_node(self) -> Node:
             return Text("")
 
 
-def parse_html(input_html: str) -> Node:
-    """Parse an HTML string into a Node tree."""
-    parser = NodeParser()
-    parser.feed(input_html)
-    parser.close()
-    return parser.get_node()
-
-
-def parse_html_iter(input_html: t.Iterable[str]) -> Node:
+def parse_html(input: str | t.Iterable[str]) -> Node:
     """
-    Parse a sequence of HTML string chunks into a Node tree.
+    Parse a string, or sequence of HTML string chunks, into a Node tree.
 
-    This is particularly useful if your sequence keeps separate text nodes
-    that you wish to preserve intact.
+    If a single string is provided, it is parsed as a whole. If an iterable
+    of strings is provided, each string is fed to the parser in sequence.
+    This is particularly useful if you want to keep specific text chunks
+    separate in the resulting Node tree.
     """
     parser = NodeParser()
-    for chunk in input_html:
+    iterable = [input] if isinstance(input, str) else input
+    for chunk in iterable:
         parser.feed(chunk)
     parser.close()
     return parser.get_node()
diff --git a/html_tstring/parser_test.py b/html_tstring/parser_test.py
@@ -1,7 +1,7 @@
 import pytest
 
 from .nodes import Comment, DocumentType, Element, Fragment, Text
-from .parser import parse_html, parse_html_iter
+from .parser import parse_html
 
 
 def test_parse_empty():
@@ -173,7 +173,7 @@ def test_parse_html_iter_preserves_chunks():
         "<span>world</span>",
         "!</div>",
     ]
-    node = parse_html_iter(chunks)
+    node = parse_html(chunks)
     assert node == Element(
         "div",
         children=[
diff --git a/html_tstring/processor.py b/html_tstring/processor.py
@@ -8,8 +8,8 @@
 from markupsafe import Markup
 
 from .classnames import classnames
-from .nodes import Element, Fragment, HasHTMLDunder, Node, Text
-from .parser import parse_html_iter
+from .nodes import Element, Fragment, Node, Text
+from .parser import parse_html
 from .utils import format_interpolation as base_format_interpolation
 
 # --------------------------------------------------------------------------
@@ -18,11 +18,18 @@
 
 
 def _format_safe(value: object, format_spec: str) -> str:
+    """Use Markup() to mark a value as safe HTML."""
     assert format_spec == "safe"
     return Markup(value)
 
 
-CUSTOM_FORMATTERS = (("safe", _format_safe),)
+def _format_unsafe(value: object, format_spec: str) -> str:
+    """Convert a value to a plain string, forcing it to be treated as unsafe."""
+    assert format_spec == "unsafe"
+    return str(value)
+
+
+CUSTOM_FORMATTERS = (("safe", _format_safe), ("unsafe", _format_unsafe))
 
 
 def format_interpolation(interpolation: Interpolation) -> object:
@@ -94,7 +101,7 @@ def _instrument_and_parse_internal(
     The result is cached to avoid re-parsing the same template multiple times.
     """
     instrumented = _instrument(strings, callable_ids)
-    return parse_html_iter(instrumented)
+    return parse_html(instrumented)
 
 
 def _callable_id(value: object) -> int | None:
@@ -280,8 +287,6 @@ def _node_from_value(value: object) -> Node:
             return value
         case Template():
             return html(value)
-        case HasHTMLDunder():
-            return Text(value)
         case False:
             return Text("")
         case Iterable():
@@ -312,12 +317,12 @@ def _invoke_component(
             return result
         case Template():
             return html(result)
-        case HasHTMLDunder() | str():
+        case str():
             return Text(result)
         case _:
             raise TypeError(
-                f"Component callable must return a Node, Template, str, or "
-                f"HasHTMLDunder, got {type(result).__name__}"
+                f"Component callable must return a Node, Template, or str; "
+                f"got {type(result).__name__}"
             )
 
 
diff --git a/html_tstring/processor_test.py b/html_tstring/processor_test.py
@@ -142,7 +142,7 @@ def test_raw_html_injection_with_helper():
 
 
 def test_raw_html_injection_with_dunder_html_protocol():
-    class SafeContent:
+    class SafeContent(str):
         def __init__(self, text):
             self._text = text
 
@@ -319,12 +319,12 @@ def test_escaping_of_interpolated_attribute_value():
     node = html(t'<a href="{url}">Link</a>')
     assert node == Element(
         "a",
-        attrs={"href": 'https://example.com/?q="test"&lang=en'},
+        attrs={"href": Markup('https://example.com/?q="test"&lang=en')},
         children=[Text("Link")],
     )
     assert (
         str(node)
-        == '<a href="https://example.com/?q=&quot;test&quot;&amp;lang=en">Link</a>'
+        == '<a href="https://example.com/?q=&#34;test&#34;&amp;lang=en">Link</a>'
     )
 
 
diff --git a/uv.lock b/uv.lock