Skip to content
This repository was archived by the owner on Sep 15, 2025. It is now read-only.

Commit fb204c1

Browse files
committed
Simplify how we build on MarkupSafe.
1 parent 2787159 commit fb204c1

File tree

7 files changed

+49
-65
lines changed

7 files changed

+49
-65
lines changed

html_tstring/nodes.py

Lines changed: 18 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
1-
import typing as t
21
from dataclasses import dataclass, field
3-
from functools import cached_property
4-
from html import escape
2+
3+
from markupsafe import escape
54

65
# See https://developer.mozilla.org/en-US/docs/Glossary/Void_element
76
VOID_ELEMENTS = frozenset(
@@ -28,16 +27,8 @@
2827
RCDATA_CONTENT_ELEMENTS = frozenset(["textarea", "title"])
2928
CONTENT_ELEMENTS = CDATA_CONTENT_ELEMENTS | RCDATA_CONTENT_ELEMENTS
3029

31-
# TODO: add a pretty-printer for nodes for debugging
32-
# TODO: consider how significant whitespace is handled from t-string to nodes
33-
34-
35-
@t.runtime_checkable
36-
class HasHTMLDunder(t.Protocol):
37-
def __html__(self) -> str: ...
38-
39-
40-
type HTMLDunder = t.Callable[[], str]
30+
# FUTURE: add a pretty-printer to nodes for debugging
31+
# FUTURE: make nodes frozen (and have the parser work with mutable builders)
4132

4233

4334
@dataclass(slots=True)
@@ -50,24 +41,11 @@ def __html__(self) -> str:
5041

5142
@dataclass(slots=False)
5243
class Text(Node):
53-
# Django's `SafeString` and Markupsafe/Jinja2's `Markup` both inherit
54-
# from `str`, but that is not a requirement for the `__html__` dunder.
55-
text: str | HasHTMLDunder
56-
57-
@cached_property
58-
def _cached_str(self) -> str:
59-
if isinstance(self.text, HasHTMLDunder):
60-
return self.text.__html__()
61-
return escape(t.cast(str, self.text), quote=False)
62-
63-
def _as_unescaped(self) -> str:
64-
"""Return the text as-is, without escaping. For internal use only."""
65-
if isinstance(self.text, HasHTMLDunder):
66-
return self.text.__html__()
67-
return self.text
44+
text: str
6845

6946
def __str__(self) -> str:
70-
return self._cached_str
47+
# Use markupsafe's escape to handle HTML escaping
48+
return escape(self.text)
7149

7250

7351
@dataclass(slots=True)
@@ -113,20 +91,26 @@ def __post_init__(self):
11391
def is_void(self) -> bool:
11492
return self.tag in VOID_ELEMENTS
11593

94+
@property
95+
def is_content(self) -> bool:
96+
return self.tag in CONTENT_ELEMENTS
97+
11698
def __str__(self) -> str:
117-
# TODO: CONSIDER: should values in attrs support the __html__ dunder?
99+
# We use markupsafe's escape to handle HTML escaping of attribute values
100+
# which means it's possible to mark them as safe if needed.
118101
attrs_str = "".join(
119-
f" {key}" if value is None else f' {key}="{escape(value, quote=True)}"'
102+
f" {key}" if value is None else f' {key}="{escape(value)}"'
120103
for key, value in self.attrs.items()
121104
)
122105
if self.is_void:
123106
return f"<{self.tag}{attrs_str} />"
124107
if not self.children:
125108
return f"<{self.tag}{attrs_str}></{self.tag}>"
126-
if self.tag in CONTENT_ELEMENTS:
127-
# Content elements should not escape their content
109+
if self.is_content:
110+
# Content elements should *not* escape their content when
111+
# rendering to HTML. Sheesh, HTML is weird.
128112
children_str = "".join(
129-
child._as_unescaped() if isinstance(child, Text) else str(child)
113+
child.text if isinstance(child, Text) else str(child)
130114
for child in self.children
131115
)
132116
else:

html_tstring/nodes_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ def test_text():
3535

3636
def test_text_escaping():
3737
text = Text("<script>alert('XSS')</script>")
38-
assert str(text) == "&lt;script&gt;alert('XSS')&lt;/script&gt;"
38+
assert str(text) == "&lt;script&gt;alert(&#39;XSS&#39;)&lt;/script&gt;"
3939

4040

4141
def test_text_safe():
@@ -215,9 +215,9 @@ def test_dunder_html_method():
215215

216216
def test_escaping_of_text_content():
217217
div = Element("div", children=[Text("<script>alert('XSS')</script>")])
218-
assert str(div) == "<div>&lt;script&gt;alert('XSS')&lt;/script&gt;</div>"
218+
assert str(div) == "<div>&lt;script&gt;alert(&#39;XSS&#39;)&lt;/script&gt;</div>"
219219

220220

221221
def test_escaping_of_attribute_values():
222222
div = Element("div", attrs={"class": '">XSS<'})
223-
assert str(div) == '<div class="&quot;&gt;XSS&lt;"></div>'
223+
assert str(div) == '<div class="&#34;&gt;XSS&lt;"></div>'

html_tstring/parser.py

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -110,23 +110,18 @@ def get_node(self) -> Node:
110110
return Text("")
111111

112112

113-
def parse_html(input_html: str) -> Node:
114-
"""Parse an HTML string into a Node tree."""
115-
parser = NodeParser()
116-
parser.feed(input_html)
117-
parser.close()
118-
return parser.get_node()
119-
120-
121-
def parse_html_iter(input_html: t.Iterable[str]) -> Node:
113+
def parse_html(input: str | t.Iterable[str]) -> Node:
122114
"""
123-
Parse a sequence of HTML string chunks into a Node tree.
115+
Parse a string, or sequence of HTML string chunks, into a Node tree.
124116
125-
This is particularly useful if your sequence keeps separate text nodes
126-
that you wish to preserve intact.
117+
If a single string is provided, it is parsed as a whole. If an iterable
118+
of strings is provided, each string is fed to the parser in sequence.
119+
This is particularly useful if you want to keep specific text chunks
120+
separate in the resulting Node tree.
127121
"""
128122
parser = NodeParser()
129-
for chunk in input_html:
123+
iterable = [input] if isinstance(input, str) else input
124+
for chunk in iterable:
130125
parser.feed(chunk)
131126
parser.close()
132127
return parser.get_node()

html_tstring/parser_test.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import pytest
22

33
from .nodes import Comment, DocumentType, Element, Fragment, Text
4-
from .parser import parse_html, parse_html_iter
4+
from .parser import parse_html
55

66

77
def test_parse_empty():
@@ -173,7 +173,7 @@ def test_parse_html_iter_preserves_chunks():
173173
"<span>world</span>",
174174
"!</div>",
175175
]
176-
node = parse_html_iter(chunks)
176+
node = parse_html(chunks)
177177
assert node == Element(
178178
"div",
179179
children=[

html_tstring/processor.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88
from markupsafe import Markup
99

1010
from .classnames import classnames
11-
from .nodes import Element, Fragment, HasHTMLDunder, Node, Text
12-
from .parser import parse_html_iter
11+
from .nodes import Element, Fragment, Node, Text
12+
from .parser import parse_html
1313
from .utils import format_interpolation as base_format_interpolation
1414

1515
# --------------------------------------------------------------------------
@@ -18,11 +18,18 @@
1818

1919

2020
def _format_safe(value: object, format_spec: str) -> str:
21+
"""Use Markup() to mark a value as safe HTML."""
2122
assert format_spec == "safe"
2223
return Markup(value)
2324

2425

25-
CUSTOM_FORMATTERS = (("safe", _format_safe),)
26+
def _format_unsafe(value: object, format_spec: str) -> str:
27+
"""Convert a value to a plain string, forcing it to be treated as unsafe."""
28+
assert format_spec == "unsafe"
29+
return str(value)
30+
31+
32+
CUSTOM_FORMATTERS = (("safe", _format_safe), ("unsafe", _format_unsafe))
2633

2734

2835
def format_interpolation(interpolation: Interpolation) -> object:
@@ -94,7 +101,7 @@ def _instrument_and_parse_internal(
94101
The result is cached to avoid re-parsing the same template multiple times.
95102
"""
96103
instrumented = _instrument(strings, callable_ids)
97-
return parse_html_iter(instrumented)
104+
return parse_html(instrumented)
98105

99106

100107
def _callable_id(value: object) -> int | None:
@@ -280,8 +287,6 @@ def _node_from_value(value: object) -> Node:
280287
return value
281288
case Template():
282289
return html(value)
283-
case HasHTMLDunder():
284-
return Text(value)
285290
case False:
286291
return Text("")
287292
case Iterable():
@@ -312,12 +317,12 @@ def _invoke_component(
312317
return result
313318
case Template():
314319
return html(result)
315-
case HasHTMLDunder() | str():
320+
case str():
316321
return Text(result)
317322
case _:
318323
raise TypeError(
319-
f"Component callable must return a Node, Template, str, or "
320-
f"HasHTMLDunder, got {type(result).__name__}"
324+
f"Component callable must return a Node, Template, or str; "
325+
f"got {type(result).__name__}"
321326
)
322327

323328

html_tstring/processor_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,7 @@ def test_raw_html_injection_with_helper():
142142

143143

144144
def test_raw_html_injection_with_dunder_html_protocol():
145-
class SafeContent:
145+
class SafeContent(str):
146146
def __init__(self, text):
147147
self._text = text
148148

@@ -319,12 +319,12 @@ def test_escaping_of_interpolated_attribute_value():
319319
node = html(t'<a href="{url}">Link</a>')
320320
assert node == Element(
321321
"a",
322-
attrs={"href": 'https://example.com/?q="test"&lang=en'},
322+
attrs={"href": Markup('https://example.com/?q="test"&lang=en')},
323323
children=[Text("Link")],
324324
)
325325
assert (
326326
str(node)
327-
== '<a href="https://example.com/?q=&quot;test&quot;&amp;lang=en">Link</a>'
327+
== '<a href="https://example.com/?q=&#34;test&#34;&amp;lang=en">Link</a>'
328328
)
329329

330330

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)