Skip to content

Commit 131f2e2

Browse files
authored
Escape attributes by default (#9)
* wip * wip * ruff format * wip * escape tests * docs * wip * add a few other common attributes * rename var * clarify comment
1 parent 63ccafc commit 131f2e2

File tree

6 files changed

+170
-51
lines changed

6 files changed

+170
-51
lines changed

.github/workflows/push.yml

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,4 @@ jobs:
2727
run: poetry run pytest
2828
- name: run bench (pure python)
2929
run: poetry run python -m bench.run
30-
- name: compile
31-
run: poetry run mypyc simple_html
32-
- name: run bench (compiled)
33-
run: poetry run python -m bench.run
30+

README.md

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,18 +61,6 @@ render(
6161
# <div empty-str-attribute="" key-only-attr></div>
6262
```
6363

64-
Strings are escaped by default, but you can pass in `SafeString`s to avoid escaping.
65-
66-
```python
67-
from simple_html import br, p, SafeString, render
68-
69-
node = p({},
70-
"Escaped & stuff",
71-
br,
72-
SafeString("Not escaped & stuff"))
73-
74-
render(node) # returns: <p>Escaped &amp; stuff<br/>Not escaped & stuff</p>
75-
```
7664

7765
Lists and generators are both valid collections of nodes:
7866
```python
@@ -115,3 +103,31 @@ node = custom_elem(
115103

116104
render(node) # <custom-elem id="some-custom-elem-id">Wow</custom-elem>
117105
```
106+
107+
108+
Strings are escaped by default, but you can pass in `SafeString`s to avoid escaping.
109+
110+
```python
111+
from simple_html import br, p, SafeString, render
112+
113+
node = p({},
114+
"Escaped & stuff",
115+
br,
116+
SafeString("Not escaped & stuff"))
117+
118+
render(node) # <p>Escaped &amp; stuff<br/>Not escaped & stuff</p>
119+
```
120+
121+
Attributes are also escaped -- both names and values. You can use `SafeString` to bypass escaping, if needed.
122+
123+
```python
124+
from simple_html import div, render, SafeString
125+
126+
escaped_attrs_node = div({"<bad>":"</also bad>"})
127+
128+
render(escaped_attrs_node) # <div &lt;bad&gt;="&lt;/also bad&gt;"></div>
129+
130+
unescaped_attrs_node = div({SafeString("<bad>"): SafeString("</also bad>")})
131+
132+
render(unescaped_attrs_node) # <div <bad>="</also bad>"></div>
133+
```

bench/simple.py

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -38,10 +38,7 @@ def basic(objs: List[Tuple[str, str, List[str]]]) -> None:
3838
p({}, content, br, br),
3939
ul(
4040
{},
41-
[
42-
li({"class": "item-stuff"}, SafeString(ss))
43-
for ss in oks
44-
],
41+
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
4542
),
4643
),
4744
),
@@ -55,9 +52,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
5552
"<!doctype html>",
5653
html(
5754
{},
58-
head({},
59-
title({},
60-
title_)),
55+
head({}, title({}, title_)),
6156
body(
6257
{},
6358
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -66,10 +61,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
6661
p({}, content, br, br),
6762
ul(
6863
{},
69-
[
70-
li({"class": "item-stuff"}, SafeString(ss))
71-
for ss in oks
72-
],
64+
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
7365
),
7466
),
7567
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -78,10 +70,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
7870
p({}, content, br, br),
7971
ul(
8072
{},
81-
[
82-
li({"class": "item-stuff"}, SafeString(ss))
83-
for ss in oks
84-
],
73+
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
8574
),
8675
),
8776
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -90,10 +79,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
9079
p({}, content, br, br),
9180
ul(
9281
{},
93-
[
94-
li({"class": "item-stuff"}, SafeString(ss))
95-
for ss in oks
96-
],
82+
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
9783
),
9884
),
9985
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -102,10 +88,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
10288
p({}, content, br, br),
10389
ul(
10490
{},
105-
[
106-
li({"class": "item-stuff"}, SafeString(ss))
107-
for ss in oks
108-
],
91+
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
10992
),
11093
),
11194
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
@@ -114,10 +97,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
11497
p({}, content, br, br),
11598
ul(
11699
{},
117-
[
118-
li({"class": "item-stuff"}, SafeString(ss))
119-
for ss in oks
120-
],
100+
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
121101
),
122102
),
123103
),

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[tool.poetry]
22
name = "simple-html"
3-
version = "1.0.1"
3+
version = "1.1.0"
44
readme = "README.md"
55
description = "Template-less html rendering in Python"
66
authors = ["Keith Philpott <[email protected]>"]

simple_html/__init__.py

Lines changed: 87 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,17 @@
11
from html import escape
22
from types import GeneratorType
3-
from typing import Tuple, Union, Dict, List, Generator, Optional, Iterable
3+
from typing import Tuple, Union, Dict, List, FrozenSet, Generator, Iterable
44

55

66
class SafeString:
7-
__slots__ = ('safe_str',)
7+
__slots__ = ("safe_str",)
88

99
def __init__(self, safe_str: str) -> None:
1010
self.safe_str = safe_str
1111

12+
def __hash__(self) -> int:
13+
return hash(f"SafeString__{self.safe_str}")
14+
1215

1316
Node = Union[
1417
str,
@@ -21,6 +24,69 @@ def __init__(self, safe_str: str) -> None:
2124

2225
TagTuple = Tuple[str, Tuple[Node, ...], str]
2326

27+
_common_safe_attribute_names: FrozenSet[str] = frozenset(
28+
{
29+
"alt",
30+
"autoplay",
31+
"autoplay",
32+
"charset",
33+
"checked",
34+
"class",
35+
"colspan",
36+
"content",
37+
"contenteditable",
38+
"dir",
39+
"draggable",
40+
"enctype",
41+
"for",
42+
"height",
43+
"hidden",
44+
"href",
45+
"hreflang",
46+
"http-equiv",
47+
"id",
48+
"itemprop",
49+
"itemscope",
50+
"itemtype",
51+
"lang",
52+
"loadable",
53+
"method",
54+
"name",
55+
"onblur",
56+
"onclick",
57+
"onfocus",
58+
"onkeydown",
59+
"onkeyup",
60+
"onload",
61+
"onselect",
62+
"onsubmit",
63+
"placeholder",
64+
"poster",
65+
"property",
66+
"rel",
67+
"rowspan",
68+
"sizes",
69+
"spellcheck",
70+
"src",
71+
"style",
72+
"target",
73+
"title",
74+
"type",
75+
"value",
76+
"width",
77+
}
78+
)
79+
80+
81+
def escape_attribute_key(k: str) -> str:
82+
return (
83+
escape(k)
84+
.replace("=", "&#x3D;")
85+
.replace("\\", "&#x5C;")
86+
.replace("`", "&#x60;")
87+
.replace(" ", "&nbsp;")
88+
)
89+
2490

2591
class Tag:
2692
__slots__ = ("tag_start", "rendered", "closing_tag", "no_children_close")
@@ -36,13 +102,30 @@ def __init__(self, name: str, self_closing: bool = False) -> None:
36102
self.rendered = f"{self.tag_start}{self.no_children_close}"
37103

38104
def __call__(
39-
self, attributes: Dict[str, Optional[str]], *children: Node
105+
self,
106+
attributes: Dict[Union[SafeString, str], Union[str, SafeString, None]],
107+
*children: Node,
40108
) -> TagTuple:
41109
if attributes:
42110
# in this case this is faster than attrs = "".join([...])
43111
attrs = ""
44112
for key, val in attributes.items():
45-
attrs += f" {key}" if val is None else f' {key}="{val}"'
113+
# optimization: a large portion of attribute keys should be
114+
# covered by this check. It allows us to skip escaping
115+
# where it is not needed. Note this is for attribute names only;
116+
# attribute values are always escaped (when they are `str`s)
117+
if key not in _common_safe_attribute_names:
118+
key = (
119+
key.safe_str
120+
if isinstance(key, SafeString)
121+
else escape_attribute_key(key)
122+
)
123+
if isinstance(val, str):
124+
attrs += f' {key}="{escape(val)}"'
125+
elif isinstance(val, SafeString):
126+
attrs += f' {key}="{val.safe_str}"'
127+
elif val is None:
128+
attrs += f" {key}"
46129

47130
if children:
48131
return f"{self.tag_start}{attrs}>", children, self.closing_tag

tests/test_render.py renamed to tests/test_simple_html.py

Lines changed: 46 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
Node,
2020
DOCTYPE_HTML5,
2121
render,
22+
escape_attribute_key,
2223
)
2324

2425

@@ -97,9 +98,7 @@ def test_simple_form() -> None:
9798

9899

99100
def test_safestring_in_tag() -> None:
100-
node = script(
101-
{"type": "ld+json"}, SafeString(json.dumps({"some_key": "some_val"}))
102-
)
101+
node = script({"type": "ld+json"}, SafeString(json.dumps({"some_key": "some_val"})))
103102

104103
assert render(node) == ('<script type="ld+json">{"some_key": "some_val"}</script>')
105104

@@ -160,3 +159,47 @@ def test_can_render_empty() -> None:
160159
render(div({}, [], "hello ", [], span({}, "World!"), []))
161160
== "<div>hello <span>World!</span></div>"
162161
)
162+
163+
164+
def test_hash_for_safestring() -> None:
165+
assert hash(SafeString("okokok")) == hash("SafeString__okokok")
166+
167+
168+
def test_escape_key() -> None:
169+
assert escape_attribute_key("") == ""
170+
assert escape_attribute_key(">") == "&gt;"
171+
assert escape_attribute_key("<") == "&lt;"
172+
assert escape_attribute_key('"') == "&quot;"
173+
assert escape_attribute_key("\\") == "&#x5C;"
174+
assert escape_attribute_key("'") == "&#x27;"
175+
assert escape_attribute_key("=") == "&#x3D;"
176+
assert escape_attribute_key("`") == "&#x60;"
177+
assert (
178+
escape_attribute_key("something with spaces")
179+
== "something&nbsp;with&nbsp;spaces"
180+
)
181+
182+
183+
def test_render_with_escaped_attributes() -> None:
184+
assert (
185+
render(div({'onmousenter="alert(1)" noop': "1"}))
186+
== '<div onmousenter&#x3D;&quot;alert(1)&quot;&nbsp;noop="1"></div>'
187+
)
188+
assert (
189+
render(span({"<script></script>": ">"}))
190+
== '<span &lt;script&gt;&lt;/script&gt;="&gt;"></span>'
191+
)
192+
# vals and keys escape slightly differently
193+
assert (
194+
render(div({'onmousenter="alert(1)" noop': 'onmousenter="alert(1)" noop'}))
195+
== '<div onmousenter&#x3D;&quot;alert(1)&quot;&nbsp;noop="onmousenter=&quot;alert(1)&quot; noop"></div>'
196+
)
197+
198+
199+
def test_render_with_safestring_attributes() -> None:
200+
bad_key = 'onmousenter="alert(1)" noop'
201+
bad_val = "<script></script>"
202+
assert (
203+
render(div({SafeString(bad_key): SafeString(bad_val)}))
204+
== f'<div {bad_key}="{bad_val}"></div>'
205+
)

0 commit comments

Comments
 (0)