Skip to content

Commit

Permalink
Escape attributes by default (#9)
Browse files Browse the repository at this point in the history
* wip

* wip

* ruff format

* wip

* escape tests

* docs

* wip

* add a few other common attributes

* rename var

* clarify comment
  • Loading branch information
keithasaurus authored Nov 26, 2023
1 parent 63ccafc commit 131f2e2
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 51 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/push.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,4 @@ jobs:
run: poetry run pytest
- name: run bench (pure python)
run: poetry run python -m bench.run
- name: compile
run: poetry run mypyc simple_html
- name: run bench (compiled)
run: poetry run python -m bench.run

40 changes: 28 additions & 12 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,6 @@ render(
# <div empty-str-attribute="" key-only-attr></div>
```

Strings are escaped by default, but you can pass in `SafeString`s to avoid escaping.

```python
from simple_html import br, p, SafeString, render

node = p({},
"Escaped & stuff",
br,
SafeString("Not escaped & stuff"))

render(node) # returns: <p>Escaped &amp; stuff<br/>Not escaped & stuff</p>
```

Lists and generators are both valid collections of nodes:
```python
Expand Down Expand Up @@ -115,3 +103,31 @@ node = custom_elem(

render(node) # <custom-elem id="some-custom-elem-id">Wow</custom-elem>
```


Strings are escaped by default, but you can pass in `SafeString`s to avoid escaping.

```python
from simple_html import br, p, SafeString, render

node = p({},
"Escaped & stuff",
br,
SafeString("Not escaped & stuff"))

render(node) # <p>Escaped &amp; stuff<br/>Not escaped & stuff</p>
```

Attribute names and values are escaped as well. If you need to bypass escaping, pass a `SafeString` as the name and/or the value.

```python
from simple_html import div, render, SafeString

escaped_attrs_node = div({"<bad>":"</also bad>"})

render(escaped_attrs_node) # <div &lt;bad&gt;="&lt;/also bad&gt;"></div>

unescaped_attrs_node = div({SafeString("<bad>"): SafeString("</also bad>")})

render(unescaped_attrs_node) # <div <bad>="</also bad>"></div>
```
34 changes: 7 additions & 27 deletions bench/simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ def basic(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
[
li({"class": "item-stuff"}, SafeString(ss))
for ss in oks
],
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
),
Expand All @@ -55,9 +52,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
"<!doctype html>",
html(
{},
head({},
title({},
title_)),
head({}, title({}, title_)),
body(
{},
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
Expand All @@ -66,10 +61,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
[
li({"class": "item-stuff"}, SafeString(ss))
for ss in oks
],
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
Expand All @@ -78,10 +70,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
[
li({"class": "item-stuff"}, SafeString(ss))
for ss in oks
],
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
Expand All @@ -90,10 +79,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
[
li({"class": "item-stuff"}, SafeString(ss))
for ss in oks
],
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
Expand All @@ -102,10 +88,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
[
li({"class": "item-stuff"}, SafeString(ss))
for ss in oks
],
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
h1({"class": "great header", "other_attr": "5", "id": "header1"}),
Expand All @@ -114,10 +97,7 @@ def basic_long(objs: List[Tuple[str, str, List[str]]]) -> None:
p({}, content, br, br),
ul(
{},
[
li({"class": "item-stuff"}, SafeString(ss))
for ss in oks
],
[li({"class": "item-stuff"}, SafeString(ss)) for ss in oks],
),
),
),
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "simple-html"
version = "1.0.1"
version = "1.1.0"
readme = "README.md"
description = "Template-less html rendering in Python"
authors = ["Keith Philpott <[email protected]>"]
Expand Down
91 changes: 87 additions & 4 deletions simple_html/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
from html import escape
from types import GeneratorType
from typing import Tuple, Union, Dict, List, Generator, Optional, Iterable
from typing import Tuple, Union, Dict, List, FrozenSet, Generator, Iterable


class SafeString:
    """Wrapper marking a string as trusted, so it is emitted without escaping.

    Instances are hashable so they can be used as attribute-dict keys. The
    hash is derived from a ``SafeString__``-prefixed copy of the content, so
    a ``SafeString`` never hashes like the plain ``str`` it wraps.
    """

    __slots__ = ("safe_str",)

    def __init__(self, safe_str: str) -> None:
        self.safe_str = safe_str

    def __eq__(self, other: object) -> bool:
        # Equality must agree with __hash__: two wrappers around the same
        # text are the same dict key. A plain str is never equal to a
        # SafeString, mirroring the distinct hash.
        if isinstance(other, SafeString):
            return self.safe_str == other.safe_str
        return NotImplemented

    def __hash__(self) -> int:
        return hash(f"SafeString__{self.safe_str}")


Node = Union[
str,
Expand All @@ -21,6 +24,69 @@ def __init__(self, safe_str: str) -> None:

TagTuple = Tuple[str, Tuple[Node, ...], str]

_common_safe_attribute_names: FrozenSet[str] = frozenset(
{
"alt",
"autoplay",
"autoplay",
"charset",
"checked",
"class",
"colspan",
"content",
"contenteditable",
"dir",
"draggable",
"enctype",
"for",
"height",
"hidden",
"href",
"hreflang",
"http-equiv",
"id",
"itemprop",
"itemscope",
"itemtype",
"lang",
"loadable",
"method",
"name",
"onblur",
"onclick",
"onfocus",
"onkeydown",
"onkeyup",
"onload",
"onselect",
"onsubmit",
"placeholder",
"poster",
"property",
"rel",
"rowspan",
"sizes",
"spellcheck",
"src",
"style",
"target",
"title",
"type",
"value",
"width",
}
)


# Characters that html.escape() leaves untouched but that must not appear
# literally in an attribute name. Every HTML whitespace character ends an
# attribute name in the tokenizer, so all of them are neutralised, not just
# U+0020.
# NOTE(review): "/" also ends an attribute name in the HTML tokenizer; it is
# left unescaped here to keep existing rendered output stable -- confirm
# whether it should be escaped too.
_attribute_key_replacements = str.maketrans(
    {
        "=": "&#x3D;",
        "\\": "&#x5C;",
        "`": "&#x60;",
        " ": "&nbsp;",
        "\t": "&#x9;",
        "\n": "&#xA;",
        "\f": "&#xC;",
        "\r": "&#xD;",
    }
)


def escape_attribute_key(k: str) -> str:
    """Return *k* escaped for use as an HTML attribute name.

    ``html.escape`` handles ``&``, ``<``, ``>`` and both quote characters.
    ``=``, backslash, backtick and all HTML whitespace are then replaced in
    a single pass, so the result cannot be split into several attributes.
    None of the replacement strings contain a character that is itself
    replaced, so one ``translate`` pass is equivalent to chained replaces.
    """
    return escape(k).translate(_attribute_key_replacements)


class Tag:
__slots__ = ("tag_start", "rendered", "closing_tag", "no_children_close")
Expand All @@ -36,13 +102,30 @@ def __init__(self, name: str, self_closing: bool = False) -> None:
self.rendered = f"{self.tag_start}{self.no_children_close}"

def __call__(
self, attributes: Dict[str, Optional[str]], *children: Node
self,
attributes: Dict[Union[SafeString, str], Union[str, SafeString, None]],
*children: Node,
) -> TagTuple:
if attributes:
# in this case this is faster than attrs = "".join([...])
attrs = ""
for key, val in attributes.items():
attrs += f" {key}" if val is None else f' {key}="{val}"'
# optimization: a large portion of attribute keys should be
# covered by this check. It allows us to skip escaping
# where it is not needed. Note this is for attribute names only;
# attributes values are always escaped (when they are `str`s)
if key not in _common_safe_attribute_names:
key = (
key.safe_str
if isinstance(key, SafeString)
else escape_attribute_key(key)
)
if isinstance(val, str):
attrs += f' {key}="{escape(val)}"'
elif isinstance(val, SafeString):
attrs += f' {key}="{val.safe_str}"'
elif val is None:
attrs += f" {key}"

if children:
return f"{self.tag_start}{attrs}>", children, self.closing_tag
Expand Down
49 changes: 46 additions & 3 deletions tests/test_render.py → tests/test_simple_html.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
Node,
DOCTYPE_HTML5,
render,
escape_attribute_key,
)


Expand Down Expand Up @@ -97,9 +98,7 @@ def test_simple_form() -> None:


def test_safestring_in_tag() -> None:
    """A SafeString child is rendered verbatim inside its tag."""
    payload = SafeString(json.dumps({"some_key": "some_val"}))
    node = script({"type": "ld+json"}, payload)

    expected = '<script type="ld+json">{"some_key": "some_val"}</script>'
    assert render(node) == expected

Expand Down Expand Up @@ -160,3 +159,47 @@ def test_can_render_empty() -> None:
render(div({}, [], "hello ", [], span({}, "World!"), []))
== "<div>hello <span>World!</span></div>"
)


def test_hash_for_safestring() -> None:
    """A SafeString hashes like its content prefixed with ``SafeString__``."""
    wrapped = SafeString("okokok")
    expected = hash("SafeString__okokok")
    assert hash(wrapped) == expected


def test_escape_key() -> None:
    """Each special character in an attribute name maps to its escaped form."""
    # raw attribute name -> expected escaped form, checked in this order
    expectations = {
        "": "",
        ">": "&gt;",
        "<": "&lt;",
        '"': "&quot;",
        "\\": "&#x5C;",
        "'": "&#x27;",
        "=": "&#x3D;",
        "`": "&#x60;",
        "something with spaces": "something&nbsp;with&nbsp;spaces",
    }
    for raw, escaped in expectations.items():
        assert escape_attribute_key(raw) == escaped


def test_render_with_escaped_attributes() -> None:
    """Plain-str attribute names and values are escaped when rendered."""
    hostile = 'onmousenter="alert(1)" noop'

    hostile_key_html = render(div({hostile: "1"}))
    assert (
        hostile_key_html
        == '<div onmousenter&#x3D;&quot;alert(1)&quot;&nbsp;noop="1"></div>'
    )

    markup_key_html = render(span({"<script></script>": ">"}))
    assert markup_key_html == '<span &lt;script&gt;&lt;/script&gt;="&gt;"></span>'

    # vals and keys escape slightly differently
    hostile_both_html = render(div({hostile: hostile}))
    assert hostile_both_html == (
        "<div onmousenter&#x3D;&quot;alert(1)&quot;&nbsp;noop="
        '"onmousenter=&quot;alert(1)&quot; noop"></div>'
    )


def test_render_with_safestring_attributes() -> None:
    """SafeString attribute names and values are emitted without escaping."""
    bad_key = 'onmousenter="alert(1)" noop'
    bad_val = "<script></script>"

    attributes = {SafeString(bad_key): SafeString(bad_val)}
    expected = "<div " + bad_key + '="' + bad_val + '"></div>'

    assert render(div(attributes)) == expected

0 comments on commit 131f2e2

Please sign in to comment.