Skip to content

Commit d2d9b50

Browse files
author
Stefan Majoor
authored
Add HTML field plugin (#210)
1 parent 77eca30 commit d2d9b50

File tree

8 files changed

+328
-0
lines changed

8 files changed

+328
-0
lines changed

binder/plugins/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .html_field import HtmlField # noqa: F401
Lines changed: 157 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,157 @@
1+
from typing import List
2+
3+
from django.db.models import TextField
4+
from html.parser import HTMLParser
5+
from django.core.exceptions import ValidationError
6+
from django.utils.translation import gettext as _
7+
8+
# A link target is only accepted by link_validator when it starts with one of
# these prefixes; anything else (e.g. "javascript:") is rejected.
ALLOWED_LINK_PREFIXES = ['http://', 'https://', 'mailto:']
13+
14+
15+
def link_rel_validator(tag, attribute_name, attribute_value) -> List[ValidationError]:
    """
    Check that a link's ``rel`` attribute contains ``noopener`` and ``noreferrer``.

    :param tag: name of the tag the attribute belongs to (reported in the error params)
    :param attribute_name: name of the attribute being validated (unused)
    :param attribute_value: raw attribute value, or ``None`` when the attribute
        was written without a value (``<a rel>``)
    :return: one ``ValidationError`` (code ``invalid_attribute``) per missing
        token, ``noopener`` first; an empty list when both are present
    """
    # HTMLParser hands over None for a valueless attribute; treat it as empty
    # instead of crashing on None.split(). Link types are whitespace separated
    # and case-insensitive, so normalise before comparing.
    rels = (attribute_value or '').lower().split()

    # The messages stay literal inside _() so that makemessages can extract them.
    required_rels = (
        ('noopener', _('Link needs rel="noopener"')),
        ('noreferrer', _('Link needs rel="noreferrer"')),
    )

    return [
        ValidationError(
            message,
            code='invalid_attribute',
            params={
                'tag': tag,
            },
        )
        for required_rel, message in required_rels
        if required_rel not in rels
    ]
41+
42+
43+
def link_validator(tag, attribute_name, attribute_value) -> List[ValidationError]:
    """
    Check that a link target starts with one of ``ALLOWED_LINK_PREFIXES``.

    :param tag: name of the tag the attribute belongs to (reported in the error params)
    :param attribute_name: name of the attribute being validated (unused)
    :param attribute_value: raw attribute value, or ``None`` when the attribute
        was written without a value (``<a href>``)
    :return: a list with a single ``ValidationError`` (code ``invalid_attribute``)
        when the link is not allowed, otherwise an empty list
    """
    # HTMLParser hands over None for a valueless attribute; an empty link never
    # matches a prefix, so it is reported as invalid instead of crashing.
    # URL schemes are case-insensitive, so compare in lower case.
    link = (attribute_value or '').lower()
    allowed_prefixes = tuple(prefix.lower() for prefix in ALLOWED_LINK_PREFIXES)

    if link.startswith(allowed_prefixes):
        return []

    return [ValidationError(
        _('Link is not valid'),
        code='invalid_attribute',
        params={
            'tag': tag,
        },
    )]
54+
55+
56+
class HtmlValidator(HTMLParser):
    """
    Whitelist based validator for user provided HTML.

    The markup is run through ``html.parser.HTMLParser``. Every start tag is
    checked against ``allowed_tags``, ``required_attributes``,
    ``allowed_attributes`` and ``special_validators``. Problems are collected
    as ``ValidationError`` instances instead of being raised, so a single pass
    reports all of them. Only start tags are inspected; end tags and text are
    not validated.
    """

    allowed_tags = [
        # General setup
        'p', 'br',
        # Headers
        # NOTE(review): 'h7' is not a standard HTML element (headings stop at
        # h6); it is kept so previously accepted content stays valid.
        'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'h7',

        # text decoration
        'b', 'strong', 'i', 'em', 'u',
        # Lists
        'ol', 'ul', 'li',

        # Special
        'a',
    ]

    # Attributes that may appear per tag; tags not listed accept no attributes.
    allowed_attributes = {
        'a': ['href', 'rel', 'target']
    }

    # Attributes that must appear per tag.
    required_attributes = {
        'a': ['rel'],
    }

    # Extra per-(tag, attribute) checks. Each callable receives
    # (tag, attribute_name, attribute_value) and returns a list of ValidationErrors.
    special_validators = {
        ('a', 'href'): link_validator,
        ('a', 'rel'): link_rel_validator,
    }

    # NOTE(review): `_` is gettext, so these are translated once, when the class
    # body is executed at import time. Consider gettext_lazy after confirming
    # that consumers of the errors can handle lazy strings.
    error_messages = {
        'invalid_tag': _('Tag %(tag)s is not allowed'),
        'missing_attribute': _('Attribute %(attribute)s is required for tag %(tag)s'),
        'invalid_attribute': _('Attribute %(attribute)s not allowed for tag %(tag)s'),
        'unterminated_tag': _('HTML contains an unterminated tag'),
    }

    def validate(self, value: str) -> List[ValidationError]:
        """
        Validates html, and gives a list of validation errors

        :param value: the HTML to check
        :return: all problems found, in document order; empty when the HTML is accepted
        """
        # Start from a clean parser so that reusing an instance cannot leak
        # buffered input or errors from a previous run.
        self.reset()
        self.errors = []

        self.feed(value)

        # feed() leaves a construct it could not finish (e.g. a start tag that
        # is missing its closing '>') in the parser buffer without ever calling
        # handle_starttag for it. Such a fragment would bypass every check
        # above, yet it can still become a real tag once the stored content is
        # embedded in a page that supplies the next '>'. A lone trailing '<'
        # is plain text and is not reported.
        pending = self.rawdata
        if len(pending) > 1 and pending.startswith('<'):
            self.errors.append(ValidationError(
                self.error_messages['unterminated_tag'],
                code='unterminated_tag',
            ))

        # Tell the parser the input is complete so remaining buffered data is processed.
        self.close()

        return self.errors

    def handle_starttag(self, tag: str, attrs: list) -> None:
        """
        Validate one start tag and its attributes; called by HTMLParser.

        :param tag: tag name as reported by the parser
        :param attrs: list of ``(name, value)`` pairs; value is ``None`` for
            attributes written without a value
        """
        tag_errors = []

        if tag not in self.allowed_tags:
            tag_errors.append(ValidationError(
                self.error_messages['invalid_tag'],
                code='invalid_tag',
                params={
                    'tag': tag
                },
            ))

        # Report required attributes in their declared order so the error list
        # is deterministic.
        present_attributes = {attr[0] for attr in attrs}
        for required_attribute in self.required_attributes.get(tag, []):
            if required_attribute in present_attributes:
                continue
            tag_errors.append(
                ValidationError(
                    self.error_messages['missing_attribute'],
                    code='missing_attribute',
                    params={
                        'tag': tag,
                        'attribute': required_attribute
                    },
                )
            )

        allowed_attributes_for_tag = self.allowed_attributes.get(tag, [])

        for (attribute_name, attribute_content) in attrs:
            if attribute_name not in allowed_attributes_for_tag:
                tag_errors.append(ValidationError(
                    self.error_messages['invalid_attribute'],
                    code='invalid_attribute',
                    params={
                        'tag': tag,
                        'attribute': attribute_name
                    },
                ))

            special_validator = self.special_validators.get((tag, attribute_name))
            if special_validator is not None:
                tag_errors += special_validator(tag, attribute_name, attribute_content)

        self.errors += tag_errors
144+
145+
146+
class HtmlField(TextField):
    """
    Text field for user provided HTML that only accepts what HtmlValidator
    allows, to prevent XSS injections.
    """

    def validate(self, value: str, _):
        """
        Run the standard field validation, then validate the HTML.

        :param value: the value to validate
        :param _: the model instance being validated; only passed on to the base class
        :raises ValidationError: from the base field checks, or containing every
            problem HtmlValidator found in the HTML
        """
        # Keep the checks of the base field (null / blank / choices); overriding
        # validate() without this call silently disables them.
        super().validate(value, _)

        # Nothing to parse for None (nullable field) or an empty string;
        # feeding None to the HTML parser would raise a TypeError.
        if not value:
            return

        # Validate all html tags
        validator = HtmlValidator()
        errors = validator.validate(value)

        if errors:
            raise ValidationError(errors)

docs/plugins/html_field.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# HTML Field
2+
3+
The HTML field provides a Django model field for user-posted HTML. Its aim is to provide a safe
4+
way to implement a CMS in which end users can create pages but cannot perform XSS injections.
5+

tests/test_html_field.py

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
from django.contrib.auth.models import User
2+
from django.test import TestCase, Client
3+
4+
import json
5+
from .testapp.models import Zoo, WebPage
6+
7+
8+
class HtmlFieldTestCase(TestCase):
    """
    Tests for HtmlField that go through the API: every test PUTs a new
    ``content`` value to the web page created in setUp and checks the response.
    """

    def setUp(self):
        super().setUp()
        u = User(username='testuser', is_active=True, is_superuser=True)
        u.set_password('test')
        u.save()
        self.client = Client()
        r = self.client.login(username='testuser', password='test')
        self.assertTrue(r)

        self.zoo = Zoo(name='Apenheul')
        self.zoo.save()

        self.webpage = WebPage.objects.create(zoo=self.zoo, content='')

    def _put_content(self, content):
        """PUT the given content to the test web page and return the response."""
        return self.client.put(f'/web_page/{self.webpage.id}/',
                               data=json.dumps({'content': content}))

    def _assert_rejected(self, response, *expected_codes):
        """
        Assert the response is a 400 ValidationError whose errors on the
        content field start with the given codes, in order.
        """
        self.assertEqual(response.status_code, 400)

        parsed_response = json.loads(response.content)
        self.assertEqual('ValidationError', parsed_response['code'])

        content_errors = parsed_response['errors']['web_page'][f'{self.webpage.id}']['content']
        for index, expected_code in enumerate(expected_codes):
            self.assertEqual(expected_code, content_errors[index]['code'])

    def test_save_normal_text_ok(self):
        response = self._put_content('Artis')
        self.assertEqual(response.status_code, 200)

    def test_simple_html_is_ok(self):
        # NOTE(review): the closing tags do not match the opening ones
        # (<b><p> ... </a>); confirm whether that is intentional.
        response = self._put_content('<h1>Artis</h1><b><p>Artis is a zoo in amsterdam</a>')
        self.assertEqual(response.status_code, 200)

    def test_wrong_attribute_not_ok(self):
        response = self._put_content('<b onclick="">test</b>')
        self._assert_rejected(response, 'invalid_attribute')

    def test_simple_link_is_ok(self):
        response = self._put_content(
            '<a href="https://www.artis.nl/en/" rel="noreferrer noopener">Visit artis website</a>')

        self.assertEqual(response.status_code, 200)

    def test_javascript_link_is_not_ok(self):
        # Uses a real "javascript:" URL so the test covers the case it is named after.
        response = self._put_content(
            '<a href="javascript:alert(document.cookie)" rel="noreferrer noopener">Visit artis website</a>')
        self._assert_rejected(response, 'invalid_attribute')

    def test_script_is_not_ok(self):
        response = self._put_content('<script>alert(\'hoi\');</script>')
        self._assert_rejected(response, 'invalid_tag')

    def test_script_is_not_ok_nested(self):
        response = self._put_content('<b><script>alert(\'hoi\');</script></b>')
        self._assert_rejected(response, 'invalid_tag')

    def test_can_handle_reallife_data(self):
        """
        This is the worst case that we could produce on the WYSIWYG editor
        """
        content = '<p>normal text</p><p><br></p><h1>HEADing 1</h1><p><br></p><h2>HEADING 2</h2><h3><br></h3><h3>HEADING 3</h3><p><br></p><p><strong>bold</strong></p><p><br></p><p><em>italic</em></p><p><br></p><p><u>underlined</u></p><p><br></p><p><a href=\"http://codeyellow.nl\" rel=\"noopener noreferrer\" target=\"_blank\">Link</a></p><p><br></p><ol><li>ol1</li><li>ol2</li></ol><ul><li>ul1</li><li>ul2</li></ul><p><br></p><p>subscripttgege</p><p>g</p>"'
        response = self._put_content(content)

        self.assertEqual(response.status_code, 200)

    def test_multiple_errors(self):
        response = self._put_content('<foo><bar>Visit artis website</foo></bar>')
        # Both unknown tags must be reported, not just the first one.
        self._assert_rejected(response, 'invalid_tag', 'invalid_tag')

    def test_link_no_rel_errors(self):
        response = self._put_content('<a href="https://codeyellow.nl">bla</a>')
        self._assert_rejected(response, 'missing_attribute')

    def test_link_noopener_required(self):
        response = self._put_content('<a href="https://codeyellow.nl" rel="noreferrer">bla</a>')
        self._assert_rejected(response, 'invalid_attribute')

    def test_link_noreferrer_required(self):
        response = self._put_content('<a href="https://codeyellow.nl" rel="noopener">bla</a>')
        self._assert_rejected(response, 'invalid_attribute')

tests/testapp/models/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@
1414
from .zoo_employee import ZooEmployee
1515
from .city import City, CityState, PermanentCity
1616
from .country import Country
17+
from .web_page import WebPage
18+
1719
# This is Postgres-specific
1820
if os.environ.get('BINDER_TEST_MYSQL', '0') != '1':
1921
from .timetable import TimeTable

tests/testapp/models/web_page.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
2+
from binder.models import BinderModel
3+
from django.db import models
4+
5+
from binder.plugins.models import HtmlField
6+
7+
8+
class WebPage(BinderModel):
    """
    Web page holding some details about a zoo; a zoo has at most one.
    """
    # One-to-one link to the zoo; the page is deleted together with its zoo.
    zoo = models.OneToOneField(
        to='Zoo',
        on_delete=models.CASCADE,
        related_name='web_page',
    )
    # Page body, validated as restricted HTML by HtmlField.
    content = HtmlField()

tests/testapp/views/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,4 @@
1919
from .user import UserView
2020
from .zoo import ZooView
2121
from .zoo_employee import ZooEmployeeView
22+
from .web_page import WebPageView

tests/testapp/views/web_page.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from binder.views import ModelView
2+
3+
from ..models import WebPage
4+
5+
# From the api docs
# ModelView subclass bound to the WebPage test model.
# NOTE(review): presumably this is what serves the /web_page/<id>/ requests
# made by the HtmlField tests - confirm against the binder routing.
class WebPageView(ModelView):
    model = WebPage

0 commit comments

Comments
 (0)