Skip to content

Commit 309386f

Browse files
committed
Fix backslash-escaped HTML being handled differently in safe mode than in regular mode
1 parent b56dca5 commit 309386f

File tree

4 files changed

+12
-3
lines changed

4 files changed

+12
-3
lines changed

lib/markdown2.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1289,6 +1289,10 @@ def _run_span_gamut(self, text: str) -> str:
12891289
)
12901290
""", re.X)
12911291

1292+
# regex that matches only if the start of a string is NOT escaped (i.e. no odd run of leading `\` chars)
1293+
# it does this by matching zero or more pairs of `\` chars at the start and checking that they're NOT followed by another `\`
1294+
_is_unescaped_re = re.compile(r'^((?:\\\\)*(?!\\))')
1295+
12921296
@mark_stage(Stage.ESCAPE_SPECIAL)
12931297
def _escape_special_chars(self, text: str) -> str:
12941298
# Python markdown note: the HTML tokenization here differs from
@@ -1297,20 +1301,19 @@ def _escape_special_chars(self, text: str) -> str:
12971301
# it isn't susceptible to unmatched '<' and '>' in HTML tags).
12981302
# Note, however, that '>' is not allowed in an auto-link URL
12991303
# here.
1300-
lead_escape_re = re.compile(r'^((?:\\\\)*(?!\\))')
13011304
escaped = []
13021305
is_html_markup = False
13031306
for token in self._sorta_html_tokenize_re.split(text):
13041307
# check token is preceded by 0 or more PAIRS of escapes, because escape pairs
13051308
# escape themselves and don't affect the token
1306-
if is_html_markup and lead_escape_re.match(token):
1309+
if is_html_markup and self._is_unescaped_re.match(token):
13071310
# Within tags/HTML-comments/auto-links, encode * and _
13081311
# so they don't conflict with their use in Markdown for
13091312
# italics and strong. We're replacing each such
13101313
# character with its corresponding MD5 checksum value;
13111314
# this is likely overkill, but it should prevent us from
13121315
# colliding with the escape values by accident.
1313-
escape_seq, token = lead_escape_re.split(token)[1:] or ('', token)
1316+
escape_seq, token = self._is_unescaped_re.split(token)[1:] or ('', token)
13141317
escaped.append(
13151318
escape_seq.replace('\\\\', self._escape_table['\\'])
13161319
+ token.replace('*', self._escape_table['*'])
@@ -1366,6 +1369,9 @@ def _is_comment(token):
13661369
# sanitise but leave comment body intact for further markdown processing
13671370
tokens.append(self._sanitize_html(is_comment.group(2)))
13681371
tokens.append(self._hash_span(self._sanitize_html(is_comment.group(3))))
1372+
elif self._is_unescaped_re.match(token) is None:
1373+
# if the HTML is backslash-escaped then skip sanitising: escape any special chars and add the resulting token
1374+
tokens.append(self._escape_special_chars(token))
13691375
else:
13701376
tokens.append(self._hash_span(self._sanitize_html(token)))
13711377
elif is_html_markup and is_code:
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
<p>&lt;abc></p>
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{'safe_mode': 'escape'}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
\<abc\>

0 commit comments

Comments
 (0)