@@ -1289,6 +1289,10 @@ def _run_span_gamut(self, text: str) -> str:
12891289 )
12901290 """ , re .X )
12911291
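1292+ # Regex that matches only when the start of a string is NOT escaped, i.e. it begins with an even number of `\` chars (including zero).
1293+ # It does this by consuming pairs of `\` chars (each pair is a self-escaped backslash) and then asserting that they're NOT followed by another `\`.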
1294+ _is_unescaped_re = re .compile (r'^((?:\\\\)*(?!\\))' )
1295+
12921296 @mark_stage (Stage .ESCAPE_SPECIAL )
12931297 def _escape_special_chars (self , text : str ) -> str :
12941298 # Python markdown note: the HTML tokenization here differs from
@@ -1297,20 +1301,19 @@ def _escape_special_chars(self, text: str) -> str:
12971301 # it isn't susceptible to unmatched '<' and '>' in HTML tags).
12981302 # Note, however, that '>' is not allowed in an auto-link URL
12991303 # here.
1300- lead_escape_re = re .compile (r'^((?:\\\\)*(?!\\))' )
13011304 escaped = []
13021305 is_html_markup = False
13031306 for token in self ._sorta_html_tokenize_re .split (text ):
13041307 # check token is preceded by 0 or more PAIRS of escapes, because escape pairs
13051308 # escape themselves and don't affect the token
1306- if is_html_markup and lead_escape_re .match (token ):
1309+ if is_html_markup and self . _is_unescaped_re .match (token ):
13071310 # Within tags/HTML-comments/auto-links, encode * and _
13081311 # so they don't conflict with their use in Markdown for
13091312 # italics and strong. We're replacing each such
13101313 # character with its corresponding MD5 checksum value;
13111314 # this is likely overkill, but it should prevent us from
13121315 # colliding with the escape values by accident.
1313- escape_seq , token = lead_escape_re .split (token )[1 :] or ('' , token )
1316+ escape_seq , token = self . _is_unescaped_re .split (token )[1 :] or ('' , token )
13141317 escaped .append (
13151318 escape_seq .replace ('\\ \\ ' , self ._escape_table ['\\ ' ])
13161319 + token .replace ('*' , self ._escape_table ['*' ])
@@ -1366,6 +1369,9 @@ def _is_comment(token):
13661369 # sanitise but leave comment body intact for further markdown processing
13671370 tokens .append (self ._sanitize_html (is_comment .group (2 )))
13681371 tokens .append (self ._hash_span (self ._sanitize_html (is_comment .group (3 ))))
1372+ elif self ._is_unescaped_re .match (token ) is None :
1373+ # if the HTML is escaped then escape any special chars and add the token as-is
1374+ tokens .append (self ._escape_special_chars (token ))
13691375 else :
13701376 tokens .append (self ._hash_span (self ._sanitize_html (token )))
13711377 elif is_html_markup and is_code :
0 commit comments