-
Notifications
You must be signed in to change notification settings - Fork 77
/
Copy pathescape.py
84 lines (69 loc) · 1.91 KB
/
escape.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# -*- coding: utf-8 -*-
"""
Common functions for dealing with escape characters.
Author: Gertjan van den Burg
Date: 2018-11-06
"""
import codecs
import sys
import unicodedata
from typing import Iterable
from typing import Optional
from typing import Set
#: Characters that are *never* treated as an escape character by default.
#: Used by :py:func:`is_potential_escapechar` when no explicit block list is
#: supplied.
DEFAULT_BLOCK_CHARS: Set[str] = {
    "!",
    "?",
    '"',
    "'",
    ".",
    ",",
    ";",
    ":",
    "%",
    "*",
    "&",
    "#",
}
#: Every character whose Unicode general category is "Po"
#: (Punctuation, Other). Built once at import time by scanning all code
#: points up to ``sys.maxunicode``.
UNICODE_PO_CHARS: Set[str] = {
    c
    for c in map(chr, range(sys.maxunicode + 1))
    if unicodedata.category(c) == "Po"
}
def is_potential_escapechar(
    char: str, encoding: str, block_char: Optional[Iterable[str]] = None
) -> bool:
    """Decide whether a character could serve as an escape character.

    A character qualifies when it belongs to the Unicode "Punctuation,
    Other" (Po) category and is not one of the blocked characters.

    Parameters
    ----------
    char: str
        The character to check

    encoding : str
        The encoding of the character. For anything other than "utf-8" or
        "ascii" (compared case-insensitively) the character is encoded to
        bytes and decoded again with this encoding before the lookup.

    block_char : Optional[Iterable[str]]
        Characters from the Punctuation Other category that must not be
        considered as escape character. If None, the default set defined in
        :py:data:`DEFAULT_BLOCK_CHARS` is used.

    Returns
    -------
    is_escape : bool
        Whether the character is considered a potential escape or not.

    Raises
    ------
    UnicodeEncodeError
        If ``char`` cannot be represented in a non-UTF-8/ASCII ``encoding``.

    LookupError
        If ``encoding`` is not "utf-8"/"ascii" and is not a known codec.

    """
    # UTF-8 and ASCII input is used as-is; any other encoding goes through
    # an encode/decode round trip first.
    candidate = (
        char
        if encoding.lower() in {"utf-8", "ascii"}
        else codecs.decode(bytes(char, encoding), encoding=encoding)
    )

    if block_char is None:
        blocked = DEFAULT_BLOCK_CHARS
    else:
        blocked = set(block_char)

    return candidate in UNICODE_PO_CHARS and candidate not in blocked