forked from JeffreyMFarley/aws-flash
-
Notifications
You must be signed in to change notification settings - Fork 0
/
convert_raw.py
87 lines (72 loc) · 2.25 KB
/
convert_raw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import re
import json
import codecs
# -----------------------------------------------------------------------------
def acquire(path='raw.txt'):
    """Yield the non-blank lines of a UTF-8 text file, whitespace-stripped.

    Args:
        path: File to read. Defaults to ``'raw.txt'`` (resolved against the
            current working directory), so existing ``acquire()`` callers
            behave exactly as before.

    Yields:
        Each line with leading and trailing whitespace removed. Lines that
        are empty after stripping are skipped.
    """
    with codecs.open(path, encoding='utf-8') as f:
        for line in f:
            s = line.strip()
            if s:
                yield s
def extract_pair(match, table):
    """Turn a two-group regex match into a normalised ``(label, text)`` tuple.

    Args:
        match: A match object exposing groups 1 and 2.
        table: Mapping handed to ``translate`` to replace characters in the
            text portion.

    Returns:
        A tuple of group 1 upper-cased, and group 2 stripped of surrounding
        whitespace and then translated through ``table``.
    """
    label = match.group(1).upper()
    cleaned = match.group(2).strip()
    return label, cleaned.translate(table)
def buildPunctuationReplace():
    """Build a ``translate`` table mapping typographic characters to ASCII.

    Returns:
        A dict keyed by Unicode code point whose values are the ASCII
        replacement strings, suitable for passing to ``translate`` on a
        text string.
    """
    table = {
        0xa6: u'|',
        0xb4: u'\'',
        0xb6: u'*',
        0xd7: u'x',
        0x2022: u'*',  # bullet
        0x2023: u'*',
        0x2024: u'.',
        0x2027: u'*',
        0x2032: u"'",
        0x2035: u"'",
        0x2039: u'<',
        0x203a: u'>',
        0x2043: u'-',
        0x2044: u'/',
        0x204e: u'*',
        0x2053: u'~',
        0x205f: u' ',
        0x2192: u'>',  # rightwards arrow
    }

    # Contiguous runs, written as (first, last, replacement) with BOTH ends
    # inclusive.  The previous code passed ``last`` straight to range(), whose
    # stop value is exclusive, so the final code point of each run (U+200A,
    # U+2015, U+201B, U+201F) was silently left untranslated.
    inclusive_runs = (
        (0x2000, 0x200a, u' '),  # U+2000..U+200A
        (0x2010, 0x2015, u'-'),  # U+2010..U+2015
        (0x2018, 0x201b, u"'"),  # U+2018..U+201B
        (0x201c, 0x201f, u'"'),  # U+201C..U+201F
    )
    for first, last, replacement in inclusive_runs:
        table.update({c: replacement for c in range(first, last + 1)})
    return table
# -----------------------------------------------------------------------------
def run():
    """Parse the lines from ``acquire()`` and yield one dict per question.

    The parser is a two-state machine:

    * state 0 expects a numbered question line; on a match it stores the
      cleaned text under ``'text'``, starts an empty ``'options'`` dict and
      moves to state 1.
    * state 1 expects lettered option lines and adds each one to
      ``'options'`` keyed by its upper-cased letter.

    A line that does not fit the current state is tried as an answer key.
    If it matches, its characters are stored under ``'answers'`` and the
    accumulated dict is yielded; otherwise a "Lost State" message is printed.
    In both cases the machine resets to state 0 with a fresh dict.

    Yields:
        dict: the accumulated question. When the answer key arrives in
        state 1 it holds ``'text'``, ``'options'`` and ``'answers'``; an
        answer key seen in state 0 yields a dict holding only ``'answers'``.
    """
    # Raw strings keep the backslash sequences as regex escapes; in a plain
    # string literal they are invalid escape sequences (SyntaxWarning on
    # Python 3.12+).  The compiled patterns are unchanged.
    sm = [
        re.compile(r'([0-9]+)\.\s(.*)'),     # state 0: "<number>. <text>"
        re.compile(r'([a-fA-F])\.?\s(.*)'),  # state 1: "<letter>[.] <text>"
        # Answer key.  NOTE(review): match() only anchors at the start, so any
        # otherwise-unmatched line that merely begins with a-f/A-F is accepted
        # here and split into characters -- confirm this is intended.
        re.compile(r'([a-fA-F])+'),
    ]
    table = buildPunctuationReplace()

    state = 0
    accum = {}
    num = 1  # replaced by the most recent question number (a string) once parsed
    for line in acquire():
        m = sm[state].match(line)
        if not m:
            m = sm[2].match(line)
            if m:
                # One list element per character of the answer line.
                accum['answers'] = list(line.strip())
                yield accum
            else:
                print('Lost State around {0}'.format(num))
            # Either way, start over with a fresh question.
            state = 0
            accum = {}
        elif state == 0:
            num, accum['text'] = extract_pair(m, table)
            accum['options'] = {}
            state = state + 1
        elif state == 1:
            on, t = extract_pair(m, table)
            accum['options'][on] = t
if __name__ == '__main__':
    # Parse everything first, then write the whole list as one JSON array
    # with keys sorted.
    questions = list(run())
    with open('questions.json', 'w') as out:
        json.dump(questions, out, sort_keys=True)