1
1
# coding=utf8
2
-
3
- import re
4
- import sys
5
- import traceback
6
- import functools
7
- import json .decoder
8
-
9
- from json .decoder import JSONDecoder
10
- from json .scanner import py_make_scanner
11
- from json .decoder import py_scanstring
12
-
13
-
14
- # errmsg.inv
15
def inv_errmsg(e, exc_info=None):
    """Split a json decoder error message back into its structured parts.

    The json module formats errors as ``"<msg>: line L column C (char P)"``
    or, for ranges,
    ``"<msg>: line L column C - line L2 column C2 (char P - E)"``.
    This function reverses that formatting.

    Args:
        e: The exception raised while decoding. Its text is taken from
            ``e.message`` when that attribute exists (Python 2) and from
            ``str(e)`` otherwise (Python 3).
        exc_info: Unused; accepted only so existing callers that pass
            ``sys.exc_info()`` keep working.

    Returns:
        A dict with ``errmsg``, ``parser`` (the name stored on the exception
        by ``add_parser_name``, or ``""``), ``lineno`` and ``colno``; plus
        ``pos`` for single-position errors, or ``endlineno``, ``endcolno``,
        ``pos`` and ``end`` for range errors.

    Raises:
        ValueError: If the message contains no ``':'`` separator.
        IndexError: If fewer than two numbers follow the last ``':'``.
    """
    # Exceptions lost the ``message`` attribute in Python 3; fall back to str().
    message = getattr(e, "message", None) or str(e)

    # Split on the LAST colon: the message itself may contain one,
    # e.g. "Expecting ':' delimiter".
    idx = message.rindex(':')
    errmsg = message[:idx]
    left = message[idx + 1:]
    numbers = [int(n) for n in re.findall(r'\d+', left)]

    result = {
        "errmsg": errmsg,
        "parser": e.__dict__.get("parser", ""),
        "lineno": numbers[0],
        "colno": numbers[1],
    }
    if len(numbers) == 3:
        result["pos"] = numbers[2]
    elif len(numbers) >= 6:
        # Range form: line/column of both ends, then char start and end.
        result["endlineno"] = numbers[2]
        result["endcolno"] = numbers[3]
        result["pos"] = numbers[4]
        result["end"] = numbers[5]
    return result
39
-
40
-
41
- # 记录 Exception 被哪个 parser 抛出的
42
def add_parser_name(parser):
    """Wrap ``parser`` so exceptions it raises remember which parser failed.

    Args:
        parser: Any callable.

    Returns:
        A callable with the same call signature and return value. If the
        call raises an ``Exception`` that has no ``parser`` entry in its
        ``__dict__`` yet, the wrapped callable's name is stored there before
        the exception propagates. An existing entry is never overwritten, so
        the innermost wrapped parser wins.
    """
    import functools

    parser_name = getattr(parser, "__name__", repr(parser))

    # wraps() keeps the original __name__, so wrapping an already wrapped
    # parser still reports the real parser name instead of "new_parser".
    @functools.wraps(parser)
    def new_parser(*args, **kwargs):
        try:
            return parser(*args, **kwargs)
        except Exception as e:
            if "parser" not in e.__dict__:
                e.__dict__["parser"] = parser_name
            # Bare raise keeps the original traceback intact.
            raise
    return new_parser
53
-
54
-
55
def make_decoder():
    """Build a pure-Python JSONDecoder whose parsers tag the errors they raise.

    The object, array and string parsers are wrapped with
    ``add_parser_name`` and the scanner is rebuilt with ``py_make_scanner``
    so that it picks up the wrapped parsers.

    Side effect: ``json.decoder.scanstring`` is replaced module-wide by a
    wrapped ``py_scanstring``.

    Returns:
        The configured ``JSONDecoder`` instance.
    """
    decoder = JSONDecoder()
    # Each parser is wrapped exactly once; the original wrapped parse_object
    # a second time, which only added a redundant call layer.
    decoder.parse_object = add_parser_name(decoder.parse_object)
    decoder.parse_array = add_parser_name(decoder.parse_array)
    decoder.parse_string = add_parser_name(py_scanstring)

    # Must come after the wrapping: the scanner is built from the decoder's
    # current parse_* attributes.
    decoder.scan_once = py_make_scanner(decoder)

    json.decoder.scanstring = add_parser_name(py_scanstring)
    return decoder
68
-
69
-
70
- decoder = make_decoder ()
71
-
72
- """
73
- ValueError 抛出
74
- 01. _decode_uXXXX "Invalid \\ uXXXX escape"
75
- 02. py_scanstring "Unterminated string starting at"
76
- 03. py_scanstring "Invalid control character {0!r} at".format(terminator)
77
- 04. py_scanstring "Unterminated string starting at"
78
- 05. py_scanstring "Invalid \\ escape: " + repr(esc)
79
- 06. JSONObject "Expecting property name enclosed in double quotes"
80
- 07. JSONObject "Expecting ':' delimiter"
81
- 08. JSONObject "Expecting object"
82
- 09. JSONObject "Expecting ',' delimiter"
83
- 10. JSONObject "Expecting property name enclosed in double quotes"
84
- 11. JSONArray "Expecting object"
85
- 12. JSONArray "Expecting ',' delimiter"
86
-
87
- 01 先不看,不研究
88
- 02 badcase: " --> "" success
89
- 03 控制符 pass
90
- 04 unicode \\ u 的 pass
91
- 05 同上
92
- 06 object 后面没有跟随 " , badcase: {abc":1} --> {"abc":1}
93
- 07 object key 后面没有 : , badcase: {"abc"1} --> {"abc":1}
94
- 08 object 开始检测 Value 收到 StopIteration
95
- 08.1 要么后面没有了
96
- 08.2 要么后面不是 "/{/[/n[ull]/t[rue]/f[alse]/number/NaN/Infinity/-Infinity 开头的东西
97
- -- 08.1 后面补上 null}
98
- -- 08.2 无脑补一个 "
99
- 09 object 解析完一个 pair 后,下一个不是}, 期待一个 ','
100
- badcase {"k":1"s":2}
101
- 10 在 09 的基础上解析完{"k":1, 发现下一个不是 ", 这个后面再优化(暂时和 06 一致)
102
- badcase {"k":1,x":2}
103
- 11 array 开始检测 Value 收到 StopIteration
104
- 11.1 要么后面没有了,补上]
105
- 11.2 同 08.2,无脑补一个{ 看看
106
- 12 array 解析完前一个 object, 需要一个 ,
107
- 这里 nextchar 既不是 ] 也不是, 代表这个 nextchar 的 end 也已经+1 了,所以减 2
108
- """
109
-
110
def process_number():
    """Placeholder: no number handling is implemented; always returns None."""
    return None
2
+ from half_json .json_util import decoder
3
+ from half_json .json_util import errmsg_inv
4
+ from half_json .json_util import errors
112
5
113
6
114
7
def find_stop (line ):
115
8
try :
116
- import pdb
117
- pdb .set_trace ()
118
-
119
9
# 暂时只考虑 1 行的情况
120
10
obj , end = decoder .scan_once (line , 0 )
121
11
# TODO end is only part of line
122
12
return end == len (line ), line
123
13
except StopIteration as e :
124
14
return True , ""
125
15
except ValueError as e :
126
- err_info = inv_errmsg (e , sys .exc_info ())
16
+ err_info = errmsg_inv (e )
17
+ error = err_info ["error" ]
127
18
pos = err_info ["pos" ]
128
19
nextchar = line [pos : pos + 1 ]
129
- parser = err_info ["parser" ]
130
- errmsg = err_info ["errmsg" ]
131
- lastchar = line [pos - 1 : pos ]
20
+ # lastchar = line[pos-1: pos]
132
21
133
22
# 02
134
- if errmsg == "Unterminated string starting at" :
23
+ if error == errors . StringUnterminatedString :
135
24
# TODO resolve "abc --> "abc"
136
25
return False , insert_line (line , "\" " , len (line ))
137
26
# 06
138
- if errmsg == "Expecting property name enclosed in double quotes" :
27
+ if error == errors . ObjectExceptKey :
139
28
# lastchar = line[pos-1: pos]
140
29
# for case {
141
30
# if lastchar == "{" and all([c not in line for c in '"}:']):
142
31
# return False, insert_line(line, "}", pos)
143
32
return False , insert_line (line , "\" " , pos )
144
33
# 07
145
- if errmsg == "Expecting ':' delimiter" :
34
+ if error == errors . ObjectExceptColon :
146
35
return False , insert_line (line , ":" , pos )
147
36
# 08
148
- if parser == "JSONObject" and errmsg == "Expecting object" :
37
+ if error == errors . ObjectExceptObject :
149
38
# 08.1
150
39
if nextchar == "" :
151
40
return False , insert_line (line , "null}" , pos )
152
41
# 08.2
153
42
else :
154
43
return False , insert_line (line , "\" " , pos )
155
44
# 09
156
- if parser == "JSONObject" and errmsg == "Expecting ',' delimiter" :
45
+ if error == errors . ObjectExceptComma :
157
46
if nextchar == "" :
158
47
return False , insert_line (line , "}" , pos )
159
48
return False , insert_line (line , "," , pos )
160
49
# 11
161
- if parser == "JSONArray" and errmsg == "Expecting object" :
50
+ if error == errors . ArrayExceptObject :
162
51
# ?
163
52
if nextchar == "," :
164
53
return False , insert_line (line , "null" , pos )
@@ -170,7 +59,7 @@ def find_stop(line):
170
59
return False , insert_line (line , "{" , pos )
171
60
# 也许可以删掉前面的 , 补一个]
172
61
# 12
173
- if parser == "JSONArray" and errmsg == "Expecting ',' delimiter" :
62
+ if error == errors . ArrayExceptComma :
174
63
"""
175
64
code:
176
65
end += 1
@@ -180,7 +69,7 @@ def find_stop(line):
180
69
raise ValueError(errmsg("Expecting ',' delimiter", s, end))
181
70
"""
182
71
pos = pos - 1
183
- nextchar = line [pos : pos + 1 ]
72
+ nextchar = line [pos : pos + 1 ]
184
73
# 11.1
185
74
if nextchar == "" :
186
75
return False , insert_line (line , "]" , pos )
@@ -201,32 +90,3 @@ def clear(line):
201
90
if ok :
202
91
break
203
92
return ok , line
204
-
205
-
206
def main(infile, outfile):
    """Repair every line of ``infile`` and write the fixed lines to ``outfile``.

    Each input line is stripped and passed to ``clear``. Lines that come
    back ok are written to ``outfile``, one per line; the others are printed
    with their last attempted form. A summary with the hit ratio is printed
    at the end.

    Args:
        infile: Path of the text file to read, one JSON document per line.
        outfile: Path of the file that receives the repaired lines.
    """
    total = 0
    hit = 0

    # The with-statement closes both files even if something raises.
    with open(infile, 'r') as inf, open(outfile, 'w') as outf:
        for line in inf:
            try:
                total += 1
                line = line.strip()
                ok, new_line = clear(line)
                if ok:
                    outf.write(new_line + "\n")
                    hit += 1
                else:
                    print(ok, line, new_line)
            except Exception as e:
                # Best effort: report the failing line and keep going.
                print(e, line)

    # An empty input file must not crash the summary with a division by zero.
    ratio = hit * 1.0 / total if total else 0.0
    print("total is {} and hit {} --> ratio:{} \n".format(total, hit, ratio))
229
-
230
-
231
- if __name__ == '__main__' :
232
- main (sys .argv [1 ], sys .argv [2 ])
0 commit comments