Skip to content

Commit 4ce6de8

Browse files
committed
simple performance improvements
1 parent 11f7b22 commit 4ce6de8

File tree

3 files changed

+91
-61
lines changed

3 files changed

+91
-61
lines changed

src/json_stream/base.py

Lines changed: 86 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,53 @@
1-
import collections
21
import copy
3-
from abc import ABC
4-
from collections import OrderedDict
2+
from collections import OrderedDict, deque
53
from itertools import chain
64
from typing import Optional, Iterator, Any
75

86
from json_stream.tokenizer import TokenType
97

8+
OPERATOR = TokenType.OPERATOR
9+
STRING = TokenType.STRING
10+
NUMBER = TokenType.NUMBER
11+
NULL = TokenType.NULL
12+
BOOLEAN = TokenType.BOOLEAN
13+
COLON = (OPERATOR, ":")
14+
1015

1116
class TransientAccessException(Exception):
1217
pass
1318

1419

15-
class StreamingJSONBase(ABC):
20+
class StreamingJSONBase(object):
1621
INCOMPLETE_ERROR = "Unexpected end of file"
1722

18-
@classmethod
19-
def factory(cls, token, token_stream, persistent):
20-
if persistent:
21-
if token == '{':
22-
return PersistentStreamingJSONObject(token_stream)
23-
if token == '[':
24-
return PersistentStreamingJSONList(token_stream)
25-
else:
26-
if token == '{':
27-
return TransientStreamingJSONObject(token_stream)
28-
if token == '[':
29-
return TransientStreamingJSONList(token_stream)
30-
raise ValueError(f"Unknown operator {token}") # pragma: no cover
31-
32-
_persistent_children: bool
23+
__slots__ = '_persistent_children', '_stream', '_child', 'streaming'
3324

3425
def __init__(self, token_stream):
26+
# NOTE: these three assignments are duplicated (inlined) in the subclass __init__ methods; keep them in sync
3527
self.streaming = True
3628
self._stream = token_stream
3729
self._child: Optional[StreamingJSONBase] = None
3830

39-
def _clear_child(self):
40-
if self._child is not None:
41-
self._child.read_all()
42-
self._child = None
43-
4431
def _iter_items(self):
32+
if not self.streaming:
33+
return
34+
load = self._load_item
4535
while True:
46-
if not self.streaming:
47-
return
48-
self._clear_child()
36+
# clear child
37+
if self._child is not None:
38+
# drain any items the child has not yet consumed (inlined from read_all())
39+
deque(self._child._iter_items(), maxlen=0)
40+
self._child = None
41+
4942
try:
50-
item = self._load_item()
43+
yield load()
5144
except StopIteration:
5245
if self.streaming:
5346
raise ValueError(self.INCOMPLETE_ERROR)
5447
return
55-
yield item
56-
57-
def _done(self):
58-
self.streaming = False
59-
raise StopIteration()
6048

6149
def read_all(self):
62-
collections.deque(self._iter_items(), maxlen=0)
50+
deque(self._iter_items(), maxlen=0)
6351

6452
def _load_item(self):
6553
raise NotImplementedError() # pragma: no cover
@@ -83,9 +71,15 @@ def __deepcopy__(self, memo):
8371
raise copy.Error("Copying json_steam objects leads to a bad time")
8472

8573

86-
class PersistentStreamingJSONBase(StreamingJSONBase, ABC):
74+
class PersistentStreamingJSONBase(StreamingJSONBase):
75+
__slots__ = '_data'
76+
8777
def __init__(self, token_stream):
88-
super().__init__(token_stream)
78+
# inlined from super
79+
self.streaming = True
80+
self._stream = token_stream
81+
self._child: Optional[StreamingJSONBase] = None
82+
8983
self._data = self._init_persistent_data()
9084
self._persistent_children = True
9185

@@ -107,9 +101,15 @@ def __repr__(self): # pragma: no cover
107101
return f"<{type(self).__name__}: {repr(self._data)}, {'STREAMING' if self.streaming else 'DONE'}>"
108102

109103

110-
class TransientStreamingJSONBase(StreamingJSONBase, ABC):
104+
class TransientStreamingJSONBase(StreamingJSONBase):
105+
__slots__ = '_started',
106+
111107
def __init__(self, token_stream):
112-
super().__init__(token_stream)
108+
# inlined from super
109+
self.streaming = True
110+
self._stream = token_stream
111+
self._child: Optional[StreamingJSONBase] = None
112+
113113
self._started = False
114114
self._persistent_children = False
115115

@@ -137,29 +137,35 @@ def __repr__(self): # pragma: no cover
137137
return f"<{type(self).__name__}: TRANSIENT, {'STREAMING' if self.streaming else 'DONE'}>"
138138

139139

140-
class StreamingJSONList(StreamingJSONBase, ABC):
140+
class StreamingJSONList(StreamingJSONBase):
141141
INCOMPLETE_ERROR = "Unterminated list at end of file"
142142

143+
__slots__ = ()
144+
143145
def _load_item(self):
144-
token_type, v = next(self._stream)
145-
if token_type == TokenType.OPERATOR:
146+
stream = self._stream
147+
token_type, v = next(stream)
148+
if token_type == OPERATOR:
146149
if v == ']':
147-
self._done()
150+
self.streaming = False
151+
raise StopIteration()
148152
if v == ',':
149-
token_type, v = next(self._stream)
153+
token_type, v = next(stream)
150154
elif v in '{[':
151155
pass
152156
else: # pragma: no cover
153157
raise ValueError(f"Expecting value, comma or ], got {v}")
154-
if token_type == TokenType.OPERATOR:
155-
self._child = v = self.factory(v, self._stream, self._persistent_children)
158+
if token_type == OPERATOR:
159+
self._child = v = factory[self._persistent_children, v](stream)
156160
return v
157161

158162
def _get__iter__(self):
159163
return self._iter_items()
160164

161165

162166
class PersistentStreamingJSONList(PersistentStreamingJSONBase, StreamingJSONList):
167+
__slots__ = ()
168+
163169
def _init_persistent_data(self):
164170
return []
165171

@@ -185,8 +191,16 @@ def __getitem__(self, k) -> Any:
185191

186192

187193
class TransientStreamingJSONList(TransientStreamingJSONBase, StreamingJSONList):
194+
__slots__ = "_index",
195+
188196
def __init__(self, token_stream):
189-
super().__init__(token_stream)
197+
# inlined from TransientStreamingJSONBase.__init__ and StreamingJSONBase.__init__ (replaces the super() call)
198+
self.streaming = True
199+
self._stream = token_stream
200+
self._child: Optional[StreamingJSONBase] = None
201+
self._started = False
202+
self._persistent_children = False
203+
190204
self._index = -1
191205

192206
def _load_item(self):
@@ -203,26 +217,29 @@ def _find_item(self, i):
203217
raise IndexError(f"Index {i} out of range")
204218

205219

206-
class StreamingJSONObject(StreamingJSONBase, ABC):
220+
class StreamingJSONObject(StreamingJSONBase):
207221
INCOMPLETE_ERROR = "Unterminated object at end of file"
208222

223+
__slots__ = ()
224+
209225
def _load_item(self):
210-
token_type, k = next(self._stream)
211-
if token_type == TokenType.OPERATOR:
226+
stream = self._stream
227+
token_type, k = next(stream)
228+
if token_type == OPERATOR:
212229
if k == '}':
213-
self._done()
230+
self.streaming = False
231+
raise StopIteration()
214232
if k == ',':
215-
token_type, k = next(self._stream)
216-
if token_type != TokenType.STRING: # pragma: no cover
233+
token_type, k = next(stream)
234+
if token_type != STRING: # pragma: no cover
217235
raise ValueError(f"Expecting string, comma or }}, got {k} ({token_type})")
218236

219-
token_type, token = next(self._stream)
220-
if token_type != TokenType.OPERATOR or token != ":":
237+
if next(stream) != COLON:
221238
raise ValueError("Expecting :") # pragma: no cover
222239

223-
token_type, v = next(self._stream)
224-
if token_type == TokenType.OPERATOR:
225-
self._child = v = self.factory(v, self._stream, self._persistent_children)
240+
token_type, v = next(stream)
241+
if token_type == OPERATOR:
242+
self._child = v = factory[self._persistent_children, v](stream)
226243
return k, v
227244

228245
def _get__iter__(self):
@@ -251,6 +268,8 @@ def get(self, k, default=None) -> Any:
251268

252269

253270
class PersistentStreamingJSONObject(PersistentStreamingJSONBase, StreamingJSONObject):
271+
__slots__ = ()
272+
254273
def _init_persistent_data(self):
255274
return OrderedDict()
256275

@@ -277,6 +296,8 @@ def __getitem__(self, k) -> Any:
277296

278297

279298
class TransientStreamingJSONObject(TransientStreamingJSONBase, StreamingJSONObject):
299+
__slots__ = ()
300+
280301
def _find_item(self, k):
281302
was_started = self._started
282303
try:
@@ -299,3 +320,11 @@ def keys(self):
299320
def values(self):
300321
self._check_started()
301322
return (v for k, v in self._iter_items())
323+
324+
325+
factory = {
326+
(True, '{'): PersistentStreamingJSONObject,
327+
(True, '['): PersistentStreamingJSONList,
328+
(False, '{'): TransientStreamingJSONObject,
329+
(False, '['): TransientStreamingJSONList,
330+
}

src/json_stream/loader.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
1-
from json_stream.base import StreamingJSONBase, TokenType
1+
from json_stream.base import factory
22
from json_stream.iterators import ensure_file
33
from json_stream.select_tokenizer import default_tokenizer
4+
from json_stream.tokenizer import TokenType
45

56

67
def load(fp_or_iterable, persistent=False, tokenizer=default_tokenizer):
78
fp = ensure_file(fp_or_iterable)
89
token_stream = tokenizer(fp)
910
token_type, token = next(token_stream)
1011
if token_type == TokenType.OPERATOR:
11-
return StreamingJSONBase.factory(token, token_stream, persistent)
12+
return factory[persistent, token](token_stream)
1213
return token

src/json_stream/visitor.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from json_stream.base import StreamingJSONObject, StreamingJSONList, StreamingJSONBase
1+
from json_stream.base import StreamingJSONObject, StreamingJSONList, factory
22
from json_stream.iterators import ensure_file
33
from json_stream.select_tokenizer import default_tokenizer
44

@@ -23,5 +23,5 @@ def visit(fp_or_iterator, visitor, tokenizer=default_tokenizer):
2323
fp = ensure_file(fp_or_iterator)
2424
token_stream = tokenizer(fp)
2525
_, token = next(token_stream)
26-
obj = StreamingJSONBase.factory(token, token_stream, persistent=False)
26+
obj = factory[False, token](token_stream)
2727
_visit(obj, visitor, ())

0 commit comments

Comments
 (0)