Skip to content

Commit 8e8ed3e

Browse files
committed
Some skeletal work for non-blocking parsing
1 parent 81fb43c commit 8e8ed3e

File tree

3 files changed

+273
-9
lines changed

3 files changed

+273
-9
lines changed

src/main/java/com/fasterxml/jackson/core/json/ByteSourceJsonBootstrapper.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
*/
1919
public final class ByteSourceJsonBootstrapper
2020
{
21-
final static byte UTF8_BOM_1 = (byte) 0xEF;
22-
final static byte UTF8_BOM_2 = (byte) 0xBB;
23-
final static byte UTF8_BOM_3 = (byte) 0xBF;
21+
public final static byte UTF8_BOM_1 = (byte) 0xEF;
22+
public final static byte UTF8_BOM_2 = (byte) 0xBB;
23+
public final static byte UTF8_BOM_3 = (byte) 0xBF;
2424

2525
/*
2626
/**********************************************************

src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParser.java

+185-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
import com.fasterxml.jackson.core.async.ByteArrayFeeder;
88
import com.fasterxml.jackson.core.async.NonBlockingInputFeeder;
99
import com.fasterxml.jackson.core.io.IOContext;
10+
import com.fasterxml.jackson.core.json.ByteSourceJsonBootstrapper;
1011
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
1112
import com.fasterxml.jackson.core.util.VersionUtil;
1213

@@ -37,6 +38,29 @@ public class NonBlockingJsonParser
3738
// protected int _inputPtr;
3839
// protected int _inputEnd;
3940

41+
/*
42+
/**********************************************************************
43+
/* Location tracking, additional
44+
/**********************************************************************
45+
*/
46+
47+
/**
48+
* Alternate row tracker, used to keep track of position by `\r` marker
49+
* (whereas <code>_currInputRow</code> tracks `\n`). Used to simplify
50+
* tracking of linefeeds, assuming that input typically uses various
51+
* linefeed combinations (`\r`, `\n` or `\r\n`) consistently, in which
52+
* case we can simply choose max of two row candidates.
53+
*/
54+
protected int _currInputRowAlt = 1;
55+
56+
/*
57+
/**********************************************************************
58+
/* Other state
59+
/**********************************************************************
60+
*/
61+
62+
protected int _currentQuote;
63+
4064
/*
4165
/**********************************************************************
4266
/* Life-cycle
@@ -148,14 +172,14 @@ public JsonToken nextToken() throws IOException
148172

149173
// No: fresh new token; may or may not have existing one
150174
_numTypesValid = NR_UNKNOWN;
151-
// _tokenInputTotal = _currInputProcessed + _inputPtr;
175+
_tokenInputTotal = _currInputProcessed + _inputPtr;
152176
// also: clear any data retained so far
153177
_binaryValue = null;
154178
int ch = _inputBuffer[_inputPtr++];
155179

156180
switch (_majorState) {
157181
case MAJOR_INITIAL:
158-
// TODO: Bootstrapping? BOM?
182+
return _startDocument(ch);
159183

160184
case MAJOR_ROOT:
161185
return _startValue(ch);
@@ -193,12 +217,171 @@ protected final JsonToken _finishToken() throws IOException
193217
/**********************************************************************
194218
*/
195219

220+
private final JsonToken _startDocument(int ch) throws IOException
221+
{
222+
ch &= 0xFF;
223+
224+
// Very first byte: could be BOM
225+
if (ch == ByteSourceJsonBootstrapper.UTF8_BOM_1) {
226+
// !!! TODO
227+
}
228+
229+
// If not BOM (or we got past it), could be whitespace or comment to skip
230+
while (ch <= 0x020) {
231+
if (ch != INT_SPACE) {
232+
if (ch == INT_LF) {
233+
++_currInputRow;
234+
_currInputRowStart = _inputPtr;
235+
} else if (ch == INT_CR) {
236+
++_currInputRowAlt;
237+
_currInputRowStart = _inputPtr;
238+
} else if (ch != INT_TAB) {
239+
_throwInvalidSpace(ch);
240+
}
241+
}
242+
if (_inputPtr >= _inputEnd) {
243+
_minorState = MINOR_FIELD_ROOT_GOT_SEPARATOR;
244+
if (_closed) {
245+
return null;
246+
}
247+
// note: if so, do not even bother changing state
248+
if (_endOfInput) { // except for this special case
249+
return _eofAsNextToken();
250+
}
251+
return JsonToken.NOT_AVAILABLE;
252+
}
253+
ch = _inputBuffer[_inputPtr++] & 0xFF;
254+
}
255+
return _startValue(ch);
256+
}
257+
258+
/*
259+
/**********************************************************************
260+
/* Second-level decoding, value parsing
261+
/**********************************************************************
262+
*/
263+
196264
/**
197265
* Helper method called to detect type of a value token (at any level), and possibly
198266
* decode it if contained in input buffer.
199267
* Note that possible header has been ruled out by caller and is not checked here.
200268
*/
201269
private final JsonToken _startValue(int ch) throws IOException
270+
{
271+
if (ch == INT_QUOTE) {
272+
return _startString(ch);
273+
}
274+
switch (ch) {
275+
case '-':
276+
return _startNegativeNumber();
277+
278+
// Should we have separate handling for plus? Although
279+
// it is not allowed per se, it may be erroneously used,
280+
// and could be indicate by a more specific error message.
281+
case '0':
282+
case '1':
283+
case '2':
284+
case '3':
285+
case '4':
286+
case '5':
287+
case '6':
288+
case '7':
289+
case '8':
290+
case '9':
291+
return _startPositiveNumber(ch);
292+
case 'f':
293+
return _startFalseToken();
294+
case 'n':
295+
return _startNullToken();
296+
case 't':
297+
return _startTrueToken();
298+
case '[':
299+
return _startArrayScope();
300+
case ']':
301+
return _closeArrayScope();
302+
case '{':
303+
return _startObjectScope();
304+
case '}':
305+
return _closeObjectScope();
306+
default:
307+
}
308+
return _startUnexpectedValue(ch);
309+
}
310+
311+
protected JsonToken _startUnexpectedValue(int ch) throws IOException
312+
{
313+
// TODO: Maybe support non-standard tokens that streaming parser does:
314+
//
315+
// * NaN
316+
// * Infinity
317+
// * Plus-prefix for numbers
318+
// * Apostrophe for Strings
319+
320+
switch (ch) {
321+
case '\'':
322+
return _startString(ch);
323+
324+
case ',':
325+
// If Feature.ALLOW_MISSING_VALUES is enabled we may allow "missing values",
326+
// that is, encountering a trailing comma or closing marker where value would be expected
327+
if (!_parsingContext.inObject() && isEnabled(Feature.ALLOW_MISSING_VALUES)) {
328+
// Important to "push back" separator, to be consumed before next value;
329+
// does not lead to infinite loop
330+
--_inputPtr;
331+
return _valueComplete(JsonToken.VALUE_NULL);
332+
}
333+
break;
334+
}
335+
// !!! TODO: maybe try to collect more information for better diagnostics
336+
_reportUnexpectedChar(ch, "expected a valid value (number, String, array, object, 'true', 'false' or 'null')");
337+
return null;
338+
}
339+
340+
/*
341+
/**********************************************************************
342+
/* Second-level decoding, simple tokens
343+
/**********************************************************************
344+
*/
345+
346+
/**
 * Called by {@code _startValue()} when a value starts with character 'f'.
 * Skeletal placeholder: no decoding is done here yet, and {@code null}
 * is always returned.
 */
protected JsonToken _startFalseToken() throws IOException
347+
{
348+
return null;
349+
}
350+
351+
/**
 * Called by {@code _startValue()} when a value starts with character 't'.
 * Skeletal placeholder: no decoding is done here yet, and {@code null}
 * is always returned.
 */
protected JsonToken _startTrueToken() throws IOException
352+
{
353+
return null;
354+
}
355+
356+
/**
 * Called by {@code _startValue()} when a value starts with character 'n'.
 * Skeletal placeholder: no decoding is done here yet, and {@code null}
 * is always returned.
 */
protected JsonToken _startNullToken() throws IOException
357+
{
358+
return null;
359+
}
360+
361+
/*
362+
/**********************************************************************
363+
/* Second-level decoding, String decoding
364+
/**********************************************************************
365+
*/
366+
367+
/**
 * Called when a quote character that opens a String value has been seen.
 * Skeletal placeholder: only records the quote character in
 * {@code _currentQuote}; no String contents are decoded here yet, and
 * {@code null} is always returned.
 *
 * @param q Quote character that started the String
 */
protected JsonToken _startString(int q) throws IOException
368+
{
369+
_currentQuote = q;
370+
return null;
371+
}
372+
373+
/*
374+
/**********************************************************************
375+
/* Second-level decoding, Number decoding
376+
/**********************************************************************
377+
*/
378+
379+
/**
 * Called by {@code _startValue()} when a value starts with a digit
 * ('0' through '9').
 * Skeletal placeholder: no decoding is done here yet, and {@code null}
 * is always returned.
 *
 * @param ch Digit character that started the number
 */
protected JsonToken _startPositiveNumber(int ch) throws IOException
380+
{
381+
return null;
382+
}
383+
384+
/**
 * Called by {@code _startValue()} when a value starts with a minus sign.
 * Skeletal placeholder: no decoding is done here yet, and {@code null}
 * is always returned.
 */
protected JsonToken _startNegativeNumber() throws IOException
202385
{
203386
return null;
204387
}

src/main/java/com/fasterxml/jackson/core/json/async/NonBlockingJsonParserBase.java

+85-4
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import com.fasterxml.jackson.core.*;
77
import com.fasterxml.jackson.core.base.ParserBase;
88
import com.fasterxml.jackson.core.io.IOContext;
9+
import com.fasterxml.jackson.core.json.JsonReadContext;
910
import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
1011

1112
/**
@@ -42,12 +43,29 @@ public abstract class NonBlockingJsonParserBase
4243
* is forthcoming AND we have exhausted all the input
4344
*/
4445
protected final static int MAJOR_CLOSED = 5;
45-
46-
// // // "Sub-states"
4746

48-
protected final static int MINOR_FIELD_NAME = 1;
47+
/*
48+
/**********************************************************************
49+
/* Minor state constants
50+
/**********************************************************************
51+
*/
4952

50-
protected final static int MINOR_VALUE_NUMBER = 6;
53+
/**
54+
* State between root-level values, waiting for at least one white-space
55+
* character as separator
56+
*/
57+
protected final static int MINOR_FIELD_ROOT_NEED_SEPARATOR = 1;
58+
59+
/**
60+
* State between root-level values, having processed at least one white-space
61+
* character, and expecting either more, start of a value, or end of input
62+
* stream.
63+
*/
64+
protected final static int MINOR_FIELD_ROOT_GOT_SEPARATOR = 2;
65+
66+
protected final static int MINOR_FIELD_NAME = 10;
67+
68+
protected final static int MINOR_VALUE_NUMBER = 11;
5169

5270
protected final static int MINOR_VALUE_STRING = 15;
5371

@@ -148,6 +166,7 @@ public NonBlockingJsonParserBase(IOContext ctxt, int parserFeatures,
148166

149167
_currToken = null;
150168
_majorState = MAJOR_INITIAL;
169+
_majorStateAfterValue = MAJOR_ROOT;
151170
}
152171

153172
@Override
@@ -350,6 +369,68 @@ public int readBinaryValue(Base64Variant b64variant, OutputStream out)
350369
return _binaryValue.length;
351370
}
352371

372+
/*
373+
/**********************************************************************
374+
/* Handling of nested scope, state
375+
/**********************************************************************
376+
*/
377+
378+
protected final JsonToken _startArrayScope() throws IOException
379+
{
380+
_parsingContext = _parsingContext.createChildArrayContext(-1, -1);
381+
_majorState = MAJOR_ARRAY_ELEMENT;
382+
_majorStateAfterValue = MAJOR_ARRAY_ELEMENT;
383+
return (_currToken = JsonToken.START_ARRAY);
384+
}
385+
386+
protected final JsonToken _startObjectScope() throws IOException
387+
{
388+
_parsingContext = _parsingContext.createChildObjectContext(-1, -1);
389+
_majorState = MAJOR_OBJECT_FIELD;
390+
_majorStateAfterValue = MAJOR_OBJECT_FIELD;
391+
return (_currToken = JsonToken.START_OBJECT);
392+
}
393+
394+
protected final JsonToken _closeArrayScope() throws IOException
395+
{
396+
if (!_parsingContext.inArray()) {
397+
_reportMismatchedEndMarker(']', '}');
398+
}
399+
JsonReadContext ctxt = _parsingContext.getParent();
400+
_parsingContext = ctxt;
401+
int st;
402+
if (ctxt.inObject()) {
403+
st = MAJOR_OBJECT_FIELD;
404+
} else if (ctxt.inArray()) {
405+
st = MAJOR_ARRAY_ELEMENT;
406+
} else {
407+
st = MAJOR_ROOT;
408+
}
409+
_majorState = st;
410+
_majorStateAfterValue = st;
411+
return (_currToken = JsonToken.END_ARRAY);
412+
}
413+
414+
protected final JsonToken _closeObjectScope() throws IOException
415+
{
416+
if (!_parsingContext.inObject()) {
417+
_reportMismatchedEndMarker('}', ']');
418+
}
419+
JsonReadContext ctxt = _parsingContext.getParent();
420+
_parsingContext = ctxt;
421+
int st;
422+
if (ctxt.inObject()) {
423+
st = MAJOR_OBJECT_FIELD;
424+
} else if (ctxt.inArray()) {
425+
st = MAJOR_ARRAY_ELEMENT;
426+
} else {
427+
st = MAJOR_ROOT;
428+
}
429+
_majorState = st;
430+
_majorStateAfterValue = st;
431+
return (_currToken = JsonToken.END_OBJECT);
432+
}
433+
353434
/*
354435
/**********************************************************************
355436
/* Internal methods, field name parsing

0 commit comments

Comments
 (0)