Skip to content

Commit 97db1f8

Browse files
committed
try to slightly improve true/false/null tokenization for byte-backed parsers
1 parent 841b90d commit 97db1f8

File tree

3 files changed

+136
-54
lines changed

3 files changed

+136
-54
lines changed

src/main/java/com/fasterxml/jackson/core/json/ReaderBasedJsonParser.java

+31-15
Original file line numberDiff line numberDiff line change
@@ -2594,34 +2594,50 @@ private final void _matchNull() throws IOException {
25942594
protected final void _matchToken(String matchStr, int i) throws IOException
25952595
{
25962596
final int len = matchStr.length();
2597+
if ((_inputPtr + len) >= _inputEnd) {
2598+
_matchToken2(matchStr, i);
2599+
return;
2600+
}
25972601

25982602
do {
2599-
if (_inputPtr >= _inputEnd) {
2600-
if (!_loadMore()) {
2601-
_reportInvalidToken(matchStr.substring(0, i));
2602-
}
2603-
}
26042603
if (_inputBuffer[_inputPtr] != matchStr.charAt(i)) {
26052604
_reportInvalidToken(matchStr.substring(0, i));
26062605
}
26072606
++_inputPtr;
26082607
} while (++i < len);
2608+
int ch = _inputBuffer[_inputPtr];
2609+
if (ch >= '0' && ch != ']' && ch != '}') { // expected/allowed chars
2610+
_checkMatchEnd(matchStr, i, ch);
2611+
}
2612+
}
26092613

2610-
// but let's also ensure we either get EOF, or non-alphanum char...
2611-
if (_inputPtr >= _inputEnd) {
2612-
if (!_loadMore()) {
2613-
return;
2614+
private final void _matchToken2(String matchStr, int i) throws IOException
2615+
{
2616+
final int len = matchStr.length();
2617+
do {
2618+
if (((_inputPtr >= _inputEnd) && !_loadMore())
2619+
|| (_inputBuffer[_inputPtr] != matchStr.charAt(i))) {
2620+
_reportInvalidToken(matchStr.substring(0, i));
26142621
}
2615-
}
2616-
char c = _inputBuffer[_inputPtr];
2617-
if (c < '0' || c == ']' || c == '}') { // expected/allowed chars
2622+
++_inputPtr;
2623+
} while (++i < len);
2624+
2625+
// but let's also ensure we either get EOF, or non-alphanum char...
2626+
if (_inputPtr >= _inputEnd && !_loadMore()) {
26182627
return;
26192628
}
2620-
// if Java letter, it's a problem tho
2621-
if (Character.isJavaIdentifierPart(c)) {
2629+
int ch = _inputBuffer[_inputPtr];
2630+
if (ch >= '0' && ch != ']' && ch != '}') { // expected/allowed chars
2631+
_checkMatchEnd(matchStr, i, ch);
2632+
}
2633+
}
2634+
2635+
private final void _checkMatchEnd(String matchStr, int i, int c) throws IOException {
2636+
// but actually only alphanums are problematic
2637+
char ch = (char) c;
2638+
if (Character.isJavaIdentifierPart(ch)) {
26222639
_reportInvalidToken(matchStr.substring(0, i));
26232640
}
2624-
return;
26252641
}
26262642

26272643
/*

src/main/java/com/fasterxml/jackson/core/json/UTF8StreamJsonParser.java

+101-35
Original file line numberDiff line numberDiff line change
@@ -805,15 +805,15 @@ public JsonToken nextToken() throws IOException
805805
t = _parsePosNumber(i);
806806
break;
807807
case 'f':
808-
_matchToken("false", 1);
808+
_matchFalse();
809809
t = JsonToken.VALUE_FALSE;
810810
break;
811811
case 'n':
812-
_matchToken("null", 1);
812+
_matchNull();
813813
t = JsonToken.VALUE_NULL;
814814
break;
815815
case 't':
816-
_matchToken("true", 1);
816+
_matchTrue();
817817
t = JsonToken.VALUE_TRUE;
818818
break;
819819
case '[':
@@ -844,13 +844,13 @@ private final JsonToken _nextTokenNotInObject(int i) throws IOException
844844
_parsingContext = _parsingContext.createChildObjectContext(_tokenInputRow, _tokenInputCol);
845845
return (_currToken = JsonToken.START_OBJECT);
846846
case 't':
847-
_matchToken("true", 1);
847+
_matchTrue();
848848
return (_currToken = JsonToken.VALUE_TRUE);
849849
case 'f':
850-
_matchToken("false", 1);
850+
_matchFalse();
851851
return (_currToken = JsonToken.VALUE_FALSE);
852852
case 'n':
853-
_matchToken("null", 1);
853+
_matchNull();
854854
return (_currToken = JsonToken.VALUE_NULL);
855855
case '-':
856856
return (_currToken = _parseNegNumber());
@@ -1073,15 +1073,15 @@ public String nextFieldName() throws IOException
10731073
t = _parsePosNumber(i);
10741074
break;
10751075
case 'f':
1076-
_matchToken("false", 1);
1076+
_matchFalse();
10771077
t = JsonToken.VALUE_FALSE;
10781078
break;
10791079
case 'n':
1080-
_matchToken("null", 1);
1080+
_matchNull();
10811081
t = JsonToken.VALUE_NULL;
10821082
break;
10831083
case 't':
1084-
_matchToken("true", 1);
1084+
_matchTrue();
10851085
t = JsonToken.VALUE_TRUE;
10861086
break;
10871087
case '[':
@@ -1164,15 +1164,15 @@ private final void _isNextTokenNameYes(int i) throws IOException
11641164
_nextToken = JsonToken.START_OBJECT;
11651165
return;
11661166
case 't':
1167-
_matchToken("true", 1);
1167+
_matchTrue();
11681168
_nextToken = JsonToken.VALUE_TRUE;
11691169
return;
11701170
case 'f':
1171-
_matchToken("false", 1);
1171+
_matchFalse();
11721172
_nextToken = JsonToken.VALUE_FALSE;
11731173
return;
11741174
case 'n':
1175-
_matchToken("null", 1);
1175+
_matchNull();
11761176
_nextToken = JsonToken.VALUE_NULL;
11771177
return;
11781178
case '-':
@@ -1221,15 +1221,15 @@ private final boolean _isNextTokenNameMaybe(int i, SerializableString str) throw
12211221
t = JsonToken.START_OBJECT;
12221222
break;
12231223
case 't':
1224-
_matchToken("true", 1);
1224+
_matchTrue();
12251225
t = JsonToken.VALUE_TRUE;
12261226
break;
12271227
case 'f':
1228-
_matchToken("false", 1);
1228+
_matchFalse();
12291229
t = JsonToken.VALUE_FALSE;
12301230
break;
12311231
case 'n':
1232-
_matchToken("null", 1);
1232+
_matchNull();
12331233
t = JsonToken.VALUE_NULL;
12341234
break;
12351235
case '-':
@@ -2616,17 +2616,15 @@ protected void _skipString() throws IOException
26162616
* Method for handling cases where first non-space character
26172617
* of an expected value token is not legal for standard JSON content.
26182618
*/
2619-
protected JsonToken _handleUnexpectedValue(int c)
2620-
throws IOException
2619+
protected JsonToken _handleUnexpectedValue(int c) throws IOException
26212620
{
26222621
// Most likely an error, unless we are to allow single-quote-strings
26232622
switch (c) {
2624-
/*
2625-
* This check proceeds only if the Feature.ALLOW_MISSING_VALUES is enabled
2626-
* The Check is for missing values. Incase of missing values in an array, the next token will be either ',' or ']'.
2627-
* This case, decrements the already incremented _inputPtr in the buffer in case of comma(,)
2628-
* so that the existing flow goes back to checking the next token which will be comma again and
2629-
* it continues the parsing.
2623+
/* This check proceeds only if `Feature.ALLOW_MISSING_VALUES` is enabled;
2624+
* it is for missing values. In case of missing values in an array the next token
2625+
* will be either ',' or ']'. This case decrements the already incremented _inputPtr
2626+
* in the buffer in case of comma (`,`) so that the existing flow goes back to checking
2627+
* the next token, which will be the comma again, and parsing continues.
26302628
* Also the case returns NULL as current token in case of ',' or ']'.
26312629
*/
26322630
case ']':
@@ -2635,13 +2633,12 @@ protected JsonToken _handleUnexpectedValue(int c)
26352633
}
26362634
// fall through
26372635
case ',':
2638-
/* 28-Mar-2016: [core#116]: If Feature.ALLOW_MISSING_VALUES is enabled
2639-
* we may allow "missing values", that is, encountering a trailing
2640-
* comma or closing marker where value would be expected
2641-
*/
2636+
// 28-Mar-2016: [core#116]: If Feature.ALLOW_MISSING_VALUES is enabled
2637+
// we may allow "missing values", that is, encountering a trailing
2638+
// comma or closing marker where value would be expected
26422639
if (isEnabled(Feature.ALLOW_MISSING_VALUES)) {
2643-
_inputPtr--;
2644-
return JsonToken.VALUE_NULL;
2640+
--_inputPtr;
2641+
return JsonToken.VALUE_NULL;
26452642
}
26462643
// fall through
26472644
case '}':
@@ -2684,8 +2681,7 @@ protected JsonToken _handleUnexpectedValue(int c)
26842681
return null;
26852682
}
26862683

2687-
protected JsonToken _handleApos()
2688-
throws IOException
2684+
protected JsonToken _handleApos() throws IOException
26892685
{
26902686
int c = 0;
26912687
// Otherwise almost verbatim copy of _finishString()
@@ -2773,13 +2769,18 @@ protected JsonToken _handleApos()
27732769

27742770
return JsonToken.VALUE_STRING;
27752771
}
2776-
2772+
2773+
/*
2774+
/**********************************************************
2775+
/* Internal methods, well-known token decoding
2776+
/**********************************************************
2777+
*/
2778+
27772779
/**
27782780
* Method called if expected numeric value (due to leading sign) does not
27792781
* look like a number
27802782
*/
2781-
protected JsonToken _handleInvalidNumberStart(int ch, boolean neg)
2782-
throws IOException
2783+
protected JsonToken _handleInvalidNumberStart(int ch, boolean neg) throws IOException
27832784
{
27842785
while (ch == 'I') {
27852786
if (_inputPtr >= _inputEnd) {
@@ -2800,12 +2801,72 @@ protected JsonToken _handleInvalidNumberStart(int ch, boolean neg)
28002801
if (isEnabled(Feature.ALLOW_NON_NUMERIC_NUMBERS)) {
28012802
return resetAsNaN(match, neg ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY);
28022803
}
2803-
_reportError("Non-standard token '"+match+"': enable JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS to allow");
2804+
_reportError("Non-standard token '%s': enable JsonParser.Feature.ALLOW_NON_NUMERIC_NUMBERS to allow",
2805+
match);
28042806
}
28052807
reportUnexpectedNumberChar(ch, "expected digit (0-9) to follow minus sign, for valid numeric value");
28062808
return null;
28072809
}
28082810

2811+
// NOTE: first character already decoded
2812+
protected final void _matchTrue() throws IOException
2813+
{
2814+
int ptr = _inputPtr;
2815+
if ((ptr + 3) < _inputEnd) {
2816+
byte[] buf = _inputBuffer;
2817+
if ((buf[ptr++] == 'r')
2818+
&& (buf[ptr++] == 'u')
2819+
&& (buf[ptr++] == 'e')) {
2820+
int ch = buf[ptr] & 0xFF;
2821+
_inputPtr = ptr;
2822+
if (ch >= INT_0 && ch != INT_RBRACKET && ch != INT_RCURLY) { // expected/allowed chars
2823+
_checkMatchEnd("true", 4, ch);
2824+
}
2825+
return;
2826+
}
2827+
}
2828+
_matchToken2("true", 1);
2829+
}
2830+
2831+
protected final void _matchFalse() throws IOException
2832+
{
2833+
int ptr = _inputPtr;
2834+
if ((ptr + 4) < _inputEnd) {
2835+
byte[] buf = _inputBuffer;
2836+
if ((buf[ptr++] == 'a')
2837+
&& (buf[ptr++] == 'l')
2838+
&& (buf[ptr++] == 's')
2839+
&& (buf[ptr++] == 'e')) {
2840+
int ch = buf[ptr] & 0xFF;
2841+
_inputPtr = ptr;
2842+
if (ch >= INT_0 && ch != INT_RBRACKET && ch != INT_RCURLY) { // expected/allowed chars
2843+
_checkMatchEnd("false", 5, ch);
2844+
}
2845+
return;
2846+
}
2847+
}
2848+
_matchToken2("false", 1);
2849+
}
2850+
2851+
protected final void _matchNull() throws IOException
2852+
{
2853+
int ptr = _inputPtr;
2854+
if ((ptr + 3) < _inputEnd) {
2855+
byte[] buf = _inputBuffer;
2856+
if ((buf[ptr++] == 'u')
2857+
&& (buf[ptr++] == 'l')
2858+
&& (buf[ptr++] == 'l')) {
2859+
int ch = buf[ptr] & 0xFF;
2860+
_inputPtr = ptr;
2861+
if (ch >= INT_0 && ch != INT_RBRACKET && ch != INT_RCURLY) { // expected/allowed chars
2862+
_checkMatchEnd("null", 4, ch);
2863+
}
2864+
return;
2865+
}
2866+
}
2867+
_matchToken2("null", 1);
2868+
}
2869+
28092870
protected final void _matchToken(String matchStr, int i) throws IOException
28102871
{
28112872
final int len = matchStr.length();
@@ -3491,6 +3552,11 @@ private int nextByte() throws IOException
34913552
/**********************************************************
34923553
*/
34933554

3555+
protected void _reportInvalidToken(String matchedPart, int ptr) throws IOException {
3556+
_inputPtr = ptr;
3557+
_reportInvalidToken(matchedPart, "'null', 'true', 'false' or NaN");
3558+
}
3559+
34943560
protected void _reportInvalidToken(String matchedPart) throws IOException {
34953561
_reportInvalidToken(matchedPart, "'null', 'true', 'false' or NaN");
34963562
}

src/test/java/com/fasterxml/jackson/core/json/TestRootValues.java

+4-4
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ private void _testSimpleNumbers(boolean useStream) throws Exception
2929
jp.close();
3030
}
3131

32-
public void testBrokeanNumber() throws Exception
32+
public void testBrokenNumber() throws Exception
3333
{
34-
_testBrokeanNumber(false);
35-
_testBrokeanNumber(true);
34+
_testBrokenNumber(false);
35+
_testBrokenNumber(true);
3636
}
3737

38-
private void _testBrokeanNumber(boolean useStream) throws Exception
38+
private void _testBrokenNumber(boolean useStream) throws Exception
3939
{
4040
JsonFactory f = new JsonFactory();
4141
final String DOC = "14:89:FD:D3:E7:8C";

0 commit comments

Comments
 (0)