|
7 | 7 | import com.fasterxml.jackson.core.async.ByteArrayFeeder;
|
8 | 8 | import com.fasterxml.jackson.core.async.NonBlockingInputFeeder;
|
9 | 9 | import com.fasterxml.jackson.core.io.IOContext;
|
| 10 | +import com.fasterxml.jackson.core.json.ByteSourceJsonBootstrapper; |
10 | 11 | import com.fasterxml.jackson.core.sym.ByteQuadsCanonicalizer;
|
11 | 12 | import com.fasterxml.jackson.core.util.VersionUtil;
|
12 | 13 |
|
@@ -37,6 +38,29 @@ public class NonBlockingJsonParser
|
37 | 38 | // protected int _inputPtr;
|
38 | 39 | // protected int _inputEnd;
|
39 | 40 |
|
| 41 | + /* |
| 42 | + /********************************************************************** |
| 43 | + /* Location tracking, additional |
| 44 | + /********************************************************************** |
| 45 | + */ |
| 46 | + |
| 47 | + /** |
| 48 | + * Alternate row tracker, used to keep track of position by <code>\r</code> marker |
| 49 | + * (whereas <code>_currInputRow</code> tracks <code>\n</code>). Used to simplify |
| 50 | + * tracking of linefeeds, assuming that input typically uses one of the |
| 51 | + * linefeed combinations (<code>\r</code>, <code>\n</code> or <code>\r\n</code>) consistently, in which |
| 52 | + * case we can simply choose max of two row candidates. |
| 53 | + */ |
| 54 | + protected int _currInputRowAlt = 1; |
| 55 | + |
| 56 | + /* |
| 57 | + /********************************************************************** |
| 58 | + /* Other state |
| 59 | + /********************************************************************** |
| 60 | + */ |
| 61 | + |
| 62 | + protected int _currentQuote; |
| 63 | + |
40 | 64 | /*
|
41 | 65 | /**********************************************************************
|
42 | 66 | /* Life-cycle
|
@@ -148,14 +172,14 @@ public JsonToken nextToken() throws IOException
|
148 | 172 |
|
149 | 173 | // No: fresh new token; may or may not have existing one
|
150 | 174 | _numTypesValid = NR_UNKNOWN;
|
151 |
| -// _tokenInputTotal = _currInputProcessed + _inputPtr; |
| 175 | + _tokenInputTotal = _currInputProcessed + _inputPtr; |
152 | 176 | // also: clear any data retained so far
|
153 | 177 | _binaryValue = null;
|
154 | 178 | int ch = _inputBuffer[_inputPtr++];
|
155 | 179 |
|
156 | 180 | switch (_majorState) {
|
157 | 181 | case MAJOR_INITIAL:
|
158 |
| - // TODO: Bootstrapping? BOM? |
| 182 | + return _startDocument(ch); |
159 | 183 |
|
160 | 184 | case MAJOR_ROOT:
|
161 | 185 | return _startValue(ch);
|
@@ -193,12 +217,171 @@ protected final JsonToken _finishToken() throws IOException
|
193 | 217 | /**********************************************************************
|
194 | 218 | */
|
195 | 219 |
|
| 220 | + private final JsonToken _startDocument(int ch) throws IOException |
| 221 | + { |
| 222 | + ch &= 0xFF; |
| 223 | + |
| 224 | + // Very first byte: could be BOM |
| 225 | + if (ch == ByteSourceJsonBootstrapper.UTF8_BOM_1) { |
| 226 | + // !!! TODO |
| 227 | + } |
| 228 | + |
| 229 | + // If not BOM (or we got past it), could be whitespace or comment to skip |
| 230 | + while (ch <= 0x020) { |
| 231 | + if (ch != INT_SPACE) { |
| 232 | + if (ch == INT_LF) { |
| 233 | + ++_currInputRow; |
| 234 | + _currInputRowStart = _inputPtr; |
| 235 | + } else if (ch == INT_CR) { |
| 236 | + ++_currInputRowAlt; |
| 237 | + _currInputRowStart = _inputPtr; |
| 238 | + } else if (ch != INT_TAB) { |
| 239 | + _throwInvalidSpace(ch); |
| 240 | + } |
| 241 | + } |
| 242 | + if (_inputPtr >= _inputEnd) { |
| 243 | + _minorState = MINOR_FIELD_ROOT_GOT_SEPARATOR; |
| 244 | + if (_closed) { |
| 245 | + return null; |
| 246 | + } |
| 247 | + // note: if so, do not even bother changing state |
| 248 | + if (_endOfInput) { // except for this special case |
| 249 | + return _eofAsNextToken(); |
| 250 | + } |
| 251 | + return JsonToken.NOT_AVAILABLE; |
| 252 | + } |
| 253 | + ch = _inputBuffer[_inputPtr++] & 0xFF; |
| 254 | + } |
| 255 | + return _startValue(ch); |
| 256 | + } |
| 257 | + |
| 258 | + /* |
| 259 | + /********************************************************************** |
| 260 | + /* Second-level decoding, value parsing |
| 261 | + /********************************************************************** |
| 262 | + */ |
| 263 | + |
196 | 264 | /**
|
197 | 265 | * Helper method called to detect type of a value token (at any level), and possibly
|
198 | 266 | * decode it if contained in input buffer.
|
199 | 267 | * Note that possible header has been ruled out by caller and is not checked here.
|
200 | 268 | */
|
201 | 269 | private final JsonToken _startValue(int ch) throws IOException
|
| 270 | + { |
| 271 | + if (ch == INT_QUOTE) { |
| 272 | + return _startString(ch); |
| 273 | + } |
| 274 | + switch (ch) { |
| 275 | + case '-': |
| 276 | + return _startNegativeNumber(); |
| 277 | + |
| 278 | + // Should we have separate handling for plus? Although |
| 279 | + // it is not allowed per se, it may be erroneously used, |
| 280 | + // and could be indicate by a more specific error message. |
| 281 | + case '0': |
| 282 | + case '1': |
| 283 | + case '2': |
| 284 | + case '3': |
| 285 | + case '4': |
| 286 | + case '5': |
| 287 | + case '6': |
| 288 | + case '7': |
| 289 | + case '8': |
| 290 | + case '9': |
| 291 | + return _startPositiveNumber(ch); |
| 292 | + case 'f': |
| 293 | + return _startFalseToken(); |
| 294 | + case 'n': |
| 295 | + return _startNullToken(); |
| 296 | + case 't': |
| 297 | + return _startTrueToken(); |
| 298 | + case '[': |
| 299 | + return _startArrayScope(); |
| 300 | + case ']': |
| 301 | + return _closeArrayScope(); |
| 302 | + case '{': |
| 303 | + return _startObjectScope(); |
| 304 | + case '}': |
| 305 | + return _closeObjectScope(); |
| 306 | + default: |
| 307 | + } |
| 308 | + return _startUnexpectedValue(ch); |
| 309 | + } |
| 310 | + |
| 311 | + protected JsonToken _startUnexpectedValue(int ch) throws IOException |
| 312 | + { |
| 313 | + // TODO: Maybe support non-standard tokens that streaming parser does: |
| 314 | + // |
| 315 | + // * NaN |
| 316 | + // * Infinity |
| 317 | + // * Plus-prefix for numbers |
| 318 | + // * Apostrophe for Strings |
| 319 | + |
| 320 | + switch (ch) { |
| 321 | + case '\'': |
| 322 | + return _startString(ch); |
| 323 | + |
| 324 | + case ',': |
| 325 | + // If Feature.ALLOW_MISSING_VALUES is enabled we may allow "missing values", |
| 326 | + // that is, encountering a trailing comma or closing marker where value would be expected |
| 327 | + if (!_parsingContext.inObject() && isEnabled(Feature.ALLOW_MISSING_VALUES)) { |
| 328 | + // Important to "push back" separator, to be consumed before next value; |
| 329 | + // does not lead to infinite loop |
| 330 | + --_inputPtr; |
| 331 | + return _valueComplete(JsonToken.VALUE_NULL); |
| 332 | + } |
| 333 | + break; |
| 334 | + } |
| 335 | + // !!! TODO: maybe try to collect more information for better diagnostics |
| 336 | + _reportUnexpectedChar(ch, "expected a valid value (number, String, array, object, 'true', 'false' or 'null')"); |
| 337 | + return null; |
| 338 | + } |
| 339 | + |
| 340 | + /* |
| 341 | + /********************************************************************** |
| 342 | + /* Second-level decoding, simple tokens |
| 343 | + /********************************************************************** |
| 344 | + */ |
| 345 | + |
| 346 | + protected JsonToken _startFalseToken() throws IOException |
| 347 | + { |
| 348 | + return null; |
| 349 | + } |
| 350 | + |
| 351 | + protected JsonToken _startTrueToken() throws IOException |
| 352 | + { |
| 353 | + return null; |
| 354 | + } |
| 355 | + |
| 356 | + protected JsonToken _startNullToken() throws IOException |
| 357 | + { |
| 358 | + return null; |
| 359 | + } |
| 360 | + |
| 361 | + /* |
| 362 | + /********************************************************************** |
| 363 | + /* Second-level decoding, String decoding |
| 364 | + /********************************************************************** |
| 365 | + */ |
| 366 | + |
| 367 | + protected JsonToken _startString(int q) throws IOException |
| 368 | + { |
| 369 | + _currentQuote = q; |
| 370 | + return null; |
| 371 | + } |
| 372 | + |
| 373 | + /* |
| 374 | + /********************************************************************** |
| 375 | + /* Second-level decoding, Number decoding |
| 376 | + /********************************************************************** |
| 377 | + */ |
| 378 | + |
| 379 | + protected JsonToken _startPositiveNumber(int ch) throws IOException |
| 380 | + { |
| 381 | + return null; |
| 382 | + } |
| 383 | + |
| 384 | + protected JsonToken _startNegativeNumber() throws IOException |
202 | 385 | {
|
203 | 386 | return null;
|
204 | 387 | }
|
|
0 commit comments