@@ -3233,6 +3233,7 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3233
3233
case '<' :
3234
3234
case '&' :
3235
3235
case '\u0000' :
3236
+ case ';' :
3236
3237
emitOrAppendCharRefBuf (returnState );
3237
3238
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3238
3239
cstart = pos ;
@@ -3261,17 +3262,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3261
3262
firstCharKey = c - 'A' ;
3262
3263
} else {
3263
3264
// No match
3264
- /*
3265
- * If no match can be made, then this is a parse
3266
- * error.
3267
- */
3268
- errNoNamedCharacterMatch ();
3269
3265
emitOrAppendCharRefBuf (returnState );
3270
3266
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3271
3267
cstart = pos ;
3272
3268
}
3273
3269
reconsume = true ;
3274
- state = transition (state , returnState , reconsume , pos );
3270
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3275
3271
continue stateloop ;
3276
3272
}
3277
3273
// Didn't fail yet
@@ -3332,17 +3328,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3332
3328
}
3333
3329
}
3334
3330
if (hilo == 0 ) {
3335
- /*
3336
- * If no match can be made, then this is a parse
3337
- * error.
3338
- */
3339
- errNoNamedCharacterMatch ();
3340
3331
emitOrAppendCharRefBuf (returnState );
3341
3332
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3342
3333
cstart = pos ;
3343
3334
}
3344
3335
reconsume = true ;
3345
- state = transition (state , returnState , reconsume , pos );
3336
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3346
3337
continue stateloop ;
3347
3338
}
3348
3339
// Didn't fail yet
@@ -3425,16 +3416,12 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3425
3416
3426
3417
if (candidate == -1 ) {
3427
3418
// reconsume deals with CR, LF or nul
3428
- /*
3429
- * If no match can be made, then this is a parse error.
3430
- */
3431
- errNoNamedCharacterMatch ();
3432
3419
emitOrAppendCharRefBuf (returnState );
3433
3420
if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3434
3421
cstart = pos ;
3435
3422
}
3436
3423
reconsume = true ;
3437
- state = transition (state , returnState , reconsume , pos );
3424
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3438
3425
continue stateloop ;
3439
3426
} else {
3440
3427
// c can't be CR, LF or nul if we got here
@@ -3472,10 +3459,9 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3472
3459
* after the U+0026 AMPERSAND (&) must be
3473
3460
* unconsumed, and nothing is returned.
3474
3461
*/
3475
- errNoNamedCharacterMatch ();
3476
3462
appendCharRefBufToStrBuf ();
3477
3463
reconsume = true ;
3478
- state = transition (state , returnState , reconsume , pos );
3464
+ state = transition (state , Tokenizer . AMBIGUOUS_AMPERSAND , reconsume , pos );
3479
3465
continue stateloop ;
3480
3466
}
3481
3467
}
@@ -3538,6 +3524,37 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
3538
3524
* I'm ∉ I tell you.
3539
3525
*/
3540
3526
}
3527
+ // XXX reorder point
3528
+ case AMBIGUOUS_AMPERSAND :
3529
+ /*
3530
+ * Unlike the definition in the spec, we don't consume the
3531
+ * next input character right away when entering this state;
3532
+ * that's because our current implementation differs from
3533
+ * the spec in that we've already consumed the relevant
3534
+ * character *before* entering this state.
3535
+ * Also, our implementation of this state has no looping.
3536
+ * So we never stay in this state; instead, we always
3537
+ * transition out from it back to returnState.
3538
+ */
3539
+ state = returnState ;
3540
+ if (c == ';' ) {
3541
+ errNoNamedCharacterMatch ();
3542
+ continue stateloop ;
3543
+ } else if ((c >= '0' && c <= '9' )
3544
+ || (c >= 'A' && c <= 'Z' )
3545
+ || (c >= 'a' && c <= 'z' )) {
3546
+ appendCharRefBuf (c );
3547
+ emitOrAppendCharRefBuf (returnState );
3548
+ if ((returnState & DATA_AND_RCDATA_MASK ) == 0 ) {
3549
+ cstart = pos + 1 ;
3550
+ }
3551
+ if (++pos == endPos ) {
3552
+ break stateloop ;
3553
+ }
3554
+ c = checkChar (buf , pos );
3555
+ continue stateloop ;
3556
+ }
3557
+ continue stateloop ;
3541
3558
case CONSUME_NCR :
3542
3559
if (++pos == endPos ) {
3543
3560
break stateloop ;
@@ -6632,7 +6649,6 @@ public void eof() throws SAXException {
6632
6649
state = returnState ;
6633
6650
continue ;
6634
6651
case CHARACTER_REFERENCE_HILO_LOOKUP :
6635
- errNoNamedCharacterMatch ();
6636
6652
emitOrAppendCharRefBuf (returnState );
6637
6653
state = returnState ;
6638
6654
continue ;
@@ -6686,10 +6702,6 @@ public void eof() throws SAXException {
6686
6702
}
6687
6703
6688
6704
if (candidate == -1 ) {
6689
- /*
6690
- * If no match can be made, then this is a parse error.
6691
- */
6692
- errNoNamedCharacterMatch ();
6693
6705
emitOrAppendCharRefBuf (returnState );
6694
6706
state = returnState ;
6695
6707
continue eofloop ;
@@ -6727,7 +6739,6 @@ public void eof() throws SAXException {
6727
6739
* after the U+0026 AMPERSAND (&) must be
6728
6740
* unconsumed, and nothing is returned.
6729
6741
*/
6730
- errNoNamedCharacterMatch ();
6731
6742
appendCharRefBufToStrBuf ();
6732
6743
state = returnState ;
6733
6744
continue eofloop ;
0 commit comments