@@ -1493,12 +1493,22 @@ public boolean tokenizeBuffer(UTF16Buffer buffer) throws SAXException {
14931493 */
14941494 // CPPONLY: if (mViewSource) {
14951495 // CPPONLY: mViewSource.SetBuffer(buffer);
1496- // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1496+ // CPPONLY: if (htmlaccel_enabled()) {
1497+ // CPPONLY: pos = StateLoopViewSourceSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1498+ // CPPONLY: } else {
1499+ // CPPONLY: pos = StateLoopViewSourceALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1500+ // CPPONLY: }
14971501 // CPPONLY: mViewSource.DropBuffer((pos == buffer.getEnd()) ? pos : pos + 1);
14981502 // CPPONLY: } else if (tokenHandler.WantsLineAndColumn()) {
1499- // CPPONLY: pos = stateLoop(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1503+ // CPPONLY: if (htmlaccel_enabled()) {
1504+ // CPPONLY: pos = StateLoopLineColSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1505+ // CPPONLY: } else {
1506+ // CPPONLY: pos = StateLoopLineColALU(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1507+ // CPPONLY: }
1508+ // CPPONLY: } else if (htmlaccel_enabled() && ((buffer.getEnd() - pos) >= 32)) {
1509+ // CPPONLY: pos = StateLoopFastestSIMD(state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
15001510 // CPPONLY: } else {
1501- // CPPONLY: pos = stateLoop (state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
1511+ // CPPONLY: pos = StateLoopFastestALU (state, c, pos, buffer.getBuffer(), false, returnState, buffer.getEnd());
15021512 // CPPONLY: }
15031513 // [NOCPP[
15041514 pos = stateLoop (state , c , pos , buffer .getBuffer (), false , returnState ,
@@ -1623,54 +1633,118 @@ private void ensureBufferSpace(int inputLength) throws SAXException {
16231633 switch (state ) {
16241634 case DATA :
16251635 dataloop : for (;;) {
1636+ // Ideally this reconsume block would be a separate state, DATA_RECONSUME above this one
1637+ // with fallthrough into this state. However, such a change would be disruptive to
1638+ // TransitionHandler and everything that works with returnState.
16261639 if (reconsume ) {
16271640 reconsume = false ;
1628- } else {
1629- if (++pos == endPos ) {
1630- break stateloop ;
1641+ // This is a manual copy of the switch below with break/continue
1642+ // adjusted as relevant. Make sure to keep in sync with the switch below!
1643+ switch (c ) {
1644+ case '&' :
1645+ /*
1646+ * U+0026 AMPERSAND (&) Switch to the character
1647+ * reference in data state.
1648+ */
1649+ flushChars (buf , pos );
1650+ assert charRefBufLen == 0 : "charRefBufLen not reset after previous use!" ;
1651+ appendCharRefBuf (c );
1652+ setAdditionalAndRememberAmpersandLocation ('\u0000' );
1653+ returnState = state ;
1654+ state = transition (state , Tokenizer .CONSUME_CHARACTER_REFERENCE , reconsume , pos );
1655+ continue stateloop ;
1656+ case '<' :
1657+ /*
1658+ * U+003C LESS-THAN SIGN (<) Switch to the tag
1659+ * open state.
1660+ */
1661+ flushChars (buf , pos );
1662+
1663+ state = transition (state , Tokenizer .TAG_OPEN , reconsume , pos );
1664+ // `break` optimizes; `continue stateloop;` would be valid
1665+ break dataloop ;
1666+ case '\u0000' :
1667+ maybeEmitReplacementCharacter (buf , pos );
1668+ break ;
1669+ case '\r' :
1670+ emitCarriageReturn (buf , pos );
1671+ break stateloop ;
1672+ case '\n' :
1673+ silentLineFeed ();
1674+ // CPPONLY: MOZ_FALLTHROUGH;
1675+ default :
1676+ /*
1677+ * Anything else Emit the input character as a
1678+ * character token.
1679+ *
1680+ * Stay in the data state.
1681+ */
1682+ break ;
16311683 }
1632- c = checkChar (buf , pos );
16331684 }
1634- switch (c ) {
1635- case '&' :
1636- /*
1637- * U+0026 AMPERSAND (&) Switch to the character
1638- * reference in data state.
1639- */
1640- flushChars (buf , pos );
1641- assert charRefBufLen == 0 : "charRefBufLen not reset after previous use!" ;
1642- appendCharRefBuf (c );
1643- setAdditionalAndRememberAmpersandLocation ('\u0000' );
1644- returnState = state ;
1645- state = transition (state , Tokenizer .CONSUME_CHARACTER_REFERENCE , reconsume , pos );
1646- continue stateloop ;
1647- case '<' :
1648- /*
1649- * U+003C LESS-THAN SIGN (<) Switch to the tag
1650- * open state.
1651- */
1652- flushChars (buf , pos );
1653-
1654- state = transition (state , Tokenizer .TAG_OPEN , reconsume , pos );
1655- // `break` optimizes; `continue stateloop;` would be valid
1656- break dataloop ;
1657- case '\u0000' :
1658- maybeEmitReplacementCharacter (buf , pos );
1659- continue ;
1660- case '\r' :
1661- emitCarriageReturn (buf , pos );
1662- break stateloop ;
1663- case '\n' :
1664- silentLineFeed ();
1665- // CPPONLY: MOZ_FALLTHROUGH;
1666- default :
1667- /*
1668- * Anything else Emit the input character as a
1669- * character token.
1670- *
1671- * Stay in the data state.
1672- */
1673- continue ;
1685+ datamiddle : for (;;) {
1686+ ++pos ;
1687+ // Perhaps at some point, it will be appropriate to do SIMD in Java, but not today.
1688+ // The line below advances pos by some number of code units that this state is indifferent to.
1689+ // CPPONLY: pos += accelerateData(buf, pos, endPos);
1690+ for (;;) {
1691+ if (pos == endPos ) {
1692+ break stateloop ;
1693+ }
1694+ c = checkChar (buf , pos );
1695+ // Make sure to keep in sync with the switch above in the reconsume block!
1696+ switch (c ) {
1697+ case '&' :
1698+ /*
1699+ * U+0026 AMPERSAND (&) Switch to the character
1700+ * reference in data state.
1701+ */
1702+ flushChars (buf , pos );
1703+ assert charRefBufLen == 0 : "charRefBufLen not reset after previous use!" ;
1704+ appendCharRefBuf (c );
1705+ setAdditionalAndRememberAmpersandLocation ('\u0000' );
1706+ returnState = state ;
1707+ state = transition (state , Tokenizer .CONSUME_CHARACTER_REFERENCE , reconsume , pos );
1708+ continue stateloop ;
1709+ case '<' :
1710+ /*
1711+ * U+003C LESS-THAN SIGN (<) Switch to the tag
1712+ * open state.
1713+ */
1714+ flushChars (buf , pos );
1715+
1716+ state = transition (state , Tokenizer .TAG_OPEN , reconsume , pos );
1717+ // `break` optimizes; `continue stateloop;` would be valid
1718+ break dataloop ;
1719+ case '\u0000' :
1720+ maybeEmitReplacementCharacter (buf , pos );
1721+ // Climb back to the SIMD path.
1722+ continue datamiddle ;
1723+ case '\r' :
1724+ emitCarriageReturn (buf , pos );
1725+ break stateloop ;
1726+ case '\n' :
1727+ silentLineFeed ();
1728+ // Climb back to the SIMD path.
1729+ continue datamiddle ;
1730+ default :
1731+ /*
1732+ * Anything else Emit the input character as a
1733+ * character token.
1734+ *
1735+ * Stay in the data state.
1736+ */
1737+ // Don't go back to SIMD. We have less than a SIMD
1738+ // stride to go if we come here in the SIMD case with
1739+ // the fastest loop policy. With other policies, we
1740+ // can come here due to a non-BMP character, in which
1741+ // case we stay on the ALU path until the end of the
1742+ // line.
1743+ // We need to increment pos!
1744+ ++pos ;
1745+ continue ;
1746+ }
1747+ }
16741748 }
16751749 }
16761750 // CPPONLY: MOZ_FALLTHROUGH;
0 commit comments