Skip to content

Commit e218c3e

Browse files
committed
fix: unicode mask converter
1 parent 7daa578 commit e218c3e

File tree

1 file changed

+15
-7
lines changed

1 file changed

+15
-7
lines changed

include/parser5/parser5.hpp

+15-7
Original file line number | Diff line number | Diff line change
@@ -314,34 +314,42 @@ inline bool parser5<string_t>::unicode::isHexDigit(u8char ch)
314314
// Reading note: this span is a rendered diff. A code line preceded by an
// `NNN-` marker was removed by the commit, one preceded by `NNN+` was added,
// and one preceded by a fused pair of numbers (e.g. `314314`) is unchanged.
//
// toUnicode: converts a multi-byte character packed into the integer `ch`
// into a single numeric value, following the UTF-8 bit layout: the most
// significant non-zero byte is treated as the lead byte, and each following
// byte contributes its low 6 bits.
// NOTE(review): `u8char` is declared outside this view; the code below uses it
// as an integer (compared with 0, masked with 0xff, shifted) - confirm its
// width and signedness against the declaration.
//
// Returns 0 when `ch` is 0, the lead byte itself when it is below 128, and
// otherwise the value assembled from the lead byte's payload bits and the
// continuation bytes.
template <typename string_t>
315315
inline uint64_t parser5<string_t>::unicode::toUnicode(u8char ch)
316316
{
317-
std::stack<uint8_t> coded;
318317
if (ch == 0) {
319318
return ch;
320319
}
320+
321+
// Declared after the early return so no stack is built for ch == 0.
std::stack<uint8_t> coded;
321322
// Split `ch` into bytes, least significant first, so that the most
// significant byte (the lead byte) ends up on top of the stack.
while (ch > 0) {
322323
coded.push(ch & 0xff);
323-
ch = ch >> 8;
324+
ch >>= 8;
324325
}
326+
325327
u8char charcode = 0;
326328
// `t` is the lead byte. The stack cannot be empty here because ch != 0.
uint8_t t = coded.top();
327329
coded.pop();
328330
// High bit clear: single-byte value, returned unchanged.
if (t < 128) {
329331
return t;
330332
}
331-
uint8_t high_bit_mask = (1 << 6) - 1;
333+
334+
// Mask for the payload bits of the lead byte; it starts at 6 bits and loses
// one bit per loop iteration below.
uint8_t high_bit_mask = 0b00111111;
332335
uint8_t high_bit_shift = 0;
333336
int total_bits = 0;
334337
// Number of payload bits taken from each byte after the lead byte.
const int other_bits = 6;
338+
335339
// Runs while the top two bits of `t` are both set. `t` is shifted left by one
// each pass, so for a lead byte of the form 110xxxxx / 1110xxxx / 11110xxx
// the loop runs once per continuation byte announced by the lead byte.
while ((t & 0xC0) == 0xC0) {
336340
t <<= 1;
337341
// No effect on the value: `t` is a uint8_t and is already truncated to 8 bits.
t &= 0xff;
338-
total_bits += 6;
342+
total_bits += other_bits;
339343
high_bit_mask >>= 1;
340344
high_bit_shift++;
341-
charcode <<= other_bits;
342-
charcode |= coded.top() & ((1 << other_bits) - 1);
343-
coded.pop();
345+
346+
// Guard added by this commit: if the lead byte announces more continuation
// bytes than `ch` actually holds, the stack runs out. Calling top()/pop()
// on an empty std::stack is undefined behavior, so the append is skipped.
if (!coded.empty()) {
347+
charcode <<= other_bits;
348+
charcode |= coded.top() & ((1 << other_bits) - 1);
349+
coded.pop();
350+
}
344351
}
352+
345353
// Shift `t` back by the number of left shifts applied in the loop, mask off
// everything but the lead byte's payload bits, and place them above the
// `total_bits` bits reserved for the continuation bytes.
charcode |= static_cast<uint64_t>((t >> high_bit_shift) & high_bit_mask) << total_bits;
346354
return charcode;
347355
}

0 commit comments

Comments
 (0)