@@ -57,6 +57,7 @@ class parser
57
57
bool skip_string_literal_with_accel ();
58
58
bool skip_whitespace () noexcept ;
59
59
bool skip_digit ();
60
+ bool skip_unicode_escape (uint16_t & pair_high);
60
61
61
62
private:
62
63
parsing_iter_t _cur;
@@ -420,113 +421,34 @@ inline std::optional<string_t> parser<string_t, parsing_t, accel_traits>::parse_
420
421
}
421
422
switch (*_cur) {
422
423
case ' "' :
423
- result.push_back (' "' );
424
+ result.emplace_back (' "' );
424
425
break ;
425
426
case ' \\ ' :
426
- result.push_back (' \\ ' );
427
+ result.emplace_back (' \\ ' );
427
428
break ;
428
429
case ' /' :
429
- result.push_back (' /' );
430
+ result.emplace_back (' /' );
430
431
break ;
431
432
case ' b' :
432
- result.push_back (' \b ' );
433
+ result.emplace_back (' \b ' );
433
434
break ;
434
435
case ' f' :
435
- result.push_back (' \f ' );
436
+ result.emplace_back (' \f ' );
436
437
break ;
437
438
case ' n' :
438
- result.push_back (' \n ' );
439
+ result.emplace_back (' \n ' );
439
440
break ;
440
441
case ' r' :
441
- result.push_back (' \r ' );
442
+ result.emplace_back (' \r ' );
442
443
break ;
443
444
case ' t' :
444
- result.push_back (' \t ' );
445
+ result.emplace_back (' \t ' );
445
446
break ;
446
- case ' u' : {
447
- uint16_t cp = 0 ;
448
- for (int i = 0 ; i < 4 ; i++) {
449
- ++_cur;
450
- if (_cur == _end) {
451
- return std::nullopt;
452
- }
453
- if (!std::isxdigit (static_cast <unsigned char >(*_cur))) {
454
- return std::nullopt;
455
- }
456
- cp <<= 4 ;
457
- if (' 0' <= *_cur && *_cur <= ' 9' ) {
458
- cp |= *_cur - ' 0' ;
459
- }
460
- else if (' a' <= *_cur && *_cur <= ' f' ) {
461
- cp |= *_cur - ' a' + 10 ;
462
- }
463
- else if (' A' <= *_cur && *_cur <= ' F' ) {
464
- cp |= *_cur - ' A' + 10 ;
465
- }
466
- else {
467
- return std::nullopt;
468
- }
469
- }
470
- uint32_t ext_cp = cp;
471
- uint16_t hi_cp = 0 , lo_cp = 0 ;
472
- if (0xD800 <= cp && cp <= 0xDBFF ) {
473
- if (pair_high) {
474
- return std::nullopt;
475
- } else {
476
- pair_high = cp;
477
- break ;
478
- }
479
- } else if (0xDC00 <= cp && cp <= 0xDFFF ) {
480
- if (!pair_high) {
481
- return std::nullopt;
482
- } else {
483
- ext_cp = (((pair_high - 0xD800 ) << 10 ) | (cp - 0xDC00 )) + 0x10000 ;
484
- hi_cp = pair_high;
485
- lo_cp = cp;
486
- pair_high = 0 ;
487
- }
488
- }
489
- if constexpr (std::is_same_v<typename string_t ::value_type, char >) {
490
- // utf8
491
- if (ext_cp <= 0x7F ) {
492
- result.push_back (static_cast <char >(ext_cp));
493
- }
494
- else if (ext_cp <= 0x7FF ) {
495
- result.push_back (static_cast <char >(((ext_cp >> 6 ) & 0b00011111 ) | 0b11000000u ));
496
- result.push_back (static_cast <char >((ext_cp & 0b00111111 ) | 0b10000000u ));
497
- }
498
- else if (ext_cp <= 0xFFFF ) {
499
- result.push_back (
500
- static_cast <char >(((ext_cp >> 12 ) & 0b00001111 ) | 0b11100000u ));
501
- result.push_back (static_cast <char >(((ext_cp >> 6 ) & 0b00111111 ) | 0b10000000u ));
502
- result.push_back (static_cast <char >((ext_cp & 0b00111111 ) | 0b10000000u ));
503
- } else {
504
- result.push_back (
505
- static_cast <char >(((ext_cp >> 18 ) & 0b00000111 ) | 0b11110000u ));
506
- result.push_back (static_cast <char >(((ext_cp >> 12 ) & 0b00111111 ) | 0b10000000u ));
507
- result.push_back (static_cast <char >(((ext_cp >> 6 ) & 0b00111111 ) | 0b10000000u ));
508
- result.push_back (static_cast <char >((ext_cp & 0b00111111 ) | 0b10000000u ));
509
- }
510
- }
511
- else if constexpr (std::is_same_v<typename string_t ::value_type, wchar_t >) {
512
- if constexpr (sizeof (wchar_t ) == 4 ) {
513
- result.push_back (static_cast <wchar_t >(ext_cp));
514
- } else if constexpr (sizeof (wchar_t ) == 2 ) {
515
- if (ext_cp <= 0xFFFF ) {
516
- result.push_back (static_cast <wchar_t >(ext_cp));
517
- } else {
518
- result.push_back (static_cast <wchar_t >(hi_cp));
519
- result.push_back (static_cast <wchar_t >(lo_cp));
520
- }
521
- } else {
522
- static_assert (!sizeof (typename string_t ::value_type), " Unsupported wchar" );
523
- }
524
- }
525
- else {
526
- static_assert (!sizeof (typename string_t ::value_type), " Unsupported type" );
447
+ case ' u' :
448
+ if (!skip_unicode_escape (pair_high)) {
449
+ return std::nullopt;
527
450
}
528
451
break ;
529
- }
530
452
default :
531
453
// Illegal backslash escape
532
454
return std::nullopt;
@@ -552,6 +474,101 @@ inline std::optional<string_t> parser<string_t, parsing_t, accel_traits>::parse_
552
474
return std::nullopt;
553
475
}
554
476
477
+ template <typename string_t , typename parsing_t , typename accel_traits>
478
+ inline bool parser<string_t , parsing_t , accel_traits>::skip_unicode_escape(uint16_t & pair_high)
479
+ {
480
+ uint16_t cp = 0 ;
481
+ for (int i = 0 ; i < 4 ; ++i) {
482
+ if (++_cur == _end) {
483
+ return false ;
484
+ }
485
+
486
+ if (!std::isxdigit (static_cast <unsigned char >(*_cur))) {
487
+ return false ;
488
+ }
489
+
490
+ cp <<= 4 ;
491
+
492
+ if (' 0' <= *_cur && *_cur <= ' 9' ) {
493
+ cp |= *_cur - ' 0' ;
494
+ }
495
+ else if (' a' <= *_cur && *_cur <= ' f' ) {
496
+ cp |= *_cur - ' a' + 10 ;
497
+ }
498
+ else if (' A' <= *_cur && *_cur <= ' F' ) {
499
+ cp |= *_cur - ' A' + 10 ;
500
+ }
501
+ else {
502
+ return false ;
503
+ }
504
+ }
505
+
506
+ uint32_t ext_cp = cp;
507
+ uint16_t hi_cp = 0 , lo_cp = 0 ;
508
+
509
+ if (0xD800 <= cp && cp <= 0xDBFF ) {
510
+ if (pair_high) {
511
+ return false ;
512
+ }
513
+ pair_high = cp;
514
+ return true ;
515
+ }
516
+
517
+ if (0xDC00 <= cp && cp <= 0xDFFF ) {
518
+ if (!pair_high) {
519
+ return false ;
520
+ }
521
+ ext_cp = (((pair_high - 0xD800 ) << 10 ) | (cp - 0xDC00 )) + 0x10000 ;
522
+ hi_cp = pair_high;
523
+ lo_cp = cp;
524
+ pair_high = 0 ;
525
+ }
526
+
527
+ if constexpr (std::is_same_v<typename string_t ::value_type, char >) {
528
+ // utf8
529
+ if (ext_cp <= 0x7F ) {
530
+ result.emplace_back (static_cast <char >(ext_cp));
531
+ }
532
+ else if (ext_cp <= 0x7FF ) {
533
+ result.emplace_back (static_cast <char >(((ext_cp >> 6 ) & 0b00011111 ) | 0b11000000u ));
534
+ result.emplace_back (static_cast <char >((ext_cp & 0b00111111 ) | 0b10000000u ));
535
+ }
536
+ else if (ext_cp <= 0xFFFF ) {
537
+ result.emplace_back (static_cast <char >(((ext_cp >> 12 ) & 0b00001111 ) | 0b11100000u ));
538
+ result.emplace_back (static_cast <char >(((ext_cp >> 6 ) & 0b00111111 ) | 0b10000000u ));
539
+ result.emplace_back (static_cast <char >((ext_cp & 0b00111111 ) | 0b10000000u ));
540
+ }
541
+ else {
542
+ result.emplace_back (static_cast <char >(((ext_cp >> 18 ) & 0b00000111 ) | 0b11110000u ));
543
+ result.emplace_back (static_cast <char >(((ext_cp >> 12 ) & 0b00111111 ) | 0b10000000u ));
544
+ result.emplace_back (static_cast <char >(((ext_cp >> 6 ) & 0b00111111 ) | 0b10000000u ));
545
+ result.emplace_back (static_cast <char >((ext_cp & 0b00111111 ) | 0b10000000u ));
546
+ }
547
+ }
548
+ else if constexpr (std::is_same_v<typename string_t ::value_type, wchar_t >) {
549
+ if constexpr (sizeof (wchar_t ) == 4 ) {
550
+ result.emplace_back (static_cast <wchar_t >(ext_cp));
551
+ }
552
+ else if constexpr (sizeof (wchar_t ) == 2 ) {
553
+ if (ext_cp <= 0xFFFF ) {
554
+ result.emplace_back (static_cast <wchar_t >(ext_cp));
555
+ }
556
+ else {
557
+ result.emplace_back (static_cast <wchar_t >(hi_cp));
558
+ result.emplace_back (static_cast <wchar_t >(lo_cp));
559
+ }
560
+ }
561
+ else {
562
+ static_assert (!sizeof (typename string_t ::value_type), " Unsupported wchar" );
563
+ }
564
+ }
565
+ else {
566
+ static_assert (!sizeof (typename string_t ::value_type), " Unsupported type" );
567
+ }
568
+
569
+ return true ;
570
+ }
571
+
555
572
template <typename string_t , typename parsing_t , typename accel_traits>
556
573
inline bool parser<string_t , parsing_t , accel_traits>::skip_string_literal_with_accel()
557
574
{
0 commit comments