@@ -95,6 +95,14 @@ pub enum Error {
95
95
96
96
impl serde:: de:: StdError for Error { }
97
97
98
/// Converts a `crate::str::StringUnescapeError` into this module's `Error`, so that `?` can be
/// applied directly to results carrying the unescape error type.
+ impl From < crate :: str:: StringUnescapeError > for Error {
99
+ fn from ( error : crate :: str:: StringUnescapeError ) -> Self {
100
// Exhaustive match without a catch-all arm: a variant added to `StringUnescapeError`
// later will fail to compile here until it is given a mapping.
+ match error {
101
+ crate :: str:: StringUnescapeError :: InvalidEscapeSequence => Self :: InvalidEscapeSequence ,
102
+ }
103
+ }
104
+ }
105
+
98
106
/// A structure that deserializes Rust values from JSON in a buffer.
99
107
pub struct Deserializer < ' b , ' s > {
100
108
slice : & ' b [ u8 ] ,
@@ -485,89 +493,43 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> {
485
493
where
486
494
V : Visitor < ' de > ,
487
495
{
488
- let s = self . parse_str ( ) ?;
496
+ let escaped_string = self . parse_str ( ) ?;
489
497
490
498
if let Some ( string_unescape_buffer) = self . string_unescape_buffer . as_deref_mut ( ) {
491
- if s . as_bytes ( ) . contains ( & b'\\' ) {
492
- let mut string_unescape_buffer_slots = string_unescape_buffer . iter_mut ( ) ;
499
+ if escaped_string . as_bytes ( ) . contains ( & b'\\' ) {
500
+ let mut string_unescape_buffer_write_position = 0 ;
493
501
494
- // We've already checked that the string is valid UTF-8, so the only b'\\' is the start of escape sequence
495
- let mut escaped_string_bytes = s . as_bytes ( ) . iter ( ) ;
502
+ for fragment in crate :: str :: unescape_fragments ( escaped_string ) {
503
+ let char_encode_buffer = & mut [ 0 ; 4 ] ;
496
504
497
- loop {
498
- match escaped_string_bytes. next ( ) . copied ( ) {
499
- None => break ,
500
- Some ( b'\\' ) => {
501
- let unescaped_byte = match escaped_string_bytes. next ( ) {
502
- Some ( b'"' ) => b'"' ,
503
- Some ( b'\\' ) => b'\\' ,
504
- Some ( b'/' ) => b'/' ,
505
- Some ( b'b' ) => 0x8 ,
506
- Some ( b'f' ) => 0xC ,
507
- Some ( b'n' ) => b'\n' ,
508
- Some ( b'r' ) => b'\r' ,
509
- Some ( b't' ) => b'\t' ,
510
- Some ( b'u' ) => {
511
- // TODO - Replace with `<[u8]>::split_first_chunk::<4>` once MSRV >= 1.77
512
- fn split_first_slice (
513
- bytes : & [ u8 ] ,
514
- len : usize ,
515
- ) -> Option < ( & [ u8 ] , & [ u8 ] ) >
516
- {
517
- Some ( ( bytes. get ( ..len) ?, bytes. get ( len..) ?) )
518
- }
519
-
520
- let ( escape_sequence, remaining_escaped_string_bytes) =
521
- split_first_slice ( escaped_string_bytes. as_slice ( ) , 4 )
522
- . ok_or ( Error :: InvalidEscapeSequence ) ?;
523
-
524
- escaped_string_bytes = remaining_escaped_string_bytes. iter ( ) ;
525
-
526
- let unescaped_char = core:: str:: from_utf8 ( escape_sequence)
527
- . ok ( )
528
- . and_then ( |escape_sequence| {
529
- u32:: from_str_radix ( escape_sequence, 16 ) . ok ( )
530
- } )
531
- . and_then ( char:: from_u32)
532
- . ok_or ( Error :: InvalidEscapeSequence ) ?;
533
-
534
- for & unescaped_byte in
535
- unescaped_char. encode_utf8 ( & mut [ 0 ; 4 ] ) . as_bytes ( )
536
- {
537
- * string_unescape_buffer_slots
538
- . next ( )
539
- . ok_or ( Error :: EscapedStringIsTooLong ) ? = unescaped_byte;
540
- }
541
-
542
- continue ;
543
- }
544
- _ => return Err ( Error :: InvalidEscapeSequence ) ,
545
- } ;
546
-
547
- * string_unescape_buffer_slots
548
- . next ( )
549
- . ok_or ( Error :: EscapedStringIsTooLong ) ? = unescaped_byte;
505
+ let unescaped_bytes = match fragment? {
506
+ crate :: str:: EscapedStringFragment :: NotEscaped ( fragment) => {
507
+ fragment. as_bytes ( )
550
508
}
551
- Some ( c) => {
552
- * string_unescape_buffer_slots
553
- . next ( )
554
- . ok_or ( Error :: EscapedStringIsTooLong ) ? = c;
509
+ crate :: str:: EscapedStringFragment :: Escaped ( c) => {
510
+ c. encode_utf8 ( char_encode_buffer) . as_bytes ( )
555
511
}
556
- }
557
- }
512
+ } ;
513
+
514
+ string_unescape_buffer[ string_unescape_buffer_write_position..]
515
+ . get_mut ( ..unescaped_bytes. len ( ) )
516
+ . ok_or ( Error :: EscapedStringIsTooLong ) ?
517
+ . copy_from_slice ( unescaped_bytes) ;
558
518
559
- let remaining_length = string_unescape_buffer_slots . len ( ) ;
560
- let unescaped_string_length = string_unescape_buffer . len ( ) - remaining_length ;
519
+ string_unescape_buffer_write_position += unescaped_bytes . len ( ) ;
520
+ }
561
521
562
522
visitor. visit_str (
563
- str:: from_utf8 ( & string_unescape_buffer[ ..unescaped_string_length] )
564
- . map_err ( |_| Error :: InvalidUnicodeCodePoint ) ?,
523
+ str:: from_utf8 (
524
+ & string_unescape_buffer[ ..string_unescape_buffer_write_position] ,
525
+ )
526
+ . map_err ( |_| Error :: InvalidUnicodeCodePoint ) ?,
565
527
)
566
528
} else {
567
- visitor. visit_borrowed_str ( s )
529
+ visitor. visit_borrowed_str ( escaped_string )
568
530
}
569
531
} else {
570
- visitor. visit_borrowed_str ( s )
532
+ visitor. visit_borrowed_str ( escaped_string )
571
533
}
572
534
}
573
535
@@ -638,11 +600,34 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> {
638
600
}
639
601
640
602
/// Deserializes a newtype struct by handing the visitor a deserializer for the wrapped value.
///
/// When `name` equals `crate::str::EscapedStr::NAME`, the visitor receives a private wrapper
/// deserializer whose every method forwards to `deserialize_any`; that method passes the result
/// of `parse_str` straight to `visit_borrowed_str`. For any other name the visitor receives
/// `self` unchanged.
///
/// NOTE(review): this comment previously read "Unsupported", but the body has always forwarded
/// to `visit_newtype_struct` - confirm the intended wording with the maintainers.
641
- fn deserialize_newtype_struct < V > ( self , _name : & ' static str , visitor : V ) -> Result < V :: Value >
603
+ fn deserialize_newtype_struct < V > ( self , name : & ' static str , visitor : V ) -> Result < V :: Value >
642
604
where
643
605
V : Visitor < ' de > ,
644
606
{
645
- visitor. visit_newtype_struct ( self )
607
// The escaped-string newtype is recognised by its name constant and gets a dedicated
// deserializer; every other newtype takes the `else` branch below.
+ if name == crate :: str:: EscapedStr :: NAME {
608
// Function-local wrapper around the real deserializer, used only for this branch.
+ struct EscapedStringDeserializer < ' a , ' de , ' s > ( & ' a mut Deserializer < ' de , ' s > ) ;
609
+
610
+ impl < ' a , ' de , ' s > serde:: Deserializer < ' de > for EscapedStringDeserializer < ' a , ' de , ' s > {
611
+ type Error = Error ;
612
+
613
+ fn deserialize_any < V > ( self , visitor : V ) -> Result < V :: Value >
614
+ where
615
+ V : Visitor < ' de > ,
616
+ {
// Hands the parsed string to the visitor as borrowed data; no unescaping step is
// applied on this path.
+ visitor. visit_borrowed_str ( self . 0 . parse_str ( ) ?)
618
+ }
619
+
// Every other `deserialize_*` entry point is routed to `deserialize_any` above.
620
+ serde:: forward_to_deserialize_any! {
621
+ bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char str string
622
+ bytes byte_buf option unit unit_struct newtype_struct seq tuple
623
+ tuple_struct map struct enum identifier ignored_any
624
+ }
625
+ }
626
+
627
+ visitor. visit_newtype_struct ( EscapedStringDeserializer ( self ) )
628
+ } else {
629
// Any other newtype: the wrapped value is deserialized by this deserializer directly.
+ visitor. visit_newtype_struct ( self )
630
+ }
646
631
}
647
632
648
633
fn deserialize_seq < V > ( self , visitor : V ) -> Result < V :: Value >
@@ -1058,6 +1043,14 @@ mod tests {
1058
1043
) ;
1059
1044
}
1060
1045
1046
// Checks that a JSON string containing an escape sequence deserializes into an `EscapedStr`
// that still holds the escape as written (backslash followed by `n`), not a newline.
+ #[ test]
1047
+ fn escaped_str ( ) {
1048
+ assert_eq ! (
1049
// The raw-string input is 14 bytes long including both double quotes.
+ crate :: from_str( r#""Hello\nWorld""# ) ,
1050
// The second tuple element matches the input length; presumably it is the number of bytes
// consumed - confirm against `from_str`.
+ Ok ( ( crate :: str :: EscapedStr :: new( r#"Hello\nWorld"# ) . unwrap( ) , 14 ) )
1051
+ ) ;
1052
+ }
1053
+
1061
1054
#[ test]
1062
1055
fn struct_bool ( ) {
1063
1056
#[ derive( Debug , Deserialize , PartialEq ) ]
0 commit comments