Skip to content

Commit 0141836

Browse files
committed
from_slice and from_str do not attempt to unescape strings
1 parent 4c6de3d commit 0141836

File tree

1 file changed

+100
-97
lines changed

1 file changed

+100
-97
lines changed

src/de/mod.rs

+100-97
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,6 @@ pub enum Error {
7171
/// Invalid String Escape Sequence
7272
InvalidEscapeSequence,
7373

74-
/// Unescaping and Escaped String requires a buffer
75-
EscapedStringRequiresBuffer,
76-
7774
/// Escaped String length exceeds buffer size
7875
EscapedStringIsTooLong,
7976

@@ -102,12 +99,16 @@ impl serde::de::StdError for Error {}
10299
pub struct Deserializer<'b, 's> {
103100
slice: &'b [u8],
104101
index: usize,
105-
string_unescape_buffer: &'s mut [u8],
102+
string_unescape_buffer: Option<&'s mut [u8]>,
106103
}
107104

108105
impl<'a, 's> Deserializer<'a, 's> {
109-
/// Create a new `Deserializer`
110-
pub fn new(slice: &'a [u8], string_unescape_buffer: &'s mut [u8]) -> Deserializer<'a, 's> {
106+
/// Create a new `Deserializer`, optionally with a buffer to use to unescape strings.
107+
/// If no buffer is provided, strings are not unescaped.
108+
pub fn new(
109+
slice: &'a [u8],
110+
string_unescape_buffer: Option<&'s mut [u8]>,
111+
) -> Deserializer<'a, 's> {
111112
Deserializer {
112113
slice,
113114
index: 0,
@@ -193,6 +194,12 @@ impl<'a, 's> Deserializer<'a, 's> {
193194
}
194195

195196
fn parse_str(&mut self) -> Result<&'a str> {
197+
if self.parse_whitespace().ok_or(Error::EofWhileParsingValue)? == b'"' {
198+
self.eat_char();
199+
} else {
200+
return Err(Error::InvalidType);
201+
}
202+
196203
let start = self.index;
197204
loop {
198205
match self.peek() {
@@ -478,97 +485,89 @@ impl<'a, 'de, 's> de::Deserializer<'de> for &'a mut Deserializer<'de, 's> {
478485
where
479486
V: Visitor<'de>,
480487
{
481-
let peek = self.parse_whitespace().ok_or(Error::EofWhileParsingValue)?;
488+
let s = self.parse_str()?;
482489

483-
match peek {
484-
b'"' => {
485-
self.eat_char();
490+
if let Some(string_unescape_buffer) = self.string_unescape_buffer.as_deref_mut() {
491+
if s.as_bytes().contains(&b'\\') {
492+
let mut string_unescape_buffer_slots = string_unescape_buffer.iter_mut();
486493

487-
let s = self.parse_str()?;
488-
489-
if s.as_bytes().contains(&b'\\') {
490-
let mut string_unescape_buffer_slots = self.string_unescape_buffer.iter_mut();
491-
492-
// We've already checked that the string is valid UTF-8, so the only b'\\' is the start of escape sequence
493-
let mut escaped_string_bytes = s.as_bytes().iter();
494-
495-
loop {
496-
match escaped_string_bytes.next().copied() {
497-
None => break,
498-
Some(b'\\') => {
499-
let unescaped_byte = match escaped_string_bytes.next() {
500-
Some(b'"') => b'"',
501-
Some(b'\\') => b'\\',
502-
Some(b'/') => b'/',
503-
Some(b'b') => 0x8,
504-
Some(b'f') => 0xC,
505-
Some(b'n') => b'\n',
506-
Some(b'r') => b'\r',
507-
Some(b't') => b'\t',
508-
Some(b'u') => {
509-
// TODO - Replace with `<[u8]>::split_first_chunk::<4>` once MSRV >= 1.77
510-
fn split_first_slice(
511-
bytes: &[u8],
512-
len: usize,
513-
) -> Option<(&[u8], &[u8])>
514-
{
515-
Some((bytes.get(..len)?, bytes.get(len..)?))
516-
}
517-
518-
let (escape_sequence, remaining_escaped_string_bytes) =
519-
split_first_slice(escaped_string_bytes.as_slice(), 4)
520-
.ok_or(Error::InvalidEscapeSequence)?;
521-
522-
escaped_string_bytes =
523-
remaining_escaped_string_bytes.iter();
524-
525-
let unescaped_char = core::str::from_utf8(escape_sequence)
526-
.ok()
527-
.and_then(|escape_sequence| {
528-
u32::from_str_radix(escape_sequence, 16).ok()
529-
})
530-
.and_then(char::from_u32)
531-
.ok_or(Error::InvalidEscapeSequence)?;
494+
// The string has already been validated as UTF-8, so every b'\\' byte is a literal backslash, i.e. the start of an escape sequence
495+
let mut escaped_string_bytes = s.as_bytes().iter();
496+
497+
loop {
498+
match escaped_string_bytes.next().copied() {
499+
None => break,
500+
Some(b'\\') => {
501+
let unescaped_byte = match escaped_string_bytes.next() {
502+
Some(b'"') => b'"',
503+
Some(b'\\') => b'\\',
504+
Some(b'/') => b'/',
505+
Some(b'b') => 0x8,
506+
Some(b'f') => 0xC,
507+
Some(b'n') => b'\n',
508+
Some(b'r') => b'\r',
509+
Some(b't') => b'\t',
510+
Some(b'u') => {
511+
// TODO - Replace with `<[u8]>::split_first_chunk::<4>` once MSRV >= 1.77
512+
fn split_first_slice(
513+
bytes: &[u8],
514+
len: usize,
515+
) -> Option<(&[u8], &[u8])>
516+
{
517+
Some((bytes.get(..len)?, bytes.get(len..)?))
518+
}
532519

533-
for &unescaped_byte in
534-
unescaped_char.encode_utf8(&mut [0; 4]).as_bytes()
535-
{
536-
*string_unescape_buffer_slots
537-
.next()
538-
.ok_or(Error::EscapedStringIsTooLong)? =
539-
unescaped_byte;
540-
}
520+
let (escape_sequence, remaining_escaped_string_bytes) =
521+
split_first_slice(escaped_string_bytes.as_slice(), 4)
522+
.ok_or(Error::InvalidEscapeSequence)?;
541523

542-
continue;
524+
escaped_string_bytes = remaining_escaped_string_bytes.iter();
525+
526+
let unescaped_char = core::str::from_utf8(escape_sequence)
527+
.ok()
528+
.and_then(|escape_sequence| {
529+
u32::from_str_radix(escape_sequence, 16).ok()
530+
})
531+
.and_then(char::from_u32)
532+
.ok_or(Error::InvalidEscapeSequence)?;
533+
534+
for &unescaped_byte in
535+
unescaped_char.encode_utf8(&mut [0; 4]).as_bytes()
536+
{
537+
*string_unescape_buffer_slots
538+
.next()
539+
.ok_or(Error::EscapedStringIsTooLong)? = unescaped_byte;
543540
}
544-
_ => return Err(Error::InvalidEscapeSequence),
545-
};
546541

547-
*string_unescape_buffer_slots
548-
.next()
549-
.ok_or(Error::EscapedStringIsTooLong)? = unescaped_byte;
550-
}
551-
Some(c) => {
552-
*string_unescape_buffer_slots
553-
.next()
554-
.ok_or(Error::EscapedStringIsTooLong)? = c;
555-
}
542+
continue;
543+
}
544+
_ => return Err(Error::InvalidEscapeSequence),
545+
};
546+
547+
*string_unescape_buffer_slots
548+
.next()
549+
.ok_or(Error::EscapedStringIsTooLong)? = unescaped_byte;
550+
}
551+
Some(c) => {
552+
*string_unescape_buffer_slots
553+
.next()
554+
.ok_or(Error::EscapedStringIsTooLong)? = c;
556555
}
557556
}
557+
}
558558

559-
let remaining_length = string_unescape_buffer_slots.len();
560-
let unescaped_string_length =
561-
self.string_unescape_buffer.len() - remaining_length;
559+
let remaining_length = string_unescape_buffer_slots.len();
560+
let unescaped_string_length = string_unescape_buffer.len() - remaining_length;
562561

563-
visitor.visit_str(
564-
str::from_utf8(&self.string_unescape_buffer[..unescaped_string_length])
565-
.map_err(|_| Error::InvalidUnicodeCodePoint)?,
566-
)
567-
} else {
568-
visitor.visit_borrowed_str(s)
569-
}
562+
visitor.visit_str(
563+
str::from_utf8(&string_unescape_buffer[..unescaped_string_length])
564+
.map_err(|_| Error::InvalidUnicodeCodePoint)?,
565+
)
566+
} else {
567+
visitor.visit_borrowed_str(s)
570568
}
571-
_ => Err(Error::InvalidType),
569+
} else {
570+
visitor.visit_borrowed_str(s)
572571
}
573572
}
574573

@@ -839,11 +838,9 @@ impl fmt::Display for Error {
839838
}
840839
}
841840

842-
/// Deserializes an instance of type `T` from bytes of JSON text, using the provided buffer to unescape strings
843-
/// Returns the value and the number of bytes consumed in the process
844-
pub fn from_slice_escaped<'a, T>(
841+
fn from_slice_maybe_escaped<'a, T>(
845842
v: &'a [u8],
846-
string_unescape_buffer: &mut [u8],
843+
string_unescape_buffer: Option<&mut [u8]>,
847844
) -> Result<(T, usize)>
848845
where
849846
T: de::Deserialize<'a>,
@@ -855,19 +852,25 @@ where
855852
Ok((value, length))
856853
}
857854

855+
/// Deserializes an instance of type `T` from bytes of JSON text, using the provided buffer to unescape strings
856+
/// Returns the value and the number of bytes consumed in the process
857+
pub fn from_slice_escaped<'a, T>(
858+
v: &'a [u8],
859+
string_unescape_buffer: &mut [u8],
860+
) -> Result<(T, usize)>
861+
where
862+
T: de::Deserialize<'a>,
863+
{
864+
from_slice_maybe_escaped(v, Some(string_unescape_buffer))
865+
}
866+
858867
/// Deserializes an instance of type `T` from bytes of JSON text
859868
/// Returns the value and the number of bytes consumed in the process
860869
pub fn from_slice<'a, T>(v: &'a [u8]) -> Result<(T, usize)>
861870
where
862871
T: de::Deserialize<'a>,
863872
{
864-
from_slice_escaped(v, &mut []).map_err(|error| {
865-
if let Error::EscapedStringIsTooLong = error {
866-
Error::EscapedStringRequiresBuffer
867-
} else {
868-
error
869-
}
870-
})
873+
from_slice_maybe_escaped(v, None)
871874
}
872875

873876
/// Deserializes an instance of type `T` from a string of JSON text, using the provided buffer to unescape strings

0 commit comments

Comments
 (0)