Skip to content

Commit 7b6451a

Browse files
authored
fix: ExternalOneByteStringResource is not guaranteed to be valid UTF-8 (#1532)
A subtle unsoundness / undefined behaviour made its way into the fairly recently added ExternalOneByteStringResource object: The as_str API is not sound as the data inside may be be Latin-1, not ASCII. As the API was not used anywhere in deno or deno_core, I opted to simply remove it and replace it with an as_bytes API. I also modified the test to showcase the Latin-1 string case and added copious notes and explanations around the code to make sure this doesn't accidentally happen again. The likely reason why the API originally slipped in is because the OneByteConst has this API where it is safe because the OneByteConst creation checks the data for ASCII-ness. I also tried to add an API to extract an Option<&'static OneByteConst> from an &ExternalOneByteStringResource but run into rust-lang/rust#119618 ie. OneByteConst is actually duplicating the vtables... which is not great.
1 parent 6385bd6 commit 7b6451a

File tree

2 files changed

+31
-21
lines changed

2 files changed

+31
-21
lines changed

src/string.rs

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -135,12 +135,16 @@ pub struct ExternalStringResourceBase(Opaque);
135135
#[repr(C)]
136136
/// An external, one-byte string resource.
137137
/// This corresponds with `v8::String::ExternalOneByteStringResource`.
138+
///
139+
/// Note: The data contained in a one-byte string resource is guaranteed to be
140+
/// Latin-1 data. It is not safe to assume that it is valid UTF-8, as Latin-1
141+
/// only has commonality with UTF-8 in the ASCII range and differs beyond that.
138142
pub struct ExternalOneByteStringResource(Opaque);
139143

140144
impl ExternalOneByteStringResource {
141145
/// Returns a pointer to the data owned by this resource.
142146
/// This pointer is valid as long as the resource is alive.
143-
/// The data is guaranteed to be ASCII.
147+
/// The data is guaranteed to be Latin-1.
144148
pub fn data(&self) -> *const char {
145149
unsafe { v8__ExternalOneByteStringResource__data(self) }
146150
}
@@ -151,23 +155,19 @@ impl ExternalOneByteStringResource {
151155
}
152156

153157
/// Returns the data owned by this resource as a string slice.
154-
/// The data is guaranteed to be ASCII.
155-
pub fn as_str(&self) -> &str {
158+
/// The data is guaranteed to be Latin-1.
159+
pub fn as_bytes(&self) -> &[u8] {
156160
let len = self.length();
157161
if len == 0 {
158-
""
162+
&[]
159163
} else {
160-
// SAFETY: We know this is ASCII and length > 0
161-
unsafe {
162-
std::str::from_utf8_unchecked(std::slice::from_raw_parts(
163-
self.data().cast(),
164-
len,
165-
))
166-
}
164+
// SAFETY: We know this is Latin-1
165+
unsafe { std::slice::from_raw_parts(self.data().cast(), len) }
167166
}
168167
}
169168
}
170169

170+
/// A static ASCII string resource for usage in V8, created at build time.
171171
#[repr(C)]
172172
#[derive(Copy, Clone, Debug)]
173173
pub struct OneByteConst {
@@ -551,6 +551,12 @@ impl String {
551551

552552
/// Compile-time function to create an external string resource which
553553
/// skips the ASCII and length checks.
554+
///
555+
/// ## Safety
556+
///
557+
/// The passed in buffer must contain only ASCII data. Note that while V8
558+
/// allows OneByte string resources to contain Latin-1 data, the OneByteConst
559+
/// struct does not allow it.
554560
#[inline(always)]
555561
pub const unsafe fn create_external_onebyte_const_unchecked(
556562
buffer: &'static [u8],
@@ -563,7 +569,10 @@ impl String {
563569
}
564570

565571
/// Creates a v8::String from a `&'static OneByteConst`
566-
/// which is guaranteed to be Latin-1 or ASCII.
572+
/// which is guaranteed to be ASCII.
573+
///
574+
/// Note that OneByteConst guarantees ASCII even though V8 would allow
575+
/// OneByte string resources to contain Latin-1.
567576
#[inline(always)]
568577
pub fn new_from_onebyte_const<'s>(
569578
scope: &mut HandleScope<'s, ()>,

tests/test_api.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7839,22 +7839,23 @@ fn external_onebyte_string() {
78397839
let isolate = &mut v8::Isolate::new(Default::default());
78407840
let scope = &mut v8::HandleScope::new(isolate);
78417841

7842-
let input = "hello";
7843-
let s = v8::String::new_external_onebyte(
7844-
scope,
7845-
Box::<str>::from(input).into_boxed_bytes(),
7846-
)
7847-
.unwrap();
7842+
// "hello©"
7843+
// Note that we're specifically testing a byte array that is not ASCII nor
7844+
// UTF-8, but is valid Latin-1. V8's one-byte strings accept Latin-1 and we
7845+
// need to remember this detail: It is not safe to access one-byte strings as
7846+
// UTF-8 strings.
7847+
let input = Box::new([b'h', b'e', b'l', b'l', b'o', 0xA9]);
7848+
let s = v8::String::new_external_onebyte(scope, input).unwrap();
78487849

78497850
assert!(s.is_external_onebyte());
7850-
assert_eq!(s.utf8_length(scope), 5);
7851+
assert_eq!(s.utf8_length(scope), 7);
78517852

78527853
let one_byte =
78537854
unsafe { &*s.get_external_onebyte_string_resource().unwrap().as_ptr() };
78547855

7855-
assert_eq!(one_byte.length(), 5);
7856+
assert_eq!(one_byte.length(), 6);
78567857

7857-
assert_eq!(one_byte.as_str(), "hello");
7858+
assert_eq!(one_byte.as_bytes(), [b'h', b'e', b'l', b'l', b'o', 0xA9]);
78587859
}
78597860

78607861
#[test]

0 commit comments

Comments
 (0)