Skip to content

Commit 1eba610

Browse files
committed
add method to ValueView
1 parent 5b2734d commit 1eba610

File tree

1 file changed

+75
-2
lines changed

1 file changed

+75
-2
lines changed

src/string.rs

Lines changed: 75 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -778,7 +778,8 @@ impl String {
778778
}
779779
}
780780

781-
/// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
781+
/// Converts a [`crate::String`] to either an owned [`std::string::String`],
782+
/// or a borrowed [`str`], depending on whether it fits into the
782783
/// provided buffer.
783784
pub fn to_rust_cow_lossy<'a, const N: usize>(
784785
&self,
@@ -789,7 +790,9 @@ impl String {
789790
let string = unsafe { Local::from_raw(self).unwrap_unchecked() };
790791
let view = ValueView::new(scope, string);
791792
match view.data() {
792-
ValueViewData::OneByte(bytes) => latin1_to_cow_str(bytes, buffer),
793+
ValueViewData::OneByte(bytes) => {
794+
latin1_to_cow_str_always_copy(bytes, buffer)
795+
}
793796
ValueViewData::TwoByte(code_points) => {
794797
wtf16_to_cow_str(code_points, buffer)
795798
}
@@ -861,6 +864,44 @@ fn wtf16_to_string(code_points: &[u16]) -> std::string::String {
861864

862865
#[inline(always)]
863866
fn latin1_to_cow_str<'a, const N: usize>(
867+
bytes: &'a [u8],
868+
buffer: &'a mut [MaybeUninit<u8>; N],
869+
) -> Cow<'a, str> {
870+
if bytes.is_ascii() {
871+
// SAFETY: The string is ASCII, so it's valid UTF-8.
872+
Cow::Borrowed(unsafe { std::str::from_utf8_unchecked(bytes) })
873+
} else if bytes.len() * 2 < N {
874+
// SAFETY: The string is Latin1 - we need to convert to UTF-8. But it
875+
// is short enough to fit into the buffer, because the buffer is at
876+
// least twice as large as the string and any non-ASCII one-byte
877+
// character will be encoded as exactly two bytes in UTF-8.
878+
let written = unsafe {
879+
latin1_to_utf8(
880+
bytes.len(),
881+
bytes.as_ptr(),
882+
buffer.as_mut_ptr() as *mut u8,
883+
)
884+
};
885+
debug_assert!(written <= buffer.len());
886+
887+
// SAFETY: The buffer is filled with valid UTF-8 data.
888+
let str = unsafe {
889+
std::str::from_utf8_unchecked(std::slice::from_raw_parts(
890+
buffer.as_ptr() as *const u8,
891+
written,
892+
))
893+
};
894+
Cow::Borrowed(str)
895+
} else {
896+
// TODO: this could likely be optimized for large strings by using SIMD to
897+
// calculate the length of the resulting string and then allocating once,
898+
// and then converting the string using SIMD.
899+
Cow::Owned(std::string::String::from_utf8_lossy(bytes).into_owned())
900+
}
901+
}
902+
903+
#[inline(always)]
904+
fn latin1_to_cow_str_always_copy<'a, const N: usize>(
864905
bytes: &[u8],
865906
buffer: &'a mut [MaybeUninit<u8>; N],
866907
) -> Cow<'a, str> {
@@ -1145,3 +1186,35 @@ impl<'s> Drop for ValueView<'s> {
11451186
unsafe { v8__String__ValueView__DESTRUCT(self) }
11461187
}
11471188
}
1189+
1190+
impl ValueView<'_> {
1191+
/// Creates a copy of a [`ValueView`] in a [`std::string::String`].
1192+
/// Convenience function not present in the original V8 API.
1193+
pub fn to_rust_string_lossy(&self) -> std::string::String {
1194+
match self.data() {
1195+
ValueViewData::OneByte(bytes) => latin1_to_string(bytes),
1196+
ValueViewData::TwoByte(code_points) => wtf16_to_string(code_points),
1197+
}
1198+
}
1199+
1200+
/// Converts a [`ValueView`] to either an owned [`std::string::String`],
1201+
/// or a borrowed [`str`].
1202+
///
1203+
/// If the [`ValueView`] is an ASCII one-byte string, a reference to the
1204+
/// string is returned and no copies are performed. If the string is not
1205+
/// ASCII, but fits into the provided buffer, it is copied into the buffer
1206+
/// and a reference to the buffer is returned. If the string does not fit
1207+
/// into the buffer, it is copied into a newly allocated
1208+
/// [`std::string::String`] and returned.
1209+
pub fn to_rust_cow_lossy<'a, const N: usize>(
1210+
&'a self,
1211+
buffer: &'a mut [MaybeUninit<u8>; N],
1212+
) -> Cow<'a, str> {
1213+
match self.data() {
1214+
ValueViewData::OneByte(bytes) => latin1_to_cow_str(bytes, buffer),
1215+
ValueViewData::TwoByte(code_points) => {
1216+
wtf16_to_cow_str(code_points, buffer)
1217+
}
1218+
}
1219+
}
1220+
}

0 commit comments

Comments
 (0)