@@ -778,7 +778,8 @@ impl String {
778778 }
779779 }
780780
781- /// Converts a [`crate::String`] to either an owned [`std::string::String`], or a borrowed [`str`], depending on whether it fits into the
781+ /// Converts a [`crate::String`] to either an owned [`std::string::String`],
782+ /// or a borrowed [`str`], depending on whether it fits into the
782783 /// provided buffer.
783784 pub fn to_rust_cow_lossy < ' a , const N : usize > (
784785 & self ,
@@ -789,7 +790,9 @@ impl String {
789790 let string = unsafe { Local :: from_raw ( self ) . unwrap_unchecked ( ) } ;
790791 let view = ValueView :: new ( scope, string) ;
791792 match view. data ( ) {
792- ValueViewData :: OneByte ( bytes) => latin1_to_cow_str ( bytes, buffer) ,
793+ ValueViewData :: OneByte ( bytes) => {
794+ latin1_to_cow_str_always_copy ( bytes, buffer)
795+ }
793796 ValueViewData :: TwoByte ( code_points) => {
794797 wtf16_to_cow_str ( code_points, buffer)
795798 }
@@ -861,6 +864,44 @@ fn wtf16_to_string(code_points: &[u16]) -> std::string::String {
861864
862865#[ inline( always) ]
863866fn latin1_to_cow_str < ' a , const N : usize > (
867+ bytes : & ' a [ u8 ] ,
868+ buffer : & ' a mut [ MaybeUninit < u8 > ; N ] ,
869+ ) -> Cow < ' a , str > {
870+ if bytes. is_ascii ( ) {
871+ // SAFETY: The string is ASCII, so it's valid UTF-8.
872+ Cow :: Borrowed ( unsafe { std:: str:: from_utf8_unchecked ( bytes) } )
873+ } else if bytes. len ( ) * 2 < N {
874+ // SAFETY: The string is Latin1 - we need to convert to UTF-8. But it
875+ // is short enough to fit into the buffer, because the buffer is at
876+ // least twice as large as the string and any non-ASCII one-byte
877+ // character will be encoded as exactly two bytes in UTF-8.
878+ let written = unsafe {
879+ latin1_to_utf8 (
880+ bytes. len ( ) ,
881+ bytes. as_ptr ( ) ,
882+ buffer. as_mut_ptr ( ) as * mut u8 ,
883+ )
884+ } ;
885+ debug_assert ! ( written <= buffer. len( ) ) ;
886+
887+ // SAFETY: The buffer is filled with valid UTF-8 data.
888+ let str = unsafe {
889+ std:: str:: from_utf8_unchecked ( std:: slice:: from_raw_parts (
890+ buffer. as_ptr ( ) as * const u8 ,
891+ written,
892+ ) )
893+ } ;
894+ Cow :: Borrowed ( str)
895+ } else {
896+ // TODO: this could likely be optimized for large strings by using SIMD to
897+ // calculate the length of the resulting string and then allocating once,
898+ // and then converting the string using SIMD.
899+ Cow :: Owned ( std:: string:: String :: from_utf8_lossy ( bytes) . into_owned ( ) )
900+ }
901+ }
902+
903+ #[ inline( always) ]
904+ fn latin1_to_cow_str_always_copy < ' a , const N : usize > (
864905 bytes : & [ u8 ] ,
865906 buffer : & ' a mut [ MaybeUninit < u8 > ; N ] ,
866907) -> Cow < ' a , str > {
@@ -1145,3 +1186,35 @@ impl<'s> Drop for ValueView<'s> {
11451186 unsafe { v8__String__ValueView__DESTRUCT ( self ) }
11461187 }
11471188}
1189+
1190+ impl ValueView < ' _ > {
1191+ /// Creates a copy of a [`ValueView`] in a [`std::string::String`].
1192+ /// Convenience function not present in the original V8 API.
1193+ pub fn to_rust_string_lossy ( & self ) -> std:: string:: String {
1194+ match self . data ( ) {
1195+ ValueViewData :: OneByte ( bytes) => latin1_to_string ( bytes) ,
1196+ ValueViewData :: TwoByte ( code_points) => wtf16_to_string ( code_points) ,
1197+ }
1198+ }
1199+
1200+ /// Converts a [`ValueView`] to either an owned [`std::string::String`],
1201+ /// or a borrowed [`str`].
1202+ ///
1203+ /// If the [`ValueView`] is an ASCII one-byte string, a reference to the
1204+ /// string is returned and no copies are performed. If the string is not
1205+ /// ASCII, but fits into the provided buffer, it is copied into the buffer
1206+ /// and a reference to the buffer is returned. If the string does not fit
1207+ /// into the buffer, it is copied into a newly allocated
1208+ /// [`std::string::String`] and returned.
1209+ pub fn to_rust_cow_lossy < ' a , const N : usize > (
1210+ & ' a self ,
1211+ buffer : & ' a mut [ MaybeUninit < u8 > ; N ] ,
1212+ ) -> Cow < ' a , str > {
1213+ match self . data ( ) {
1214+ ValueViewData :: OneByte ( bytes) => latin1_to_cow_str ( bytes, buffer) ,
1215+ ValueViewData :: TwoByte ( code_points) => {
1216+ wtf16_to_cow_str ( code_points, buffer)
1217+ }
1218+ }
1219+ }
1220+ }
0 commit comments