|
1 | | -use std::ptr; |
2 | 1 |
|
3 | 2 | pub fn utf8_to_utf32(buf: &[u8]) -> Vec<u32> { |
4 | 3 | let mut pos = 0; |
5 | 4 | let len = buf.len(); |
6 | 5 | let mut output = Vec::with_capacity(utf8_to_utf32_length(buf)); // Use exact capacity for better performance |
7 | 6 |
|
8 | 7 | while pos < len { |
| 8 | + // Optimized ASCII fast path similar to C++ implementation |
9 | 9 | if pos + 16 <= len { |
10 | | - let mut v1: u64 = 0; |
11 | | - let mut v2: u64 = 0; |
12 | | - unsafe { |
13 | | - ptr::copy_nonoverlapping( |
14 | | - buf.as_ptr().add(pos), |
15 | | - (&mut v1 as *mut u64) as *mut u8, |
16 | | - 8, |
17 | | - ); |
18 | | - ptr::copy_nonoverlapping( |
19 | | - buf.as_ptr().add(pos + 8), |
20 | | - (&mut v2 as *mut u64) as *mut u8, |
21 | | - 8, |
22 | | - ); |
23 | | - } |
24 | | - let v = v1 | v2; |
25 | | - if (v & 0x8080808080808080) == 0 { |
26 | | - let final_pos = pos + 16; |
27 | | - while pos < final_pos { |
28 | | - output.push(buf[pos] as u32); |
29 | | - pos += 1; |
30 | | - } |
| 10 | + let chunk = unsafe { std::slice::from_raw_parts(buf.as_ptr().add(pos), 16) }; |
| 11 | + |
| 12 | + // Check if all 16 bytes are ASCII using efficient OR operation |
| 13 | + let mut ascii_check = 0u8; |
| 14 | + for &byte in chunk { |
| 15 | + ascii_check |= byte; |
| 16 | + } |
| 17 | + |
| 18 | + if ascii_check < 0x80 { |
| 19 | + // All ASCII - efficient bulk copy |
| 20 | + output.extend(chunk.iter().map(|&b| b as u32)); |
| 21 | + pos += 16; |
31 | 22 | continue; |
32 | 23 | } |
33 | 24 | } |
@@ -130,19 +121,20 @@ pub fn utf32_to_utf8(buf: &[u32]) -> Vec<u8> { |
130 | 121 | let mut output = Vec::with_capacity(utf8_length_from_utf32(buf)); // Use exact capacity for better performance |
131 | 122 |
|
132 | 123 | while pos < len { |
133 | | - if pos + 2 <= len { |
134 | | - let mut v: u64 = 0; |
135 | | - unsafe { |
136 | | - ptr::copy_nonoverlapping( |
137 | | - buf.as_ptr().add(pos), |
138 | | - (&mut v as *mut u64) as *mut u32, |
139 | | - 2, |
140 | | - ); |
141 | | - } |
142 | | - if (v & 0xFFFFFF80FFFFFF80) == 0 { |
143 | | - output.push(buf[pos] as u8); |
144 | | - output.push(buf[pos + 1] as u8); |
145 | | - pos += 2; |
| 124 | + // ASCII fast path for multiple codepoints |
| 125 | + if pos + 4 <= len { |
| 126 | + let chunk = unsafe { std::slice::from_raw_parts(buf.as_ptr().add(pos), 4) }; |
| 127 | + |
| 128 | + // Check if all 4 codepoints are ASCII |
| 129 | + let mut ascii_check = 0u32; |
| 130 | + for &cp in chunk { |
| 131 | + ascii_check |= cp; |
| 132 | + } |
| 133 | + |
| 134 | + if ascii_check < 0x80 { |
| 135 | + // All ASCII - bulk convert |
| 136 | + output.extend(chunk.iter().map(|&cp| cp as u8)); |
| 137 | + pos += 4; |
146 | 138 | continue; |
147 | 139 | } |
148 | 140 | } |
|
0 commit comments