diff --git a/Cargo.toml b/Cargo.toml
index a921ee6..1867471 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -18,7 +18,7 @@ bench = false
 simd-accel = ["faster"]
 
 [dependencies]
-faster = { version = "0.4.3", optional = true }
+faster = { git = "https://github.com/AdamNiederer/faster", branch="master", optional = true }
 
 [dev-dependencies]
 quickcheck = "0.6"
diff --git a/src/lib.rs b/src/lib.rs
index 62b3bfb..47f9aa9 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -187,14 +187,17 @@ pub fn count(haystack: &[u8], needle: u8) -> usize {
             ret += (&haystack[i * u8s::WIDTH * 255..(i + 1) * u8s::WIDTH * 255])
                 .simd_iter()
                 .simd_reduce(u8s(0), u8s(needle.overflowing_add(1).0), |acc, v| {
-                    acc + (PackedEq::eq(&v, &u8s(needle)).be_u8s() & u8s(0x01))
-                }).scalar_reduce(0, |acc, s| acc + (s as usize));
+                    acc + (v.eq_mask(u8s(needle)).be_u8s() & u8s(0x01))
+                }).sum_upcast() as usize;
         }
-        ret + (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])
+
+        let final_chunk = (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])
             .simd_iter()
             .simd_reduce(u8s(0), u8s(needle.overflowing_add(1).0), |acc, v| {
-                acc + (PackedEq::eq(&v, &u8s(needle)).be_u8s() & u8s(0x01))
-            }).scalar_reduce(0, |acc, s| acc + (s as usize))
+                acc + (v.eq_mask(u8s(needle)).be_u8s() & u8s(0x01))
+            }).sum_upcast() as usize;
+
+        ret + final_chunk
     }
 }
 
@@ -276,7 +279,18 @@ pub fn num_chars(haystack: &[u8]) -> usize {
     num_chars_generic::(32, haystack)
 }
 
-/// f
+/// Count the number of UTF-8 encoded unicode codepoints in a slice of bytes, fast
+///
+/// This function is safe to use on any byte array, valid UTF-8 or not,
+/// but the output is only meaningful for well-formed UTF-8.
+///
+/// # Example
+///
+/// ```
+/// let swordfish = "メカジキ";
+/// let char_count = bytecount::num_chars(swordfish.as_bytes());
+/// assert_eq!(char_count, 4);
+/// ```
 #[cfg(feature = "simd-accel")]
 pub fn num_chars(haystack: &[u8]) -> usize {
     if haystack.len() < 100 {
@@ -288,14 +302,17 @@ pub fn num_chars(haystack: &[u8]) -> usize {
             ret += (&haystack[i * u8s::WIDTH * 255..(i + 1) * u8s::WIDTH * 255])
                 .simd_iter()
                 .simd_reduce(u8s(0), u8s(0), |acc, v| {
-                    acc + (PackedEq::eq(&(v & u8s(0xC0)), &u8s(0x80)).be_u8s() & u8s(0x01))
-                }).scalar_reduce(0, |acc, s| acc + (s as usize));
+                    acc + ((v & u8s(0xC0)).eq_mask(u8s(0x80)).be_u8s() & u8s(0x01))
+                }).sum_upcast() as usize;
         }
-        haystack.len() - ret - (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])
+
+        let final_chunk = (&haystack[haystack.len() - haystack.len() % (u8s::WIDTH * 255)..])
             .simd_iter()
             .simd_reduce(u8s(0), u8s(0), |acc, v| {
-                acc + (PackedEq::eq(&(v & u8s(0xC0)), &u8s(0x80)).be_u8s() & u8s(0x01))
-            }).scalar_reduce(0, |acc, s| acc + (s as usize))
+                acc + ((v & u8s(0xC0)).eq_mask(u8s(0x80)).be_u8s() & u8s(0x01))
+            }).sum_upcast() as usize;
+
+        haystack.len() - ret - final_chunk
     }
 }