diff --git a/Cargo.toml b/Cargo.toml index 8398d6333..ebadd65f6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "packed_simd" -version = "0.3.3" +version = "0.3.4" authors = ["Gonzalo Brito Gadeschi "] description = "Portable Packed SIMD vectors" documentation = "https://docs.rs/crate/packed_simd/" diff --git a/examples/aobench/src/random.rs b/examples/aobench/src/random.rs index c4f448e6f..b6fef0e20 100644 --- a/examples/aobench/src/random.rs +++ b/examples/aobench/src/random.rs @@ -67,7 +67,7 @@ pub mod scalar { pub fn thread_rng() -> RngH { RngH { - rng: THREAD_RNG_KEY.with(|t| t.clone()), + rng: THREAD_RNG_KEY.with(Clone::clone), } } } @@ -134,7 +134,7 @@ pub mod vector { pub fn thread_rng() -> RngH { RngH { - rng: THREAD_RNG_KEY.with(|t| t.clone()), + rng: THREAD_RNG_KEY.with(Clone::clone), } } } diff --git a/examples/slice_sum/src/main.rs b/examples/slice_sum/src/main.rs index 2b874ac84..18b3692aa 100644 --- a/examples/slice_sum/src/main.rs +++ b/examples/slice_sum/src/main.rs @@ -23,7 +23,7 @@ fn sum_hor(x: &[f32]) -> f32 { x.chunks_exact(f32s::lanes()) .map(f32s::from_slice_unaligned) - .map(|vec| vec.sum()) + .map(f32s::sum) .sum() } diff --git a/src/api.rs b/src/api.rs index 953685925..942183dc4 100644 --- a/src/api.rs +++ b/src/api.rs @@ -208,6 +208,7 @@ macro_rules! 
impl_f { impl_math_float_mul_add!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_mul_adde!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_powf!([$elem_ty; $elem_n]: $tuple_id | $test_tt); + impl_math_float_powi!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_recpre!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_rsqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt); impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt); diff --git a/src/api/math/float.rs b/src/api/math/float.rs index d5d2bee2e..ee0f6c883 100644 --- a/src/api/math/float.rs +++ b/src/api/math/float.rs @@ -15,6 +15,9 @@ mod exp; #[macro_use] mod powf; +#[macro_use] +mod powi; + #[macro_use] mod ln; diff --git a/src/api/math/float/powf.rs b/src/api/math/float/powf.rs index 83dc9ff9c..cb42f9dfa 100644 --- a/src/api/math/float/powf.rs +++ b/src/api/math/float/powf.rs @@ -11,12 +11,13 @@ macro_rules! impl_math_float_powf { } } - test_if!{ + test_if! { $test_tt: paste::item! { pub mod [<$id _math_powf>] { use super::*; - #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] fn powf() { let z = $id::splat(0 as $elem_ty); let o = $id::splat(1 as $elem_ty); diff --git a/src/api/math/float/powi.rs b/src/api/math/float/powi.rs new file mode 100644 index 000000000..2ffbd17fd --- /dev/null +++ b/src/api/math/float/powi.rs @@ -0,0 +1,42 @@ +//! Implements vertical (lane-wise) floating-point `powi`. + +macro_rules! impl_math_float_powi { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Raises each lane of `self` to the integer power `x`. + #[inline] + pub fn powi(self, x: i32) -> Self { + use crate::codegen::math::float::powi::Powi; + Powi::powi(self, x) + } + } + + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _math_powi>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn powf() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + let s = $id::splat(7 as $elem_ty); + let e = $id::splat(8 as $elem_ty); + assert_eq!(z, z.powi(1)); + assert_eq!(o, z.powi(0)); + assert_eq!(o, o.powi(0)); + assert_eq!(o, t.powi(0)); + assert_eq!(o, o.powi(1)); + assert_eq!(t, t.powi(1)); + assert_eq!(f, t.powi(2)); + assert_eq!(e, t.powi(3)); + assert_eq!($id::splat(16807 as $elem_ty), s.powi(5)); + } + } + } + } + }; +} diff --git a/src/api/slice/write_to_slice.rs b/src/api/slice/write_to_slice.rs index fcb288da7..452710fcb 100644 --- a/src/api/slice/write_to_slice.rs +++ b/src/api/slice/write_to_slice.rs @@ -55,8 +55,8 @@ macro_rules! impl_slice_write_to_slice { 0 ); - #[allow(clippy::cast_ptr_alignment)] - #[allow(clippy::cast_ptr_alignment)] + #[allow(clippy::cast_ptr_alignment)] + #[allow(clippy::cast_ptr_alignment)] #[allow(clippy::cast_ptr_alignment)] #[allow(clippy::cast_ptr_alignment)] *(target_ptr as *mut Self) = self; diff --git a/src/codegen/math/float.rs b/src/codegen/math/float.rs index 3743b4990..c277a1452 100644 --- a/src/codegen/math/float.rs +++ b/src/codegen/math/float.rs @@ -11,6 +11,7 @@ crate mod ln; crate mod mul_add; crate mod mul_adde; crate mod powf; +crate mod powi; crate mod sin; crate mod sin_cos_pi; crate mod sin_pi; diff --git a/src/codegen/math/float/powi.rs b/src/codegen/math/float/powi.rs new file mode 100644 index 000000000..795b4665b --- /dev/null +++ b/src/codegen/math/float/powi.rs @@ -0,0 +1,54 @@ +//! 
Vertical floating-point `powi` +#![allow(unused)] + +use crate::*; + +crate trait Powi { + fn powi(self, x: i32) -> Self; +} + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.powi.v2f32"] + fn powi_v2f32(x: f32x2, y: i32) -> f32x2; + #[link_name = "llvm.powi.v4f32"] + fn powi_v4f32(x: f32x4, y: i32) -> f32x4; + #[link_name = "llvm.powi.v8f32"] + fn powi_v8f32(x: f32x8, y: i32) -> f32x8; + #[link_name = "llvm.powi.v16f32"] + fn powi_v16f32(x: f32x16, y: i32) -> f32x16; + /* FIXME 64-bit single elem vectors + #[link_name = "llvm.powi.v1f64"] + fn powi_v1f64(x: f64x1, y: i32) -> f64x1; + */ + #[link_name = "llvm.powi.v2f64"] + fn powi_v2f64(x: f64x2, y: i32) -> f64x2; + #[link_name = "llvm.powi.v4f64"] + fn powi_v4f64(x: f64x4, y: i32) -> f64x4; + #[link_name = "llvm.powi.v8f64"] + fn powi_v8f64(x: f64x8, y: i32) -> f64x8; + + #[link_name = "llvm.powi.f32"] + fn powi_f32(x: f32, y: i32) -> f32; + #[link_name = "llvm.powi.f64"] + fn powi_f64(x: f64, y: i32) -> f64; +} + +macro_rules! impl_ { + ($id:ident, $fn_id:ident) => { + impl Powi for $id { + fn powi(self, x: i32) -> Self { + use mem::transmute; + unsafe { transmute($fn_id(transmute(self), x)) } + } + } + }; +} + +impl_!(f32x2, powi_v2f32); +impl_!(f32x4, powi_v4f32); +impl_!(f32x8, powi_v8f32); +impl_!(f32x16, powi_v16f32); +impl_!(f64x2, powi_v2f64); +impl_!(f64x4, powi_v4f64); +impl_!(f64x8, powi_v8f64); diff --git a/src/codegen/math/float/tanh.rs b/src/codegen/math/float/tanh.rs index 5220c7d10..5baa7ae42 100644 --- a/src/codegen/math/float/tanh.rs +++ b/src/codegen/math/float/tanh.rs @@ -10,7 +10,6 @@ crate trait Tanh { } macro_rules! define_tanh { - ($name:ident, $basetype:ty, $simdtype:ty, $lanes:expr, $trait:path) => { fn $name(x: $simdtype) -> $simdtype { use core::intrinsics::transmute; @@ -31,8 +30,9 @@ macro_rules! 
define_tanh { }; } -// llvm does not seem to expose the hyperbolic versions of trigonometric functions; -// we thus call the classical rust versions on all of them (which stem from cmath). +// llvm does not seem to expose the hyperbolic versions of trigonometric +// functions; we thus call the classical rust versions on all of them (which +// stem from cmath). define_tanh!(f32 => tanh_v2f32, f32x2, 2); define_tanh!(f32 => tanh_v4f32, f32x4, 4); define_tanh!(f32 => tanh_v8f32, f32x8, 8); diff --git a/src/lib.rs b/src/lib.rs index d73645e72..348c8d12a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -219,9 +219,12 @@ clippy::cast_lossless, clippy::cast_possible_wrap, clippy::cast_precision_loss, - // This lint is currently broken for generic code + // FIXME: This lint is currently broken for generic code // See https://github.com/rust-lang/rust-clippy/issues/3410 - clippy::use_self + clippy::use_self, + // FIXME: This lint is currently broken for macros + // See https://github.com/rust-lang/rust-clippy/issues/3981 + clippy::unnecessary_cast, )] #![cfg_attr(test, feature(hashmap_internals))] #![deny(warnings, rust_2018_idioms, clippy::missing_inline_in_public_items)]