-
Notifications
You must be signed in to change notification settings - Fork 13.4k
intrinsics fmuladdf{32,64}: expose llvm.fmuladd.* semantics #124874
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1795,6 +1795,59 @@ extern "rust-intrinsic" { | |
#[rustc_nounwind] | ||
pub fn fmaf128(a: f128, b: f128, c: f128) -> f128; | ||
|
||
/// Returns `a * b + c` for `f16` values, non-deterministically executing | ||
/// either a fused multiply-add or two operations with rounding of the | ||
/// intermediate result. | ||
/// | ||
/// The operation is fused if the code generator determines that the target | ||
/// instruction set has support for a fused operation, and that the fused | ||
/// operation is more efficient than the equivalent, separate pair of mul | ||
/// and add instructions. It is unspecified whether or not a fused operation | ||
/// is selected; the choice may depend on, for example, the optimization | ||
/// level and the surrounding context. | ||
#[rustc_nounwind] | ||
#[cfg(not(bootstrap))] | ||
pub fn fmuladdf16(a: f16, b: f16, c: f16) -> f16; | ||
/// Returns `a * b + c` for `f32` values, non-deterministically executing | ||
/// either a fused multiply-add or two operations with rounding of the | ||
/// intermediate result. | ||
/// | ||
/// The operation is fused if the code generator determines that the target | ||
/// instruction set has support for a fused operation, and that the fused | ||
/// operation is more efficient than the equivalent, separate pair of mul | ||
/// and add instructions. It is unspecified whether or not a fused operation | ||
/// is selected; the choice may depend on, for example, the optimization | ||
/// level and the surrounding context. | ||
#[rustc_nounwind] | ||
#[cfg(not(bootstrap))] | ||
pub fn fmuladdf32(a: f32, b: f32, c: f32) -> f32; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is this a good opportunity to use our fallback bodies, with a fallback implementation of […]? Miri, however, should probably roll some dice to make both behaviors observable. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't have a strong rationale for choosing the fused versus separate, but it was the simplest to implement and presumably to change if/when Cranelift adds a corresponding intrinsic. Could you give a pointer for how/where to implement the "make both behaviors observable" in Miri? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The Miri implementation would go in […]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I implemented this and added a test in the spirit of […]. |
||
/// Returns `a * b + c` for `f64` values, non-deterministically executing | ||
/// either a fused multiply-add or two operations with rounding of the | ||
/// intermediate result. | ||
/// | ||
/// The operation is fused if the code generator determines that the target | ||
/// instruction set has support for a fused operation, and that the fused | ||
/// operation is more efficient than the equivalent, separate pair of mul | ||
/// and add instructions. It is unspecified whether or not a fused operation | ||
/// is selected; the choice may depend on, for example, the optimization | ||
/// level and the surrounding context. | ||
#[rustc_nounwind] | ||
#[cfg(not(bootstrap))] | ||
pub fn fmuladdf64(a: f64, b: f64, c: f64) -> f64; | ||
/// Returns `a * b + c` for `f128` values, non-deterministically executing | ||
/// either a fused multiply-add or two operations with rounding of the | ||
/// intermediate result. | ||
/// | ||
/// The operation is fused if the code generator determines that the target | ||
/// instruction set has support for a fused operation, and that the fused | ||
/// operation is more efficient than the equivalent, separate pair of mul | ||
/// and add instructions. It is unspecified whether or not a fused operation | ||
/// is selected; the choice may depend on, for example, the optimization | ||
/// level and the surrounding context. | ||
#[rustc_nounwind] | ||
#[cfg(not(bootstrap))] | ||
pub fn fmuladdf128(a: f128, b: f128, c: f128) -> f128; | ||
|
||
/// Returns the absolute value of an `f16`. | ||
/// | ||
/// The stabilized version of this intrinsic is | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,44 @@ | ||
#![feature(core_intrinsics)] | ||
use std::intrinsics::{fmuladdf32, fmuladdf64}; | ||
|
||
fn main() { | ||
let mut saw_zero = false; | ||
let mut saw_nonzero = false; | ||
for _ in 0..50 { | ||
let a = std::hint::black_box(0.1_f64); | ||
let b = std::hint::black_box(0.2); | ||
let c = std::hint::black_box(-a * b); | ||
// It is unspecified whether the following operation is fused or not. The | ||
// following evaluates to 0.0 if unfused, and nonzero (-1.66e-18) if fused. | ||
let x = unsafe { fmuladdf64(a, b, c) }; | ||
if x == 0.0 { | ||
saw_zero = true; | ||
} else { | ||
saw_nonzero = true; | ||
} | ||
} | ||
assert!( | ||
saw_zero && saw_nonzero, | ||
"`fmuladdf64` failed to be evaluated as both fused and unfused" | ||
); | ||
|
||
let mut saw_zero = false; | ||
let mut saw_nonzero = false; | ||
for _ in 0..50 { | ||
let a = std::hint::black_box(0.1_f32); | ||
let b = std::hint::black_box(0.2); | ||
let c = std::hint::black_box(-a * b); | ||
// It is unspecified whether the following operation is fused or not. The | ||
// following evaluates to 0.0 if unfused, and nonzero (-8.1956386e-10) if fused. | ||
let x = unsafe { fmuladdf32(a, b, c) }; | ||
if x == 0.0 { | ||
saw_zero = true; | ||
} else { | ||
saw_nonzero = true; | ||
} | ||
} | ||
assert!( | ||
saw_zero && saw_nonzero, | ||
"`fmuladdf32` failed to be evaluated as both fused and unfused" | ||
); | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
//@ run-pass | ||
#![feature(core_intrinsics)] | ||
|
||
use std::intrinsics::*; | ||
|
||
/// Asserts that two floating-point expressions differ by less than `1.0e-6`
/// in absolute value, panicking with both values in the message otherwise.
///
/// Each operand expression is evaluated exactly once, left to right.
macro_rules! assert_approx_eq {
    ($a:expr, $b:expr) => {{
        let lhs = &$a;
        let rhs = &$b;
        let delta = (*lhs - *rhs).abs();
        assert!(delta < 1.0e-6, "{} is not approximately equal to {}", *lhs, *rhs);
    }};
}
|
||
fn main() { | ||
unsafe { | ||
let nan: f32 = f32::NAN; | ||
let inf: f32 = f32::INFINITY; | ||
let neg_inf: f32 = f32::NEG_INFINITY; | ||
assert_approx_eq!(fmuladdf32(1.23, 4.5, 0.67), 6.205); | ||
assert_approx_eq!(fmuladdf32(-1.23, -4.5, -0.67), 4.865); | ||
assert_approx_eq!(fmuladdf32(0.0, 8.9, 1.2), 1.2); | ||
assert_approx_eq!(fmuladdf32(3.4, -0.0, 5.6), 5.6); | ||
assert!(fmuladdf32(nan, 7.8, 9.0).is_nan()); | ||
assert_eq!(fmuladdf32(inf, 7.8, 9.0), inf); | ||
assert_eq!(fmuladdf32(neg_inf, 7.8, 9.0), neg_inf); | ||
assert_eq!(fmuladdf32(8.9, inf, 3.2), inf); | ||
assert_eq!(fmuladdf32(-3.2, 2.4, neg_inf), neg_inf); | ||
} | ||
unsafe { | ||
let nan: f64 = f64::NAN; | ||
let inf: f64 = f64::INFINITY; | ||
let neg_inf: f64 = f64::NEG_INFINITY; | ||
assert_approx_eq!(fmuladdf64(1.23, 4.5, 0.67), 6.205); | ||
assert_approx_eq!(fmuladdf64(-1.23, -4.5, -0.67), 4.865); | ||
assert_approx_eq!(fmuladdf64(0.0, 8.9, 1.2), 1.2); | ||
assert_approx_eq!(fmuladdf64(3.4, -0.0, 5.6), 5.6); | ||
assert!(fmuladdf64(nan, 7.8, 9.0).is_nan()); | ||
assert_eq!(fmuladdf64(inf, 7.8, 9.0), inf); | ||
assert_eq!(fmuladdf64(neg_inf, 7.8, 9.0), neg_inf); | ||
assert_eq!(fmuladdf64(8.9, inf, 3.2), inf); | ||
assert_eq!(fmuladdf64(-3.2, 2.4, neg_inf), neg_inf); | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.