@@ -970,28 +970,32 @@ Expr pow(Expr x, Expr y);
970
970
* mantissa. Vectorizes cleanly. */
971
971
Expr erf (const Expr &x);
972
972
973
- /** Fast vectorizable approximation to some trigonometric functions for Float(32).
974
- * Absolute approximation error is less than 1e-5. */
973
+ /** Fast vectorizable approximation to some trigonometric functions for
974
+ * Float(32). Absolute approximation error is less than 1e-5. Slow on x86 if
975
+ * you don't have at least SSE 4.1. */
975
976
// @{
976
977
Expr fast_sin (const Expr &x);
977
978
Expr fast_cos (const Expr &x);
978
979
// @}
979
980
980
981
/** Fast approximate cleanly vectorizable log for Float(32). Returns
981
982
* nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
982
- * mantissa. Vectorizes cleanly. */
983
+ * mantissa. Vectorizes cleanly. Slow on x86 if you don't
984
+ * have at least SSE 4.1. */
983
985
Expr fast_log (const Expr &x);
984
986
985
987
/** Fast approximate cleanly vectorizable exp for Float(32). Returns
986
988
* nonsense for inputs that would overflow or underflow. Typically
987
989
* accurate up to the last 5 bits of the mantissa. Gets worse when
988
- * approaching overflow. Vectorizes cleanly. */
990
+ * approaching overflow. Vectorizes cleanly. Slow on x86 if you don't
991
+ * have at least SSE 4.1. */
989
992
Expr fast_exp (const Expr &x);
990
993
991
994
/** Fast approximate cleanly vectorizable pow for Float(32). Returns
992
995
* nonsense for x < 0.0f. Accurate up to the last 5 bits of the
993
996
* mantissa for typical exponents. Gets worse when approaching
994
- * overflow. Vectorizes cleanly. */
997
+ * overflow. Vectorizes cleanly. Slow on x86 if you don't
998
+ * have at least SSE 4.1. */
995
999
Expr fast_pow (Expr x, Expr y);
996
1000
997
1001
/** Fast approximate inverse for Float(32). Corresponds to the rcpps
0 commit comments