Skip to content

Commit efb77e5

Browse files
authored
Skip fast exp/log/pow/sin/cosine tests without sse 4.1 (#8541)
Fixes #8536
1 parent b5a5ca2 commit efb77e5

File tree

3 files changed

+22
-5
lines changed

3 files changed

+22
-5
lines changed

src/IROperator.h

+9 -5
Original file line number | Diff line number | Diff line change
@@ -970,28 +970,32 @@ Expr pow(Expr x, Expr y);
970970
* mantissa. Vectorizes cleanly. */
971971
Expr erf(const Expr &x);
972972

973-
/** Fast vectorizable approximation to some trigonometric functions for Float(32).
974-
* Absolute approximation error is less than 1e-5. */
973+
/** Fast vectorizable approximation to some trigonometric functions for
974+
* Float(32). Absolute approximation error is less than 1e-5. Slow on x86 if
975+
* you don't have at least sse 4.1. */
975976
// @{
976977
Expr fast_sin(const Expr &x);
977978
Expr fast_cos(const Expr &x);
978979
// @}
979980

980981
/** Fast approximate cleanly vectorizable log for Float(32). Returns
981982
* nonsense for x <= 0.0f. Accurate up to the last 5 bits of the
982-
* mantissa. Vectorizes cleanly. */
983+
* mantissa. Vectorizes cleanly. Slow on x86 if you don't
984+
* have at least sse 4.1. */
983985
Expr fast_log(const Expr &x);
984986

985987
/** Fast approximate cleanly vectorizable exp for Float(32). Returns
986988
* nonsense for inputs that would overflow or underflow. Typically
987989
* accurate up to the last 5 bits of the mantissa. Gets worse when
988-
* approaching overflow. Vectorizes cleanly. */
990+
* approaching overflow. Vectorizes cleanly. Slow on x86 if you don't
991+
* have at least sse 4.1. */
989992
Expr fast_exp(const Expr &x);
990993

991994
/** Fast approximate cleanly vectorizable pow for Float(32). Returns
992995
* nonsense for x < 0.0f. Accurate up to the last 5 bits of the
993996
* mantissa for typical exponents. Gets worse when approaching
994-
* overflow. Vectorizes cleanly. */
997+
* overflow. Vectorizes cleanly. Slow on x86 if you don't
998+
* have at least sse 4.1. */
995999
Expr fast_pow(Expr x, Expr y);
9961000

9971001
/** Fast approximate inverse for Float(32). Corresponds to the rcpps

test/performance/fast_pow.cpp

+6
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,12 @@ int main(int argc, char **argv) {
2020
printf("HL_TARGET is: %s\n", hl_target.to_string().c_str());
2121
printf("HL_JIT_TARGET is: %s\n", hl_jit_target.to_string().c_str());
2222

23+
if (hl_jit_target.arch == Target::X86 &&
24+
!hl_jit_target.has_feature(Target::SSE41)) {
25+
printf("[SKIP] These intrinsics are known to be slow on x86 without sse 4.1.\n");
26+
return 0;
27+
}
28+
2329
if (hl_jit_target.arch == Target::WebAssembly) {
2430
printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
2531
return 0;

test/performance/fast_sine_cosine.cpp

+7
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,13 @@ using namespace Halide::Tools;
1010

1111
int main(int argc, char **argv) {
1212
Target target = get_jit_target_from_environment();
13+
14+
if (target.arch == Target::X86 &&
15+
!target.has_feature(Target::SSE41)) {
16+
printf("[SKIP] These intrinsics are known to be slow on x86 without sse 4.1.\n");
17+
return 0;
18+
}
19+
1320
if (target.arch == Target::WebAssembly) {
1421
printf("[SKIP] Performance tests are meaningless and/or misleading under WebAssembly interpreter.\n");
1522
return 0;

0 commit comments

Comments
 (0)