Skip to content

Commit 0cdfa8c

Browse files
authored
Merge pull request #800 from gangliao/check_avx
Add SIMD flags for runtime check
2 parents 9336756 + bf53427 commit 0cdfa8c

File tree

4 files changed

+181
-0
lines changed

4 files changed

+181
-0
lines changed

paddle/utils/CpuId.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
http://www.apache.org/licenses/LICENSE-2.0
6+
Unless required by applicable law or agreed to in writing, software
7+
distributed under the License is distributed on an "AS IS" BASIS,
8+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
See the License for the specific language governing permissions and
10+
limitations under the License. */
11+
12+
#include "paddle/utils/CpuId.h"
13+
#include "paddle/utils/Util.h"
14+
15+
// CPUID(info, x): executes the CPUID instruction for leaf `x`, sub-leaf 0,
// and stores EAX, EBX, ECX, EDX into info[0..3]. `info` is expected to be an
// array of four `unsigned int`.
#ifdef _WIN32

#include <intrin.h>

/// for MSVC
/// __cpuidex is declared with an `int[4]` output buffer, while callers in
/// this file use `unsigned int[4]`, hence the pointer cast.
#define CPUID(info, x) __cpuidex(reinterpret_cast<int*>(info), (x), 0)

#else

#include <cpuid.h>

/// for GCC/Clang
#define CPUID(info, x) \
  __cpuid_count((x), 0, (info)[0], (info)[1], (info)[2], (info)[3])

#endif
28+
29+
namespace paddle {
30+
31+
/// Detects the SIMD extensions reported by the CPUID instruction and records
/// them in simd_flags_.
///
/// Every leaf is queried only after confirming that the processor implements
/// it. Asking for a leaf above the supported maximum does not fail; the
/// registers may then hold data belonging to another leaf (Intel documents
/// that the highest basic leaf is returned), which would be misread as
/// feature bits.
///
/// NOTE(review): the AVX/AVX2/AVX512 bits read here describe CPU capability
/// only; operating-system support for the extended register state
/// (OSXSAVE/XGETBV) is not checked.
SIMDFlags::SIMDFlags() {
  unsigned int cpuInfo[4] = {0, 0, 0, 0};

  // CPUID: https://en.wikipedia.org/wiki/CPUID
  // Leaf 0: EAX (cpuInfo[0]) is the highest supported standard leaf.
  CPUID(cpuInfo, 0x00000000);
  const unsigned int maxStandardLeaf = cpuInfo[0];

  if (maxStandardLeaf >= 0x00000001) {
    // Leaf 1: feature bits live in EDX (cpuInfo[3]) and ECX (cpuInfo[2]).
    CPUID(cpuInfo, 0x00000001);
    simd_flags_ |= cpuInfo[3] & (1u << 25) ? SIMD_SSE : SIMD_NONE;
    simd_flags_ |= cpuInfo[3] & (1u << 26) ? SIMD_SSE2 : SIMD_NONE;
    simd_flags_ |= cpuInfo[2] & (1u << 0) ? SIMD_SSE3 : SIMD_NONE;
    simd_flags_ |= cpuInfo[2] & (1u << 9) ? SIMD_SSSE3 : SIMD_NONE;
    simd_flags_ |= cpuInfo[2] & (1u << 19) ? SIMD_SSE41 : SIMD_NONE;
    simd_flags_ |= cpuInfo[2] & (1u << 20) ? SIMD_SSE42 : SIMD_NONE;
    simd_flags_ |= cpuInfo[2] & (1u << 12) ? SIMD_FMA3 : SIMD_NONE;
    simd_flags_ |= cpuInfo[2] & (1u << 28) ? SIMD_AVX : SIMD_NONE;
  }

  if (maxStandardLeaf >= 0x00000007) {
    // Leaf 7, sub-leaf 0: extended feature bits in EBX (cpuInfo[1]).
    CPUID(cpuInfo, 0x00000007);
    simd_flags_ |= cpuInfo[1] & (1u << 5) ? SIMD_AVX2 : SIMD_NONE;
    simd_flags_ |= cpuInfo[1] & (1u << 16) ? SIMD_AVX512 : SIMD_NONE;
  }

  // Leaf 0x80000000: EAX is the highest supported extended leaf.
  CPUID(cpuInfo, 0x80000000);
  const unsigned int maxExtendedLeaf = cpuInfo[0];

  if (maxExtendedLeaf >= 0x80000001) {
    // Extended leaf 0x80000001: FMA4 is reported in ECX (cpuInfo[2]).
    CPUID(cpuInfo, 0x80000001);
    simd_flags_ |= cpuInfo[2] & (1u << 16) ? SIMD_FMA4 : SIMD_NONE;
  }
}
51+
52+
/// Returns a pointer to the process-wide SIMDFlags object.
///
/// The object is a function-local static, so it is constructed (and CPUID is
/// queried) the first time this function is called; every call returns the
/// same address.
SIMDFlags* SIMDFlags::instance() {
  static SIMDFlags detectedFlags;
  SIMDFlags* const shared = &detectedFlags;
  return shared;
}
56+
57+
} // namespace paddle

paddle/utils/CpuId.h

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
http://www.apache.org/licenses/LICENSE-2.0
6+
Unless required by applicable law or agreed to in writing, software
7+
distributed under the License is distributed on an "AS IS" BASIS,
8+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
See the License for the specific language governing permissions and
10+
limitations under the License. */
11+
12+
#pragma once
13+
14+
#include <iostream>
15+
#include "DisableCopy.h"
16+
17+
namespace paddle {
18+
19+
class SIMDFlags final {
20+
public:
21+
DISABLE_COPY(SIMDFlags);
22+
23+
SIMDFlags();
24+
25+
static SIMDFlags* instance();
26+
27+
inline bool isSSE() const { return simd_flags_ & SIMD_SSE; }
28+
inline bool isSSE2() const { return simd_flags_ & SIMD_SSE2; }
29+
inline bool isSSE3() const { return simd_flags_ & SIMD_SSE3; }
30+
inline bool isSSSE3() const { return simd_flags_ & SIMD_SSSE3; }
31+
inline bool isSSE41() const { return simd_flags_ & SIMD_SSE41; }
32+
inline bool isSSE42() const { return simd_flags_ & SIMD_SSE42; }
33+
inline bool isFMA3() const { return simd_flags_ & SIMD_FMA3; }
34+
inline bool isFMA4() const { return simd_flags_ & SIMD_FMA4; }
35+
inline bool isAVX() const { return simd_flags_ & SIMD_AVX; }
36+
inline bool isAVX2() const { return simd_flags_ & SIMD_AVX2; }
37+
inline bool isAVX512()const { return simd_flags_ & SIMD_AVX512;}
38+
39+
private:
40+
enum simd_t {
41+
SIMD_NONE = 0, ///< None
42+
SIMD_SSE = 1 << 0, ///< SSE
43+
SIMD_SSE2 = 1 << 1, ///< SSE 2
44+
SIMD_SSE3 = 1 << 2, ///< SSE 3
45+
SIMD_SSSE3 = 1 << 3, ///< SSSE 3
46+
SIMD_SSE41 = 1 << 4, ///< SSE 4.1
47+
SIMD_SSE42 = 1 << 5, ///< SSE 4.2
48+
SIMD_FMA3 = 1 << 6, ///< FMA 3
49+
SIMD_FMA4 = 1 << 7, ///< FMA 4
50+
SIMD_AVX = 1 << 8, ///< AVX
51+
SIMD_AVX2 = 1 << 9, ///< AVX 2
52+
SIMD_AVX512 = 1 << 10, ///< AVX 512
53+
};
54+
55+
/// simd flags
56+
int simd_flags_ = SIMD_NONE;
57+
};
58+
59+
// Convenience predicates over the shared SIMDFlags object.
// Macros are expanded at the point of use, not inside this namespace, so the
// class name is fully qualified to keep them usable from any scope.
#define HAS_SSE ::paddle::SIMDFlags::instance()->isSSE()
#define HAS_SSE2 ::paddle::SIMDFlags::instance()->isSSE2()
#define HAS_SSE3 ::paddle::SIMDFlags::instance()->isSSE3()
#define HAS_SSSE3 ::paddle::SIMDFlags::instance()->isSSSE3()
#define HAS_SSE41 ::paddle::SIMDFlags::instance()->isSSE41()
#define HAS_SSE42 ::paddle::SIMDFlags::instance()->isSSE42()
#define HAS_FMA3 ::paddle::SIMDFlags::instance()->isFMA3()
#define HAS_FMA4 ::paddle::SIMDFlags::instance()->isFMA4()
#define HAS_AVX ::paddle::SIMDFlags::instance()->isAVX()
#define HAS_AVX2 ::paddle::SIMDFlags::instance()->isAVX2()
#define HAS_AVX512 ::paddle::SIMDFlags::instance()->isAVX512()
70+
71+
} // namespace paddle

paddle/utils/tests/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ add_simple_unittest(test_StringUtils)
55
add_simple_unittest(test_CustomStackTrace)
66
add_simple_unittest(test_ThreadBarrier)
77
add_simple_unittest(test_SpinLock)
8+
add_simple_unittest(test_SIMDFlags)
89

910
add_executable(
1011
test_CustomStackTracePrint
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
Licensed under the Apache License, Version 2.0 (the "License");
3+
you may not use this file except in compliance with the License.
4+
You may obtain a copy of the License at
5+
http://www.apache.org/licenses/LICENSE-2.0
6+
Unless required by applicable law or agreed to in writing, software
7+
distributed under the License is distributed on an "AS IS" BASIS,
8+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
9+
See the License for the specific language governing permissions and
10+
limitations under the License. */
11+
12+
13+
#include <gtest/gtest.h>
14+
15+
#include "paddle/utils/CpuId.h"
16+
#include "paddle/utils/Logging.h"
17+
#include "paddle/utils/Util.h"
18+
19+
using namespace paddle; // NOLINT
20+
21+
// Cross-checks the CPUID-based flags against GCC's own runtime detection.
// Only compiled with GCC (not Clang); elsewhere the test body is empty.
//
// __builtin_cpu_supports returns a non-zero int (not necessarily 1) when the
// feature is available, so the result is normalised to bool before comparing.
// EXPECT_EQ is used so a mismatch is reported as a test failure, printing
// both values, and the remaining features are still checked.
TEST(SIMDFlags, gccTest) {
#if (defined(__GNUC__) || defined(__GNUG__)) && !(defined(__clang__))
  EXPECT_EQ(__builtin_cpu_supports("sse") != 0, HAS_SSE);
  EXPECT_EQ(__builtin_cpu_supports("sse2") != 0, HAS_SSE2);
  EXPECT_EQ(__builtin_cpu_supports("sse3") != 0, HAS_SSE3);
  EXPECT_EQ(__builtin_cpu_supports("ssse3") != 0, HAS_SSSE3);
  EXPECT_EQ(__builtin_cpu_supports("sse4.1") != 0, HAS_SSE41);
  EXPECT_EQ(__builtin_cpu_supports("sse4.2") != 0, HAS_SSE42);
  EXPECT_EQ(__builtin_cpu_supports("avx") != 0, HAS_AVX);
  EXPECT_EQ(__builtin_cpu_supports("avx2") != 0, HAS_AVX2);
#endif
}
33+
34+
// Logs every detected SIMD flag as true/false. Nothing is asserted; this
// test exercises each accessor and gives a human-readable summary in the log.
TEST(SIMDFlags, normalPrint) {
  auto simd = SIMDFlags::instance();
  // std::boolalpha is repeated because each LOG statement is streamed
  // separately.
  LOG(INFO) << "Has SSE: " << std::boolalpha << simd->isSSE();
  LOG(INFO) << "Has SSE2: " << std::boolalpha << simd->isSSE2();
  LOG(INFO) << "Has SSE3: " << std::boolalpha << simd->isSSE3();
  LOG(INFO) << "Has SSSE3: " << std::boolalpha << simd->isSSSE3();
  LOG(INFO) << "Has SSE4.1: " << std::boolalpha << simd->isSSE41();
  LOG(INFO) << "Has SSE4.2: " << std::boolalpha << simd->isSSE42();
  LOG(INFO) << "Has FMA3: " << std::boolalpha << simd->isFMA3();
  LOG(INFO) << "Has FMA4: " << std::boolalpha << simd->isFMA4();
  LOG(INFO) << "Has AVX: " << std::boolalpha << simd->isAVX();
  LOG(INFO) << "Has AVX2: " << std::boolalpha << simd->isAVX2();
  LOG(INFO) << "Has AVX512: " << std::boolalpha << simd->isAVX512();
}
47+
48+
int main(int argc, char** argv) {
49+
testing::InitGoogleTest(&argc, argv);
50+
paddle::initMain(argc, argv);
51+
return RUN_ALL_TESTS();
52+
}

0 commit comments

Comments
 (0)