Speed up crc16() with carry-less multiplication on x86

crc16() gains a PCLMULQDQ based implementation. HAVE_X86_SIMD only tells us
what the compiler can emit, so the fast path is selected at run time with
__builtin_cpu_supports("pclmul") and the lookup table stays available as the
fallback on every build. The unit test compares crc16() against a bit-at-a-time
reference that shares no code or data with the implementation.

diff --git a/src/config.h b/src/config.h
index de0c24dcc2..df714d7678 100644
--- a/src/config.h
+++ b/src/config.h
@@ -383,6 +383,7 @@ void setcpuaffinity(const char *cpulist);
 
 #if HAVE_X86_SIMD
 #define ATTRIBUTE_TARGET_SSE2 __attribute__((target("sse2")))
+#define ATTRIBUTE_TARGET_PCLMUL __attribute__((target("pclmul")))
 #define ATTRIBUTE_TARGET_AVX2 __attribute__((target("avx2")))
 #define ATTRIBUTE_TARGET_AVX512 __attribute__((target("avx512f,avx512bw,avx512vl")))
 #else
diff --git a/src/crc16.c b/src/crc16.c
--- a/src/crc16.c
+++ b/src/crc16.c
@@ -1,5 +1,9 @@
 #include "server.h"
 
+#if HAVE_X86_SIMD
+#include <immintrin.h>
+#endif
+
 /*
  * Copyright 2001-2010 Georges Menie (www.menie.org)
  * Copyright 2010-2012 Redis Ltd. (adapted to Redis coding style)
@@ -78,11 +82,57 @@ static const uint16_t crc16tab[256]= {
     0xef1f,0xff3e,0xcf5d,0xdf7c,0xaf9b,0xbfba,0x8fd9,0x9ff8,
     0x6e17,0x7e36,0x4e55,0x5e74,0x2e93,0x3eb2,0x0ed1,0x1ef0
 };
 
+#if HAVE_X86_SIMD
+/* Generator polynomial x^16 + x^12 + x^5 + 1, with the x^16 term explicit. */
+#define CRC16_POLY 0x11021ULL
+/* Barrett constant: the quotient of x^64 divided by CRC16_POLY over GF(2). */
+#define CRC16_MU 0x111303471a041ULL
+
+/* Advance the CRC by one input byte using carry-less multiplication.
+ *
+ * The table-free update is crc' = ((crc ^ (v << 8)) * x^8) mod CRC16_POLY.
+ * The reduction is a Barrett reduction: multiplying the dividend by CRC16_MU
+ * and keeping the upper 64 bits of the product gives the quotient, and
+ * dividend ^ (quotient * CRC16_POLY) leaves the remainder in the low 16
+ * bits. Only call this on CPUs that support PCLMULQDQ. */
+ATTRIBUTE_TARGET_SSE2
+ATTRIBUTE_TARGET_PCLMUL
+static inline uint16_t crc16_base(uint16_t crc, uint8_t v) {
+    crc ^= v << 8;
+    const __m128i dividend = _mm_set_epi64x(0, (long long)((uint64_t)crc << 8));
+    const __m128i consts = _mm_set_epi64x(CRC16_POLY, CRC16_MU);
+
+    /* Low qword of dividend times low qword of consts (CRC16_MU). */
+    __m128i tmp = _mm_clmulepi64_si128(dividend, consts, 0x00);
+    /* High qword of that product (the quotient) times CRC16_POLY. */
+    tmp = _mm_clmulepi64_si128(tmp, consts, 0x11);
+    tmp = _mm_xor_si128(tmp, dividend);
+
+    return (uint16_t)_mm_extract_epi16(tmp, 0);
+}
+
+/* CRC16 of buf[0..len) computed with crc16_base(). The loop lives inside a
+ * function compiled for the PCLMUL target so the per-byte step can be inlined
+ * into it. */
+ATTRIBUTE_TARGET_SSE2
+ATTRIBUTE_TARGET_PCLMUL
+static uint16_t crc16_pclmul(const char *buf, int len) {
+    uint16_t crc = 0;
+    for (int i = 0; i < len; i++) crc = crc16_base(crc, (uint8_t)buf[i]);
+    return crc;
+}
+#endif
+
 uint16_t crc16(const char *buf, int len) {
     int counter;
     uint16_t crc = 0;
+#if HAVE_X86_SIMD
+    /* HAVE_X86_SIMD is decided at compile time; ask the running CPU before
+     * executing PCLMULQDQ, and fall through to the table otherwise. */
+    if (__builtin_cpu_supports("pclmul")) return crc16_pclmul(buf, len);
+#endif
     for (counter = 0; counter < len; counter++)
             crc = (crc<<8) ^ crc16tab[((crc>>8) ^ *buf++)&0x00FF];
     return crc;
 }
diff --git a/src/unit/test_crc16.c b/src/unit/test_crc16.c
new file mode 100644
--- /dev/null
+++ b/src/unit/test_crc16.c
@@ -0,0 +1,55 @@
+#include <stdint.h>
+#include <string.h>
+#include "../crc16.c"
+
+#include "test_help.h"
+
+/* Reference CRC16 (XMODEM: poly 0x1021, init 0, no reflection, no final xor)
+ * computed one bit at a time. It shares neither the lookup table nor the
+ * carry-less multiply code with crc16(), so it checks both implementations. */
+static uint16_t crc16_ref(const char *buf, int len) {
+    uint16_t crc = 0;
+    for (int i = 0; i < len; i++) {
+        crc ^= (uint16_t)((uint8_t)buf[i] << 8);
+        for (int bit = 0; bit < 8; bit++) {
+            if (crc & 0x8000) {
+                crc = (uint16_t)((crc << 1) ^ 0x1021);
+            } else {
+                crc = (uint16_t)(crc << 1);
+            }
+        }
+    }
+    return crc;
+}
+
+int test_crc16(int argc, char **argv, int flags) {
+    UNUSED(argc);
+    UNUSED(argv);
+    UNUSED(flags);
+
+    /* Known answer from the XMODEM parameter set. */
+    const char *s = "123456789";
+    int len = (int)strlen(s);
+    TEST_ASSERT(crc16(s, len) == 0x31C3);
+    TEST_ASSERT(crc16(s, len) == crc16_ref(s, len));
+
+    /* Empty input leaves the CRC at its initial value. */
+    TEST_ASSERT(crc16(s, 0) == 0);
+
+    /* Every possible single-byte input. */
+    for (int i = 0; i <= 0xff; i++) {
+        char input[1];
+        input[0] = (char)i;
+        int sz = sizeof(input);
+        TEST_ASSERT(crc16(input, sz) == crc16_ref(input, sz));
+    }
+
+    /* Every prefix of a buffer holding all byte values, to cover the carry of
+     * state from one byte to the next. */
+    char all[256];
+    for (int i = 0; i < 256; i++) all[i] = (char)i;
+    for (int n = 0; n <= 256; n++) {
+        TEST_ASSERT(crc16(all, n) == crc16_ref(all, n));
+    }
+    return 0;
+}
diff --git a/src/unit/test_files.h b/src/unit/test_files.h
index ca557984be..6686245c3c 100644
--- a/src/unit/test_files.h
+++ b/src/unit/test_files.h
@@ -7,6 +7,7 @@ typedef struct unitTest {
 } unitTest;
 
 int test_popcount(int argc, char **argv, int flags);
+int test_crc16(int argc, char **argv, int flags);
 int test_crc64(int argc, char **argv, int flags);
 int test_crc64combine(int argc, char **argv, int flags);
 int test_dictCreate(int argc, char **argv, int flags);
@@ -256,6 +257,7 @@ int test_zmallocAllocReallocCallocAndFree(int argc, char **argv, int flags);
 int test_zmallocAllocZeroByteAndFree(int argc, char **argv, int flags);
 
 unitTest __test_bitops_c[] = {{"test_popcount", test_popcount}, {NULL, NULL}};
+unitTest __test_crc16_c[] = {{"test_crc16", test_crc16}, {NULL, NULL}};
 unitTest __test_crc64_c[] = {{"test_crc64", test_crc64}, {NULL, NULL}};
 unitTest __test_crc64combine_c[] = {{"test_crc64combine", test_crc64combine}, {NULL, NULL}};
 unitTest __test_dict_c[] = {{"test_dictCreate", test_dictCreate}, {"test_dictAdd16Keys", test_dictAdd16Keys}, {"test_dictDisableResize", test_dictDisableResize}, {"test_dictAddOneKeyTriggerResize", test_dictAddOneKeyTriggerResize}, {"test_dictDeleteKeys", test_dictDeleteKeys}, {"test_dictDeleteOneKeyTriggerResize", test_dictDeleteOneKeyTriggerResize}, {"test_dictEmptyDirAdd128Keys", test_dictEmptyDirAdd128Keys}, {"test_dictDisableResizeReduceTo3", test_dictDisableResizeReduceTo3}, {"test_dictDeleteOneKeyTriggerResizeAgain", test_dictDeleteOneKeyTriggerResizeAgain}, {"test_dictBenchmark", test_dictBenchmark}, {NULL, NULL}};
@@ -285,6 +287,7 @@ struct unitTestSuite {
     unitTest *tests;
 } unitTestSuite[] = {
     {"test_bitops.c", __test_bitops_c},
+    {"test_crc16.c", __test_crc16_c},
     {"test_crc64.c", __test_crc64_c},
     {"test_crc64combine.c", __test_crc64combine_c},
     {"test_dict.c", __test_dict_c},