diff --git a/include/crc32c/crc32c.h b/include/crc32c/crc32c.h index e8a7817..6e1e8bd 100644 --- a/include/crc32c/crc32c.h +++ b/include/crc32c/crc32c.h @@ -59,6 +59,13 @@ inline uint32_t Crc32c(const char* data, size_t count) { return Extend(0, reinterpret_cast<const uint8_t*>(data), count); } +#if defined(__cpp_lib_byte) +// Computes the CRC32C of "count" bytes in the buffer pointed by "data". +inline uint32_t Crc32c(const std::byte* data, size_t count) { + return Extend(0, reinterpret_cast<const uint8_t*>(data), count); +} +#endif // defined(__cpp_lib_byte) + // Computes the CRC32C of the string's content. inline uint32_t Crc32c(const std::string& string) { return Crc32c(reinterpret_cast<const uint8_t*>(string.data()), diff --git a/src/crc32c_arm64.cc b/src/crc32c_arm64.cc index 711616c..154dceb 100644 --- a/src/crc32c_arm64.cc +++ b/src/crc32c_arm64.cc @@ -15,6 +15,7 @@ #include <cstring> #include "./crc32c_internal.h" +#include "./crc32c_read_le.h" #ifdef CRC32C_HAVE_CONFIG_H #include "crc32c/crc32c_config.h" #endif @@ -28,16 +29,12 @@ #define SEGMENTBYTES 256 // compute 8bytes for each segment parallelly -#define CRC32C32BYTES(P, IND) \ - do { \ - std::memcpy(&d64, (P) + SEGMENTBYTES * 1 + (IND) * 8, sizeof(d64)); \ - crc1 = __crc32cd(crc1, d64); \ - std::memcpy(&d64, (P) + SEGMENTBYTES * 2 + (IND) * 8, sizeof(d64)); \ - crc2 = __crc32cd(crc2, d64); \ - std::memcpy(&d64, (P) + SEGMENTBYTES * 3 + (IND) * 8, sizeof(d64)); \ - crc3 = __crc32cd(crc3, d64); \ - std::memcpy(&d64, (P) + SEGMENTBYTES * 0 + (IND) * 8, sizeof(d64)); \ - crc0 = __crc32cd(crc0, d64); \ +#define CRC32C32BYTES(P, IND) \ + do { \ + crc1 = __crc32cd(crc1, ReadUint64LE((P) + SEGMENTBYTES * 1 + (IND)*8)); \ + crc2 = __crc32cd(crc2, ReadUint64LE((P) + SEGMENTBYTES * 2 + (IND)*8)); \ + crc3 = __crc32cd(crc3, ReadUint64LE((P) + SEGMENTBYTES * 3 + (IND)*8)); \ + crc0 = __crc32cd(crc0, ReadUint64LE((P) + SEGMENTBYTES * 0 + (IND)*8)); \ } while (0); // compute 8*8 bytes for each segment parallelly @@ -69,9 +66,6 @@ uint32_t ExtendArm64(uint32_t crc, const
uint8_t *data, size_t size) { int64_t length = size; uint32_t crc0, crc1, crc2, crc3; uint64_t t0, t1, t2; - uint16_t d16; - uint32_t d32; - uint64_t d64; // k0=CRC(x^(3*SEGMENTBYTES*8)), k1=CRC(x^(2*SEGMENTBYTES*8)), // k2=CRC(x^(SEGMENTBYTES*8)) @@ -92,8 +86,7 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) { t2 = (uint64_t)vmull_p64(crc2, k2); t1 = (uint64_t)vmull_p64(crc1, k1); t0 = (uint64_t)vmull_p64(crc0, k0); - std::memcpy(&d64, data, sizeof(d64)); - crc = __crc32cd(crc3, d64); + crc = __crc32cd(crc3, ReadUint64LE(data)); data += sizeof(uint64_t); crc ^= __crc32cd(0, t2); crc ^= __crc32cd(0, t1); @@ -103,21 +96,18 @@ uint32_t ExtendArm64(uint32_t crc, const uint8_t *data, size_t size) { } while (length >= 8) { - std::memcpy(&d64, data, sizeof(d64)); - crc = __crc32cd(crc, d64); + crc = __crc32cd(crc, ReadUint64LE(data)); data += 8; length -= 8; } if (length & 4) { - std::memcpy(&d32, data, sizeof(d32)); - crc = __crc32cw(crc, d32); + crc = __crc32cw(crc, ReadUint32LE(data)); data += 4; } if (length & 2) { - std::memcpy(&d16, data, sizeof(d16)); - crc = __crc32ch(crc, d16); + crc = __crc32ch(crc, ReadUint16LE(data)); data += 2; } diff --git a/src/crc32c_read_le.h b/src/crc32c_read_le.h index 673a2a0..36567b5 100644 --- a/src/crc32c_read_le.h +++ b/src/crc32c_read_le.h @@ -14,13 +14,23 @@ namespace crc32c { -// Reads a little-endian 32-bit integer from a 32-bit-aligned buffer. +// Reads a little-endian 16-bit integer from bytes, not necessarily aligned. +inline uint16_t ReadUint16LE(const uint8_t* buffer) { +#if BYTE_ORDER_BIG_ENDIAN + return ((uint16_t{buffer[0]}) | (uint16_t{buffer[1]} << 8)); +#else // !BYTE_ORDER_BIG_ENDIAN + uint16_t result; + // This should be optimized to a single instruction. + std::memcpy(&result, buffer, sizeof(result)); + return result; +#endif // BYTE_ORDER_BIG_ENDIAN +} + +// Reads a little-endian 32-bit integer from bytes, not necessarily aligned. 
inline uint32_t ReadUint32LE(const uint8_t* buffer) { #if BYTE_ORDER_BIG_ENDIAN - return ((static_cast<uint32_t>(static_cast<uint8_t>(buffer[0]))) | - (static_cast<uint32_t>(static_cast<uint8_t>(buffer[1])) << 8) | - (static_cast<uint32_t>(static_cast<uint8_t>(buffer[2])) << 16) | - (static_cast<uint32_t>(static_cast<uint8_t>(buffer[3])) << 24)); + return ((uint32_t{buffer[0]}) | (uint32_t{buffer[1]} << 8) | + (uint32_t{buffer[2]} << 16) | (uint32_t{buffer[3]} << 24)); #else // !BYTE_ORDER_BIG_ENDIAN uint32_t result; // This should be optimized to a single instruction. @@ -29,17 +39,13 @@ inline uint32_t ReadUint32LE(const uint8_t* buffer) { #endif // BYTE_ORDER_BIG_ENDIAN } -// Reads a little-endian 64-bit integer from a 64-bit-aligned buffer. +// Reads a little-endian 64-bit integer from bytes, not necessarily aligned. inline uint64_t ReadUint64LE(const uint8_t* buffer) { #if BYTE_ORDER_BIG_ENDIAN - return ((static_cast<uint64_t>(static_cast<uint8_t>(buffer[0]))) | - (static_cast<uint64_t>(static_cast<uint8_t>(buffer[1])) << 8) | - (static_cast<uint64_t>(static_cast<uint8_t>(buffer[2])) << 16) | - (static_cast<uint64_t>(static_cast<uint8_t>(buffer[3])) << 24) | - (static_cast<uint64_t>(static_cast<uint8_t>(buffer[4])) << 32) | - (static_cast<uint64_t>(static_cast<uint8_t>(buffer[5])) << 40) | - (static_cast<uint64_t>(static_cast<uint8_t>(buffer[6])) << 48) | - (static_cast<uint64_t>(static_cast<uint8_t>(buffer[7])) << 56)); + return ((uint64_t{buffer[0]}) | (uint64_t{buffer[1]} << 8) | + (uint64_t{buffer[2]} << 16) | (uint64_t{buffer[3]} << 24) | + (uint64_t{buffer[4]} << 32) | (uint64_t{buffer[5]} << 40) | + (uint64_t{buffer[6]} << 48) | (uint64_t{buffer[7]} << 56)); #else // !BYTE_ORDER_BIG_ENDIAN uint64_t result; // This should be optimized to a single instruction.
diff --git a/src/crc32c_read_le_unittest.cc b/src/crc32c_read_le_unittest.cc index 2a30302..8fbfc41 100644 --- a/src/crc32c_read_le_unittest.cc +++ b/src/crc32c_read_le_unittest.cc @@ -13,20 +13,25 @@ namespace crc32c { +TEST(Crc32CReadLETest, ReadUint16LE) { + // little-endian 0x1234 + uint8_t bytes[] = {0x34, 0x12}; + + EXPECT_EQ(uint16_t{0x1234}, ReadUint16LE(bytes)); +} + TEST(Crc32CReadLETest, ReadUint32LE) { // little-endian 0x12345678 - alignas(4) uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12}; + uint8_t bytes[] = {0x78, 0x56, 0x34, 0x12}; - ASSERT_EQ(RoundUp<4>(bytes), bytes) << "Stack array is not aligned"; - EXPECT_EQ(static_cast<uint32_t>(0x12345678), ReadUint32LE(bytes)); + EXPECT_EQ(uint32_t{0x12345678}, ReadUint32LE(bytes)); } TEST(Crc32CReadLETest, ReadUint64LE) { // little-endian 0x123456789ABCDEF0 - alignas(8) uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12}; + uint8_t bytes[] = {0xF0, 0xDE, 0xBC, 0x9A, 0x78, 0x56, 0x34, 0x12}; - ASSERT_EQ(RoundUp<8>(bytes), bytes) << "Stack array is not aligned"; - EXPECT_EQ(static_cast<uint64_t>(0x123456789ABCDEF0), ReadUint64LE(bytes)); + EXPECT_EQ(uint64_t{0x123456789ABCDEF0}, ReadUint64LE(bytes)); } } // namespace crc32c