diff options
author | Kostya Kortchinsky <kostyak@google.com> | 2017-05-09 15:12:38 +0000 |
---|---|---|
committer | Kostya Kortchinsky <kostyak@google.com> | 2017-05-09 15:12:38 +0000 |
commit | b5a980955aabf4d683d37ebbc9d48a724428786b (patch) | |
tree | b82d37f759f534469fc7bbbb249ef3d254cd37c1 /lib/scudo | |
parent | e12d24d3e1b367c601b6b9b6f45ede69ff03deee (diff) |
[scudo] CRC32 optimizations
Summary:
This change optimizes several aspects of the checksum used for chunk headers.
First, there is no point in checking the weak symbol `computeHardwareCRC32`
everytime, it will either be there or not when we start, so check it once
during initialization and set the checksum type accordingly.
Then, the loading of `HashAlgorithm` for SSE versions (and ARM equivalent) was
not optimized out, while not necessary. So I reshuffled that part of the code,
which duplicates a tiny bit of code, but ends up in a much cleaner assembly
(and faster as we avoid an extraneous load and some calls).
The following code is the checksum at the end of `scudoMalloc` for x86_64 with
full SSE 4.2, before:
```
mov rax, 0FFFFFFFFFFFFFFh
shl r10, 38h
mov edi, dword ptr cs:_ZN7__scudoL6CookieE ; __scudo::Cookie
and r14, rax
lea rsi, [r13-10h]
movzx eax, cs:_ZN7__scudoL13HashAlgorithmE ; __scudo::HashAlgorithm
or r14, r10
mov rbx, r14
xor bx, bx
call _ZN7__scudo20computeHardwareCRC32Ejm ; __scudo::computeHardwareCRC32(uint,ulong)
mov rsi, rbx
mov edi, eax
call _ZN7__scudo20computeHardwareCRC32Ejm ; __scudo::computeHardwareCRC32(uint,ulong)
mov r14w, ax
mov rax, r13
mov [r13-10h], r14
```
After:
```
mov rax, cs:_ZN7__scudoL6CookieE ; __scudo::Cookie
lea rcx, [rbx-10h]
mov rdx, 0FFFFFFFFFFFFFFh
and r14, rdx
shl r9, 38h
or r14, r9
crc32 eax, rcx
mov rdx, r14
xor dx, dx
mov eax, eax
crc32 eax, rdx
mov r14w, ax
mov rax, rbx
mov [rbx-10h], r14
```
Reviewers: dvyukov, alekseyshl, kcc
Reviewed By: alekseyshl
Subscribers: aemerson, rengolin, llvm-commits
Differential Revision: https://reviews.llvm.org/D32971
git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@302538 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'lib/scudo')
-rw-r--r-- | lib/scudo/scudo_allocator.cpp | 48 | ||||
-rw-r--r-- | lib/scudo/scudo_crc32.cpp | 19 | ||||
-rw-r--r-- | lib/scudo/scudo_crc32.h | 101 | ||||
-rw-r--r-- | lib/scudo/scudo_utils.h | 59 |
4 files changed, 129 insertions, 98 deletions
diff --git a/lib/scudo/scudo_allocator.cpp b/lib/scudo/scudo_allocator.cpp index 5420fc964..2b7f099df 100644 --- a/lib/scudo/scudo_allocator.cpp +++ b/lib/scudo/scudo_allocator.cpp @@ -15,6 +15,7 @@ //===----------------------------------------------------------------------===// #include "scudo_allocator.h" +#include "scudo_crc32.h" #include "scudo_tls.h" #include "scudo_utils.h" @@ -34,21 +35,28 @@ static uptr Cookie; // at compilation or at runtime. static atomic_uint8_t HashAlgorithm = { CRC32Software }; -SANITIZER_WEAK_ATTRIBUTE u32 computeHardwareCRC32(u32 Crc, uptr Data); - -INLINE u32 computeCRC32(u32 Crc, uptr Data, u8 HashType) { - // If SSE4.2 is defined here, it was enabled everywhere, as opposed to only - // for scudo_crc32.cpp. This means that other SSE instructions were likely - // emitted at other places, and as a result there is no reason to not use - // the hardware version of the CRC32. +INLINE u32 computeCRC32(uptr Crc, uptr Value, uptr *Array, uptr ArraySize) { + // If the hardware CRC32 feature is defined here, it was enabled everywhere, + // as opposed to only for scudo_crc32.cpp. This means that other hardware + // specific instructions were likely emitted at other places, and as a + // result there is no reason to not use it here. #if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) - return computeHardwareCRC32(Crc, Data); + Crc = CRC32_INTRINSIC(Crc, Value); + for (uptr i = 0; i < ArraySize; i++) + Crc = CRC32_INTRINSIC(Crc, Array[i]); + return Crc; #else - if (computeHardwareCRC32 && HashType == CRC32Hardware) - return computeHardwareCRC32(Crc, Data); - else - return computeSoftwareCRC32(Crc, Data); -#endif // defined(__SSE4_2__) + if (atomic_load_relaxed(&HashAlgorithm) == CRC32Hardware) { + Crc = computeHardwareCRC32(Crc, Value); + for (uptr i = 0; i < ArraySize; i++) + Crc = computeHardwareCRC32(Crc, Array[i]); + return Crc; + } + Crc = computeSoftwareCRC32(Crc, Value); + for (uptr i = 0; i < ArraySize; i++) + Crc = computeSoftwareCRC32(Crc, Array[i]); + return Crc; +#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } static ScudoBackendAllocator &getBackendAllocator(); @@ -78,10 +86,8 @@ struct ScudoChunk : UnpackedHeader { ZeroChecksumHeader.Checksum = 0; uptr HeaderHolder[sizeof(UnpackedHeader) / sizeof(uptr)]; memcpy(&HeaderHolder, &ZeroChecksumHeader, sizeof(HeaderHolder)); - u8 HashType = atomic_load_relaxed(&HashAlgorithm); - u32 Crc = computeCRC32(Cookie, reinterpret_cast<uptr>(this), HashType); - for (uptr i = 0; i < ARRAY_SIZE(HeaderHolder); i++) - Crc = computeCRC32(Crc, HeaderHolder[i], HashType); + u32 Crc = computeCRC32(Cookie, reinterpret_cast<uptr>(this), HeaderHolder, + ARRAY_SIZE(HeaderHolder)); return static_cast<u16>(Crc); } @@ -195,10 +201,10 @@ void initScudo() { CHECK(!ScudoInitIsRunning && "Scudo init calls itself!"); ScudoInitIsRunning = true; - // Check is SSE4.2 is supported, if so, opt for the CRC32 hardware version. - if (testCPUFeature(CRC32CPUFeature)) { + // Check if hardware CRC32 is supported in the binary and by the platform, if + // so, opt for the CRC32 hardware version of the checksum. + if (computeHardwareCRC32 && testCPUFeature(CRC32CPUFeature)) atomic_store_relaxed(&HashAlgorithm, CRC32Hardware); - } initFlags(); @@ -228,7 +234,7 @@ struct QuarantineCallback { getBackendAllocator().Deallocate(Cache_, Ptr); } - /// Internal quarantine allocation and deallocation functions. + // Internal quarantine allocation and deallocation functions. void *Allocate(uptr Size) { // TODO(kostyak): figure out the best way to protect the batches. return getBackendAllocator().Allocate(Cache_, Size, MinAlignment); diff --git a/lib/scudo/scudo_crc32.cpp b/lib/scudo/scudo_crc32.cpp index 56be22f4e..a267dc4e3 100644 --- a/lib/scudo/scudo_crc32.cpp +++ b/lib/scudo/scudo_crc32.cpp @@ -12,24 +12,7 @@ /// //===----------------------------------------------------------------------===// -#include "sanitizer_common/sanitizer_internal_defs.h" - -// Hardware CRC32 is supported at compilation via the following: -// - for i386 & x86_64: -msse4.2 -// - for ARM & AArch64: -march=armv8-a+crc or -mcrc -// An additional check must be performed at runtime as well to make sure the -// emitted instructions are valid on the target host. - -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) -# ifdef __SSE4_2__ -# include <smmintrin.h> -# define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) -# endif -# ifdef __ARM_FEATURE_CRC32 -# include <arm_acle.h> -# define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd) -# endif -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#include "scudo_crc32.h" namespace __scudo { diff --git a/lib/scudo/scudo_crc32.h b/lib/scudo/scudo_crc32.h new file mode 100644 index 000000000..5ffcc6265 --- /dev/null +++ b/lib/scudo/scudo_crc32.h @@ -0,0 +1,101 @@ +//===-- scudo_crc32.h -------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// Scudo chunk header checksum related definitions. +/// +//===----------------------------------------------------------------------===// + +#ifndef SCUDO_CRC32_H_ +#define SCUDO_CRC32_H_ + +#include "sanitizer_common/sanitizer_internal_defs.h" + +// Hardware CRC32 is supported at compilation via the following: +// - for i386 & x86_64: -msse4.2 +// - for ARM & AArch64: -march=armv8-a+crc or -mcrc +// An additional check must be performed at runtime as well to make sure the +// emitted instructions are valid on the target host. + +#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +# ifdef __SSE4_2__ +# include <smmintrin.h> +# define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) +# endif +# ifdef __ARM_FEATURE_CRC32 +# include <arm_acle.h> +# define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd) +# endif +#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) + +namespace __scudo { + +enum : u8 { + CRC32Software = 0, + CRC32Hardware = 1, +}; + +const static u32 CRC32Table[] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, + 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, + 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, + 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, + 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, + 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, + 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, + 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, + 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, + 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, + 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, + 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, + 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, + 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, + 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, + 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, + 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, + 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, + 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, + 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, + 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, + 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + +INLINE u32 computeSoftwareCRC32(u32 Crc, uptr Data) { + for (uptr i = 0; i < sizeof(Data); i++) { + Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8); + Data >>= 8; + } + return Crc; +} + +SANITIZER_WEAK_ATTRIBUTE u32 computeHardwareCRC32(u32 Crc, uptr Data); + +} // namespace __scudo + +#endif // SCUDO_CRC32_H_ diff --git a/lib/scudo/scudo_utils.h b/lib/scudo/scudo_utils.h index 484b0c859..7198476f4 100644 --- a/lib/scudo/scudo_utils.h +++ b/lib/scudo/scudo_utils.h @@ -53,65 +53,6 @@ struct Xorshift128Plus { u64 State[2]; }; -enum : u8 { - CRC32Software = 0, - CRC32Hardware = 1, -}; - -const static u32 CRC32Table[] = { - 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, - 0xe963a535, 0x9e6495a3, 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, - 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, 0x1db71064, 0x6ab020f2, - 0xf3b97148, 0x84be41de, 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, - 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, 0x14015c4f, 0x63066cd9, - 0xfa0f3d63, 0x8d080df5, 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, - 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, 0x35b5a8fa, 0x42b2986c, - 0xdbbbc9d6, 0xacbcf940, 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, - 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, 0x21b4f4b5, 0x56b3c423, - 0xcfba9599, 0xb8bda50f, 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, - 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, 0x76dc4190, 0x01db7106, - 0x98d220bc, 0xefd5102a, 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, - 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, 0x7f6a0dbb, 0x086d3d2d, - 0x91646c97, 0xe6635c01, 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, - 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, 0x65b0d9c6, 0x12b7e950, - 0x8bbeb8ea, 0xfcb9887c, 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, - 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, 0x4adfa541, 0x3dd895d7, - 0xa4d1c46d, 0xd3d6f4fb, 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, - 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, 0x5005713c, 0x270241aa, - 0xbe0b1010, 0xc90c2086, 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, - 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, 0x59b33d17, 0x2eb40d81, - 0xb7bd5c3b, 0xc0ba6cad, 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, - 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, 0xe3630b12, 0x94643b84, - 0x0d6d6a3e, 0x7a6a5aa8, 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, - 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, 0xf762575d, 0x806567cb, - 0x196c3671, 0x6e6b06e7, 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, - 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, 0xd6d6a3e8, 0xa1d1937e, - 0x38d8c2c4, 0x4fdff252, 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, - 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, 0xdf60efc3, 0xa867df55, - 0x316e8eef, 0x4669be79, 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, - 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, 0xc5ba3bbe, 0xb2bd0b28, - 0x2bb45a92, 0x5cb36a04, 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, - 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, 0x9c0906a9, 0xeb0e363f, - 0x72076785, 0x05005713, 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, - 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, 0x86d3d2d4, 0xf1d4e242, - 0x68ddb3f8, 0x1fda836e, 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, - 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, 0x8f659eff, 0xf862ae69, - 0x616bffd3, 0x166ccf45, 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, - 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, 0xaed16a4a, 0xd9d65adc, - 0x40df0b66, 0x37d83bf0, 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, - 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, 0xbad03605, 0xcdd70693, - 0x54de5729, 0x23d967bf, 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, - 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d -}; - -INLINE u32 computeSoftwareCRC32(u32 Crc, uptr Data) { - for (uptr i = 0; i < sizeof(Data); i++) { - Crc = CRC32Table[(Crc ^ Data) & 0xff] ^ (Crc >> 8); - Data >>= 8; - } - return Crc; -} - } // namespace __scudo #endif // SCUDO_UTILS_H_ |