[PATCH] D32971: [scudo] CRC32 optimizations
Kostya Kortchinsky via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Mon May 8 14:10:38 PDT 2017
cryptoad updated this revision to Diff 98207.
cryptoad added a comment.
This new solution should fit both Dmitry's and Aleksey's suggestions.
The checksum logic is now wrapped up in a single function that takes as
parameters all the information needed to compute the checksum, and returns the
checksum computed using the fastest method available.
Regarding the generated assembly, the full SSE one looks like this (no extra
`HashAlgorithm` load, and the `crc32` calls are inlined):
mov rax, 0FFFFFFFFFFFFFFh
lea rcx, [r13-10h]
shl r10, 38h
and r14, rax
mov rax, cs:_ZN7__scudoL6CookieE ; __scudo::Cookie
or r14, r10
mov rdx, r14
crc32 eax, rcx
xor dx, dx
crc32 eax, rdx
mov r14w, ax
mov rax, r13
mov [r13-10h], r14
And the partial SSE one (only one `HashAlgorithm` load):
mov rax, cs:_ZN7__scudoL6CookieE ; __scudo::Cookie
mov dl, cs:_ZN7__scudoL13HashAlgorithmE ; unsigned __int64
cmp dl, 1
jnz short loc_8859
and rbx, 0FFFFFFFFFFFF0000h
mov edi, eax ; this
mov rsi, rcx ; unsigned int
call _ZN7__scudo20computeHardwareCRC32Ejm ; __scudo::computeHardwareCRC32(uint,ulong)
mov edi, eax ; this
mov rsi, rbx ; unsigned int
call _ZN7__scudo20computeHardwareCRC32Ejm ; __scudo::computeHardwareCRC32(uint,ulong)
jmp loc_8974
; ---------------------------------------------------------------------------
loc_8859: ; CODE XREF: __scudo::ScudoChunk::computeChecksum(__scudo::UnpackedHeader *)+17j
... software CRC32 ...
So this seems pretty good.
https://reviews.llvm.org/D32971
Files:
lib/scudo/scudo_allocator.cpp
lib/scudo/scudo_crc32.cpp
lib/scudo/scudo_crc32.h
lib/scudo/scudo_utils.h
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D32971.98207.patch
Type: text/x-patch
Size: 13635 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170508/4733df43/attachment.bin>
More information about the llvm-commits
mailing list