[llvm-dev] Rather poor code optimisation of current clang/LLVM targeting Intel x86 (both -64 and -32)
Sanjay Patel via llvm-dev
llvm-dev at lists.llvm.org
Thu Nov 8 11:08:08 PST 2018
IIUC, you want to use x86-specific bit-hacks (sbb masking) in cases like
this:
unsigned int foo(unsigned int crc) {
if (crc & 0x80000000)
crc <<= 1, crc ^= 0xEDB88320;
else
crc <<= 1;
return crc;
}
Which is this in LLVM IR:
define i32 @foo(i32 %x) {
%t2 = icmp slt i32 %x, 0
%t3 = shl i32 %x, 1
%t4 = xor i32 %t3, -306674912
%t5 = select i1 %t2, i32 %t4, i32 %t3
ret i32 %t5
}
Please a file a bug report for the x86 backend (including performance
numbers if you have that data).
On Wed, Nov 7, 2018 at 5:24 PM Stefan Kanthak via llvm-dev <
llvm-dev at lists.llvm.org> wrote:
> Hi @ll,
>
> while clang/LLVM recognizes common bit-twiddling idioms/expressions
> like
>
> unsigned int rotate(unsigned int x, unsigned int n)
> {
> return (x << n) | (x >> (32 - n));
> }
>
> and typically generates "rotate" machine instructions for this
> expression, it fails to recognize other also common bit-twiddling
> idioms/expressions.
>
> The standard IEEE CRC-32 for "big endian" alias "network" byte order
> (see <https://tools.ietf.org/html/rfc1952#section-8> for example):
>
> unsigned int crc32be(unsigned char const *octets, unsigned int count)
> {
> unsigned int crc = 0L;
> unsigned int i;
>
> while (count--) {
> crc ^= *octets++ << 24;
> for (i = 8; i > 0; i--)
> if (crc & 0x80000000L) // the code generated
> crc <<= 1, crc ^= 0xEDB88320L; // for Intel x86 from
> else // these 4 lines is
> crc <<= 1; // rather poor!
> }
> return crc;
> }
>
> The same function for "little endian" byte order, using the "inverse"
> or "mirrored" polynom:
>
> unsigned int crc32le(unsigned char const *octets, unsigned int count)
> {
> unsigned int crc = ~0L;
> unsigned int i;
>
> while (count--) {
> crc ^= *octets++;
> for (i = 8; i > 0; i--)
> if (crc & 1L) // the code generated
> crc >>= 1, crc ^= 0x04C11DB7L; // for Intel x86 from
> else // these 4 lines is
> crc >>= 1; // rather poor!
> }
> return ~crc;
> }
>
> See <https://godbolt.org/z/eYJeWt> (-O1) and <https://godbolt.org/z/zeExHm>
> (-O2)
>
> crc32be: # @crc32be
> xor eax, eax
> test esi, esi
> jne .LBB0_2
> jmp .LBB0_5
> .LBB0_4: # in Loop: Header=BB0_2 Depth=1
> add rdi, 1
> test esi, esi
> je .LBB0_5
> .LBB0_2: # =>This Loop Header: Depth=1
> add esi, -1
> movzx edx, byte ptr [rdi]
> shl edx, 24
> xor edx, eax
> mov ecx, -8
> mov eax, edx
> .LBB0_3: # Parent Loop BB0_2 Depth=1 | # 4 instructions instead of 6, r8
> not clobbered!
> lea r8d, [rax + rax] | add eax, eax
> mov edx, r8d | # CF is set from the MSB of EAX
> xor edx, -306674912 | sbb edx, edx
> test eax, eax | # EDX is 0xFFFFFFFF if CF set,
> else 0
> mov eax, edx | and edx, -306674912
> cmovns eax, r8d | xor eax, edx
> add ecx, 1
> jne .LBB0_3
> jmp .LBB0_4
> .LBB0_5:
> ret
> crc32le: # @crc32le
> test esi, esi
> je .LBB1_1
> mov eax, -1
> .LBB1_4: # =>This Loop Header: Depth=1
> add esi, -1
> movzx ecx, byte ptr [rdi]
> xor eax, ecx
> mov r8d, -8
> .LBB1_5: # Parent Loop BB1_4 Depth=1 | # 4 instructions instead of 7, and
> mov edx, eax | # neither r8 nor rcx clobbered!
> shr edx | shr eax, 1
> mov ecx, edx | # CF is set from the LSB of EAX
> xor ecx, 79764919 | sbb edx, edx
> test al, 1 | # EDX is 0xFFFFFFFF if CF set,
> else 0
> mov eax, ecx | and edx, 79764919
> cmove eax, edx | xor eax, edx
> add r8d, 1
> jne .LBB1_5
> add rdi, 1
> test esi, esi
> jne .LBB1_4
> not eax
> ret
> .LBB1_1:
> xor eax, eax
> ret
>
> JFTR: with -O2, the inner loop gets unrolled, using the same non-optimal
> code sequence with 6 and 7 instructions; this accounts for a total
> of 16 and 24 superfluous instructions respectively.
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20181108/5f1c5c30/attachment.html>
More information about the llvm-dev
mailing list