[llvm-dev] Where's the optimizer gone?
Stefan Kanthak via llvm-dev
llvm-dev at lists.llvm.org
Mon Nov 26 10:26:08 PST 2018
--- byteswap.c ---
// Exchange the two bytes of a 16-bit value: the low byte moves to the high
// position and the high byte to the low position.
// NOTE(review): relies on unsigned short being exactly 16 bits, so that the
// conversion of the result back to unsigned short drops the bits that the
// left shift moved above bit 15 -- confirm for the target.
// inline
unsigned short swap16(unsigned short argument)
{
// (argument >> 8) yields the old high byte in the low position;
// (argument << 8) yields the old low byte in the high position.
return (argument >> 8) | (argument << 8);
}
// Reverse the byte order of a 32-bit value by byte-swapping each 16-bit half
// with swap16() and exchanging the two halves.
// NOTE(review): assumes unsigned int is 32 bits and unsigned short is 16 bits
// -- confirm for the target.
// inline
unsigned int swap32(unsigned int argument)
{
// The byte-swapped low half becomes the new high half ...
return (unsigned int) swap16((unsigned short) argument) << 16
// ... and the byte-swapped high half becomes the new low half.
| (unsigned int) swap16((unsigned short) (argument >> 16));
}
// Reverse the byte order of a 64-bit value by byte-swapping each 32-bit half
// with swap32() and exchanging the two halves.
// NOTE(review): assumes unsigned long is 64 bits and unsigned int is 32 bits.
// Where unsigned long is only 32 bits wide, the shifts by 32 below are
// undefined behaviour -- confirm the data model of the target.
unsigned long swap64(unsigned long argument)
{
// The byte-swapped low half becomes the new high half ...
return (unsigned long) swap32((unsigned int) argument) << 32
// ... and the byte-swapped high half becomes the new low half.
| (unsigned long) swap32((unsigned int) (argument >> 32));
}
--- EOF ---
Compiled with "-O3" this generates the following UNOPTIMISED code for
the swap32() and swap64() functions (see <https://godbolt.org/z/DwnG-X>):
swap16: # @swap16
rol di, 8
mov eax, edi
ret
swap32: # @swap32
mov ecx, edi
rol cx, 8
shl ecx, 16
shr edi, 16
rol di, 8
movzx eax, di
or eax, ecx
ret
swap64: # @swap64
mov eax, edi
rol ax, 8
mov ecx, edi
shr ecx, 16
rol cx, 8
shl eax, 16
movzx ecx, cx
or ecx, eax
shl rcx, 32
mov rax, rdi
shr rax, 32
rol ax, 8
movzx edx, ax
shr rdi, 48
rol di, 8
shl rdx, 16
movzx eax, di
or rax, rdx
or rax, rcx
ret
Now look at what GCC 8.2 generates (see <https://godbolt.org/z/2_XhQN>):
swap16:
mov eax, edi
rol ax, 8
ret
swap32:
mov edx, edi
shr edi, 16
rol dx, 8
rol di, 8
sal edx, 16
movzx eax, di
or eax, edx
ret
swap64:
mov rax, rdi
bswap rax
ret
While GCC also fails to optimise swap32() to a BSWAP, it does at least do so
for swap64()!
regards
Stefan Kanthak
More information about the llvm-dev
mailing list