[llvm-dev] Where's the optimizer gone?

Stefan Kanthak via llvm-dev llvm-dev at lists.llvm.org
Mon Nov 26 10:26:08 PST 2018


--- byteswap.c ---
// inline
unsigned short swap16(unsigned short argument)
{
    return (argument >> 8) | (argument << 8);
}

// inline
unsigned int swap32(unsigned int argument)
{
    return (unsigned int) swap16((unsigned short) argument) << 16
         | (unsigned int) swap16((unsigned short) (argument >> 16));
}

unsigned long swap64(unsigned long argument) 
{
    return (unsigned long) swap32((unsigned int) argument) << 32
         | (unsigned long) swap32((unsigned int) (argument >> 32));
}
--- EOF ---

Compiled with "-O3" this generates the following UNOPTIMISED code for
the swap32() and swap64() functions (see <https://godbolt.org/z/DwnG-X>):

swap16: # @swap16
    rol   di, 8
    mov   eax, edi
    ret
swap32: # @swap32
    mov   ecx, edi
    rol   cx, 8
    shl   ecx, 16
    shr   edi, 16
    rol   di, 8
    movzx eax, di
    or    eax, ecx
    ret
swap64: # @swap64
    mov   eax, edi
    rol   ax, 8
    mov   ecx, edi
    shr   ecx, 16
    rol   cx, 8
    shl   eax, 16
    movzx ecx, cx
    or    ecx, eax
    shl   rcx, 32
    mov   rax, rdi
    shr   rax, 32
    rol   ax, 8
    movzx edx, ax
    shr   rdi, 48
    rol   di, 8
    shl   rdx, 16
    movzx eax, di
    or    rax, rdx
    or    rax, rcx
    ret

Now look what GCC 8.2 generates (see <https://godbolt.org/z/2_XhQN>):

swap16:
    mov   eax, edi
    rol   ax, 8
    ret
swap32:
    mov   edx, edi
    shr   edi, 16
    rol   dx, 8
    rol   di, 8
    sal   edx, 16
    movzx eax, di
    or    eax, edx
    ret
swap64:
    mov   rax, rdi
    bswap rax
    ret


While GCC also fails to optimise swap32() to a BSWAP, it does manage to
do so for swap64()!

regards
Stefan Kanthak


More information about the llvm-dev mailing list