[cfe-dev] Even abs() comes with a performance penalty

Stefan Kanthak via cfe-dev cfe-dev at lists.llvm.org
Sun Sep 6 05:08:47 PDT 2020


--- bugs-bunny.c ---
// Copyleft © 2014-2020, Stefan Kanthak <stefan.kanthak at nexgo.de>

#ifdef __amd64__
// Absolute value of an __int128_t, spelled out as a conditional negation.
// This variant is compiled only when __amd64__ is defined.
// NOTE(review): for the most negative value, -argument overflows, which is
// undefined behaviour for signed integers in C; callers must avoid that input.
__int128_t __absti2(__int128_t argument) {
    return argument < 0 ? -argument : argument;
}
#else
// Absolute value of a long long; compiled only when __amd64__ is NOT defined.
// With BUNNY defined the result comes from the compiler builtin
// __builtin_llabs(); otherwise it is spelled out as a conditional negation
// (presumably so that the code generated for both forms can be compared).
// NOTE(review): for the most negative value, -argument overflows, which is
// undefined behaviour for signed integers in C.
long long __absdi2(long long argument) {
#ifdef BUNNY
    return __builtin_llabs(argument);
#else
    return argument < 0 ? -argument : argument;
#endif // BUNNY
}

// Absolute value of a long; compiled only when __amd64__ is NOT defined.
// With BUNNY defined the result comes from the compiler builtin
// __builtin_labs(); otherwise it is spelled out as a conditional negation
// (presumably so that the code generated for both forms can be compared).
// NOTE(review): for the most negative value, -argument overflows, which is
// undefined behaviour for signed integers in C.
long __abssi2(long argument) {
#ifdef BUNNY
    return __builtin_labs(argument);
#else
    return argument < 0 ? -argument : argument;
#endif // BUNNY
}
#endif // __amd64__
--- EOF ---

Run clang -c -o- -O3 -S -target amd64-pc-linux bugs-bunny.c

Left: inefficient original code  # right: proper code,
                                 #        faster and 3 bytes shorter

__absti2:      # @__absti2
# %bb.0:                         # .intel_syntax noprefix
      xorl     %edx, %edx        #        mov    rax, rsi
      movq     %rdi, %rax        #        cqo
      negq     %rax              #        mov    rax, rdx
      sbbq     %rsi, %rdx        #        add    rdi, rdx
      testq    %rsi, %rsi        #        adc    rsi, rdx
      cmovnsq  %rdi, %rax        #        xor    rax, rdi
      cmovnsq  %rsi, %rdx        #        xor    rdx, rsi
      retq                       #        ret

CMOVcc introduces a data dependency here, and does so UNNECESSARILY!


Run clang -c -o- -O3 -S -target i386-pc-linux bugs-bunny.c

Left: inefficient original code  # right: proper code, runs even on real
                                 #        i386, not just PentiumPro+

__abssi2:     # @__abssi2
# %bb.0:                         # .intel_syntax noprefix
      movl    4(%esp), %ecx      #        mov    eax, [esp+4]
      movl    %ecx, %eax         #        cdq
      negl    %eax               #        add    eax, edx
      cmovll  %ecx, %eax         #        xor    eax, edx
      retl                       #        ret


Writing shorter code for __absdi2() for i386 is left as an
exercise to the reader.



More information about the cfe-dev mailing list