[cfe-dev] "Optimized implementations"?

Sun Sep 6 12:45:51 PDT 2020

Clang never generates calls to ___paritysi2, ___paritydi2, ___cmpdi2, or
___ucmpdi2 on X86, so it's not clear that the performance of these routines matters at all.

~Craig

On Sun, Sep 6, 2020 at 12:31 PM Stefan Kanthak via cfe-dev <
cfe-dev at lists.llvm.org> wrote:

> <https://compiler-rt.llvm.org/index.html> boasts:
>
> | The builtins library provides optimized implementations of this
> | and other low-level routines, either in target-independent C form,
> | or as a heavily-optimized assembly.
>
> Really?
>
> Left: inefficient code shipped in     # Right: slightly improved code,
>       clang_rt.builtins-*             #        which the optimiser REALLY
>                                       #        should have generated
>
> ___cmpdi2:
>         mov     ecx, [esp+16]         #       mov     ecx, [esp+16]
>         xor     eax, eax              #       xor     eax, eax
>         cmp     [esp+8], ecx          #       cmp     ecx, [esp+8]
>         jl      @f                    #       jg      @f
>         mov     eax, 2                #       mov     eax, 2
>         jg      @f                    #       jl      @f
>         mov     ecx, [esp+4]          #
>         mov     edx, [esp+12]         #       mov     ecx, [esp+12]
>         mov     eax, 0                #       xor     eax, eax
>         cmp     ecx, edx              #       cmp     ecx, [esp+4]
>         jb      @f                    #       ja      @f
>         cmp     edx, ecx              #
>         mov     eax, 1                #
>         adc     eax, 0                #       adc     eax, 1
> @@:                                   # @@:
>         ret                           #       ret
>
>                                       # 3 instructions fewer, 10 bytes saved
>
> ___ucmpdi2:
>         mov     ecx, [esp+16]         #       mov     ecx, [esp+16]
>         xor     eax, eax              #       xor     eax, eax
>         cmp     [esp+8], ecx          #       cmp     ecx, [esp+8]
>         jb      @f                    #       ja      @f
>         mov     eax, 2                #       mov     eax, 2
>         ja      @f                    #       jb      @f
>         mov     ecx, [esp+4]          #
>         mov     edx, [esp+12]         #       mov     ecx, [esp+12]
>         mov     eax, 0                #       xor     eax, eax
>         cmp     ecx, edx              #       cmp     ecx, [esp+4]
>         jb      @f                    #       ja      @f
>         cmp     edx, ecx              #
>         mov     eax, 1                #
>         adc     eax, 0                #       adc     eax, 1
> @@:                                   # @@:
>         ret                           #       ret
>
>                                       # 3 instructions fewer, 10 bytes saved
>
>
> Now properly written code, of course branch-free, faster and shorter:
>
> # Copyright (C) 2004-2020, Stefan Kanthak <stefan.kanthak at nexgo.de>
>
> ___cmpdi2:
>         mov     ecx, [esp+4]
>         mov     edx, [esp+12]
>         cmp     ecx, edx
>         mov     eax, [esp+8]
>         sbb     eax, [esp+16]
>         setl    ah
>         cmp     edx, ecx
>         mov     edx, [esp+16]
>         sbb     edx, [esp+8]
>         setl    al
>         sub     al, ah
>         movsx   eax, al
>         inc     eax
>         ret
>
> ___ucmpdi2:
>         mov     ecx, [esp+4]
>         mov     edx, [esp+12]
>         cmp     ecx, edx
>         mov     eax, [esp+8]
>         sbb     eax, [esp+16]
>         sbb     eax, eax
>         cmp     edx, ecx
>         mov     edx, [esp+16]
>         sbb     edx, [esp+8]
>         adc     eax, 1
>         ret
>
>
> AGAIN:
> Remove every occurrence of the word "optimized" on the above web page.
>
> 'nuff said
> Stefan
> _______________________________________________
> cfe-dev mailing list
> cfe-dev at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-dev/attachments/20200906/de89f4cd/attachment.html>