[llvm-bugs] [Bug 44596] New: Better block placement - avoid jmp at entry block

Mon Jan 20 06:59:45 PST 2020

https://bugs.llvm.org/show_bug.cgi?id=44596

            Bug ID: 44596
           Summary: Better block placement - avoid jmp at entry block
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Linux
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: david.bolvansky at gmail.com
                CC: craig.topper at gmail.com, llvm-bugs at lists.llvm.org,
                    llvm-dev at redking.me.uk, spatel+llvm at rotateright.com

Function:
https://github.com/microsoft/test-suite/blob/master/MultiSource/Benchmarks/mediabench/g721/g721encode/g711.c#L149

Current ASM: https://godbolt.org/z/qYf5tP

# Annotation (not compiler output): clang's current code, Intel syntax.
# In:  edi/dil = the unsigned char argument (presumably SysV AMD64, given
#      "OS: Linux" above - confirm against the godbolt link).
# Out: eax = result.
# Notation used below: x = arg ^ 85, lo = (x << 4) & 0xFF, field = (x >> 4) & 7.
alaw2linear(unsigned char):                      
        mov     eax, edi
        xor     al, 85
        movzx   eax, al                 # eax = x (low byte only, zero-extended)
        mov     ecx, eax
        shl     ecx, 4
        movzx   ecx, cl                 # ecx = lo
        shr     eax, 4
        and     eax, 7                  # eax = field (0..7)
        cmp     eax, 1
        je      .LBB0_3                 # field == 1
        test    eax, eax
        jne     .LBB0_4                 # field >= 2
        # field == 0
        or      ecx, 8
        jmp     .LBB0_5                 # the jmp this report wants to avoid
.LBB0_3:
        # field == 1
        or      ecx, 264
        jmp     .LBB0_5
.LBB0_4:
        # field >= 2: ecx = (lo | 264) << (field - 1)
        or      ecx, 264
        dec     al                      # upper bits of eax are already zero after the and
        shlx    ecx, ecx, eax           # BMI2 shift; does not modify flags
.LBB0_5:
        # Common tail: return ecx if bit 7 of the original argument is set,
        # otherwise return -ecx. dil is still the unmodified argument here
        # because the xor above was applied to the copy in eax.
        mov     eax, ecx
        neg     eax
        test    dil, dil
        cmovs   eax, ecx
        ret

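Would the following block placement be better for standard codegen? Here the `or ecx, 8` path falls through into .LBB0_5 instead of jumping to it: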

# Annotation (not compiler output): the reporter's proposed layout. The
# instructions match the current output except for block order: the common
# tail .LBB0_5 is placed directly after the "or ecx, 8" path, and .LBB0_3 and
# .LBB0_4 are moved after the ret. The listing still contains two jmp
# instructions, but none on the "or ecx, 8" path.
# Notation used below: x = arg ^ 85, lo = (x << 4) & 0xFF, field = (x >> 4) & 7.
alaw2linear(unsigned char):                       # @alaw2linear(unsigned char)
        mov     eax, edi
        xor     al, 85
        movzx   eax, al                 # eax = x (low byte only, zero-extended)
        mov     ecx, eax
        shl     ecx, 4
        movzx   ecx, cl                 # ecx = lo
        shr     eax, 4
        and     eax, 7                  # eax = field (0..7)
        cmp     eax, 1
        je      .LBB0_3                 # field == 1
        test    eax, eax
        jne     .LBB0_4                 # field >= 2
        # field == 0: falls through into the common tail, no jmp needed
        or      ecx, 8
.LBB0_5:
        # Common tail: return ecx if bit 7 of the original argument is set,
        # otherwise return -ecx.
        mov     eax, ecx
        neg     eax
        test    dil, dil
        cmovs   eax, ecx
        ret
.LBB0_3:
        # field == 1
        or      ecx, 264
        jmp     .LBB0_5
.LBB0_4:
        # field >= 2: ecx = (lo | 264) << (field - 1)
        or      ecx, 264
        dec     al
        shlx    ecx, ecx, eax
        jmp     .LBB0_5                 # new jmp: this block no longer falls through to the tail

For this specific case, there is also a cmov opportunity; GCC produces:
# Annotation (not compiler output): GCC's code for the same function, Intel
# syntax. It uses one branch (field == 0) and selects between the shifted and
# unshifted value with cmove instead of branching on field == 1.
# Notation used below: x = arg ^ 85, lo = (x << 4) & 240, field = (x >> 4) & 7.
alaw2linear(unsigned char):
        xor     edi, 85                 # edi = x, modified in place
        mov     ecx, edi
        sal     ecx, 4
        mov     edx, edi
        and     ecx, 240                # ecx = lo
        shr     dl, 4
        lea     eax, [rcx+8]            # eax = lo + 8, the result magnitude when field == 0
        and     edx, 7                  # edx = field; sets ZF when field == 0
        je      .L5
        lea     eax, [rcx+264]          # eax = lo + 264
        movzx   ecx, dl
        mov     esi, eax                # keep the unshifted value for the field == 1 case
        dec     ecx                     # ecx = field - 1 (shift count)
        cmp     dl, 1
        shlx    eax, eax, ecx           # shlx leaves flags alone, so cmove sees the cmp result
        cmove   eax, esi                # field == 1: take the unshifted value
.L5:
        # Return eax if bit 7 of dil is set, otherwise -eax. dil was XORed
        # with 85 above, but 85 (0x55) has bit 7 clear, so this is still the
        # argument's original bit 7.
        mov     edx, eax
        neg     edx
        test    dil, dil
        cmovns  eax, edx
        ret

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20200120/c30690cc/attachment-0001.html>