[LLVMbugs] [Bug 23508] New: 2 testcases for which vs2015 generates faster code than llvm on x86

bugzilla-daemon at llvm.org bugzilla-daemon at llvm.org
Tue May 12 14:50:00 PDT 2015


https://llvm.org/bugs/show_bug.cgi?id=23508

            Bug ID: 23508
           Summary: 2 testcases for which vs2015 generates faster code
                    than llvm on x86
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows XP
            Status: NEW
          Severity: normal
          Priority: P
         Component: Backend: X86
          Assignee: unassignedbugs at nondot.org
          Reporter: wmi at google.com
                CC: llvmbugs at cs.uiuc.edu
    Classification: Unclassified

For the following testcase, Visual Studio 2015 generates better code than LLVM
(the VS2015 build is more than 2 times faster on Windows + Sandy Bridge).

/* Reproducer 1: a byte-wise copy loop that keeps 3 of every 4 source bytes. */
#define uint8 unsigned char
/*
 * Runs `width` iterations (none when width <= 0). Each iteration reads
 * src_argb[0], [1] and [2] and stores them to dst_rgb in reverse order:
 * dst_rgb[0] = src_argb[2], dst_rgb[1] = src_argb[1], dst_rgb[2] = src_argb[0].
 * src_argb[3] is never read. The source pointer advances 4 bytes and the
 * destination pointer 3 bytes per iteration.
 *
 * NOTE(review): the pointers are not restrict-qualified, so a compiler must
 * assume the stores may alias later loads -- presumably relevant to the code
 * generation difference reported here; confirm.
 */
void foo(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    /* All three loads happen before any store in the iteration. */
    uint8 b = src_argb[0];
    uint8 g = src_argb[1];
    uint8 r = src_argb[2];
    dst_rgb[0] = r;
    dst_rgb[1] = g;
    dst_rgb[2] = b;
    dst_rgb += 3;
    src_argb += 4;
  }
}

vs2015
foo (9863 ms)
  0042D860: 8A 58 FE           mov         bl,byte ptr [eax-2]
  0042D863: 8D 76 03           lea         esi,[esi+3]
  0042D866: 8A 50 FF           mov         dl,byte ptr [eax-1]
  0042D869: 8D 40 04           lea         eax,[eax+4]
  0042D86C: 8A 48 FC           mov         cl,byte ptr [eax-4]
  0042D86F: 88 4E FB           mov         byte ptr [esi-5],cl
  0042D872: 88 56 FC           mov         byte ptr [esi-4],dl
  0042D875: 88 5E FD           mov         byte ptr [esi-3],bl
  0042D878: 83 EF 01           sub         edi,1
  0042D87B: 75 E3              jne         0042D860

clang
foo (22431 ms)
  004E88FE: 8A 1A              mov         bl,byte ptr [edx]
  004E8900: 88 5C 24 03        mov         byte ptr [esp+3],bl
  004E8904: 8A 7A 01           mov         bh,byte ptr [edx+1]
  004E8907: 8A 5A 02           mov         bl,byte ptr [edx+2]
  004E890A: 88 19              mov         byte ptr [ecx],bl
  004E890C: 88 79 01           mov         byte ptr [ecx+1],bh
  004E890F: 8A 5C 24 03        mov         bl,byte ptr [esp+3]
  004E8913: 88 59 02           mov         byte ptr [ecx+2],bl
  004E8916: 83 C2 04           add         edx,4
  004E8919: 83 C1 03           add         ecx,3
  004E891C: 48                 dec         eax
  004E891D: 75 DF              jne         004E88FE

Another testcase:

/* Reproducer 2: packs the high nibble of 8 source bytes into one 32-bit store. */
#define uint8 unsigned char
#define uint32 unsigned
/*
 * Each iteration consumes 8 bytes from src_argb and writes 4 bytes to
 * dst_rgb. The loop steps x by 2 while x < width - 1, so when width is odd
 * the final unit is not processed, and nothing runs for width <= 1.
 *
 * Every source byte is shifted right by 4 (only its upper nibble is kept);
 * the eight nibbles are OR-ed together at bit offsets 0, 4, 8, ..., 28 in
 * source-byte order, i.e. src_argb[0] lands in bits 0-3 and src_argb[7] in
 * bits 28-31.
 */
void foo(const uint8* src_argb, uint8* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8 b0 = src_argb[0] >> 4;
    uint8 g0 = src_argb[1] >> 4;
    uint8 r0 = src_argb[2] >> 4;
    uint8 a0 = src_argb[3] >> 4;
    uint8 b1 = src_argb[4] >> 4;
    uint8 g1 = src_argb[5] >> 4;
    uint8 r1 = src_argb[6] >> 4;
    uint8 a1 = src_argb[7] >> 4;
    /*
     * Single 32-bit store through a cast of the byte pointer.
     * NOTE(review): this assumes dst_rgb is suitably aligned for uint32 and
     * that the type-punned store is acceptable to the compiler -- confirm.
     * NOTE(review): the uint8 operands are promoted to int before shifting,
     * so with a 32-bit int `a1 << 28` is not representable when a1 >= 8;
     * the reproducer relies on the usual two's-complement result.
     */
    *(uint32*)(dst_rgb) =
        b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) |
        (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28);
    dst_rgb += 4;
    src_argb += 8;
  }
}

VS2015 ARGBToARGB4444_Opt (13664 ms)
  0042DBD0: 0F B6 70 07        movzx       esi,byte ptr [eax+7]
  0042DBD4: 0F B6 48 06        movzx       ecx,byte ptr [eax+6]
  0042DBD8: 83 E6 F0           and         esi,0FFFFFFF0h
  0042DBDB: 0F B6 50 02        movzx       edx,byte ptr [eax+2]
  0042DBDF: 83 E1 F0           and         ecx,0FFFFFFF0h
  0042DBE2: C1 E6 04           shl         esi,4
  0042DBE5: 83 E2 F0           and         edx,0FFFFFFF0h
  0042DBE8: 0B F1              or          esi,ecx
  0042DBEA: C1 E2 04           shl         edx,4
  0042DBED: 0F B6 48 05        movzx       ecx,byte ptr [eax+5]
  0042DBF1: C1 E6 04           shl         esi,4
  0042DBF4: 83 E1 F0           and         ecx,0FFFFFFF0h
  0042DBF7: 0B F1              or          esi,ecx
  0042DBF9: 0F B6 48 04        movzx       ecx,byte ptr [eax+4]
  0042DBFD: C1 E6 04           shl         esi,4
  0042DC00: 83 E1 F0           and         ecx,0FFFFFFF0h
  0042DC03: 0B F1              or          esi,ecx
  0042DC05: 0F B6 48 03        movzx       ecx,byte ptr [eax+3]
  0042DC09: C1 E6 04           shl         esi,4
  0042DC0C: 83 E1 F0           and         ecx,0FFFFFFF0h
  0042DC0F: 0B F1              or          esi,ecx
  0042DC11: 0F B6 48 01        movzx       ecx,byte ptr [eax+1]
  0042DC15: 0B D1              or          edx,ecx
  0042DC17: C1 E6 08           shl         esi,8
  0042DC1A: 0F B6 08           movzx       ecx,byte ptr [eax]
  0042DC1D: 83 E2 F0           and         edx,0FFFFFFF0h
  0042DC20: 0B F2              or          esi,edx
  0042DC22: C1 E9 04           shr         ecx,4
  0042DC25: 0B F1              or          esi,ecx
  0042DC27: 83 C0 08           add         eax,8
  0042DC2A: 89 33              mov         dword ptr [ebx],esi
  0042DC2C: 83 C3 04           add         ebx,4
  0042DC2F: 83 EF 01           sub         edi,1
  0042DC32: 75 9C              jne         0042DBD0

clang ARGBToARGB4444_Opt (28555 ms)
  004E8CFC: 8A 1C A9           mov         bl,byte ptr [ecx+ebp*4]
  004E8CFF: C0 EB 04           shr         bl,4
  004E8D02: 8A 7C A9 01        mov         bh,byte ptr [ecx+ebp*4+1]
  004E8D06: 80 E7 F0           and         bh,0F0h
  004E8D09: 8A 54 A9 02        mov         dl,byte ptr [ecx+ebp*4+2]
  004E8D0D: C0 EA 04           shr         dl,4
  004E8D10: 08 DF              or          bh,bl
  004E8D12: 0F B6 DF           movzx       ebx,bh
  004E8D15: 0F B6 D2           movzx       edx,dl
  004E8D18: C1 E2 08           shl         edx,8
  004E8D1B: 09 DA              or          edx,ebx
  004E8D1D: 8A 5C A9 03        mov         bl,byte ptr [ecx+ebp*4+3]
  004E8D21: C0 EB 04           shr         bl,4
  004E8D24: 0F B6 DB           movzx       ebx,bl
  004E8D27: C1 E3 0C           shl         ebx,0Ch
  004E8D2A: 09 D3              or          ebx,edx
  004E8D2C: 8A 54 A9 04        mov         dl,byte ptr [ecx+ebp*4+4]
  004E8D30: C0 EA 04           shr         dl,4
  004E8D33: 0F B6 D2           movzx       edx,dl
  004E8D36: C1 E2 10           shl         edx,10h
  004E8D39: 09 DA              or          edx,ebx
  004E8D3B: 8A 5C A9 05        mov         bl,byte ptr [ecx+ebp*4+5]
  004E8D3F: C0 EB 04           shr         bl,4
  004E8D42: 0F B6 DB           movzx       ebx,bl
  004E8D45: C1 E3 14           shl         ebx,14h
  004E8D48: 09 D3              or          ebx,edx
  004E8D4A: 8A 54 A9 06        mov         dl,byte ptr [ecx+ebp*4+6]
  004E8D4E: C0 EA 04           shr         dl,4
  004E8D51: 0F B6 D2           movzx       edx,dl
  004E8D54: C1 E2 18           shl         edx,18h
  004E8D57: 09 DA              or          edx,ebx
  004E8D59: 8A 5C A9 07        mov         bl,byte ptr [ecx+ebp*4+7]
  004E8D5D: C0 EB 04           shr         bl,4
  004E8D60: 0F B6 DB           movzx       ebx,bl
  004E8D63: C1 E3 1C           shl         ebx,1Ch
  004E8D66: 09 D3              or          ebx,edx
  004E8D68: 89 1C 68           mov         dword ptr [eax+ebp*2],ebx
  004E8D6B: 83 C5 02           add         ebp,2
  004E8D6E: 39 F5              cmp         ebp,esi
  004E8D70: 7C 8A              jl          004E8CFC

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20150512/f81fe2e7/attachment.html>


More information about the llvm-bugs mailing list