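<html lang="en">
<head>
<meta charset="utf-8">
<title>Bug 23508 - 2 testcases for which vs2015 generates faster code than llvm on x86</title>
<base href="https://llvm.org/bugs/" />
</head>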
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW --- - 2 testcases for which vs2015 generates faster code than llvm on x86"
href="https://llvm.org/bugs/show_bug.cgi?id=23508">23508</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>2 testcases for which vs2015 generates faster code than llvm on x86
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Windows XP
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>normal
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Backend: X86
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>wmi@google.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvmbugs@cs.uiuc.edu
</td>
</tr>
<tr>
<th>Classification</th>
<td>Unclassified
</td>
</tr></table>
<div>
<pre>For the following testcase, Visual Studio 2015 generated better code than LLVM
(more than 2 times faster on Windows + Sandy Bridge).
#define uint8 unsigned char
void foo(const uint8* src_argb, uint8* dst_rgb, int width) {
int x;
for (x = 0; x &lt; width; ++x) {
uint8 b = src_argb[0];
uint8 g = src_argb[1];
uint8 r = src_argb[2];
dst_rgb[0] = r;
dst_rgb[1] = g;
dst_rgb[2] = b;
dst_rgb += 3;
src_argb += 4;
}
}
vs2015
foo (9863 ms)
0042D860: 8A 58 FE mov bl,byte ptr [eax-2]
0042D863: 8D 76 03 lea esi,[esi+3]
0042D866: 8A 50 FF mov dl,byte ptr [eax-1]
0042D869: 8D 40 04 lea eax,[eax+4]
0042D86C: 8A 48 FC mov cl,byte ptr [eax-4]
0042D86F: 88 4E FB mov byte ptr [esi-5],cl
0042D872: 88 56 FC mov byte ptr [esi-4],dl
0042D875: 88 5E FD mov byte ptr [esi-3],bl
0042D878: 83 EF 01 sub edi,1
0042D87B: 75 E3 jne 0042D860
clang
foo (22431 ms)
004E88FE: 8A 1A mov bl,byte ptr [edx]
004E8900: 88 5C 24 03 mov byte ptr [esp+3],bl
004E8904: 8A 7A 01 mov bh,byte ptr [edx+1]
004E8907: 8A 5A 02 mov bl,byte ptr [edx+2]
004E890A: 88 19 mov byte ptr [ecx],bl
004E890C: 88 79 01 mov byte ptr [ecx+1],bh
004E890F: 8A 5C 24 03 mov bl,byte ptr [esp+3]
004E8913: 88 59 02 mov byte ptr [ecx+2],bl
004E8916: 83 C2 04 add edx,4
004E8919: 83 C1 03 add ecx,3
004E891C: 48 dec eax
004E891D: 75 DF jne 004E88FE
Another testcase:
#define uint8 unsigned char
#define uint32 unsigned
void foo(const uint8* src_argb, uint8* dst_rgb, int width) {
int x;
for (x = 0; x &lt; width - 1; x += 2) {
uint8 b0 = src_argb[0] >> 4;
uint8 g0 = src_argb[1] >> 4;
uint8 r0 = src_argb[2] >> 4;
uint8 a0 = src_argb[3] >> 4;
uint8 b1 = src_argb[4] >> 4;
uint8 g1 = src_argb[5] >> 4;
uint8 r1 = src_argb[6] >> 4;
uint8 a1 = src_argb[7] >> 4;
*(uint32*)(dst_rgb) =
b0 | (g0 &lt;&lt; 4) | (r0 &lt;&lt; 8) | (a0 &lt;&lt; 12) |
(b1 &lt;&lt; 16) | (g1 &lt;&lt; 20) | (r1 &lt;&lt; 24) | (a1 &lt;&lt; 28);
dst_rgb += 4;
src_argb += 8;
}
}
VS2015 ARGBToARGB4444_Opt (13664 ms)
0042DBD0: 0F B6 70 07 movzx esi,byte ptr [eax+7]
0042DBD4: 0F B6 48 06 movzx ecx,byte ptr [eax+6]
0042DBD8: 83 E6 F0 and esi,0FFFFFFF0h
0042DBDB: 0F B6 50 02 movzx edx,byte ptr [eax+2]
0042DBDF: 83 E1 F0 and ecx,0FFFFFFF0h
0042DBE2: C1 E6 04 shl esi,4
0042DBE5: 83 E2 F0 and edx,0FFFFFFF0h
0042DBE8: 0B F1 or esi,ecx
0042DBEA: C1 E2 04 shl edx,4
0042DBED: 0F B6 48 05 movzx ecx,byte ptr [eax+5]
0042DBF1: C1 E6 04 shl esi,4
0042DBF4: 83 E1 F0 and ecx,0FFFFFFF0h
0042DBF7: 0B F1 or esi,ecx
0042DBF9: 0F B6 48 04 movzx ecx,byte ptr [eax+4]
0042DBFD: C1 E6 04 shl esi,4
0042DC00: 83 E1 F0 and ecx,0FFFFFFF0h
0042DC03: 0B F1 or esi,ecx
0042DC05: 0F B6 48 03 movzx ecx,byte ptr [eax+3]
0042DC09: C1 E6 04 shl esi,4
0042DC0C: 83 E1 F0 and ecx,0FFFFFFF0h
0042DC0F: 0B F1 or esi,ecx
0042DC11: 0F B6 48 01 movzx ecx,byte ptr [eax+1]
0042DC15: 0B D1 or edx,ecx
0042DC17: C1 E6 08 shl esi,8
0042DC1A: 0F B6 08 movzx ecx,byte ptr [eax]
0042DC1D: 83 E2 F0 and edx,0FFFFFFF0h
0042DC20: 0B F2 or esi,edx
0042DC22: C1 E9 04 shr ecx,4
0042DC25: 0B F1 or esi,ecx
0042DC27: 83 C0 08 add eax,8
0042DC2A: 89 33 mov dword ptr [ebx],esi
0042DC2C: 83 C3 04 add ebx,4
0042DC2F: 83 EF 01 sub edi,1
0042DC32: 75 9C jne 0042DBD0
clang ARGBToARGB4444_Opt (28555 ms)
004E8CFC: 8A 1C A9 mov bl,byte ptr [ecx+ebp*4]
004E8CFF: C0 EB 04 shr bl,4
004E8D02: 8A 7C A9 01 mov bh,byte ptr [ecx+ebp*4+1]
004E8D06: 80 E7 F0 and bh,0F0h
004E8D09: 8A 54 A9 02 mov dl,byte ptr [ecx+ebp*4+2]
004E8D0D: C0 EA 04 shr dl,4
004E8D10: 08 DF or bh,bl
004E8D12: 0F B6 DF movzx ebx,bh
004E8D15: 0F B6 D2 movzx edx,dl
004E8D18: C1 E2 08 shl edx,8
004E8D1B: 09 DA or edx,ebx
004E8D1D: 8A 5C A9 03 mov bl,byte ptr [ecx+ebp*4+3]
004E8D21: C0 EB 04 shr bl,4
004E8D24: 0F B6 DB movzx ebx,bl
004E8D27: C1 E3 0C shl ebx,0Ch
004E8D2A: 09 D3 or ebx,edx
004E8D2C: 8A 54 A9 04 mov dl,byte ptr [ecx+ebp*4+4]
004E8D30: C0 EA 04 shr dl,4
004E8D33: 0F B6 D2 movzx edx,dl
004E8D36: C1 E2 10 shl edx,10h
004E8D39: 09 DA or edx,ebx
004E8D3B: 8A 5C A9 05 mov bl,byte ptr [ecx+ebp*4+5]
004E8D3F: C0 EB 04 shr bl,4
004E8D42: 0F B6 DB movzx ebx,bl
004E8D45: C1 E3 14 shl ebx,14h
004E8D48: 09 D3 or ebx,edx
004E8D4A: 8A 54 A9 06 mov dl,byte ptr [ecx+ebp*4+6]
004E8D4E: C0 EA 04 shr dl,4
004E8D51: 0F B6 D2 movzx edx,dl
004E8D54: C1 E2 18 shl edx,18h
004E8D57: 09 DA or edx,ebx
004E8D59: 8A 5C A9 07 mov bl,byte ptr [ecx+ebp*4+7]
004E8D5D: C0 EB 04 shr bl,4
004E8D60: 0F B6 DB movzx ebx,bl
004E8D63: C1 E3 1C shl ebx,1Ch
004E8D66: 09 D3 or ebx,edx
004E8D68: 89 1C 68 mov dword ptr [eax+ebp*2],ebx
004E8D6B: 83 C5 02 add ebp,2
004E8D6E: 39 F5 cmp ebp,esi
004E8D70: 7C 8A jl 004E8CFC</pre>
</div>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>