<html>
<head>
<base href="https://llvm.org/bugs/" />
</head>
<body><span class="vcard"><a class="email" href="mailto:llvm-dev@redking.me.uk" title="Simon Pilgrim <llvm-dev@redking.me.uk>"> <span class="fn">Simon Pilgrim</span></a>
</span> changed
<a class="bz_bug_link
bz_status_RESOLVED bz_closed"
title="RESOLVED FIXED - Should not vectorize variable shifts when no instructions for it are available"
href="https://llvm.org/bugs/show_bug.cgi?id=15077">bug 15077</a>
<br>
<table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>What</th>
<th>Removed</th>
<th>Added</th>
</tr>
<tr>
<td style="text-align:right;">Status</td>
<td>NEW
</td>
<td>RESOLVED
</td>
</tr>
<tr>
<td style="text-align:right;">Resolution</td>
<td>---
</td>
<td>FIXED
</td>
</tr></table>
<p>
<div>
<b><a class="bz_bug_link
bz_status_RESOLVED bz_closed"
title="RESOLVED FIXED - Should not vectorize variable shifts when no instructions for it are available"
href="https://llvm.org/bugs/show_bug.cgi?id=15077#c4">Comment # 4</a>
on <a class="bz_bug_link
bz_status_RESOLVED bz_closed"
title="RESOLVED FIXED - Should not vectorize variable shifts when no instructions for it are available"
href="https://llvm.org/bugs/show_bug.cgi?id=15077">bug 15077</a>
from <span class="vcard"><a class="email" href="mailto:llvm-dev@redking.me.uk" title="Simon Pilgrim <llvm-dev@redking.me.uk>"> <span class="fn">Simon Pilgrim</span></a>
</span></b>
<pre>Resolving this. With rL284939 we have accurate vector shift costs for SSE4.1+,
which means we can correctly vectorize the inner loop as 4 * <4 x i32>:
LBB0_8:
vpmovsxbd -12(%ebx), %xmm5
vmovdqu -48(%edi), %xmm3
vmovdqu -32(%edi), %xmm4
vmovdqu -16(%edi), %xmm2
vmovdqu (%edi), %xmm1
vpsrldq $12, %xmm5, %xmm6 # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
vpsrlq $32, %xmm5, %xmm7
vpsrld %xmm6, %xmm3, %xmm6
vpsrld %xmm7, %xmm3, %xmm7
vpblendw $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
vpunpckhdq %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
vpmovzxdq %xmm5, %xmm5 # xmm5 = xmm5[0],zero,xmm5[1],zero
vpsrld %xmm7, %xmm3, %xmm7
vpsrld %xmm5, %xmm3, %xmm3
vpmovsxbd -8(%ebx), %xmm5
vpblendw $240, %xmm7, %xmm3, %xmm3 # xmm3 = xmm3[0,1,2,3],xmm7[4,5,6,7]
vpblendw $204, %xmm6, %xmm3, %xmm3 # xmm3 = xmm3[0,1],xmm6[2,3],xmm3[4,5],xmm6[6,7]
vpsrldq $12, %xmm5, %xmm6 # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
vpsrlq $32, %xmm5, %xmm7
vpsrld %xmm6, %xmm4, %xmm6
vpsrld %xmm7, %xmm4, %xmm7
vpblendw $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
vpunpckhdq %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
vpmovzxdq %xmm5, %xmm5 # xmm5 = xmm5[0],zero,xmm5[1],zero
vpsrld %xmm7, %xmm4, %xmm7
vpsrld %xmm5, %xmm4, %xmm4
vpmovsxbd -4(%ebx), %xmm5
vpblendw $240, %xmm7, %xmm4, %xmm4 # xmm4 = xmm4[0,1,2,3],xmm7[4,5,6,7]
vpblendw $204, %xmm6, %xmm4, %xmm4 # xmm4 = xmm4[0,1],xmm6[2,3],xmm4[4,5],xmm6[6,7]
vpsrldq $12, %xmm5, %xmm6 # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
vpsrlq $32, %xmm5, %xmm7
vpsrld %xmm6, %xmm2, %xmm6
vpsrld %xmm7, %xmm2, %xmm7
vpblendw $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
vpunpckhdq %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
vpmovzxdq %xmm5, %xmm5 # xmm5 = xmm5[0],zero,xmm5[1],zero
vpsrld %xmm7, %xmm2, %xmm7
vpsrld %xmm5, %xmm2, %xmm2
vpmovsxbd (%ebx), %xmm5
vmovdqu %xmm3, -48(%edi)
vmovdqu %xmm4, -32(%edi)
addl $16, %ebx
vpblendw $240, %xmm7, %xmm2, %xmm2 # xmm2 = xmm2[0,1,2,3],xmm7[4,5,6,7]
vpblendw $204, %xmm6, %xmm2, %xmm2 # xmm2 = xmm2[0,1],xmm6[2,3],xmm2[4,5],xmm6[6,7]
vmovdqu %xmm2, -16(%edi)
vpsrldq $12, %xmm5, %xmm6 # xmm6 = xmm5[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
vpsrlq $32, %xmm5, %xmm7
vpsrld %xmm6, %xmm1, %xmm6
vpsrld %xmm7, %xmm1, %xmm7
vpblendw $240, %xmm6, %xmm7, %xmm6 # xmm6 = xmm7[0,1,2,3],xmm6[4,5,6,7]
vpunpckhdq %xmm0, %xmm5, %xmm7 # xmm7 = xmm5[2],xmm0[2],xmm5[3],xmm0[3]
vpmovzxdq %xmm5, %xmm5 # xmm5 = xmm5[0],zero,xmm5[1],zero
vpsrld %xmm7, %xmm1, %xmm7
vpsrld %xmm5, %xmm1, %xmm1
vpblendw $240, %xmm7, %xmm1, %xmm1 # xmm1 = xmm1[0,1,2,3],xmm7[4,5,6,7]
vpblendw $204, %xmm6, %xmm1, %xmm1 # xmm1 = xmm1[0,1],xmm6[2,3],xmm1[4,5],xmm6[6,7]
vmovdqu %xmm1, (%edi)
addl $64, %edi
addl $-16, %ebp
jne LBB0_8
movl %esi, %edi
cmpl %eax, %esi
je LBB0_10</pre>
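<p>
As a point of reference, a minimal C sketch of the kind of loop shape being vectorized above (a hypothetical reconstruction, not the bug's actual test case), assuming 32-bit elements shifted by per-element 8-bit amounts as suggested by the vpmovsxbd/vpsrld sequence:
</p>
<pre>
/* Hypothetical loop shape: each 32-bit element is shifted right by a
   per-element 8-bit amount, matching the vpmovsxbd + vpsrld pattern. */
void var_shift(unsigned *dst, const signed char *amt, int n) {
  for (int i = 0; i < n; ++i)
    dst[i] >>= amt[i]; /* variable (non-uniform) shift count per element */
}
</pre>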
</div>
</p>
</body>
</html>