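<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Bug 33247 - Enable extractelement to alias with vector spills</title>
<base href="https://bugs.llvm.org/">
</head>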
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Enable extractelement to alias with vector spills"
href="https://bugs.llvm.org/show_bug.cgi?id=33247">33247</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Enable extractelement to alias with vector spills
</td>
</tr>
<tr>
<th>Product</th>
<td>new-bugs
</td>
</tr>
<tr>
<th>Version</th>
<td>unspecified
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Windows NT
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>new bugs
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>llvm-dev@redking.me.uk
</td>
</tr>
<tr>
<th>CC</th>
<td>andrea.dibiagio@gmail.com, davide@freebsd.org, filcab@gmail.com, llvm-bugs@lists.llvm.org, spatel+llvm@rotateright.com
</td>
</tr></table>
<p>
<div>
<pre>define i32 @popcnt_i128(&lt;4 x i32&gt; %a0) {
%1 = tail call &lt;2 x i64&gt; asm sideeffect "nop",
"=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
%2 = extractelement &lt;4 x i32&gt; %a0, i32 0
%3 = extractelement &lt;4 x i32&gt; %a0, i32 1
%4 = extractelement &lt;4 x i32&gt; %a0, i32 2
%5 = extractelement &lt;4 x i32&gt; %a0, i32 3
%6 = call i32 @llvm.ctpop.i32(i32 %2)
%7 = call i32 @llvm.ctpop.i32(i32 %3)
%8 = call i32 @llvm.ctpop.i32(i32 %4)
%9 = call i32 @llvm.ctpop.i32(i32 %5)
%10 = add i32 %6, %7
%11 = add i32 %8, %9
%12 = add i32 %10, %11
ret i32 %12
}
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
llc -mtriple=x86_64-unknown -mcpu=btver2
popcnt_i128:
pushq %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
vmovd %xmm0, -4(%rsp) # 4-byte Folded Spill
vpextrd $1, %xmm0, -8(%rsp) # 4-byte Folded Spill
vpextrd $2, %xmm0, -12(%rsp) # 4-byte Folded Spill
vpextrd $3, %xmm0, -16(%rsp) # 4-byte Folded Spill
#APP
nop
#NO_APP
popcntl -4(%rsp), %ecx # 4-byte Folded Reload
popcntl -8(%rsp), %edx # 4-byte Folded Reload
popcntl -12(%rsp), %esi # 4-byte Folded Reload
popcntl -16(%rsp), %eax # 4-byte Folded Reload
addl %ecx, %edx
addl %esi, %eax
addl %edx, %eax
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
retq
Each extractelement is being spilled to stack separately, requiring 4 scalar
stores. Ideally the vector would be spilled whole and the reloads done as
scalars:
popcnt_i128_IDEAL:
pushq %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
vmovdqa %xmm0, -16(%rsp) # 16-byte Folded Spill
#APP
nop
#NO_APP
popcntl -4(%rsp), %ecx # 4-byte Folded Reload
popcntl -8(%rsp), %edx # 4-byte Folded Reload
popcntl -12(%rsp), %esi # 4-byte Folded Reload
popcntl -16(%rsp), %eax # 4-byte Folded Reload
addl %ecx, %edx
addl %esi, %eax
addl %edx, %eax
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
retq</pre>
</div>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>