[llvm-bugs] [Bug 33247] New: Enable extractelement to alias with vector spills
via llvm-bugs
llvm-bugs at lists.llvm.org
Wed May 31 10:33:57 PDT 2017
https://bugs.llvm.org/show_bug.cgi?id=33247
Bug ID: 33247
Summary: Enable extractelement to alias with vector spills
Product: new-bugs
Version: unspecified
Hardware: PC
OS: Windows NT
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: llvm-dev at redking.me.uk
CC: andrea.dibiagio at gmail.com, davide at freebsd.org,
filcab at gmail.com, llvm-bugs at lists.llvm.org,
spatel+llvm at rotateright.com
; Reproducer: returns the sum of the population counts of the four i32 lanes
; of %a0.
define i32 @popcnt_i128(<4 x i32> %a0) {
; Side-effecting inline asm ("nop") with one "=x" output. Its clobber list
; names every general-purpose register except rsp, plus xmm1-xmm15 and the
; flags, so values live across it are pushed out of those registers.
%1 = tail call <2 x i64> asm sideeffect "nop",
"=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
; Split the vector into its four scalar lanes.
%2 = extractelement <4 x i32> %a0, i32 0
%3 = extractelement <4 x i32> %a0, i32 1
%4 = extractelement <4 x i32> %a0, i32 2
%5 = extractelement <4 x i32> %a0, i32 3
; Population count of each lane.
%6 = call i32 @llvm.ctpop.i32(i32 %2)
%7 = call i32 @llvm.ctpop.i32(i32 %3)
%8 = call i32 @llvm.ctpop.i32(i32 %4)
%9 = call i32 @llvm.ctpop.i32(i32 %5)
; Pairwise reduction: (lane0 + lane1) + (lane2 + lane3).
%10 = add i32 %6, %7
%11 = add i32 %8, %9
%12 = add i32 %10, %11
ret i32 %12
}
; Population-count intrinsic used for each lane above.
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
llc -mtriple=x86_64-unknown -mcpu=btver2
# popcnt_i128 as emitted by llc (AT&T syntax). Each lane of xmm0 is extracted
# and stored to its own 4-byte slot below rsp before the inline asm, then
# reloaded as a memory operand of popcntl. Result is left in eax.
popcnt_i128:
# Save rbp, r12-r15 and rbx (all named in the inline asm's clobber list);
# they are restored in reverse order before retq.
pushq %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
# Four separate scalar stores: lane i of xmm0 goes to -4*(i+1)(%rsp).
vmovd %xmm0, -4(%rsp) # 4-byte Folded Spill
vpextrd $1, %xmm0, -8(%rsp) # 4-byte Folded Spill
vpextrd $2, %xmm0, -12(%rsp) # 4-byte Folded Spill
vpextrd $3, %xmm0, -16(%rsp) # 4-byte Folded Spill
#APP
nop
#NO_APP
# Reload each spilled lane directly as the popcntl source operand.
popcntl -4(%rsp), %ecx # 4-byte Folded Reload
popcntl -8(%rsp), %edx # 4-byte Folded Reload
popcntl -12(%rsp), %esi # 4-byte Folded Reload
popcntl -16(%rsp), %eax # 4-byte Folded Reload
# eax = (ecx + edx) + (esi + eax): sum of the four lane popcounts.
addl %ecx, %edx
addl %esi, %eax
addl %edx, %eax
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
retq
Each extractelement result is spilled to the stack separately, requiring four
scalar stores (one vmovd and three vpextrd). Ideally the vector would be
spilled whole with a single 16-byte store and the elements reloaded as
scalars:
# popcnt_i128_IDEAL: desired codegen (AT&T syntax). The whole vector is
# spilled with one 16-byte store and each lane is reloaded as a folded
# 4-byte popcntl memory operand.
# In:  xmm0 = the <4 x i32> argument
# Out: eax  = sum of the population counts of the four lanes
popcnt_i128_IDEAL:
# Save rbp, r12-r15 and rbx; restored in reverse order before retq.
pushq %rbp
pushq %r15
pushq %r14
pushq %r13
pushq %r12
pushq %rbx
# vmovdqa faults on an address that is not 16-byte aligned. Assuming the
# SysV AMD64 entry state (rsp % 16 == 8), the six 8-byte pushes above leave
# rsp % 16 == 8, so the nearest aligned 16-byte slot below rsp is -24(%rsp),
# not -16(%rsp). Lane i of the vector then sits at -24 + 4*i (%rsp).
vmovdqa %xmm0, -24(%rsp) # 16-byte Folded Spill
#APP
nop
#NO_APP
popcntl -24(%rsp), %ecx # 4-byte Folded Reload (lane 0)
popcntl -20(%rsp), %edx # 4-byte Folded Reload (lane 1)
popcntl -16(%rsp), %esi # 4-byte Folded Reload (lane 2)
popcntl -12(%rsp), %eax # 4-byte Folded Reload (lane 3)
# eax = (lane0 + lane1) + (lane2 + lane3), all as popcounts.
addl %ecx, %edx
addl %esi, %eax
addl %edx, %eax
popq %rbx
popq %r12
popq %r13
popq %r14
popq %r15
popq %rbp
retq
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170531/9ee013eb/attachment.html>
More information about the llvm-bugs mailing list