[llvm-bugs] [Bug 24413] New: [loop vectorizer] unoptimized vectorized code for induction variable
via llvm-bugs
llvm-bugs at lists.llvm.org
Mon Aug 10 09:47:36 PDT 2015
https://llvm.org/bugs/show_bug.cgi?id=24413
Bug ID: 24413
Summary: [loop vectorizer] unoptimized vectorized code for
induction variable
Product: libraries
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: normal
Priority: P
Component: Loop Optimizer
Assignee: unassignedbugs at nondot.org
Reporter: wmi at google.com
CC: llvm-bugs at lists.llvm.org
Classification: Unclassified
For the simple loop below,
testcase 1.c:
typedef struct ST {
unsigned char u;
unsigned char v;
} ST;
ST c[10000];
int foo(int j, int N) {
int i;
int total1;
for (i = j; i < N; i++) {
total1 += c[i].u;
}
return total1;
}
~/workarea/llvm-r243653/build/bin/clang -O2 -fno-unroll-loops -S 1.c
llvm generated vectorized code for the kernel loop:
.LBB0_4: # %vector.body
# =>This Inner Loop Header: Depth=1
***
movd %rcx, %xmm4
pshufd $68, %xmm4, %xmm4 # xmm4 = xmm4[0,1,0,1]
movdqa %xmm4, %xmm5
paddq %xmm1, %xmm5
paddq %xmm2, %xmm4
pshufd $78, %xmm5, %xmm5 # xmm5 = xmm5[2,3,0,1]
movd %xmm5, %rdi
movd %xmm4, %r11
pshufd $78, %xmm4, %xmm4 # xmm4 = xmm4[2,3,0,1]
movd %xmm4, %r9
*** # the code segment above is to set rdi/r11/r9 to i+1, i+2, i+3
movzbl c(%rcx,%rcx), %edx
pinsrw $0, %edx, %xmm4
movzbl c(%rdi,%rdi), %edx
pinsrw $2, %edx, %xmm4
movzbl c(%r11,%r11), %edx
pinsrw $4, %edx, %xmm4
movzbl c(%r9,%r9), %edx
pinsrw $6, %edx, %xmm4
pand %xmm3, %xmm4
paddd %xmm4, %xmm0
addq $4, %rcx
addq $-4, %rax
jne .LBB0_4
It splats the induction variable into an xmm register, adds the vector
[0, 1, 2, 3] to that register, and then extracts the scalar elements from the
resulting xmm register. This is unnecessarily more complex than simply using a
scalar version of the induction variable. The kernel loop could instead be
generated as:
.LBB0_4: # %vector.body
# =>This Inner Loop Header: Depth=1
***
leaq 1(%rcx), %rdi
leaq 2(%rcx), %r11
leaq 3(%rcx), %r9
*** # the code segment above is to set rdi/r11/r9 to i+1, i+2, i+3
movzbl c(%rcx,%rcx), %edx
pinsrw $0, %edx, %xmm4
movzbl c(%rdi,%rdi), %edx
pinsrw $2, %edx, %xmm4
movzbl c(%r11,%r11), %edx
pinsrw $4, %edx, %xmm4
movzbl c(%r9,%r9), %edx
pinsrw $6, %edx, %xmm4
pand %xmm3, %xmm4
paddd %xmm4, %xmm0
addq $4, %rcx
addq $-4, %rax
jne .LBB0_4
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20150810/a3e9c44e/attachment.html>
More information about the llvm-bugs
mailing list