<html>
<head>
<base href="https://llvm.org/bugs/" />
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW --- - [loop vectorizer] unoptimized vectorized code for induction variable"
href="https://llvm.org/bugs/show_bug.cgi?id=24413">24413</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>[loop vectorizer] unoptimized vectorized code for induction variable
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>normal
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Loop Optimizer
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>wmi@google.com
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr>
<tr>
<th>Classification</th>
<td>Unclassified
</td>
</tr></table>
<p>
<div>
<pre>For the simple loop below,
testcase 1.c:
typedef struct ST {
unsigned char u;
unsigned char v;
} ST;
ST c[10000];
int foo(int j, int N) {
int i;
int total1;
for (i = j; i &lt; N; i++) {
total1 += c[i].u;
}
return total1;
}
~/workarea/llvm-r243653/build/bin/clang -O2 -fno-unroll-loops -S 1.c
llvm generated vectorized code for the kernel loop:
.LBB0_4: # %vector.body
# =>This Inner Loop Header: Depth=1
***
movd %rcx, %xmm4
pshufd $68, %xmm4, %xmm4 # xmm4 = xmm4[0,1,0,1]
movdqa %xmm4, %xmm5
paddq %xmm1, %xmm5
paddq %xmm2, %xmm4
pshufd $78, %xmm5, %xmm5 # xmm5 = xmm5[2,3,0,1]
movd %xmm5, %rdi
movd %xmm4, %r11
pshufd $78, %xmm4, %xmm4 # xmm4 = xmm4[2,3,0,1]
movd %xmm4, %r9
*** # the code segment above is to set rdi/r11/r9 to i+1, i+2, i+3
movzbl c(%rcx,%rcx), %edx
pinsrw $0, %edx, %xmm4
movzbl c(%rdi,%rdi), %edx
pinsrw $2, %edx, %xmm4
movzbl c(%r11,%r11), %edx
pinsrw $4, %edx, %xmm4
movzbl c(%r9,%r9), %edx
pinsrw $6, %edx, %xmm4
pand %xmm3, %xmm4
paddd %xmm4, %xmm0
addq $4, %rcx
addq $-4, %rax
jne .LBB0_4
It splats the induction variable into an xmm register, adds the vector
[0, 1, 2, 3] to that register, and then extracts the scalar elements from the
resulting xmm register. This is unnecessarily more complex than just using a
scalar version of the induction variable, as in the following code:
.LBB0_4: # %vector.body
# =>This Inner Loop Header: Depth=1
***
leaq 1(%rcx), %rdi
leaq 2(%rcx), %r11
leaq 3(%rcx), %r9
*** # the code segment above is to set rdi/r11/r9 to i+1, i+2, i+3
movzbl c(%rcx,%rcx), %edx
pinsrw $0, %edx, %xmm4
movzbl c(%rdi,%rdi), %edx
pinsrw $2, %edx, %xmm4
movzbl c(%r11,%r11), %edx
pinsrw $4, %edx, %xmm4
movzbl c(%r9,%r9), %edx
pinsrw $6, %edx, %xmm4
pand %xmm3, %xmm4
paddd %xmm4, %xmm0
addq $4, %rcx
addq $-4, %rax
jne .LBB0_4</pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>