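<!doctype html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Bug 32404 - Wrong vector store scheduled before vector load for overlapped array memory</title>
<base href="https://bugs.llvm.org/">
</head>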
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - Wrong vector store scheduled before vector load for overlapped array memory"
href="https://bugs.llvm.org/show_bug.cgi?id=32404">32404</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>Wrong vector store scheduled before vector load for overlapped array memory
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>Windows NT
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Backend: ARM
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>huihuiz@codeaurora.org
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>Created <span class=""><a href="attachment.cgi?id=18163" name="attach_18163" title="test input">attachment 18163</a> <a href="attachment.cgi?id=18163&amp;action=edit" title="test input">[details]</a></span>
test input
Consider vectorizing the loop below, given a vector factor of 8. Pointers a, b,
and c are initialized as arrays of short, and the array sizes are 1000.
void foo(short *a, short *b, short *c) {
...
int n = 13;
for (int i=0; i&lt;n-1; i++) {
a[i] = b[i] + c[i];
b[i] = a[i] * c[i];
a[i] = b[i] + a[i+1] * c[i];
}
...
}
Using "llc test.ll -O3 -o - -mcpu=krait" to generate assembly.
The first vector store to a[i] "vstr d16, [r1, #16]" is scheduled before the
load of a[i+1] "vldr d19, [r0]". The store of a[8..11] will pollute the vector
memory read of a[9..12].
In the output of "llc test.ll -O3 -o - -mcpu=krait -debug-only=isel",
the optimized vector-legalized selection DAG also shows the first vector store
to a[i] being moved before the vector read of a[i+1].
add r0, r2, #16
vld1.16 {d16}, [r0:64]
add r0, r3, #16
vld1.16 {d17}, [r0:64]
vmovl.u16 q9, d16
vmovl.s16 q8, d17
vadd.i32 q8, q8, q9
vmovn.i32 d16, q8
vldr d17, [r3, #16]
vstr d16, [r1, #16]
vldr d16, [r3, #16]
vld1.16 {d20}, [r0:64]
add r0, r1, #18
vmull.s16 q8, d17, d16
vmovl.s16 q10, d20
vmla.i32 q8, q9, q10
vmovn.i32 d18, q8
vldr d19, [r0]
vstr d18, [r2, #16]
vldr d18, [r3, #16]
vmlal.s16 q8, d18, d19
vmovn.i32 d16, q8
vstr d16, [r1, #16]
bx lr
Input IR
define void @func(i16* %a, i16* %b, i16* %c) {
entry:
%scevgep0 = getelementptr i16, i16* %a, i32 9
%vector_ptr0 = bitcast i16* %scevgep0 to &lt;4 x i16>*
%vec0 = load &lt;4 x i16>, &lt;4 x i16>* %vector_ptr0, align 8, !alias.scope !7,
!noalias !9
%scevgep1 = getelementptr i16, i16* %b, i32 8
%vector_ptr1 = bitcast i16* %scevgep1 to &lt;4 x i16>*
%vec1 = load &lt;4 x i16>, &lt;4 x i16>* %vector_ptr1, align 8, !alias.scope !13,
!noalias !14
%0 = zext &lt;4 x i16> %vec1 to &lt;4 x i32>
%scevgep2 = getelementptr i16, i16* %c, i32 8
%vector_ptr2 = bitcast i16* %scevgep2 to &lt;4 x i16>*
%vec2 = load &lt;4 x i16>, &lt;4 x i16>* %vector_ptr2, align 8, !alias.scope !12,
!noalias !16
%1 = sext &lt;4 x i16> %vec2 to &lt;4 x i32>
%vec3 = add &lt;4 x i32> %1, %0
%2 = trunc &lt;4 x i32> %vec3 to &lt;4 x i16>
%scevgep3 = getelementptr i16, i16* %a, i32 8
%vector_ptr3 = bitcast i16* %scevgep3 to &lt;4 x i16>*
store &lt;4 x i16> %2, &lt;4 x i16>* %vector_ptr3, align 8, !alias.scope !7,
!noalias !9
%vector_ptr4 = bitcast i16* %scevgep2 to &lt;4 x i16>*
%vec4 = load &lt;4 x i16>, &lt;4 x i16>* %vector_ptr4, align 8, !alias.scope !12,
!noalias !16
%3 = sext &lt;4 x i16> %vec4 to &lt;4 x i32>
%vec5 = mul &lt;4 x i32> %3, %vec3
%4 = trunc &lt;4 x i32> %vec5 to &lt;4 x i16>
%vector_ptr5 = bitcast i16* %scevgep1 to &lt;4 x i16>*
store &lt;4 x i16> %4, &lt;4 x i16>* %vector_ptr5, align 8, !alias.scope !13,
!noalias !14
%5 = sext &lt;4 x i16> %vec0 to &lt;4 x i32>
%vector_ptr6 = bitcast i16* %scevgep2 to &lt;4 x i16>*
%vec6 = load &lt;4 x i16>, &lt;4 x i16>* %vector_ptr6, align 8, !alias.scope !12,
!noalias !16
%6 = sext &lt;4 x i16> %vec6 to &lt;4 x i32>
%vec7 = mul &lt;4 x i32> %6, %5
%vec8 = add &lt;4 x i32> %vec7, %vec5
%7 = trunc &lt;4 x i32> %vec8 to &lt;4 x i16>
%vector_ptr7 = bitcast i16* %scevgep3 to &lt;4 x i16>*
store &lt;4 x i16> %7, &lt;4 x i16>* %vector_ptr7, align 8, !alias.scope !7,
!noalias !9
ret void
}
!7 = distinct !{!7, !8, !"polly.alias.scope.MemRef_a"}
!8 = distinct !{!8, !"polly.alias.scope.domain"}
!9 = !{!12, !13}
!12 = distinct !{!12, !8, !"polly.alias.scope.MemRef_c"}
!13 = distinct !{!13, !8, !"polly.alias.scope.MemRef_b"}
!14 = !{!12, !7}
!16 = !{!13, !7}</pre>
</div>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>