[llvm-bugs] [Bug 32404] New: Wrong vector store scheduled before vector load for overlapped array memory

via llvm-bugs llvm-bugs at lists.llvm.org
Thu Mar 23 23:16:47 PDT 2017


https://bugs.llvm.org/show_bug.cgi?id=32404

            Bug ID: 32404
           Summary: Wrong vector store scheduled before vector load for
                    overlapped array memory
           Product: libraries
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: Backend: ARM
          Assignee: unassignedbugs at nondot.org
          Reporter: huihuiz at codeaurora.org
                CC: llvm-bugs at lists.llvm.org

Created attachment 18163
  --> https://bugs.llvm.org/attachment.cgi?id=18163&action=edit
test input

Consider vectorizing the loop below with a vectorization factor of 8. The
pointers a, b and c are initialized as arrays of short, each of size 1000.

void foo(short *a, short *b, short *c) {
  ...
  int n = 13;
  for (int i=0; i<n-1; i++) {
    a[i] = b[i] + c[i];
    b[i] = a[i] * c[i];
    a[i] = b[i] + a[i+1] * c[i];
  }
  ...
}

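The assembly was generated with "llc test.ll -O3 -o - -mcpu=krait".
The first vector store to a[i], "vstr d16, [r1, #16]", is scheduled before the
load of a[i+1], "vldr d19, [r0]". The two accesses overlap: the store to
a[8..11] overwrites a[9..11] before the vector load of a[9..12] reads them, so
the load observes the newly stored values instead of the original ones.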

In the output of "llc test.ll -O3 -o - -mcpu=krait -debug-only=isel",
the optimized vector-legalized selection DAG also shows the first vector store
to a[i] being moved before the vector read of a[i+1].

        add     r0, r2, #16
        vld1.16 {d16}, [r0:64]
        add     r0, r3, #16
        vld1.16 {d17}, [r0:64]
        vmovl.u16       q9, d16
        vmovl.s16       q8, d17
        vadd.i32        q8, q8, q9
        vmovn.i32       d16, q8
        vldr    d17, [r3, #16]
        vstr    d16, [r1, #16]
        vldr    d16, [r3, #16]
        vld1.16 {d20}, [r0:64]
        add     r0, r1, #18
        vmull.s16       q8, d17, d16
        vmovl.s16       q10, d20
        vmla.i32        q8, q9, q10
        vmovn.i32       d18, q8
        vldr    d19, [r0]
        vstr    d18, [r2, #16]
        vldr    d18, [r3, #16]
        vmlal.s16       q8, d18, d19
        vmovn.i32       d16, q8
        vstr    d16, [r1, #16]
        bx      lr


Input IR
define void @func(i16* %a, i16* %b, i16* %c) {
entry:
  %scevgep0 = getelementptr i16, i16* %a, i32 9
  %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>*
  %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8, !alias.scope !7,
!noalias !9
  %scevgep1 = getelementptr i16, i16* %b, i32 8
  %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>*
  %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8, !alias.scope !13,
!noalias !14
  %0 = zext <4 x i16> %vec1 to <4 x i32>
  %scevgep2 = getelementptr i16, i16* %c, i32 8
  %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8, !alias.scope !12,
!noalias !16
  %1 = sext <4 x i16> %vec2 to <4 x i32>
  %vec3 = add <4 x i32> %1, %0
  %2 = trunc <4 x i32> %vec3 to <4 x i16>
  %scevgep3 = getelementptr i16, i16* %a, i32 8
  %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8, !alias.scope !7,
!noalias !9
  %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8, !alias.scope !12,
!noalias !16
  %3 = sext <4 x i16> %vec4 to <4 x i32>
  %vec5 = mul <4 x i32> %3, %vec3
  %4 = trunc <4 x i32> %vec5 to <4 x i16>
  %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>*
  store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8, !alias.scope !13,
!noalias !14
  %5 = sext <4 x i16> %vec0 to <4 x i32>
  %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>*
  %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8, !alias.scope !12,
!noalias !16
  %6 = sext <4 x i16> %vec6 to <4 x i32>
  %vec7 = mul <4 x i32> %6, %5
  %vec8 = add <4 x i32> %vec7, %vec5
  %7 = trunc <4 x i32> %vec8 to <4 x i16>
  %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>*
  store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8, !alias.scope !7,
!noalias !9
  ret void
}

!7 = distinct !{!7, !8, !"polly.alias.scope.MemRef_a"}
!8 = distinct !{!8, !"polly.alias.scope.domain"}
!9 = !{!12, !13}
!12 = distinct !{!12, !8, !"polly.alias.scope.MemRef_c"}
!13 = distinct !{!13, !8, !"polly.alias.scope.MemRef_b"}
!14 = !{!12, !7}
!16 = !{!13, !7}

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20170324/63698cfc/attachment.html>


More information about the llvm-bugs mailing list