<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Wrong vector store scheduled before vector load for overlapped array memory"

   href="https://bugs.llvm.org/show_bug.cgi?id=32404">32404</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Wrong vector store scheduled before vector load for overlapped array memory

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Windows NT

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Backend: ARM

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>huihuiz@codeaurora.org

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Created <span class=""><a href="attachment.cgi?id=18163" name="attach_18163" title="test input">attachment 18163</a> <a href="attachment.cgi?id=18163&amp;action=edit" title="test input">[details]</a></span>

test input

Consider vectorizing the loop below with a vector factor of 8. Pointers a, b,

and c are initialized as arrays of short, and each array has size 1000.

void foo(short *a, short *b, short *c) {

  ...

  int n = 13;

  for (int i=0; i&lt;n-1; i++) {

    a[i] = b[i] + c[i];

    b[i] = a[i] * c[i];

    a[i] = b[i] + a[i+1] * c[i];

  }

  ...

}

The assembly below was generated with "llc test.ll -O3 -o - -mcpu=krait".

The first vector store to a[i], "vstr d16, [r1, #16]", is scheduled before the

load of a[i+1], "vldr d19, [r0]". The store to a[8..11] therefore overwrites

a[9..11] before the vector load of a[9..12] reads them.

In the output of "llc test.ll -O3 -o - -mcpu=krait -debug-only=isel", the

optimized vector-legalized selection DAG also shows the first vector store to

a[i] being moved before the vector load of a[i+1]. Generated assembly:

        add     r0, r2, #16

        vld1.16 {d16}, [r0:64]

        add     r0, r3, #16

        vld1.16 {d17}, [r0:64]

        vmovl.u16       q9, d16

        vmovl.s16       q8, d17

        vadd.i32        q8, q8, q9

        vmovn.i32       d16, q8

        vldr    d17, [r3, #16]

        vstr    d16, [r1, #16]

        vldr    d16, [r3, #16]

        vld1.16 {d20}, [r0:64]

        add     r0, r1, #18

        vmull.s16       q8, d17, d16

        vmovl.s16       q10, d20

        vmla.i32        q8, q9, q10

        vmovn.i32       d18, q8

        vldr    d19, [r0]

        vstr    d18, [r2, #16]

        vldr    d18, [r3, #16]

        vmlal.s16       q8, d18, d19

        vmovn.i32       d16, q8

        vstr    d16, [r1, #16]

        bx      lr

Input IR

define void @func(i16* %a, i16* %b, i16* %c) {

entry:

  %scevgep0 = getelementptr i16, i16* %a, i32 9

  %vector_ptr0 = bitcast i16* %scevgep0 to <4 x i16>*

  %vec0 = load <4 x i16>, <4 x i16>* %vector_ptr0, align 8, !alias.scope !7,

!noalias !9

  %scevgep1 = getelementptr i16, i16* %b, i32 8

  %vector_ptr1 = bitcast i16* %scevgep1 to <4 x i16>*

  %vec1 = load <4 x i16>, <4 x i16>* %vector_ptr1, align 8, !alias.scope !13,

!noalias !14

  %0 = zext <4 x i16> %vec1 to <4 x i32>

  %scevgep2 = getelementptr i16, i16* %c, i32 8

  %vector_ptr2 = bitcast i16* %scevgep2 to <4 x i16>*

  %vec2 = load <4 x i16>, <4 x i16>* %vector_ptr2, align 8, !alias.scope !12,

!noalias !16

  %1 = sext <4 x i16> %vec2 to <4 x i32>

  %vec3 = add <4 x i32> %1, %0

  %2 = trunc <4 x i32> %vec3 to <4 x i16>

  %scevgep3 = getelementptr i16, i16* %a, i32 8

  %vector_ptr3 = bitcast i16* %scevgep3 to <4 x i16>*

  store <4 x i16> %2, <4 x i16>* %vector_ptr3, align 8, !alias.scope !7,

!noalias !9

  %vector_ptr4 = bitcast i16* %scevgep2 to <4 x i16>*

  %vec4 = load <4 x i16>, <4 x i16>* %vector_ptr4, align 8, !alias.scope !12,

!noalias !16

  %3 = sext <4 x i16> %vec4 to <4 x i32>

  %vec5 = mul <4 x i32> %3, %vec3

  %4 = trunc <4 x i32> %vec5 to <4 x i16>

  %vector_ptr5 = bitcast i16* %scevgep1 to <4 x i16>*

  store <4 x i16> %4, <4 x i16>* %vector_ptr5, align 8, !alias.scope !13,

!noalias !14

  %5 = sext <4 x i16> %vec0 to <4 x i32>

  %vector_ptr6 = bitcast i16* %scevgep2 to <4 x i16>*

  %vec6 = load <4 x i16>, <4 x i16>* %vector_ptr6, align 8, !alias.scope !12,

!noalias !16

  %6 = sext <4 x i16> %vec6 to <4 x i32>

  %vec7 = mul <4 x i32> %6, %5

  %vec8 = add <4 x i32> %vec7, %vec5

  %7 = trunc <4 x i32> %vec8 to <4 x i16>

  %vector_ptr7 = bitcast i16* %scevgep3 to <4 x i16>*

  store <4 x i16> %7, <4 x i16>* %vector_ptr7, align 8, !alias.scope !7,

!noalias !9

  ret void

}

!7 = distinct !{!7, !8, !"polly.alias.scope.MemRef_a"}

!8 = distinct !{!8, !"polly.alias.scope.domain"}

!9 = !{!12, !13}

!12 = distinct !{!12, !8, !"polly.alias.scope.MemRef_c"}

!13 = distinct !{!13, !8, !"polly.alias.scope.MemRef_b"}

!14 = !{!12, !7}

!16 = !{!13, !7}</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>