<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - [LV] Incorrect codegen for predicated loads/stores on targets supporting masking"

   href="https://bugs.llvm.org/show_bug.cgi?id=50082">50082</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>[LV] Incorrect codegen for predicated loads/stores on targets supporting masking

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>libraries

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Linux

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>Loop Optimizer

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>bmahjour@ca.ibm.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Created <span class=""><a href="attachment.cgi?id=24792" name="attach_24792" title="input IR">attachment 24792</a> <a href="attachment.cgi?id=24792&amp;action=edit" title="input IR">[details]</a></span>

input IR

The LoopVectorize pass has a bug in its handling of predication that is hidden by the

cost model: it can end up generating bad code that executes predicated

scalar loads/stores unconditionally when VF=1 is chosen.

Consider a simple loop like this:

```

void foo(int * restrict A, int * restrict B, int n)

{

  for (int i = 0; i < n; i++)

    if (A[i])

      A[i] = B[i] + i;

}

```

with the corresponding IR below:

```

target datalayout =

"e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

target triple = "x86_64-pc-linux-gnu"

; Function Attrs: nounwind uwtable

define dso_local void @foo(i32* noalias %A, i32* noalias %B, i32 %n) #0 {

entry:

  %cmp1 = icmp sgt i32 %n, 0

  br i1 %cmp1, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry

  %wide.trip.count = zext i32 %n to i64

  br label %for.body

for.body:                                         ; preds = %for.body.lr.ph, %for.inc

  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.inc ]

  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv

  %0 = load i32, i32* %arrayidx, align 4, !tbaa !2

  %tobool.not = icmp eq i32 %0, 0

  br i1 %tobool.not, label %for.inc, label %if.then

if.then:                                          ; preds = %for.body

  %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv

  %1 = load i32, i32* %arrayidx2, align 4, !tbaa !2

  %2 = trunc i64 %indvars.iv to i32

  %add = add nsw i32 %1, %2

  %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv

  store i32 %add, i32* %arrayidx4, align 4, !tbaa !2

  br label %for.inc

for.inc:                                          ; preds = %for.body, %if.then

  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1

  %exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count

  br i1 %exitcond, label %for.body, label %for.cond.for.end_crit_edge

for.cond.for.end_crit_edge:                       ; preds = %for.inc

  br label %for.end

for.end:                                          ; preds = %for.cond.for.end_crit_edge, %entry

  ret void

}

attributes #0 = { nounwind uwtable "target-cpu"="core-avx2"

"target-features"="+avx,+avx2" }

!2 = !{!3, !3, i64 0}

!3 = !{!"int", !4, i64 0}

!4 = !{!"omnipotent char", !5, i64 0}

!5 = !{!"Simple C/C++ TBAA"}

```

Running the loop-vectorize pass with options that make the vectorizer transform the loop

(via interleaving) without vectorizing anything (forcing the width to 1), as follows:

```

opt repro.ll -passes="loop-vectorize" -S -force-vector-width=1 -force-vector-interleave=2 -o out.ll

```

will generate `store` instructions that are not guarded and are executed

unconditionally:

```

vector.body:                                      ; preds = %vector.body, %vector.ph

  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]

  %induction = add i64 %index, 0

  %induction1 = add i64 %index, 1

  %0 = getelementptr inbounds i32, i32* %A, i64 %induction

  %1 = getelementptr inbounds i32, i32* %A, i64 %induction1

  %2 = load i32, i32* %0, align 4, !tbaa !0

  %3 = load i32, i32* %1, align 4, !tbaa !0

  %4 = icmp eq i32 %2, 0

  %5 = icmp eq i32 %3, 0

  %6 = getelementptr inbounds i32, i32* %B, i64 %induction

  %7 = getelementptr inbounds i32, i32* %B, i64 %induction1

  %8 = load i32, i32* %6, align 4, !tbaa !0

  %9 = load i32, i32* %7, align 4, !tbaa !0

  %10 = trunc i64 %index to i32

  %induction2 = add i32 %10, 0

  %induction3 = add i32 %10, 1

  %11 = add nsw i32 %8, %induction2

  %12 = add nsw i32 %9, %induction3

  store i32 %11, i32* %0, align 4, !tbaa !0

  store i32 %12, i32* %1, align 4, !tbaa !0

  %index.next = add i64 %index, 2

  %13 = icmp eq i64 %index.next, %n.vec

  br i1 %13, label %middle.block, label %vector.body, !llvm.loop !4

middle.block:                                     ; preds = %vector.body

```

This is a problem on targets that support masked load/store instructions

natively.</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>