<html>

    <head>

      <base href="https://bugs.llvm.org/">

    </head>

    <body><table border="1" cellspacing="0" cellpadding="8">

        <tr>

          <th>Bug ID</th>

          <td><a class="bz_bug_link 

          bz_status_NEW "

   title="NEW - Miscompile with LoopLoadElim due to wrong store forwarding"

   href="https://bugs.llvm.org/show_bug.cgi?id=49876">49876</a>

          </td>

        </tr>

        <tr>

          <th>Summary</th>

          <td>Miscompile with LoopLoadElim due to wrong store forwarding

          </td>

        </tr>

        <tr>

          <th>Product</th>

          <td>new-bugs

          </td>

        </tr>

        <tr>

          <th>Version</th>

          <td>trunk

          </td>

        </tr>

        <tr>

          <th>Hardware</th>

          <td>PC

          </td>

        </tr>

        <tr>

          <th>OS</th>

          <td>Windows NT

          </td>

        </tr>

        <tr>

          <th>Status</th>

          <td>NEW

          </td>

        </tr>

        <tr>

          <th>Severity</th>

          <td>enhancement

          </td>

        </tr>

        <tr>

          <th>Priority</th>

          <td>P

          </td>

        </tr>

        <tr>

          <th>Component</th>

          <td>new bugs

          </td>

        </tr>

        <tr>

          <th>Assignee</th>

          <td>unassignedbugs@nondot.org

          </td>

        </tr>

        <tr>

          <th>Reporter</th>

          <td>max.kazantsev@azul.com

          </td>

        </tr>

        <tr>

          <th>CC</th>

          <td>htmldeveloper@gmail.com, llvm-bugs@lists.llvm.org

          </td>

        </tr></table>

      <p>

        <div>

        <pre>Run opt -loop-load-elim -S on the following test:

-----------------------------------------------

define void @foo(i32** %addr) {

entry:

  %arr.start = load i32*, i32** %addr

  %arr.start2 = load i32*, i32** %addr

  %elem.1 = getelementptr i32, i32* %arr.start2, i64 1

  br label %preheader

preheader:

  br label %header

header:

  %iv = phi i64 [ %iv.next, %header ], [ 1, %preheader ]

  %prev.iv = add nsw i64 %iv, -1

  %prev.elem = getelementptr i32, i32* %arr.start, i64 %prev.iv

  %prev.elem.val = load i32, i32* %prev.elem

  %current.elem = getelementptr i32, i32* %arr.start, i64 %iv

  store i32 %prev.elem.val, i32* %current.elem

  %elem.1.val = load i32, i32* %elem.1

  %elem.1.new.val = add i32 %elem.1.val, 13

  store i32 %elem.1.new.val, i32* %elem.1

  %iv.next = add nuw nsw i64 %iv, 1

  %cond = icmp ugt i64 %iv, 2

  br i1 %cond, label %exit, label %header

exit:

  ret void

}

---------------------------------------------------------

Semantically, it's doing the following:

for (i = 1; i <= 2; i++) {

  a[i] = a[i - 1];

  a[1] += 13;

}

The output of this optimization will be:

---------------------------------------------------------

; ModuleID = './ir.ll'

source_filename = "./ir.ll"

define void @foo(i32** %addr) {

entry:

  %arr.start = load i32*, i32** %addr, align 8

  %arr.start1 = bitcast i32* %arr.start to i8*

  %arr.start2 = load i32*, i32** %addr, align 8

  %elem.1 = getelementptr i32, i32* %arr.start2, i64 1

  br label %header.lver.check

header.lver.check:                                ; preds = %entry

  %scevgep = getelementptr i32, i32* %arr.start, i64 4

  %scevgep2 = bitcast i32* %scevgep to i8*

  %scevgep3 = getelementptr i32, i32* %arr.start2, i64 1

  %scevgep34 = bitcast i32* %scevgep3 to i8*

  %uglygep = getelementptr i8, i8* %scevgep34, i64 1

  %bc = bitcast i32* %elem.1 to i8*

  %bound0 = icmp ult i8* %arr.start1, %uglygep

  %bound1 = icmp ult i8* %bc, %scevgep2

  %found.conflict = and i1 %bound0, %bound1

  %memcheck.conflict = and i1 %found.conflict, true

  br i1 %memcheck.conflict, label %header.ph.lver.orig, label %header.ph

header.ph.lver.orig:                              ; preds = %header.lver.check

  br label %header.lver.orig

header.lver.orig:                                 ; preds = %header.lver.orig, %header.ph.lver.orig

  %iv.lver.orig = phi i64 [ %iv.next.lver.orig, %header.lver.orig ], [ 1,

%header.ph.lver.orig ]

  %prev.iv.lver.orig = add nsw i64 %iv.lver.orig, -1

  %prev.elem.lver.orig = getelementptr i32, i32* %arr.start, i64

%prev.iv.lver.orig

  %prev.elem.val.lver.orig = load i32, i32* %prev.elem.lver.orig, align 4

  %current.elem.lver.orig = getelementptr i32, i32* %arr.start, i64

%iv.lver.orig

  store i32 %prev.elem.val.lver.orig, i32* %current.elem.lver.orig, align 4

  %elem.1.val.lver.orig = load i32, i32* %elem.1, align 4

  %elem.1.new.val.lver.orig = add i32 %elem.1.val.lver.orig, 13

  store i32 %elem.1.new.val.lver.orig, i32* %elem.1, align 4

  %iv.next.lver.orig = add nuw nsw i64 %iv.lver.orig, 1

  %cond.lver.orig = icmp ugt i64 %iv.lver.orig, 2

  br i1 %cond.lver.orig, label %exit.loopexit, label %header.lver.orig

header.ph:                                        ; preds = %header.lver.check

  %load_initial = load i32, i32* %arr.start, align 4

  br label %header

header:                                           ; preds = %header, %header.ph

  %store_forwarded = phi i32 [ %load_initial, %header.ph ], [ %store_forwarded,

%header ]

  %iv = phi i64 [ %iv.next, %header ], [ 1, %header.ph ]

  %prev.iv = add nsw i64 %iv, -1

  %prev.elem = getelementptr i32, i32* %arr.start, i64 %prev.iv

  %prev.elem.val = load i32, i32* %prev.elem, align 4

  %current.elem = getelementptr i32, i32* %arr.start, i64 %iv

  store i32 %store_forwarded, i32* %current.elem, align 4

  %elem.1.val = load i32, i32* %elem.1, align 4

  %elem.1.new.val = add i32 %elem.1.val, 13

  store i32 %elem.1.new.val, i32* %elem.1, align 4

  %iv.next = add nuw nsw i64 %iv, 1

  %cond = icmp ugt i64 %iv, 2

  br i1 %cond, label %exit.loopexit5, label %header

exit.loopexit:                                    ; preds = %header.lver.orig

  br label %exit

exit.loopexit5:                                   ; preds = %header

  br label %exit

exit:                                             ; preds = %exit.loopexit5, %exit.loopexit

  ret void

}

---------------------------------------------------------

Note that now the versioned version of the loop with store forwarding does the

following:

for (i = 1; i <= 2; i++) {

  a[i] = a[0];

  a[1] += 13;

}

which is not equivalent. Take initial array filled with zeroes, the original

loop gives {0, 26, 13} and the new version gives {0, 13, 0}.

So we are not supposed to go into this version, but we do because %bound1 =

icmp ult i8* %bc, %scevgep2 is false (bc = &a[1], scevgep2 = &a[0]), so both

%found.conflict and %memcheck.conflict are false and lead to the bad version of

the loop.</pre>

        </div>

      </p>

      <hr>

      <span>You are receiving this mail because:</span>

      <ul>

          <li>You are on the CC list for the bug.</li>

      </ul>

    </body>

</html>