[llvm-bugs] [Bug 49889] New: Miscompile with LoopLoadElim due to wrong store forwarding (Attempt 3)

Wed Apr 7 21:33:11 PDT 2021

https://bugs.llvm.org/show_bug.cgi?id=49889

            Bug ID: 49889
           Summary: Miscompile with LoopLoadElim due to wrong store
                    forwarding (Attempt 3)
           Product: new-bugs
           Version: trunk
          Hardware: PC
                OS: Windows NT
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: max.kazantsev at azul.com
                CC: htmldeveloper at gmail.com, llvm-bugs at lists.llvm.org

Run opt -loop-load-elim -S on the following test:
------------------------------------------------------------------
; Reproducer input for `opt -loop-load-elim -S`.
; Both base pointers below are loaded from the same location %addr, and the
; loop reads and writes i32 elements through pointers derived from each of them.
define void @foo(i8** %addr) {
entry:
  ; Two separate loads of the same location %addr.
  %val26 = load i8*, i8** %addr, align 8
  %val94 = load i8*, i8** %addr, align 8
  %val59 = bitcast i8* %val26 to i32*
  %val96 = bitcast i8* %val94 to i32*
  ; Address of i32 element 2 relative to %val96.
  %val100 = getelementptr inbounds i32, i32* %val96, i64 2
  br label %preheader

preheader:                              ; preds = %entry
  br label %header

header:                                 ; preds = %preheader, %header
  ; %iv starts at 2 and is incremented by 1 on each trip around the loop.
  %iv = phi i64 [ 2, %preheader ], [ %iv.next, %header ]
  %iv.prev = add nsw i64 %iv, -1
  ; arr[iv] = arr[iv - 1], addressed through %val59.
  %val108 = getelementptr inbounds i32, i32* %val59, i64 %iv.prev
  %val109 = load i32, i32* %val108, align 8
  %val110 = getelementptr inbounds i32, i32* %val59, i64 %iv
  store i32 %val109, i32* %val110, align 8
  ; The result of this load is not used anywhere in the function. The reporter
  ; marks it as important; presumably it is required to trigger the bug.
  %val116 = load i32, i32* %val96, align 8 ; IMPORTANT!
  ; arr[2] = arr[2] + 13, addressed through %val100 (derived from %val96).
  %val119 = load i32, i32* %val100, align 8
  %val120 = add i32 %val119, 13
  store i32 %val120, i32* %val100, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  ; Exit once %iv > 2 (unsigned), so the body runs for %iv = 2 and %iv = 3.
  %val122 = icmp ugt i64 %iv, 2
  br i1 %val122, label %exit, label %header

exit:                                   ; preds = %header
  ret void
}
------------------------------------------------------------------

The original loop executes its body twice (for iv = 2 and iv = 3), doing the following:

for (iv = 2; iv <= 3; iv++) {
  arr[iv] = arr[iv - 1]
  arr[2] = arr[2] + 13
}

LoopLoadElim produces the following code:

------------------------------------------------------------------
; Output of `opt -loop-load-elim -S` for the reproducer, reproduced verbatim
; except that lines broken by mail wrapping have been rejoined so the IR parses.
; The loop has been versioned: a runtime check selects either an unmodified copy
; of the loop (*.lver.orig) or a copy in which the stored value is carried in a
; phi (%store_forwarded). The instructions are left exactly as the pass emitted
; them, including the miscompile this report describes.
define void @foo(i8** %addr) {
entry:
  %val26 = load i8*, i8** %addr, align 8
  %val94 = load i8*, i8** %addr, align 8
  %val59 = bitcast i8* %val26 to i32*
  %val96 = bitcast i8* %val94 to i32*
  %val100 = getelementptr inbounds i32, i32* %val96, i64 2
  br label %header.lver.check

header.lver.check:                                ; preds = %entry
  ; Runtime memory check. Byte offsets used for the bounds:
  ;   %scevgep  = %val26 + 4,  %scevgep1 = %val26 + 16,
  ;   %scevgep2 = %val94 + 1,  %bc       = %val94 + 0 (via %val96).
  %scevgep = getelementptr i8, i8* %val26, i64 4
  %scevgep1 = getelementptr i8, i8* %val26, i64 16
  %scevgep2 = getelementptr i8, i8* %val94, i64 1
  %bc = bitcast i32* %val96 to i8*
  %bound0 = icmp ult i8* %scevgep, %scevgep2
  %bound1 = icmp ult i8* %bc, %scevgep1
  %found.conflict = and i1 %bound0, %bound1
  %memcheck.conflict = and i1 %found.conflict, true
  ; Conflict -> unmodified loop copy; no conflict -> transformed loop.
  br i1 %memcheck.conflict, label %header.ph.lver.orig, label %header.ph

header.ph.lver.orig:                              ; preds = %header.lver.check
  br label %header.lver.orig

; Copy of the original loop body; every value name carries a .lver.orig suffix.
header.lver.orig:                                 ; preds = %header.lver.orig, %header.ph.lver.orig
  %iv.lver.orig = phi i64 [ 2, %header.ph.lver.orig ], [ %iv.next.lver.orig, %header.lver.orig ]
  %iv.prev.lver.orig = add nsw i64 %iv.lver.orig, -1
  %val108.lver.orig = getelementptr inbounds i32, i32* %val59, i64 %iv.prev.lver.orig
  %val109.lver.orig = load i32, i32* %val108.lver.orig, align 8
  %val110.lver.orig = getelementptr inbounds i32, i32* %val59, i64 %iv.lver.orig
  store i32 %val109.lver.orig, i32* %val110.lver.orig, align 8
  %val116.lver.orig = load i32, i32* %val96, align 8
  %val119.lver.orig = load i32, i32* %val100, align 8
  %val120.lver.orig = add i32 %val119.lver.orig, 13
  store i32 %val120.lver.orig, i32* %val100, align 8
  %iv.next.lver.orig = add nuw nsw i64 %iv.lver.orig, 1
  %val122.lver.orig = icmp ugt i64 %iv.lver.orig, 2
  br i1 %val122.lver.orig, label %exit.loopexit, label %header.lver.orig

header.ph:                                        ; preds = %header.lver.check
  ; Load the i32 at byte offset 4 from %val26 (i.e. arr[1]) once, before the loop.
  %scevgep4 = getelementptr i8, i8* %val26, i64 4
  %scevgep45 = bitcast i8* %scevgep4 to i32*
  %load_initial = load i32, i32* %scevgep45, align 8
  br label %header

header:                                           ; preds = %header, %header.ph
  ; The back-edge operand of this phi is the phi itself, so %store_forwarded
  ; keeps the value of %load_initial on every iteration.
  %store_forwarded = phi i32 [ %load_initial, %header.ph ], [ %store_forwarded, %header ]
  %iv = phi i64 [ 2, %header.ph ], [ %iv.next, %header ]
  %iv.prev = add nsw i64 %iv, -1
  %val108 = getelementptr inbounds i32, i32* %val59, i64 %iv.prev
  ; %val109 is still loaded here, but nothing below uses it.
  %val109 = load i32, i32* %val108, align 8
  %val110 = getelementptr inbounds i32, i32* %val59, i64 %iv
  ; Stores the forwarded value instead of %val109: this is the miscompile.
  store i32 %store_forwarded, i32* %val110, align 8
  %val116 = load i32, i32* %val96, align 8
  %val119 = load i32, i32* %val100, align 8
  %val120 = add i32 %val119, 13
  store i32 %val120, i32* %val100, align 8
  %iv.next = add nuw nsw i64 %iv, 1
  %val122 = icmp ugt i64 %iv, 2
  br i1 %val122, label %exit.loopexit3, label %header

exit.loopexit:                                    ; preds = %header.lver.orig
  br label %exit

exit.loopexit3:                                   ; preds = %header
  br label %exit

exit:                                             ; preds = %exit.loopexit3, %exit.loopexit
  ret void
}
------------------------------------------------------------------

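We take the versioned loop (header.ph) because the runtime check reports no
conflict: %val26 and %val94 are loaded from the same address and hold the same
pointer, so %bound0 compares (base + 4) against (base + 1) and is false: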

  %val26 = load i8*, i8** %addr, align 8
  %val94 = load i8*, i8** %addr, align 8
  ...
  %scevgep = getelementptr i8, i8* %val26, i64 4
  ...
  %scevgep2 = getelementptr i8, i8* %val94, i64 1
  ...
  %bound0 = icmp ult i8* %scevgep, %scevgep2 ; false

And in the versioned loop, we have a miscompile:

store_forwarded = arr[1]
for (iv = 2; iv <= 3; iv++) {
  arr[iv] = store_forwarded
  arr[2] = arr[2] + 13
}

Let the initial arr be filled with zeros. Then the original loop would do:

After iter 1: {0, 0, 13, 0}
After iter 2: {0, 0, 26, 13}

And the new loop does:

After iter 1: {0, 0, 13, 0}
After iter 2: {0, 0, 26, 0}

So the problem is that store_forwarded is computed before the loop and is not
re-read when arr[2] (the element at index 2) changes.

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20210408/cb5b3acf/attachment-0001.html>