[llvm-bugs] [Bug 35734] New: [x86, loop vectorizer] Loop vectorizer generating incorrect trunc/ext pair

via llvm-bugs llvm-bugs at lists.llvm.org
Fri Dec 22 13:57:32 PST 2017


https://bugs.llvm.org/show_bug.cgi?id=35734

            Bug ID: 35734
           Summary: [x86, loop vectorizer] Loop vectorizer generating
                    incorrect trunc/ext pair
           Product: new-bugs
           Version: trunk
          Hardware: PC
                OS: All
            Status: NEW
          Severity: enhancement
          Priority: P
         Component: new bugs
          Assignee: unassignedbugs at nondot.org
          Reporter: dneilson at azul.com
                CC: llvm-bugs at lists.llvm.org

Created attachment 19596
  --> https://bugs.llvm.org/attachment.cgi?id=19596&action=edit
reduced.ll

Reproduce with:
opt -S -loop-vectorize reduced.ll


In this reduced example, %10 is guaranteed to be either 0 or -1: %9 is the
result of `and ..., 1` (so it is 0 or 1), and %10 is %9 plus -1.
bci_42:
  %local_2_3 = phi i32 [ %3, %bci_42.peel.next ], [ %10, %bci_42 ]
  %local_1_2 = phi i32 [ %5, %bci_42.peel.next ], [ %11, %bci_42 ]
  %9 = and i32 %local_2_3, 1
  %10 = add nsw i32 %9, -1
  %11 = add nsw i32 %local_1_2, 1
  %12 = icmp sgt i32 %local_1_2, 77
  br i1 %12, label %bci_72.loopexit, label %bci_42

When we run this code through -loop-vectorize, we end up with this in the
vectorized loop body:
...
  %11 = and <16 x i32> %vec.phi, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %12 = and <16 x i32> %vec.phi1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %13 = add nsw <16 x i32> %11, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32
-1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32
-1>
  %14 = add nsw <16 x i32> %12, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32
-1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32
-1>
...
  %18 = trunc <16 x i32> %13 to <16 x i1>
  %19 = zext <16 x i1> %18 to <16 x i32>
  %20 = trunc <16 x i32> %14 to <16 x i1>
  %21 = zext <16 x i1> %20 to <16 x i32>
...

These trunc/zext pairs limit the values of the vectorized %10 to 0 or 1, which
is clearly incorrect since %10 can be either 0 or -1. These zexts should be
sexts instead.

=====
; reduced.ll
; Reduced reproducer for https://bugs.llvm.org/show_bug.cgi?id=35734.
; Run with: opt -S -loop-vectorize reduced.ll
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"

; @foo contains a single loop (%bci_42) carrying two values:
;   %local_2_3 - recomputed every iteration as (%local_2_3 & 1) + -1,
;                so the new value (%10) is always 0 or -1;
;   %local_1_2 - a counter incremented by 1 until it exceeds 77 (signed).
; The final values of both are summed with a constant in %bci_72 and the
; result is sign-extended to i64 and returned.
define i64 @foo(i32 %v, i32 %v2, i32 %v3) {
  br label %guarded

guarded:                                          ; preds = %0
  ; Bypass the loop entirely when %v2 > 78 (signed).
  %1 = icmp sgt i32 %v2, 78
  br i1 %1, label %bci_72, label %bci_42.lr.ph

bci_42.lr.ph:                                     ; preds = %guarded
  ; %3 is the initial value of the loop-carried %local_2_3.
  %2 = and i32 %v3, %v
  %3 = add i32 %2, -1
  ; Also bypass the loop when %v2 == 78.
  %4 = icmp eq i32 %v2, 78
  br i1 %4, label %bci_72, label %bci_42.peel.next

bci_42.peel.next:                                 ; preds = %bci_42.lr.ph
  ; %5 is the initial value of the loop counter %local_1_2.
  %5 = add nsw i32 %v2, 1
  br label %bci_42

bci_72.loopexit:                                  ; preds = %bci_42
  ; LCSSA phis carrying the loop's final %10 and %11 out of the loop.
  %.lcssa11 = phi i32 [ %10, %bci_42 ]
  %.lcssa = phi i32 [ %11, %bci_42 ]
  br label %bci_72

bci_72:                                           ; preds = %bci_72.loopexit, %bci_42.lr.ph, %guarded
  ; Merge the three ways of reaching the exit, then return
  ; sext(%local_0_.lcssa + %local_1_.lcssa + %local_2_.lcssa).
  %local_0_.lcssa = phi i32 [ %v, %guarded ], [ 1, %bci_42.lr.ph ], [ 1, %bci_72.loopexit ]
  %local_1_.lcssa = phi i32 [ %v2, %guarded ], [ 79, %bci_42.lr.ph ], [ %.lcssa, %bci_72.loopexit ]
  %local_2_.lcssa = phi i32 [ %v3, %guarded ], [ %3, %bci_42.lr.ph ], [ %.lcssa11, %bci_72.loopexit ]
  %6 = add i32 %local_0_.lcssa, %local_1_.lcssa
  %7 = add i32 %6, %local_2_.lcssa
  %8 = sext i32 %7 to i64
  ret i64 %8

bci_42:                                           ; preds = %bci_42, %bci_42.peel.next
  %local_2_3 = phi i32 [ %3, %bci_42.peel.next ], [ %10, %bci_42 ]
  %local_1_2 = phi i32 [ %5, %bci_42.peel.next ], [ %11, %bci_42 ]
  ; %9 is the low bit of %local_2_3 (0 or 1), so %10 is 0 or -1.
  %9 = and i32 %local_2_3, 1
  %10 = add nsw i32 %9, -1
  ; Advance the counter; exit once the pre-increment value exceeds 77.
  %11 = add nsw i32 %local_1_2, 1
  %12 = icmp sgt i32 %local_1_2, 77
  br i1 %12, label %bci_72.loopexit, label %bci_42
}

-- 
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20171222/91107b4b/attachment.html>


More information about the llvm-bugs mailing list