[llvm-bugs] [Bug 35734] New: [x86, loop vectorizer] Loop vectorizer generating incorrect trunc/ext pair
via llvm-bugs
llvm-bugs at lists.llvm.org
Fri Dec 22 13:57:32 PST 2017
https://bugs.llvm.org/show_bug.cgi?id=35734
Bug ID: 35734
Summary: [x86, loop vectorizer] Loop vectorizer generating
incorrect trunc/ext pair
Product: new-bugs
Version: trunk
Hardware: PC
OS: All
Status: NEW
Severity: enhancement
Priority: P
Component: new bugs
Assignee: unassignedbugs at nondot.org
Reporter: dneilson at azul.com
CC: llvm-bugs at lists.llvm.org
Created attachment 19596
--> https://bugs.llvm.org/attachment.cgi?id=19596&action=edit
reduced.ll
Reproduce with:
opt -S -loop-vectorize reduced.ll
In this reduced example, %10 is guaranteed to be either 0 or -1: %9 is the result of an 'and' with 1 (so it is 0 or 1), and %10 is %9 plus -1.
bci_42:
%local_2_3 = phi i32 [ %3, %bci_42.peel.next ], [ %10, %bci_42 ]
%local_1_2 = phi i32 [ %5, %bci_42.peel.next ], [ %11, %bci_42 ]
%9 = and i32 %local_2_3, 1
%10 = add nsw i32 %9, -1
%11 = add nsw i32 %local_1_2, 1
%12 = icmp sgt i32 %local_1_2, 77
br i1 %12, label %bci_72.loopexit, label %bci_42
When we run this code through -loop-vectorize, we end up with this in the
vectorized loop body:
...
%11 = and <16 x i32> %vec.phi, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32
1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%12 = and <16 x i32> %vec.phi1, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1,
i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
%13 = add nsw <16 x i32> %11, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32
-1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32
-1>
%14 = add nsw <16 x i32> %12, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32
-1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32
-1>
...
%18 = trunc <16 x i32> %13 to <16 x i1>
%19 = zext <16 x i1> %18 to <16 x i32>
%20 = trunc <16 x i32> %14 to <16 x i1>
%21 = zext <16 x i1> %20 to <16 x i32>
...
These trunc/zext pairs restrict the values of the vectorized %10 to 0 or 1,
which is clearly incorrect since %10 can be either 0 or -1.
These zexts should be sexts instead.
=====
; reduced.ll
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:1"
target triple = "x86_64-unknown-linux-gnu"
; Reduced reproducer: the loop in %bci_42 recomputes %10 = (%local_2_3 & 1) - 1
; on every iteration, so the value carried out of the loop is always 0 or -1.
; The "; preds" comments and phi operand lists are kept on single lines so that
; the function can be copied out and parsed directly.
define i64 @foo(i32 %v, i32 %v2, i32 %v3) {
  br label %guarded

guarded:                                          ; preds = %0
  ; Skip the loop entirely when %v2 > 78.
  %1 = icmp sgt i32 %v2, 78
  br i1 %1, label %bci_72, label %bci_42.lr.ph

bci_42.lr.ph:                                     ; preds = %guarded
  ; %3 is the initial value fed into the loop-carried phi %local_2_3.
  %2 = and i32 %v3, %v
  %3 = add i32 %2, -1
  %4 = icmp eq i32 %v2, 78
  br i1 %4, label %bci_72, label %bci_42.peel.next

bci_42.peel.next:                                 ; preds = %bci_42.lr.ph
  ; %5 is the initial value of the loop counter phi %local_1_2.
  %5 = add nsw i32 %v2, 1
  br label %bci_42

bci_72.loopexit:                                  ; preds = %bci_42
  ; Single-entry phis carrying the loop's final %10 and %11 out of the loop.
  %.lcssa11 = phi i32 [ %10, %bci_42 ]
  %.lcssa = phi i32 [ %11, %bci_42 ]
  br label %bci_72

bci_72:                                           ; preds = %bci_72.loopexit, %bci_42.lr.ph, %guarded
  %local_0_.lcssa = phi i32 [ %v, %guarded ], [ 1, %bci_42.lr.ph ], [ 1, %bci_72.loopexit ]
  %local_1_.lcssa = phi i32 [ %v2, %guarded ], [ 79, %bci_42.lr.ph ], [ %.lcssa, %bci_72.loopexit ]
  %local_2_.lcssa = phi i32 [ %v3, %guarded ], [ %3, %bci_42.lr.ph ], [ %.lcssa11, %bci_72.loopexit ]
  ; The result is the sum of the three merged values, sign-extended to i64.
  %6 = add i32 %local_0_.lcssa, %local_1_.lcssa
  %7 = add i32 %6, %local_2_.lcssa
  %8 = sext i32 %7 to i64
  ret i64 %8

bci_42:                                           ; preds = %bci_42, %bci_42.peel.next
  %local_2_3 = phi i32 [ %3, %bci_42.peel.next ], [ %10, %bci_42 ]
  %local_1_2 = phi i32 [ %5, %bci_42.peel.next ], [ %11, %bci_42 ]
  ; %9 is 0 or 1, so %10 = %9 + (-1) is 0 or -1.
  %9 = and i32 %local_2_3, 1
  %10 = add nsw i32 %9, -1
  ; Counter increment; the loop exits once the pre-increment counter exceeds 77.
  %11 = add nsw i32 %local_1_2, 1
  %12 = icmp sgt i32 %local_1_2, 77
  br i1 %12, label %bci_72.loopexit, label %bci_42
}
--
You are receiving this mail because:
You are on the CC list for the bug.
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-bugs/attachments/20171222/91107b4b/attachment.html>
More information about the llvm-bugs
mailing list