<html>
<head>
<base href="https://bugs.llvm.org/">
</head>
<body><table border="1" cellspacing="0" cellpadding="8">
<tr>
<th>Bug ID</th>
<td><a class="bz_bug_link
bz_status_NEW "
title="NEW - GEP inbound with negative offset generated by loop vectorizer"
href="https://bugs.llvm.org/show_bug.cgi?id=48126">48126</a>
</td>
</tr>
<tr>
<th>Summary</th>
<td>GEP inbound with negative offset generated by loop vectorizer
</td>
</tr>
<tr>
<th>Product</th>
<td>libraries
</td>
</tr>
<tr>
<th>Version</th>
<td>trunk
</td>
</tr>
<tr>
<th>Hardware</th>
<td>PC
</td>
</tr>
<tr>
<th>OS</th>
<td>All
</td>
</tr>
<tr>
<th>Status</th>
<td>NEW
</td>
</tr>
<tr>
<th>Severity</th>
<td>enhancement
</td>
</tr>
<tr>
<th>Priority</th>
<td>P
</td>
</tr>
<tr>
<th>Component</th>
<td>Loop Optimizer
</td>
</tr>
<tr>
<th>Assignee</th>
<td>unassignedbugs@nondot.org
</td>
</tr>
<tr>
<th>Reporter</th>
<td>liuz@cs.utah.edu
</td>
</tr>
<tr>
<th>CC</th>
<td>llvm-bugs@lists.llvm.org
</td>
</tr></table>
<p>
<div>
<pre>Test case from test/Transforms/LoopVectorize/gcc-examples.ll
;CHECK-LABEL: @example21(
;CHECK: load &lt;4 x i32&gt;
;CHECK: shufflevector {{.*}} &lt;i32 3, i32 2, i32 1, i32 0&gt;
;CHECK: ret i32
define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp
{
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
%2 = sext i32 %n to i64
br label %3
; &lt;label&gt;:3 ; preds = %.lr.ph, %3
%indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
%a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
%indvars.iv.next = add i64 %indvars.iv, -1
%4 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%5 = load i32, i32* %4, align 4
%6 = add nsw i32 %5, %a.02
%7 = trunc i64 %indvars.iv.next to i32
%8 = icmp sgt i32 %7, 0
br i1 %8, label %3, label %._crit_edge
._crit_edge: ; preds = %3, %0
%a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ]
ret i32 %a.0.lcssa
}
Below is the vectorized function after calling opt with the arguments listed in
the test case (-basic-aa -loop-vectorize -force-vector-width=4
-force-vector-interleave=1 -dce -instcombine).
; Function Attrs: nounwind readonly ssp uwtable
define i32 @example21(i32* nocapture %b, i32 %n) #0 {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge
.lr.ph: ; preds = %0
%2 = sext i32 %n to i64
%3 = add i32 %n, -1
%4 = zext i32 %3 to i64
%5 = add nuw nsw i64 %4, 1
%min.iters.check = icmp ult i32 %3, 3
br i1 %min.iters.check, label %scalar.ph, label %vector.ph
vector.ph: ; preds = %.lr.ph
%n.vec = and i64 %5, 8589934588
%ind.end = sub nsw i64 %2, %n.vec
br label %vector.body
vector.body: ; preds = %vector.body, %vector.ph
%index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
%vec.phi = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ %11, %vector.body
]
%6 = xor i64 %index, -1
%7 = add i64 %6, %2
%8 = getelementptr inbounds i32, i32* %b, i64 -3
%9 = getelementptr inbounds i32, i32* %8, i64 %7
%10 = bitcast i32* %9 to <4 x i32>*
%wide.load = load <4 x i32>, <4 x i32>* %10, align 4
%reverse = shufflevector &lt;4 x i32&gt; %wide.load, &lt;4 x i32&gt; undef, &lt;4 x i32&gt;
&lt;i32 3, i32 2, i32 1, i32 0&gt;
%11 = add <4 x i32> %reverse, %vec.phi
%index.next = add i64 %index, 4
%12 = icmp eq i64 %index.next, %n.vec
br i1 %12, label %middle.block, label %vector.body, !llvm.loop !0
middle.block: ; preds = %vector.body
%rdx.shuf = shufflevector &lt;4 x i32&gt; %11, &lt;4 x i32&gt; undef, &lt;4 x i32&gt; &lt;i32 2,
i32 3, i32 undef, i32 undef&gt;
%bin.rdx = add <4 x i32> %11, %rdx.shuf
%rdx.shuf1 = shufflevector &lt;4 x i32&gt; %bin.rdx, &lt;4 x i32&gt; undef, &lt;4 x i32&gt;
&lt;i32 1, i32 undef, i32 undef, i32 undef&gt;
%bin.rdx2 = add <4 x i32> %bin.rdx, %rdx.shuf1
%13 = extractelement <4 x i32> %bin.rdx2, i32 0
%cmp.n = icmp eq i64 %5, %n.vec
br i1 %cmp.n, label %._crit_edge.loopexit, label %scalar.ph
scalar.ph: ; preds = %middle.block, %.lr.ph
%bc.resume.val = phi i64 [ %ind.end, %middle.block ], [ %2, %.lr.ph ]
%bc.merge.rdx = phi i32 [ %13, %middle.block ], [ 0, %.lr.ph ]
br label %14
14: ; preds = %14, %scalar.ph
%indvars.iv = phi i64 [ %bc.resume.val, %scalar.ph ], [ %indvars.iv.next, %14
]
%a.02 = phi i32 [ %bc.merge.rdx, %scalar.ph ], [ %17, %14 ]
%indvars.iv.next = add i64 %indvars.iv, -1
%15 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv.next
%16 = load i32, i32* %15, align 4
%17 = add nsw i32 %16, %a.02
%18 = trunc i64 %indvars.iv.next to i32
%19 = icmp sgt i32 %18, 0
br i1 %19, label %14, label %._crit_edge.loopexit, !llvm.loop !2
._crit_edge.loopexit: ; preds = %middle.block, %14
%.lcssa = phi i32 [ %17, %14 ], [ %13, %middle.block ]
br label %._crit_edge
._crit_edge: ; preds = %._crit_edge.loopexit, %0
%a.0.lcssa = phi i32 [ 0, %0 ], [ %.lcssa, %._crit_edge.loopexit ]
ret i32 %a.0.lcssa
}
attributes #0 = { nounwind readonly ssp uwtable }
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.isvectorized", i32 1}
!2 = distinct !{!2, !3, !1}
!3 = !{!"llvm.loop.unroll.runtime.disable"}
Here %8 is a generated GEP inbounds with a negative offset (-3). The source of
this transformation is more defined than the target.
Alive2: <a href="https://alive2.llvm.org/ce/z/g2VueK">https://alive2.llvm.org/ce/z/g2VueK</a></pre>
</div>
</p>
<hr>
<span>You are receiving this mail because:</span>
<ul>
<li>You are on the CC list for the bug.</li>
</ul>
</body>
</html>