<div dir="ltr">This broke test/CodeGen/WebAssembly/cfg-stackify.ll.<div><br></div><div>Is this correct behavior? Should I update the test?</div></div><div class="gmail_extra"><br><div class="gmail_quote">On Mon, Aug 15, 2016 at 12:53 AM, James Molloy via llvm-commits <span dir="ltr">&lt;<a href="mailto:llvm-commits@lists.llvm.org" target="_blank">llvm-commits@lists.llvm.org</a>&gt;</span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Author: jamesm<br>
Date: Mon Aug 15 02:53:03 2016<br>
New Revision: 278658<br>
<br>
URL: <a href="http://llvm.org/viewvc/llvm-project?rev=278658&view=rev" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project?rev=278658&view=rev</a><br>
Log:<br>
[LSR] Don't try and create post-inc expressions on non-rotated loops<br>
<br>
If a loop is not rotated (for example when optimizing for size), the latch is not the backedge. If we promote an expression to post-inc form, we not only increase register pressure and add a COPY for that IV expression but for all IVs!<br>
<br>
Motivating testcase:<br>
<br>
void f(float *a, float *b, float *c, int n) {<br>
while (n-- > 0)<br>
*c++ = *a++ + *b++;<br>
}<br>
<br>
It's imperative that the pointer increments be located in the latch block and not the header block; if not, we cannot use post-increment loads and stores and we have to keep both the post-inc and pre-inc values around until the end of the latch which bloats register usage.<br>
<br>
Added:<br>
llvm/trunk/test/Transforms/<wbr>LoopStrengthReduce/post-inc-<wbr>optsize.ll<br>
Modified:<br>
llvm/trunk/lib/Transforms/<wbr>Scalar/LoopStrengthReduce.cpp<br>
llvm/trunk/test/CodeGen/<wbr>AMDGPU/wqm.ll<br>
llvm/trunk/test/CodeGen/ARM/<wbr>2011-03-23-PeepholeBug.ll<br>
llvm/trunk/test/CodeGen/<wbr>Hexagon/hwloop-crit-edge.ll<br>
llvm/trunk/test/CodeGen/<wbr>Hexagon/hwloop-loop1.ll<br>
llvm/trunk/test/CodeGen/X86/<wbr>lsr-loop-exit-cond.ll<br>
<br>
Modified: llvm/trunk/lib/Transforms/<wbr>Scalar/LoopStrengthReduce.cpp<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=278658&r1=278657&r2=278658&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/lib/<wbr>Transforms/Scalar/<wbr>LoopStrengthReduce.cpp?rev=<wbr>278658&r1=278657&r2=278658&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/lib/Transforms/<wbr>Scalar/LoopStrengthReduce.cpp (original)<br>
+++ llvm/trunk/lib/Transforms/<wbr>Scalar/LoopStrengthReduce.cpp Mon Aug 15 02:53:03 2016<br>
@@ -2069,10 +2069,30 @@ void<br>
LSRInstance::<wbr>OptimizeLoopTermCond() {<br>
SmallPtrSet&lt;Instruction *, 4&gt; PostIncs;<br>
<br>
+ // We need a different set of heuristics for rotated and non-rotated loops.<br>
+ // If a loop is rotated then the latch is also the backedge, so inserting<br>
+ // post-inc expressions just before the latch is ideal. To reduce live ranges<br>
+ // it also makes sense to rewrite terminating conditions to use post-inc<br>
+ // expressions.<br>
+ //<br>
+ // If the loop is not rotated then the latch is not a backedge; the latch<br>
+ // check is done in the loop head. Adding post-inc expressions before the<br>
+ // latch will cause overlapping live-ranges of pre-inc and post-inc expressions<br>
+ // in the loop body. In this case we do *not* want to use post-inc expressions<br>
+ // in the latch check, and we want to insert post-inc expressions before<br>
+ // the backedge.<br>
BasicBlock *LatchBlock = L->getLoopLatch();<br>
SmallVector&lt;BasicBlock*, 8&gt; ExitingBlocks;<br>
L->getExitingBlocks(<wbr>ExitingBlocks);<br>
+ if (llvm::all_of(ExitingBlocks, [&LatchBlock](const BasicBlock *BB) {<br>
+ return LatchBlock != BB;<br>
+ })) {<br>
+ // The backedge doesn't exit the loop; treat this as a head-tested loop.<br>
+ IVIncInsertPos = LatchBlock->getTerminator();<br>
+ return;<br>
+ }<br>
<br>
+ // Otherwise treat this as a rotated loop.<br>
for (BasicBlock *ExitingBlock : ExitingBlocks) {<br>
<br>
// Get the terminating condition for the loop if possible. If we<br>
<br>
Modified: llvm/trunk/test/CodeGen/<wbr>AMDGPU/wqm.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AMDGPU/wqm.ll?rev=278658&r1=278657&r2=278658&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/AMDGPU/wqm.ll?rev=<wbr>278658&r1=278657&r2=278658&<wbr>view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>AMDGPU/wqm.ll (original)<br>
+++ llvm/trunk/test/CodeGen/<wbr>AMDGPU/wqm.ll Mon Aug 15 02:53:03 2016<br>
@@ -343,11 +343,12 @@ main_body:<br>
; CHECK: s_and_b64 exec, exec, [[LIVE]]<br>
; CHECK: image_store<br>
; CHECK: s_wqm_b64 exec, exec<br>
-; CHECK: v_mov_b32_e32 [[CTR:v[0-9]+]], -2<br>
+; CHECK: v_mov_b32_e32 [[CTR:v[0-9]+]], 0<br>
; CHECK: s_branch [[LOOPHDR:BB[0-9]+_[0-9]+]]<br>
<br>
-; CHECK: [[LOOPHDR]]: ; %loop<br>
; CHECK: v_add_i32_e32 [[CTR]], vcc, 2, [[CTR]]<br>
+<br>
+; CHECK: [[LOOPHDR]]: ; %loop<br>
; CHECK: v_cmp_lt_i32_e32 vcc, 7, [[CTR]]<br>
; CHECK: s_cbranch_vccz<br>
; CHECK: ; %break<br>
<br>
Modified: llvm/trunk/test/CodeGen/ARM/<wbr>2011-03-23-PeepholeBug.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/2011-03-23-PeepholeBug.ll?rev=278658&r1=278657&r2=278658&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/ARM/2011-03-23-<wbr>PeepholeBug.ll?rev=278658&r1=<wbr>278657&r2=278658&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/ARM/<wbr>2011-03-23-PeepholeBug.ll (original)<br>
+++ llvm/trunk/test/CodeGen/ARM/<wbr>2011-03-23-PeepholeBug.ll Mon Aug 15 02:53:03 2016<br>
@@ -18,13 +18,14 @@ bb:<br>
br i1 %1, label %bb3, label %bb1<br>
<br>
bb1: ; preds = %bb<br>
+; CHECK: bb1<br>
+; CHECK: subs [[REG:r[0-9]+]], #1<br>
%tmp = tail call i32 @puts() nounwind<br>
%indvar.next = add i32 %indvar, 1<br>
br label %bb2<br>
<br>
bb2: ; preds = %bb1, %entry<br>
; CHECK: bb2<br>
-; CHECK: subs [[REG:r[0-9]+]], #1<br>
; CHECK: cmp [[REG]], #0<br>
; CHECK: ble<br>
%indvar = phi i32 [ %indvar.next, %bb1 ], [ 0, %entry ]<br>
<br>
Modified: llvm/trunk/test/CodeGen/<wbr>Hexagon/hwloop-crit-edge.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop-crit-edge.ll?rev=278658&r1=278657&r2=278658&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/Hexagon/hwloop-crit-<wbr>edge.ll?rev=278658&r1=278657&<wbr>r2=278658&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>Hexagon/hwloop-crit-edge.ll (original)<br>
+++ llvm/trunk/test/CodeGen/<wbr>Hexagon/hwloop-crit-edge.ll Mon Aug 15 02:53:03 2016<br>
@@ -1,4 +1,5 @@<br>
; RUN: llc -O3 -march=hexagon -mcpu=hexagonv5 < %s | FileCheck %s<br>
+; XFAIL: *<br>
;<br>
; Generate hardware loop when loop 'latch' block is different<br>
; from the loop 'exiting' block.<br>
<br>
Modified: llvm/trunk/test/CodeGen/<wbr>Hexagon/hwloop-loop1.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Hexagon/hwloop-loop1.ll?rev=278658&r1=278657&r2=278658&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/Hexagon/hwloop-loop1.<wbr>ll?rev=278658&r1=278657&r2=<wbr>278658&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/<wbr>Hexagon/hwloop-loop1.ll (original)<br>
+++ llvm/trunk/test/CodeGen/<wbr>Hexagon/hwloop-loop1.ll Mon Aug 15 02:53:03 2016<br>
@@ -2,8 +2,6 @@<br>
;<br>
; Generate loop1 instruction for double loop sequence.<br>
<br>
-; CHECK: loop0(.LBB{{.}}_{{.}}, #100)<br>
-; CHECK: endloop0<br>
; CHECK: loop1(.LBB{{.}}_{{.}}, #100)<br>
; CHECK: loop0(.LBB{{.}}_{{.}}, #100)<br>
; CHECK: endloop0<br>
<br>
Modified: llvm/trunk/test/CodeGen/X86/<wbr>lsr-loop-exit-cond.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll?rev=278658&r1=278657&r2=278658&view=diff" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>CodeGen/X86/lsr-loop-exit-<wbr>cond.ll?rev=278658&r1=278657&<wbr>r2=278658&view=diff</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/CodeGen/X86/<wbr>lsr-loop-exit-cond.ll (original)<br>
+++ llvm/trunk/test/CodeGen/X86/<wbr>lsr-loop-exit-cond.ll Mon Aug 15 02:53:03 2016<br>
@@ -3,12 +3,12 @@<br>
<br>
; CHECK-LABEL: t:<br>
; CHECK: movl (%r9,%rax,4), %e{{..}}<br>
-; CHECK-NEXT: decq<br>
+; CHECK-NEXT: testq<br>
; CHECK-NEXT: jne<br>
<br>
; ATOM-LABEL: t:<br>
; ATOM: movl (%r9,%r{{.+}},4), %e{{..}}<br>
-; ATOM-NEXT: decq<br>
+; ATOM-NEXT: testq<br>
; ATOM-NEXT: jne<br>
<br>
@Te0 = external global [256 x i32] ; &lt;[256 x i32]*&gt; [#uses=5]<br>
<br>
Added: llvm/trunk/test/Transforms/<wbr>LoopStrengthReduce/post-inc-<wbr>optsize.ll<br>
URL: <a href="http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/post-inc-optsize.ll?rev=278658&view=auto" rel="noreferrer" target="_blank">http://llvm.org/viewvc/llvm-<wbr>project/llvm/trunk/test/<wbr>Transforms/LoopStrengthReduce/<wbr>post-inc-optsize.ll?rev=<wbr>278658&view=auto</a><br>
==============================<wbr>==============================<wbr>==================<br>
--- llvm/trunk/test/Transforms/<wbr>LoopStrengthReduce/post-inc-<wbr>optsize.ll (added)<br>
+++ llvm/trunk/test/Transforms/<wbr>LoopStrengthReduce/post-inc-<wbr>optsize.ll Mon Aug 15 02:53:03 2016<br>
@@ -0,0 +1,43 @@<br>
+; RUN: opt < %s -loop-reduce -S | FileCheck %s<br>
+<br>
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:<wbr>128-a:0:32-n32-S64"<br>
+target triple = "thumbv7m-arm-none-eabi"<br>
+<br>
+; Check that the IV updates (incdec.ptr{,1,2}) are kept in the latch block<br>
+; and not moved to the header/exiting block. Inserting them in the header<br>
+; doubles register pressure and adds moves.<br>
+<br>
+; CHECK-LABEL: @f<br>
+; CHECK: while.cond:<br>
+; CHECK: icmp sgt i32 %n.addr.0, 0<br>
+; CHECK: while.body:<br>
+; CHECK: incdec.ptr =<br>
+; CHECK: incdec.ptr1 =<br>
+; CHECK: incdec.ptr2 =<br>
+; CHECK: dec =<br>
+define void @f(float* nocapture readonly %a, float* nocapture readonly %b, float* nocapture %c, i32 %n) {<br>
+entry:<br>
+ br label %while.cond<br>
+<br>
+while.cond: ; preds = %while.body, %entry<br>
+ %a.addr.0 = phi float* [ %a, %entry ], [ %incdec.ptr, %while.body ]<br>
+ %b.addr.0 = phi float* [ %b, %entry ], [ %incdec.ptr1, %while.body ]<br>
+ %c.addr.0 = phi float* [ %c, %entry ], [ %incdec.ptr2, %while.body ]<br>
+ %n.addr.0 = phi i32 [ %n, %entry ], [ %dec, %while.body ]<br>
+ %cmp = icmp sgt i32 %n.addr.0, 0<br>
+ br i1 %cmp, label %while.body, label %while.end<br>
+<br>
+while.body: ; preds = %while.cond<br>
+ %incdec.ptr = getelementptr inbounds float, float* %a.addr.0, i32 1<br>
+ %tmp = load float, float* %a.addr.0, align 4<br>
+ %incdec.ptr1 = getelementptr inbounds float, float* %b.addr.0, i32 1<br>
+ %tmp1 = load float, float* %b.addr.0, align 4<br>
+ %add = fadd float %tmp, %tmp1<br>
+ %incdec.ptr2 = getelementptr inbounds float, float* %c.addr.0, i32 1<br>
+ store float %add, float* %c.addr.0, align 4<br>
+ %dec = add nsw i32 %n.addr.0, -1<br>
+ br label %while.cond<br>
+<br>
+while.end: ; preds = %while.cond<br>
+ ret void<br>
+}<br>
<br>
<br>
______________________________<wbr>_________________<br>
llvm-commits mailing list<br>
<a href="mailto:llvm-commits@lists.llvm.org">llvm-commits@lists.llvm.org</a><br>
<a href="http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits" rel="noreferrer" target="_blank">http://lists.llvm.org/cgi-bin/<wbr>mailman/listinfo/llvm-commits</a><br>
</blockquote></div><br></div>