[llvm-branch-commits] [llvm-branch] r71460 - in /llvm/branches/Apple/Dib: lib/Transforms/Scalar/LoopStrengthReduce.cpp test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll test/CodeGen/X86/iv-users-in-other-loops.ll test/CodeGen/X86/masked-iv-safe.ll test/CodeGen/X86/pr3495.ll
Bill Wendling
isanbard at gmail.com
Mon May 11 11:55:30 PDT 2009
Author: void
Date: Mon May 11 13:55:29 2009
New Revision: 71460
URL: http://llvm.org/viewvc/llvm-project?rev=71460&view=rev
Log:
--- Merging r71439 into '.':
U test/CodeGen/X86/iv-users-in-other-loops.ll
U test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
U test/CodeGen/X86/pr3495.ll
U test/CodeGen/X86/masked-iv-safe.ll
U lib/Transforms/Scalar/LoopStrengthReduce.cpp
Reverse a loop that is counting up to a maximum to
count down to 0 instead, under very restricted
circumstances. Adjust 4 testcases in which this
optimization fires.
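For context, here is a minimal conceptual sketch (plain C++, written for this note rather than taken from the pass or its output) of the shape of loop the rewrite targets, assuming the induction variable has no use other than the equality exit test:

  // Illustration only: the IV is used solely by the exit test, so the
  // direction of counting does not matter to the loop body.
  void countUp(void (*body)(), unsigned n) {
    for (unsigned i = 0; i != n; ++i)   // original: increment, compare with n
      body();
  }

  void countDown(void (*body)(), unsigned n) {
    for (unsigned i = n; i != 0; --i)   // rewritten: decrement, compare with 0
      body();
  }

Counting down lets the backend reuse the flags set by the decrement for the exit branch instead of emitting a separate compare, which is why the test updates below expect dec in place of inc and, in pr3495.ll, one fewer machine instruction.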
Modified:
llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp
llvm/branches/Apple/Dib/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/iv-users-in-other-loops.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/masked-iv-safe.ll
llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll
Modified: llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp?rev=71460&r1=71459&r2=71460&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp (original)
+++ llvm/branches/Apple/Dib/lib/Transforms/Scalar/LoopStrengthReduce.cpp Mon May 11 13:55:29 2009
@@ -164,7 +164,7 @@
IVStrideUse* &CondUse,
const SCEVHandle* &CondStride);
void OptimizeIndvars(Loop *L);
-
+ void OptimizeLoopCountIV(Loop *L);
/// OptimizeShadowIV - If IV is used in an int-to-float cast
/// inside the loop then try to eliminate the cast operation.
void OptimizeShadowIV(Loop *L);
@@ -1751,11 +1751,11 @@
CommonBaseV = PreheaderRewriter.expandCodeFor(CommonExprs, ReplacedTy,
PreInsertPt);
- // If all uses are addresses, check if it is possible to reuse an IV with a
- // stride that is a factor of this stride. And that the multiple is a number
- // that can be encoded in the scale field of the target addressing mode. And
- // that we will have a valid instruction after this substition, including
- // the immediate field, if any.
+ // If all uses are addresses, check if it is possible to reuse an IV. The
+ // new IV must have a stride that is a multiple of the old stride; the
+ // multiple must be a number that can be encoded in the scale field of the
+ // target addressing mode; and we must have a valid instruction after this
+ // substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
Stride, ReuseIV, ReplacedTy,
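To make the reuse condition described in the rewritten comment concrete, here is a hypothetical sketch (illustrative names only, not the actual CheckForIVReuse interface), assuming x86-style scale factors of 1, 2, 4 and 8:

  #include <cstdint>

  // Illustration only: an existing IV of stride OldStride can serve a use of
  // stride NewStride if NewStride is an exact multiple and the factor can be
  // encoded in the addressing mode's scale field (1, 2, 4 or 8 on x86).
  static bool fitsScaleField(int64_t Factor) {
    return Factor == 1 || Factor == 2 || Factor == 4 || Factor == 8;
  }

  bool canReuseExistingIV(int64_t NewStride, int64_t OldStride) {
    if (OldStride == 0 || NewStride % OldStride != 0)
      return false;
    return fitsScaleField(NewStride / OldStride);
  }

The real check also has to verify that the resulting instruction, including any immediate, is still valid for the target; that part is omitted from the sketch.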
@@ -2447,6 +2447,114 @@
Changed = true;
}
+// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+// when to exit the loop is used only for that purpose, try to rearrange things
+// so it counts down to a test against zero.
+void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+
+ // If the number of times the loop is executed isn't computable, give up.
+ SCEVHandle BackedgeTakenCount = SE->getBackedgeTakenCount(L);
+ if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
+ return;
+
+ // Get the terminating condition for the loop if possible (this isn't
+ // necessarily in the latch, or a block that's a predecessor of the header).
+ SmallVector<BasicBlock*, 8> ExitBlocks;
+ L->getExitBlocks(ExitBlocks);
+ if (ExitBlocks.size() != 1) return;
+
+ // Okay, there is one exit block. Try to find the condition that causes the
+ // loop to be exited.
+ BasicBlock *ExitBlock = ExitBlocks[0];
+
+ BasicBlock *ExitingBlock = 0;
+ for (pred_iterator PI = pred_begin(ExitBlock), E = pred_end(ExitBlock);
+ PI != E; ++PI)
+ if (L->contains(*PI)) {
+ if (ExitingBlock == 0)
+ ExitingBlock = *PI;
+ else
+ return; // More than one block exiting!
+ }
+ assert(ExitingBlock && "No exits from loop, something is broken!");
+
+ // Okay, we've computed the exiting block. See what condition causes us to
+ // exit.
+ //
+ // FIXME: we should be able to handle switch instructions (with a single exit)
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (TermBr == 0) return;
+ assert(TermBr->isConditional() && "If unconditional, it can't be in loop!");
+ if (!isa<ICmpInst>(TermBr->getCondition()))
+ return;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+
+ // Handle only tests for equality for the moment, and only stride 1.
+ if (Cond->getPredicate() != CmpInst::ICMP_EQ)
+ return;
+ SCEVHandle IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ SCEVHandle One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
+ if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
+ return;
+
+ // Make sure the IV is only used for counting. Value may be preinc or
+ // postinc; 2 uses in either case.
+ if (!Cond->getOperand(0)->hasNUses(2))
+ return;
+ PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0));
+ Instruction *incr;
+ if (phi && phi->getParent()==L->getHeader()) {
+ // value tested is preinc. Find the increment.
+ // A CmpInst is not a BinaryOperator; we depend on this.
+ Instruction::use_iterator UI = phi->use_begin();
+ incr = dyn_cast<BinaryOperator>(UI);
+ if (!incr)
+ incr = dyn_cast<BinaryOperator>(++UI);
+ // 1 use for postinc value, the phi. Unnecessarily conservative?
+ if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add)
+ return;
+ } else {
+ // Value tested is postinc. Find the phi node.
+ incr = dyn_cast<BinaryOperator>(Cond->getOperand(0));
+ if (!incr || incr->getOpcode()!=Instruction::Add)
+ return;
+
+ Instruction::use_iterator UI = Cond->getOperand(0)->use_begin();
+ phi = dyn_cast<PHINode>(UI);
+ if (!phi)
+ phi = dyn_cast<PHINode>(++UI);
+ // 1 use for preinc value, the increment.
+ if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse())
+ return;
+ }
+
+ // Replace the increment with a decrement.
+ BinaryOperator *decr =
+ BinaryOperator::Create(Instruction::Sub, incr->getOperand(0),
+ incr->getOperand(1), "tmp", incr);
+ incr->replaceAllUsesWith(decr);
+ incr->eraseFromParent();
+
+ // Substitute endval-startval for the original startval, and 0 for the
+ // original endval. Since we're only testing for equality this is OK even
+ // if the computation wraps around.
+ BasicBlock *Preheader = L->getLoopPreheader();
+ Instruction *PreInsertPt = Preheader->getTerminator();
+ int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0;
+ Value *startVal = phi->getIncomingValue(inBlock);
+ Value *endVal = Cond->getOperand(1);
+ // FIXME check for case where both are constant
+ ConstantInt* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
+ BinaryOperator *NewStartVal =
+ BinaryOperator::Create(Instruction::Sub, endVal, startVal,
+ "tmp", PreInsertPt);
+ phi->setIncomingValue(inBlock, NewStartVal);
+ Cond->setOperand(1, Zero);
+
+ Changed = true;
+}
+
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
@@ -2502,6 +2610,10 @@
}
}
+ // After all sharing is done, see if we can adjust the loop to test against
+ // zero instead of counting up to a maximum. This is usually faster.
+ OptimizeLoopCountIV(L);
+
// We're done analyzing this loop; release all the state we built up for it.
IVUsesByStride.clear();
IVsByStride.clear();
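The end-value substitution in OptimizeLoopCountIV above relies on the exit test being plain equality, so it remains correct even when endval - startval wraps. A small standalone check of that equivalence (illustrative, not derived from the pass; uint8_t keeps the wrapping case fast to run):

  #include <cassert>
  #include <cstdint>

  // Trip count of the original form: start the IV at 'start', increment,
  // exit as soon as it equals 'end'.
  uint64_t tripCountUp(uint8_t start, uint8_t end) {
    uint64_t n = 0;
    for (uint8_t iv = start; iv != end; ++iv) ++n;
    return n;
  }

  // Trip count of the rewritten form: start at end - start (the value the
  // pass materializes in the preheader), decrement, exit at zero.
  uint64_t tripCountDown(uint8_t start, uint8_t end) {
    uint64_t n = 0;
    for (uint8_t iv = (uint8_t)(end - start); iv != 0; --iv) ++n;
    return n;
  }

  int main() {
    assert(tripCountUp(0, 100) == tripCountDown(0, 100)); // no wrap
    assert(tripCountUp(10, 3) == tripCountDown(10, 3));   // end - start wraps
    return 0;
  }

Both forms execute end - start iterations modulo the IV's width, which matches the backedge-taken count the pass requires ScalarEvolution to be able to compute.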
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll?rev=71460&r1=71459&r2=71460&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/2007-11-30-LoadFolding-Bug.ll Mon May 11 13:55:29 2009
@@ -1,5 +1,7 @@
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse2 -stats |& \
; RUN: grep {1 .*folded into instructions}
+; Increment in loop bb.128.i adjusted to 2, to prevent loop reversal from
+; kicking in.
declare fastcc void @rdft(i32, i32, double*, i32*, double*)
@@ -41,7 +43,7 @@
%tmp1213.i23.i = sitofp i32 %x.0.i21.i to double ; <double> [#uses=1]
%tmp15.i24.i = sub double 0.000000e+00, %tmp1213.i23.i ; <double> [#uses=1]
%tmp16.i25.i = mul double 0.000000e+00, %tmp15.i24.i ; <double> [#uses=1]
- %indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 1 ; <i32> [#uses=2]
+ %indvar.next39.i = add i32 %j.0.reg2mem.0.i16.i, 2 ; <i32> [#uses=2]
%exitcond40.i = icmp eq i32 %indvar.next39.i, %tmp8.i14.i ; <i1> [#uses=1]
br i1 %exitcond40.i, label %mp_unexp_d2mp.exit29.i, label %bb.i28.i
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/iv-users-in-other-loops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/iv-users-in-other-loops.ll?rev=71460&r1=71459&r2=71460&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/iv-users-in-other-loops.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/iv-users-in-other-loops.ll Mon May 11 13:55:29 2009
@@ -1,5 +1,6 @@
; RUN: llvm-as < %s | llc -march=x86-64 -f -o %t
-; RUN: grep inc %t | count 2
+; RUN: grep inc %t | count 1
+; RUN: grep dec %t | count 2
; RUN: grep addq %t | count 13
; RUN: grep leaq %t | count 8
; RUN: grep leal %t | count 4
@@ -8,6 +9,7 @@
; IV users in each of the loops from other loops shouldn't cause LSR
; to insert new induction variables. Previously it would create a
; flood of new induction variables.
+; Also, the loop reversal should kick in once.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/masked-iv-safe.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/masked-iv-safe.ll?rev=71460&r1=71459&r2=71460&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/masked-iv-safe.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/masked-iv-safe.ll Mon May 11 13:55:29 2009
@@ -4,12 +4,13 @@
; RUN: not grep sar %t
; RUN: not grep shl %t
; RUN: grep add %t | count 6
-; RUN: grep inc %t | count 4
-; RUN: grep dec %t | count 2
+; RUN: grep inc %t | count 2
+; RUN: grep dec %t | count 4
; RUN: grep lea %t | count 2
; Optimize away zext-inreg and sext-inreg on the loop induction
; variable using trip-count information.
+; Also, the loop-reversal algorithm kicks in twice.
define void @count_up(double* %d, i64 %n) nounwind {
entry:
Modified: llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll?rev=71460&r1=71459&r2=71460&view=diff
==============================================================================
--- llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll (original)
+++ llvm/branches/Apple/Dib/test/CodeGen/X86/pr3495.ll Mon May 11 13:55:29 2009
@@ -1,7 +1,8 @@
; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} | grep 2
; RUN: llvm-as < %s | llc -march=x86 -stats |& not grep {Number of available reloads turned into copies}
-; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 39
+; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of machine instrs printed} | grep 38
; PR3495
+; The loop reversal kicks in once here, resulting in one fewer instruction.
target triple = "i386-pc-linux-gnu"
@x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1]