[llvm-commits] [llvm] r74396 - in /llvm/trunk: include/llvm/Transforms/Utils/Local.h lib/Transforms/Utils/LoopSimplify.cpp lib/Transforms/Utils/SimplifyCFG.cpp test/Transforms/LoopSimplify/merge-exits.ll

Dan Gohman gohman at apple.com
Sat Jun 27 14:30:38 PDT 2009


Author: djg
Date: Sat Jun 27 16:30:38 2009
New Revision: 74396

URL: http://llvm.org/viewvc/llvm-project?rev=74396&view=rev
Log:
Teach LoopSimplify how to merge multiple loop exits into a single exit,
when one of them can be converted to a trivial icmp and conditional
branch.

This addresses what is essentially a phase ordering problem.
SimplifyCFG knows how to do this transformation, but it doesn't do so
if the primary block has any instructions in it other than an icmp and
a branch. In the given testcase, the block contains other instructions,
however they are loop-invariant and can be hoisted. SimplifyCFG doesn't
have LoopInfo though, so it can't hoist them. And, it's important that
the blocks be merged before LoopRotation, as it doesn't support
multiple-exit loops.

Added:
    llvm/trunk/test/Transforms/LoopSimplify/merge-exits.ll
Modified:
    llvm/trunk/include/llvm/Transforms/Utils/Local.h
    llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp
    llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp

Modified: llvm/trunk/include/llvm/Transforms/Utils/Local.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Transforms/Utils/Local.h?rev=74396&r1=74395&r2=74396&view=diff

==============================================================================
--- llvm/trunk/include/llvm/Transforms/Utils/Local.h (original)
+++ llvm/trunk/include/llvm/Transforms/Utils/Local.h Sat Jun 27 16:30:38 2009
@@ -19,6 +19,7 @@
 
 class User;
 class BasicBlock;
+class BranchInst;
 class Instruction;
 class Value;
 class Pass;
@@ -94,6 +95,12 @@
 ///
 bool SimplifyCFG(BasicBlock *BB);
 
+/// FoldBranchToCommonDest - If this basic block is ONLY a setcc and a branch,
+/// and if a predecessor branches to us and one of our successors, fold the
+/// setcc into the predecessor and use logical operations to pick the right
+/// destination.
+bool FoldBranchToCommonDest(BranchInst *BI);
+
 /// DemoteRegToStack - This function takes a virtual register computed by an
 /// Instruction and replaces it with a slot in the stack frame, allocated via
 /// alloca.  This allows the CFG to be changed around without fear of

Modified: llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp?rev=74396&r1=74395&r2=74396&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/LoopSimplify.cpp Sat Jun 27 16:30:38 2009
@@ -42,6 +42,7 @@
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Support/CFG.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/ADT/SetOperations.h"
@@ -258,6 +259,80 @@
       PN->eraseFromParent();
     }
 
+  // If this loop has muliple exits and the exits all go to the same
+  // block, attempt to merge the exits. This helps several passes, such
+  // as LoopRotation, which do not support loops with multiple exits.
+  // SimplifyCFG also does this (and this code uses the same utility
+  // function), however this code is loop-aware, where SimplifyCFG is
+  // not. That gives it the advantage of being able to hoist
+  // loop-invariant instructions out of the way to open up more
+  // opportunities, and the disadvantage of having the responsibility
+  // to preserve dominator information.
+  if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) {
+    SmallVector<BasicBlock*, 8> ExitingBlocks;
+    L->getExitingBlocks(ExitingBlocks);
+    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+      BasicBlock *ExitingBlock = ExitingBlocks[i];
+      if (!ExitingBlock->getSinglePredecessor()) continue;
+      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+      if (!BI || !BI->isConditional()) continue;
+      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+      if (!CI || CI->getParent() != ExitingBlock) continue;
+
+      // Attempt to hoist out all instructions except for the
+      // comparison and the branch.
+      bool AllInvariant = true;
+      for (BasicBlock::iterator I = ExitingBlock->begin(),
+           E = ExitingBlock->end(); I != E; ) {
+        Instruction *Inst = I++;
+        if (Inst == BI || Inst == CI)
+          continue;
+        if (Inst->isTrapping()) {
+          AllInvariant = false;
+          break;
+        }
+        for (unsigned j = 0, f = Inst->getNumOperands(); j != f; ++j)
+          if (!L->isLoopInvariant(Inst->getOperand(j))) {
+            AllInvariant = false;
+            break;
+          }
+        if (!AllInvariant)
+          break;
+        // Hoist.
+        Inst->moveBefore(L->getLoopPreheader()->getTerminator());
+      }
+      if (!AllInvariant) continue;
+
+      // The block has now been cleared of all instructions except for
+      // a comparison and a conditional branch. SimplifyCFG may be able
+      // to fold it now.
+      if (!FoldBranchToCommonDest(BI)) continue;
+
+      // Success. The block is now dead, so remove it from the loop,
+      // update the dominator tree and dominance frontier, and delete it.
+      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+      Changed = true;
+      L->removeBlockFromLoop(ExitingBlock);
+
+      DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
+      DomTreeNode *Node = DT->getNode(ExitingBlock);
+      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+        Node->getChildren();
+      for (unsigned k = 0, g = Children.size(); k != g; ++k) {
+        DT->changeImmediateDominator(Children[k], Node->getIDom());
+        if (DF) DF->changeImmediateDominator(Children[k]->getBlock(),
+                                             Node->getIDom()->getBlock(),
+                                             DT);
+      }
+      DT->eraseNode(ExitingBlock);
+      if (DF) DF->removeBlock(ExitingBlock);
+
+      BI->getSuccessor(0)->removePredecessor(ExitingBlock);
+      BI->getSuccessor(1)->removePredecessor(ExitingBlock);
+      ExitingBlock->eraseFromParent();
+    }
+  }
+
   return Changed;
 }
 

Modified: llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp?rev=74396&r1=74395&r2=74396&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/SimplifyCFG.cpp Sat Jun 27 16:30:38 2009
@@ -1492,7 +1492,7 @@
 /// and if a predecessor branches to us and one of our successors, fold the
 /// setcc into the predecessor and use logical operations to pick the right
 /// destination.
-static bool FoldBranchToCommonDest(BranchInst *BI) {
+bool llvm::FoldBranchToCommonDest(BranchInst *BI) {
   BasicBlock *BB = BI->getParent();
   Instruction *Cond = dyn_cast<Instruction>(BI->getCondition());
   if (Cond == 0) return false;

Added: llvm/trunk/test/Transforms/LoopSimplify/merge-exits.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopSimplify/merge-exits.ll?rev=74396&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/LoopSimplify/merge-exits.ll (added)
+++ llvm/trunk/test/Transforms/LoopSimplify/merge-exits.ll Sat Jun 27 16:30:38 2009
@@ -0,0 +1,45 @@
+; RUN: llvm-as < %s | opt -loopsimplify -loop-rotate -instcombine -indvars \
+; RUN:  | llvm-dis > %t
+; RUN: not grep sext %t
+; RUN: grep {phi i64} %t | count 1
+
+; Loopsimplify should be able to merge the two loop exits
+; into one, so that loop rotate can rotate the loop, so
+; that indvars can promote the induction variable to i64
+; without needing casts.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+
+define float @t(float* %pTmp1, float* %peakWeight, i32 %bandEdgeIndex) nounwind {
+entry:
+	%t0 = load float* %peakWeight, align 4		; <float> [#uses=1]
+	br label %bb1
+
+bb:		; preds = %bb2
+	%t1 = sext i32 %hiPart.0 to i64		; <i64> [#uses=1]
+	%t2 = getelementptr float* %pTmp1, i64 %t1		; <float*> [#uses=1]
+	%t3 = load float* %t2, align 4		; <float> [#uses=1]
+	%t4 = fadd float %t3, %distERBhi.0		; <float> [#uses=1]
+	%t5 = add i32 %hiPart.0, 1		; <i32> [#uses=2]
+	%t6 = sext i32 %t5 to i64		; <i64> [#uses=1]
+	%t7 = getelementptr float* %peakWeight, i64 %t6		; <float*> [#uses=1]
+	%t8 = load float* %t7, align 4		; <float> [#uses=1]
+	%t9 = fadd float %t8, %peakCount.0		; <float> [#uses=1]
+	br label %bb1
+
+bb1:		; preds = %bb, %entry
+	%peakCount.0 = phi float [ %t0, %entry ], [ %t9, %bb ]		; <float> [#uses=2]
+	%hiPart.0 = phi i32 [ 0, %entry ], [ %t5, %bb ]		; <i32> [#uses=3]
+	%distERBhi.0 = phi float [ 0.000000e+00, %entry ], [ %t4, %bb ]		; <float> [#uses=3]
+	%t10 = fcmp uge float %distERBhi.0, 2.500000e+00		; <i1> [#uses=1]
+	br i1 %t10, label %bb3, label %bb2
+
+bb2:		; preds = %bb1
+	%t11 = add i32 %bandEdgeIndex, -1		; <i32> [#uses=1]
+	%t12 = icmp sgt i32 %t11, %hiPart.0		; <i1> [#uses=1]
+	br i1 %t12, label %bb, label %bb3
+
+bb3:		; preds = %bb2, %bb1
+	%t13 = fdiv float %peakCount.0, %distERBhi.0		; <float> [#uses=1]
+	ret float %t13
+}





More information about the llvm-commits mailing list