[llvm-commits] [llvm] r61248 - in /llvm/trunk: lib/Transforms/Scalar/CodeGenPrepare.cpp test/CodeGen/X86/critical-edge-split.ll test/CodeGen/X86/remat-mov0.ll
Evan Cheng
evan.cheng at apple.com
Fri Dec 19 10:03:22 PST 2008
Author: evancheng
Date: Fri Dec 19 12:03:11 2008
New Revision: 61248
URL: http://llvm.org/viewvc/llvm-project?rev=61248&view=rev
Log:
- CodeGenPrepare does not split loop back edges but it only knows about back edges of single block loops. It now does a DFS walk to find loop back edges.
- Use SplitBlockPredecessors to factor out common predecessors of the critical edge destination. This is disabled for now due to some regressions.
Added:
llvm/trunk/test/CodeGen/X86/critical-edge-split.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp
llvm/trunk/test/CodeGen/X86/remat-mov0.ll
Modified: llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp?rev=61248&r1=61247&r2=61248&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/CodeGenPrepare.cpp Fri Dec 19 12:03:11 2008
@@ -30,6 +30,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -37,11 +38,18 @@
using namespace llvm;
using namespace llvm::PatternMatch;
+static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
+ cl::init(false), cl::Hidden);
+
namespace {
class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
/// TLI - Keep a pointer of a TargetLowering to consult for determining
/// transformation profitability.
const TargetLowering *TLI;
+
+ /// BackEdges - Keep a set of all the loop back edges.
+ ///
+ SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> BackEdges;
public:
static char ID; // Pass identification, replacement for typeid
explicit CodeGenPrepare(const TargetLowering *tli = 0)
@@ -58,6 +66,7 @@
bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
DenseMap<Value*,Value*> &SunkAddrs);
bool OptimizeExtUses(Instruction *I);
+ void findLoopBackEdges(Function &F);
};
}
@@ -69,10 +78,55 @@
return new CodeGenPrepare(TLI);
}
+/// findLoopBackEdges - Do a DFS walk to find loop back edges.
+///
+void CodeGenPrepare::findLoopBackEdges(Function &F) {
+ SmallPtrSet<BasicBlock*, 8> Visited;
+ SmallVector<std::pair<BasicBlock*, succ_iterator>, 8> VisitStack;
+ SmallPtrSet<BasicBlock*, 8> InStack;
+
+ BasicBlock *BB = &F.getEntryBlock();
+ if (succ_begin(BB) == succ_end(BB))
+ return;
+ Visited.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ InStack.insert(BB);
+ do {
+ std::pair<BasicBlock*, succ_iterator> &Top = VisitStack.back();
+ BasicBlock *ParentBB = Top.first;
+ succ_iterator &I = Top.second;
+
+ bool FoundNew = false;
+ while (I != succ_end(ParentBB)) {
+ BB = *I++;
+ if (Visited.insert(BB)) {
+ FoundNew = true;
+ break;
+ }
+ // Successor is in VisitStack, it's a back edge.
+ if (InStack.count(BB))
+ BackEdges.insert(std::make_pair(ParentBB, BB));
+ }
+
+ if (FoundNew) {
+ // Go down one level if there is a unvisited successor.
+ InStack.insert(BB);
+ VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+ } else {
+ // Go up one level.
+ std::pair<BasicBlock*, succ_iterator> &Pop = VisitStack.back();
+ InStack.erase(Pop.first);
+ VisitStack.pop_back();
+ }
+ } while (!VisitStack.empty());
+}
+
bool CodeGenPrepare::runOnFunction(Function &F) {
bool EverMadeChange = false;
+ findLoopBackEdges(F);
+
// First pass, eliminate blocks that contain only PHI nodes and an
// unconditional branch.
EverMadeChange |= EliminateMostlyEmptyBlocks(F);
@@ -262,7 +316,9 @@
/// phi nodes (otherwise critical edges are ok). If there is already another
/// predecessor of the succ that is empty (and thus has no phi nodes), use it
/// instead of introducing a new block.
-static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
+static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
+ SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> &BackEdges,
+ Pass *P) {
BasicBlock *TIBB = TI->getParent();
BasicBlock *Dest = TI->getSuccessor(SuccNum);
assert(isa<PHINode>(Dest->begin()) &&
@@ -271,55 +327,90 @@
// As a hack, never split backedges of loops. Even though the copy for any
// PHIs inserted on the backedge would be dead for exits from the loop, we
// assume that the cost of *splitting* the backedge would be too high.
- if (Dest == TIBB)
+ if (BackEdges.count(std::make_pair(TIBB, Dest)))
return;
- /// TIPHIValues - This array is lazily computed to determine the values of
- /// PHIs in Dest that TI would provide.
- SmallVector<Value*, 32> TIPHIValues;
+ if (!FactorCommonPreds) {
+ /// TIPHIValues - This array is lazily computed to determine the values of
+ /// PHIs in Dest that TI would provide.
+ SmallVector<Value*, 32> TIPHIValues;
+
+ // Check to see if Dest has any blocks that can be used as a split edge for
+ // this terminator.
+ for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ // To be usable, the pred has to end with an uncond branch to the dest.
+ BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
+ if (!PredBr || !PredBr->isUnconditional() ||
+ // Must be empty other than the branch.
+ &Pred->front() != PredBr ||
+ // Cannot be the entry block; its label does not get emitted.
+ Pred == &(Dest->getParent()->getEntryBlock()))
+ continue;
- // Check to see if Dest has any blocks that can be used as a split edge for
- // this terminator.
- for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
- BasicBlock *Pred = *PI;
- // To be usable, the pred has to end with an uncond branch to the dest.
- BranchInst *PredBr = dyn_cast<BranchInst>(Pred->getTerminator());
- if (!PredBr || !PredBr->isUnconditional() ||
- // Must be empty other than the branch.
- &Pred->front() != PredBr ||
- // Cannot be the entry block; its label does not get emitted.
- Pred == &(Dest->getParent()->getEntryBlock()))
- continue;
+ // Finally, since we know that Dest has phi nodes in it, we have to make
+ // sure that jumping to Pred will have the same affect as going to Dest in
+ // terms of PHI values.
+ PHINode *PN;
+ unsigned PHINo = 0;
+ bool FoundMatch = true;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
+ if (PHINo == TIPHIValues.size())
+ TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
+
+ // If the PHI entry doesn't work, we can't use this pred.
+ if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
+ FoundMatch = false;
+ break;
+ }
+ }
- // Finally, since we know that Dest has phi nodes in it, we have to make
- // sure that jumping to Pred will have the same affect as going to Dest in
- // terms of PHI values.
- PHINode *PN;
- unsigned PHINo = 0;
- bool FoundMatch = true;
- for (BasicBlock::iterator I = Dest->begin();
- (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo) {
- if (PHINo == TIPHIValues.size())
- TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
-
- // If the PHI entry doesn't work, we can't use this pred.
- if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
- FoundMatch = false;
- break;
+ // If we found a workable predecessor, change TI to branch to Succ.
+ if (FoundMatch) {
+ Dest->removePredecessor(TIBB);
+ TI->setSuccessor(SuccNum, Pred);
+ return;
}
}
- // If we found a workable predecessor, change TI to branch to Succ.
- if (FoundMatch) {
- Dest->removePredecessor(TIBB);
- TI->setSuccessor(SuccNum, Pred);
- return;
+ SplitCriticalEdge(TI, SuccNum, P, true);
+ return;
+ }
+
+ PHINode *PN;
+ SmallVector<Value*, 8> TIPHIValues;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I)
+ TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
+
+ SmallVector<BasicBlock*, 8> IdenticalPreds;
+ for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
+ BasicBlock *Pred = *PI;
+ if (BackEdges.count(std::make_pair(Pred, Dest)))
+ continue;
+ if (PI == TIBB)
+ IdenticalPreds.push_back(Pred);
+ else {
+ bool Identical = true;
+ unsigned PHINo = 0;
+ for (BasicBlock::iterator I = Dest->begin();
+ (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
+ if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
+ Identical = false;
+ break;
+ }
+ if (Identical)
+ IdenticalPreds.push_back(Pred);
}
}
- SplitCriticalEdge(TI, SuccNum, P, true);
+ assert(!IdenticalPreds.empty());
+ SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
+ ".critedge", P);
}
+
/// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
/// copy (e.g. it's casting from one pointer type to another, int->uint, or
/// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
@@ -1350,17 +1441,16 @@
bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
bool MadeChange = false;
- // Split all critical edges where the dest block has a PHI and where the phi
- // has shared immediate operands.
+ // Split all critical edges where the dest block has a PHI.
TerminatorInst *BBTI = BB.getTerminator();
if (BBTI->getNumSuccessors() > 1) {
- for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i)
- if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) &&
- isCriticalEdge(BBTI, i, true))
- SplitEdgeNicely(BBTI, i, this);
+ for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+ BasicBlock *SuccBB = BBTI->getSuccessor(i);
+ if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+ SplitEdgeNicely(BBTI, i, BackEdges, this);
+ }
}
-
// Keep track of non-local addresses that have been sunk into this block.
// This allows us to avoid inserting duplicate code for blocks with multiple
// load/stores of the same address.
Added: llvm/trunk/test/CodeGen/X86/critical-edge-split.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/critical-edge-split.ll?rev=61248&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/critical-edge-split.ll (added)
+++ llvm/trunk/test/CodeGen/X86/critical-edge-split.ll Fri Dec 19 12:03:11 2008
@@ -0,0 +1,50 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 31
+
+ %CC = type { %Register }
+ %II = type { %"struct.XX::II::$_74" }
+ %JITFunction = type %YYValue* (%CC*, %YYValue**)
+ %YYValue = type { i32 (...)** }
+ %Register = type { %"struct.XX::ByteCodeFeatures" }
+ %"struct.XX::ByteCodeFeatures" = type { i32 }
+ %"struct.XX::II::$_74" = type { i8* }
+ at llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata" ; <[1 x i8*]*> [#uses=0]
+
+define %YYValue* @loop(%CC*, %YYValue**) nounwind {
+; <label>:2
+ %3 = getelementptr %CC* %0, i32 -9 ; <%CC*> [#uses=1]
+ %4 = bitcast %CC* %3 to %YYValue** ; <%YYValue**> [#uses=2]
+ %5 = load %YYValue** %4 ; <%YYValue*> [#uses=3]
+ %unique_1.i = ptrtoint %YYValue* %5 to i1 ; <i1> [#uses=1]
+ br i1 %unique_1.i, label %loop, label %11
+
+loop: ; preds = %6, %2
+ %.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ] ; <%YYValue*> [#uses=3]
+ %immediateCmp = icmp slt %YYValue* %.1, %5 ; <i1> [#uses=1]
+ br i1 %immediateCmp, label %6, label %8
+
+; <label>:6 ; preds = %loop
+ %lhsInt = ptrtoint %YYValue* %.1 to i32 ; <i32> [#uses=1]
+ %7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2) ; <{ i32, i1 }> [#uses=2]
+ %intAdd = extractvalue { i32, i1 } %7, 0 ; <i32> [#uses=1]
+ %intAddValue = inttoptr i32 %intAdd to %YYValue* ; <%YYValue*> [#uses=1]
+ %intAddOverflow = extractvalue { i32, i1 } %7, 1 ; <i1> [#uses=1]
+ br i1 %intAddOverflow, label %.loopexit, label %loop
+
+; <label>:8 ; preds = %loop
+ ret %YYValue* inttoptr (i32 10 to %YYValue*)
+
+.loopexit: ; preds = %6
+ %9 = bitcast %CC* %0 to %YYValue** ; <%YYValue**> [#uses=1]
+ store %YYValue* %.1, %YYValue** %9
+ store %YYValue* %5, %YYValue** %4
+ %10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1]
+ ret %YYValue* %10
+
+; <label>:11 ; preds = %2
+ %12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1) ; <%YYValue*> [#uses=1]
+ ret %YYValue* %12
+}
+
+declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
Modified: llvm/trunk/test/CodeGen/X86/remat-mov0.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/remat-mov0.ll?rev=61248&r1=61247&r2=61248&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/remat-mov0.ll (original)
+++ llvm/trunk/test/CodeGen/X86/remat-mov0.ll Fri Dec 19 12:03:11 2008
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
+; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 1
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
%struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }
More information about the llvm-commits
mailing list