R600: AMDILCFGStructurizer simplification
Tom Stellard
tom at stellard.net
Fri Jul 19 09:48:22 PDT 2013
On Fri, Jul 19, 2013 at 09:31:11AM -0700, Vincent Lejeune wrote:
> Hi,
>
> this series simplifies our AMDILCFGStructurizer pass, which is currently used by non-SI targets.
> I removed all the templates (there was only a single template instantiation; the original
> author probably wanted the code to work on LLVM IR as well as on Machine Code, but it wasn't
> implemented) and removed some code by assuming that every loop has a single exit.
> While it may look like a limitation compared to the previous situation, the pass generated wrong code
> (it added vreg assignments after regalloc) and made LLVM crash. Actually, the simplified code is even able
> to solve some TFB piglit test crashes.
> I also took the opportunity to make the code more compliant with the LLVM coding standards (uppercase variable names,
> braces in single-line loops...)
>
>
> I also added a patch that makes the triangle if/then/else pattern lower correctly, i.e. sparing an extra THEN...ENDIF
> construction, which should save a couple of cycles in the resulting code.
>
> Vincent.
The series is:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>
Thanks for doing this cleanup!
-Tom
> From ffc0d774dbcb6914e3f707d6edf5afb4e1114db9 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 10 Jul 2013 18:43:10 +0200
> Subject: [PATCH 1/3] R600: Replace legacy debug code in
> AMDILCFGStructurizer.cpp
>
> ---
> lib/Target/R600/AMDILCFGStructurizer.cpp | 463 ++++++++++++++++---------------
> 1 file changed, 235 insertions(+), 228 deletions(-)
>
> diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
> index 437480c..bb8c217 100644
> --- a/lib/Target/R600/AMDILCFGStructurizer.cpp
> +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
> @@ -8,11 +8,12 @@
> /// \file
> //==-----------------------------------------------------------------------===//
>
> -#define DEBUGME 0
> #define DEBUG_TYPE "structcfg"
>
> #include "AMDGPU.h"
> #include "AMDGPUInstrInfo.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/raw_ostream.h"
> #include "llvm/ADT/SCCIterator.h"
> #include "llvm/ADT/SmallVector.h"
> #include "llvm/ADT/Statistic.h"
> @@ -62,22 +63,22 @@ STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");
> //===----------------------------------------------------------------------===//
> namespace {
> #define SHOWNEWINSTR(i) \
> - if (DEBUGME) errs() << "New instr: " << *i << "\n"
> + DEBUG(dbgs() << "New instr: " << *i << "\n");
>
> #define SHOWNEWBLK(b, msg) \
> -if (DEBUGME) { \
> - errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
> - errs() << "\n"; \
> -}
> +DEBUG( \
> + dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
> + dbgs() << "\n"; \
> +);
>
> #define SHOWBLK_DETAIL(b, msg) \
> -if (DEBUGME) { \
> +DEBUG( \
> if (b) { \
> - errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
> - b->print(errs()); \
> - errs() << "\n"; \
> + dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
> + b->print(dbgs()); \
> + dbgs() << "\n"; \
> } \
> -}
> +);
>
> #define INVALIDSCCNUM -1
> #define INVALIDREGNUM 0
> @@ -332,21 +333,27 @@ bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
>
> //FIXME: if not reducible flow graph, make it so ???
>
> - if (DEBUGME) {
> - errs() << "AMDGPUCFGStructurizer::prepare\n";
> - }
> + DEBUG(
> + dbgs() << "AMDGPUCFGStructurizer::prepare\n";
> + );
>
> loopInfo = CFGTraits::getLoopInfo(pass);
> - if (DEBUGME) {
> - errs() << "LoopInfo:\n";
> - PrintLoopinfo(*loopInfo, errs());
> - }
> + DEBUG(
> + dbgs() << "LoopInfo:\n";
> + PrintLoopinfo(*loopInfo, dbgs());
> + );
>
> orderBlocks();
> - if (DEBUGME) {
> - errs() << "Ordered blocks:\n";
> - printOrderedBlocks(errs());
> - }
> + DEBUG(
> + for (typename SmallVectorImpl<BlockT *>::const_iterator
> + iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
> + iterBlk != iterBlkEnd;
> + ++iterBlk) {
> + (*iterBlk)->dump();
> + }
> + dbgs() << "Ordered blocks:\n";
> + printOrderedBlocks(dbgs());
> + );
>
> SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
>
> @@ -396,26 +403,26 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> TRI = tri;
>
> //Assume reducible CFG...
> - if (DEBUGME) {
> - errs() << "AMDGPUCFGStructurizer::run\n";
> + DEBUG(
> + dbgs() << "AMDGPUCFGStructurizer::run\n";
> func.viewCFG();
> - }
> + );
>
> domTree = CFGTraits::getDominatorTree(pass);
> - if (DEBUGME) {
> - domTree->print(errs(), (const llvm::Module*)0);
> - }
> + DEBUG(
> + domTree->print(dbgs(), (const llvm::Module*)0);
> + );
>
> postDomTree = CFGTraits::getPostDominatorTree(pass);
> - if (DEBUGME) {
> - postDomTree->print(errs());
> - }
> + DEBUG(
> + postDomTree->print(dbgs());
> + );
>
> loopInfo = CFGTraits::getLoopInfo(pass);
> - if (DEBUGME) {
> - errs() << "LoopInfo:\n";
> - PrintLoopinfo(*loopInfo, errs());
> - }
> + DEBUG(
> + dbgs() << "LoopInfo:\n";
> + PrintLoopinfo(*loopInfo, dbgs());
> + );
>
> orderBlocks();
> #ifdef STRESSTEST
> @@ -423,10 +430,10 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> ReverseVector(orderedBlks);
> #endif
>
> - if (DEBUGME) {
> - errs() << "Ordered blocks:\n";
> - printOrderedBlocks(errs());
> - }
> + DEBUG(
> + dbgs() << "Ordered blocks:\n";
> + printOrderedBlocks(dbgs());
> + );
> int numIter = 0;
> bool finish = false;
> BlockT *curBlk;
> @@ -436,10 +443,10 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>
> do {
> ++numIter;
> - if (DEBUGME) {
> - errs() << "numIter = " << numIter
> + DEBUG(
> + dbgs() << "numIter = " << numIter
> << ", numRemaintedBlk = " << numRemainedBlk << "\n";
> - }
> + );
>
> typename SmallVectorImpl<BlockT *>::const_iterator
> iterBlk = orderedBlks.begin();
> @@ -461,10 +468,10 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> sccBeginBlk = curBlk;
> sccNumIter = 0;
> sccNumBlk = numRemainedBlk; // Init to maximum possible number.
> - if (DEBUGME) {
> - errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
> - errs() << "\n";
> - }
> + DEBUG(
> + dbgs() << "start processing SCC" << getSCCNum(sccBeginBlk);
> + dbgs() << "\n";
> + );
> }
>
> if (!isRetiredBlock(curBlk)) {
> @@ -480,21 +487,21 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> ++sccNumIter;
> int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
> if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
> - if (DEBUGME) {
> - errs() << "Can't reduce SCC " << getSCCNum(curBlk)
> + DEBUG(
> + dbgs() << "Can't reduce SCC " << getSCCNum(curBlk)
> << ", sccNumIter = " << sccNumIter;
> - errs() << "doesn't make any progress\n";
> - }
> + dbgs() << "doesn't make any progress\n";
> + );
> contNextScc = true;
> } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
> sccNumBlk = sccRemainedNumBlk;
> iterBlk = sccBeginIter;
> contNextScc = false;
> - if (DEBUGME) {
> - errs() << "repeat processing SCC" << getSCCNum(curBlk)
> + DEBUG(
> + dbgs() << "repeat processing SCC" << getSCCNum(curBlk)
> << "sccNumIter = " << sccNumIter << "\n";
> func.viewCFG();
> - }
> + );
> } else {
> // Finish the current scc.
> contNextScc = true;
> @@ -512,9 +519,9 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
> if (entryBlk->succ_size() == 0) {
> finish = true;
> - if (DEBUGME) {
> - errs() << "Reduce to one block\n";
> - }
> + DEBUG(
> + dbgs() << "Reduce to one block\n";
> + );
> } else {
> int newnumRemainedBlk
> = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
> @@ -524,9 +531,9 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> numRemainedBlk = newnumRemainedBlk;
> } else {
> makeProgress = false;
> - if (DEBUGME) {
> - errs() << "No progress\n";
> - }
> + DEBUG(
> + dbgs() << "No progress\n";
> + );
> }
> }
> } while (!finish && makeProgress);
> @@ -539,9 +546,9 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
> if ((*iterMap).second && (*iterMap).second->isRetired) {
> assert(((*iterMap).first)->getNumber() != -1);
> - if (DEBUGME) {
> - errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
> - }
> + DEBUG(
> + dbgs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
> + );
> (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
> }
> delete (*iterMap).second;
> @@ -555,12 +562,12 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> }
> loopLandInfoMap.clear();
>
> - if (DEBUGME) {
> + DEBUG(
> func.viewCFG();
> - }
> + );
>
> if (!finish) {
> - assert(!"IRREDUCIBL_CF");
> + llvm_unreachable("IRREDUCIBL_CF");
> }
>
> return true;
> @@ -609,7 +616,7 @@ template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
> BlockT *bb = &(*blockIter1);
> sccNum = getSCCNum(bb);
> if (sccNum == INVALIDSCCNUM) {
> - errs() << "unreachable block BB" << bb->getNumber() << "\n";
> + dbgs() << "unreachable block BB" << bb->getNumber() << "\n";
> }
> }
> } //orderBlocks
> @@ -618,18 +625,18 @@ template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
> int numMatch = 0;
> int curMatch;
>
> - if (DEBUGME) {
> - errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
> - }
> + DEBUG(
> + dbgs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
> + );
>
> while ((curMatch = patternMatchGroup(curBlk)) > 0) {
> numMatch += curMatch;
> }
>
> - if (DEBUGME) {
> - errs() << "End patternMatch BB" << curBlk->getNumber()
> + DEBUG(
> + dbgs() << "End patternMatch BB" << curBlk->getNumber()
> << ", numMatch = " << numMatch << "\n";
> - }
> + );
>
> return numMatch;
> } //patternMatch
> @@ -811,9 +818,9 @@ int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
> BlockTSmallerVector exitingBlks;
> loopRep->getExitingBlocks(exitingBlks);
>
> - if (DEBUGME) {
> - errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
> - }
> + DEBUG(
> + dbgs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
> + );
>
> if (exitingBlks.size() == 0) {
> setLoopLandBlock(loopRep);
> @@ -834,9 +841,9 @@ int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
> assert(exitBlkSet.size() > 0);
> assert(exitBlks.size() == exitingBlks.size());
>
> - if (DEBUGME) {
> - errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
> - }
> + DEBUG(
> + dbgs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
> + );
>
> // Find exitLandBlk.
> BlockT *exitLandBlk = NULL;
> @@ -861,19 +868,19 @@ int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
> BlockT *exitBlk = *iter;
>
> PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
> - if (DEBUGME) {
> - errs() << "BB" << exitBlk->getNumber()
> + DEBUG(
> + dbgs() << "BB" << exitBlk->getNumber()
> << " to BB" << exitLandBlk->getNumber() << " PathToKind="
> << pathKind << "\n";
> - }
> + );
>
> allInPath = allInPath && (pathKind == SinglePath_InPath);
> allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
>
> if (!allInPath && !allNotInPath) {
> - if (DEBUGME) {
> - errs() << "singlePath check fail\n";
> - }
> + DEBUG(
> + dbgs() << "singlePath check fail\n";
> + );
> return -1;
> }
> } // check all exit blocks
> @@ -891,19 +898,19 @@ int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
> loopRep,
> exitBlkSet,
> exitLandBlk)) != NULL) {
> - if (DEBUGME) {
> - errs() << "relocateLoopcontBlock success\n";
> - }
> + DEBUG(
> + dbgs() << "relocateLoopcontBlock success\n";
> + );
> } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
> exitingBlks,
> exitBlks)) != NULL) {
> - if (DEBUGME) {
> - errs() << "insertEndbranchBlock success\n";
> - }
> + DEBUG(
> + dbgs() << "insertEndbranchBlock success\n";
> + );
> } else {
> - if (DEBUGME) {
> - errs() << "loop exit fail\n";
> - }
> + DEBUG(
> + dbgs() << "loop exit fail\n";
> + );
> return -1;
> }
> }
> @@ -1017,11 +1024,11 @@ bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
> if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
> LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> if (theEntry != NULL) {
> - if (DEBUGME) {
> - errs() << "isLoopContBreakBlock yes src1 = BB"
> + DEBUG(
> + dbgs() << "isLoopContBreakBlock yes src1 = BB"
> << src1Blk->getNumber()
> << " src2 = BB" << src2Blk->getNumber() << "\n";
> - }
> + );
> return true;
> }
> }
> @@ -1035,9 +1042,9 @@ int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
> BlockT *falseBlk) {
> int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
> if (num == 0) {
> - if (DEBUGME) {
> - errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> - }
> + DEBUG(
> + dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> + );
> num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
> }
> return num;
> @@ -1053,22 +1060,22 @@ int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
> //trueBlk could be the common post dominator
> downBlk = trueBlk;
>
> - if (DEBUGME) {
> - errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
> + DEBUG(
> + dbgs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
> << " true = BB" << trueBlk->getNumber()
> << ", numSucc=" << trueBlk->succ_size()
> << " false = BB" << falseBlk->getNumber() << "\n";
> - }
> + );
>
> while (downBlk) {
> - if (DEBUGME) {
> - errs() << "check down = BB" << downBlk->getNumber();
> - }
> + DEBUG(
> + dbgs() << "check down = BB" << downBlk->getNumber();
> + );
>
> if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
> - if (DEBUGME) {
> - errs() << " working\n";
> - }
> + DEBUG(
> + dbgs() << " working\n";
> + );
>
> num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
> num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
> @@ -1081,9 +1088,9 @@ int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
>
> break;
> }
> - if (DEBUGME) {
> - errs() << " not working\n";
> - }
> + DEBUG(
> + dbgs() << " not working\n";
> + );
> downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
> } // walk down the postDomTree
>
> @@ -1096,43 +1103,43 @@ void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
> BlockT *falseBlk,
> BlockT *landBlk,
> bool detail) {
> - errs() << "head = BB" << headBlk->getNumber()
> + dbgs() << "head = BB" << headBlk->getNumber()
> << " size = " << headBlk->size();
> if (detail) {
> - errs() << "\n";
> - headBlk->print(errs());
> - errs() << "\n";
> + dbgs() << "\n";
> + headBlk->print(dbgs());
> + dbgs() << "\n";
> }
>
> if (trueBlk) {
> - errs() << ", true = BB" << trueBlk->getNumber() << " size = "
> + dbgs() << ", true = BB" << trueBlk->getNumber() << " size = "
> << trueBlk->size() << " numPred = " << trueBlk->pred_size();
> if (detail) {
> - errs() << "\n";
> - trueBlk->print(errs());
> - errs() << "\n";
> + dbgs() << "\n";
> + trueBlk->print(dbgs());
> + dbgs() << "\n";
> }
> }
> if (falseBlk) {
> - errs() << ", false = BB" << falseBlk->getNumber() << " size = "
> + dbgs() << ", false = BB" << falseBlk->getNumber() << " size = "
> << falseBlk->size() << " numPred = " << falseBlk->pred_size();
> if (detail) {
> - errs() << "\n";
> - falseBlk->print(errs());
> - errs() << "\n";
> + dbgs() << "\n";
> + falseBlk->print(dbgs());
> + dbgs() << "\n";
> }
> }
> if (landBlk) {
> - errs() << ", land = BB" << landBlk->getNumber() << " size = "
> + dbgs() << ", land = BB" << landBlk->getNumber() << " size = "
> << landBlk->size() << " numPred = " << landBlk->pred_size();
> if (detail) {
> - errs() << "\n";
> - landBlk->print(errs());
> - errs() << "\n";
> + dbgs() << "\n";
> + landBlk->print(dbgs());
> + dbgs() << "\n";
> }
> }
>
> - errs() << "\n";
> + dbgs() << "\n";
> } //showImproveSimpleJumpintoIf
>
> template<class PassT>
> @@ -1169,10 +1176,10 @@ int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
> migrateFalse = true;
> }
>
> - if (DEBUGME) {
> - errs() << "before improveSimpleJumpintoIf: ";
> + DEBUG(
> + dbgs() << "before improveSimpleJumpintoIf: ";
> showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
> - }
> + );
>
> // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
> //
> @@ -1269,10 +1276,10 @@ int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
> }
> } //for
> }
> - if (DEBUGME) {
> - errs() << "result from improveSimpleJumpintoIf: ";
> + DEBUG(
> + dbgs() << "result from improveSimpleJumpintoIf: ";
> showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
> - }
> + );
>
> // update landBlk
> *plandBlk = landBlk;
> @@ -1286,10 +1293,10 @@ void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
> BlockT *exitBlk,
> LoopT *exitLoop,
> BlockT *landBlk) {
> - if (DEBUGME) {
> - errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
> + DEBUG(
> + dbgs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
> << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
> - }
> + );
> const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
>
> RegiT initReg = INVALIDREGNUM;
> @@ -1314,14 +1321,14 @@ void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
> LoopT *contingLoop,
> BlockT *contBlk,
> LoopT *contLoop) {
> - if (DEBUGME) {
> - errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
> + DEBUG(
> + dbgs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
> << " header = BB" << contBlk->getNumber() << "\n";
>
> - errs() << "Trying to continue loop-depth = "
> + dbgs() << "Trying to continue loop-depth = "
> << getLoopDepth(contLoop)
> << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
> - }
> + );
>
> RegiT initReg = INVALIDREGNUM;
> const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> @@ -1343,10 +1350,10 @@ void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
>
> template<class PassT>
> void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
> - if (DEBUGME) {
> - errs() << "serialPattern BB" << dstBlk->getNumber()
> + DEBUG(
> + dbgs() << "serialPattern BB" << dstBlk->getNumber()
> << " <= BB" << srcBlk->getNumber() << "\n";
> - }
> + );
> dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
>
> dstBlk->removeSuccessor(srcBlk);
> @@ -1362,26 +1369,26 @@ void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
> BlockT *trueBlk,
> BlockT *falseBlk,
> BlockT *landBlk) {
> - if (DEBUGME) {
> - errs() << "ifPattern BB" << curBlk->getNumber();
> - errs() << "{ ";
> + DEBUG(
> + dbgs() << "ifPattern BB" << curBlk->getNumber();
> + dbgs() << "{ ";
> if (trueBlk) {
> - errs() << "BB" << trueBlk->getNumber();
> + dbgs() << "BB" << trueBlk->getNumber();
> }
> - errs() << " } else ";
> - errs() << "{ ";
> + dbgs() << " } else ";
> + dbgs() << "{ ";
> if (falseBlk) {
> - errs() << "BB" << falseBlk->getNumber();
> + dbgs() << "BB" << falseBlk->getNumber();
> }
> - errs() << " }\n ";
> - errs() << "landBlock: ";
> + dbgs() << " }\n ";
> + dbgs() << "landBlock: ";
> if (landBlk == NULL) {
> - errs() << "NULL";
> + dbgs() << "NULL";
> } else {
> - errs() << "BB" << landBlk->getNumber();
> + dbgs() << "BB" << landBlk->getNumber();
> }
> - errs() << "\n";
> - }
> + dbgs() << "\n";
> + );
>
> int oldOpcode = branchInstr->getOpcode();
> DebugLoc branchDL = branchInstr->getDebugLoc();
> @@ -1435,10 +1442,10 @@ void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
> LoopLandInfo *loopLand) {
> BlockT *landBlk = loopLand->landBlk;
>
> - if (DEBUGME) {
> - errs() << "loopPattern header = BB" << dstBlk->getNumber()
> + DEBUG(
> + dbgs() << "loopPattern header = BB" << dstBlk->getNumber()
> << " land = BB" << landBlk->getNumber() << "\n";
> - }
> + );
>
> // Loop contInitRegs are init at the beginning of the loop.
> for (typename std::set<RegiT>::const_iterator iter =
> @@ -1521,7 +1528,7 @@ void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
> static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
> return;
> default:
> - assert(0 && "PRED_X Opcode invalid!");
> + llvm_unreachable("PRED_X Opcode invalid!");
> }
> }
> }
> @@ -1532,11 +1539,11 @@ void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
> BlockT *exitBlk,
> BlockT *exitLandBlk,
> RegiT setReg) {
> - if (DEBUGME) {
> - errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
> + DEBUG(
> + dbgs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
> << " exit = BB" << exitBlk->getNumber()
> << " land = BB" << exitLandBlk->getNumber() << "\n";
> - }
> + );
>
> InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
> assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
> @@ -1596,11 +1603,11 @@ template<class PassT>
> void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
> BlockT *contBlk,
> RegiT setReg) {
> - if (DEBUGME) {
> - errs() << "settleLoopcontBlock conting = BB"
> + DEBUG(
> + dbgs() << "settleLoopcontBlock conting = BB"
> << contingBlk->getNumber()
> << ", cont = BB" << contBlk->getNumber() << "\n";
> - }
> + );
>
> InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
> if (branchInstr) {
> @@ -1711,10 +1718,10 @@ CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
> contInstr->eraseFromParent();
> }
> endBlk->addSuccessor(newBlk);
> - if (DEBUGME) {
> - errs() << "Add new continue Block to BB"
> + DEBUG(
> + dbgs() << "Add new continue Block to BB"
> << endBlk->getNumber() << " successors\n";
> - }
> + );
> }
>
> return newBlk;
> @@ -1927,10 +1934,10 @@ CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
>
> numClonedInstr += curBlk->size();
>
> - if (DEBUGME) {
> - errs() << "Cloned block: " << "BB"
> + DEBUG(
> + dbgs() << "Cloned block: " << "BB"
> << curBlk->getNumber() << "size " << curBlk->size() << "\n";
> - }
> + );
>
> SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
>
> @@ -1966,29 +1973,29 @@ void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
> //look for the input branchinstr, not the AMDGPU branchinstr
> InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
> if (branchInstr == NULL) {
> - if (DEBUGME) {
> - errs() << "migrateInstruction don't see branch instr\n" ;
> - }
> + DEBUG(
> + dbgs() << "migrateInstruction don't see branch instr\n" ;
> + );
> spliceEnd = srcBlk->end();
> } else {
> - if (DEBUGME) {
> - errs() << "migrateInstruction see branch instr\n" ;
> + DEBUG(
> + dbgs() << "migrateInstruction see branch instr\n" ;
> branchInstr->dump();
> - }
> + );
> spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
> }
> - if (DEBUGME) {
> - errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
> + DEBUG(
> + dbgs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
> << "srcSize = " << srcBlk->size() << "\n";
> - }
> + );
>
> //splice insert before insertPos
> dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
>
> - if (DEBUGME) {
> - errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
> + DEBUG(
> + dbgs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
> << "srcSize = " << srcBlk->size() << "\n";
> - }
> + );
> } //migrateInstruction
>
> // normalizeInfiniteLoopExit change
> @@ -2016,7 +2023,7 @@ CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
> funcRep->push_back(dummyExitBlk); //insert to function
> SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
>
> - if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
> + DEBUG(dbgs() << "Old branch instr: " << *branchInstr << "\n";);
>
> typename BlockT::iterator insertPos =
> CFGTraits::getInstrPos(loopLatch, branchInstr);
> @@ -2047,10 +2054,10 @@ void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
> // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
> while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
> && CFGTraits::isUncondBranch(branchInstr)) {
> - if (DEBUGME) {
> - errs() << "Removing unconditional branch instruction" ;
> + DEBUG(
> + dbgs() << "Removing unconditional branch instruction" ;
> branchInstr->dump();
> - }
> + );
> branchInstr->eraseFromParent();
> }
> } //removeUnconditionalBranch
> @@ -2064,10 +2071,10 @@ void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
> if (blk1 == blk2) {
> InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
> assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
> - if (DEBUGME) {
> - errs() << "Removing unneeded conditional branch instruction" ;
> + DEBUG(
> + dbgs() << "Removing unneeded conditional branch instruction" ;
> branchInstr->dump();
> - }
> + );
> branchInstr->eraseFromParent();
> SHOWNEWBLK(blk1, "Removing redundant successor");
> srcBlk->removeSuccessor(blk1);
> @@ -2091,10 +2098,10 @@ void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
> curInstr->eraseFromParent();
> }
> curBlk->addSuccessor(dummyExitBlk);
> - if (DEBUGME) {
> - errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
> + DEBUG(
> + dbgs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
> << " successors\n";
> - }
> + );
> } //for
>
> SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
> @@ -2126,9 +2133,9 @@ int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
>
> template<class PassT>
> void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
> - if (DEBUGME) {
> - errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
> - }
> + DEBUG(
> + dbgs() << "Retiring BB" << srcBlk->getNumber() << "\n";
> + );
>
> BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
>
> @@ -2245,11 +2252,11 @@ void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
>
> theEntry->landBlk = blk;
>
> - if (DEBUGME) {
> - errs() << "setLoopLandBlock loop-header = BB"
> + DEBUG(
> + dbgs() << "setLoopLandBlock loop-header = BB"
> << loopRep->getHeader()->getNumber()
> << " landing-block = BB" << blk->getNumber() << "\n";
> - }
> + );
> } // setLoopLandBlock
>
> template<class PassT>
> @@ -2262,11 +2269,11 @@ void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
>
> theEntry->breakOnRegs.insert(regNum);
>
> - if (DEBUGME) {
> - errs() << "addLoopBreakOnReg loop-header = BB"
> + DEBUG(
> + dbgs() << "addLoopBreakOnReg loop-header = BB"
> << loopRep->getHeader()->getNumber()
> << " regNum = " << regNum << "\n";
> - }
> + );
> } // addLoopBreakOnReg
>
> template<class PassT>
> @@ -2278,11 +2285,11 @@ void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
> }
> theEntry->contOnRegs.insert(regNum);
>
> - if (DEBUGME) {
> - errs() << "addLoopContOnReg loop-header = BB"
> + DEBUG(
> + dbgs() << "addLoopContOnReg loop-header = BB"
> << loopRep->getHeader()->getNumber()
> << " regNum = " << regNum << "\n";
> - }
> + );
> } // addLoopContOnReg
>
> template<class PassT>
> @@ -2294,11 +2301,11 @@ void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
> }
> theEntry->breakInitRegs.insert(regNum);
>
> - if (DEBUGME) {
> - errs() << "addLoopBreakInitReg loop-header = BB"
> + DEBUG(
> + dbgs() << "addLoopBreakInitReg loop-header = BB"
> << loopRep->getHeader()->getNumber()
> << " regNum = " << regNum << "\n";
> - }
> + );
> } // addLoopBreakInitReg
>
> template<class PassT>
> @@ -2310,11 +2317,11 @@ void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
> }
> theEntry->contInitRegs.insert(regNum);
>
> - if (DEBUGME) {
> - errs() << "addLoopContInitReg loop-header = BB"
> + DEBUG(
> + dbgs() << "addLoopContInitReg loop-header = BB"
> << loopRep->getHeader()->getNumber()
> << " regNum = " << regNum << "\n";
> - }
> + );
> } // addLoopContInitReg
>
> template<class PassT>
> @@ -2327,11 +2334,11 @@ void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
> }
> theEntry->endbranchInitRegs.insert(regNum);
>
> - if (DEBUGME) {
> - errs() << "addLoopEndbranchInitReg loop-header = BB"
> + DEBUG(
> + dbgs() << "addLoopEndbranchInitReg loop-header = BB"
> << loopRep->getHeader()->getNumber()
> << " regNum = " << regNum << "\n";
> - }
> + );
> } // addLoopEndbranchInitReg
>
> template<class PassT>
> @@ -2437,14 +2444,14 @@ CFGStructurizer<PassT>::findNearestCommonPostDom
> }
> }
>
> - if (DEBUGME) {
> - errs() << "Common post dominator for exit blocks is ";
> + DEBUG(
> + dbgs() << "Common post dominator for exit blocks is ";
> if (commonDom) {
> - errs() << "BB" << commonDom->getNumber() << "\n";
> + dbgs() << "BB" << commonDom->getNumber() << "\n";
> } else {
> - errs() << "NULL\n";
> + dbgs() << "NULL\n";
> }
> - }
> + );
>
> return commonDom;
> } //findNearestCommonPostDom
> @@ -2591,7 +2598,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
> case AMDGPU::BRANCH_COND_i32:
> case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
> default:
> - assert(0 && "internal error");
> + llvm_unreachable("internal error");
> }
> return -1;
> }
> @@ -2603,7 +2610,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
> case AMDGPU::BRANCH_COND_i32:
> case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
> default:
> - assert(0 && "internal error");
> + llvm_unreachable("internal error");
> }
> return -1;
> }
> @@ -2613,7 +2620,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
> case AMDGPU::JUMP_COND:
> case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
> default:
> - assert(0 && "internal error");
> + llvm_unreachable("internal error");
> };
> return -1;
> }
> @@ -2623,7 +2630,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
> case AMDGPU::JUMP_COND:
> case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
> default:
> - assert(0 && "internal error");
> + llvm_unreachable("internal error");
> }
> return -1;
> }
> @@ -2753,10 +2760,10 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
> if (instr) {
> assert(isReturn);
> } else if (isReturn) {
> - if (DEBUGME) {
> - errs() << "BB" << blk->getNumber()
> + DEBUG(
> + dbgs() << "BB" << blk->getNumber()
> <<" is return block without RETURN instr\n";
> - }
> + );
> }
>
> return isReturn;
> --
> 1.8.3.1
>
> From 5de07edc6f8a54fd28873a61ac84ad5fa16757e6 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 10 Jul 2013 23:49:09 +0200
> Subject: [PATCH 2/3] R600: Simplify AMDILCFGStructurize by removing templates
> and assuming single exit
>
> ---
> lib/Target/R600/AMDGPU.h | 1 -
> lib/Target/R600/AMDGPUTargetMachine.cpp | 1 -
> lib/Target/R600/AMDILCFGStructurizer.cpp | 3845 +++++++++++-------------------
> 3 files changed, 1341 insertions(+), 2506 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
> index f284291..7621422 100644
> --- a/lib/Target/R600/AMDGPU.h
> +++ b/lib/Target/R600/AMDGPU.h
> @@ -31,7 +31,6 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
> FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
> FunctionPass *createR600Packetizer(TargetMachine &tm);
> FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
> -FunctionPass *createAMDGPUCFGPreparationPass(TargetMachine &tm);
> FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);
>
> // SI Passes
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 7a14e50..1dc1b6b 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -160,7 +160,6 @@ bool AMDGPUPassConfig::addPreSched2() {
> bool AMDGPUPassConfig::addPreEmitPass() {
> const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
> if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> - addPass(createAMDGPUCFGPreparationPass(*TM));
> addPass(createAMDGPUCFGStructurizerPass(*TM));
> addPass(createR600ExpandSpecialInstrsPass(*TM));
> addPass(&FinalizeMachineBundlesID);
> diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
> index bb8c217..6ace97a 100644
> --- a/lib/Target/R600/AMDILCFGStructurizer.cpp
> +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
> @@ -12,11 +12,13 @@
>
> #include "AMDGPU.h"
> #include "AMDGPUInstrInfo.h"
> +#include "R600InstrInfo.h"
> #include "llvm/Support/Debug.h"
> #include "llvm/Support/raw_ostream.h"
> #include "llvm/ADT/SCCIterator.h"
> #include "llvm/ADT/SmallVector.h"
> #include "llvm/ADT/Statistic.h"
> +#include "llvm/ADT/DepthFirstIterator.h"
> #include "llvm/Analysis/DominatorInternals.h"
> #include "llvm/Analysis/Dominators.h"
> #include "llvm/CodeGen/MachineDominators.h"
> @@ -81,16 +83,6 @@ DEBUG( \
> );
>
> #define INVALIDSCCNUM -1
> -#define INVALIDREGNUM 0
> -
> -template<class LoopinfoT>
> -void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
> - for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
> - iterEnd = LoopInfo.end();
> - iter != iterEnd; ++iter) {
> - (*iter)->print(OS, 0);
> - }
> -}
>
> template<class NodeT>
> void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
> @@ -110,40 +102,14 @@ void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
> //
> //===----------------------------------------------------------------------===//
>
> +
> namespace {
> -template<class PassT>
> -struct CFGStructTraits {
> -};
>
> -template <class InstrT>
> class BlockInformation {
> public:
> - bool isRetired;
> - int sccNum;
> - //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
> - //Instructions defining the corresponding successor.
> - BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
> -};
> -
> -template <class BlockT, class InstrT, class RegiT>
> -class LandInformation {
> -public:
> - BlockT *landBlk;
> - std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before
> - //WHILELOOP(thisloop) init before entering
> - //thisloop.
> - std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after
> - //WHILELOOP(thisloop) init after entering
> - //thisloop.
> - std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
> - //land block, branch cond on this reg.
> - std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break
> - //endif" after ENDLOOP(thisloop) break
> - //outerLoopOf(thisLoop).
> - std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue
> - //endif" after ENDLOOP(thisloop) continue on
> - //outerLoopOf(thisLoop).
> - LandInformation() : landBlk(NULL) {}
> + bool IsRetired;
> + int SccNum;
> + BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
> };
>
> } // end anonymous namespace
> @@ -155,1030 +121,1219 @@ public:
> //===----------------------------------------------------------------------===//
>
> namespace {
> -// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
> -template<class PassT>
> -class CFGStructurizer {
> +class AMDGPUCFGStructurizer : public MachineFunctionPass {
> public:
> - typedef enum {
> + typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
> + typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
> + typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
> +
> + enum PathToKind {
> Not_SinglePath = 0,
> SinglePath_InPath = 1,
> SinglePath_NotInPath = 2
> - } PathToKind;
> + };
>
> -public:
> - typedef typename PassT::InstructionType InstrT;
> - typedef typename PassT::FunctionType FuncT;
> - typedef typename PassT::DominatortreeType DomTreeT;
> - typedef typename PassT::PostDominatortreeType PostDomTreeT;
> - typedef typename PassT::DomTreeNodeType DomTreeNodeT;
> - typedef typename PassT::LoopinfoType LoopInfoT;
> -
> - typedef GraphTraits<FuncT *> FuncGTraits;
> - //typedef FuncGTraits::nodes_iterator BlockIterator;
> - typedef typename FuncT::iterator BlockIterator;
> -
> - typedef typename FuncGTraits::NodeType BlockT;
> - typedef GraphTraits<BlockT *> BlockGTraits;
> - typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits;
> - //typedef BlockGTraits::succ_iterator InstructionIterator;
> - typedef typename BlockT::iterator InstrIterator;
> -
> - typedef CFGStructTraits<PassT> CFGTraits;
> - typedef BlockInformation<InstrT> BlockInfo;
> - typedef std::map<BlockT *, BlockInfo *> BlockInfoMap;
> -
> - typedef int RegiT;
> - typedef typename PassT::LoopType LoopT;
> - typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo;
> - typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
> - //landing info for loop break
> - typedef SmallVector<BlockT *, 32> BlockTSmallerVector;
> + static char ID;
>
> -public:
> - CFGStructurizer();
> - ~CFGStructurizer();
> + AMDGPUCFGStructurizer(TargetMachine &tm) :
> + MachineFunctionPass(ID), TM(tm),
> + TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
> + TRI(&TII->getRegisterInfo()) { }
> +
> + const char *getPassName() const {
> + return "AMD IL Control Flow Graph structurizer Pass";
> + }
> +
> + void getAnalysisUsage(AnalysisUsage &AU) const {
> + AU.addPreserved<MachineFunctionAnalysis>();
> + AU.addRequired<MachineFunctionAnalysis>();
> + AU.addRequired<MachineDominatorTree>();
> + AU.addRequired<MachinePostDominatorTree>();
> + AU.addRequired<MachineLoopInfo>();
> + }
>
> /// Perform the CFG structurization
> - bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
> + bool run();
>
> /// Perform the CFG preparation
> - bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
> + /// This step will remove every unconditional/dead jump instruction and make
> + /// sure all loops have an exit block
> + bool prepare();
> +
> + bool runOnMachineFunction(MachineFunction &MF) {
> + DEBUG(MF.dump(););
> + OrderedBlks.clear();
> + FuncRep = &MF;
> + MLI = &getAnalysis<MachineLoopInfo>();
> + DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
> + MDT = &getAnalysis<MachineDominatorTree>();
> + DEBUG(MDT->print(dbgs(), (const llvm::Module*)0););
> + PDT = &getAnalysis<MachinePostDominatorTree>();
> + DEBUG(PDT->print(dbgs()););
> + prepare();
> + run();
> + DEBUG(MF.dump(););
> + return true;
> + }
>
> -private:
> - void reversePredicateSetter(typename BlockT::iterator);
> - void orderBlocks();
> - void printOrderedBlocks(llvm::raw_ostream &OS);
> - int patternMatch(BlockT *CurBlock);
> - int patternMatchGroup(BlockT *CurBlock);
> -
> - int serialPatternMatch(BlockT *CurBlock);
> - int ifPatternMatch(BlockT *CurBlock);
> - int switchPatternMatch(BlockT *CurBlock);
> - int loopendPatternMatch(BlockT *CurBlock);
> - int loopPatternMatch(BlockT *CurBlock);
> -
> - int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
> - int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
> - //int loopWithoutBreak(BlockT *);
> -
> - void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
> - BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
> - void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
> - BlockT *ContBlock, LoopT *contLoop);
> - bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
> - int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
> - BlockT *FalseBlock);
> - int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
> - BlockT *FalseBlock);
> - int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
> - BlockT *FalseBlock, BlockT **LandBlockPtr);
> - void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
> - BlockT *FalseBlock, BlockT *LandBlock,
> - bool Detail = false);
> - PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
> - bool AllowSideEntry = true);
> - BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
> - bool AllowSideEntry = true);
> - int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
> - void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
> -
> - void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
> - BlockT *TrueBlock, BlockT *FalseBlock,
> - BlockT *LandBlock);
> - void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
> - void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
> - BlockT *ExitLandBlock, RegiT SetReg);
> - void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
> - RegiT SetReg);
> - BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
> - std::set<BlockT*> &ExitBlockSet,
> - BlockT *ExitLandBlk);
> - BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
> - BlockTSmallerVector &ExitingBlocks,
> - BlockTSmallerVector &ExitBlocks);
> - BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
> - void removeUnconditionalBranch(BlockT *SrcBlock);
> - void removeRedundantConditionalBranch(BlockT *SrcBlock);
> - void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
> -
> - void removeSuccessor(BlockT *SrcBlock);
> - BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
> - BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
> -
> - void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
> - InstrIterator InsertPos);
> -
> - void recordSccnum(BlockT *SrcBlock, int SCCNum);
> - int getSCCNum(BlockT *srcBlk);
> -
> - void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
> - bool isRetiredBlock(BlockT *SrcBlock);
> - bool isActiveLoophead(BlockT *CurBlock);
> - bool needMigrateBlock(BlockT *Block);
> -
> - BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
> - BlockTSmallerVector &exitBlocks,
> - std::set<BlockT*> &ExitBlockSet);
> - void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
> - BlockT *getLoopLandBlock(LoopT *LoopRep);
> - LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
> -
> - void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
> - void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
> - void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
> - void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
> - void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
> -
> - bool hasBackEdge(BlockT *curBlock);
> - unsigned getLoopDepth (LoopT *LoopRep);
> - int countActiveBlock(
> - typename SmallVectorImpl<BlockT *>::const_iterator IterStart,
> - typename SmallVectorImpl<BlockT *>::const_iterator IterEnd);
> - BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
> - BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
> +protected:
> + TargetMachine &TM;
> + MachineDominatorTree *MDT;
> + MachinePostDominatorTree *PDT;
> + MachineLoopInfo *MLI;
> + const R600InstrInfo *TII;
> + const AMDGPURegisterInfo *TRI;
> +
> + // PRINT FUNCTIONS
> + /// Print the ordered Blocks.
> + void printOrderedBlocks() const {
> + size_t i = 0;
> + for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
> + iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
> + dbgs() << "BB" << (*iterBlk)->getNumber();
> + dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
> + if (i != 0 && i % 10 == 0) {
> + dbgs() << "\n";
> + } else {
> + dbgs() << " ";
> + }
> + }
> + }
> + static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
> + for (MachineLoop::iterator iter = LoopInfo.begin(),
> + iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
> + (*iter)->print(dbgs(), 0);
> + }
> + }
> +
> + // UTILITY FUNCTIONS
> + int getSCCNum(MachineBasicBlock *MBB) const;
> + MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
> + bool hasBackEdge(MachineBasicBlock *MBB) const;
> + static unsigned getLoopDepth(MachineLoop *LoopRep);
> + bool isRetiredBlock(MachineBasicBlock *MBB) const;
> + bool isActiveLoophead(MachineBasicBlock *MBB) const;
> + PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
> + bool AllowSideEntry = true) const;
> + int countActiveBlock(MBBVector::const_iterator It,
> + MBBVector::const_iterator E) const;
> + bool needMigrateBlock(MachineBasicBlock *MBB) const;
> +
> + // Utility Functions
> + void reversePredicateSetter(MachineBasicBlock::iterator I);
> + /// Compute the reversed DFS post order of Blocks
> + void orderBlocks(MachineFunction *MF);
> +
> + // Functions originally from CFGStructTraits
> + void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
> + DebugLoc DL = DebugLoc());
> + MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
> + DebugLoc DL = DebugLoc());
> + MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
> + void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
> + DebugLoc DL);
> + void insertCondBranchBefore(MachineBasicBlock *MBB,
> + MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
> + DebugLoc DL);
> + void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
> + static int getBranchNzeroOpcode(int OldOpcode);
> + static int getBranchZeroOpcode(int OldOpcode);
> + static int getContinueNzeroOpcode(int OldOpcode);
> + static int getContinueZeroOpcode(int OldOpcode);
> + static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
> + static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
> + static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
> + MachineInstr *MI);
> + static bool isCondBranch(MachineInstr *MI);
> + static bool isUncondBranch(MachineInstr *MI);
> + static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
> + static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
> + /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
> + ///
> + /// BB with backward-edge could have move instructions after the branch
> + /// instruction. Such move instructions "belong to" the loop backward-edge.
> + MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
> + static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
> + static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
> + static MachineInstr *getLoopBreakInstr(MachineBasicBlock *MBB);
> + static bool isReturnBlock(MachineBasicBlock *MBB);
> + static void cloneSuccessorList(MachineBasicBlock *DstMBB,
> + MachineBasicBlock *SrcMBB) ;
> + static MachineBasicBlock *clone(MachineBasicBlock *MBB);
> + /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
> + /// because the AMDGPU instruction is not recognized as terminator; fix this
> + /// and retire this routine
> + void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
> + MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
> + static void wrapup(MachineBasicBlock *MBB);
> +
> +
> + int patternMatch(MachineBasicBlock *MBB);
> + int patternMatchGroup(MachineBasicBlock *MBB);
> + int serialPatternMatch(MachineBasicBlock *MBB);
> + int ifPatternMatch(MachineBasicBlock *MBB);
> + int loopendPatternMatch();
> + int mergeLoop(MachineLoop *LoopRep);
> + int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
> +
> + void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
> + MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
> + MachineLoop *ContLoop);
> + /// Return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
> + /// the same loop with LoopLandInfo. Without explicitly keeping track of
> + /// loopContBlks and loopBreakBlks, this is a method to get the information.
> + bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
> + MachineBasicBlock *Src2MBB);
> + int handleJumpintoIf(MachineBasicBlock *HeadMBB,
> + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
> + int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
> + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
> + int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> + MachineBasicBlock **LandMBBPtr);
> + void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> + MachineBasicBlock *LandMBB, bool Detail = false);
> + int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
> + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
> + void mergeSerialBlock(MachineBasicBlock *DstMBB,
> + MachineBasicBlock *SrcMBB);
> +
> + void mergeIfthenelseBlock(MachineInstr *BranchMI,
> + MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
> + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
> + void mergeLooplandBlock(MachineBasicBlock *DstMBB,
> + MachineBasicBlock *LandMBB);
> + void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
> + MachineBasicBlock *LandMBB);
> + void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
> + MachineBasicBlock *ContMBB);
> + /// normalizeInfiniteLoopExit change
> + /// B1:
> + /// uncond_br LoopHeader
> + ///
> + /// to
> + /// B1:
> + /// cond_br 1 LoopHeader dummyExit
> + /// and return the newly added dummy exit block
> + MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
> + void removeUnconditionalBranch(MachineBasicBlock *MBB);
> + /// Remove duplicate branches instructions in a block.
> + /// For instance
> + /// B0:
> + /// cond_br X B1 B2
> + /// cond_br X B1 B2
> + /// is transformed to
> + /// B0:
> + /// cond_br X B1 B2
> + void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
> + void addDummyExitBlock(
> + SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> &RetMBB);
> + void removeSuccessor(MachineBasicBlock *MBB);
> + MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
> + MachineBasicBlock *PredMBB);
> + void migrateInstruction(MachineBasicBlock *SrcMBB,
> + MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
> + void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
> + void retireBlock(MachineBasicBlock *MBB);
> + void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL);
> +
> + MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
> + /// This is a workaround for findNearestCommonDominator not being available
> + /// to post dom; a proper fix should go to Dominators.h.
> + MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
> + MachineBasicBlock *MBB2);
>
> private:
> - DomTreeT *domTree;
> - PostDomTreeT *postDomTree;
> - LoopInfoT *loopInfo;
> - PassT *passRep;
> - FuncT *funcRep;
> -
> - BlockInfoMap blockInfoMap;
> - LoopLandInfoMap loopLandInfoMap;
> - SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
> - const AMDGPURegisterInfo *TRI;
> + MBBInfoMap BlockInfoMap;
> + LoopLandInfoMap LLInfoMap;
> + std::map<MachineLoop *, bool> Visited;
> + MachineFunction *FuncRep;
> + SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
> +};
> +
> +int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
> + MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
> + if (It == BlockInfoMap.end())
> + return INVALIDSCCNUM;
> + return (*It).second->SccNum;
> +}
> +
> +MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
> + const {
> + LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
> + if (It == LLInfoMap.end())
> + return NULL;
> + return (*It).second;
> +}
> +
> +bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
> + MachineLoop *LoopRep = MLI->getLoopFor(MBB);
> + if (!LoopRep)
> + return false;
> + MachineBasicBlock *LoopHeader = LoopRep->getHeader();
> + return MBB->isSuccessor(LoopHeader);
> +}
>
> -}; //template class CFGStructurizer
> +unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
> + return LoopRep ? LoopRep->getLoopDepth() : 0;
> +}
>
> -template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
> - : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
> +bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
> + MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
> + if (It == BlockInfoMap.end())
> + return false;
> + return (*It).second->IsRetired;
> }
>
> -template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
> - for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
> - E = blockInfoMap.end(); I != E; ++I) {
> - delete I->second;
> +bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
> + MachineLoop *LoopRep = MLI->getLoopFor(MBB);
> + while (LoopRep && LoopRep->getHeader() == MBB) {
> + MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
> + if(!LoopLand)
> + return true;
> + if (!isRetiredBlock(LoopLand))
> + return true;
> + LoopRep = LoopRep->getParentLoop();
> + }
> + return false;
> +}
> +AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
> + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
> + bool AllowSideEntry) const {
> + assert(DstMBB);
> + if (SrcMBB == DstMBB)
> + return SinglePath_InPath;
> + while (SrcMBB && SrcMBB->succ_size() == 1) {
> + SrcMBB = *SrcMBB->succ_begin();
> + if (SrcMBB == DstMBB)
> + return SinglePath_InPath;
> + if (!AllowSideEntry && SrcMBB->pred_size() > 1)
> + return Not_SinglePath;
> }
> + if (SrcMBB && SrcMBB->succ_size()==0)
> + return SinglePath_NotInPath;
> + return Not_SinglePath;
> }
>
> -template<class PassT>
> -bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
> - const AMDGPURegisterInfo * tri) {
> - passRep = &pass;
> - funcRep = &func;
> - TRI = tri;
> +int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
> + MBBVector::const_iterator E) const {
> + int Count = 0;
> + while (It != E) {
> + if (!isRetiredBlock(*It))
> + ++Count;
> + ++It;
> + }
> + return Count;
> +}
>
> - bool changed = false;
> +bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
> + unsigned BlockSizeThreshold = 30;
> + unsigned CloneInstrThreshold = 100;
> + bool MultiplePreds = MBB && (MBB->pred_size() > 1);
>
> - //FIXME: if not reducible flow graph, make it so ???
> + if(!MultiplePreds)
> + return false;
> + unsigned BlkSize = MBB->size();
> + return ((BlkSize > BlockSizeThreshold) &&
> + (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
> +}
>
> - DEBUG(
> - dbgs() << "AMDGPUCFGStructurizer::prepare\n";
> - );
> +void AMDGPUCFGStructurizer::reversePredicateSetter(
> + MachineBasicBlock::iterator I) {
> + while (I--) {
> + if (I->getOpcode() == AMDGPU::PRED_X) {
> + switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
> + case OPCODE_IS_ZERO_INT:
> + static_cast<MachineInstr *>(I)->getOperand(2)
> + .setImm(OPCODE_IS_NOT_ZERO_INT);
> + return;
> + case OPCODE_IS_NOT_ZERO_INT:
> + static_cast<MachineInstr *>(I)->getOperand(2)
> + .setImm(OPCODE_IS_ZERO_INT);
> + return;
> + case OPCODE_IS_ZERO:
> + static_cast<MachineInstr *>(I)->getOperand(2)
> + .setImm(OPCODE_IS_NOT_ZERO);
> + return;
> + case OPCODE_IS_NOT_ZERO:
> + static_cast<MachineInstr *>(I)->getOperand(2)
> + .setImm(OPCODE_IS_ZERO);
> + return;
> + default:
> + llvm_unreachable("PRED_X Opcode invalid!");
> + }
> + }
> + }
> +}
>
> - loopInfo = CFGTraits::getLoopInfo(pass);
> - DEBUG(
> - dbgs() << "LoopInfo:\n";
> - PrintLoopinfo(*loopInfo, dbgs());
> - );
> +void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
> + int NewOpcode, DebugLoc DL) {
> + MachineInstr *MI = MBB->getParent()
> + ->CreateMachineInstr(TII->get(NewOpcode), DL);
> + MBB->push_back(MI);
> + //assume the instruction doesn't take any reg operand ...
> + SHOWNEWINSTR(MI);
> +}
>
> - orderBlocks();
> - DEBUG(
> - for (typename SmallVectorImpl<BlockT *>::const_iterator
> - iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
> - iterBlk != iterBlkEnd;
> - ++iterBlk) {
> - (*iterBlk)->dump();
> +MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
> + int NewOpcode, DebugLoc DL) {
> + MachineInstr *MI =
> + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
> + if (MBB->begin() != MBB->end())
> + MBB->insert(MBB->begin(), MI);
> + else
> + MBB->push_back(MI);
> + SHOWNEWINSTR(MI);
> + return MI;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
> + MachineBasicBlock::iterator I, int NewOpcode) {
> + MachineInstr *OldMI = &(*I);
> + MachineBasicBlock *MBB = OldMI->getParent();
> + MachineInstr *NewMBB =
> + MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
> + MBB->insert(I, NewMBB);
> + //assume the instruction doesn't take any reg operand ...
> + SHOWNEWINSTR(NewMBB);
> + return NewMBB;
> +}
> +
> +void AMDGPUCFGStructurizer::insertCondBranchBefore(
> + MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
> + MachineInstr *OldMI = &(*I);
> + MachineBasicBlock *MBB = OldMI->getParent();
> + MachineFunction *MF = MBB->getParent();
> + MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
> + MBB->insert(I, NewMI);
> + MachineInstrBuilder MIB(*MF, NewMI);
> + MIB.addReg(OldMI->getOperand(1).getReg(), false);
> + SHOWNEWINSTR(NewMI);
> + //erase later oldInstr->eraseFromParent();
> +}
> +
> +void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
> + MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
> + DebugLoc DL) {
> + MachineFunction *MF = blk->getParent();
> + MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
> + //insert before
> + blk->insert(I, NewInstr);
> + MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
> + SHOWNEWINSTR(NewInstr);
> +}
> +
> +void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
> + int NewOpcode, int RegNum) {
> + MachineFunction *MF = MBB->getParent();
> + MachineInstr *NewInstr =
> + MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
> + MBB->push_back(NewInstr);
> + MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
> + SHOWNEWINSTR(NewInstr);
> +}
> +
> +int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
> + switch(OldOpcode) {
> + case AMDGPU::JUMP_COND:
> + case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
> + case AMDGPU::BRANCH_COND_i32:
> + case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
> + default: llvm_unreachable("internal error");
> + }
> + return -1;
> +}
> +
> +int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
> + switch(OldOpcode) {
> + case AMDGPU::JUMP_COND:
> + case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
> + case AMDGPU::BRANCH_COND_i32:
> + case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
> + default: llvm_unreachable("internal error");
> + }
> + return -1;
> +}
> +
> +int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
> + switch(OldOpcode) {
> + case AMDGPU::JUMP_COND:
> + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
> + default: llvm_unreachable("internal error");
> + };
> + return -1;
> +}
> +
> +int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
> + switch(OldOpcode) {
> + case AMDGPU::JUMP_COND:
> + case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
> + default: llvm_unreachable("internal error");
> + }
> + return -1;
> +}
> +
> +MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
> + return MI->getOperand(0).getMBB();
> +}
> +
> +void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
> + MachineBasicBlock *MBB) {
> + MI->getOperand(0).setMBB(MBB);
> +}
> +
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
> + MachineInstr *MI) {
> + assert(MBB->succ_size() == 2);
> + MachineBasicBlock *TrueBranch = getTrueBranch(MI);
> + MachineBasicBlock::succ_iterator It = MBB->succ_begin();
> + MachineBasicBlock::succ_iterator Next = It;
> + ++Next;
> + return (*It == TrueBranch) ? *Next : *It;
> +}
> +
> +bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
> + switch (MI->getOpcode()) {
> + case AMDGPU::JUMP_COND:
> + case AMDGPU::BRANCH_COND_i32:
> + case AMDGPU::BRANCH_COND_f32: return true;
> + default:
> + return false;
> + }
> + return false;
> +}
> +
> +bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
> + switch (MI->getOpcode()) {
> + case AMDGPU::JUMP:
> + case AMDGPU::BRANCH:
> + return true;
> + default:
> + return false;
> + }
> + return false;
> +}
> +
> +DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
> + //get DebugLoc from the last MachineBasicBlock instruction with debug info
> + DebugLoc DL;
> + for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
> + ++It) {
> + MachineInstr *instr = &(*It);
> + if (instr->getDebugLoc().isUnknown() == false)
> + DL = instr->getDebugLoc();
> + }
> + return DL;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
> + MachineBasicBlock *MBB) {
> + MachineBasicBlock::reverse_iterator It = MBB->rbegin();
> + MachineInstr *MI = &*It;
> + if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
> + return MI;
> + return NULL;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
> + MachineBasicBlock *MBB) {
> + for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
> + It != E; ++It) {
> + // FIXME: Simplify
> + MachineInstr *MI = &*It;
> + if (MI) {
> + if (isCondBranch(MI) || isUncondBranch(MI))
> + return MI;
> + else if (!TII->isMov(MI->getOpcode()))
> + break;
> }
> - dbgs() << "Ordered blocks:\n";
> - printOrderedBlocks(dbgs());
> - );
> + }
> + return NULL;
> +}
>
> - SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
> -
> - for (typename LoopInfoT::iterator iter = loopInfo->begin(),
> - iterEnd = loopInfo->end();
> - iter != iterEnd; ++iter) {
> - LoopT* loopRep = (*iter);
> - BlockTSmallerVector exitingBlks;
> - loopRep->getExitingBlocks(exitingBlks);
> -
> - if (exitingBlks.size() == 0) {
> - BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
> - if (dummyExitBlk != NULL)
> - retBlks.push_back(dummyExitBlk);
> +MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
> + MachineBasicBlock::reverse_iterator It = MBB->rbegin();
> + if (It != MBB->rend()) {
> + MachineInstr *instr = &(*It);
> + if (instr->getOpcode() == AMDGPU::RETURN)
> + return instr;
> + }
> + return NULL;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
> + MachineBasicBlock::reverse_iterator It = MBB->rbegin();
> + if (It != MBB->rend()) {
> + MachineInstr *MI = &(*It);
> + if (MI->getOpcode() == AMDGPU::CONTINUE)
> + return MI;
> + }
> + return NULL;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::getLoopBreakInstr(MachineBasicBlock *MBB) {
> + for (MachineBasicBlock::iterator It = MBB->begin(); (It != MBB->end());
> + ++It) {
> + MachineInstr *MI = &(*It);
> + if (MI->getOpcode() == AMDGPU::PREDICATED_BREAK)
> + return MI;
> + }
> + return NULL;
> +}
> +
> +bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
> + MachineInstr *MI = getReturnInstr(MBB);
> + bool IsReturn = (MBB->succ_size() == 0);
> + if (MI)
> + assert(IsReturn);
> + else if (IsReturn)
> + DEBUG(
> + dbgs() << "BB" << MBB->getNumber()
> + <<" is return block without RETURN instr\n";);
> + return IsReturn;
> +}
> +
> +void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
> + MachineBasicBlock *SrcMBB) {
> + for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
> + iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
> + DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of
> +}
> +
> +MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
> + MachineFunction *Func = MBB->getParent();
> + MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
> + Func->push_back(NewMBB); //insert to function
> + for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
> + It != E; ++It) {
> + MachineInstr *MI = Func->CloneMachineInstr(It);
> + NewMBB->push_back(MI);
> + }
> + return NewMBB;
> +}
> +
> +void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
> + MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
> + MachineBasicBlock *NewBlk) {
> + MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
> + if (BranchMI && isCondBranch(BranchMI) &&
> + getTrueBranch(BranchMI) == OldMBB)
> + setTrueBranch(BranchMI, NewBlk);
> +}
> +
> +void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
> + assert((!MBB->getParent()->getJumpTableInfo()
> + || MBB->getParent()->getJumpTableInfo()->isEmpty())
> + && "found a jump table");
> +
> + //collect continue right before endloop
> + SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
> + MachineBasicBlock::iterator Pre = MBB->begin();
> + MachineBasicBlock::iterator E = MBB->end();
> + MachineBasicBlock::iterator It = Pre;
> + while (It != E) {
> + if (Pre->getOpcode() == AMDGPU::CONTINUE
> + && It->getOpcode() == AMDGPU::ENDLOOP)
> + ContInstr.push_back(Pre);
> + Pre = It;
> + ++It;
> + }
> +
> + //delete continue right before endloop
> + for (unsigned i = 0; i < ContInstr.size(); ++i)
> + ContInstr[i]->eraseFromParent();
> +
> + // TODO: fix up the jump table so later phases won't be confused. If
> + // jumpTableInfo->isEmpty() is false, the jump table needs to be cleaned,
> + // but there is no such interface yet. Alternatively, replace all the other
> + // blocks in the jump table with the entryBlk.
> +
> +}
> +
> +
> +bool AMDGPUCFGStructurizer::prepare() {
> + bool Changed = false;
> +
> + //FIXME: if not reducible flow graph, make it so ???
> +
> + DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
> +
> + orderBlocks(FuncRep);
> +
> + SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
> +
> + // Add an ExitBlk to loops that don't have one
> + for (MachineLoopInfo::iterator It = MLI->begin(),
> + E = MLI->end(); It != E; ++It) {
> + MachineLoop *LoopRep = (*It);
> + MBBVector ExitingMBBs;
> + LoopRep->getExitingBlocks(ExitingMBBs);
> +
> + if (ExitingMBBs.size() == 0) {
> + MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
> + if (DummyExitBlk)
> + RetBlks.push_back(DummyExitBlk);
> }
> }
>
> // Remove unconditional branch instr.
> // Add dummy exit block iff there are multiple returns.
> -
> - for (typename SmallVectorImpl<BlockT *>::const_iterator
> - iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
> - iterBlk != iterEndBlk;
> - ++iterBlk) {
> - BlockT *curBlk = *iterBlk;
> - removeUnconditionalBranch(curBlk);
> - removeRedundantConditionalBranch(curBlk);
> - if (CFGTraits::isReturnBlock(curBlk)) {
> - retBlks.push_back(curBlk);
> + for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
> + It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
> + MachineBasicBlock *MBB = *It;
> + removeUnconditionalBranch(MBB);
> + removeRedundantConditionalBranch(MBB);
> + if (isReturnBlock(MBB)) {
> + RetBlks.push_back(MBB);
> }
> - assert(curBlk->succ_size() <= 2);
> - } //for
> + assert(MBB->succ_size() <= 2);
> + }
>
> - if (retBlks.size() >= 2) {
> - addDummyExitBlock(retBlks);
> - changed = true;
> + if (RetBlks.size() >= 2) {
> + addDummyExitBlock(RetBlks);
> + Changed = true;
> }
>
> - return changed;
> -} //CFGStructurizer::prepare
> + return Changed;
> +}
>
> -template<class PassT>
> -bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> - const AMDGPURegisterInfo * tri) {
> - passRep = &pass;
> - funcRep = &func;
> - TRI = tri;
> +bool AMDGPUCFGStructurizer::run() {
>
> //Assume reducible CFG...
> - DEBUG(
> - dbgs() << "AMDGPUCFGStructurizer::run\n";
> - func.viewCFG();
> - );
> -
> - domTree = CFGTraits::getDominatorTree(pass);
> - DEBUG(
> - domTree->print(dbgs(), (const llvm::Module*)0);
> - );
> -
> - postDomTree = CFGTraits::getPostDominatorTree(pass);
> - DEBUG(
> - postDomTree->print(dbgs());
> - );
> -
> - loopInfo = CFGTraits::getLoopInfo(pass);
> - DEBUG(
> - dbgs() << "LoopInfo:\n";
> - PrintLoopinfo(*loopInfo, dbgs());
> - );
> + DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n";FuncRep->viewCFG(););
>
> - orderBlocks();
> #ifdef STRESSTEST
> //Use the worse block ordering to test the algorithm.
> ReverseVector(orderedBlks);
> #endif
>
> - DEBUG(
> - dbgs() << "Ordered blocks:\n";
> - printOrderedBlocks(dbgs());
> - );
> - int numIter = 0;
> - bool finish = false;
> - BlockT *curBlk;
> - bool makeProgress = false;
> - int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
> - orderedBlks.end());
> + DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
> + int NumIter = 0;
> + bool Finish = false;
> + MachineBasicBlock *MBB;
> + bool MakeProgress = false;
> + int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
> + OrderedBlks.end());
>
> do {
> - ++numIter;
> + ++NumIter;
> DEBUG(
> - dbgs() << "numIter = " << numIter
> - << ", numRemaintedBlk = " << numRemainedBlk << "\n";
> + dbgs() << "numIter = " << NumIter
> + << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
> );
>
> - typename SmallVectorImpl<BlockT *>::const_iterator
> - iterBlk = orderedBlks.begin();
> - typename SmallVectorImpl<BlockT *>::const_iterator
> - iterBlkEnd = orderedBlks.end();
> + SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
> + OrderedBlks.begin();
> + SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
> + OrderedBlks.end();
>
> - typename SmallVectorImpl<BlockT *>::const_iterator
> - sccBeginIter = iterBlk;
> - BlockT *sccBeginBlk = NULL;
> - int sccNumBlk = 0; // The number of active blocks, init to a
> + SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
> + It;
> + MachineBasicBlock *SccBeginMBB = NULL;
> + int SccNumBlk = 0; // The number of active blocks, init to a
> // maximum possible number.
> - int sccNumIter; // Number of iteration in this SCC.
> + int SccNumIter; // Number of iteration in this SCC.
>
> - while (iterBlk != iterBlkEnd) {
> - curBlk = *iterBlk;
> + while (It != E) {
> + MBB = *It;
>
> - if (sccBeginBlk == NULL) {
> - sccBeginIter = iterBlk;
> - sccBeginBlk = curBlk;
> - sccNumIter = 0;
> - sccNumBlk = numRemainedBlk; // Init to maximum possible number.
> + if (!SccBeginMBB) {
> + SccBeginIter = It;
> + SccBeginMBB = MBB;
> + SccNumIter = 0;
> + SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
> DEBUG(
> - dbgs() << "start processing SCC" << getSCCNum(sccBeginBlk);
> + dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
> dbgs() << "\n";
> );
> }
>
> - if (!isRetiredBlock(curBlk)) {
> - patternMatch(curBlk);
> - }
> + if (!isRetiredBlock(MBB))
> + patternMatch(MBB);
>
> - ++iterBlk;
> + ++It;
>
> - bool contNextScc = true;
> - if (iterBlk == iterBlkEnd
> - || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
> + bool ContNextScc = true;
> + if (It == E
> + || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
> // Just finish one scc.
> - ++sccNumIter;
> - int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
> - if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
> + ++SccNumIter;
> + int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
> + if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
> DEBUG(
> - dbgs() << "Can't reduce SCC " << getSCCNum(curBlk)
> - << ", sccNumIter = " << sccNumIter;
> + dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
> + << ", sccNumIter = " << SccNumIter;
> dbgs() << "doesn't make any progress\n";
> );
> - contNextScc = true;
> - } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
> - sccNumBlk = sccRemainedNumBlk;
> - iterBlk = sccBeginIter;
> - contNextScc = false;
> + ContNextScc = true;
> + } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
> + SccNumBlk = sccRemainedNumBlk;
> + It = SccBeginIter;
> + ContNextScc = false;
> DEBUG(
> - dbgs() << "repeat processing SCC" << getSCCNum(curBlk)
> - << "sccNumIter = " << sccNumIter << "\n";
> - func.viewCFG();
> + dbgs() << "repeat processing SCC" << getSCCNum(MBB)
> + << "sccNumIter = " << SccNumIter << "\n";
> + FuncRep->viewCFG();
> );
> } else {
> // Finish the current scc.
> - contNextScc = true;
> + ContNextScc = true;
> }
> } else {
> // Continue on next component in the current scc.
> - contNextScc = false;
> + ContNextScc = false;
> }
>
> - if (contNextScc) {
> - sccBeginBlk = NULL;
> - }
> + if (ContNextScc)
> + SccBeginMBB = NULL;
> } //while, "one iteration" over the function.
>
> - BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
> - if (entryBlk->succ_size() == 0) {
> - finish = true;
> + MachineBasicBlock *EntryMBB =
> + GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
> + if (EntryMBB->succ_size() == 0) {
> + Finish = true;
> DEBUG(
> dbgs() << "Reduce to one block\n";
> );
> } else {
> - int newnumRemainedBlk
> - = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
> + int NewnumRemainedBlk
> + = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
> // consider cloned blocks ??
> - if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
> - makeProgress = true;
> - numRemainedBlk = newnumRemainedBlk;
> + if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
> + MakeProgress = true;
> + NumRemainedBlk = NewnumRemainedBlk;
> } else {
> - makeProgress = false;
> + MakeProgress = false;
> DEBUG(
> dbgs() << "No progress\n";
> );
> }
> }
> - } while (!finish && makeProgress);
> + } while (!Finish && MakeProgress);
>
> // Misc wrap up to maintain the consistency of the Function representation.
> - CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
> + wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
>
> // Detach retired Block, release memory.
> - for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
> - iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
> - if ((*iterMap).second && (*iterMap).second->isRetired) {
> - assert(((*iterMap).first)->getNumber() != -1);
> + for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
> + It != E; ++It) {
> + if ((*It).second && (*It).second->IsRetired) {
> + assert(((*It).first)->getNumber() != -1);
> DEBUG(
> - dbgs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
> + dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
> );
> - (*iterMap).first->eraseFromParent(); //Remove from the parent Function.
> + (*It).first->eraseFromParent(); //Remove from the parent Function.
> }
> - delete (*iterMap).second;
> + delete (*It).second;
> }
> - blockInfoMap.clear();
> -
> - // clear loopLandInfoMap
> - for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
> - iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
> - delete (*iterMap).second;
> - }
> - loopLandInfoMap.clear();
> + BlockInfoMap.clear();
> + LLInfoMap.clear();
>
> DEBUG(
> - func.viewCFG();
> + FuncRep->viewCFG();
> );
>
> - if (!finish) {
> + if (!Finish)
> llvm_unreachable("IRREDUCIBL_CF");
> - }
>
> return true;
> -} //CFGStructurizer::run
> -
> -/// Print the ordered Blocks.
> -///
> -template<class PassT>
> -void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
> - size_t i = 0;
> - for (typename SmallVectorImpl<BlockT *>::const_iterator
> - iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
> - iterBlk != iterBlkEnd;
> - ++iterBlk, ++i) {
> - os << "BB" << (*iterBlk)->getNumber();
> - os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
> - if (i != 0 && i % 10 == 0) {
> - os << "\n";
> - } else {
> - os << " ";
> - }
> - }
> -} //printOrderedBlocks
> -
> -/// Compute the reversed DFS post order of Blocks
> -///
> -template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
> - int sccNum = 0;
> - BlockT *bb;
> - for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
> - sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
> - std::vector<BlockT *> &sccNext = *sccIter;
> - for (typename std::vector<BlockT *>::const_iterator
> - blockIter = sccNext.begin(), blockEnd = sccNext.end();
> +}
> +
> +
> +
> +void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
> + int SccNum = 0;
> + MachineBasicBlock *MBB;
> + for (scc_iterator<MachineFunction *> It = scc_begin(MF), E = scc_end(MF);
> + It != E; ++It, ++SccNum) {
> + std::vector<MachineBasicBlock *> &SccNext = *It;
> + for (std::vector<MachineBasicBlock *>::const_iterator
> + blockIter = SccNext.begin(), blockEnd = SccNext.end();
> blockIter != blockEnd; ++blockIter) {
> - bb = *blockIter;
> - orderedBlks.push_back(bb);
> - recordSccnum(bb, sccNum);
> + MBB = *blockIter;
> + OrderedBlks.push_back(MBB);
> + recordSccnum(MBB, SccNum);
> }
> }
>
> //walk through all the block in func to check for unreachable
> - for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
> - blockEnd1 = FuncGTraits::nodes_end(funcRep);
> - blockIter1 != blockEnd1; ++blockIter1) {
> - BlockT *bb = &(*blockIter1);
> - sccNum = getSCCNum(bb);
> - if (sccNum == INVALIDSCCNUM) {
> - dbgs() << "unreachable block BB" << bb->getNumber() << "\n";
> - }
> + typedef GraphTraits<MachineFunction *> GTM;
> + MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
> + for (; It != E; ++It) {
> + MachineBasicBlock *MBB = &(*It);
> + SccNum = getSCCNum(MBB);
> + if (SccNum == INVALIDSCCNUM)
> + dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
> }
> -} //orderBlocks
> +}
>
> -template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
> - int numMatch = 0;
> - int curMatch;
> +int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
> + int NumMatch = 0;
> + int CurMatch;
>
> DEBUG(
> - dbgs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
> + dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
> );
>
> - while ((curMatch = patternMatchGroup(curBlk)) > 0) {
> - numMatch += curMatch;
> - }
> + while ((CurMatch = patternMatchGroup(MBB)) > 0)
> + NumMatch += CurMatch;
>
> DEBUG(
> - dbgs() << "End patternMatch BB" << curBlk->getNumber()
> - << ", numMatch = " << numMatch << "\n";
> + dbgs() << "End patternMatch BB" << MBB->getNumber()
> + << ", numMatch = " << NumMatch << "\n";
> );
>
> - return numMatch;
> -} //patternMatch
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
> - int numMatch = 0;
> - numMatch += serialPatternMatch(curBlk);
> - numMatch += ifPatternMatch(curBlk);
> - numMatch += loopendPatternMatch(curBlk);
> - numMatch += loopPatternMatch(curBlk);
> - return numMatch;
> -}//patternMatchGroup
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
> - if (curBlk->succ_size() != 1) {
> + return NumMatch;
> +}
> +
> +int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
> + int NumMatch = 0;
> + NumMatch += loopendPatternMatch();
> + NumMatch += serialPatternMatch(MBB);
> + NumMatch += ifPatternMatch(MBB);
> + return NumMatch;
> +}
> +
> +
> +int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
> + if (MBB->succ_size() != 1)
> return 0;
> - }
>
> - BlockT *childBlk = *curBlk->succ_begin();
> - if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
> + MachineBasicBlock *childBlk = *MBB->succ_begin();
> + if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
> return 0;
> - }
>
> - mergeSerialBlock(curBlk, childBlk);
> + mergeSerialBlock(MBB, childBlk);
> ++numSerialPatternMatch;
> return 1;
> -} //serialPatternMatch
> +}
>
> -template<class PassT>
> -int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
> +int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
> //two edges
> - if (curBlk->succ_size() != 2) {
> + if (MBB->succ_size() != 2)
> return 0;
> - }
> -
> - if (hasBackEdge(curBlk)) {
> + if (hasBackEdge(MBB))
> return 0;
> - }
> -
> - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
> - if (branchInstr == NULL) {
> + MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
> + if (!BranchMI)
> return 0;
> - }
>
> - assert(CFGTraits::isCondBranch(branchInstr));
> + assert(isCondBranch(BranchMI));
>
> - BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
> - BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
> - BlockT *landBlk;
> - int cloned = 0;
> + MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
> + serialPatternMatch(TrueMBB);
> + ifPatternMatch(TrueMBB);
> + MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
> + serialPatternMatch(FalseMBB);
> + ifPatternMatch(FalseMBB);
> + MachineBasicBlock *LandBlk;
> + int Cloned = 0;
>
> + assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
> // TODO: Simplify
> - if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
> - && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
> - landBlk = *trueBlk->succ_begin();
> - } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
> - landBlk = NULL;
> - } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
> - landBlk = falseBlk;
> - falseBlk = NULL;
> - } else if (falseBlk->succ_size() == 1
> - && *falseBlk->succ_begin() == trueBlk) {
> - landBlk = trueBlk;
> - trueBlk = NULL;
> - } else if (falseBlk->succ_size() == 1
> - && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
> - landBlk = *falseBlk->succ_begin();
> - } else if (trueBlk->succ_size() == 1
> - && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
> - landBlk = *trueBlk->succ_begin();
> + if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
> + && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
> + // Diamond pattern
> + LandBlk = *TrueMBB->succ_begin();
> + } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
> + // Triangle pattern, false is empty
> + LandBlk = FalseMBB;
> + FalseMBB = NULL;
> + } else if (FalseMBB->succ_size() == 1
> + && *FalseMBB->succ_begin() == TrueMBB) {
> + // Triangle pattern, true is empty
> + LandBlk = TrueMBB;
> + TrueMBB = NULL;
> + } else if (FalseMBB->succ_size() == 1
> + && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
> + LandBlk = *FalseMBB->succ_begin();
> + } else if (TrueMBB->succ_size() == 1
> + && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
> + LandBlk = *TrueMBB->succ_begin();
> } else {
> - return handleJumpintoIf(curBlk, trueBlk, falseBlk);
> + return handleJumpintoIf(MBB, TrueMBB, FalseMBB);
> }
>
> // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
> // new BB created for landBlk==NULL may introduce new challenge to the
> // reduction process.
> - if (landBlk != NULL &&
> - ((trueBlk && trueBlk->pred_size() > 1)
> - || (falseBlk && falseBlk->pred_size() > 1))) {
> - cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
> + if (LandBlk &&
> + ((TrueMBB && TrueMBB->pred_size() > 1)
> + || (FalseMBB && FalseMBB->pred_size() > 1))) {
> + Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
> }
>
> - if (trueBlk && trueBlk->pred_size() > 1) {
> - trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
> - ++cloned;
> + if (TrueMBB && TrueMBB->pred_size() > 1) {
> + TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
> + ++Cloned;
> }
>
> - if (falseBlk && falseBlk->pred_size() > 1) {
> - falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
> - ++cloned;
> + if (FalseMBB && FalseMBB->pred_size() > 1) {
> + FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
> + ++Cloned;
> }
>
> - mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
> + mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
>
> ++numIfPatternMatch;
>
> - numClonedBlock += cloned;
> + numClonedBlock += Cloned;
>
> - return 1 + cloned;
> -} //ifPatternMatch
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
> - return 0;
> -} //switchPatternMatch
> + return 1 + Cloned;
> +}
>
> -template<class PassT>
> -int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
> - LoopT *loopRep = loopInfo->getLoopFor(curBlk);
> - typename std::vector<LoopT *> nestedLoops;
> - while (loopRep) {
> - nestedLoops.push_back(loopRep);
> - loopRep = loopRep->getParentLoop();
> +int AMDGPUCFGStructurizer::loopendPatternMatch() {
> + std::vector<MachineLoop *> NestedLoops;
> + for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end();
> + It != E; ++It) {
> + df_iterator<MachineLoop *> LpIt = df_begin(*It),
> + LpE = df_end(*It);
> + for (; LpIt != LpE; ++LpIt)
> + NestedLoops.push_back(*LpIt);
> }
> -
> - if (nestedLoops.size() == 0) {
> + if (NestedLoops.size() == 0)
> return 0;
> - }
>
> // Process nested loop outside->inside, so "continue" to a outside loop won't
> // be mistaken as "break" of the current loop.
> - int num = 0;
> - for (typename std::vector<LoopT *>::reverse_iterator
> - iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
> - iter != iterEnd; ++iter) {
> - loopRep = *iter;
> -
> - if (getLoopLandBlock(loopRep) != NULL) {
> + int Num = 0;
> + for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
> + E = NestedLoops.rend(); It != E; ++It) {
> + MachineLoop *ExaminedLoop = *It;
> + if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
> continue;
> - }
> -
> - BlockT *loopHeader = loopRep->getHeader();
> -
> - int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
> -
> - if (numBreak == -1) {
> + DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
> + int NumBreak = mergeLoop(ExaminedLoop);
> + if (NumBreak == -1)
> break;
> - }
> -
> - int numCont = loopcontPatternMatch(loopRep, loopHeader);
> - num += numBreak + numCont;
> + Num += NumBreak;
> }
> + return Num;
> +}
>
> - return num;
> -} //loopendPatternMatch
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
> - if (curBlk->succ_size() != 0) {
> - return 0;
> - }
> +int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
> + MachineBasicBlock *LoopHeader = LoopRep->getHeader();
> + MBBVector ExitingMBBs;
> + LoopRep->getExitingBlocks(ExitingMBBs);
> + assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
> + DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
> + // We assume a single ExitBlk
> + MBBVector ExitBlks;
> + LoopRep->getExitBlocks(ExitBlks);
> + SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
> + for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
> + ExitBlkSet.insert(ExitBlks[i]);
> + assert(ExitBlkSet.size() == 1);
> + MachineBasicBlock *ExitBlk = *ExitBlks.begin();
> + assert(ExitBlk && "Loop has several exit block");
> + MBBVector LatchBlks;
> + typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
> + InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
> + PE = InvMBBTraits::child_end(LoopHeader);
> + for (; PI != PE; PI++) {
> + if (LoopRep->contains(*PI))
> + LatchBlks.push_back(*PI);
> + }
> +
> + for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
> + mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
> + for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
> + settleLoopcontBlock(LatchBlks[i], LoopHeader);
> + int Match = 0;
> + do {
> + Match = 0;
> + Match += serialPatternMatch(LoopHeader);
> + Match += ifPatternMatch(LoopHeader);
> + } while (Match > 0);
> + mergeLooplandBlock(LoopHeader, ExitBlk);
> + MachineLoop *ParentLoop = LoopRep->getParentLoop();
> + if (ParentLoop)
> + MLI->changeLoopFor(LoopHeader, ParentLoop);
> + else
> + MLI->removeBlock(LoopHeader);
> + Visited[LoopRep] = true;
> + return 1;
> +}
>
> - int numLoop = 0;
> - LoopT *loopRep = loopInfo->getLoopFor(curBlk);
> - while (loopRep && loopRep->getHeader() == curBlk) {
> - LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
> - if (loopLand) {
> - BlockT *landBlk = loopLand->landBlk;
> - assert(landBlk);
> - if (!isRetiredBlock(landBlk)) {
> - mergeLooplandBlock(curBlk, loopLand);
> - ++numLoop;
> - }
> +int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
> + MachineBasicBlock *LoopHeader) {
> + int NumCont = 0;
> + SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
> + typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
> + GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
> + E = GTIM::child_end(LoopHeader);
> + for (; It != E; ++It) {
> + MachineBasicBlock *MBB = *It;
> + if (LoopRep->contains(MBB)) {
> + handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
> + LoopHeader, LoopRep);
> + ContMBB.push_back(MBB);
> + ++NumCont;
> }
> - loopRep = loopRep->getParentLoop();
> }
>
> - numLoopPatternMatch += numLoop;
> + for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
> + E = ContMBB.end(); It != E; ++It) {
> + (*It)->removeSuccessor(LoopHeader);
> + }
>
> - return numLoop;
> -} //loopPatternMatch
> + numLoopcontPatternMatch += NumCont;
>
> -template<class PassT>
> -int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
> - BlockT *loopHeader) {
> - BlockTSmallerVector exitingBlks;
> - loopRep->getExitingBlocks(exitingBlks);
> + return NumCont;
> +}
>
> - DEBUG(
> - dbgs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
> - );
>
> - if (exitingBlks.size() == 0) {
> - setLoopLandBlock(loopRep);
> - return 0;
> +bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
> + MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
> + if (Src1MBB->succ_size() == 0) {
> + MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
> + if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
> + MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
> + if (TheEntry) {
> + DEBUG(
> + dbgs() << "isLoopContBreakBlock yes src1 = BB"
> + << Src1MBB->getNumber()
> + << " src2 = BB" << Src2MBB->getNumber() << "\n";
> + );
> + return true;
> + }
> + }
> }
> + return false;
> +}
>
> - // Compute the corresponding exitBlks and exit block set.
> - BlockTSmallerVector exitBlks;
> - std::set<BlockT *> exitBlkSet;
> - for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
> - iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
> - BlockT *exitingBlk = *iter;
> - BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
> - exitBlks.push_back(exitBlk);
> - exitBlkSet.insert(exitBlk); //non-duplicate insert
> +int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
> + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
> + int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
> + if (Num == 0) {
> + DEBUG(
> + dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> + );
> + Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
> }
> + return Num;
> +}
>
> - assert(exitBlkSet.size() > 0);
> - assert(exitBlks.size() == exitingBlks.size());
> +int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
> + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
> + int Num = 0;
> + MachineBasicBlock *DownBlk;
> +
> + //TrueMBB could be the common post dominator
> + DownBlk = TrueMBB;
>
> DEBUG(
> - dbgs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
> + dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
> + << " true = BB" << TrueMBB->getNumber()
> + << ", numSucc=" << TrueMBB->succ_size()
> + << " false = BB" << FalseMBB->getNumber() << "\n";
> );
>
> - // Find exitLandBlk.
> - BlockT *exitLandBlk = NULL;
> - int numCloned = 0;
> - int numSerial = 0;
> -
> - if (exitBlkSet.size() == 1) {
> - exitLandBlk = *exitBlkSet.begin();
> - } else {
> - exitLandBlk = findNearestCommonPostDom(exitBlkSet);
> -
> - if (exitLandBlk == NULL) {
> - return -1;
> - }
> -
> - bool allInPath = true;
> - bool allNotInPath = true;
> - for (typename std::set<BlockT*>::const_iterator
> - iter = exitBlkSet.begin(),
> - iterEnd = exitBlkSet.end();
> - iter != iterEnd; ++iter) {
> - BlockT *exitBlk = *iter;
> + while (DownBlk) {
> + DEBUG(
> + dbgs() << "check down = BB" << DownBlk->getNumber();
> + );
>
> - PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
> + if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
> DEBUG(
> - dbgs() << "BB" << exitBlk->getNumber()
> - << " to BB" << exitLandBlk->getNumber() << " PathToKind="
> - << pathKind << "\n";
> + dbgs() << " working\n";
> );
>
> - allInPath = allInPath && (pathKind == SinglePath_InPath);
> - allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
> -
> - if (!allInPath && !allNotInPath) {
> - DEBUG(
> - dbgs() << "singlePath check fail\n";
> - );
> - return -1;
> - }
> - } // check all exit blocks
> + Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
> + Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
>
> - if (allNotInPath) {
> -
> - // TODO: Simplify, maybe separate function?
> - LoopT *parentLoopRep = loopRep->getParentLoop();
> - BlockT *parentLoopHeader = NULL;
> - if (parentLoopRep)
> - parentLoopHeader = parentLoopRep->getHeader();
> -
> - if (exitLandBlk == parentLoopHeader &&
> - (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
> - loopRep,
> - exitBlkSet,
> - exitLandBlk)) != NULL) {
> - DEBUG(
> - dbgs() << "relocateLoopcontBlock success\n";
> - );
> - } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
> - exitingBlks,
> - exitBlks)) != NULL) {
> - DEBUG(
> - dbgs() << "insertEndbranchBlock success\n";
> - );
> - } else {
> - DEBUG(
> - dbgs() << "loop exit fail\n";
> - );
> - return -1;
> - }
> - }
> -
> - // Handle side entry to exit path.
> - exitBlks.clear();
> - exitBlkSet.clear();
> - for (typename BlockTSmallerVector::iterator iterExiting =
> - exitingBlks.begin(),
> - iterExitingEnd = exitingBlks.end();
> - iterExiting != iterExitingEnd; ++iterExiting) {
> - BlockT *exitingBlk = *iterExiting;
> - BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
> - BlockT *newExitBlk = exitBlk;
> -
> - if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
> - newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
> - ++numCloned;
> - }
> -
> - numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
> -
> - exitBlks.push_back(newExitBlk);
> - exitBlkSet.insert(newExitBlk);
> - }
> -
> - for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
> - iterExitEnd = exitBlks.end();
> - iterExit != iterExitEnd; ++iterExit) {
> - BlockT *exitBlk = *iterExit;
> - numSerial += serialPatternMatch(exitBlk);
> - }
> -
> - for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
> - iterExitEnd = exitBlks.end();
> - iterExit != iterExitEnd; ++iterExit) {
> - BlockT *exitBlk = *iterExit;
> - if (exitBlk->pred_size() > 1) {
> - if (exitBlk != exitLandBlk) {
> - return -1;
> - }
> - } else {
> - if (exitBlk != exitLandBlk &&
> - (exitBlk->succ_size() != 1 ||
> - *exitBlk->succ_begin() != exitLandBlk)) {
> - return -1;
> - }
> - }
> - }
> - } // else
> -
> - exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
> -
> - // Fold break into the breaking block. Leverage across level breaks.
> - assert(exitingBlks.size() == exitBlks.size());
> - for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
> - iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
> - iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
> - BlockT *exitBlk = *iterExit;
> - BlockT *exitingBlk = *iterExiting;
> - assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
> - LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
> - handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
> - }
> -
> - int numBreak = static_cast<int>(exitingBlks.size());
> - numLoopbreakPatternMatch += numBreak;
> - numClonedBlock += numCloned;
> - return numBreak + numSerial + numCloned;
> -} //loopbreakPatternMatch
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
> - BlockT *loopHeader) {
> - int numCont = 0;
> - SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
> - for (typename InvBlockGTraits::ChildIteratorType iter =
> - InvBlockGTraits::child_begin(loopHeader),
> - iterEnd = InvBlockGTraits::child_end(loopHeader);
> - iter != iterEnd; ++iter) {
> - BlockT *curBlk = *iter;
> - if (loopRep->contains(curBlk)) {
> - handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
> - loopHeader, loopRep);
> - contBlk.push_back(curBlk);
> - ++numCont;
> - }
> - }
> -
> - for (typename SmallVectorImpl<BlockT *>::iterator
> - iter = contBlk.begin(), iterEnd = contBlk.end();
> - iter != iterEnd; ++iter) {
> - (*iter)->removeSuccessor(loopHeader);
> - }
> -
> - numLoopcontPatternMatch += numCont;
> -
> - return numCont;
> -} //loopcontPatternMatch
> -
> -
> -template<class PassT>
> -bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
> - BlockT *src2Blk) {
> - // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
> - // same loop with LoopLandInfo without explicitly keeping track of
> - // loopContBlks and loopBreakBlks, this is a method to get the information.
> - //
> - if (src1Blk->succ_size() == 0) {
> - LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
> - if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> - if (theEntry != NULL) {
> - DEBUG(
> - dbgs() << "isLoopContBreakBlock yes src1 = BB"
> - << src1Blk->getNumber()
> - << " src2 = BB" << src2Blk->getNumber() << "\n";
> - );
> - return true;
> - }
> - }
> - }
> - return false;
> -} //isSameloopDetachedContbreak
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
> - BlockT *trueBlk,
> - BlockT *falseBlk) {
> - int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
> - if (num == 0) {
> - DEBUG(
> - dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> - );
> - num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
> - }
> - return num;
> -}
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
> - BlockT *trueBlk,
> - BlockT *falseBlk) {
> - int num = 0;
> - BlockT *downBlk;
> -
> - //trueBlk could be the common post dominator
> - downBlk = trueBlk;
> -
> - DEBUG(
> - dbgs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
> - << " true = BB" << trueBlk->getNumber()
> - << ", numSucc=" << trueBlk->succ_size()
> - << " false = BB" << falseBlk->getNumber() << "\n";
> - );
> -
> - while (downBlk) {
> - DEBUG(
> - dbgs() << "check down = BB" << downBlk->getNumber();
> - );
> -
> - if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
> - DEBUG(
> - dbgs() << " working\n";
> - );
> -
> - num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
> - num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
> -
> - numClonedBlock += num;
> - num += serialPatternMatch(*headBlk->succ_begin());
> - num += serialPatternMatch(*(++headBlk->succ_begin()));
> - num += ifPatternMatch(headBlk);
> - assert(num > 0);
> + numClonedBlock += Num;
> + Num += serialPatternMatch(*HeadMBB->succ_begin());
> + Num += serialPatternMatch(*(++HeadMBB->succ_begin()));
> + Num += ifPatternMatch(HeadMBB);
> + assert(Num > 0);
>
> break;
> }
> DEBUG(
> dbgs() << " not working\n";
> );
> - downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
> + DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
> } // walk down the postDomTree
>
> - return num;
> -} //handleJumpintoIf
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
> - BlockT *trueBlk,
> - BlockT *falseBlk,
> - BlockT *landBlk,
> - bool detail) {
> - dbgs() << "head = BB" << headBlk->getNumber()
> - << " size = " << headBlk->size();
> - if (detail) {
> + return Num;
> +}
> +
> +void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
> + MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
> + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
> + dbgs() << "head = BB" << HeadMBB->getNumber()
> + << " size = " << HeadMBB->size();
> + if (Detail) {
> dbgs() << "\n";
> - headBlk->print(dbgs());
> + HeadMBB->print(dbgs());
> dbgs() << "\n";
> }
>
> - if (trueBlk) {
> - dbgs() << ", true = BB" << trueBlk->getNumber() << " size = "
> - << trueBlk->size() << " numPred = " << trueBlk->pred_size();
> - if (detail) {
> + if (TrueMBB) {
> + dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
> + << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
> + if (Detail) {
> dbgs() << "\n";
> - trueBlk->print(dbgs());
> + TrueMBB->print(dbgs());
> dbgs() << "\n";
> }
> }
> - if (falseBlk) {
> - dbgs() << ", false = BB" << falseBlk->getNumber() << " size = "
> - << falseBlk->size() << " numPred = " << falseBlk->pred_size();
> - if (detail) {
> + if (FalseMBB) {
> + dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
> + << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
> + if (Detail) {
> dbgs() << "\n";
> - falseBlk->print(dbgs());
> + FalseMBB->print(dbgs());
> dbgs() << "\n";
> }
> }
> - if (landBlk) {
> - dbgs() << ", land = BB" << landBlk->getNumber() << " size = "
> - << landBlk->size() << " numPred = " << landBlk->pred_size();
> - if (detail) {
> + if (LandMBB) {
> + dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
> + << LandMBB->size() << " numPred = " << LandMBB->pred_size();
> + if (Detail) {
> dbgs() << "\n";
> - landBlk->print(dbgs());
> + LandMBB->print(dbgs());
> dbgs() << "\n";
> }
> }
>
> dbgs() << "\n";
> -} //showImproveSimpleJumpintoIf
> +}
>
> -template<class PassT>
> -int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
> - BlockT *trueBlk,
> - BlockT *falseBlk,
> - BlockT **plandBlk) {
> - bool migrateTrue = false;
> - bool migrateFalse = false;
> +int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> + MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> + MachineBasicBlock **LandMBBPtr) {
> + bool MigrateTrue = false;
> + bool MigrateFalse = false;
>
> - BlockT *landBlk = *plandBlk;
> + MachineBasicBlock *LandBlk = *LandMBBPtr;
>
> - assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
> - && (falseBlk == NULL || falseBlk->succ_size() <= 1));
> + assert((!TrueMBB || TrueMBB->succ_size() <= 1)
> + && (!FalseMBB || FalseMBB->succ_size() <= 1));
>
> - if (trueBlk == falseBlk) {
> + if (TrueMBB == FalseMBB)
> return 0;
> - }
>
> - migrateTrue = needMigrateBlock(trueBlk);
> - migrateFalse = needMigrateBlock(falseBlk);
> + MigrateTrue = needMigrateBlock(TrueMBB);
> + MigrateFalse = needMigrateBlock(FalseMBB);
>
> - if (!migrateTrue && !migrateFalse) {
> + if (!MigrateTrue && !MigrateFalse)
> return 0;
> - }
>
> // If we need to migrate either trueBlk and falseBlk, migrate the rest that
> // have more than one predecessors. without doing this, its predecessor
> // rather than headBlk will have undefined value in initReg.
> - if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
> - migrateTrue = true;
> - }
> - if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
> - migrateFalse = true;
> - }
> + if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
> + MigrateTrue = true;
> + if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
> + MigrateFalse = true;
>
> DEBUG(
> dbgs() << "before improveSimpleJumpintoIf: ";
> - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
> + showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
> );
>
> // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
> @@ -1193,205 +1348,142 @@ int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
> // add initReg = initVal to headBlk
>
> const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> - unsigned initReg =
> - funcRep->getRegInfo().createVirtualRegister(I32RC);
> - if (!migrateTrue || !migrateFalse) {
> - int initVal = migrateTrue ? 0 : 1;
> - CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
> - }
> + unsigned InitReg =
> + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
> + if (!MigrateTrue || !MigrateFalse)
> + llvm_unreachable("Extra register needed to handle CFG");
>
> - int numNewBlk = 0;
> + int NumNewBlk = 0;
>
> - if (landBlk == NULL) {
> - landBlk = funcRep->CreateMachineBasicBlock();
> - funcRep->push_back(landBlk); //insert to function
> + if (!LandBlk) {
> + LandBlk = HeadMBB->getParent()->CreateMachineBasicBlock();
> + HeadMBB->getParent()->push_back(LandBlk); //insert to function
>
> - if (trueBlk) {
> - trueBlk->addSuccessor(landBlk);
> + if (TrueMBB) {
> + TrueMBB->addSuccessor(LandBlk);
> } else {
> - headBlk->addSuccessor(landBlk);
> + HeadMBB->addSuccessor(LandBlk);
> }
>
> - if (falseBlk) {
> - falseBlk->addSuccessor(landBlk);
> + if (FalseMBB) {
> + FalseMBB->addSuccessor(LandBlk);
> } else {
> - headBlk->addSuccessor(landBlk);
> + HeadMBB->addSuccessor(LandBlk);
> }
>
> - numNewBlk ++;
> + NumNewBlk ++;
> }
>
> - bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
> + bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
>
> //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
> - typename BlockT::iterator insertPos =
> - CFGTraits::getInstrPos
> - (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
> -
> - if (landBlkHasOtherPred) {
> - unsigned immReg =
> - funcRep->getRegInfo().createVirtualRegister(I32RC);
> - CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
> - unsigned cmpResReg =
> - funcRep->getRegInfo().createVirtualRegister(I32RC);
> -
> - CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
> - initReg, immReg);
> - CFGTraits::insertCondBranchBefore(landBlk, insertPos,
> - AMDGPU::IF_PREDICATE_SET, passRep,
> - cmpResReg, DebugLoc());
> + MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
> +
> + if (LandBlkHasOtherPred) {
> + llvm_unreachable("Extra register needed to handle CFG");
> + unsigned CmpResReg =
> + HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
> + llvm_unreachable("Extra compare instruction needed to handle CFG");
> + insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
> + CmpResReg, DebugLoc());
> }
>
> - CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
> - passRep, initReg, DebugLoc());
> + insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
> + DebugLoc());
>
> - if (migrateTrue) {
> - migrateInstruction(trueBlk, landBlk, insertPos);
> + if (MigrateTrue) {
> + migrateInstruction(TrueMBB, LandBlk, I);
> // need to uncondionally insert the assignment to ensure a path from its
> // predecessor rather than headBlk has valid value in initReg if
> // (initVal != 1).
> - CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
> + llvm_unreachable("Extra register needed to handle CFG");
> }
> - CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
> + insertInstrBefore(I, AMDGPU::ELSE);
>
> - if (migrateFalse) {
> - migrateInstruction(falseBlk, landBlk, insertPos);
> + if (MigrateFalse) {
> + migrateInstruction(FalseMBB, LandBlk, I);
> // need to uncondionally insert the assignment to ensure a path from its
> // predecessor rather than headBlk has valid value in initReg if
> // (initVal != 0)
> - CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
> + llvm_unreachable("Extra register needed to handle CFG");
> }
>
> - if (landBlkHasOtherPred) {
> + if (LandBlkHasOtherPred) {
> // add endif
> - CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
> + insertInstrBefore(I, AMDGPU::ENDIF);
>
> // put initReg = 2 to other predecessors of landBlk
> - for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
> - predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
> - ++predIter) {
> - BlockT *curBlk = *predIter;
> - if (curBlk != trueBlk && curBlk != falseBlk) {
> - CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
> - }
> - } //for
> + for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
> + PE = LandBlk->pred_end(); PI != PE; ++PI) {
> + MachineBasicBlock *MBB = *PI;
> + if (MBB != TrueMBB && MBB != FalseMBB)
> + llvm_unreachable("Extra register needed to handle CFG");
> + }
> }
> DEBUG(
> dbgs() << "result from improveSimpleJumpintoIf: ";
> - showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
> + showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
> );
>
> // update landBlk
> - *plandBlk = landBlk;
> -
> - return numNewBlk;
> -} //improveSimpleJumpintoIf
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
> - LoopT *exitingLoop,
> - BlockT *exitBlk,
> - LoopT *exitLoop,
> - BlockT *landBlk) {
> - DEBUG(
> - dbgs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
> - << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
> - );
> - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> -
> - RegiT initReg = INVALIDREGNUM;
> - if (exitingLoop != exitLoop) {
> - initReg = static_cast<int>
> - (funcRep->getRegInfo().createVirtualRegister(I32RC));
> - assert(initReg != INVALIDREGNUM);
> - addLoopBreakInitReg(exitLoop, initReg);
> - while (exitingLoop != exitLoop && exitingLoop) {
> - addLoopBreakOnReg(exitingLoop, initReg);
> - exitingLoop = exitingLoop->getParentLoop();
> - }
> - assert(exitingLoop == exitLoop);
> - }
> -
> - mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
> -
> -} //handleLoopbreak
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
> - LoopT *contingLoop,
> - BlockT *contBlk,
> - LoopT *contLoop) {
> - DEBUG(
> - dbgs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
> - << " header = BB" << contBlk->getNumber() << "\n";
> -
> - dbgs() << "Trying to continue loop-depth = "
> - << getLoopDepth(contLoop)
> - << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
> - );
> + *LandMBBPtr = LandBlk;
>
> - RegiT initReg = INVALIDREGNUM;
> - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> - if (contingLoop != contLoop) {
> - initReg = static_cast<int>
> - (funcRep->getRegInfo().createVirtualRegister(I32RC));
> - assert(initReg != INVALIDREGNUM);
> - addLoopContInitReg(contLoop, initReg);
> - while (contingLoop && contingLoop->getParentLoop() != contLoop) {
> - addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg
> - contingLoop = contingLoop->getParentLoop();
> - }
> - assert(contingLoop && contingLoop->getParentLoop() == contLoop);
> - addLoopContOnReg(contingLoop, initReg);
> - }
> + return NumNewBlk;
> +}
>
> - settleLoopcontBlock(contingBlk, contBlk, initReg);
> -} //handleLoopcontBlock
> +void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
> + MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
> + MachineLoop *ContLoop) {
> + DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
> + << " header = BB" << ContMBB->getNumber() << "\n";
> + dbgs() << "Trying to continue loop-depth = "
> + << getLoopDepth(ContLoop)
> + << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
> + settleLoopcontBlock(ContingMBB, ContMBB);
> +}
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
> +void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
> + MachineBasicBlock *SrcMBB) {
> DEBUG(
> - dbgs() << "serialPattern BB" << dstBlk->getNumber()
> - << " <= BB" << srcBlk->getNumber() << "\n";
> + dbgs() << "serialPattern BB" << DstMBB->getNumber()
> + << " <= BB" << SrcMBB->getNumber() << "\n";
> );
> - dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
> + DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
>
> - dstBlk->removeSuccessor(srcBlk);
> - CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
> + DstMBB->removeSuccessor(SrcMBB);
> + cloneSuccessorList(DstMBB, SrcMBB);
>
> - removeSuccessor(srcBlk);
> - retireBlock(dstBlk, srcBlk);
> -} //mergeSerialBlock
> + removeSuccessor(SrcMBB);
> + MLI->removeBlock(SrcMBB);
> + retireBlock(SrcMBB);
> +}
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
> - BlockT *curBlk,
> - BlockT *trueBlk,
> - BlockT *falseBlk,
> - BlockT *landBlk) {
> +void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
> + MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
> + MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
> DEBUG(
> - dbgs() << "ifPattern BB" << curBlk->getNumber();
> + dbgs() << "ifPattern BB" << MBB->getNumber();
> dbgs() << "{ ";
> - if (trueBlk) {
> - dbgs() << "BB" << trueBlk->getNumber();
> + if (TrueMBB) {
> + dbgs() << "BB" << TrueMBB->getNumber();
> }
> dbgs() << " } else ";
> dbgs() << "{ ";
> - if (falseBlk) {
> - dbgs() << "BB" << falseBlk->getNumber();
> + if (FalseMBB) {
> + dbgs() << "BB" << FalseMBB->getNumber();
> }
> dbgs() << " }\n ";
> dbgs() << "landBlock: ";
> - if (landBlk == NULL) {
> + if (!LandMBB) {
> dbgs() << "NULL";
> } else {
> - dbgs() << "BB" << landBlk->getNumber();
> + dbgs() << "BB" << LandMBB->getNumber();
> }
> dbgs() << "\n";
> );
>
> - int oldOpcode = branchInstr->getOpcode();
> - DebugLoc branchDL = branchInstr->getDebugLoc();
> + int OldOpcode = BranchMI->getOpcode();
> + DebugLoc BranchDL = BranchMI->getDebugLoc();
>
> // transform to
> // if cond
> @@ -1401,1645 +1493,390 @@ void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
> // endif
> // landBlk
>
> - typename BlockT::iterator branchInstrPos =
> - CFGTraits::getInstrPos(curBlk, branchInstr);
> - CFGTraits::insertCondBranchBefore(branchInstrPos,
> - CFGTraits::getBranchNzeroOpcode(oldOpcode),
> - passRep,
> - branchDL);
> -
> - if (trueBlk) {
> - curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
> - curBlk->removeSuccessor(trueBlk);
> - if (landBlk && trueBlk->succ_size()!=0) {
> - trueBlk->removeSuccessor(landBlk);
> - }
> - retireBlock(curBlk, trueBlk);
> - }
> - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
> -
> - if (falseBlk) {
> - curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
> - falseBlk->end());
> - curBlk->removeSuccessor(falseBlk);
> - if (landBlk && falseBlk->succ_size() != 0) {
> - falseBlk->removeSuccessor(landBlk);
> - }
> - retireBlock(curBlk, falseBlk);
> - }
> - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
> + MachineBasicBlock::iterator I = BranchMI;
> + insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
> + BranchDL);
>
> - branchInstr->eraseFromParent();
> + if (TrueMBB) {
> + MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
> + MBB->removeSuccessor(TrueMBB);
> + if (LandMBB && TrueMBB->succ_size()!=0)
> + TrueMBB->removeSuccessor(LandMBB);
> + retireBlock(TrueMBB);
> + MLI->removeBlock(TrueMBB);
> + }
>
> - if (landBlk && trueBlk && falseBlk) {
> - curBlk->addSuccessor(landBlk);
> + if (FalseMBB) {
> + insertInstrBefore(I, AMDGPU::ELSE);
> + MBB->splice(I, FalseMBB, FalseMBB->begin(),
> + FalseMBB->end());
> + MBB->removeSuccessor(FalseMBB);
> + if (LandMBB && FalseMBB->succ_size() != 0)
> + FalseMBB->removeSuccessor(LandMBB);
> + retireBlock(FalseMBB);
> + MLI->removeBlock(FalseMBB);
> }
> + insertInstrBefore(I, AMDGPU::ENDIF);
>
> -} //mergeIfthenelseBlock
> + BranchMI->eraseFromParent();
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
> - LoopLandInfo *loopLand) {
> - BlockT *landBlk = loopLand->landBlk;
> + if (LandMBB && TrueMBB && FalseMBB)
> + MBB->addSuccessor(LandMBB);
>
> - DEBUG(
> - dbgs() << "loopPattern header = BB" << dstBlk->getNumber()
> - << " land = BB" << landBlk->getNumber() << "\n";
> - );
> +}
>
> - // Loop contInitRegs are init at the beginning of the loop.
> - for (typename std::set<RegiT>::const_iterator iter =
> - loopLand->contInitRegs.begin(),
> - iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
> - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
> - }
> +void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
> + MachineBasicBlock *LandMBB) {
> + DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
> + << " land = BB" << LandMBB->getNumber() << "\n";);
>
> /* we last inserterd the DebugLoc in the
> - * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
> + * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current
> + * dstBlk.
> * search for the DebugLoc in the that statement.
> * if not found, we have to insert the empty/default DebugLoc */
> - InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
> - DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
> -
> - CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
> - // Loop breakInitRegs are init before entering the loop.
> - for (typename std::set<RegiT>::const_iterator iter =
> - loopLand->breakInitRegs.begin(),
> - iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
> - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
> - }
> - // Loop endbranchInitRegs are init before entering the loop.
> - for (typename std::set<RegiT>::const_iterator iter =
> - loopLand->endbranchInitRegs.begin(),
> - iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
> - CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
> - }
> + MachineInstr *LoopBreakInstr = getLoopBreakInstr(DstBlk);
> + DebugLoc DLBreak = (LoopBreakInstr) ? LoopBreakInstr->getDebugLoc() :
> + DebugLoc();
> +
> + insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DLBreak);
>
> - /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
> + /* we last inserted the DebugLoc in the continue statement in the current
> + * dstBlk.
> * search for the DebugLoc in the continue statement.
> * if not found, we have to insert the empty/default DebugLoc */
> - InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
> - DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
> -
> - CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
> - // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
> - // loop.
> - for (typename std::set<RegiT>::const_iterator iter =
> - loopLand->breakOnRegs.begin(),
> - iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
> - CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
> - *iter);
> - }
> -
> - // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
> - // loop.
> - for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
> - iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
> - CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
> - passRep, *iter);
> - }
> -
> - dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
> -
> - for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
> - iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
> - dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of.
> - }
> + MachineInstr *ContinueInstr = getContinueInstr(DstBlk);
> + DebugLoc DLContinue = (ContinueInstr) ? ContinueInstr->getDebugLoc() :
> + DebugLoc();
>
> - removeSuccessor(landBlk);
> - retireBlock(dstBlk, landBlk);
> -} //mergeLooplandBlock
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
> - while (I--) {
> - if (I->getOpcode() == AMDGPU::PRED_X) {
> - switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
> - case OPCODE_IS_ZERO_INT:
> - static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
> - return;
> - case OPCODE_IS_NOT_ZERO_INT:
> - static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
> - return;
> - case OPCODE_IS_ZERO:
> - static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
> - return;
> - case OPCODE_IS_NOT_ZERO:
> - static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
> - return;
> - default:
> - llvm_unreachable("PRED_X Opcode invalid!");
> - }
> - }
> - }
> + insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DLContinue);
> + DstBlk->addSuccessor(LandMBB);
> + DstBlk->removeSuccessor(DstBlk);
> }
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
> - BlockT *exitBlk,
> - BlockT *exitLandBlk,
> - RegiT setReg) {
> - DEBUG(
> - dbgs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
> - << " exit = BB" << exitBlk->getNumber()
> - << " land = BB" << exitLandBlk->getNumber() << "\n";
> - );
> -
> - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
> - assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
> -
> - DebugLoc DL = branchInstr->getDebugLoc();
> -
> - BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
> -
> - // transform exitingBlk to
> - // if ( ) {
> - // exitBlk (if exitBlk != exitLandBlk)
> - // setReg = 1
> - // break
> - // }endif
> - // successor = {orgSuccessor(exitingBlk) - exitBlk}
> -
> - typename BlockT::iterator branchInstrPos =
> - CFGTraits::getInstrPos(exitingBlk, branchInstr);
> -
> - if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
> - //break_logical
> -
> - if (trueBranch != exitBlk) {
> - reversePredicateSetter(branchInstrPos);
> - }
> - CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
> - } else {
> - if (trueBranch != exitBlk) {
> - reversePredicateSetter(branchInstr);
> - }
> - CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
> - if (exitBlk != exitLandBlk) {
> - //splice is insert-before ...
> - exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
> - exitBlk->end());
> - }
> - if (setReg != INVALIDREGNUM) {
> - CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
> - }
> - CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
> - } //if_logical
>
> +void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
> + MachineBasicBlock *LandMBB) {
> + DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
> + << " land = BB" << LandMBB->getNumber() << "\n";);
> + MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
> + assert(BranchMI && isCondBranch(BranchMI));
> + DebugLoc DL = BranchMI->getDebugLoc();
> + MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
> + MachineBasicBlock::iterator I = BranchMI;
> + if (TrueBranch != LandMBB)
> + reversePredicateSetter(I);
> + insertCondBranchBefore(I, AMDGPU::PREDICATED_BREAK, DL);
> //now branchInst can be erase safely
> - branchInstr->eraseFromParent();
> -
> + BranchMI->eraseFromParent();
> //now take care of successors, retire blocks
> - exitingBlk->removeSuccessor(exitBlk);
> - if (exitBlk != exitLandBlk) {
> - //splice is insert-before ...
> - exitBlk->removeSuccessor(exitLandBlk);
> - retireBlock(exitingBlk, exitBlk);
> - }
> -
> -} //mergeLoopbreakBlock
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
> - BlockT *contBlk,
> - RegiT setReg) {
> - DEBUG(
> - dbgs() << "settleLoopcontBlock conting = BB"
> - << contingBlk->getNumber()
> - << ", cont = BB" << contBlk->getNumber() << "\n";
> - );
> -
> - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
> - if (branchInstr) {
> - assert(CFGTraits::isCondBranch(branchInstr));
> - typename BlockT::iterator branchInstrPos =
> - CFGTraits::getInstrPos(contingBlk, branchInstr);
> - BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
> - int oldOpcode = branchInstr->getOpcode();
> - DebugLoc DL = branchInstr->getDebugLoc();
> -
> - // transform contingBlk to
> - // if () {
> - // move instr after branchInstr
> - // continue
> - // or
> - // setReg = 1
> - // break
> - // }endif
> - // successor = {orgSuccessor(contingBlk) - loopHeader}
> -
> - bool useContinueLogical =
> - (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
> -
> - if (useContinueLogical == false) {
> - int branchOpcode =
> - trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
> - : CFGTraits::getBranchZeroOpcode(oldOpcode);
> -
> - CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
> -
> - if (setReg != INVALIDREGNUM) {
> - CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
> - // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
> - } else {
> - // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
> - }
> + ExitingMBB->removeSuccessor(LandMBB);
> +}
>
> - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
> +void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
> + MachineBasicBlock *ContMBB) {
> + DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
> + << ContingMBB->getNumber()
> + << ", cont = BB" << ContMBB->getNumber() << "\n";);
> +
> + MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
> + if (MI) {
> + assert(isCondBranch(MI));
> + MachineBasicBlock::iterator I = MI;
> + MachineBasicBlock *TrueBranch = getTrueBranch(MI);
> + int OldOpcode = MI->getOpcode();
> + DebugLoc DL = MI->getDebugLoc();
> +
> + bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
> +
> + if (UseContinueLogical == false) {
> + int BranchOpcode =
> + TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
> + getBranchZeroOpcode(OldOpcode);
> + insertCondBranchBefore(I, BranchOpcode, DL);
> + // insertEnd to ensure phi-moves, if they exist, go before the continue-instr.
> + insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
> + insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
> } else {
> - int branchOpcode =
> - trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
> - : CFGTraits::getContinueZeroOpcode(oldOpcode);
> -
> - CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
> + int BranchOpcode =
> + TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
> + getContinueZeroOpcode(OldOpcode);
> + insertCondBranchBefore(I, BranchOpcode, DL);
> }
>
> - branchInstr->eraseFromParent();
> + MI->eraseFromParent();
> } else {
> // if we've arrived here then we've already erased the branch instruction
> - // travel back up the basic block to see the last reference of our debug location
> - // we've just inserted that reference here so it should be representative
> - if (setReg != INVALIDREGNUM) {
> - CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
> - // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
> - } else {
> - // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> - CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
> - }
> - } //else
> -
> -} //settleLoopcontBlock
> -
> -// BBs in exitBlkSet are determined as in break-path for loopRep,
> -// before we can put code for BBs as inside loop-body for loopRep
> -// check whether those BBs are determined as cont-BB for parentLoopRep
> -// earlier.
> -// If so, generate a new BB newBlk
> -// (1) set newBlk common successor of BBs in exitBlkSet
> -// (2) change the continue-instr in BBs in exitBlkSet to break-instr
> -// (3) generate continue-instr in newBlk
> -//
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
> - LoopT *loopRep,
> - std::set<BlockT *> &exitBlkSet,
> - BlockT *exitLandBlk) {
> - std::set<BlockT *> endBlkSet;
> -
> -
> -
> - for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
> - iterEnd = exitBlkSet.end();
> - iter != iterEnd; ++iter) {
> - BlockT *exitBlk = *iter;
> - BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
> -
> - if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
> - return NULL;
> -
> - endBlkSet.insert(endBlk);
> - }
> -
> - BlockT *newBlk = funcRep->CreateMachineBasicBlock();
> - funcRep->push_back(newBlk); //insert to function
> - CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
> - SHOWNEWBLK(newBlk, "New continue block: ");
> -
> - for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
> - iterEnd = endBlkSet.end();
> - iter != iterEnd; ++iter) {
> - BlockT *endBlk = *iter;
> - InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
> - if (contInstr) {
> - contInstr->eraseFromParent();
> - }
> - endBlk->addSuccessor(newBlk);
> - DEBUG(
> - dbgs() << "Add new continue Block to BB"
> - << endBlk->getNumber() << " successors\n";
> - );
> - }
> -
> - return newBlk;
> -} //relocateLoopcontBlock
> -
> -
> -// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
> -// LoopLandBlock. This BB branch on the loop endBranchInit register to the
> -// pathes corresponding to the loop exiting branches.
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
> - BlockTSmallerVector &exitingBlks,
> - BlockTSmallerVector &exitBlks) {
> - const AMDGPUInstrInfo *tii =
> - static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
> - const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> -
> - RegiT endBranchReg = static_cast<int>
> - (funcRep->getRegInfo().createVirtualRegister(I32RC));
> - assert(endBranchReg >= 0);
> -
> - // reg = 0 before entering the loop
> - addLoopEndbranchInitReg(loopRep, endBranchReg);
> -
> - uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
> - assert(numBlks >=2 && numBlks == exitBlks.size());
> -
> - BlockT *preExitingBlk = exitingBlks[0];
> - BlockT *preExitBlk = exitBlks[0];
> - BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
> - funcRep->push_back(preBranchBlk); //insert to function
> - SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
> -
> - BlockT *newLandBlk = preBranchBlk;
> -
> - CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
> - newLandBlk);
> - preExitingBlk->removeSuccessor(preExitBlk);
> - preExitingBlk->addSuccessor(newLandBlk);
> -
> - //it is redundant to add reg = 0 to exitingBlks[0]
> -
> - // For 1..n th exiting path (the last iteration handles two pathes) create the
> - // branch to the previous path and the current path.
> - for (uint32_t i = 1; i < numBlks; ++i) {
> - BlockT *curExitingBlk = exitingBlks[i];
> - BlockT *curExitBlk = exitBlks[i];
> - BlockT *curBranchBlk;
> -
> - if (i == numBlks - 1) {
> - curBranchBlk = curExitBlk;
> - } else {
> - curBranchBlk = funcRep->CreateMachineBasicBlock();
> - funcRep->push_back(curBranchBlk); //insert to function
> - SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
> - }
> -
> - // Add reg = i to exitingBlks[i].
> - CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
> - endBranchReg, i);
> -
> - // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
> - // (exitingBlks[i], newLandBlk).
> - CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
> - newLandBlk);
> - curExitingBlk->removeSuccessor(curExitBlk);
> - curExitingBlk->addSuccessor(newLandBlk);
> -
> - // add to preBranchBlk the branch instruction:
> - // if (endBranchReg == preVal)
> - // preExitBlk
> - // else
> - // curBranchBlk
> - //
> - // preValReg = i - 1
> -
> - DebugLoc DL;
> - RegiT preValReg = static_cast<int>
> - (funcRep->getRegInfo().createVirtualRegister(I32RC));
> -
> - preBranchBlk->insert(preBranchBlk->begin(),
> - tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
> - i - 1));
> -
> - // condResReg = (endBranchReg == preValReg)
> - RegiT condResReg = static_cast<int>
> - (funcRep->getRegInfo().createVirtualRegister(I32RC));
> - BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
> - .addReg(endBranchReg).addReg(preValReg);
> -
> - BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
> - .addMBB(preExitBlk).addReg(condResReg);
> -
> - preBranchBlk->addSuccessor(preExitBlk);
> - preBranchBlk->addSuccessor(curBranchBlk);
> -
> - // Update preExitingBlk, preExitBlk, preBranchBlk.
> - preExitingBlk = curExitingBlk;
> - preExitBlk = curExitBlk;
> - preBranchBlk = curBranchBlk;
> -
> - } //end for 1 .. n blocks
> -
> - return newLandBlk;
> -} //addLoopEndbranchBlock
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::PathToKind
> -CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
> - bool allowSideEntry) {
> - assert(dstBlk);
> -
> - if (srcBlk == dstBlk) {
> - return SinglePath_InPath;
> - }
> -
> - while (srcBlk && srcBlk->succ_size() == 1) {
> - srcBlk = *srcBlk->succ_begin();
> - if (srcBlk == dstBlk) {
> - return SinglePath_InPath;
> - }
> -
> - if (!allowSideEntry && srcBlk->pred_size() > 1) {
> - return Not_SinglePath;
> - }
> - }
> -
> - if (srcBlk && srcBlk->succ_size()==0) {
> - return SinglePath_NotInPath;
> - }
> -
> - return Not_SinglePath;
> -} //singlePathTo
> -
> -// If there is a single path from srcBlk to dstBlk, return the last block before
> -// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
> -// last block in the path Otherwise, return NULL
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
> - bool allowSideEntry) {
> - assert(dstBlk);
> -
> - if (srcBlk == dstBlk) {
> - return srcBlk;
> - }
> -
> - if (srcBlk->succ_size() == 0) {
> - return srcBlk;
> - }
> -
> - while (srcBlk && srcBlk->succ_size() == 1) {
> - BlockT *preBlk = srcBlk;
> -
> - srcBlk = *srcBlk->succ_begin();
> - if (srcBlk == NULL) {
> - return preBlk;
> - }
> -
> - if (!allowSideEntry && srcBlk->pred_size() > 1) {
> - return NULL;
> - }
> - }
> -
> - if (srcBlk && srcBlk->succ_size()==0) {
> - return srcBlk;
> + // Travel back up the basic block to find the last reference to our debug
> + // location; we've just inserted that reference here, so it should be
> + // representative. Use insertEnd to ensure phi-moves, if any exist, go
> + // before the continue-instr.
> + insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
> + getLastDebugLocInBB(ContingMBB));
> }
> +}
>
> - return NULL;
> -
> -} //singlePathEnd
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
> - BlockT *dstBlk) {
> - int cloned = 0;
> - assert(preBlk->isSuccessor(srcBlk));
> - while (srcBlk && srcBlk != dstBlk) {
> - assert(srcBlk->succ_size() == 1);
> - if (srcBlk->pred_size() > 1) {
> - srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
> - ++cloned;
> +int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
> + MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
> + int Cloned = 0;
> + assert(PreMBB->isSuccessor(SrcMBB));
> + while (SrcMBB && SrcMBB != DstMBB) {
> + assert(SrcMBB->succ_size() == 1);
> + if (SrcMBB->pred_size() > 1) {
> + SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
> + ++Cloned;
> }
>
> - preBlk = srcBlk;
> - srcBlk = *srcBlk->succ_begin();
> + PreMBB = SrcMBB;
> + SrcMBB = *SrcMBB->succ_begin();
> }
>
> - return cloned;
> -} //cloneOnSideEntryTo
> + return Cloned;
> +}
>
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
> - BlockT *predBlk) {
> - assert(predBlk->isSuccessor(curBlk) &&
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
> + MachineBasicBlock *PredMBB) {
> + assert(PredMBB->isSuccessor(MBB) &&
> "succBlk is not a prececessor of curBlk");
>
> - BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions
> - CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
> + MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions
> + replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
> //srcBlk, oldBlk, newBlk
>
> - predBlk->removeSuccessor(curBlk);
> - predBlk->addSuccessor(cloneBlk);
> + PredMBB->removeSuccessor(MBB);
> + PredMBB->addSuccessor(CloneMBB);
>
> // add all successor to cloneBlk
> - CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
> + cloneSuccessorList(CloneMBB, MBB);
>
> - numClonedInstr += curBlk->size();
> + numClonedInstr += MBB->size();
>
> DEBUG(
> dbgs() << "Cloned block: " << "BB"
> - << curBlk->getNumber() << "size " << curBlk->size() << "\n";
> + << MBB->getNumber() << "size " << MBB->size() << "\n";
> );
>
> - SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
> -
> - return cloneBlk;
> -} //cloneBlockForPredecessor
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
> - BlockT *exitingBlk) {
> - BlockT *exitBlk = NULL;
> -
> - for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
> - iterSuccEnd = exitingBlk->succ_end();
> - iterSucc != iterSuccEnd; ++iterSucc) {
> - BlockT *curBlk = *iterSucc;
> - if (!loopRep->contains(curBlk)) {
> - assert(exitBlk == NULL);
> - exitBlk = curBlk;
> - }
> - }
> -
> - assert(exitBlk != NULL);
> + SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
>
> - return exitBlk;
> -} //exitingBlock2ExitBlock
> + return CloneMBB;
> +}
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
> - BlockT *dstBlk,
> - InstrIterator insertPos) {
> - InstrIterator spliceEnd;
> +void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
> + MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
> + MachineBasicBlock::iterator SpliceEnd;
> //look for the input branchinstr, not the AMDGPU branchinstr
> - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
> - if (branchInstr == NULL) {
> + MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
> + if (!BranchMI) {
> DEBUG(
> dbgs() << "migrateInstruction don't see branch instr\n" ;
> );
> - spliceEnd = srcBlk->end();
> + SpliceEnd = SrcMBB->end();
> } else {
> DEBUG(
> dbgs() << "migrateInstruction see branch instr\n" ;
> - branchInstr->dump();
> + BranchMI->dump();
> );
> - spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
> + SpliceEnd = BranchMI;
> }
> DEBUG(
> - dbgs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
> - << "srcSize = " << srcBlk->size() << "\n";
> + dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
> + << "srcSize = " << SrcMBB->size() << "\n";
> );
>
> //splice insert before insertPos
> - dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
> + DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
>
> DEBUG(
> - dbgs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
> - << "srcSize = " << srcBlk->size() << "\n";
> + dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
> + << "srcSize = " << SrcMBB->size() << "\n";
> );
> -} //migrateInstruction
> +}
>
> -// normalizeInfiniteLoopExit change
> -// B1:
> -// uncond_br LoopHeader
> -//
> -// to
> -// B1:
> -// cond_br 1 LoopHeader dummyExit
> -// and return the newly added dummy exit block
> -//
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
> - BlockT *loopHeader;
> - BlockT *loopLatch;
> - loopHeader = LoopRep->getHeader();
> - loopLatch = LoopRep->getLoopLatch();
> - BlockT *dummyExitBlk = NULL;
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
> + MachineBasicBlock *LoopHeader = LoopRep->getHeader();
> + MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
> const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> - if (loopHeader!=NULL && loopLatch!=NULL) {
> - InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
> - if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
> - dummyExitBlk = funcRep->CreateMachineBasicBlock();
> - funcRep->push_back(dummyExitBlk); //insert to function
> - SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
> -
> - DEBUG(dbgs() << "Old branch instr: " << *branchInstr << "\n";);
> -
> - typename BlockT::iterator insertPos =
> - CFGTraits::getInstrPos(loopLatch, branchInstr);
> - unsigned immReg =
> - funcRep->getRegInfo().createVirtualRegister(I32RC);
> - CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
> - InstrT *newInstr =
> - CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
> - MachineInstrBuilder MIB(*funcRep, newInstr);
> - MIB.addMBB(loopHeader);
> - MIB.addReg(immReg, false);
> -
> - SHOWNEWINSTR(newInstr);
> -
> - branchInstr->eraseFromParent();
> - loopLatch->addSuccessor(dummyExitBlk);
> - }
> - }
>
> - return dummyExitBlk;
> -} //normalizeInfiniteLoopExit
> + if (!LoopHeader || !LoopLatch)
> + return NULL;
> + MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
> + // Is LoopRep an infinite loop?
> + if (!BranchMI || !isUncondBranch(BranchMI))
> + return NULL;
> +
> + MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
> + FuncRep->push_back(DummyExitBlk); //insert to function
> + SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
> + DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
> + MachineBasicBlock::iterator I = BranchMI;
> + unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
> + llvm_unreachable("Extra register needed to handle CFG");
> + MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
> + MachineInstrBuilder MIB(*FuncRep, NewMI);
> + MIB.addMBB(LoopHeader);
> + MIB.addReg(ImmReg, false);
> + SHOWNEWINSTR(NewMI);
> + BranchMI->eraseFromParent();
> + LoopLatch->addSuccessor(DummyExitBlk);
> +
> + return DummyExitBlk;
> +}
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
> - InstrT *branchInstr;
> +void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
> + MachineInstr *BranchMI;
>
> // I saw two unconditional branch in one basic block in example
> // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
> - while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
> - && CFGTraits::isUncondBranch(branchInstr)) {
> - DEBUG(
> - dbgs() << "Removing unconditional branch instruction" ;
> - branchInstr->dump();
> - );
> - branchInstr->eraseFromParent();
> + while ((BranchMI = getLoopendBlockBranchInstr(MBB))
> + && isUncondBranch(BranchMI)) {
> + DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
> + BranchMI->eraseFromParent();
> }
> -} //removeUnconditionalBranch
> +}
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
> - if (srcBlk->succ_size() == 2) {
> - BlockT *blk1 = *srcBlk->succ_begin();
> - BlockT *blk2 = *(++srcBlk->succ_begin());
> +void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
> + MachineBasicBlock *MBB) {
> + if (MBB->succ_size() != 2)
> + return;
> + MachineBasicBlock *MBB1 = *MBB->succ_begin();
> + MachineBasicBlock *MBB2 = *(++MBB->succ_begin());
> + if (MBB1 != MBB2)
> + return;
> +
> + MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
> + assert(BranchMI && isCondBranch(BranchMI));
> + DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
> + BranchMI->eraseFromParent();
> + SHOWNEWBLK(MBB1, "Removing redundant successor");
> + MBB->removeSuccessor(MBB1);
> +}
>
> - if (blk1 == blk2) {
> - InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
> - assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
> - DEBUG(
> - dbgs() << "Removing unneeded conditional branch instruction" ;
> - branchInstr->dump();
> - );
> - branchInstr->eraseFromParent();
> - SHOWNEWBLK(blk1, "Removing redundant successor");
> - srcBlk->removeSuccessor(blk1);
> - }
> - }
> -} //removeRedundantConditionalBranch
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
> - DEFAULT_VEC_SLOTS> &retBlks) {
> - BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
> - funcRep->push_back(dummyExitBlk); //insert to function
> - CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
> -
> - for (typename SmallVectorImpl<BlockT *>::iterator iter =
> - retBlks.begin(),
> - iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
> - BlockT *curBlk = *iter;
> - InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
> - if (curInstr) {
> - curInstr->eraseFromParent();
> - }
> - curBlk->addSuccessor(dummyExitBlk);
> +void AMDGPUCFGStructurizer::addDummyExitBlock(
> + SmallVector<MachineBasicBlock*, DEFAULT_VEC_SLOTS> &RetMBB) {
> + MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
> + FuncRep->push_back(DummyExitBlk); //insert to function
> + insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
> +
> + for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
> + E = RetMBB.end(); It != E; ++It) {
> + MachineBasicBlock *MBB = *It;
> + MachineInstr *MI = getReturnInstr(MBB);
> + if (MI)
> + MI->eraseFromParent();
> + MBB->addSuccessor(DummyExitBlk);
> DEBUG(
> - dbgs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
> + dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
> << " successors\n";
> );
> - } //for
> -
> - SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
> -} //addDummyExitBlock
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
> - while (srcBlk->succ_size()) {
> - srcBlk->removeSuccessor(*srcBlk->succ_begin());
> }
> + SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
> }
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
> - BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
> -
> - if (srcBlkInfo == NULL) {
> - srcBlkInfo = new BlockInfo();
> - }
> -
> - srcBlkInfo->sccNum = sccNum;
> +void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
> + while (MBB->succ_size())
> + MBB->removeSuccessor(*MBB->succ_begin());
> }
>
> -template<class PassT>
> -int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
> - BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
> - return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
> +void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
> + int SccNum) {
> + BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
> + if (!srcBlkInfo)
> + srcBlkInfo = new BlockInformation();
> + srcBlkInfo->SccNum = SccNum;
> }
>
> -template<class PassT>
> -void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
> +void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
> DEBUG(
> - dbgs() << "Retiring BB" << srcBlk->getNumber() << "\n";
> + dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
> );
>
> - BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
> + BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
>
> - if (srcBlkInfo == NULL) {
> - srcBlkInfo = new BlockInfo();
> - }
> + if (!SrcBlkInfo)
> + SrcBlkInfo = new BlockInformation();
>
> - srcBlkInfo->isRetired = true;
> - assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
> + SrcBlkInfo->IsRetired = true;
> + assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
> && "can't retire block yet");
> }
>
> -template<class PassT>
> -bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
> - BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
> - return (srcBlkInfo && srcBlkInfo->isRetired);
> -}
> -
> -template<class PassT>
> -bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
> - LoopT *loopRep = loopInfo->getLoopFor(curBlk);
> - while (loopRep && loopRep->getHeader() == curBlk) {
> - LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
> -
> - if(loopLand == NULL)
> - return true;
> -
> - BlockT *landBlk = loopLand->landBlk;
> - assert(landBlk);
> - if (!isRetiredBlock(landBlk)) {
> - return true;
> - }
> -
> - loopRep = loopRep->getParentLoop();
> - }
> -
> - return false;
> -} //isActiveLoophead
> -
> -template<class PassT>
> -bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
> - const unsigned blockSizeThreshold = 30;
> - const unsigned cloneInstrThreshold = 100;
> -
> - bool multiplePreds = blk && (blk->pred_size() > 1);
> -
> - if(!multiplePreds)
> - return false;
> -
> - unsigned blkSize = blk->size();
> - return ((blkSize > blockSizeThreshold)
> - && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
> -} //needMigrateBlock
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
> - BlockTSmallerVector &exitBlks,
> - std::set<BlockT *> &exitBlkSet) {
> - SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks
> -
> - for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
> - predIterEnd = landBlk->pred_end();
> - predIter != predIterEnd; ++predIter) {
> - BlockT *curBlk = *predIter;
> - if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
> - inpathBlks.push_back(curBlk);
> - }
> - } //for
> -
> - //if landBlk has predecessors that are not in the given loop,
> - //create a new block
> - BlockT *newLandBlk = landBlk;
> - if (inpathBlks.size() != landBlk->pred_size()) {
> - newLandBlk = funcRep->CreateMachineBasicBlock();
> - funcRep->push_back(newLandBlk); //insert to function
> - newLandBlk->addSuccessor(landBlk);
> - for (typename SmallVectorImpl<BlockT *>::iterator iter =
> - inpathBlks.begin(),
> - iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
> - BlockT *curBlk = *iter;
> - CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
> - //srcBlk, oldBlk, newBlk
> - curBlk->removeSuccessor(landBlk);
> - curBlk->addSuccessor(newLandBlk);
> - }
> - for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
> - if (exitBlks[i] == landBlk) {
> - exitBlks[i] = newLandBlk;
> - }
> - }
> - SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
> - }
> -
> - setLoopLandBlock(loopRep, newLandBlk);
> -
> - return newLandBlk;
> -} // recordLoopbreakLand
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> - if (theEntry == NULL) {
> - theEntry = new LoopLandInfo();
> +void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
> + MachineBasicBlock *MBB) {
> + MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
> + if (!MBB) {
> + MBB = FuncRep->CreateMachineBasicBlock();
> + FuncRep->push_back(MBB); //insert to function
> + SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
> }
> - assert(theEntry->landBlk == NULL);
> -
> - if (blk == NULL) {
> - blk = funcRep->CreateMachineBasicBlock();
> - funcRep->push_back(blk); //insert to function
> - SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
> - }
> -
> - theEntry->landBlk = blk;
> -
> + TheEntry = MBB;
> DEBUG(
> dbgs() << "setLoopLandBlock loop-header = BB"
> << loopRep->getHeader()->getNumber()
> - << " landing-block = BB" << blk->getNumber() << "\n";
> - );
> -} // setLoopLandBlock
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> - if (theEntry == NULL) {
> - theEntry = new LoopLandInfo();
> - }
> -
> - theEntry->breakOnRegs.insert(regNum);
> -
> - DEBUG(
> - dbgs() << "addLoopBreakOnReg loop-header = BB"
> - << loopRep->getHeader()->getNumber()
> - << " regNum = " << regNum << "\n";
> - );
> -} // addLoopBreakOnReg
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> - if (theEntry == NULL) {
> - theEntry = new LoopLandInfo();
> - }
> - theEntry->contOnRegs.insert(regNum);
> -
> - DEBUG(
> - dbgs() << "addLoopContOnReg loop-header = BB"
> - << loopRep->getHeader()->getNumber()
> - << " regNum = " << regNum << "\n";
> - );
> -} // addLoopContOnReg
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> - if (theEntry == NULL) {
> - theEntry = new LoopLandInfo();
> - }
> - theEntry->breakInitRegs.insert(regNum);
> -
> - DEBUG(
> - dbgs() << "addLoopBreakInitReg loop-header = BB"
> - << loopRep->getHeader()->getNumber()
> - << " regNum = " << regNum << "\n";
> + << " landing-block = BB" << MBB->getNumber() << "\n";
> );
> -} // addLoopBreakInitReg
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> - if (theEntry == NULL) {
> - theEntry = new LoopLandInfo();
> - }
> - theEntry->contInitRegs.insert(regNum);
> -
> - DEBUG(
> - dbgs() << "addLoopContInitReg loop-header = BB"
> - << loopRep->getHeader()->getNumber()
> - << " regNum = " << regNum << "\n";
> - );
> -} // addLoopContInitReg
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
> - RegiT regNum) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> - if (theEntry == NULL) {
> - theEntry = new LoopLandInfo();
> - }
> - theEntry->endbranchInitRegs.insert(regNum);
> -
> - DEBUG(
> - dbgs() << "addLoopEndbranchInitReg loop-header = BB"
> - << loopRep->getHeader()->getNumber()
> - << " regNum = " << regNum << "\n";
> - );
> -} // addLoopEndbranchInitReg
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::LoopLandInfo *
> -CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> - return theEntry;
> -} // getLoopLandInfo
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
> - LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> - return theEntry ? theEntry->landBlk : NULL;
> -} // getLoopLandBlock
> -
> -
> -template<class PassT>
> -bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
> - LoopT *loopRep = loopInfo->getLoopFor(curBlk);
> - if (loopRep == NULL)
> - return false;
> -
> - BlockT *loopHeader = loopRep->getHeader();
> -
> - return curBlk->isSuccessor(loopHeader);
> -
> -} //hasBackEdge
> -
> -template<class PassT>
> -unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
> - return loopRep ? loopRep->getLoopDepth() : 0;
> -} //getLoopDepth
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::countActiveBlock
> -(typename SmallVectorImpl<BlockT *>::const_iterator iterStart,
> - typename SmallVectorImpl<BlockT *>::const_iterator iterEnd) {
> - int count = 0;
> - while (iterStart != iterEnd) {
> - if (!isRetiredBlock(*iterStart)) {
> - ++count;
> - }
> - ++iterStart;
> - }
> -
> - return count;
> -} //countActiveBlock
> -
> -// This is work around solution for findNearestCommonDominator not avaiable to
> -// post dom a proper fix should go to Dominators.h.
> +}
>
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT*
> -CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
> + MachineBasicBlock *MBB2) {
>
> - if (postDomTree->dominates(blk1, blk2)) {
> - return blk1;
> - }
> - if (postDomTree->dominates(blk2, blk1)) {
> - return blk2;
> - }
> + if (PDT->dominates(MBB1, MBB2))
> + return MBB1;
> + if (PDT->dominates(MBB2, MBB1))
> + return MBB2;
>
> - DomTreeNodeT *node1 = postDomTree->getNode(blk1);
> - DomTreeNodeT *node2 = postDomTree->getNode(blk2);
> + MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
> + MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
>
> // Handle newly cloned node.
> - if (node1 == NULL && blk1->succ_size() == 1) {
> - return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
> - }
> - if (node2 == NULL && blk2->succ_size() == 1) {
> - return findNearestCommonPostDom(blk1, *blk2->succ_begin());
> - }
> + if (!Node1 && MBB1->succ_size() == 1)
> + return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
> + if (!Node2 && MBB2->succ_size() == 1)
> + return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
>
> - if (node1 == NULL || node2 == NULL) {
> + if (!Node1 || !Node2)
> return NULL;
> - }
>
> - node1 = node1->getIDom();
> - while (node1) {
> - if (postDomTree->dominates(node1, node2)) {
> - return node1->getBlock();
> - }
> - node1 = node1->getIDom();
> + Node1 = Node1->getIDom();
> + while (Node1) {
> + if (PDT->dominates(Node1, Node2))
> + return Node1->getBlock();
> + Node1 = Node1->getIDom();
> }
>
> return NULL;
> }
>
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::findNearestCommonPostDom
> -(typename std::set<BlockT *> &blks) {
> - BlockT *commonDom;
> - typename std::set<BlockT *>::const_iterator iter = blks.begin();
> - typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
> - for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
> - BlockT *curBlk = *iter;
> - if (curBlk != commonDom) {
> - commonDom = findNearestCommonPostDom(curBlk, commonDom);
> - }
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::findNearestCommonPostDom(
> + std::set<MachineBasicBlock *> &MBBs) {
> + MachineBasicBlock *CommonDom;
> + std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
> + std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
> + for (CommonDom = *It; It != E && CommonDom; ++It) {
> + MachineBasicBlock *MBB = *It;
> + if (MBB != CommonDom)
> + CommonDom = findNearestCommonPostDom(MBB, CommonDom);
> }
>
> DEBUG(
> dbgs() << "Common post dominator for exit blocks is ";
> - if (commonDom) {
> - dbgs() << "BB" << commonDom->getNumber() << "\n";
> - } else {
> + if (CommonDom)
> + dbgs() << "BB" << CommonDom->getNumber() << "\n";
> + else
> dbgs() << "NULL\n";
> - }
> );
>
> - return commonDom;
> -} //findNearestCommonPostDom
> -
> -} // end anonymous namespace
> -
> -//todo: move-end
> -
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// CFGStructurizer for AMDGPU
> -//
> -//===----------------------------------------------------------------------===//
> -
> -
> -namespace {
> -class AMDGPUCFGStructurizer : public MachineFunctionPass {
> -public:
> - typedef MachineInstr InstructionType;
> - typedef MachineFunction FunctionType;
> - typedef MachineBasicBlock BlockType;
> - typedef MachineLoopInfo LoopinfoType;
> - typedef MachineDominatorTree DominatortreeType;
> - typedef MachinePostDominatorTree PostDominatortreeType;
> - typedef MachineDomTreeNode DomTreeNodeType;
> - typedef MachineLoop LoopType;
> -
> -protected:
> - TargetMachine &TM;
> -
> -public:
> - AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
> - const TargetInstrInfo *getTargetInstrInfo() const;
> - const AMDGPURegisterInfo *getTargetRegisterInfo() const;
> -};
> -
> -} // end anonymous namespace
> -AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
> - : MachineFunctionPass(pid), TM(tm) {
> -}
> -
> -const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
> - return TM.getInstrInfo();
> -}
> -
> -const AMDGPURegisterInfo *AMDGPUCFGStructurizer::getTargetRegisterInfo() const {
> - return static_cast<const AMDGPURegisterInfo *>(TM.getRegisterInfo());
> -}
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// CFGPrepare
> -//
> -//===----------------------------------------------------------------------===//
> -
> -
> -namespace {
> -class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
> -public:
> - static char ID;
> -
> -public:
> - AMDGPUCFGPrepare(TargetMachine &tm);
> -
> - virtual const char *getPassName() const;
> - virtual void getAnalysisUsage(AnalysisUsage &AU) const;
> -
> - bool runOnMachineFunction(MachineFunction &F);
> -};
> -
> -char AMDGPUCFGPrepare::ID = 0;
> -} // end anonymous namespace
> -
> -AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
> - : AMDGPUCFGStructurizer(ID, tm ) {
> + return CommonDom;
> }
> -const char *AMDGPUCFGPrepare::getPassName() const {
> - return "AMD IL Control Flow Graph Preparation Pass";
> -}
> -
> -void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
> - AU.addPreserved<MachineFunctionAnalysis>();
> - AU.addRequired<MachineFunctionAnalysis>();
> - AU.addRequired<MachineDominatorTree>();
> - AU.addRequired<MachinePostDominatorTree>();
> - AU.addRequired<MachineLoopInfo>();
> -}
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// CFGPerform
> -//
> -//===----------------------------------------------------------------------===//
> -
> -
> -namespace {
> -class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
> -public:
> - static char ID;
> -
> -public:
> - AMDGPUCFGPerform(TargetMachine &tm);
> - virtual const char *getPassName() const;
> - virtual void getAnalysisUsage(AnalysisUsage &AU) const;
> - bool runOnMachineFunction(MachineFunction &F);
> -};
> -
> -char AMDGPUCFGPerform::ID = 0;
> -} // end anonymous namespace
> -
> - AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
> -: AMDGPUCFGStructurizer(ID, tm) {
> -}
> -
> -const char *AMDGPUCFGPerform::getPassName() const {
> - return "AMD IL Control Flow Graph structurizer Pass";
> -}
> -
> -void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
> - AU.addPreserved<MachineFunctionAnalysis>();
> - AU.addRequired<MachineFunctionAnalysis>();
> - AU.addRequired<MachineDominatorTree>();
> - AU.addRequired<MachinePostDominatorTree>();
> - AU.addRequired<MachineLoopInfo>();
> -}
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// CFGStructTraits<AMDGPUCFGStructurizer>
> -//
> -//===----------------------------------------------------------------------===//
> -
> -namespace {
> -// this class is tailor to the AMDGPU backend
> -template<>
> -struct CFGStructTraits<AMDGPUCFGStructurizer> {
> - typedef int RegiT;
> -
> - static int getBranchNzeroOpcode(int oldOpcode) {
> - switch(oldOpcode) {
> - case AMDGPU::JUMP_COND:
> - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
> - case AMDGPU::BRANCH_COND_i32:
> - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
> - default:
> - llvm_unreachable("internal error");
> - }
> - return -1;
> - }
> -
> - static int getBranchZeroOpcode(int oldOpcode) {
> - switch(oldOpcode) {
> - case AMDGPU::JUMP_COND:
> - case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
> - case AMDGPU::BRANCH_COND_i32:
> - case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
> - default:
> - llvm_unreachable("internal error");
> - }
> - return -1;
> - }
> -
> - static int getContinueNzeroOpcode(int oldOpcode) {
> - switch(oldOpcode) {
> - case AMDGPU::JUMP_COND:
> - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
> - default:
> - llvm_unreachable("internal error");
> - };
> - return -1;
> - }
> -
> - static int getContinueZeroOpcode(int oldOpcode) {
> - switch(oldOpcode) {
> - case AMDGPU::JUMP_COND:
> - case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
> - default:
> - llvm_unreachable("internal error");
> - }
> - return -1;
> - }
> -
> - static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
> - return instr->getOperand(0).getMBB();
> - }
> -
> - static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
> - instr->getOperand(0).setMBB(blk);
> - }
> -
> - static MachineBasicBlock *
> - getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
> - assert(blk->succ_size() == 2);
> - MachineBasicBlock *trueBranch = getTrueBranch(instr);
> - MachineBasicBlock::succ_iterator iter = blk->succ_begin();
> - MachineBasicBlock::succ_iterator iterNext = iter;
> - ++iterNext;
> -
> - return (*iter == trueBranch) ? *iterNext : *iter;
> - }
> -
> - static bool isCondBranch(MachineInstr *instr) {
> - switch (instr->getOpcode()) {
> - case AMDGPU::JUMP_COND:
> - case AMDGPU::BRANCH_COND_i32:
> - case AMDGPU::BRANCH_COND_f32:
> - break;
> - default:
> - return false;
> - }
> - return true;
> - }
> -
> - static bool isUncondBranch(MachineInstr *instr) {
> - switch (instr->getOpcode()) {
> - case AMDGPU::JUMP:
> - case AMDGPU::BRANCH:
> - return true;
> - default:
> - return false;
> - }
> - return true;
> - }
> -
> - static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
> - //get DebugLoc from the first MachineBasicBlock instruction with debug info
> - DebugLoc DL;
> - for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
> - MachineInstr *instr = &(*iter);
> - if (instr->getDebugLoc().isUnknown() == false) {
> - DL = instr->getDebugLoc();
> - }
> - }
> - return DL;
> - }
> -
> - static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
> - MachineBasicBlock::reverse_iterator iter = blk->rbegin();
> - MachineInstr *instr = &*iter;
> - if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
> - return instr;
> - }
> - return NULL;
> - }
> -
> - // The correct naming for this is getPossibleLoopendBlockBranchInstr.
> - //
> - // BB with backward-edge could have move instructions after the branch
> - // instruction. Such move instruction "belong to" the loop backward-edge.
> - //
> - static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
> - const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
> - blk->getParent()->getTarget().getInstrInfo());
> -
> - for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
> - iterEnd = blk->rend(); iter != iterEnd; ++iter) {
> - // FIXME: Simplify
> - MachineInstr *instr = &*iter;
> - if (instr) {
> - if (isCondBranch(instr) || isUncondBranch(instr)) {
> - return instr;
> - } else if (!TII->isMov(instr->getOpcode())) {
> - break;
> - }
> - }
> - }
> - return NULL;
> - }
> -
> - static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
> - MachineBasicBlock::reverse_iterator iter = blk->rbegin();
> - if (iter != blk->rend()) {
> - MachineInstr *instr = &(*iter);
> - if (instr->getOpcode() == AMDGPU::RETURN) {
> - return instr;
> - }
> - }
> - return NULL;
> - }
> -
> - static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
> - MachineBasicBlock::reverse_iterator iter = blk->rbegin();
> - if (iter != blk->rend()) {
> - MachineInstr *instr = &(*iter);
> - if (instr->getOpcode() == AMDGPU::CONTINUE) {
> - return instr;
> - }
> - }
> - return NULL;
> - }
> -
> - static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
> - for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
> - MachineInstr *instr = &(*iter);
> - if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
> - return instr;
> - }
> - }
> - return NULL;
> - }
> -
> - static bool isReturnBlock(MachineBasicBlock *blk) {
> - MachineInstr *instr = getReturnInstr(blk);
> - bool isReturn = (blk->succ_size() == 0);
> - if (instr) {
> - assert(isReturn);
> - } else if (isReturn) {
> - DEBUG(
> - dbgs() << "BB" << blk->getNumber()
> - <<" is return block without RETURN instr\n";
> - );
> - }
> -
> - return isReturn;
> - }
> -
> - static MachineBasicBlock::iterator
> - getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
> - assert(instr->getParent() == blk && "instruction doesn't belong to block");
> - MachineBasicBlock::iterator iter = blk->begin();
> - MachineBasicBlock::iterator iterEnd = blk->end();
> - while (&(*iter) != instr && iter != iterEnd) {
> - ++iter;
> - }
> -
> - assert(iter != iterEnd);
> - return iter;
> - }//getInstrPos
> -
> - static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
> - AMDGPUCFGStructurizer *passRep) {
> - return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
> - } //insertInstrBefore
> -
> - static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
> - AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
> - const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> - MachineInstr *newInstr =
> - blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
> -
> - MachineBasicBlock::iterator res;
> - if (blk->begin() != blk->end()) {
> - blk->insert(blk->begin(), newInstr);
> - } else {
> - blk->push_back(newInstr);
> - }
> -
> - SHOWNEWINSTR(newInstr);
> -
> - return newInstr;
> - } //insertInstrBefore
> -
> - static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
> - AMDGPUCFGStructurizer *passRep) {
> - insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
> - } //insertInstrEnd
> -
> - static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
> - AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
> - const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> - MachineInstr *newInstr = blk->getParent()
> - ->CreateMachineInstr(tii->get(newOpcode), DL);
> -
> - blk->push_back(newInstr);
> - //assume the instruction doesn't take any reg operand ...
> -
> - SHOWNEWINSTR(newInstr);
> - } //insertInstrEnd
> -
> - static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
> - int newOpcode,
> - AMDGPUCFGStructurizer *passRep) {
> - MachineInstr *oldInstr = &(*instrPos);
> - const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> - MachineBasicBlock *blk = oldInstr->getParent();
> - MachineInstr *newInstr =
> - blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
> - DebugLoc());
> -
> - blk->insert(instrPos, newInstr);
> - //assume the instruction doesn't take any reg operand ...
> -
> - SHOWNEWINSTR(newInstr);
> - return newInstr;
> - } //insertInstrBefore
> -
> - static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
> - int newOpcode,
> - AMDGPUCFGStructurizer *passRep,
> - DebugLoc DL) {
> - MachineInstr *oldInstr = &(*instrPos);
> - const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> - MachineBasicBlock *blk = oldInstr->getParent();
> - MachineFunction *MF = blk->getParent();
> - MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
> -
> - blk->insert(instrPos, newInstr);
> - MachineInstrBuilder MIB(*MF, newInstr);
> - MIB.addReg(oldInstr->getOperand(1).getReg(), false);
> -
> - SHOWNEWINSTR(newInstr);
> - //erase later oldInstr->eraseFromParent();
> - } //insertCondBranchBefore
> -
> - static void insertCondBranchBefore(MachineBasicBlock *blk,
> - MachineBasicBlock::iterator insertPos,
> - int newOpcode,
> - AMDGPUCFGStructurizer *passRep,
> - RegiT regNum,
> - DebugLoc DL) {
> - const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> - MachineFunction *MF = blk->getParent();
> -
> - MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
> -
> - //insert before
> - blk->insert(insertPos, newInstr);
> - MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
> -
> - SHOWNEWINSTR(newInstr);
> - } //insertCondBranchBefore
> -
> - static void insertCondBranchEnd(MachineBasicBlock *blk,
> - int newOpcode,
> - AMDGPUCFGStructurizer *passRep,
> - RegiT regNum) {
> - const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> - MachineFunction *MF = blk->getParent();
> - MachineInstr *newInstr =
> - MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
> -
> - blk->push_back(newInstr);
> - MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
> -
> - SHOWNEWINSTR(newInstr);
> - } //insertCondBranchEnd
> -
> -
> - static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
> - AMDGPUCFGStructurizer *passRep,
> - RegiT regNum, int regVal) {
> - MachineInstr *oldInstr = &(*instrPos);
> - const AMDGPUInstrInfo *tii =
> - static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
> - MachineBasicBlock *blk = oldInstr->getParent();
> - MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
> - regVal);
> - blk->insert(instrPos, newInstr);
> -
> - SHOWNEWINSTR(newInstr);
> - } //insertAssignInstrBefore
> -
> - static void insertAssignInstrBefore(MachineBasicBlock *blk,
> - AMDGPUCFGStructurizer *passRep,
> - RegiT regNum, int regVal) {
> - const AMDGPUInstrInfo *tii =
> - static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
> -
> - MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
> - regVal);
> - if (blk->begin() != blk->end()) {
> - blk->insert(blk->begin(), newInstr);
> - } else {
> - blk->push_back(newInstr);
> - }
> -
> - SHOWNEWINSTR(newInstr);
> -
> - } //insertInstrBefore
> -
> - static void insertCompareInstrBefore(MachineBasicBlock *blk,
> - MachineBasicBlock::iterator instrPos,
> - AMDGPUCFGStructurizer *passRep,
> - RegiT dstReg, RegiT src1Reg,
> - RegiT src2Reg) {
> - const AMDGPUInstrInfo *tii =
> - static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
> - MachineFunction *MF = blk->getParent();
> - MachineInstr *newInstr =
> - MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
> -
> - MachineInstrBuilder MIB(*MF, newInstr);
> - MIB.addReg(dstReg, RegState::Define); //set target
> - MIB.addReg(src1Reg); //set src value
> - MIB.addReg(src2Reg); //set src value
> -
> - blk->insert(instrPos, newInstr);
> - SHOWNEWINSTR(newInstr);
>
> - } //insertCompareInstrBefore
> +char AMDGPUCFGStructurizer::ID = 0;
>
> - static void cloneSuccessorList(MachineBasicBlock *dstBlk,
> - MachineBasicBlock *srcBlk) {
> - for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
> - iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
> - dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of
> - }
> - } //cloneSuccessorList
> -
> - static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
> - MachineFunction *func = srcBlk->getParent();
> - MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
> - func->push_back(newBlk); //insert to function
> - for (MachineBasicBlock::iterator iter = srcBlk->begin(),
> - iterEnd = srcBlk->end();
> - iter != iterEnd; ++iter) {
> - MachineInstr *instr = func->CloneMachineInstr(iter);
> - newBlk->push_back(instr);
> - }
> - return newBlk;
> - }
> -
> - //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
> - //the AMDGPU instruction is not recognized as terminator fix this and retire
> - //this routine
> - static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
> - MachineBasicBlock *oldBlk,
> - MachineBasicBlock *newBlk) {
> - MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
> - if (branchInstr && isCondBranch(branchInstr) &&
> - getTrueBranch(branchInstr) == oldBlk) {
> - setTrueBranch(branchInstr, newBlk);
> - }
> - }
> -
> - static void wrapup(MachineBasicBlock *entryBlk) {
> - assert((!entryBlk->getParent()->getJumpTableInfo()
> - || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
> - && "found a jump table");
> -
> - //collect continue right before endloop
> - SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
> - MachineBasicBlock::iterator pre = entryBlk->begin();
> - MachineBasicBlock::iterator iterEnd = entryBlk->end();
> - MachineBasicBlock::iterator iter = pre;
> - while (iter != iterEnd) {
> - if (pre->getOpcode() == AMDGPU::CONTINUE
> - && iter->getOpcode() == AMDGPU::ENDLOOP) {
> - contInstr.push_back(pre);
> - }
> - pre = iter;
> - ++iter;
> - } //end while
> -
> - //delete continue right before endloop
> - for (unsigned i = 0; i < contInstr.size(); ++i) {
> - contInstr[i]->eraseFromParent();
> - }
> -
> - // TODO to fix up jump table so later phase won't be confused. if
> - // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
> - // there isn't such an interface yet. alternatively, replace all the other
> - // blocks in the jump table with the entryBlk //}
> -
> - } //wrapup
> -
> - static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
> - return &pass.getAnalysis<MachineDominatorTree>();
> - }
> -
> - static MachinePostDominatorTree*
> - getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
> - return &pass.getAnalysis<MachinePostDominatorTree>();
> - }
> -
> - static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
> - return &pass.getAnalysis<MachineLoopInfo>();
> - }
> -}; // template class CFGStructTraits
> } // end anonymous namespace
>
> -// createAMDGPUCFGPreparationPass- Returns a pass
> -FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
> - return new AMDGPUCFGPrepare(tm);
> -}
> -
> -bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
> - return CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, *this,
> - getTargetRegisterInfo());
> -}
>
> -// createAMDGPUCFGStructurizerPass- Returns a pass
> FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
> - return new AMDGPUCFGPerform(tm);
> -}
> -
> -bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
> - return CFGStructurizer<AMDGPUCFGStructurizer>().run(func, *this,
> - getTargetRegisterInfo());
> + return new AMDGPUCFGStructurizer(tm);
> }
> --
> 1.8.3.1
>
> From b793a4dd4e34341b381a0b32a1de43d22aebb227 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Fri, 19 Jul 2013 18:02:21 +0200
> Subject: [PATCH 3/3] R600: Don't emit empty then clause and use alu_pop_after
>
> ---
> lib/Target/R600/AMDILCFGStructurizer.cpp | 8 +-
> lib/Target/R600/R600ControlFlowFinalizer.cpp | 48 +++++++++--
> lib/Target/R600/R600Instructions.td | 1 +
> test/CodeGen/R600/jump-address.ll | 2 +-
> test/CodeGen/R600/loop-address.ll | 9 +-
> test/CodeGen/R600/r600cfg.ll | 124 +++++++++++++++++++++++++++
> 6 files changed, 175 insertions(+), 17 deletions(-)
> create mode 100644 test/CodeGen/R600/r600cfg.ll
>
> diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
> index 6ace97a..aabeeca 100644
> --- a/lib/Target/R600/AMDILCFGStructurizer.cpp
> +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
> @@ -1044,8 +1044,11 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
> } else if (FalseMBB->succ_size() == 1
> && *FalseMBB->succ_begin() == TrueMBB) {
> // Triangle pattern, true is empty
> - LandBlk = TrueMBB;
> - TrueMBB = NULL;
> + // Reverse the predicate so this becomes a triangle pattern with an empty false branch.
> + std::swap(TrueMBB, FalseMBB);
> + reversePredicateSetter(MBB->end());
> + LandBlk = FalseMBB;
> + FalseMBB = NULL;
> } else if (FalseMBB->succ_size() == 1
> && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
> LandBlk = *FalseMBB->succ_begin();
> @@ -1461,6 +1464,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
> void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
> MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
> MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
> + assert (TrueMBB);
> DEBUG(
> dbgs() << "ifPattern BB" << MBB->getNumber();
> dbgs() << "{ ";
> diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> index 932a6a7..40cd2c2 100644
> --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -347,6 +347,9 @@ public:
> MaxStack = 1;
> }
> std::vector<ClauseFile> FetchClauses, AluClauses;
> + std::vector<MachineInstr *> LastAlu(1);
> + std::vector<MachineInstr *> ToPopAfter;
> +
> for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
> I != E;) {
> if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
> @@ -357,6 +360,10 @@ public:
> }
>
> MachineBasicBlock::iterator MI = I;
> + if (MI->getOpcode() != AMDGPU::ENDIF)
> + LastAlu.back() = 0;
> + if (MI->getOpcode() == AMDGPU::CF_ALU)
> + LastAlu.back() = MI;
> I++;
> switch (MI->getOpcode()) {
> case AMDGPU::CF_ALU_PUSH_BEFORE:
> @@ -403,6 +410,7 @@ public:
> break;
> }
> case AMDGPU::IF_PREDICATE_SET: {
> + LastAlu.push_back(0);
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> getHWInstrDesc(CF_JUMP))
> .addImm(0)
> @@ -420,7 +428,7 @@ public:
> MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> getHWInstrDesc(CF_ELSE))
> .addImm(0)
> - .addImm(1);
> + .addImm(0);
> DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
> IfThenElseStack.push_back(MIb);
> MI->eraseFromParent();
> @@ -429,17 +437,24 @@ public:
> }
> case AMDGPU::ENDIF: {
> CurrentStack--;
> + if (LastAlu.back()) {
> + ToPopAfter.push_back(LastAlu.back());
> + } else {
> + MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> + getHWInstrDesc(CF_POP))
> + .addImm(CfCount + 1)
> + .addImm(1);
> + (void)MIb;
> + DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
> + CfCount++;
> + }
> +
> MachineInstr *IfOrElseInst = IfThenElseStack.back();
> IfThenElseStack.pop_back();
> - CounterPropagateAddr(IfOrElseInst, CfCount + 1);
> - MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> - getHWInstrDesc(CF_POP))
> - .addImm(CfCount + 1)
> - .addImm(1);
> - (void)MIb;
> - DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
> + CounterPropagateAddr(IfOrElseInst, CfCount);
> + IfOrElseInst->getOperand(1).setImm(1);
> + LastAlu.pop_back();
> MI->eraseFromParent();
> - CfCount++;
> break;
> }
> case AMDGPU::PREDICATED_BREAK: {
> @@ -484,6 +499,21 @@ public:
> break;
> }
> }
> + for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
> + MachineInstr *Alu = ToPopAfter[i];
> + BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
> + TII->get(AMDGPU::CF_ALU_POP_AFTER))
> + .addImm(Alu->getOperand(0).getImm())
> + .addImm(Alu->getOperand(1).getImm())
> + .addImm(Alu->getOperand(2).getImm())
> + .addImm(Alu->getOperand(3).getImm())
> + .addImm(Alu->getOperand(4).getImm())
> + .addImm(Alu->getOperand(5).getImm())
> + .addImm(Alu->getOperand(6).getImm())
> + .addImm(Alu->getOperand(7).getImm())
> + .addImm(Alu->getOperand(8).getImm());
> + Alu->eraseFromParent();
> + }
> MFI->StackSize = getHWStackSize(MaxStack, HasPush);
> }
>
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index df5c438..3652c89 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -624,6 +624,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
>
> def CF_ALU : ALU_CLAUSE<8, "ALU">;
> def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
> +def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
>
> def FETCH_CLAUSE : AMDGPUInst <(outs),
> (ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
> diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll
> index 9a5f1bc..26c298b 100644
> --- a/test/CodeGen/R600/jump-address.ll
> +++ b/test/CodeGen/R600/jump-address.ll
> @@ -1,6 +1,6 @@
> ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>
> -; CHECK: JUMP @7
> +; CHECK: JUMP @5
> ; CHECK: EXPORT
> ; CHECK-NOT: EXPORT
>
> diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
> index 8a5458b..23be327 100644
> --- a/test/CodeGen/R600/loop-address.ll
> +++ b/test/CodeGen/R600/loop-address.ll
> @@ -2,12 +2,11 @@
>
> ;CHECK: TEX
> ;CHECK: ALU_PUSH
> -;CHECK: JUMP @4
> -;CHECK: ELSE @16
> +;CHECK: JUMP @15
> ;CHECK: TEX
> -;CHECK: LOOP_START_DX10 @15
> -;CHECK: LOOP_BREAK @14
> -;CHECK: POP @16
> +;CHECK: LOOP_START_DX10 @14
> +;CHECK: LOOP_BREAK @13
> +;CHECK: POP @15
>
> target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
> target triple = "r600--"
> diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll
> new file mode 100644
> index 0000000..895ad5e
> --- /dev/null
> +++ b/test/CodeGen/R600/r600cfg.ll
> @@ -0,0 +1,124 @@
> +;RUN: llc < %s -march=r600 -mcpu=redwood
> +;REQUIRES: asserts
> +
> +define void @main() #0 {
> +main_body:
> + %0 = call float @llvm.R600.load.input(i32 4)
> + %1 = call float @llvm.R600.load.input(i32 5)
> + %2 = call float @llvm.R600.load.input(i32 6)
> + %3 = call float @llvm.R600.load.input(i32 7)
> + %4 = bitcast float %0 to i32
> + %5 = icmp eq i32 %4, 0
> + %6 = sext i1 %5 to i32
> + %7 = bitcast i32 %6 to float
> + %8 = bitcast float %7 to i32
> + %9 = icmp ne i32 %8, 0
> + %. = select i1 %9, float 0x36A0000000000000, float %0
> + br label %LOOP
> +
> +LOOP: ; preds = %LOOP47, %main_body
> + %temp12.0 = phi float [ 0x36A0000000000000, %main_body ], [ %temp12.1, %LOOP47 ]
> + %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %38, %LOOP47 ]
> + %temp4.1 = phi float [ %., %main_body ], [ %52, %LOOP47 ]
> + %10 = bitcast float %temp4.1 to i32
> + %11 = icmp eq i32 %10, 1
> + %12 = sext i1 %11 to i32
> + %13 = bitcast i32 %12 to float
> + %14 = bitcast float %13 to i32
> + %15 = icmp ne i32 %14, 0
> + br i1 %15, label %IF41, label %ENDIF40
> +
> +IF41: ; preds = %LOOP
> + %16 = insertelement <4 x float> undef, float %0, i32 0
> + %17 = insertelement <4 x float> %16, float %temp8.0, i32 1
> + %18 = insertelement <4 x float> %17, float %temp12.0, i32 2
> + %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
> + call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
> + %20 = insertelement <4 x float> undef, float %0, i32 0
> + %21 = insertelement <4 x float> %20, float %temp8.0, i32 1
> + %22 = insertelement <4 x float> %21, float %temp12.0, i32 2
> + %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
> + call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
> + %24 = insertelement <4 x float> undef, float %0, i32 0
> + %25 = insertelement <4 x float> %24, float %temp8.0, i32 1
> + %26 = insertelement <4 x float> %25, float %temp12.0, i32 2
> + %27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
> + call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
> + %28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
> + %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
> + %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
> + %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
> + call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
> + %32 = insertelement <4 x float> undef, float %0, i32 0
> + %33 = insertelement <4 x float> %32, float %temp8.0, i32 1
> + %34 = insertelement <4 x float> %33, float %temp12.0, i32 2
> + %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
> + call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
> + ret void
> +
> +ENDIF40: ; preds = %LOOP
> + %36 = bitcast float %temp8.0 to i32
> + %37 = add i32 %36, 1
> + %38 = bitcast i32 %37 to float
> + %39 = bitcast float %temp4.1 to i32
> + %40 = urem i32 %39, 2
> + %41 = bitcast i32 %40 to float
> + %42 = bitcast float %41 to i32
> + %43 = icmp eq i32 %42, 0
> + %44 = sext i1 %43 to i32
> + %45 = bitcast i32 %44 to float
> + %46 = bitcast float %45 to i32
> + %47 = icmp ne i32 %46, 0
> + %48 = bitcast float %temp4.1 to i32
> + br i1 %47, label %IF44, label %ELSE45
> +
> +IF44: ; preds = %ENDIF40
> + %49 = udiv i32 %48, 2
> + br label %ENDIF43
> +
> +ELSE45: ; preds = %ENDIF40
> + %50 = mul i32 3, %48
> + %51 = add i32 %50, 1
> + br label %ENDIF43
> +
> +ENDIF43: ; preds = %ELSE45, %IF44
> + %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
> + %52 = bitcast i32 %.sink to float
> + %53 = load <4 x float> addrspace(8)* null
> + %54 = extractelement <4 x float> %53, i32 0
> + %55 = bitcast float %54 to i32
> + br label %LOOP47
> +
> +LOOP47: ; preds = %ENDIF48, %ENDIF43
> + %temp12.1 = phi float [ %temp12.0, %ENDIF43 ], [ %67, %ENDIF48 ]
> + %temp28.0 = phi float [ 0.000000e+00, %ENDIF43 ], [ %70, %ENDIF48 ]
> + %56 = bitcast float %temp28.0 to i32
> + %57 = icmp uge i32 %56, %55
> + %58 = sext i1 %57 to i32
> + %59 = bitcast i32 %58 to float
> + %60 = bitcast float %59 to i32
> + %61 = icmp ne i32 %60, 0
> + br i1 %61, label %LOOP, label %ENDIF48
> +
> +ENDIF48: ; preds = %LOOP47
> + %62 = bitcast float %temp12.1 to i32
> + %63 = mul i32 %62, 2
> + %64 = bitcast i32 %63 to float
> + %65 = bitcast float %64 to i32
> + %66 = urem i32 %65, 2147483647
> + %67 = bitcast i32 %66 to float
> + %68 = bitcast float %temp28.0 to i32
> + %69 = add i32 %68, 1
> + %70 = bitcast i32 %69 to float
> + br label %LOOP47
> +}
> +
> +; Function Attrs: readnone
> +declare float @llvm.R600.load.input(i32) #1
> +
> +declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
> +
> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
> +
> +attributes #0 = { "ShaderType"="1" }
> +attributes #1 = { readnone }
> --
> 1.8.3.1
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
More information about the llvm-commits mailing list