R600: AMDILCFGStructurizer simplification

Tom Stellard tom at stellard.net
Fri Jul 19 09:48:22 PDT 2013


On Fri, Jul 19, 2013 at 09:31:11AM -0700, Vincent Lejeune wrote:
> Hi,
> 
> this series simplifies our AMDILCFGStructurizer pass, which is currently used by non-SI targets.
> I removed all the templates (there was only a single template instantiation; the original
> author probably wanted the code to work on LLVM IR as well as on Machine Code, but that was never
> implemented) and removed some code by assuming that every loop has a single exit.
> While this may look like a limitation compared to the previous situation, the pass generated wrong code
> (it added vreg assignments after regalloc) and made LLVM crash. In fact, the simplified code is even able
> to fix some TFB piglit test crashes.
> I also took the opportunity to make the code more compliant with the LLVM coding style standard (uppercase variable names,
>  braces in single-line loops...)
> 
> 
> I also added a patch that makes the triangle if/then/else pattern lower correctly, i.e. sparing an extra THEN...ENDIF
> construction, which should save a couple of cycles in the resulting code.
> 
> Vincent.

The series is:
Reviewed-by: Tom Stellard <thomas.stellard at amd.com>

Thanks for doing this cleanup!

-Tom

> From ffc0d774dbcb6914e3f707d6edf5afb4e1114db9 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 10 Jul 2013 18:43:10 +0200
> Subject: [PATCH 1/3] R600: Replace legacy debug code in
>  AMDILCFGStructurizer.cpp
> 
> ---
>  lib/Target/R600/AMDILCFGStructurizer.cpp | 463 ++++++++++++++++---------------
>  1 file changed, 235 insertions(+), 228 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
> index 437480c..bb8c217 100644
> --- a/lib/Target/R600/AMDILCFGStructurizer.cpp
> +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
> @@ -8,11 +8,12 @@
>  /// \file
>  //==-----------------------------------------------------------------------===//
>  
> -#define DEBUGME 0
>  #define DEBUG_TYPE "structcfg"
>  
>  #include "AMDGPU.h"
>  #include "AMDGPUInstrInfo.h"
> +#include "llvm/Support/Debug.h"
> +#include "llvm/Support/raw_ostream.h"
>  #include "llvm/ADT/SCCIterator.h"
>  #include "llvm/ADT/SmallVector.h"
>  #include "llvm/ADT/Statistic.h"
> @@ -62,22 +63,22 @@ STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
>  //===----------------------------------------------------------------------===//
>  namespace {
>  #define SHOWNEWINSTR(i) \
> -  if (DEBUGME) errs() << "New instr: " << *i << "\n"
> +  DEBUG(dbgs() << "New instr: " << *i << "\n");
>  
>  #define SHOWNEWBLK(b, msg) \
> -if (DEBUGME) { \
> -  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
> -  errs() << "\n"; \
> -}
> +DEBUG( \
> +  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
> +  dbgs() << "\n"; \
> +);
>  
>  #define SHOWBLK_DETAIL(b, msg) \
> -if (DEBUGME) { \
> +DEBUG( \
>    if (b) { \
> -  errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
> -  b->print(errs()); \
> -  errs() << "\n"; \
> +  dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
> +  b->print(dbgs()); \
> +  dbgs() << "\n"; \
>    } \
> -}
> +);
>  
>  #define INVALIDSCCNUM -1
>  #define INVALIDREGNUM 0
> @@ -332,21 +333,27 @@ bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
>  
>    //FIXME: if not reducible flow graph, make it so ???
>  
> -  if (DEBUGME) {
> -        errs() << "AMDGPUCFGStructurizer::prepare\n";
> -  }
> +  DEBUG(
> +        dbgs() << "AMDGPUCFGStructurizer::prepare\n";
> +  );
>  
>    loopInfo = CFGTraits::getLoopInfo(pass);
> -  if (DEBUGME) {
> -    errs() << "LoopInfo:\n";
> -    PrintLoopinfo(*loopInfo, errs());
> -  }
> +  DEBUG(
> +    dbgs() << "LoopInfo:\n";
> +    PrintLoopinfo(*loopInfo, dbgs());
> +  );
>  
>    orderBlocks();
> -  if (DEBUGME) {
> -    errs() << "Ordered blocks:\n";
> -    printOrderedBlocks(errs());
> -  }
> +  DEBUG(
> +    for (typename SmallVectorImpl<BlockT *>::const_iterator
> +        iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
> +        iterBlk != iterBlkEnd;
> +        ++iterBlk) {
> +      (*iterBlk)->dump();
> +    }
> +    dbgs() << "Ordered blocks:\n";
> +    printOrderedBlocks(dbgs());
> +  );
>  
>    SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
>  
> @@ -396,26 +403,26 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>    TRI = tri;
>  
>    //Assume reducible CFG...
> -  if (DEBUGME) {
> -    errs() << "AMDGPUCFGStructurizer::run\n";
> +  DEBUG(
> +    dbgs() << "AMDGPUCFGStructurizer::run\n";
>      func.viewCFG();
> -  }
> +  );
>  
>    domTree = CFGTraits::getDominatorTree(pass);
> -  if (DEBUGME) {
> -    domTree->print(errs(), (const llvm::Module*)0);
> -  }
> +  DEBUG(
> +    domTree->print(dbgs(), (const llvm::Module*)0);
> +  );
>  
>    postDomTree = CFGTraits::getPostDominatorTree(pass);
> -  if (DEBUGME) {
> -    postDomTree->print(errs());
> -  }
> +  DEBUG(
> +    postDomTree->print(dbgs());
> +  );
>  
>    loopInfo = CFGTraits::getLoopInfo(pass);
> -  if (DEBUGME) {
> -    errs() << "LoopInfo:\n";
> -    PrintLoopinfo(*loopInfo, errs());
> -  }
> +  DEBUG(
> +    dbgs() << "LoopInfo:\n";
> +    PrintLoopinfo(*loopInfo, dbgs());
> +  );
>  
>    orderBlocks();
>  #ifdef STRESSTEST
> @@ -423,10 +430,10 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>    ReverseVector(orderedBlks);
>  #endif
>  
> -  if (DEBUGME) {
> -    errs() << "Ordered blocks:\n";
> -    printOrderedBlocks(errs());
> -  }
> +  DEBUG(
> +    dbgs() << "Ordered blocks:\n";
> +    printOrderedBlocks(dbgs());
> +  );
>    int numIter = 0;
>    bool finish = false;
>    BlockT *curBlk;
> @@ -436,10 +443,10 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>  
>    do {
>      ++numIter;
> -    if (DEBUGME) {
> -      errs() << "numIter = " << numIter
> +    DEBUG(
> +      dbgs() << "numIter = " << numIter
>               << ", numRemaintedBlk = " << numRemainedBlk << "\n";
> -    }
> +    );
>  
>      typename SmallVectorImpl<BlockT *>::const_iterator
>        iterBlk = orderedBlks.begin();
> @@ -461,10 +468,10 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>          sccBeginBlk = curBlk;
>          sccNumIter = 0;
>          sccNumBlk = numRemainedBlk; // Init to maximum possible number.
> -        if (DEBUGME) {
> -              errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
> -              errs() << "\n";
> -        }
> +        DEBUG(
> +              dbgs() << "start processing SCC" << getSCCNum(sccBeginBlk);
> +              dbgs() << "\n";
> +        );
>        }
>  
>        if (!isRetiredBlock(curBlk)) {
> @@ -480,21 +487,21 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>          ++sccNumIter;
>          int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
>          if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
> -          if (DEBUGME) {
> -            errs() << "Can't reduce SCC " << getSCCNum(curBlk)
> +          DEBUG(
> +            dbgs() << "Can't reduce SCC " << getSCCNum(curBlk)
>                     << ", sccNumIter = " << sccNumIter;
> -            errs() << "doesn't make any progress\n";
> -          }
> +            dbgs() << "doesn't make any progress\n";
> +          );
>            contNextScc = true;
>          } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
>            sccNumBlk = sccRemainedNumBlk;
>            iterBlk = sccBeginIter;
>            contNextScc = false;
> -          if (DEBUGME) {
> -            errs() << "repeat processing SCC" << getSCCNum(curBlk)
> +          DEBUG(
> +            dbgs() << "repeat processing SCC" << getSCCNum(curBlk)
>                     << "sccNumIter = " << sccNumIter << "\n";
>              func.viewCFG();
> -          }
> +          );
>          } else {
>            // Finish the current scc.
>            contNextScc = true;
> @@ -512,9 +519,9 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>      BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
>      if (entryBlk->succ_size() == 0) {
>        finish = true;
> -      if (DEBUGME) {
> -        errs() << "Reduce to one block\n";
> -      }
> +      DEBUG(
> +        dbgs() << "Reduce to one block\n";
> +      );
>      } else {
>        int newnumRemainedBlk
>          = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
> @@ -524,9 +531,9 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>          numRemainedBlk = newnumRemainedBlk;
>        } else {
>          makeProgress = false;
> -        if (DEBUGME) {
> -          errs() << "No progress\n";
> -        }
> +        DEBUG(
> +          dbgs() << "No progress\n";
> +        );
>        }
>      }
>    } while (!finish && makeProgress);
> @@ -539,9 +546,9 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>         iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
>      if ((*iterMap).second && (*iterMap).second->isRetired) {
>        assert(((*iterMap).first)->getNumber() != -1);
> -      if (DEBUGME) {
> -        errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
> -      }
> +      DEBUG(
> +        dbgs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
> +      );
>        (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
>      }
>      delete (*iterMap).second;
> @@ -555,12 +562,12 @@ bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
>    }
>    loopLandInfoMap.clear();
>  
> -  if (DEBUGME) {
> +  DEBUG(
>      func.viewCFG();
> -  }
> +  );
>  
>    if (!finish) {
> -    assert(!"IRREDUCIBL_CF");
> +    llvm_unreachable("IRREDUCIBL_CF");
>    }
>  
>    return true;
> @@ -609,7 +616,7 @@ template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
>      BlockT *bb = &(*blockIter1);
>      sccNum = getSCCNum(bb);
>      if (sccNum == INVALIDSCCNUM) {
> -      errs() << "unreachable block BB" << bb->getNumber() << "\n";
> +      dbgs() << "unreachable block BB" << bb->getNumber() << "\n";
>      }
>    }
>  } //orderBlocks
> @@ -618,18 +625,18 @@ template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
>    int numMatch = 0;
>    int curMatch;
>  
> -  if (DEBUGME) {
> -        errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
> -  }
> +  DEBUG(
> +        dbgs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
> +  );
>  
>    while ((curMatch = patternMatchGroup(curBlk)) > 0) {
>      numMatch += curMatch;
>    }
>  
> -  if (DEBUGME) {
> -        errs() << "End patternMatch BB" << curBlk->getNumber()
> +  DEBUG(
> +        dbgs() << "End patternMatch BB" << curBlk->getNumber()
>        << ", numMatch = " << numMatch << "\n";
> -  }
> +  );
>  
>    return numMatch;
>  } //patternMatch
> @@ -811,9 +818,9 @@ int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
>    BlockTSmallerVector exitingBlks;
>    loopRep->getExitingBlocks(exitingBlks);
>  
> -  if (DEBUGME) {
> -    errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
> -  }
> +  DEBUG(
> +    dbgs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
> +  );
>  
>    if (exitingBlks.size() == 0) {
>      setLoopLandBlock(loopRep);
> @@ -834,9 +841,9 @@ int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
>    assert(exitBlkSet.size() > 0);
>    assert(exitBlks.size() == exitingBlks.size());
>  
> -  if (DEBUGME) {
> -    errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
> -  }
> +  DEBUG(
> +    dbgs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
> +  );
>  
>    // Find exitLandBlk.
>    BlockT *exitLandBlk = NULL;
> @@ -861,19 +868,19 @@ int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
>        BlockT *exitBlk = *iter;
>  
>        PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
> -      if (DEBUGME) {
> -        errs() << "BB" << exitBlk->getNumber()
> +      DEBUG(
> +        dbgs() << "BB" << exitBlk->getNumber()
>                 << " to BB" << exitLandBlk->getNumber() << " PathToKind="
>                 << pathKind << "\n";
> -      }
> +      );
>  
>        allInPath = allInPath && (pathKind == SinglePath_InPath);
>        allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
>  
>        if (!allInPath && !allNotInPath) {
> -        if (DEBUGME) {
> -              errs() << "singlePath check fail\n";
> -        }
> +        DEBUG(
> +              dbgs() << "singlePath check fail\n";
> +        );
>          return -1;
>        }
>      } // check all exit blocks
> @@ -891,19 +898,19 @@ int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
>                                                 loopRep,
>                                                 exitBlkSet,
>                                                 exitLandBlk)) != NULL) {
> -        if (DEBUGME) {
> -          errs() << "relocateLoopcontBlock success\n";
> -        }
> +        DEBUG(
> +          dbgs() << "relocateLoopcontBlock success\n";
> +        );
>        } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
>                                                        exitingBlks,
>                                                        exitBlks)) != NULL) {
> -        if (DEBUGME) {
> -          errs() << "insertEndbranchBlock success\n";
> -        }
> +        DEBUG(
> +          dbgs() << "insertEndbranchBlock success\n";
> +        );
>        } else {
> -        if (DEBUGME) {
> -          errs() << "loop exit fail\n";
> -        }
> +        DEBUG(
> +          dbgs() << "loop exit fail\n";
> +        );
>          return -1;
>        }
>      }
> @@ -1017,11 +1024,11 @@ bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
>      if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
>        LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
>        if (theEntry != NULL) {
> -        if (DEBUGME) {
> -          errs() << "isLoopContBreakBlock yes src1 = BB"
> +        DEBUG(
> +          dbgs() << "isLoopContBreakBlock yes src1 = BB"
>                   << src1Blk->getNumber()
>                   << " src2 = BB" << src2Blk->getNumber() << "\n";
> -        }
> +        );
>          return true;
>        }
>      }
> @@ -1035,9 +1042,9 @@ int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
>                                               BlockT *falseBlk) {
>    int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
>    if (num == 0) {
> -    if (DEBUGME) {
> -      errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> -    }
> +    DEBUG(
> +      dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> +    );
>      num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
>    }
>    return num;
> @@ -1053,22 +1060,22 @@ int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
>    //trueBlk could be the common post dominator
>    downBlk = trueBlk;
>  
> -  if (DEBUGME) {
> -    errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
> +  DEBUG(
> +    dbgs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
>             << " true = BB" << trueBlk->getNumber()
>             << ", numSucc=" << trueBlk->succ_size()
>             << " false = BB" << falseBlk->getNumber() << "\n";
> -  }
> +  );
>  
>    while (downBlk) {
> -    if (DEBUGME) {
> -      errs() << "check down = BB" << downBlk->getNumber();
> -    }
> +    DEBUG(
> +      dbgs() << "check down = BB" << downBlk->getNumber();
> +    );
>  
>      if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
> -      if (DEBUGME) {
> -        errs() << " working\n";
> -      }
> +      DEBUG(
> +        dbgs() << " working\n";
> +      );
>  
>        num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
>        num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
> @@ -1081,9 +1088,9 @@ int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
>  
>        break;
>      }
> -    if (DEBUGME) {
> -      errs() << " not working\n";
> -    }
> +    DEBUG(
> +      dbgs() << " not working\n";
> +    );
>      downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
>    } // walk down the postDomTree
>  
> @@ -1096,43 +1103,43 @@ void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
>                                                           BlockT *falseBlk,
>                                                           BlockT *landBlk,
>                                                           bool detail) {
> -  errs() << "head = BB" << headBlk->getNumber()
> +  dbgs() << "head = BB" << headBlk->getNumber()
>           << " size = " << headBlk->size();
>    if (detail) {
> -    errs() << "\n";
> -    headBlk->print(errs());
> -    errs() << "\n";
> +    dbgs() << "\n";
> +    headBlk->print(dbgs());
> +    dbgs() << "\n";
>    }
>  
>    if (trueBlk) {
> -    errs() << ", true = BB" << trueBlk->getNumber() << " size = "
> +    dbgs() << ", true = BB" << trueBlk->getNumber() << " size = "
>             << trueBlk->size() << " numPred = " << trueBlk->pred_size();
>      if (detail) {
> -      errs() << "\n";
> -      trueBlk->print(errs());
> -      errs() << "\n";
> +      dbgs() << "\n";
> +      trueBlk->print(dbgs());
> +      dbgs() << "\n";
>      }
>    }
>    if (falseBlk) {
> -    errs() << ", false = BB" << falseBlk->getNumber() << " size = "
> +    dbgs() << ", false = BB" << falseBlk->getNumber() << " size = "
>             << falseBlk->size() << " numPred = " << falseBlk->pred_size();
>      if (detail) {
> -      errs() << "\n";
> -      falseBlk->print(errs());
> -      errs() << "\n";
> +      dbgs() << "\n";
> +      falseBlk->print(dbgs());
> +      dbgs() << "\n";
>      }
>    }
>    if (landBlk) {
> -    errs() << ", land = BB" << landBlk->getNumber() << " size = "
> +    dbgs() << ", land = BB" << landBlk->getNumber() << " size = "
>             << landBlk->size() << " numPred = " << landBlk->pred_size();
>      if (detail) {
> -      errs() << "\n";
> -      landBlk->print(errs());
> -      errs() << "\n";
> +      dbgs() << "\n";
> +      landBlk->print(dbgs());
> +      dbgs() << "\n";
>      }
>    }
>  
> -    errs() << "\n";
> +    dbgs() << "\n";
>  } //showImproveSimpleJumpintoIf
>  
>  template<class PassT>
> @@ -1169,10 +1176,10 @@ int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
>      migrateFalse = true;
>    }
>  
> -  if (DEBUGME) {
> -    errs() << "before improveSimpleJumpintoIf: ";
> +  DEBUG(
> +    dbgs() << "before improveSimpleJumpintoIf: ";
>      showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
> -  }
> +  );
>  
>    // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
>    //
> @@ -1269,10 +1276,10 @@ int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
>        }
>      } //for
>    }
> -  if (DEBUGME) {
> -    errs() << "result from improveSimpleJumpintoIf: ";
> +  DEBUG(
> +    dbgs() << "result from improveSimpleJumpintoIf: ";
>      showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
> -  }
> +  );
>  
>    // update landBlk
>    *plandBlk = landBlk;
> @@ -1286,10 +1293,10 @@ void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
>                                               BlockT *exitBlk,
>                                                LoopT *exitLoop,
>                                               BlockT *landBlk) {
> -  if (DEBUGME) {
> -    errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
> +  DEBUG(
> +    dbgs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
>             << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
> -  }
> +  );
>    const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
>  
>    RegiT initReg = INVALIDREGNUM;
> @@ -1314,14 +1321,14 @@ void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
>                                                    LoopT *contingLoop,
>                                                   BlockT *contBlk,
>                                                    LoopT *contLoop) {
> -  if (DEBUGME) {
> -    errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
> +  DEBUG(
> +    dbgs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
>             << " header = BB" << contBlk->getNumber() << "\n";
>  
> -    errs() << "Trying to continue loop-depth = "
> +    dbgs() << "Trying to continue loop-depth = "
>             << getLoopDepth(contLoop)
>             << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
> -  }
> +  );
>  
>    RegiT initReg = INVALIDREGNUM;
>    const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> @@ -1343,10 +1350,10 @@ void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
>  
>  template<class PassT>
>  void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
> -  if (DEBUGME) {
> -    errs() << "serialPattern BB" << dstBlk->getNumber()
> +  DEBUG(
> +    dbgs() << "serialPattern BB" << dstBlk->getNumber()
>             << " <= BB" << srcBlk->getNumber() << "\n";
> -  }
> +  );
>    dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
>  
>    dstBlk->removeSuccessor(srcBlk);
> @@ -1362,26 +1369,26 @@ void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
>                                                    BlockT *trueBlk,
>                                                    BlockT *falseBlk,
>                                                    BlockT *landBlk) {
> -  if (DEBUGME) {
> -    errs() << "ifPattern BB" << curBlk->getNumber();
> -    errs() << "{  ";
> +  DEBUG(
> +    dbgs() << "ifPattern BB" << curBlk->getNumber();
> +    dbgs() << "{  ";
>      if (trueBlk) {
> -      errs() << "BB" << trueBlk->getNumber();
> +      dbgs() << "BB" << trueBlk->getNumber();
>      }
> -    errs() << "  } else ";
> -    errs() << "{  ";
> +    dbgs() << "  } else ";
> +    dbgs() << "{  ";
>      if (falseBlk) {
> -      errs() << "BB" << falseBlk->getNumber();
> +      dbgs() << "BB" << falseBlk->getNumber();
>      }
> -    errs() << "  }\n ";
> -    errs() << "landBlock: ";
> +    dbgs() << "  }\n ";
> +    dbgs() << "landBlock: ";
>      if (landBlk == NULL) {
> -      errs() << "NULL";
> +      dbgs() << "NULL";
>      } else {
> -      errs() << "BB" << landBlk->getNumber();
> +      dbgs() << "BB" << landBlk->getNumber();
>      }
> -    errs() << "\n";
> -  }
> +    dbgs() << "\n";
> +  );
>  
>    int oldOpcode = branchInstr->getOpcode();
>    DebugLoc branchDL = branchInstr->getDebugLoc();
> @@ -1435,10 +1442,10 @@ void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
>                                                  LoopLandInfo *loopLand) {
>    BlockT *landBlk = loopLand->landBlk;
>  
> -  if (DEBUGME) {
> -    errs() << "loopPattern header = BB" << dstBlk->getNumber()
> +  DEBUG(
> +    dbgs() << "loopPattern header = BB" << dstBlk->getNumber()
>             << " land = BB" << landBlk->getNumber() << "\n";
> -  }
> +  );
>  
>    // Loop contInitRegs are init at the beginning of the loop.
>    for (typename std::set<RegiT>::const_iterator iter =
> @@ -1521,7 +1528,7 @@ void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
>          static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
>          return;
>        default:
> -        assert(0 && "PRED_X Opcode invalid!");
> +        llvm_unreachable("PRED_X Opcode invalid!");
>        }
>      }
>    }
> @@ -1532,11 +1539,11 @@ void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
>                                                   BlockT *exitBlk,
>                                                   BlockT *exitLandBlk,
>                                                   RegiT  setReg) {
> -  if (DEBUGME) {
> -    errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
> +  DEBUG(
> +    dbgs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
>             << " exit = BB" << exitBlk->getNumber()
>             << " land = BB" << exitLandBlk->getNumber() << "\n";
> -  }
> +  );
>  
>    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
>    assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
> @@ -1596,11 +1603,11 @@ template<class PassT>
>  void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
>                                                   BlockT *contBlk,
>                                                   RegiT   setReg) {
> -  if (DEBUGME) {
> -    errs() << "settleLoopcontBlock conting = BB"
> +  DEBUG(
> +    dbgs() << "settleLoopcontBlock conting = BB"
>             << contingBlk->getNumber()
>             << ", cont = BB" << contBlk->getNumber() << "\n";
> -  }
> +  );
>  
>    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
>    if (branchInstr) {
> @@ -1711,10 +1718,10 @@ CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
>          contInstr->eraseFromParent();
>        }
>        endBlk->addSuccessor(newBlk);
> -      if (DEBUGME) {
> -        errs() << "Add new continue Block to BB"
> +      DEBUG(
> +        dbgs() << "Add new continue Block to BB"
>                 << endBlk->getNumber() << " successors\n";
> -      }
> +      );
>    }
>  
>    return newBlk;
> @@ -1927,10 +1934,10 @@ CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
>  
>    numClonedInstr += curBlk->size();
>  
> -  if (DEBUGME) {
> -    errs() << "Cloned block: " << "BB"
> +  DEBUG(
> +    dbgs() << "Cloned block: " << "BB"
>             << curBlk->getNumber() << "size " << curBlk->size() << "\n";
> -  }
> +  );
>  
>    SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
>  
> @@ -1966,29 +1973,29 @@ void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
>    //look for the input branchinstr, not the AMDGPU branchinstr
>    InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
>    if (branchInstr == NULL) {
> -    if (DEBUGME) {
> -      errs() << "migrateInstruction don't see branch instr\n" ;
> -    }
> +    DEBUG(
> +      dbgs() << "migrateInstruction don't see branch instr\n" ;
> +    );
>      spliceEnd = srcBlk->end();
>    } else {
> -    if (DEBUGME) {
> -      errs() << "migrateInstruction see branch instr\n" ;
> +    DEBUG(
> +      dbgs() << "migrateInstruction see branch instr\n" ;
>        branchInstr->dump();
> -    }
> +    );
>      spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
>    }
> -  if (DEBUGME) {
> -    errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
> +  DEBUG(
> +    dbgs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
>        << "srcSize = " << srcBlk->size() << "\n";
> -  }
> +  );
>  
>    //splice insert before insertPos
>    dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
>  
> -  if (DEBUGME) {
> -    errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
> +  DEBUG(
> +    dbgs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
>        << "srcSize = " << srcBlk->size() << "\n";
> -  }
> +  );
>  } //migrateInstruction
>  
>  // normalizeInfiniteLoopExit change
> @@ -2016,7 +2023,7 @@ CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
>        funcRep->push_back(dummyExitBlk);  //insert to function
>        SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
>  
> -      if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
> +      DEBUG(dbgs() << "Old branch instr: " << *branchInstr << "\n";);
>  
>        typename BlockT::iterator insertPos =
>          CFGTraits::getInstrPos(loopLatch, branchInstr);
> @@ -2047,10 +2054,10 @@ void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
>    // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
>    while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
>            && CFGTraits::isUncondBranch(branchInstr)) {
> -    if (DEBUGME) {
> -          errs() << "Removing unconditional branch instruction" ;
> +    DEBUG(
> +          dbgs() << "Removing unconditional branch instruction" ;
>        branchInstr->dump();
> -    }
> +    );
>      branchInstr->eraseFromParent();
>    }
>  } //removeUnconditionalBranch
> @@ -2064,10 +2071,10 @@ void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
>      if (blk1 == blk2) {
>        InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
>        assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
> -      if (DEBUGME) {
> -        errs() << "Removing unneeded conditional branch instruction" ;
> +      DEBUG(
> +        dbgs() << "Removing unneeded conditional branch instruction" ;
>          branchInstr->dump();
> -      }
> +      );
>        branchInstr->eraseFromParent();
>        SHOWNEWBLK(blk1, "Removing redundant successor");
>        srcBlk->removeSuccessor(blk1);
> @@ -2091,10 +2098,10 @@ void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
>        curInstr->eraseFromParent();
>      }
>      curBlk->addSuccessor(dummyExitBlk);
> -    if (DEBUGME) {
> -      errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
> +    DEBUG(
> +      dbgs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
>               << " successors\n";
> -    }
> +    );
>    } //for
>  
>    SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
> @@ -2126,9 +2133,9 @@ int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
>  
>  template<class PassT>
>  void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
> -  if (DEBUGME) {
> -        errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
> -  }
> +  DEBUG(
> +        dbgs() << "Retiring BB" << srcBlk->getNumber() << "\n";
> +  );
>  
>    BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
>  
> @@ -2245,11 +2252,11 @@ void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
>  
>    theEntry->landBlk = blk;
>  
> -  if (DEBUGME) {
> -    errs() << "setLoopLandBlock loop-header = BB"
> +  DEBUG(
> +    dbgs() << "setLoopLandBlock loop-header = BB"
>             << loopRep->getHeader()->getNumber()
>             << "  landing-block = BB" << blk->getNumber() << "\n";
> -  }
> +  );
>  } // setLoopLandBlock
>  
>  template<class PassT>
> @@ -2262,11 +2269,11 @@ void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
>  
>    theEntry->breakOnRegs.insert(regNum);
>  
> -  if (DEBUGME) {
> -    errs() << "addLoopBreakOnReg loop-header = BB"
> +  DEBUG(
> +    dbgs() << "addLoopBreakOnReg loop-header = BB"
>             << loopRep->getHeader()->getNumber()
>             << "  regNum = " << regNum << "\n";
> -  }
> +  );
>  } // addLoopBreakOnReg
>  
>  template<class PassT>
> @@ -2278,11 +2285,11 @@ void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
>    }
>    theEntry->contOnRegs.insert(regNum);
>  
> -  if (DEBUGME) {
> -    errs() << "addLoopContOnReg loop-header = BB"
> +  DEBUG(
> +    dbgs() << "addLoopContOnReg loop-header = BB"
>             << loopRep->getHeader()->getNumber()
>             << "  regNum = " << regNum << "\n";
> -  }
> +  );
>  } // addLoopContOnReg
>  
>  template<class PassT>
> @@ -2294,11 +2301,11 @@ void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
>    }
>    theEntry->breakInitRegs.insert(regNum);
>  
> -  if (DEBUGME) {
> -    errs() << "addLoopBreakInitReg loop-header = BB"
> +  DEBUG(
> +    dbgs() << "addLoopBreakInitReg loop-header = BB"
>             << loopRep->getHeader()->getNumber()
>             << "  regNum = " << regNum << "\n";
> -  }
> +  );
>  } // addLoopBreakInitReg
>  
>  template<class PassT>
> @@ -2310,11 +2317,11 @@ void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
>    }
>    theEntry->contInitRegs.insert(regNum);
>  
> -  if (DEBUGME) {
> -    errs() << "addLoopContInitReg loop-header = BB"
> +  DEBUG(
> +    dbgs() << "addLoopContInitReg loop-header = BB"
>             << loopRep->getHeader()->getNumber()
>             << "  regNum = " << regNum << "\n";
> -  }
> +  );
>  } // addLoopContInitReg
>  
>  template<class PassT>
> @@ -2327,11 +2334,11 @@ void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
>    }
>    theEntry->endbranchInitRegs.insert(regNum);
>  
> -  if (DEBUGME) {
> -        errs() << "addLoopEndbranchInitReg loop-header = BB"
> +  DEBUG(
> +        dbgs() << "addLoopEndbranchInitReg loop-header = BB"
>        << loopRep->getHeader()->getNumber()
>        << "  regNum = " << regNum << "\n";
> -  }
> +  );
>  } // addLoopEndbranchInitReg
>  
>  template<class PassT>
> @@ -2437,14 +2444,14 @@ CFGStructurizer<PassT>::findNearestCommonPostDom
>      }
>    }
>  
> -  if (DEBUGME) {
> -    errs() << "Common post dominator for exit blocks is ";
> +  DEBUG(
> +    dbgs() << "Common post dominator for exit blocks is ";
>      if (commonDom) {
> -          errs() << "BB" << commonDom->getNumber() << "\n";
> +          dbgs() << "BB" << commonDom->getNumber() << "\n";
>      } else {
> -      errs() << "NULL\n";
> +      dbgs() << "NULL\n";
>      }
> -  }
> +  );
>  
>    return commonDom;
>  } //findNearestCommonPostDom
> @@ -2591,7 +2598,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
>      case AMDGPU::BRANCH_COND_i32:
>      case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
>      default:
> -      assert(0 && "internal error");
> +      llvm_unreachable("internal error");
>      }
>      return -1;
>    }
> @@ -2603,7 +2610,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
>      case AMDGPU::BRANCH_COND_i32:
>      case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
>      default:
> -      assert(0 && "internal error");
> +      llvm_unreachable("internal error");
>      }
>      return -1;
>    }
> @@ -2613,7 +2620,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
>      case AMDGPU::JUMP_COND:
>      case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
>      default:
> -      assert(0 && "internal error");
> +      llvm_unreachable("internal error");
>      };
>      return -1;
>    }
> @@ -2623,7 +2630,7 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
>      case AMDGPU::JUMP_COND:
>      case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
>      default:
> -      assert(0 && "internal error");
> +      llvm_unreachable("internal error");
>      }
>      return -1;
>    }
> @@ -2753,10 +2760,10 @@ struct CFGStructTraits<AMDGPUCFGStructurizer> {
>      if (instr) {
>        assert(isReturn);
>      } else if (isReturn) {
> -      if (DEBUGME) {
> -        errs() << "BB" << blk->getNumber()
> +      DEBUG(
> +        dbgs() << "BB" << blk->getNumber()
>                 <<" is return block without RETURN instr\n";
> -      }
> +      );
>      }
>  
>      return  isReturn;
> -- 
> 1.8.3.1
> 

> From 5de07edc6f8a54fd28873a61ac84ad5fa16757e6 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Wed, 10 Jul 2013 23:49:09 +0200
> Subject: [PATCH 2/3] R600: Simplify AMDILCFGStructurize by removing templates
>  and assuming single exit
> 
> ---
>  lib/Target/R600/AMDGPU.h                 |    1 -
>  lib/Target/R600/AMDGPUTargetMachine.cpp  |    1 -
>  lib/Target/R600/AMDILCFGStructurizer.cpp | 3845 +++++++++++-------------------
>  3 files changed, 1341 insertions(+), 2506 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPU.h b/lib/Target/R600/AMDGPU.h
> index f284291..7621422 100644
> --- a/lib/Target/R600/AMDGPU.h
> +++ b/lib/Target/R600/AMDGPU.h
> @@ -31,7 +31,6 @@ FunctionPass *createR600ExpandSpecialInstrsPass(TargetMachine &tm);
>  FunctionPass *createR600EmitClauseMarkers(TargetMachine &tm);
>  FunctionPass *createR600Packetizer(TargetMachine &tm);
>  FunctionPass *createR600ControlFlowFinalizer(TargetMachine &tm);
> -FunctionPass *createAMDGPUCFGPreparationPass(TargetMachine &tm);
>  FunctionPass *createAMDGPUCFGStructurizerPass(TargetMachine &tm);
>  
>  // SI Passes
> diff --git a/lib/Target/R600/AMDGPUTargetMachine.cpp b/lib/Target/R600/AMDGPUTargetMachine.cpp
> index 7a14e50..1dc1b6b 100644
> --- a/lib/Target/R600/AMDGPUTargetMachine.cpp
> +++ b/lib/Target/R600/AMDGPUTargetMachine.cpp
> @@ -160,7 +160,6 @@ bool AMDGPUPassConfig::addPreSched2() {
>  bool AMDGPUPassConfig::addPreEmitPass() {
>    const AMDGPUSubtarget &ST = TM->getSubtarget<AMDGPUSubtarget>();
>    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
> -    addPass(createAMDGPUCFGPreparationPass(*TM));
>      addPass(createAMDGPUCFGStructurizerPass(*TM));
>      addPass(createR600ExpandSpecialInstrsPass(*TM));
>      addPass(&FinalizeMachineBundlesID);
> diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
> index bb8c217..6ace97a 100644
> --- a/lib/Target/R600/AMDILCFGStructurizer.cpp
> +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
> @@ -12,11 +12,13 @@
>  
>  #include "AMDGPU.h"
>  #include "AMDGPUInstrInfo.h"
> +#include "R600InstrInfo.h"
>  #include "llvm/Support/Debug.h"
>  #include "llvm/Support/raw_ostream.h"
>  #include "llvm/ADT/SCCIterator.h"
>  #include "llvm/ADT/SmallVector.h"
>  #include "llvm/ADT/Statistic.h"
> +#include "llvm/ADT/DepthFirstIterator.h"
>  #include "llvm/Analysis/DominatorInternals.h"
>  #include "llvm/Analysis/Dominators.h"
>  #include "llvm/CodeGen/MachineDominators.h"
> @@ -81,16 +83,6 @@ DEBUG( \
>  );
>  
>  #define INVALIDSCCNUM -1
> -#define INVALIDREGNUM 0
> -
> -template<class LoopinfoT>
> -void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
> -  for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
> -       iterEnd = LoopInfo.end();
> -       iter != iterEnd; ++iter) {
> -    (*iter)->print(OS, 0);
> -  }
> -}
>  
>  template<class NodeT>
>  void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
> @@ -110,40 +102,14 @@ void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
>  //
>  //===----------------------------------------------------------------------===//
>  
> +
>  namespace {
> -template<class PassT>
> -struct CFGStructTraits {
> -};
>  
> -template <class InstrT>
>  class BlockInformation {
>  public:
> -  bool isRetired;
> -  int  sccNum;
> -  //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
> -  //Instructions defining the corresponding successor.
> -  BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
> -};
> -
> -template <class BlockT, class InstrT, class RegiT>
> -class LandInformation {
> -public:
> -  BlockT *landBlk;
> -  std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
> -                                  //WHILELOOP(thisloop) init before entering
> -                                  //thisloop.
> -  std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
> -                                  //WHILELOOP(thisloop) init after entering
> -                                  //thisloop.
> -  std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
> -                                     //land block, branch cond on this reg.
> -  std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
> -                                     //endif" after ENDLOOP(thisloop) break
> -                                     //outerLoopOf(thisLoop).
> -  std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
> -                                    //endif" after ENDLOOP(thisloop) continue on
> -                                    //outerLoopOf(thisLoop).
> -  LandInformation() : landBlk(NULL) {}
> +  bool IsRetired;
> +  int  SccNum;
> +  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
>  };
>  
>  } // end anonymous namespace
> @@ -155,1030 +121,1219 @@ public:
>  //===----------------------------------------------------------------------===//
>  
>  namespace {
> -// bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
> -template<class PassT>
> -class  CFGStructurizer {
> +class AMDGPUCFGStructurizer : public MachineFunctionPass {
>  public:
> -  typedef enum {
> +  typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
> +  typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
> +  typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
> +
> +  enum PathToKind {
>      Not_SinglePath = 0,
>      SinglePath_InPath = 1,
>      SinglePath_NotInPath = 2
> -  } PathToKind;
> +  };
>  
> -public:
> -  typedef typename PassT::InstructionType         InstrT;
> -  typedef typename PassT::FunctionType            FuncT;
> -  typedef typename PassT::DominatortreeType       DomTreeT;
> -  typedef typename PassT::PostDominatortreeType   PostDomTreeT;
> -  typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
> -  typedef typename PassT::LoopinfoType            LoopInfoT;
> -
> -  typedef GraphTraits<FuncT *>                    FuncGTraits;
> -  //typedef FuncGTraits::nodes_iterator BlockIterator;
> -  typedef typename FuncT::iterator                BlockIterator;
> -
> -  typedef typename FuncGTraits::NodeType          BlockT;
> -  typedef GraphTraits<BlockT *>                   BlockGTraits;
> -  typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
> -  //typedef BlockGTraits::succ_iterator InstructionIterator;
> -  typedef typename BlockT::iterator               InstrIterator;
> -
> -  typedef CFGStructTraits<PassT>                  CFGTraits;
> -  typedef BlockInformation<InstrT>                BlockInfo;
> -  typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
> -
> -  typedef int                                     RegiT;
> -  typedef typename PassT::LoopType                LoopT;
> -  typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
> -        typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
> -        //landing info for loop break
> -  typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
> +  static char ID;
>  
> -public:
> -  CFGStructurizer();
> -  ~CFGStructurizer();
> +  AMDGPUCFGStructurizer(TargetMachine &tm) :
> +      MachineFunctionPass(ID), TM(tm),
> +      TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
> +      TRI(&TII->getRegisterInfo()) { }
> +
> +   const char *getPassName() const {
> +    return "AMD IL Control Flow Graph structurizer Pass";
> +  }
> +
> +  void getAnalysisUsage(AnalysisUsage &AU) const {
> +    AU.addPreserved<MachineFunctionAnalysis>();
> +    AU.addRequired<MachineFunctionAnalysis>();
> +    AU.addRequired<MachineDominatorTree>();
> +    AU.addRequired<MachinePostDominatorTree>();
> +    AU.addRequired<MachineLoopInfo>();
> +  }
>  
>    /// Perform the CFG structurization
> -  bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
> +  bool run();
>  
>    /// Perform the CFG preparation
> -  bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
> +  /// This step will remove every unconditional/dead jump instruction and make
> +  /// sure all loops have an exit block
> +  bool prepare();
> +
> +  bool runOnMachineFunction(MachineFunction &MF) {
> +    DEBUG(MF.dump(););
> +    OrderedBlks.clear();
> +    FuncRep = &MF;
> +    MLI = &getAnalysis<MachineLoopInfo>();
> +    DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
> +    MDT = &getAnalysis<MachineDominatorTree>();
> +    DEBUG(MDT->print(dbgs(), (const llvm::Module*)0););
> +    PDT = &getAnalysis<MachinePostDominatorTree>();
> +    DEBUG(PDT->print(dbgs()););
> +    prepare();
> +    run();
> +    DEBUG(MF.dump(););
> +    return true;
> +  }
>  
> -private:
> -  void reversePredicateSetter(typename BlockT::iterator);
> -  void   orderBlocks();
> -  void   printOrderedBlocks(llvm::raw_ostream &OS);
> -  int patternMatch(BlockT *CurBlock);
> -  int patternMatchGroup(BlockT *CurBlock);
> -
> -  int serialPatternMatch(BlockT *CurBlock);
> -  int ifPatternMatch(BlockT *CurBlock);
> -  int switchPatternMatch(BlockT *CurBlock);
> -  int loopendPatternMatch(BlockT *CurBlock);
> -  int loopPatternMatch(BlockT *CurBlock);
> -
> -  int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
> -  int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
> -  //int loopWithoutBreak(BlockT *);
> -
> -  void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
> -                        BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
> -  void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
> -                           BlockT *ContBlock, LoopT *contLoop);
> -  bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
> -  int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
> -                       BlockT *FalseBlock);
> -  int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
> -                          BlockT *FalseBlock);
> -  int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
> -                              BlockT *FalseBlock, BlockT **LandBlockPtr);
> -  void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
> -                                   BlockT *FalseBlock, BlockT *LandBlock,
> -                                   bool Detail = false);
> -  PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
> -                          bool AllowSideEntry = true);
> -  BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
> -                        bool AllowSideEntry = true);
> -  int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
> -  void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
> -
> -  void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
> -                            BlockT *TrueBlock, BlockT *FalseBlock,
> -                            BlockT *LandBlock);
> -  void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
> -  void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
> -                           BlockT *ExitLandBlock, RegiT SetReg);
> -  void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
> -                           RegiT SetReg);
> -  BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
> -                                std::set<BlockT*> &ExitBlockSet,
> -                                BlockT *ExitLandBlk);
> -  BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
> -                                BlockTSmallerVector &ExitingBlocks,
> -                                BlockTSmallerVector &ExitBlocks);
> -  BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
> -  void removeUnconditionalBranch(BlockT *SrcBlock);
> -  void removeRedundantConditionalBranch(BlockT *SrcBlock);
> -  void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
> -
> -  void removeSuccessor(BlockT *SrcBlock);
> -  BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
> -  BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
> -
> -  void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
> -                          InstrIterator InsertPos);
> -
> -  void recordSccnum(BlockT *SrcBlock, int SCCNum);
> -  int getSCCNum(BlockT *srcBlk);
> -
> -  void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
> -  bool isRetiredBlock(BlockT *SrcBlock);
> -  bool isActiveLoophead(BlockT *CurBlock);
> -  bool needMigrateBlock(BlockT *Block);
> -
> -  BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
> -                              BlockTSmallerVector &exitBlocks,
> -                              std::set<BlockT*> &ExitBlockSet);
> -  void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
> -  BlockT *getLoopLandBlock(LoopT *LoopRep);
> -  LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
> -
> -  void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
> -  void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
> -  void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
> -  void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
> -  void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
> -
> -  bool hasBackEdge(BlockT *curBlock);
> -  unsigned getLoopDepth  (LoopT *LoopRep);
> -  int countActiveBlock(
> -    typename SmallVectorImpl<BlockT *>::const_iterator IterStart,
> -    typename SmallVectorImpl<BlockT *>::const_iterator IterEnd);
> -    BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
> -  BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
> +protected:
> +  TargetMachine &TM;
> +  MachineDominatorTree *MDT;
> +  MachinePostDominatorTree *PDT;
> +  MachineLoopInfo *MLI;
> +  const R600InstrInfo *TII;
> +  const AMDGPURegisterInfo *TRI;
> +
> +  // PRINT FUNCTIONS
> +  /// Print the ordered Blocks.
> +  void printOrderedBlocks() const {
> +    size_t i = 0;
> +    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
> +        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
> +      dbgs() << "BB" << (*iterBlk)->getNumber();
> +      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
> +      if (i != 0 && i % 10 == 0) {
> +        dbgs() << "\n";
> +      } else {
> +        dbgs() << " ";
> +      }
> +    }
> +  }
> +  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
> +    for (MachineLoop::iterator iter = LoopInfo.begin(),
> +         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
> +      (*iter)->print(dbgs(), 0);
> +    }
> +  }
> +
> +  // UTILITY FUNCTIONS
> +  int getSCCNum(MachineBasicBlock *MBB) const;
> +  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
> +  bool hasBackEdge(MachineBasicBlock *MBB) const;
> +  static unsigned getLoopDepth(MachineLoop *LoopRep);
> +  bool isRetiredBlock(MachineBasicBlock *MBB) const;
> +  bool isActiveLoophead(MachineBasicBlock *MBB) const;
> +  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
> +      bool AllowSideEntry = true) const;
> +  int countActiveBlock(MBBVector::const_iterator It,
> +      MBBVector::const_iterator E) const;
> +  bool needMigrateBlock(MachineBasicBlock *MBB) const;
> +
> +  // Utility Functions
> +  void reversePredicateSetter(MachineBasicBlock::iterator I);
> +  /// Compute the reversed DFS post order of Blocks
> +  void orderBlocks(MachineFunction *MF);
> +
> +  // Functions originally from CFGStructTraits
> +  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
> +      DebugLoc DL = DebugLoc());
> +  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
> +    DebugLoc DL = DebugLoc());
> +  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
> +  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
> +      DebugLoc DL);
> +  void insertCondBranchBefore(MachineBasicBlock *MBB,
> +      MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
> +      DebugLoc DL);
> +  void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
> +  static int getBranchNzeroOpcode(int OldOpcode);
> +  static int getBranchZeroOpcode(int OldOpcode);
> +  static int getContinueNzeroOpcode(int OldOpcode);
> +  static int getContinueZeroOpcode(int OldOpcode);
> +  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
> +  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
> +  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
> +      MachineInstr *MI);
> +  static bool isCondBranch(MachineInstr *MI);
> +  static bool isUncondBranch(MachineInstr *MI);
> +  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
> +  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
> +  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
> +  ///
> +  /// BB with backward-edge could have move instructions after the branch
> +  /// instruction.  Such move instruction "belong to" the loop backward-edge.
> +  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
> +  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
> +  static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
> +  static MachineInstr *getLoopBreakInstr(MachineBasicBlock *MBB);
> +  static bool isReturnBlock(MachineBasicBlock *MBB);
> +  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
> +      MachineBasicBlock *SrcMBB) ;
> +  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
> +  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
> +  /// because the AMDGPU instruction is not recognized as a terminator.
> +  /// TODO: fix this and retire this routine.
> +  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
> +      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
> +  static void wrapup(MachineBasicBlock *MBB);
> +
> +
> +  int patternMatch(MachineBasicBlock *MBB);
> +  int patternMatchGroup(MachineBasicBlock *MBB);
> +  int serialPatternMatch(MachineBasicBlock *MBB);
> +  int ifPatternMatch(MachineBasicBlock *MBB);
> +  int loopendPatternMatch();
> +  int mergeLoop(MachineLoop *LoopRep);
> +  int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
> +
> +  void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
> +      MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
> +      MachineLoop *ContLoop);
> +  /// Return true iff Src1MBB->succ_size() == 0 and Src1MBB and Src2MBB are in
> +  /// the same loop with LoopLandInfo. Without explicitly keeping track of
> +  /// loopContBlks and loopBreakBlks, this is a method to get the information.
> +  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
> +      MachineBasicBlock *Src2MBB);
> +  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
> +      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
> +  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
> +      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
> +  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> +      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> +      MachineBasicBlock **LandMBBPtr);
> +  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> +      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> +      MachineBasicBlock *LandMBB, bool Detail = false);
> +  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
> +      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
> +  void mergeSerialBlock(MachineBasicBlock *DstMBB,
> +      MachineBasicBlock *SrcMBB);
> +
> +  void mergeIfthenelseBlock(MachineInstr *BranchMI,
> +      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
> +      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
> +  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
> +      MachineBasicBlock *LandMBB);
> +  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
> +      MachineBasicBlock *LandMBB);
> +  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
> +      MachineBasicBlock *ContMBB);
> +  /// normalizeInfiniteLoopExit change
> +  ///   B1:
> +  ///        uncond_br LoopHeader
> +  ///
> +  /// to
> +  ///   B1:
> +  ///        cond_br 1 LoopHeader dummyExit
> +  /// and return the newly added dummy exit block
> +  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
> +  void removeUnconditionalBranch(MachineBasicBlock *MBB);
> +  /// Remove duplicate branch instructions in a block.
> +  /// For instance
> +  /// B0:
> +  ///    cond_br X B1 B2
> +  ///    cond_br X B1 B2
> +  /// is transformed to
> +  /// B0:
> +  ///    cond_br X B1 B2
> +  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
> +  void addDummyExitBlock(
> +      SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> &RetMBB);
> +  void removeSuccessor(MachineBasicBlock *MBB);
> +  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
> +      MachineBasicBlock *PredMBB);
> +  void migrateInstruction(MachineBasicBlock *SrcMBB,
> +      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
> +  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
> +  void retireBlock(MachineBasicBlock *MBB);
> +  void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL);
> +
> +  MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
> +  /// This is a workaround for findNearestCommonDominator not being available
> +  /// for post dominators; a proper fix should go to Dominators.h.
> +  MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
> +      MachineBasicBlock *MBB2);
>  
>  private:
> -  DomTreeT *domTree;
> -  PostDomTreeT *postDomTree;
> -  LoopInfoT *loopInfo;
> -  PassT *passRep;
> -  FuncT *funcRep;
> -
> -  BlockInfoMap blockInfoMap;
> -  LoopLandInfoMap loopLandInfoMap;
> -  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
> -  const AMDGPURegisterInfo *TRI;
> +  MBBInfoMap BlockInfoMap;
> +  LoopLandInfoMap LLInfoMap;
> +  std::map<MachineLoop *, bool> Visited;
> +  MachineFunction *FuncRep;
> +  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
> +};
> +
> +int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
> +  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
> +  if (It == BlockInfoMap.end())
> +    return INVALIDSCCNUM;
> +  return (*It).second->SccNum;
> +}
> +
> +MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
> +    const {
> +  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
> +  if (It == LLInfoMap.end())
> +    return NULL;
> +  return (*It).second;
> +}
> +
> +bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
> +  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
> +  if (!LoopRep)
> +    return false;
> +  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
> +  return MBB->isSuccessor(LoopHeader);
> +}
>  
> -};  //template class CFGStructurizer
> +unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
> +  return LoopRep ? LoopRep->getLoopDepth() : 0;
> +}
>  
> -template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
> -  : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
> +bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
> +  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
> +  if (It == BlockInfoMap.end())
> +    return false;
> +  return (*It).second->IsRetired;
>  }
>  
> -template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
> -  for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
> -       E = blockInfoMap.end(); I != E; ++I) {
> -    delete I->second;
> +bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
> +  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
> +  while (LoopRep && LoopRep->getHeader() == MBB) {
> +    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
> +    if(!LoopLand)
> +      return true;
> +    if (!isRetiredBlock(LoopLand))
> +      return true;
> +    LoopRep = LoopRep->getParentLoop();
> +  }
> +  return false;
> +}
> +AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
> +    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
> +    bool AllowSideEntry) const {
> +  assert(DstMBB);
> +  if (SrcMBB == DstMBB)
> +    return SinglePath_InPath;
> +  while (SrcMBB && SrcMBB->succ_size() == 1) {
> +    SrcMBB = *SrcMBB->succ_begin();
> +    if (SrcMBB == DstMBB)
> +      return SinglePath_InPath;
> +    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
> +      return Not_SinglePath;
>    }
> +  if (SrcMBB && SrcMBB->succ_size()==0)
> +    return SinglePath_NotInPath;
> +  return Not_SinglePath;
>  }
>  
> -template<class PassT>
> -bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
> -                                     const AMDGPURegisterInfo * tri) {
> -  passRep = &pass;
> -  funcRep = &func;
> -  TRI = tri;
> +int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
> +    MBBVector::const_iterator E) const {
> +  int Count = 0;
> +  while (It != E) {
> +    if (!isRetiredBlock(*It))
> +      ++Count;
> +    ++It;
> +  }
> +  return Count;
> +}
>  
> -  bool changed = false;
> +bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
> +  unsigned BlockSizeThreshold = 30;
> +  unsigned CloneInstrThreshold = 100;
> +  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
>  
> -  //FIXME: if not reducible flow graph, make it so ???
> +  if(!MultiplePreds)
> +    return false;
> +  unsigned BlkSize = MBB->size();
> +  return ((BlkSize > BlockSizeThreshold) &&
> +      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
> +}
>  
> -  DEBUG(
> -        dbgs() << "AMDGPUCFGStructurizer::prepare\n";
> -  );
> +void AMDGPUCFGStructurizer::reversePredicateSetter(
> +    MachineBasicBlock::iterator I) {
> +  while (I--) {
> +    if (I->getOpcode() == AMDGPU::PRED_X) {
> +      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
> +      case OPCODE_IS_ZERO_INT:
> +        static_cast<MachineInstr *>(I)->getOperand(2)
> +            .setImm(OPCODE_IS_NOT_ZERO_INT);
> +        return;
> +      case OPCODE_IS_NOT_ZERO_INT:
> +        static_cast<MachineInstr *>(I)->getOperand(2)
> +            .setImm(OPCODE_IS_ZERO_INT);
> +        return;
> +      case OPCODE_IS_ZERO:
> +        static_cast<MachineInstr *>(I)->getOperand(2)
> +            .setImm(OPCODE_IS_NOT_ZERO);
> +        return;
> +      case OPCODE_IS_NOT_ZERO:
> +        static_cast<MachineInstr *>(I)->getOperand(2)
> +            .setImm(OPCODE_IS_ZERO);
> +        return;
> +      default:
> +        llvm_unreachable("PRED_X Opcode invalid!");
> +      }
> +    }
> +  }
> +}
>  
> -  loopInfo = CFGTraits::getLoopInfo(pass);
> -  DEBUG(
> -    dbgs() << "LoopInfo:\n";
> -    PrintLoopinfo(*loopInfo, dbgs());
> -  );
> +void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
> +    int NewOpcode, DebugLoc DL) {
> + MachineInstr *MI = MBB->getParent()
> +    ->CreateMachineInstr(TII->get(NewOpcode), DL);
> +  MBB->push_back(MI);
> +  //assume the instruction doesn't take any reg operand ...
> +  SHOWNEWINSTR(MI);
> +}
>  
> -  orderBlocks();
> -  DEBUG(
> -    for (typename SmallVectorImpl<BlockT *>::const_iterator
> -        iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
> -        iterBlk != iterBlkEnd;
> -        ++iterBlk) {
> -      (*iterBlk)->dump();
> +MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
> +    int NewOpcode, DebugLoc DL) {
> +  MachineInstr *MI =
> +      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
> +  if (MBB->begin() != MBB->end())
> +    MBB->insert(MBB->begin(), MI);
> +  else
> +    MBB->push_back(MI);
> +  SHOWNEWINSTR(MI);
> +  return MI;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
> +    MachineBasicBlock::iterator I, int NewOpcode) {
> +  MachineInstr *OldMI = &(*I);
> +  MachineBasicBlock *MBB = OldMI->getParent();
> +  MachineInstr *NewMBB =
> +      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
> +  MBB->insert(I, NewMBB);
> +  //assume the instruction doesn't take any reg operand ...
> +  SHOWNEWINSTR(NewMBB);
> +  return NewMBB;
> +}
> +
> +void AMDGPUCFGStructurizer::insertCondBranchBefore(
> +    MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
> +  MachineInstr *OldMI = &(*I);
> +  MachineBasicBlock *MBB = OldMI->getParent();
> +  MachineFunction *MF = MBB->getParent();
> +  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
> +  MBB->insert(I, NewMI);
> +  MachineInstrBuilder MIB(*MF, NewMI);
> +  MIB.addReg(OldMI->getOperand(1).getReg(), false);
> +  SHOWNEWINSTR(NewMI);
> +  //erase later oldInstr->eraseFromParent();
> +}
> +
> +void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
> +    MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
> +    DebugLoc DL) {
> +  MachineFunction *MF = blk->getParent();
> +  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
> +  //insert before
> +  blk->insert(I, NewInstr);
> +  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
> +  SHOWNEWINSTR(NewInstr);
> +}
> +
> +void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
> +    int NewOpcode, int RegNum) {
> +  MachineFunction *MF = MBB->getParent();
> +  MachineInstr *NewInstr =
> +    MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
> +  MBB->push_back(NewInstr);
> +  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
> +  SHOWNEWINSTR(NewInstr);
> +}
> +
> +int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
> +  switch(OldOpcode) {
> +  case AMDGPU::JUMP_COND:
> +  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
> +  case AMDGPU::BRANCH_COND_i32:
> +  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
> +  default: llvm_unreachable("internal error");
> +  }
> +  return -1;
> +}
> +
> +int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
> +  switch(OldOpcode) {
> +  case AMDGPU::JUMP_COND:
> +  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
> +  case AMDGPU::BRANCH_COND_i32:
> +  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
> +  default: llvm_unreachable("internal error");
> +  }
> +  return -1;
> +}
> +
> +int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
> +  switch(OldOpcode) {
> +  case AMDGPU::JUMP_COND:
> +  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
> +  default: llvm_unreachable("internal error");
> +  };
> +  return -1;
> +}
> +
> +int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
> +  switch(OldOpcode) {
> +  case AMDGPU::JUMP_COND:
> +  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
> +  default: llvm_unreachable("internal error");
> +  }
> +  return -1;
> +}
> +
> +MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
> +  return MI->getOperand(0).getMBB();
> +}
> +
> +void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
> +    MachineBasicBlock *MBB) {
> +  MI->getOperand(0).setMBB(MBB);
> +}
> +
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
> +    MachineInstr *MI) {
> +  assert(MBB->succ_size() == 2);
> +  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
> +  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
> +  MachineBasicBlock::succ_iterator Next = It;
> +  ++Next;
> +  return (*It == TrueBranch) ? *Next : *It;
> +}
> +
> +bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
> +  switch (MI->getOpcode()) {
> +    case AMDGPU::JUMP_COND:
> +    case AMDGPU::BRANCH_COND_i32:
> +    case AMDGPU::BRANCH_COND_f32: return true;
> +  default:
> +    return false;
> +  }
> +  return false;
> +}
> +
> +bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
> +  switch (MI->getOpcode()) {
> +  case AMDGPU::JUMP:
> +  case AMDGPU::BRANCH:
> +    return true;
> +  default:
> +    return false;
> +  }
> +  return false;
> +}
> +
> +DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
> +  //get DebugLoc from the last MachineBasicBlock instruction with debug info
> +  DebugLoc DL;
> +  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
> +      ++It) {
> +    MachineInstr *instr = &(*It);
> +    if (instr->getDebugLoc().isUnknown() == false)
> +      DL = instr->getDebugLoc();
> +  }
> +  return DL;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
> +    MachineBasicBlock *MBB) {
> +  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
> +  MachineInstr *MI = &*It;
> +  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
> +    return MI;
> +  return NULL;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
> +    MachineBasicBlock *MBB) {
> +  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
> +      It != E; ++It) {
> +    // FIXME: Simplify
> +    MachineInstr *MI = &*It;
> +    if (MI) {
> +      if (isCondBranch(MI) || isUncondBranch(MI))
> +        return MI;
> +      else if (!TII->isMov(MI->getOpcode()))
> +        break;
>      }
> -    dbgs() << "Ordered blocks:\n";
> -    printOrderedBlocks(dbgs());
> -  );
> +  }
> +  return NULL;
> +}
>  
> -  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
> -
> -  for (typename LoopInfoT::iterator iter = loopInfo->begin(),
> -       iterEnd = loopInfo->end();
> -       iter != iterEnd; ++iter) {
> -    LoopT* loopRep = (*iter);
> -    BlockTSmallerVector exitingBlks;
> -    loopRep->getExitingBlocks(exitingBlks);
> -    
> -    if (exitingBlks.size() == 0) {
> -      BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
> -      if (dummyExitBlk != NULL)
> -        retBlks.push_back(dummyExitBlk);
> +MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
> +  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
> +  if (It != MBB->rend()) {
> +    MachineInstr *instr = &(*It);
> +    if (instr->getOpcode() == AMDGPU::RETURN)
> +      return instr;
> +  }
> +  return NULL;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
> +  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
> +  if (It != MBB->rend()) {
> +    MachineInstr *MI = &(*It);
> +    if (MI->getOpcode() == AMDGPU::CONTINUE)
> +      return MI;
> +  }
> +  return NULL;
> +}
> +
> +MachineInstr *AMDGPUCFGStructurizer::getLoopBreakInstr(MachineBasicBlock *MBB) {
> +  for (MachineBasicBlock::iterator It = MBB->begin(); (It != MBB->end());
> +      ++It) {
> +    MachineInstr *MI = &(*It);
> +    if (MI->getOpcode() == AMDGPU::PREDICATED_BREAK)
> +      return MI;
> +  }
> +  return NULL;
> +}
> +
> +bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
> +  MachineInstr *MI = getReturnInstr(MBB);
> +  bool IsReturn = (MBB->succ_size() == 0);
> +  if (MI)
> +    assert(IsReturn);
> +  else if (IsReturn)
> +    DEBUG(
> +      dbgs() << "BB" << MBB->getNumber()
> +             <<" is return block without RETURN instr\n";);
> +  return  IsReturn;
> +}
> +
> +void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
> +    MachineBasicBlock *SrcMBB) {
> +  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
> +       iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
> +    DstMBB->addSuccessor(*It);  // *It's predecessor is also taken care of
> +}
> +
> +MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
> +  MachineFunction *Func = MBB->getParent();
> +  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
> +  Func->push_back(NewMBB);  //insert to function
> +  for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
> +      It != E; ++It) {
> +    MachineInstr *MI = Func->CloneMachineInstr(It);
> +    NewMBB->push_back(MI);
> +  }
> +  return NewMBB;
> +}
> +
> +void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
> +    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
> +    MachineBasicBlock *NewBlk) {
> +  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
> +  if (BranchMI && isCondBranch(BranchMI) &&
> +      getTrueBranch(BranchMI) == OldMBB)
> +    setTrueBranch(BranchMI, NewBlk);
> +}
> +
> +void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
> +  assert((!MBB->getParent()->getJumpTableInfo()
> +          || MBB->getParent()->getJumpTableInfo()->isEmpty())
> +         && "found a jump table");
> +
> +   //collect continue right before endloop
> +   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
> +   MachineBasicBlock::iterator Pre = MBB->begin();
> +   MachineBasicBlock::iterator E = MBB->end();
> +   MachineBasicBlock::iterator It = Pre;
> +   while (It != E) {
> +     if (Pre->getOpcode() == AMDGPU::CONTINUE
> +         && It->getOpcode() == AMDGPU::ENDLOOP)
> +       ContInstr.push_back(Pre);
> +     Pre = It;
> +     ++It;
> +   }
> +
> +   //delete continue right before endloop
> +   for (unsigned i = 0; i < ContInstr.size(); ++i)
> +      ContInstr[i]->eraseFromParent();
> +
> +   // TODO: fix up the jump table so later phases won't be confused. If
> +   // jumpTableInfo->isEmpty() == false, the jump table needs to be cleaned,
> +   // but there isn't such an interface yet. Alternatively, replace all the
> +   // other blocks in the jump table with the entryBlk.
> +
> +}
> +
> +
> +bool AMDGPUCFGStructurizer::prepare() {
> +  bool Changed = false;
> +
> +  //FIXME: if not reducible flow graph, make it so ???
> +
> +  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
> +
> +  orderBlocks(FuncRep);
> +
> +  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
> +
> +  // Add an ExitBlk to loops that don't have one
> +  for (MachineLoopInfo::iterator It = MLI->begin(),
> +       E = MLI->end(); It != E; ++It) {
> +    MachineLoop *LoopRep = (*It);
> +    MBBVector ExitingMBBs;
> +    LoopRep->getExitingBlocks(ExitingMBBs);
> +
> +    if (ExitingMBBs.size() == 0) {
> +      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
> +      if (DummyExitBlk)
> +        RetBlks.push_back(DummyExitBlk);
>      }
>    }
>  
>    // Remove unconditional branch instr.
>    // Add dummy exit block iff there are multiple returns.
> -
> -  for (typename SmallVectorImpl<BlockT *>::const_iterator
> -       iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
> -       iterBlk != iterEndBlk;
> -       ++iterBlk) {
> -    BlockT *curBlk = *iterBlk;
> -    removeUnconditionalBranch(curBlk);
> -    removeRedundantConditionalBranch(curBlk);
> -    if (CFGTraits::isReturnBlock(curBlk)) {
> -      retBlks.push_back(curBlk);
> +  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
> +       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
> +    MachineBasicBlock *MBB = *It;
> +    removeUnconditionalBranch(MBB);
> +    removeRedundantConditionalBranch(MBB);
> +    if (isReturnBlock(MBB)) {
> +      RetBlks.push_back(MBB);
>      }
> -    assert(curBlk->succ_size() <= 2);
> -  } //for
> +    assert(MBB->succ_size() <= 2);
> +  }
>  
> -  if (retBlks.size() >= 2) {
> -    addDummyExitBlock(retBlks);
> -    changed = true;
> +  if (RetBlks.size() >= 2) {
> +    addDummyExitBlock(RetBlks);
> +    Changed = true;
>    }
>  
> -  return changed;
> -} //CFGStructurizer::prepare
> +  return Changed;
> +}
>  
> -template<class PassT>
> -bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
> -    const AMDGPURegisterInfo * tri) {
> -  passRep = &pass;
> -  funcRep = &func;
> -  TRI = tri;
> +bool AMDGPUCFGStructurizer::run() {
>  
>    //Assume reducible CFG...
> -  DEBUG(
> -    dbgs() << "AMDGPUCFGStructurizer::run\n";
> -    func.viewCFG();
> -  );
> -
> -  domTree = CFGTraits::getDominatorTree(pass);
> -  DEBUG(
> -    domTree->print(dbgs(), (const llvm::Module*)0);
> -  );
> -
> -  postDomTree = CFGTraits::getPostDominatorTree(pass);
> -  DEBUG(
> -    postDomTree->print(dbgs());
> -  );
> -
> -  loopInfo = CFGTraits::getLoopInfo(pass);
> -  DEBUG(
> -    dbgs() << "LoopInfo:\n";
> -    PrintLoopinfo(*loopInfo, dbgs());
> -  );
> +  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n";FuncRep->viewCFG(););
>  
> -  orderBlocks();
>  #ifdef STRESSTEST
>    //Use the worse block ordering to test the algorithm.
>    ReverseVector(orderedBlks);
>  #endif
>  
> -  DEBUG(
> -    dbgs() << "Ordered blocks:\n";
> -    printOrderedBlocks(dbgs());
> -  );
> -  int numIter = 0;
> -  bool finish = false;
> -  BlockT *curBlk;
> -  bool makeProgress = false;
> -  int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
> -                                        orderedBlks.end());
> +  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
> +  int NumIter = 0;
> +  bool Finish = false;
> +  MachineBasicBlock *MBB;
> +  bool MakeProgress = false;
> +  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
> +                                        OrderedBlks.end());
>  
>    do {
> -    ++numIter;
> +    ++NumIter;
>      DEBUG(
> -      dbgs() << "numIter = " << numIter
> -             << ", numRemaintedBlk = " << numRemainedBlk << "\n";
> +      dbgs() << "numIter = " << NumIter
> +             << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
>      );
>  
> -    typename SmallVectorImpl<BlockT *>::const_iterator
> -      iterBlk = orderedBlks.begin();
> -    typename SmallVectorImpl<BlockT *>::const_iterator
> -      iterBlkEnd = orderedBlks.end();
> +    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
> +        OrderedBlks.begin();
> +    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
> +        OrderedBlks.end();
>  
> -    typename SmallVectorImpl<BlockT *>::const_iterator
> -      sccBeginIter = iterBlk;
> -    BlockT *sccBeginBlk = NULL;
> -    int sccNumBlk = 0;  // The number of active blocks, init to a
> +    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
> +        It;
> +    MachineBasicBlock *SccBeginMBB = NULL;
> +    int SccNumBlk = 0;  // The number of active blocks, init to a
>                          // maximum possible number.
> -    int sccNumIter;     // Number of iteration in this SCC.
> +    int SccNumIter;     // Number of iteration in this SCC.
>  
> -    while (iterBlk != iterBlkEnd) {
> -      curBlk = *iterBlk;
> +    while (It != E) {
> +      MBB = *It;
>  
> -      if (sccBeginBlk == NULL) {
> -        sccBeginIter = iterBlk;
> -        sccBeginBlk = curBlk;
> -        sccNumIter = 0;
> -        sccNumBlk = numRemainedBlk; // Init to maximum possible number.
> +      if (!SccBeginMBB) {
> +        SccBeginIter = It;
> +        SccBeginMBB = MBB;
> +        SccNumIter = 0;
> +        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
>          DEBUG(
> -              dbgs() << "start processing SCC" << getSCCNum(sccBeginBlk);
> +              dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
>                dbgs() << "\n";
>          );
>        }
>  
> -      if (!isRetiredBlock(curBlk)) {
> -        patternMatch(curBlk);
> -      }
> +      if (!isRetiredBlock(MBB))
> +        patternMatch(MBB);
>  
> -      ++iterBlk;
> +      ++It;
>  
> -      bool contNextScc = true;
> -      if (iterBlk == iterBlkEnd
> -          || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
> +      bool ContNextScc = true;
> +      if (It == E
> +          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
>          // Just finish one scc.
> -        ++sccNumIter;
> -        int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
> -        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
> +        ++SccNumIter;
> +        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
> +        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
>            DEBUG(
> -            dbgs() << "Can't reduce SCC " << getSCCNum(curBlk)
> -                   << ", sccNumIter = " << sccNumIter;
> +            dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
> +                   << ", sccNumIter = " << SccNumIter;
>              dbgs() << "doesn't make any progress\n";
>            );
> -          contNextScc = true;
> -        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
> -          sccNumBlk = sccRemainedNumBlk;
> -          iterBlk = sccBeginIter;
> -          contNextScc = false;
> +          ContNextScc = true;
> +        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
> +          SccNumBlk = sccRemainedNumBlk;
> +          It = SccBeginIter;
> +          ContNextScc = false;
>            DEBUG(
> -            dbgs() << "repeat processing SCC" << getSCCNum(curBlk)
> -                   << "sccNumIter = " << sccNumIter << "\n";
> -            func.viewCFG();
> +            dbgs() << "repeat processing SCC" << getSCCNum(MBB)
> +                   << "sccNumIter = " << SccNumIter << "\n";
> +            FuncRep->viewCFG();
>            );
>          } else {
>            // Finish the current scc.
> -          contNextScc = true;
> +          ContNextScc = true;
>          }
>        } else {
>          // Continue on next component in the current scc.
> -        contNextScc = false;
> +        ContNextScc = false;
>        }
>  
> -      if (contNextScc) {
> -        sccBeginBlk = NULL;
> -      }
> +      if (ContNextScc)
> +        SccBeginMBB = NULL;
>      } //while, "one iteration" over the function.
>  
> -    BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
> -    if (entryBlk->succ_size() == 0) {
> -      finish = true;
> +    MachineBasicBlock *EntryMBB =
> +        GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
> +    if (EntryMBB->succ_size() == 0) {
> +      Finish = true;
>        DEBUG(
>          dbgs() << "Reduce to one block\n";
>        );
>      } else {
> -      int newnumRemainedBlk
> -        = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
> +      int NewnumRemainedBlk
> +        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
>        // consider cloned blocks ??
> -      if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
> -        makeProgress = true;
> -        numRemainedBlk = newnumRemainedBlk;
> +      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
> +        MakeProgress = true;
> +        NumRemainedBlk = NewnumRemainedBlk;
>        } else {
> -        makeProgress = false;
> +        MakeProgress = false;
>          DEBUG(
>            dbgs() << "No progress\n";
>          );
>        }
>      }
> -  } while (!finish && makeProgress);
> +  } while (!Finish && MakeProgress);
>  
>    // Misc wrap up to maintain the consistency of the Function representation.
> -  CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
> +  wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
>  
>    // Detach retired Block, release memory.
> -  for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
> -       iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
> -    if ((*iterMap).second && (*iterMap).second->isRetired) {
> -      assert(((*iterMap).first)->getNumber() != -1);
> +  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
> +      It != E; ++It) {
> +    if ((*It).second && (*It).second->IsRetired) {
> +      assert(((*It).first)->getNumber() != -1);
>        DEBUG(
> -        dbgs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
> +        dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
>        );
> -      (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
> +      (*It).first->eraseFromParent();  //Remove from the parent Function.
>      }
> -    delete (*iterMap).second;
> +    delete (*It).second;
>    }
> -  blockInfoMap.clear();
> -
> -  // clear loopLandInfoMap
> -  for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
> -       iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
> -    delete (*iterMap).second;
> -  }
> -  loopLandInfoMap.clear();
> +  BlockInfoMap.clear();
> +  LLInfoMap.clear();
>  
>    DEBUG(
> -    func.viewCFG();
> +    FuncRep->viewCFG();
>    );
>  
> -  if (!finish) {
> +  if (!Finish)
>      llvm_unreachable("IRREDUCIBL_CF");
> -  }
>  
>    return true;
> -} //CFGStructurizer::run
> -
> -/// Print the ordered Blocks.
> -///
> -template<class PassT>
> -void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
> -  size_t i = 0;
> -  for (typename SmallVectorImpl<BlockT *>::const_iterator
> -      iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
> -       iterBlk != iterBlkEnd;
> -       ++iterBlk, ++i) {
> -    os << "BB" << (*iterBlk)->getNumber();
> -    os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
> -    if (i != 0 && i % 10 == 0) {
> -      os << "\n";
> -    } else {
> -      os << " ";
> -    }
> -  }
> -} //printOrderedBlocks
> -
> -/// Compute the reversed DFS post order of Blocks
> -///
> -template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
> -  int sccNum = 0;
> -  BlockT *bb;
> -  for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
> -       sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
> -    std::vector<BlockT *> &sccNext = *sccIter;
> -    for (typename std::vector<BlockT *>::const_iterator
> -         blockIter = sccNext.begin(), blockEnd = sccNext.end();
> +}
> +
> +
> +
> +void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
> +  int SccNum = 0;
> +  MachineBasicBlock *MBB;
> +  for (scc_iterator<MachineFunction *> It = scc_begin(MF), E = scc_end(MF);
> +      It != E; ++It, ++SccNum) {
> +    std::vector<MachineBasicBlock *> &SccNext = *It;
> +    for (std::vector<MachineBasicBlock *>::const_iterator
> +         blockIter = SccNext.begin(), blockEnd = SccNext.end();
>           blockIter != blockEnd; ++blockIter) {
> -      bb = *blockIter;
> -      orderedBlks.push_back(bb);
> -      recordSccnum(bb, sccNum);
> +      MBB = *blockIter;
> +      OrderedBlks.push_back(MBB);
> +      recordSccnum(MBB, SccNum);
>      }
>    }
>  
>    //walk through all the block in func to check for unreachable
> -  for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
> -       blockEnd1 = FuncGTraits::nodes_end(funcRep);
> -       blockIter1 != blockEnd1; ++blockIter1) {
> -    BlockT *bb = &(*blockIter1);
> -    sccNum = getSCCNum(bb);
> -    if (sccNum == INVALIDSCCNUM) {
> -      dbgs() << "unreachable block BB" << bb->getNumber() << "\n";
> -    }
> +  typedef GraphTraits<MachineFunction *> GTM;
> +  MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
> +  for (; It != E; ++It) {
> +    MachineBasicBlock *MBB = &(*It);
> +    SccNum = getSCCNum(MBB);
> +    if (SccNum == INVALIDSCCNUM)
> +      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
>    }
> -} //orderBlocks
> +}
>  
> -template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
> -  int numMatch = 0;
> -  int curMatch;
> +int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
> +  int NumMatch = 0;
> +  int CurMatch;
>  
>    DEBUG(
> -        dbgs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
> +        dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
>    );
>  
> -  while ((curMatch = patternMatchGroup(curBlk)) > 0) {
> -    numMatch += curMatch;
> -  }
> +  while ((CurMatch = patternMatchGroup(MBB)) > 0)
> +    NumMatch += CurMatch;
>  
>    DEBUG(
> -        dbgs() << "End patternMatch BB" << curBlk->getNumber()
> -      << ", numMatch = " << numMatch << "\n";
> +        dbgs() << "End patternMatch BB" << MBB->getNumber()
> +      << ", numMatch = " << NumMatch << "\n";
>    );
>  
> -  return numMatch;
> -} //patternMatch
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
> -  int numMatch = 0;
> -  numMatch += serialPatternMatch(curBlk);
> -  numMatch += ifPatternMatch(curBlk);
> -  numMatch += loopendPatternMatch(curBlk);
> -  numMatch += loopPatternMatch(curBlk);
> -  return numMatch;
> -}//patternMatchGroup
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
> -  if (curBlk->succ_size() != 1) {
> +  return NumMatch;
> +}
> +
> +int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
> +  int NumMatch = 0;
> +  NumMatch += loopendPatternMatch();
> +  NumMatch += serialPatternMatch(MBB);
> +  NumMatch += ifPatternMatch(MBB);
> +  return NumMatch;
> +}
> +
> +
> +int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
> +  if (MBB->succ_size() != 1)
>      return 0;
> -  }
>  
> -  BlockT *childBlk = *curBlk->succ_begin();
> -  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
> +  MachineBasicBlock *childBlk = *MBB->succ_begin();
> +  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
>      return 0;
> -  }
>  
> -  mergeSerialBlock(curBlk, childBlk);
> +  mergeSerialBlock(MBB, childBlk);
>    ++numSerialPatternMatch;
>    return 1;
> -} //serialPatternMatch
> +}
>  
> -template<class PassT>
> -int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
> +int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
>    //two edges
> -  if (curBlk->succ_size() != 2) {
> +  if (MBB->succ_size() != 2)
>      return 0;
> -  }
> -
> -  if (hasBackEdge(curBlk)) {
> +  if (hasBackEdge(MBB))
>      return 0;
> -  }
> -
> -  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
> -  if (branchInstr == NULL) {
> +  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
> +  if (!BranchMI)
>      return 0;
> -  }
>  
> -  assert(CFGTraits::isCondBranch(branchInstr));
> +  assert(isCondBranch(BranchMI));
>  
> -  BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
> -  BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
> -  BlockT *landBlk;
> -  int cloned = 0;
> +  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
> +  serialPatternMatch(TrueMBB);
> +  ifPatternMatch(TrueMBB);
> +  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
> +  serialPatternMatch(FalseMBB);
> +  ifPatternMatch(FalseMBB);
> +  MachineBasicBlock *LandBlk;
> +  int Cloned = 0;
>  
> +  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
>    // TODO: Simplify
> -  if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
> -    && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
> -    landBlk = *trueBlk->succ_begin();
> -  } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
> -    landBlk = NULL;
> -  } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
> -    landBlk = falseBlk;
> -    falseBlk = NULL;
> -  } else if (falseBlk->succ_size() == 1
> -             && *falseBlk->succ_begin() == trueBlk) {
> -    landBlk = trueBlk;
> -    trueBlk = NULL;
> -  } else if (falseBlk->succ_size() == 1
> -             && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
> -    landBlk = *falseBlk->succ_begin();
> -  } else if (trueBlk->succ_size() == 1
> -    && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
> -    landBlk = *trueBlk->succ_begin();
> +  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
> +    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
> +    // Diamond pattern
> +    LandBlk = *TrueMBB->succ_begin();
> +  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
> +    // Triangle pattern, false is empty
> +    LandBlk = FalseMBB;
> +    FalseMBB = NULL;
> +  } else if (FalseMBB->succ_size() == 1
> +             && *FalseMBB->succ_begin() == TrueMBB) {
> +    // Triangle pattern, true is empty
> +    LandBlk = TrueMBB;
> +    TrueMBB = NULL;
> +  } else if (FalseMBB->succ_size() == 1
> +             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
> +    LandBlk = *FalseMBB->succ_begin();
> +  } else if (TrueMBB->succ_size() == 1
> +    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
> +    LandBlk = *TrueMBB->succ_begin();
>    } else {
> -    return handleJumpintoIf(curBlk, trueBlk, falseBlk);
> +    return handleJumpintoIf(MBB, TrueMBB, FalseMBB);
>    }
>  
>    // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
>    // new BB created for landBlk==NULL may introduce new challenge to the
>    // reduction process.
> -  if (landBlk != NULL &&
> -      ((trueBlk && trueBlk->pred_size() > 1)
> -      || (falseBlk && falseBlk->pred_size() > 1))) {
> -     cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
> +  if (LandBlk &&
> +      ((TrueMBB && TrueMBB->pred_size() > 1)
> +      || (FalseMBB && FalseMBB->pred_size() > 1))) {
> +     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
>    }
>  
> -  if (trueBlk && trueBlk->pred_size() > 1) {
> -    trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
> -    ++cloned;
> +  if (TrueMBB && TrueMBB->pred_size() > 1) {
> +    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
> +    ++Cloned;
>    }
>  
> -  if (falseBlk && falseBlk->pred_size() > 1) {
> -    falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
> -    ++cloned;
> +  if (FalseMBB && FalseMBB->pred_size() > 1) {
> +    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
> +    ++Cloned;
>    }
>  
> -  mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
> +  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
>  
>    ++numIfPatternMatch;
>  
> -  numClonedBlock += cloned;
> +  numClonedBlock += Cloned;
>  
> -  return 1 + cloned;
> -} //ifPatternMatch
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
> -  return 0;
> -} //switchPatternMatch
> +  return 1 + Cloned;
> +}
>  
> -template<class PassT>
> -int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
> -  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
> -  typename std::vector<LoopT *> nestedLoops;
> -  while (loopRep) {
> -    nestedLoops.push_back(loopRep);
> -    loopRep = loopRep->getParentLoop();
> +int AMDGPUCFGStructurizer::loopendPatternMatch() {
> +  std::vector<MachineLoop *> NestedLoops;
> +  for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end();
> +      It != E; ++It) {
> +    df_iterator<MachineLoop *> LpIt = df_begin(*It),
> +        LpE = df_end(*It);
> +    for (; LpIt != LpE; ++LpIt)
> +      NestedLoops.push_back(*LpIt);
>    }
> -
> -  if (nestedLoops.size() == 0) {
> +  if (NestedLoops.size() == 0)
>      return 0;
> -  }
>  
>    // Process nested loop outside->inside, so "continue" to a outside loop won't
>    // be mistaken as "break" of the current loop.
> -  int num = 0;
> -  for (typename std::vector<LoopT *>::reverse_iterator
> -       iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
> -       iter != iterEnd; ++iter) {
> -    loopRep = *iter;
> -
> -    if (getLoopLandBlock(loopRep) != NULL) {
> +  int Num = 0;
> +  for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
> +      E = NestedLoops.rend(); It != E; ++It) {
> +    MachineLoop *ExaminedLoop = *It;
> +    if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
>        continue;
> -    }
> -
> -    BlockT *loopHeader = loopRep->getHeader();
> -
> -    int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
> -
> -    if (numBreak == -1) {
> +    DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
> +    int NumBreak = mergeLoop(ExaminedLoop);
> +    if (NumBreak == -1)
>        break;
> -    }
> -
> -    int numCont = loopcontPatternMatch(loopRep, loopHeader);
> -    num += numBreak + numCont;
> +    Num += NumBreak;
>    }
> +  return Num;
> +}
>  
> -  return num;
> -} //loopendPatternMatch
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
> -  if (curBlk->succ_size() != 0) {
> -    return 0;
> -  }
> +int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
> +  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
> +  MBBVector ExitingMBBs;
> +  LoopRep->getExitingBlocks(ExitingMBBs);
> +  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
> +  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
> +  // We assume a single ExitBlk
> +  MBBVector ExitBlks;
> +  LoopRep->getExitBlocks(ExitBlks);
> +  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
> +  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
> +    ExitBlkSet.insert(ExitBlks[i]);
> +  assert(ExitBlkSet.size() == 1);
> +  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
> +  assert(ExitBlk && "Loop has several exit block");
> +  MBBVector LatchBlks;
> +  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
> +  InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
> +      PE = InvMBBTraits::child_end(LoopHeader);
> +  for (; PI != PE; PI++) {
> +    if (LoopRep->contains(*PI))
> +      LatchBlks.push_back(*PI);
> +  }
> +
> +  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
> +    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
> +  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
> +    settleLoopcontBlock(LatchBlks[i], LoopHeader);
> +  int Match = 0;
> +  do {
> +    Match = 0;
> +    Match += serialPatternMatch(LoopHeader);
> +    Match += ifPatternMatch(LoopHeader);
> +  } while (Match > 0);
> +  mergeLooplandBlock(LoopHeader, ExitBlk);
> +  MachineLoop *ParentLoop = LoopRep->getParentLoop();
> +  if (ParentLoop)
> +    MLI->changeLoopFor(LoopHeader, ParentLoop);
> +  else
> +    MLI->removeBlock(LoopHeader);
> +  Visited[LoopRep] = true;
> +  return 1;
> +}
>  
> -  int numLoop = 0;
> -  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
> -  while (loopRep && loopRep->getHeader() == curBlk) {
> -    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
> -    if (loopLand) {
> -      BlockT *landBlk = loopLand->landBlk;
> -      assert(landBlk);
> -      if (!isRetiredBlock(landBlk)) {
> -        mergeLooplandBlock(curBlk, loopLand);
> -        ++numLoop;
> -      }
> +int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
> +    MachineBasicBlock *LoopHeader) {
> +  int NumCont = 0;
> +  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
> +  typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
> +  GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
> +      E = GTIM::child_end(LoopHeader);
> +  for (; It != E; ++It) {
> +    MachineBasicBlock *MBB = *It;
> +    if (LoopRep->contains(MBB)) {
> +      handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
> +                          LoopHeader, LoopRep);
> +      ContMBB.push_back(MBB);
> +      ++NumCont;
>      }
> -    loopRep = loopRep->getParentLoop();
>    }
>  
> -  numLoopPatternMatch += numLoop;
> +  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
> +      E = ContMBB.end(); It != E; ++It) {
> +    (*It)->removeSuccessor(LoopHeader);
> +  }
>  
> -  return numLoop;
> -} //loopPatternMatch
> +  numLoopcontPatternMatch += NumCont;
>  
> -template<class PassT>
> -int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
> -                                                  BlockT *loopHeader) {
> -  BlockTSmallerVector exitingBlks;
> -  loopRep->getExitingBlocks(exitingBlks);
> +  return NumCont;
> +}
>  
> -  DEBUG(
> -    dbgs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
> -  );
>  
> -  if (exitingBlks.size() == 0) {
> -    setLoopLandBlock(loopRep);
> -    return 0;
> +bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
> +    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
> +  if (Src1MBB->succ_size() == 0) {
> +    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
> +    if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
> +      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
> +      if (TheEntry) {
> +        DEBUG(
> +          dbgs() << "isLoopContBreakBlock yes src1 = BB"
> +                 << Src1MBB->getNumber()
> +                 << " src2 = BB" << Src2MBB->getNumber() << "\n";
> +        );
> +        return true;
> +      }
> +    }
>    }
> +  return false;
> +}
>  
> -  // Compute the corresponding exitBlks and exit block set.
> -  BlockTSmallerVector exitBlks;
> -  std::set<BlockT *> exitBlkSet;
> -  for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
> -       iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
> -    BlockT *exitingBlk = *iter;
> -    BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
> -    exitBlks.push_back(exitBlk);
> -    exitBlkSet.insert(exitBlk);  //non-duplicate insert
> +int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
> +    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
> +  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
> +  if (Num == 0) {
> +    DEBUG(
> +      dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> +    );
> +    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
>    }
> +  return Num;
> +}
>  
> -  assert(exitBlkSet.size() > 0);
> -  assert(exitBlks.size() == exitingBlks.size());
> +int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
> +    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
> +  int Num = 0;
> +  MachineBasicBlock *DownBlk;
> +
> +  // TrueMBB could be the common post dominator
> +  DownBlk = TrueMBB;
>  
>    DEBUG(
> -    dbgs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
> +    dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
> +           << " true = BB" << TrueMBB->getNumber()
> +           << ", numSucc=" << TrueMBB->succ_size()
> +           << " false = BB" << FalseMBB->getNumber() << "\n";
>    );
>  
> -  // Find exitLandBlk.
> -  BlockT *exitLandBlk = NULL;
> -  int numCloned = 0;
> -  int numSerial = 0;
> -
> -  if (exitBlkSet.size() == 1) {
> -    exitLandBlk = *exitBlkSet.begin();
> -  } else {
> -    exitLandBlk = findNearestCommonPostDom(exitBlkSet);
> -
> -    if (exitLandBlk == NULL) {
> -      return -1;
> -    }
> -
> -    bool allInPath = true;
> -    bool allNotInPath = true;
> -    for (typename std::set<BlockT*>::const_iterator
> -         iter = exitBlkSet.begin(),
> -         iterEnd = exitBlkSet.end();
> -         iter != iterEnd; ++iter) {
> -      BlockT *exitBlk = *iter;
> +  while (DownBlk) {
> +    DEBUG(
> +      dbgs() << "check down = BB" << DownBlk->getNumber();
> +    );
>  
> -      PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
> +    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
>        DEBUG(
> -        dbgs() << "BB" << exitBlk->getNumber()
> -               << " to BB" << exitLandBlk->getNumber() << " PathToKind="
> -               << pathKind << "\n";
> +        dbgs() << " working\n";
>        );
>  
> -      allInPath = allInPath && (pathKind == SinglePath_InPath);
> -      allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
> -
> -      if (!allInPath && !allNotInPath) {
> -        DEBUG(
> -              dbgs() << "singlePath check fail\n";
> -        );
> -        return -1;
> -      }
> -    } // check all exit blocks
> +      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
> +      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
>  
> -    if (allNotInPath) {
> -
> -      // TODO: Simplify, maybe separate function?
> -      LoopT *parentLoopRep = loopRep->getParentLoop();
> -      BlockT *parentLoopHeader = NULL;
> -      if (parentLoopRep)
> -        parentLoopHeader = parentLoopRep->getHeader();
> -
> -      if (exitLandBlk == parentLoopHeader &&
> -          (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
> -                                               loopRep,
> -                                               exitBlkSet,
> -                                               exitLandBlk)) != NULL) {
> -        DEBUG(
> -          dbgs() << "relocateLoopcontBlock success\n";
> -        );
> -      } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
> -                                                      exitingBlks,
> -                                                      exitBlks)) != NULL) {
> -        DEBUG(
> -          dbgs() << "insertEndbranchBlock success\n";
> -        );
> -      } else {
> -        DEBUG(
> -          dbgs() << "loop exit fail\n";
> -        );
> -        return -1;
> -      }
> -    }
> -
> -    // Handle side entry to exit path.
> -    exitBlks.clear();
> -    exitBlkSet.clear();
> -    for (typename BlockTSmallerVector::iterator iterExiting =
> -           exitingBlks.begin(),
> -         iterExitingEnd = exitingBlks.end();
> -         iterExiting != iterExitingEnd; ++iterExiting) {
> -      BlockT *exitingBlk = *iterExiting;
> -      BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
> -      BlockT *newExitBlk = exitBlk;
> -
> -      if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
> -        newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
> -        ++numCloned;
> -      }
> -
> -      numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
> -
> -      exitBlks.push_back(newExitBlk);
> -      exitBlkSet.insert(newExitBlk);
> -    }
> -
> -    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
> -         iterExitEnd = exitBlks.end();
> -         iterExit != iterExitEnd; ++iterExit) {
> -      BlockT *exitBlk = *iterExit;
> -      numSerial += serialPatternMatch(exitBlk);
> -    }
> -
> -    for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
> -         iterExitEnd = exitBlks.end();
> -         iterExit != iterExitEnd; ++iterExit) {
> -      BlockT *exitBlk = *iterExit;
> -      if (exitBlk->pred_size() > 1) {
> -        if (exitBlk != exitLandBlk) {
> -          return -1;
> -        }
> -      } else {
> -        if (exitBlk != exitLandBlk &&
> -            (exitBlk->succ_size() != 1 ||
> -            *exitBlk->succ_begin() != exitLandBlk)) {
> -          return -1;
> -        }
> -      }
> -    }
> -  } // else
> -
> -  exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
> -
> -  // Fold break into the breaking block. Leverage across level breaks.
> -  assert(exitingBlks.size() == exitBlks.size());
> -  for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
> -       iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
> -       iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
> -    BlockT *exitBlk = *iterExit;
> -    BlockT *exitingBlk = *iterExiting;
> -    assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
> -    LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
> -    handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
> -  }
> -
> -  int numBreak = static_cast<int>(exitingBlks.size());
> -  numLoopbreakPatternMatch += numBreak;
> -  numClonedBlock += numCloned;
> -  return numBreak + numSerial + numCloned;
> -} //loopbreakPatternMatch
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
> -                                                 BlockT *loopHeader) {
> -  int numCont = 0;
> -  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
> -  for (typename InvBlockGTraits::ChildIteratorType iter =
> -       InvBlockGTraits::child_begin(loopHeader),
> -       iterEnd = InvBlockGTraits::child_end(loopHeader);
> -       iter != iterEnd; ++iter) {
> -    BlockT *curBlk = *iter;
> -    if (loopRep->contains(curBlk)) {
> -      handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
> -                          loopHeader, loopRep);
> -      contBlk.push_back(curBlk);
> -      ++numCont;
> -    }
> -  }
> -
> -  for (typename SmallVectorImpl<BlockT *>::iterator
> -       iter = contBlk.begin(), iterEnd = contBlk.end();
> -       iter != iterEnd; ++iter) {
> -    (*iter)->removeSuccessor(loopHeader);
> -  }
> -
> -  numLoopcontPatternMatch += numCont;
> -
> -  return numCont;
> -} //loopcontPatternMatch
> -
> -
> -template<class PassT>
> -bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
> -                                                         BlockT *src2Blk) {
> -  // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
> -  // same loop with LoopLandInfo without explicitly keeping track of
> -  // loopContBlks and loopBreakBlks, this is a method to get the information.
> -  //
> -  if (src1Blk->succ_size() == 0) {
> -    LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
> -    if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
> -      LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -      if (theEntry != NULL) {
> -        DEBUG(
> -          dbgs() << "isLoopContBreakBlock yes src1 = BB"
> -                 << src1Blk->getNumber()
> -                 << " src2 = BB" << src2Blk->getNumber() << "\n";
> -        );
> -        return true;
> -      }
> -    }
> -  }
> -  return false;
> -}  //isSameloopDetachedContbreak
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
> -                                             BlockT *trueBlk,
> -                                             BlockT *falseBlk) {
> -  int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
> -  if (num == 0) {
> -    DEBUG(
> -      dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> -    );
> -    num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
> -  }
> -  return num;
> -}
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
> -                                                BlockT *trueBlk,
> -                                                BlockT *falseBlk) {
> -  int num = 0;
> -  BlockT *downBlk;
> -
> -  //trueBlk could be the common post dominator
> -  downBlk = trueBlk;
> -
> -  DEBUG(
> -    dbgs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
> -           << " true = BB" << trueBlk->getNumber()
> -           << ", numSucc=" << trueBlk->succ_size()
> -           << " false = BB" << falseBlk->getNumber() << "\n";
> -  );
> -
> -  while (downBlk) {
> -    DEBUG(
> -      dbgs() << "check down = BB" << downBlk->getNumber();
> -    );
> -
> -    if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
> -      DEBUG(
> -        dbgs() << " working\n";
> -      );
> -
> -      num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
> -      num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
> -
> -      numClonedBlock += num;
> -      num += serialPatternMatch(*headBlk->succ_begin());
> -      num += serialPatternMatch(*(++headBlk->succ_begin()));
> -      num += ifPatternMatch(headBlk);
> -      assert(num > 0);
> +      numClonedBlock += Num;
> +      Num += serialPatternMatch(*HeadMBB->succ_begin());
> +      Num += serialPatternMatch(*(++HeadMBB->succ_begin()));
> +      Num += ifPatternMatch(HeadMBB);
> +      assert(Num > 0);
>  
>        break;
>      }
>      DEBUG(
>        dbgs() << " not working\n";
>      );
> -    downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
> +    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
>    } // walk down the postDomTree
>  
> -  return num;
> -} //handleJumpintoIf
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
> -                                                         BlockT *trueBlk,
> -                                                         BlockT *falseBlk,
> -                                                         BlockT *landBlk,
> -                                                         bool detail) {
> -  dbgs() << "head = BB" << headBlk->getNumber()
> -         << " size = " << headBlk->size();
> -  if (detail) {
> +  return Num;
> +}
> +
> +void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
> +    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
> +    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
> +  dbgs() << "head = BB" << HeadMBB->getNumber()
> +         << " size = " << HeadMBB->size();
> +  if (Detail) {
>      dbgs() << "\n";
> -    headBlk->print(dbgs());
> +    HeadMBB->print(dbgs());
>      dbgs() << "\n";
>    }
>  
> -  if (trueBlk) {
> -    dbgs() << ", true = BB" << trueBlk->getNumber() << " size = "
> -           << trueBlk->size() << " numPred = " << trueBlk->pred_size();
> -    if (detail) {
> +  if (TrueMBB) {
> +    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
> +           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
> +    if (Detail) {
>        dbgs() << "\n";
> -      trueBlk->print(dbgs());
> +      TrueMBB->print(dbgs());
>        dbgs() << "\n";
>      }
>    }
> -  if (falseBlk) {
> -    dbgs() << ", false = BB" << falseBlk->getNumber() << " size = "
> -           << falseBlk->size() << " numPred = " << falseBlk->pred_size();
> -    if (detail) {
> +  if (FalseMBB) {
> +    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
> +           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
> +    if (Detail) {
>        dbgs() << "\n";
> -      falseBlk->print(dbgs());
> +      FalseMBB->print(dbgs());
>        dbgs() << "\n";
>      }
>    }
> -  if (landBlk) {
> -    dbgs() << ", land = BB" << landBlk->getNumber() << " size = "
> -           << landBlk->size() << " numPred = " << landBlk->pred_size();
> -    if (detail) {
> +  if (LandMBB) {
> +    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
> +           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
> +    if (Detail) {
>        dbgs() << "\n";
> -      landBlk->print(dbgs());
> +      LandMBB->print(dbgs());
>        dbgs() << "\n";
>      }
>    }
>  
>      dbgs() << "\n";
> -} //showImproveSimpleJumpintoIf
> +}
>  
> -template<class PassT>
> -int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
> -                                                    BlockT *trueBlk,
> -                                                    BlockT *falseBlk,
> -                                                    BlockT **plandBlk) {
> -  bool migrateTrue = false;
> -  bool migrateFalse = false;
> +int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> +    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> +    MachineBasicBlock **LandMBBPtr) {
> +  bool MigrateTrue = false;
> +  bool MigrateFalse = false;
>  
> -  BlockT *landBlk = *plandBlk;
> +  MachineBasicBlock *LandBlk = *LandMBBPtr;
>  
> -  assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
> -         && (falseBlk == NULL || falseBlk->succ_size() <= 1));
> +  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
> +         && (!FalseMBB || FalseMBB->succ_size() <= 1));
>  
> -  if (trueBlk == falseBlk) {
> +  if (TrueMBB == FalseMBB)
>      return 0;
> -  }
>  
> -  migrateTrue = needMigrateBlock(trueBlk);
> -  migrateFalse = needMigrateBlock(falseBlk);
> +  MigrateTrue = needMigrateBlock(TrueMBB);
> +  MigrateFalse = needMigrateBlock(FalseMBB);
>  
> -  if (!migrateTrue && !migrateFalse) {
> +  if (!MigrateTrue && !MigrateFalse)
>      return 0;
> -  }
>  
>    // If we need to migrate either trueBlk and falseBlk, migrate the rest that
>    // have more than one predecessors.  without doing this, its predecessor
>    // rather than headBlk will have undefined value in initReg.
> -  if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
> -    migrateTrue = true;
> -  }
> -  if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
> -    migrateFalse = true;
> -  }
> +  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
> +    MigrateTrue = true;
> +  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
> +    MigrateFalse = true;
>  
>    DEBUG(
>      dbgs() << "before improveSimpleJumpintoIf: ";
> -    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
> +    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
>    );
>  
>    // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
> @@ -1193,205 +1348,142 @@ int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
>    // add initReg = initVal to headBlk
>  
>    const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> -  unsigned initReg =
> -    funcRep->getRegInfo().createVirtualRegister(I32RC);
> -  if (!migrateTrue || !migrateFalse) {
> -    int initVal = migrateTrue ? 0 : 1;
> -    CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
> -  }
> +  unsigned InitReg =
> +    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
> +  if (!MigrateTrue || !MigrateFalse)
> +    llvm_unreachable("Extra register needed to handle CFG");
>  
> -  int numNewBlk = 0;
> +  int NumNewBlk = 0;
>  
> -  if (landBlk == NULL) {
> -    landBlk = funcRep->CreateMachineBasicBlock();
> -    funcRep->push_back(landBlk);  //insert to function
> +  if (!LandBlk) {
> +    LandBlk = HeadMBB->getParent()->CreateMachineBasicBlock();
> +    HeadMBB->getParent()->push_back(LandBlk);  //insert to function
>  
> -    if (trueBlk) {
> -      trueBlk->addSuccessor(landBlk);
> +    if (TrueMBB) {
> +      TrueMBB->addSuccessor(LandBlk);
>      } else {
> -      headBlk->addSuccessor(landBlk);
> +      HeadMBB->addSuccessor(LandBlk);
>      }
>  
> -    if (falseBlk) {
> -      falseBlk->addSuccessor(landBlk);
> +    if (FalseMBB) {
> +      FalseMBB->addSuccessor(LandBlk);
>      } else {
> -      headBlk->addSuccessor(landBlk);
> +      HeadMBB->addSuccessor(LandBlk);
>      }
>  
> -    numNewBlk ++;
> +    NumNewBlk ++;
>    }
>  
> -  bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
> +  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
>  
>    //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
> -  typename BlockT::iterator insertPos =
> -    CFGTraits::getInstrPos
> -    (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
> -
> -  if (landBlkHasOtherPred) {
> -    unsigned immReg =
> -      funcRep->getRegInfo().createVirtualRegister(I32RC);
> -    CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
> -    unsigned cmpResReg =
> -      funcRep->getRegInfo().createVirtualRegister(I32RC);
> -
> -    CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
> -                                        initReg, immReg);
> -    CFGTraits::insertCondBranchBefore(landBlk, insertPos,
> -                                      AMDGPU::IF_PREDICATE_SET, passRep,
> -                                      cmpResReg, DebugLoc());
> +  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
> +
> +  if (LandBlkHasOtherPred) {
> +    llvm_unreachable("Extra register needed to handle CFG");
> +    unsigned CmpResReg =
> +      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
> +    llvm_unreachable("Extra compare instruction needed to handle CFG");
> +    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
> +        CmpResReg, DebugLoc());
>    }
>  
> -  CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
> -                                    passRep, initReg, DebugLoc());
> +  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
> +      DebugLoc());
>  
> -  if (migrateTrue) {
> -    migrateInstruction(trueBlk, landBlk, insertPos);
> +  if (MigrateTrue) {
> +    migrateInstruction(TrueMBB, LandBlk, I);
>      // need to uncondionally insert the assignment to ensure a path from its
>      // predecessor rather than headBlk has valid value in initReg if
>      // (initVal != 1).
> -    CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
> +    llvm_unreachable("Extra register needed to handle CFG");
>    }
> -  CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
> +  insertInstrBefore(I, AMDGPU::ELSE);
>  
> -  if (migrateFalse) {
> -    migrateInstruction(falseBlk, landBlk, insertPos);
> +  if (MigrateFalse) {
> +    migrateInstruction(FalseMBB, LandBlk, I);
>      // need to uncondionally insert the assignment to ensure a path from its
>      // predecessor rather than headBlk has valid value in initReg if
>      // (initVal != 0)
> -    CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
> +    llvm_unreachable("Extra register needed to handle CFG");
>    }
>  
> -  if (landBlkHasOtherPred) {
> +  if (LandBlkHasOtherPred) {
>      // add endif
> -    CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
> +    insertInstrBefore(I, AMDGPU::ENDIF);
>  
>      // put initReg = 2 to other predecessors of landBlk
> -    for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
> -         predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
> -         ++predIter) {
> -      BlockT *curBlk = *predIter;
> -      if (curBlk != trueBlk && curBlk != falseBlk) {
> -        CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
> -      }
> -    } //for
> +    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
> +         PE = LandBlk->pred_end(); PI != PE; ++PI) {
> +      MachineBasicBlock *MBB = *PI;
> +      if (MBB != TrueMBB && MBB != FalseMBB)
> +        llvm_unreachable("Extra register needed to handle CFG");
> +    }
>    }
>    DEBUG(
>      dbgs() << "result from improveSimpleJumpintoIf: ";
> -    showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
> +    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
>    );
>  
>    // update landBlk
> -  *plandBlk = landBlk;
> -
> -  return numNewBlk;
> -} //improveSimpleJumpintoIf
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
> -                                              LoopT *exitingLoop,
> -                                             BlockT *exitBlk,
> -                                              LoopT *exitLoop,
> -                                             BlockT *landBlk) {
> -  DEBUG(
> -    dbgs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
> -           << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
> -  );
> -  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> -
> -  RegiT initReg = INVALIDREGNUM;
> -  if (exitingLoop != exitLoop) {
> -    initReg = static_cast<int>
> -      (funcRep->getRegInfo().createVirtualRegister(I32RC));
> -    assert(initReg != INVALIDREGNUM);
> -    addLoopBreakInitReg(exitLoop, initReg);
> -    while (exitingLoop != exitLoop && exitingLoop) {
> -      addLoopBreakOnReg(exitingLoop, initReg);
> -      exitingLoop = exitingLoop->getParentLoop();
> -    }
> -    assert(exitingLoop == exitLoop);
> -  }
> -
> -  mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
> -
> -} //handleLoopbreak
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
> -                                                  LoopT *contingLoop,
> -                                                 BlockT *contBlk,
> -                                                  LoopT *contLoop) {
> -  DEBUG(
> -    dbgs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
> -           << " header = BB" << contBlk->getNumber() << "\n";
> -
> -    dbgs() << "Trying to continue loop-depth = "
> -           << getLoopDepth(contLoop)
> -           << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
> -  );
> +  *LandMBBPtr = LandBlk;
>  
> -  RegiT initReg = INVALIDREGNUM;
> -  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> -  if (contingLoop != contLoop) {
> -    initReg = static_cast<int>
> -      (funcRep->getRegInfo().createVirtualRegister(I32RC));
> -    assert(initReg != INVALIDREGNUM);
> -    addLoopContInitReg(contLoop, initReg);
> -    while (contingLoop && contingLoop->getParentLoop() != contLoop) {
> -      addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
> -      contingLoop = contingLoop->getParentLoop();
> -    }
> -    assert(contingLoop && contingLoop->getParentLoop() == contLoop);
> -    addLoopContOnReg(contingLoop, initReg);
> -  }
> +  return NumNewBlk;
> +}
>  
> -  settleLoopcontBlock(contingBlk, contBlk, initReg);
> -} //handleLoopcontBlock
> +void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
> +    MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
> +    MachineLoop *ContLoop) {
> +  DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
> +               << " header = BB" << ContMBB->getNumber() << "\n";
> +        dbgs() << "Trying to continue loop-depth = "
> +               << getLoopDepth(ContLoop)
> +               << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
> +  settleLoopcontBlock(ContingMBB, ContMBB);
> +}
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
> +void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
> +    MachineBasicBlock *SrcMBB) {
>    DEBUG(
> -    dbgs() << "serialPattern BB" << dstBlk->getNumber()
> -           << " <= BB" << srcBlk->getNumber() << "\n";
> +    dbgs() << "serialPattern BB" << DstMBB->getNumber()
> +           << " <= BB" << SrcMBB->getNumber() << "\n";
>    );
> -  dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
> +  DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
>  
> -  dstBlk->removeSuccessor(srcBlk);
> -  CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
> +  DstMBB->removeSuccessor(SrcMBB);
> +  cloneSuccessorList(DstMBB, SrcMBB);
>  
> -  removeSuccessor(srcBlk);
> -  retireBlock(dstBlk, srcBlk);
> -} //mergeSerialBlock
> +  removeSuccessor(SrcMBB);
> +  MLI->removeBlock(SrcMBB);
> +  retireBlock(SrcMBB);
> +}
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
> -                                                  BlockT *curBlk,
> -                                                  BlockT *trueBlk,
> -                                                  BlockT *falseBlk,
> -                                                  BlockT *landBlk) {
> +void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
> +    MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
> +    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
>    DEBUG(
> -    dbgs() << "ifPattern BB" << curBlk->getNumber();
> +    dbgs() << "ifPattern BB" << MBB->getNumber();
>      dbgs() << "{  ";
> -    if (trueBlk) {
> -      dbgs() << "BB" << trueBlk->getNumber();
> +    if (TrueMBB) {
> +      dbgs() << "BB" << TrueMBB->getNumber();
>      }
>      dbgs() << "  } else ";
>      dbgs() << "{  ";
> -    if (falseBlk) {
> -      dbgs() << "BB" << falseBlk->getNumber();
> +    if (FalseMBB) {
> +      dbgs() << "BB" << FalseMBB->getNumber();
>      }
>      dbgs() << "  }\n ";
>      dbgs() << "landBlock: ";
> -    if (landBlk == NULL) {
> +    if (!LandMBB) {
>        dbgs() << "NULL";
>      } else {
> -      dbgs() << "BB" << landBlk->getNumber();
> +      dbgs() << "BB" << LandMBB->getNumber();
>      }
>      dbgs() << "\n";
>    );
>  
> -  int oldOpcode = branchInstr->getOpcode();
> -  DebugLoc branchDL = branchInstr->getDebugLoc();
> +  int OldOpcode = BranchMI->getOpcode();
> +  DebugLoc BranchDL = BranchMI->getDebugLoc();
>  
>  //    transform to
>  //    if cond
> @@ -1401,1645 +1493,390 @@ void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
>  //    endif
>  //    landBlk
>  
> -  typename BlockT::iterator branchInstrPos =
> -    CFGTraits::getInstrPos(curBlk, branchInstr);
> -  CFGTraits::insertCondBranchBefore(branchInstrPos,
> -                                    CFGTraits::getBranchNzeroOpcode(oldOpcode),
> -                                    passRep,
> -                                    branchDL);
> -
> -  if (trueBlk) {
> -    curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
> -    curBlk->removeSuccessor(trueBlk);
> -    if (landBlk && trueBlk->succ_size()!=0) {
> -      trueBlk->removeSuccessor(landBlk);
> -    }
> -    retireBlock(curBlk, trueBlk);
> -  }
> -  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
> -
> -  if (falseBlk) {
> -    curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
> -                   falseBlk->end());
> -    curBlk->removeSuccessor(falseBlk);
> -    if (landBlk && falseBlk->succ_size() != 0) {
> -      falseBlk->removeSuccessor(landBlk);
> -    }
> -    retireBlock(curBlk, falseBlk);
> -  }
> -  CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
> +  MachineBasicBlock::iterator I = BranchMI;
> +  insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
> +      BranchDL);
>  
> -  branchInstr->eraseFromParent();
> +  if (TrueMBB) {
> +    MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
> +    MBB->removeSuccessor(TrueMBB);
> +    if (LandMBB && TrueMBB->succ_size()!=0)
> +      TrueMBB->removeSuccessor(LandMBB);
> +    retireBlock(TrueMBB);
> +    MLI->removeBlock(TrueMBB);
> +  }
>  
> -  if (landBlk && trueBlk && falseBlk) {
> -    curBlk->addSuccessor(landBlk);
> +  if (FalseMBB) {
> +    insertInstrBefore(I, AMDGPU::ELSE);
> +    MBB->splice(I, FalseMBB, FalseMBB->begin(),
> +                   FalseMBB->end());
> +    MBB->removeSuccessor(FalseMBB);
> +    if (LandMBB && FalseMBB->succ_size() != 0)
> +      FalseMBB->removeSuccessor(LandMBB);
> +    retireBlock(FalseMBB);
> +    MLI->removeBlock(FalseMBB);
>    }
> +  insertInstrBefore(I, AMDGPU::ENDIF);
>  
> -} //mergeIfthenelseBlock
> +  BranchMI->eraseFromParent();
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
> -                                                LoopLandInfo *loopLand) {
> -  BlockT *landBlk = loopLand->landBlk;
> +  if (LandMBB && TrueMBB && FalseMBB)
> +    MBB->addSuccessor(LandMBB);
>  
> -  DEBUG(
> -    dbgs() << "loopPattern header = BB" << dstBlk->getNumber()
> -           << " land = BB" << landBlk->getNumber() << "\n";
> -  );
> +}
>  
> -  // Loop contInitRegs are init at the beginning of the loop.
> -  for (typename std::set<RegiT>::const_iterator iter =
> -         loopLand->contInitRegs.begin(),
> -       iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
> -    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
> -  }
> +void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
> +    MachineBasicBlock *LandMBB) {
> +  DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
> +               << " land = BB" << LandMBB->getNumber() << "\n";);
>  
>    /* we last inserterd the DebugLoc in the
> -   * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
> +   * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current
> +   * dstBlk.
>     * search for the DebugLoc in the that statement.
>     * if not found, we have to insert the empty/default DebugLoc */
> -  InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
> -  DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
> -
> -  CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
> -  // Loop breakInitRegs are init before entering the loop.
> -  for (typename std::set<RegiT>::const_iterator iter =
> -         loopLand->breakInitRegs.begin(),
> -       iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
> -    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
> -  }
> -  // Loop endbranchInitRegs are init before entering the loop.
> -  for (typename std::set<RegiT>::const_iterator iter =
> -         loopLand->endbranchInitRegs.begin(),
> -       iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
> -    CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
> -  }
> +  MachineInstr *LoopBreakInstr = getLoopBreakInstr(DstBlk);
> +  DebugLoc DLBreak = (LoopBreakInstr) ? LoopBreakInstr->getDebugLoc() :
> +      DebugLoc();
> +
> +  insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DLBreak);
>  
> -  /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
> +  /* we last inserterd the DebugLoc in the continue statement in the current
> +   * dstBlk.
>     * search for the DebugLoc in the continue statement.
>     * if not found, we have to insert the empty/default DebugLoc */
> -  InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
> -  DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
> -
> -  CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
> -  // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
> -  // loop.
> -  for (typename std::set<RegiT>::const_iterator iter =
> -         loopLand->breakOnRegs.begin(),
> -       iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
> -    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
> -                                   *iter);
> -  }
> -
> -  // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
> -  // loop.
> -  for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
> -       iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
> -    CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
> -                                   passRep, *iter);
> -  }
> -
> -  dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
> -
> -  for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
> -       iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
> -    dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
> -  }
> +  MachineInstr *ContinueInstr = getContinueInstr(DstBlk);
> +  DebugLoc DLContinue = (ContinueInstr) ? ContinueInstr->getDebugLoc() :
> +      DebugLoc();
>  
> -  removeSuccessor(landBlk);
> -  retireBlock(dstBlk, landBlk);
> -} //mergeLooplandBlock
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
> -  while (I--) {
> -    if (I->getOpcode() == AMDGPU::PRED_X) {
> -      switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
> -      case OPCODE_IS_ZERO_INT:
> -        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
> -        return;
> -      case OPCODE_IS_NOT_ZERO_INT:
> -        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
> -        return;
> -      case OPCODE_IS_ZERO:
> -        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
> -        return;
> -      case OPCODE_IS_NOT_ZERO:
> -        static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
> -        return;
> -      default:
> -        llvm_unreachable("PRED_X Opcode invalid!");
> -      }
> -    }
> -  }
> +  insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DLContinue);
> +  DstBlk->addSuccessor(LandMBB);
> +  DstBlk->removeSuccessor(DstBlk);
>  }
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
> -                                                 BlockT *exitBlk,
> -                                                 BlockT *exitLandBlk,
> -                                                 RegiT  setReg) {
> -  DEBUG(
> -    dbgs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
> -           << " exit = BB" << exitBlk->getNumber()
> -           << " land = BB" << exitLandBlk->getNumber() << "\n";
> -  );
> -
> -  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
> -  assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
> -
> -  DebugLoc DL = branchInstr->getDebugLoc();
> -
> -  BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
> -
> -  //    transform exitingBlk to
> -  //    if ( ) {
> -  //       exitBlk (if exitBlk != exitLandBlk)
> -  //       setReg = 1
> -  //       break
> -  //    }endif
> -  //    successor = {orgSuccessor(exitingBlk) - exitBlk}
> -
> -  typename BlockT::iterator branchInstrPos =
> -    CFGTraits::getInstrPos(exitingBlk, branchInstr);
> -
> -  if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
> -    //break_logical
> -
> -    if (trueBranch != exitBlk) {
> -      reversePredicateSetter(branchInstrPos);
> -    }
> -    CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
> -  } else {
> -    if (trueBranch != exitBlk) {
> -      reversePredicateSetter(branchInstr);
> -    }
> -    CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
> -    if (exitBlk != exitLandBlk) {
> -      //splice is insert-before ...
> -      exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
> -                         exitBlk->end());
> -    }
> -    if (setReg != INVALIDREGNUM) {
> -      CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
> -    }
> -    CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
> -  } //if_logical
>  
> +void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
> +    MachineBasicBlock *LandMBB) {
> +  DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
> +               << " land = BB" << LandMBB->getNumber() << "\n";);
> +  MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
> +  assert(BranchMI && isCondBranch(BranchMI));
> +  DebugLoc DL = BranchMI->getDebugLoc();
> +  MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
> +  MachineBasicBlock::iterator I = BranchMI;
> +  if (TrueBranch != LandMBB)
> +    reversePredicateSetter(I);
> +  insertCondBranchBefore(I, AMDGPU::PREDICATED_BREAK, DL);
>    //now branchInst can be erase safely
> -  branchInstr->eraseFromParent();
> -
> +  BranchMI->eraseFromParent();
>    //now take care of successors, retire blocks
> -  exitingBlk->removeSuccessor(exitBlk);
> -  if (exitBlk != exitLandBlk) {
> -    //splice is insert-before ...
> -    exitBlk->removeSuccessor(exitLandBlk);
> -    retireBlock(exitingBlk, exitBlk);
> -  }
> -
> -} //mergeLoopbreakBlock
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
> -                                                 BlockT *contBlk,
> -                                                 RegiT   setReg) {
> -  DEBUG(
> -    dbgs() << "settleLoopcontBlock conting = BB"
> -           << contingBlk->getNumber()
> -           << ", cont = BB" << contBlk->getNumber() << "\n";
> -  );
> -
> -  InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
> -  if (branchInstr) {
> -    assert(CFGTraits::isCondBranch(branchInstr));
> -    typename BlockT::iterator branchInstrPos =
> -      CFGTraits::getInstrPos(contingBlk, branchInstr);
> -    BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
> -    int oldOpcode = branchInstr->getOpcode();
> -    DebugLoc DL = branchInstr->getDebugLoc();
> -
> -    //    transform contingBlk to
> -    //     if () {
> -    //          move instr after branchInstr
> -    //          continue
> -    //        or
> -    //          setReg = 1
> -    //          break
> -    //     }endif
> -    //     successor = {orgSuccessor(contingBlk) - loopHeader}
> -
> -    bool useContinueLogical = 
> -      (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
> -
> -    if (useContinueLogical == false) {
> -      int branchOpcode =
> -        trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
> -                              : CFGTraits::getBranchZeroOpcode(oldOpcode);
> -
> -      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
> -
> -      if (setReg != INVALIDREGNUM) {
> -        CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
> -        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> -        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
> -      } else {
> -        // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> -        CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
> -      }
> +  ExitingMBB->removeSuccessor(LandMBB);
> +}
>  
> -      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
> +void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
> +    MachineBasicBlock *ContMBB) {
> +  DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
> +               << ContingMBB->getNumber()
> +               << ", cont = BB" << ContMBB->getNumber() << "\n";);
> +
> +  MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
> +  if (MI) {
> +    assert(isCondBranch(MI));
> +    MachineBasicBlock::iterator I = MI;
> +    MachineBasicBlock *TrueBranch = getTrueBranch(MI);
> +    int OldOpcode = MI->getOpcode();
> +    DebugLoc DL = MI->getDebugLoc();
> +
> +    bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
> +
> +    if (UseContinueLogical == false) {
> +      int BranchOpcode =
> +          TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
> +          getBranchZeroOpcode(OldOpcode);
> +      insertCondBranchBefore(I, BranchOpcode, DL);
> +      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> +      insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
> +      insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
>      } else {
> -      int branchOpcode =
> -        trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
> -                              : CFGTraits::getContinueZeroOpcode(oldOpcode);
> -
> -      CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
> +      int BranchOpcode =
> +          TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
> +          getContinueZeroOpcode(OldOpcode);
> +      insertCondBranchBefore(I, BranchOpcode, DL);
>      }
>  
> -    branchInstr->eraseFromParent();
> +    MI->eraseFromParent();
>    } else {
>      // if we've arrived here then we've already erased the branch instruction
> -    // travel back up the basic block to see the last reference of our debug location
> -    // we've just inserted that reference here so it should be representative
> -    if (setReg != INVALIDREGNUM) {
> -      CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
> -      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> -      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
> -    } else {
> -      // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
> -      CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
> -    }
> -  } //else
> -
> -} //settleLoopcontBlock
> -
> -// BBs in exitBlkSet are determined as in break-path for loopRep,
> -// before we can put code for BBs as inside loop-body for loopRep
> -// check whether those BBs are determined as cont-BB for parentLoopRep
> -// earlier.
> -// If so, generate a new BB newBlk
> -//    (1) set newBlk common successor of BBs in exitBlkSet
> -//    (2) change the continue-instr in BBs in exitBlkSet to break-instr
> -//    (3) generate continue-instr in newBlk
> -//
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
> -                                              LoopT *loopRep,
> -                                              std::set<BlockT *> &exitBlkSet,
> -                                              BlockT *exitLandBlk) {
> -  std::set<BlockT *> endBlkSet;
> -
> -
> -
> -  for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
> -       iterEnd = exitBlkSet.end();
> -       iter != iterEnd; ++iter) {
> -    BlockT *exitBlk = *iter;
> -    BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
> -
> -    if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
> -      return NULL;
> -
> -    endBlkSet.insert(endBlk);
> -  }
> -
> -  BlockT *newBlk = funcRep->CreateMachineBasicBlock();
> -  funcRep->push_back(newBlk);  //insert to function
> -  CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
> -  SHOWNEWBLK(newBlk, "New continue block: ");
> -
> -  for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
> -       iterEnd = endBlkSet.end();
> -       iter != iterEnd; ++iter) {
> -      BlockT *endBlk = *iter;
> -      InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
> -      if (contInstr) {
> -        contInstr->eraseFromParent();
> -      }
> -      endBlk->addSuccessor(newBlk);
> -      DEBUG(
> -        dbgs() << "Add new continue Block to BB"
> -               << endBlk->getNumber() << " successors\n";
> -      );
> -  }
> -
> -  return newBlk;
> -} //relocateLoopcontBlock
> -
> -
> -// LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
> -// LoopLandBlock. This BB branch on the loop endBranchInit register to the
> -// pathes corresponding to the loop exiting branches.
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
> -                                              BlockTSmallerVector &exitingBlks,
> -                                              BlockTSmallerVector &exitBlks) {
> -  const AMDGPUInstrInfo *tii =
> -             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
> -  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> -
> -  RegiT endBranchReg = static_cast<int>
> -    (funcRep->getRegInfo().createVirtualRegister(I32RC));
> -  assert(endBranchReg >= 0);
> -
> -  // reg = 0 before entering the loop
> -  addLoopEndbranchInitReg(loopRep, endBranchReg);
> -
> -  uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
> -  assert(numBlks >=2 && numBlks == exitBlks.size());
> -
> -  BlockT *preExitingBlk = exitingBlks[0];
> -  BlockT *preExitBlk = exitBlks[0];
> -  BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
> -  funcRep->push_back(preBranchBlk);  //insert to function
> -  SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
> -
> -  BlockT *newLandBlk = preBranchBlk;
> -
> -      CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
> -        newLandBlk);
> -  preExitingBlk->removeSuccessor(preExitBlk);
> -  preExitingBlk->addSuccessor(newLandBlk);
> -
> -  //it is redundant to add reg = 0 to exitingBlks[0]
> -
> -  // For 1..n th exiting path (the last iteration handles two pathes) create the
> -  // branch to the previous path and the current path.
> -  for (uint32_t i = 1; i < numBlks; ++i) {
> -    BlockT *curExitingBlk = exitingBlks[i];
> -    BlockT *curExitBlk = exitBlks[i];
> -    BlockT *curBranchBlk;
> -
> -    if (i == numBlks - 1) {
> -      curBranchBlk = curExitBlk;
> -    } else {
> -      curBranchBlk = funcRep->CreateMachineBasicBlock();
> -      funcRep->push_back(curBranchBlk);  //insert to function
> -      SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
> -    }
> -
> -    // Add reg = i to exitingBlks[i].
> -    CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
> -                                       endBranchReg, i);
> -
> -    // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
> -    // (exitingBlks[i], newLandBlk).
> -    CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
> -                                          newLandBlk);
> -    curExitingBlk->removeSuccessor(curExitBlk);
> -    curExitingBlk->addSuccessor(newLandBlk);
> -
> -    // add to preBranchBlk the branch instruction:
> -    // if (endBranchReg == preVal)
> -    //    preExitBlk
> -    // else
> -    //    curBranchBlk
> -    //
> -    // preValReg = i - 1
> -
> -  DebugLoc DL;
> -  RegiT preValReg = static_cast<int>
> -    (funcRep->getRegInfo().createVirtualRegister(I32RC));
> -
> -  preBranchBlk->insert(preBranchBlk->begin(),
> -                       tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
> -                       i - 1));
> -
> -  // condResReg = (endBranchReg == preValReg)
> -    RegiT condResReg = static_cast<int>
> -      (funcRep->getRegInfo().createVirtualRegister(I32RC));
> -    BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
> -      .addReg(endBranchReg).addReg(preValReg);
> -
> -    BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
> -      .addMBB(preExitBlk).addReg(condResReg);
> -
> -    preBranchBlk->addSuccessor(preExitBlk);
> -    preBranchBlk->addSuccessor(curBranchBlk);
> -
> -    // Update preExitingBlk, preExitBlk, preBranchBlk.
> -    preExitingBlk = curExitingBlk;
> -    preExitBlk = curExitBlk;
> -    preBranchBlk = curBranchBlk;
> -
> -  }  //end for 1 .. n blocks
> -
> -  return newLandBlk;
> -} //addLoopEndbranchBlock
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::PathToKind
> -CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
> -                                     bool allowSideEntry) {
> -  assert(dstBlk);
> -
> -  if (srcBlk == dstBlk) {
> -    return SinglePath_InPath;
> -  }
> -
> -  while (srcBlk && srcBlk->succ_size() == 1) {
> -    srcBlk = *srcBlk->succ_begin();
> -    if (srcBlk == dstBlk) {
> -      return SinglePath_InPath;
> -    }
> -
> -    if (!allowSideEntry && srcBlk->pred_size() > 1) {
> -      return Not_SinglePath;
> -    }
> -  }
> -
> -  if (srcBlk && srcBlk->succ_size()==0) {
> -    return SinglePath_NotInPath;
> -  }
> -
> -  return Not_SinglePath;
> -} //singlePathTo
> -
> -// If there is a single path from srcBlk to dstBlk, return the last block before
> -// dstBlk If there is a single path from srcBlk->end without dstBlk, return the
> -// last block in the path Otherwise, return NULL
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
> -                                      bool allowSideEntry) {
> -  assert(dstBlk);
> -
> -  if (srcBlk == dstBlk) {
> -    return srcBlk;
> -  }
> -
> -  if (srcBlk->succ_size() == 0) {
> -    return srcBlk;
> -  }
> -
> -  while (srcBlk && srcBlk->succ_size() == 1) {
> -    BlockT *preBlk = srcBlk;
> -
> -    srcBlk = *srcBlk->succ_begin();
> -    if (srcBlk == NULL) {
> -      return preBlk;
> -    }
> -
> -    if (!allowSideEntry && srcBlk->pred_size() > 1) {
> -      return NULL;
> -    }
> -  }
> -
> -  if (srcBlk && srcBlk->succ_size()==0) {
> -    return srcBlk;
> +    // travel back up the basic block to see the last reference of our debug
> +    // location we've just inserted that reference here so it should be
> +    // representative insertEnd to ensure phi-moves, if exist, go before the
> +    // continue-instr.
> +    insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
> +        getLastDebugLocInBB(ContingMBB));
>    }
> +}
>  
> -  return NULL;
> -
> -} //singlePathEnd
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
> -                                               BlockT *dstBlk) {
> -  int cloned = 0;
> -  assert(preBlk->isSuccessor(srcBlk));
> -  while (srcBlk && srcBlk != dstBlk) {
> -    assert(srcBlk->succ_size() == 1);
> -    if (srcBlk->pred_size() > 1) {
> -      srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
> -      ++cloned;
> +int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
> +    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
> +  int Cloned = 0;
> +  assert(PreMBB->isSuccessor(SrcMBB));
> +  while (SrcMBB && SrcMBB != DstMBB) {
> +    assert(SrcMBB->succ_size() == 1);
> +    if (SrcMBB->pred_size() > 1) {
> +      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
> +      ++Cloned;
>      }
>  
> -    preBlk = srcBlk;
> -    srcBlk = *srcBlk->succ_begin();
> +    PreMBB = SrcMBB;
> +    SrcMBB = *SrcMBB->succ_begin();
>    }
>  
> -  return cloned;
> -} //cloneOnSideEntryTo
> +  return Cloned;
> +}
>  
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
> -                                                 BlockT *predBlk) {
> -  assert(predBlk->isSuccessor(curBlk) &&
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
> +    MachineBasicBlock *PredMBB) {
> +  assert(PredMBB->isSuccessor(MBB) &&
>           "succBlk is not a prececessor of curBlk");
>  
> -  BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
> -  CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
> +  MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
> +  replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
>    //srcBlk, oldBlk, newBlk
>  
> -  predBlk->removeSuccessor(curBlk);
> -  predBlk->addSuccessor(cloneBlk);
> +  PredMBB->removeSuccessor(MBB);
> +  PredMBB->addSuccessor(CloneMBB);
>  
>    // add all successor to cloneBlk
> -  CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
> +  cloneSuccessorList(CloneMBB, MBB);
>  
> -  numClonedInstr += curBlk->size();
> +  numClonedInstr += MBB->size();
>  
>    DEBUG(
>      dbgs() << "Cloned block: " << "BB"
> -           << curBlk->getNumber() << "size " << curBlk->size() << "\n";
> +           << MBB->getNumber() << "size " << MBB->size() << "\n";
>    );
>  
> -  SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
> -
> -  return cloneBlk;
> -} //cloneBlockForPredecessor
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
> -                                               BlockT *exitingBlk) {
> -  BlockT *exitBlk = NULL;
> -
> -  for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
> -       iterSuccEnd = exitingBlk->succ_end();
> -       iterSucc != iterSuccEnd; ++iterSucc) {
> -    BlockT *curBlk = *iterSucc;
> -    if (!loopRep->contains(curBlk)) {
> -      assert(exitBlk == NULL);
> -      exitBlk = curBlk;
> -    }
> -  }
> -
> -  assert(exitBlk != NULL);
> +  SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
>  
> -  return exitBlk;
> -} //exitingBlock2ExitBlock
> +  return CloneMBB;
> +}
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
> -                                                BlockT *dstBlk,
> -                                                InstrIterator insertPos) {
> -  InstrIterator spliceEnd;
> +void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
> +    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
> +  MachineBasicBlock::iterator SpliceEnd;
>    //look for the input branchinstr, not the AMDGPU branchinstr
> -  InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
> -  if (branchInstr == NULL) {
> +  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
> +  if (!BranchMI) {
>      DEBUG(
>        dbgs() << "migrateInstruction don't see branch instr\n" ;
>      );
> -    spliceEnd = srcBlk->end();
> +    SpliceEnd = SrcMBB->end();
>    } else {
>      DEBUG(
>        dbgs() << "migrateInstruction see branch instr\n" ;
> -      branchInstr->dump();
> +      BranchMI->dump();
>      );
> -    spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
> +    SpliceEnd = BranchMI;
>    }
>    DEBUG(
> -    dbgs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
> -      << "srcSize = " << srcBlk->size() << "\n";
> +    dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
> +      << "srcSize = " << SrcMBB->size() << "\n";
>    );
>  
>    //splice insert before insertPos
> -  dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
> +  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
>  
>    DEBUG(
> -    dbgs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
> -      << "srcSize = " << srcBlk->size() << "\n";
> +    dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
> +      << "srcSize = " << SrcMBB->size() << "\n";
>    );
> -} //migrateInstruction
> +}
>  
> -// normalizeInfiniteLoopExit change
> -//   B1:
> -//        uncond_br LoopHeader
> -//
> -// to
> -//   B1:
> -//        cond_br 1 LoopHeader dummyExit
> -// and return the newly added dummy exit block
> -// 
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
> -  BlockT *loopHeader;
> -  BlockT *loopLatch;
> -  loopHeader = LoopRep->getHeader();
> -  loopLatch = LoopRep->getLoopLatch();
> -  BlockT *dummyExitBlk = NULL;
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
> +  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
> +  MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
>    const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> -  if (loopHeader!=NULL && loopLatch!=NULL) {
> -    InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
> -    if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
> -      dummyExitBlk = funcRep->CreateMachineBasicBlock();
> -      funcRep->push_back(dummyExitBlk);  //insert to function
> -      SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
> -
> -      DEBUG(dbgs() << "Old branch instr: " << *branchInstr << "\n";);
> -
> -      typename BlockT::iterator insertPos =
> -        CFGTraits::getInstrPos(loopLatch, branchInstr);
> -      unsigned immReg =
> -        funcRep->getRegInfo().createVirtualRegister(I32RC);
> -      CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
> -      InstrT *newInstr = 
> -        CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
> -      MachineInstrBuilder MIB(*funcRep, newInstr);
> -      MIB.addMBB(loopHeader);
> -      MIB.addReg(immReg, false);
> -
> -      SHOWNEWINSTR(newInstr);
> -
> -      branchInstr->eraseFromParent();
> -      loopLatch->addSuccessor(dummyExitBlk);
> -    }
> -  }
>  
> -  return dummyExitBlk;
> -} //normalizeInfiniteLoopExit
> +  if (!LoopHeader || !LoopLatch)
> +    return NULL;
> +  MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
> +  // Is LoopRep an infinite loop ?
> +  if (!BranchMI || !isUncondBranch(BranchMI))
> +    return NULL;
> +
> +  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
> +  FuncRep->push_back(DummyExitBlk);  //insert to function
> +  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
> +  DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
> +  MachineBasicBlock::iterator I = BranchMI;
> +  unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
> +  llvm_unreachable("Extra register needed to handle CFG");
> +  MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
> +  MachineInstrBuilder MIB(*FuncRep, NewMI);
> +  MIB.addMBB(LoopHeader);
> +  MIB.addReg(ImmReg, false);
> +  SHOWNEWINSTR(NewMI);
> +  BranchMI->eraseFromParent();
> +  LoopLatch->addSuccessor(DummyExitBlk);
> +
> +  return DummyExitBlk;
> +}
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
> -  InstrT *branchInstr;
> +void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
> +  MachineInstr *BranchMI;
>  
>    // I saw two unconditional branch in one basic block in example
>    // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
> -  while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
> -          && CFGTraits::isUncondBranch(branchInstr)) {
> -    DEBUG(
> -          dbgs() << "Removing unconditional branch instruction" ;
> -      branchInstr->dump();
> -    );
> -    branchInstr->eraseFromParent();
> +  while ((BranchMI = getLoopendBlockBranchInstr(MBB))
> +          && isUncondBranch(BranchMI)) {
> +    DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
> +    BranchMI->eraseFromParent();
>    }
> -} //removeUnconditionalBranch
> +}
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
> -  if (srcBlk->succ_size() == 2) {
> -    BlockT *blk1 = *srcBlk->succ_begin();
> -    BlockT *blk2 = *(++srcBlk->succ_begin());
> +void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
> +    MachineBasicBlock *MBB) {
> +  if (MBB->succ_size() != 2)
> +    return;
> +  MachineBasicBlock *MBB1 = *MBB->succ_begin();
> +  MachineBasicBlock *MBB2 = *(++MBB->succ_begin());
> +  if (MBB1 != MBB2)
> +    return;
> +
> +  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
> +  assert(BranchMI && isCondBranch(BranchMI));
> +  DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
> +  BranchMI->eraseFromParent();
> +  SHOWNEWBLK(MBB1, "Removing redundant successor");
> +  MBB->removeSuccessor(MBB1);
> +}
>  
> -    if (blk1 == blk2) {
> -      InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
> -      assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
> -      DEBUG(
> -        dbgs() << "Removing unneeded conditional branch instruction" ;
> -        branchInstr->dump();
> -      );
> -      branchInstr->eraseFromParent();
> -      SHOWNEWBLK(blk1, "Removing redundant successor");
> -      srcBlk->removeSuccessor(blk1);
> -    }
> -  }
> -} //removeRedundantConditionalBranch
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
> -                                               DEFAULT_VEC_SLOTS> &retBlks) {
> -  BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
> -  funcRep->push_back(dummyExitBlk);  //insert to function
> -  CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
> -
> -  for (typename SmallVectorImpl<BlockT *>::iterator iter =
> -         retBlks.begin(),
> -       iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
> -    BlockT *curBlk = *iter;
> -    InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
> -    if (curInstr) {
> -      curInstr->eraseFromParent();
> -    }
> -    curBlk->addSuccessor(dummyExitBlk);
> +void AMDGPUCFGStructurizer::addDummyExitBlock(
> +    SmallVector<MachineBasicBlock*, DEFAULT_VEC_SLOTS> &RetMBB) {
> +  MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
> +  FuncRep->push_back(DummyExitBlk);  //insert to function
> +  insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
> +
> +  for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
> +       E = RetMBB.end(); It != E; ++It) {
> +    MachineBasicBlock *MBB = *It;
> +    MachineInstr *MI = getReturnInstr(MBB);
> +    if (MI)
> +      MI->eraseFromParent();
> +    MBB->addSuccessor(DummyExitBlk);
>      DEBUG(
> -      dbgs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
> +      dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
>               << " successors\n";
>      );
> -  } //for
> -
> -  SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
> -} //addDummyExitBlock
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
> -  while (srcBlk->succ_size()) {
> -    srcBlk->removeSuccessor(*srcBlk->succ_begin());
>    }
> +  SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
>  }
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
> -  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
> -
> -  if (srcBlkInfo == NULL) {
> -    srcBlkInfo = new BlockInfo();
> -  }
> -
> -  srcBlkInfo->sccNum = sccNum;
> +void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
> +  while (MBB->succ_size())
> +    MBB->removeSuccessor(*MBB->succ_begin());
>  }
>  
> -template<class PassT>
> -int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
> -  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
> -  return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
> +void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
> +    int SccNum) {
> +  BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
> +  if (!srcBlkInfo)
> +    srcBlkInfo = new BlockInformation();
> +  srcBlkInfo->SccNum = SccNum;
>  }
>  
> -template<class PassT>
> -void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
> +void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
>    DEBUG(
> -        dbgs() << "Retiring BB" << srcBlk->getNumber() << "\n";
> +        dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
>    );
>  
> -  BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
> +  BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
>  
> -  if (srcBlkInfo == NULL) {
> -    srcBlkInfo = new BlockInfo();
> -  }
> +  if (!SrcBlkInfo)
> +    SrcBlkInfo = new BlockInformation();
>  
> -  srcBlkInfo->isRetired = true;
> -  assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
> +  SrcBlkInfo->IsRetired = true;
> +  assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
>           && "can't retire block yet");
>  }
>  
> -template<class PassT>
> -bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
> -  BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
> -  return (srcBlkInfo && srcBlkInfo->isRetired);
> -}
> -
> -template<class PassT>
> -bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
> -  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
> -  while (loopRep && loopRep->getHeader() == curBlk) {
> -    LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
> -
> -    if(loopLand == NULL)
> -      return true;
> -
> -    BlockT *landBlk = loopLand->landBlk;
> -    assert(landBlk);
> -    if (!isRetiredBlock(landBlk)) {
> -      return true;
> -    }
> -
> -    loopRep = loopRep->getParentLoop();
> -  }
> -
> -  return false;
> -} //isActiveLoophead
> -
> -template<class PassT>
> -bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
> -  const unsigned blockSizeThreshold = 30;
> -  const unsigned cloneInstrThreshold = 100;
> -
> -  bool multiplePreds = blk && (blk->pred_size() > 1);
> -
> -  if(!multiplePreds)
> -    return false;
> -
> -  unsigned blkSize = blk->size();
> -  return ((blkSize > blockSizeThreshold)
> -          && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
> -} //needMigrateBlock
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
> -                                            BlockTSmallerVector &exitBlks,
> -                                            std::set<BlockT *> &exitBlkSet) {
> -  SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
> -
> -  for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
> -       predIterEnd = landBlk->pred_end();
> -       predIter != predIterEnd; ++predIter) {
> -    BlockT *curBlk = *predIter;
> -    if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
> -      inpathBlks.push_back(curBlk);
> -    }
> -  } //for
> -
> -  //if landBlk has predecessors that are not in the given loop,
> -  //create a new block
> -  BlockT *newLandBlk = landBlk;
> -  if (inpathBlks.size() != landBlk->pred_size()) {
> -    newLandBlk = funcRep->CreateMachineBasicBlock();
> -    funcRep->push_back(newLandBlk);  //insert to function
> -    newLandBlk->addSuccessor(landBlk);
> -    for (typename SmallVectorImpl<BlockT *>::iterator iter =
> -         inpathBlks.begin(),
> -         iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
> -      BlockT *curBlk = *iter;
> -      CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
> -      //srcBlk, oldBlk, newBlk
> -      curBlk->removeSuccessor(landBlk);
> -      curBlk->addSuccessor(newLandBlk);
> -    }
> -    for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
> -      if (exitBlks[i] == landBlk) {
> -        exitBlks[i] = newLandBlk;
> -      }
> -    }
> -    SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
> -  }
> -
> -  setLoopLandBlock(loopRep, newLandBlk);
> -
> -  return newLandBlk;
> -} // recordLoopbreakLand
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
> -  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> -  if (theEntry == NULL) {
> -    theEntry = new LoopLandInfo();
> +void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
> +    MachineBasicBlock *MBB) {
> +  MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
> +  if (!MBB) {
> +    MBB = FuncRep->CreateMachineBasicBlock();
> +    FuncRep->push_back(MBB);  //insert to function
> +    SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
>    }
> -  assert(theEntry->landBlk == NULL);
> -
> -  if (blk == NULL) {
> -    blk = funcRep->CreateMachineBasicBlock();
> -    funcRep->push_back(blk);  //insert to function
> -    SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
> -  }
> -
> -  theEntry->landBlk = blk;
> -
> +  TheEntry = MBB;
>    DEBUG(
>      dbgs() << "setLoopLandBlock loop-header = BB"
>             << loopRep->getHeader()->getNumber()
> -           << "  landing-block = BB" << blk->getNumber() << "\n";
> -  );
> -} // setLoopLandBlock
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
> -  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> -  if (theEntry == NULL) {
> -    theEntry = new LoopLandInfo();
> -  }
> -
> -  theEntry->breakOnRegs.insert(regNum);
> -
> -  DEBUG(
> -    dbgs() << "addLoopBreakOnReg loop-header = BB"
> -           << loopRep->getHeader()->getNumber()
> -           << "  regNum = " << regNum << "\n";
> -  );
> -} // addLoopBreakOnReg
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
> -  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> -  if (theEntry == NULL) {
> -    theEntry = new LoopLandInfo();
> -  }
> -  theEntry->contOnRegs.insert(regNum);
> -
> -  DEBUG(
> -    dbgs() << "addLoopContOnReg loop-header = BB"
> -           << loopRep->getHeader()->getNumber()
> -           << "  regNum = " << regNum << "\n";
> -  );
> -} // addLoopContOnReg
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
> -  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> -  if (theEntry == NULL) {
> -    theEntry = new LoopLandInfo();
> -  }
> -  theEntry->breakInitRegs.insert(regNum);
> -
> -  DEBUG(
> -    dbgs() << "addLoopBreakInitReg loop-header = BB"
> -           << loopRep->getHeader()->getNumber()
> -           << "  regNum = " << regNum << "\n";
> +           << "  landing-block = BB" << MBB->getNumber() << "\n";
>    );
> -} // addLoopBreakInitReg
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
> -  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> -  if (theEntry == NULL) {
> -    theEntry = new LoopLandInfo();
> -  }
> -  theEntry->contInitRegs.insert(regNum);
> -
> -  DEBUG(
> -    dbgs() << "addLoopContInitReg loop-header = BB"
> -           << loopRep->getHeader()->getNumber()
> -           << "  regNum = " << regNum << "\n";
> -  );
> -} // addLoopContInitReg
> -
> -template<class PassT>
> -void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
> -                                                     RegiT regNum) {
> -  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> -  if (theEntry == NULL) {
> -    theEntry = new LoopLandInfo();
> -  }
> -  theEntry->endbranchInitRegs.insert(regNum);
> -
> -  DEBUG(
> -        dbgs() << "addLoopEndbranchInitReg loop-header = BB"
> -      << loopRep->getHeader()->getNumber()
> -      << "  regNum = " << regNum << "\n";
> -  );
> -} // addLoopEndbranchInitReg
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::LoopLandInfo *
> -CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
> -  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> -  return theEntry;
> -} // getLoopLandInfo
> -
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
> -  LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
> -
> -  return theEntry ? theEntry->landBlk : NULL;
> -} // getLoopLandBlock
> -
> -
> -template<class PassT>
> -bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
> -  LoopT *loopRep = loopInfo->getLoopFor(curBlk);
> -  if (loopRep == NULL)
> -    return false;
> -
> -  BlockT *loopHeader = loopRep->getHeader();
> -
> -  return curBlk->isSuccessor(loopHeader);
> -
> -} //hasBackEdge
> -
> -template<class PassT>
> -unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
> -  return loopRep ? loopRep->getLoopDepth() : 0;
> -} //getLoopDepth
> -
> -template<class PassT>
> -int CFGStructurizer<PassT>::countActiveBlock
> -(typename SmallVectorImpl<BlockT *>::const_iterator iterStart,
> - typename SmallVectorImpl<BlockT *>::const_iterator iterEnd) {
> -  int count = 0;
> -  while (iterStart != iterEnd) {
> -    if (!isRetiredBlock(*iterStart)) {
> -      ++count;
> -    }
> -    ++iterStart;
> -  }
> -
> -  return count;
> -} //countActiveBlock
> -
> -// This is work around solution for findNearestCommonDominator not avaiable to
> -// post dom a proper fix should go to Dominators.h.
> +}
>  
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT*
> -CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
> +    MachineBasicBlock *MBB2) {
>  
> -  if (postDomTree->dominates(blk1, blk2)) {
> -    return blk1;
> -  }
> -  if (postDomTree->dominates(blk2, blk1)) {
> -    return blk2;
> -  }
> +  if (PDT->dominates(MBB1, MBB2))
> +    return MBB1;
> +  if (PDT->dominates(MBB2, MBB1))
> +    return MBB2;
>  
> -  DomTreeNodeT *node1 = postDomTree->getNode(blk1);
> -  DomTreeNodeT *node2 = postDomTree->getNode(blk2);
> +  MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
> +  MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
>  
>    // Handle newly cloned node.
> -  if (node1 == NULL && blk1->succ_size() == 1) {
> -    return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
> -  }
> -  if (node2 == NULL && blk2->succ_size() == 1) {
> -    return findNearestCommonPostDom(blk1, *blk2->succ_begin());
> -  }
> +  if (!Node1 && MBB1->succ_size() == 1)
> +    return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
> +  if (!Node2 && MBB2->succ_size() == 1)
> +    return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
>  
> -  if (node1 == NULL || node2 == NULL) {
> +  if (!Node1 || !Node2)
>      return NULL;
> -  }
>  
> -  node1 = node1->getIDom();
> -  while (node1) {
> -    if (postDomTree->dominates(node1, node2)) {
> -      return node1->getBlock();
> -    }
> -    node1 = node1->getIDom();
> +  Node1 = Node1->getIDom();
> +  while (Node1) {
> +    if (PDT->dominates(Node1, Node2))
> +      return Node1->getBlock();
> +    Node1 = Node1->getIDom();
>    }
>  
>    return NULL;
>  }
>  
> -template<class PassT>
> -typename CFGStructurizer<PassT>::BlockT *
> -CFGStructurizer<PassT>::findNearestCommonPostDom
> -(typename std::set<BlockT *> &blks) {
> -  BlockT *commonDom;
> -  typename std::set<BlockT *>::const_iterator iter = blks.begin();
> -  typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
> -  for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
> -    BlockT *curBlk = *iter;
> -    if (curBlk != commonDom) {
> -      commonDom = findNearestCommonPostDom(curBlk, commonDom);
> -    }
> +MachineBasicBlock *
> +AMDGPUCFGStructurizer::findNearestCommonPostDom(
> +    std::set<MachineBasicBlock *> &MBBs) {
> +  MachineBasicBlock *CommonDom;
> +  std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
> +  std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
> +  for (CommonDom = *It; It != E && CommonDom; ++It) {
> +    MachineBasicBlock *MBB = *It;
> +    if (MBB != CommonDom)
> +      CommonDom = findNearestCommonPostDom(MBB, CommonDom);
>    }
>  
>    DEBUG(
>      dbgs() << "Common post dominator for exit blocks is ";
> -    if (commonDom) {
> -          dbgs() << "BB" << commonDom->getNumber() << "\n";
> -    } else {
> +    if (CommonDom)
> +          dbgs() << "BB" << CommonDom->getNumber() << "\n";
> +    else
>        dbgs() << "NULL\n";
> -    }
>    );
>  
> -  return commonDom;
> -} //findNearestCommonPostDom
> -
> -} // end anonymous namespace
> -
> -//todo: move-end
> -
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// CFGStructurizer for AMDGPU
> -//
> -//===----------------------------------------------------------------------===//
> -
> -
> -namespace {
> -class AMDGPUCFGStructurizer : public MachineFunctionPass {
> -public:
> -  typedef MachineInstr              InstructionType;
> -  typedef MachineFunction           FunctionType;
> -  typedef MachineBasicBlock         BlockType;
> -  typedef MachineLoopInfo           LoopinfoType;
> -  typedef MachineDominatorTree      DominatortreeType;
> -  typedef MachinePostDominatorTree  PostDominatortreeType;
> -  typedef MachineDomTreeNode        DomTreeNodeType;
> -  typedef MachineLoop               LoopType;
> -
> -protected:
> -  TargetMachine &TM;
> -
> -public:
> -  AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
> -  const TargetInstrInfo *getTargetInstrInfo() const;
> -  const AMDGPURegisterInfo *getTargetRegisterInfo() const;
> -};
> -
> -} // end anonymous namespace
> -AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
> -  : MachineFunctionPass(pid), TM(tm) {
> -}
> -
> -const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
> -  return TM.getInstrInfo();
> -}
> -
> -const AMDGPURegisterInfo *AMDGPUCFGStructurizer::getTargetRegisterInfo() const {
> -  return static_cast<const AMDGPURegisterInfo *>(TM.getRegisterInfo());
> -}
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// CFGPrepare
> -//
> -//===----------------------------------------------------------------------===//
> -
> -
> -namespace {
> -class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
> -public:
> -  static char ID;
> -
> -public:
> -  AMDGPUCFGPrepare(TargetMachine &tm);
> -
> -  virtual const char *getPassName() const;
> -  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
> -
> -  bool runOnMachineFunction(MachineFunction &F);
> -};
> -
> -char AMDGPUCFGPrepare::ID = 0;
> -} // end anonymous namespace
> -
> -AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
> -  : AMDGPUCFGStructurizer(ID, tm )  {
> +  return CommonDom;
>  }
> -const char *AMDGPUCFGPrepare::getPassName() const {
> -  return "AMD IL Control Flow Graph Preparation Pass";
> -}
> -
> -void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
> -  AU.addPreserved<MachineFunctionAnalysis>();
> -  AU.addRequired<MachineFunctionAnalysis>();
> -  AU.addRequired<MachineDominatorTree>();
> -  AU.addRequired<MachinePostDominatorTree>();
> -  AU.addRequired<MachineLoopInfo>();
> -}
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// CFGPerform
> -//
> -//===----------------------------------------------------------------------===//
> -
> -
> -namespace {
> -class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
> -public:
> -  static char ID;
> -
> -public:
> -  AMDGPUCFGPerform(TargetMachine &tm);
> -  virtual const char *getPassName() const;
> -  virtual void getAnalysisUsage(AnalysisUsage &AU) const;
> -  bool runOnMachineFunction(MachineFunction &F);
> -};
> -
> -char AMDGPUCFGPerform::ID = 0;
> -} // end anonymous namespace
> -
> -  AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
> -: AMDGPUCFGStructurizer(ID, tm) {
> -}
> -
> -const char *AMDGPUCFGPerform::getPassName() const {
> -  return "AMD IL Control Flow Graph structurizer Pass";
> -}
> -
> -void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
> -  AU.addPreserved<MachineFunctionAnalysis>();
> -  AU.addRequired<MachineFunctionAnalysis>();
> -  AU.addRequired<MachineDominatorTree>();
> -  AU.addRequired<MachinePostDominatorTree>();
> -  AU.addRequired<MachineLoopInfo>();
> -}
> -
> -//===----------------------------------------------------------------------===//
> -//
> -// CFGStructTraits<AMDGPUCFGStructurizer>
> -//
> -//===----------------------------------------------------------------------===//
> -
> -namespace {
> -// this class is tailor to the AMDGPU backend
> -template<>
> -struct CFGStructTraits<AMDGPUCFGStructurizer> {
> -  typedef int RegiT;
> -
> -  static int getBranchNzeroOpcode(int oldOpcode) {
> -    switch(oldOpcode) {
> -    case AMDGPU::JUMP_COND:
> -    case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
> -    case AMDGPU::BRANCH_COND_i32:
> -    case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
> -    default:
> -      llvm_unreachable("internal error");
> -    }
> -    return -1;
> -  }
> -
> -  static int getBranchZeroOpcode(int oldOpcode) {
> -    switch(oldOpcode) {
> -    case AMDGPU::JUMP_COND:
> -    case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
> -    case AMDGPU::BRANCH_COND_i32:
> -    case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
> -    default:
> -      llvm_unreachable("internal error");
> -    }
> -    return -1;
> -  }
> -
> -  static int getContinueNzeroOpcode(int oldOpcode) {
> -    switch(oldOpcode) {
> -    case AMDGPU::JUMP_COND:
> -    case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
> -    default:
> -      llvm_unreachable("internal error");
> -    };
> -    return -1;
> -  }
> -
> -  static int getContinueZeroOpcode(int oldOpcode) {
> -    switch(oldOpcode) {
> -    case AMDGPU::JUMP_COND:
> -    case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
> -    default:
> -      llvm_unreachable("internal error");
> -    }
> -    return -1;
> -  }
> -
> -  static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
> -    return instr->getOperand(0).getMBB();
> -  }
> -
> -  static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
> -    instr->getOperand(0).setMBB(blk);
> -  }
> -
> -  static MachineBasicBlock *
> -  getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
> -    assert(blk->succ_size() == 2);
> -    MachineBasicBlock *trueBranch = getTrueBranch(instr);
> -    MachineBasicBlock::succ_iterator iter = blk->succ_begin();
> -    MachineBasicBlock::succ_iterator iterNext = iter;
> -    ++iterNext;
> -
> -    return (*iter == trueBranch) ? *iterNext : *iter;
> -  }
> -
> -  static bool isCondBranch(MachineInstr *instr) {
> -    switch (instr->getOpcode()) {
> -      case AMDGPU::JUMP_COND:
> -      case AMDGPU::BRANCH_COND_i32:
> -      case AMDGPU::BRANCH_COND_f32:
> -      break;
> -    default:
> -      return false;
> -    }
> -    return true;
> -  }
> -
> -  static bool isUncondBranch(MachineInstr *instr) {
> -    switch (instr->getOpcode()) {
> -    case AMDGPU::JUMP:
> -    case AMDGPU::BRANCH:
> -      return true;
> -    default:
> -      return false;
> -    }
> -    return true;
> -  }
> -
> -  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
> -    //get DebugLoc from the first MachineBasicBlock instruction with debug info
> -    DebugLoc DL;
> -    for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
> -      MachineInstr *instr = &(*iter);
> -      if (instr->getDebugLoc().isUnknown() == false) {
> -        DL = instr->getDebugLoc();
> -      }
> -    }
> -    return DL;
> -  }
> -
> -  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
> -    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
> -    MachineInstr *instr = &*iter;
> -    if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
> -      return instr;
> -    }
> -    return NULL;
> -  }
> -
> -  // The correct naming for this is getPossibleLoopendBlockBranchInstr.
> -  //
> -  // BB with backward-edge could have move instructions after the branch
> -  // instruction.  Such move instruction "belong to" the loop backward-edge.
> -  //
> -  static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
> -    const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
> -                                  blk->getParent()->getTarget().getInstrInfo());
> -
> -    for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
> -         iterEnd = blk->rend(); iter != iterEnd; ++iter) {
> -      // FIXME: Simplify
> -      MachineInstr *instr = &*iter;
> -      if (instr) {
> -        if (isCondBranch(instr) || isUncondBranch(instr)) {
> -          return instr;
> -        } else if (!TII->isMov(instr->getOpcode())) {
> -          break;
> -        }
> -      }
> -    }
> -    return NULL;
> -  }
> -
> -  static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
> -    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
> -    if (iter != blk->rend()) {
> -      MachineInstr *instr = &(*iter);
> -      if (instr->getOpcode() == AMDGPU::RETURN) {
> -        return instr;
> -      }
> -    }
> -    return NULL;
> -  }
> -
> -  static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
> -    MachineBasicBlock::reverse_iterator iter = blk->rbegin();
> -    if (iter != blk->rend()) {
> -      MachineInstr *instr = &(*iter);
> -      if (instr->getOpcode() == AMDGPU::CONTINUE) {
> -        return instr;
> -      }
> -    }
> -    return NULL;
> -  }
> -
> -  static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
> -    for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
> -      MachineInstr *instr = &(*iter);
> -      if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
> -        return instr;
> -      }
> -    }
> -    return NULL;
> -  }
> -
> -  static bool isReturnBlock(MachineBasicBlock *blk) {
> -    MachineInstr *instr = getReturnInstr(blk);
> -    bool isReturn = (blk->succ_size() == 0);
> -    if (instr) {
> -      assert(isReturn);
> -    } else if (isReturn) {
> -      DEBUG(
> -        dbgs() << "BB" << blk->getNumber()
> -               <<" is return block without RETURN instr\n";
> -      );
> -    }
> -
> -    return  isReturn;
> -  }
> -
> -  static MachineBasicBlock::iterator
> -  getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
> -    assert(instr->getParent() == blk && "instruction doesn't belong to block");
> -    MachineBasicBlock::iterator iter = blk->begin();
> -    MachineBasicBlock::iterator iterEnd = blk->end();
> -    while (&(*iter) != instr && iter != iterEnd) {
> -      ++iter;
> -    }
> -
> -    assert(iter != iterEnd);
> -    return iter;
> -  }//getInstrPos
> -
> -  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
> -                                         AMDGPUCFGStructurizer *passRep) {
> -    return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
> -  } //insertInstrBefore
> -
> -  static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
> -                                         AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
> -    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> -    MachineInstr *newInstr =
> -      blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
> -
> -    MachineBasicBlock::iterator res;
> -    if (blk->begin() != blk->end()) {
> -      blk->insert(blk->begin(), newInstr);
> -    } else {
> -      blk->push_back(newInstr);
> -    }
> -
> -    SHOWNEWINSTR(newInstr);
> -
> -    return newInstr;
> -  } //insertInstrBefore
> -
> -  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
> -                             AMDGPUCFGStructurizer *passRep) {
> -    insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
> -  } //insertInstrEnd
> -
> -  static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
> -                             AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
> -    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> -   MachineInstr *newInstr = blk->getParent()
> -      ->CreateMachineInstr(tii->get(newOpcode), DL);
> -
> -    blk->push_back(newInstr);
> -    //assume the instruction doesn't take any reg operand ...
> -
> -    SHOWNEWINSTR(newInstr);
> -  } //insertInstrEnd
> -
> -  static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
> -                                         int newOpcode, 
> -                                         AMDGPUCFGStructurizer *passRep) {
> -    MachineInstr *oldInstr = &(*instrPos);
> -    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> -    MachineBasicBlock *blk = oldInstr->getParent();
> -    MachineInstr *newInstr =
> -      blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
> -                                           DebugLoc());
> -
> -    blk->insert(instrPos, newInstr);
> -    //assume the instruction doesn't take any reg operand ...
> -
> -    SHOWNEWINSTR(newInstr);
> -    return newInstr;
> -  } //insertInstrBefore
> -
> -  static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
> -                                     int newOpcode,
> -                                     AMDGPUCFGStructurizer *passRep,
> -                                     DebugLoc DL) {
> -    MachineInstr *oldInstr = &(*instrPos);
> -    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> -    MachineBasicBlock *blk = oldInstr->getParent();
> -    MachineFunction *MF = blk->getParent();
> -    MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
> -
> -    blk->insert(instrPos, newInstr);
> -    MachineInstrBuilder MIB(*MF, newInstr);
> -    MIB.addReg(oldInstr->getOperand(1).getReg(), false);
> -
> -    SHOWNEWINSTR(newInstr);
> -    //erase later oldInstr->eraseFromParent();
> -  } //insertCondBranchBefore
> -
> -  static void insertCondBranchBefore(MachineBasicBlock *blk,
> -                                     MachineBasicBlock::iterator insertPos,
> -                                     int newOpcode,
> -                                     AMDGPUCFGStructurizer *passRep,
> -                                     RegiT regNum,
> -                                     DebugLoc DL) {
> -    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> -    MachineFunction *MF = blk->getParent();
> -
> -    MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
> -
> -    //insert before
> -    blk->insert(insertPos, newInstr);
> -    MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
> -
> -    SHOWNEWINSTR(newInstr);
> -  } //insertCondBranchBefore
> -
> -  static void insertCondBranchEnd(MachineBasicBlock *blk,
> -                                  int newOpcode,
> -                                  AMDGPUCFGStructurizer *passRep,
> -                                  RegiT regNum) {
> -    const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
> -    MachineFunction *MF = blk->getParent();
> -    MachineInstr *newInstr =
> -      MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
> -
> -    blk->push_back(newInstr);
> -    MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
> -
> -    SHOWNEWINSTR(newInstr);
> -  } //insertCondBranchEnd
> -
> -
> -  static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
> -                                      AMDGPUCFGStructurizer *passRep,
> -                                      RegiT regNum, int regVal) {
> -    MachineInstr *oldInstr = &(*instrPos);
> -    const AMDGPUInstrInfo *tii =
> -             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
> -    MachineBasicBlock *blk = oldInstr->getParent();
> -    MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
> -                                                 regVal);
> -    blk->insert(instrPos, newInstr);
> -
> -    SHOWNEWINSTR(newInstr);
> -  } //insertAssignInstrBefore
> -
> -  static void insertAssignInstrBefore(MachineBasicBlock *blk,
> -                                      AMDGPUCFGStructurizer *passRep,
> -                                      RegiT regNum, int regVal) {
> -    const AMDGPUInstrInfo *tii =
> -             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
> -
> -    MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
> -                                                 regVal);
> -    if (blk->begin() != blk->end()) {
> -      blk->insert(blk->begin(), newInstr);
> -    } else {
> -      blk->push_back(newInstr);
> -    }
> -
> -    SHOWNEWINSTR(newInstr);
> -
> -  } //insertInstrBefore
> -
> -  static void insertCompareInstrBefore(MachineBasicBlock *blk,
> -                                       MachineBasicBlock::iterator instrPos,
> -                                       AMDGPUCFGStructurizer *passRep,
> -                                       RegiT dstReg, RegiT src1Reg,
> -                                       RegiT src2Reg) {
> -    const AMDGPUInstrInfo *tii =
> -             static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
> -    MachineFunction *MF = blk->getParent();
> -    MachineInstr *newInstr =
> -      MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
> -
> -    MachineInstrBuilder MIB(*MF, newInstr);
> -    MIB.addReg(dstReg, RegState::Define); //set target
> -    MIB.addReg(src1Reg); //set src value
> -    MIB.addReg(src2Reg); //set src value
> -
> -    blk->insert(instrPos, newInstr);
> -    SHOWNEWINSTR(newInstr);
>  
> -  } //insertCompareInstrBefore
> +char AMDGPUCFGStructurizer::ID = 0;
>  
> -  static void cloneSuccessorList(MachineBasicBlock *dstBlk,
> -                                 MachineBasicBlock *srcBlk) {
> -    for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
> -         iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
> -      dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
> -    }
> -  } //cloneSuccessorList
> -
> -  static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
> -    MachineFunction *func = srcBlk->getParent();
> -    MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
> -    func->push_back(newBlk);  //insert to function
> -    for (MachineBasicBlock::iterator iter = srcBlk->begin(),
> -         iterEnd = srcBlk->end();
> -         iter != iterEnd; ++iter) {
> -      MachineInstr *instr = func->CloneMachineInstr(iter);
> -      newBlk->push_back(instr);
> -    }
> -    return newBlk;
> -  }
> -
> -  //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
> -  //the AMDGPU instruction is not recognized as terminator fix this and retire
> -  //this routine
> -  static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
> -                                         MachineBasicBlock *oldBlk,
> -                                         MachineBasicBlock *newBlk) {
> -    MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
> -    if (branchInstr && isCondBranch(branchInstr) &&
> -        getTrueBranch(branchInstr) == oldBlk) {
> -      setTrueBranch(branchInstr, newBlk);
> -    }
> -  }
> -
> -  static void wrapup(MachineBasicBlock *entryBlk) {
> -    assert((!entryBlk->getParent()->getJumpTableInfo()
> -            || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
> -           && "found a jump table");
> -
> -     //collect continue right before endloop
> -     SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
> -     MachineBasicBlock::iterator pre = entryBlk->begin();
> -     MachineBasicBlock::iterator iterEnd = entryBlk->end();
> -     MachineBasicBlock::iterator iter = pre;
> -     while (iter != iterEnd) {
> -       if (pre->getOpcode() == AMDGPU::CONTINUE
> -           && iter->getOpcode() == AMDGPU::ENDLOOP) {
> -         contInstr.push_back(pre);
> -       }
> -       pre = iter;
> -       ++iter;
> -     } //end while
> -
> -     //delete continue right before endloop
> -     for (unsigned i = 0; i < contInstr.size(); ++i) {
> -        contInstr[i]->eraseFromParent();
> -     }
> -
> -     // TODO to fix up jump table so later phase won't be confused.  if
> -     // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
> -     // there isn't such an interface yet.  alternatively, replace all the other
> -     // blocks in the jump table with the entryBlk //}
> -
> -  } //wrapup
> -
> -  static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
> -    return &pass.getAnalysis<MachineDominatorTree>();
> -  }
> -
> -  static MachinePostDominatorTree*
> -  getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
> -    return &pass.getAnalysis<MachinePostDominatorTree>();
> -  }
> -
> -  static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
> -    return &pass.getAnalysis<MachineLoopInfo>();
> -  }
> -}; // template class CFGStructTraits
>  } // end anonymous namespace
>  
> -// createAMDGPUCFGPreparationPass- Returns a pass
> -FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm) {
> -  return new AMDGPUCFGPrepare(tm);
> -}
> -
> -bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
> -  return CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, *this,
> -                                                       getTargetRegisterInfo());
> -}
>  
> -// createAMDGPUCFGStructurizerPass- Returns a pass
>  FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
> -  return new AMDGPUCFGPerform(tm);
> -}
> -
> -bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
> -  return CFGStructurizer<AMDGPUCFGStructurizer>().run(func, *this,
> -                                                      getTargetRegisterInfo());
> +  return new AMDGPUCFGStructurizer(tm);
>  }
> -- 
> 1.8.3.1
> 

> From b793a4dd4e34341b381a0b32a1de43d22aebb227 Mon Sep 17 00:00:00 2001
> From: Vincent Lejeune <vljn at ovi.com>
> Date: Fri, 19 Jul 2013 18:02:21 +0200
> Subject: [PATCH 3/3] R600: Don't emit empty then clause and use alu_pop_after
> 
> ---
>  lib/Target/R600/AMDILCFGStructurizer.cpp     |   8 +-
>  lib/Target/R600/R600ControlFlowFinalizer.cpp |  48 +++++++++--
>  lib/Target/R600/R600Instructions.td          |   1 +
>  test/CodeGen/R600/jump-address.ll            |   2 +-
>  test/CodeGen/R600/loop-address.ll            |   9 +-
>  test/CodeGen/R600/r600cfg.ll                 | 124 +++++++++++++++++++++++++++
>  6 files changed, 175 insertions(+), 17 deletions(-)
>  create mode 100644 test/CodeGen/R600/r600cfg.ll
> 
> diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
> index 6ace97a..aabeeca 100644
> --- a/lib/Target/R600/AMDILCFGStructurizer.cpp
> +++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
> @@ -1044,8 +1044,11 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
>    } else if (FalseMBB->succ_size() == 1
>               && *FalseMBB->succ_begin() == TrueMBB) {
>      // Triangle pattern, true is empty
> -    LandBlk = TrueMBB;
> -    TrueMBB = NULL;
> +    // Reverse the predicate: triangle pattern with an empty false block.
> +    std::swap(TrueMBB, FalseMBB);
> +    reversePredicateSetter(MBB->end());
> +    LandBlk = FalseMBB;
> +    FalseMBB = NULL;
>    } else if (FalseMBB->succ_size() == 1
>               && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
>      LandBlk = *FalseMBB->succ_begin();
> @@ -1461,6 +1464,7 @@ void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
>  void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
>      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
>      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
> +  assert (TrueMBB);
>    DEBUG(
>      dbgs() << "ifPattern BB" << MBB->getNumber();
>      dbgs() << "{  ";
> diff --git a/lib/Target/R600/R600ControlFlowFinalizer.cpp b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> index 932a6a7..40cd2c2 100644
> --- a/lib/Target/R600/R600ControlFlowFinalizer.cpp
> +++ b/lib/Target/R600/R600ControlFlowFinalizer.cpp
> @@ -347,6 +347,9 @@ public:
>          MaxStack = 1;
>        }
>        std::vector<ClauseFile> FetchClauses, AluClauses;
> +      std::vector<MachineInstr *> LastAlu(1);
> +      std::vector<MachineInstr *> ToPopAfter;
> +      
>        for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
>            I != E;) {
>          if (TII->usesTextureCache(I) || TII->usesVertexCache(I)) {
> @@ -357,6 +360,10 @@ public:
>          }
>  
>          MachineBasicBlock::iterator MI = I;
> +        if (MI->getOpcode() != AMDGPU::ENDIF)
> +          LastAlu.back() = 0;
> +        if (MI->getOpcode() == AMDGPU::CF_ALU)
> +          LastAlu.back() = MI;
>          I++;
>          switch (MI->getOpcode()) {
>          case AMDGPU::CF_ALU_PUSH_BEFORE:
> @@ -403,6 +410,7 @@ public:
>            break;
>          }
>          case AMDGPU::IF_PREDICATE_SET: {
> +          LastAlu.push_back(0);
>            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
>                getHWInstrDesc(CF_JUMP))
>                .addImm(0)
> @@ -420,7 +428,7 @@ public:
>            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
>                getHWInstrDesc(CF_ELSE))
>                .addImm(0)
> -              .addImm(1);
> +              .addImm(0);
>            DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
>            IfThenElseStack.push_back(MIb);
>            MI->eraseFromParent();
> @@ -429,17 +437,24 @@ public:
>          }
>          case AMDGPU::ENDIF: {
>            CurrentStack--;
> +          if (LastAlu.back()) {
> +            ToPopAfter.push_back(LastAlu.back());
> +          } else {
> +            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> +                getHWInstrDesc(CF_POP))
> +                .addImm(CfCount + 1)
> +                .addImm(1);
> +            (void)MIb;
> +            DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
> +            CfCount++;
> +          }
> +          
>            MachineInstr *IfOrElseInst = IfThenElseStack.back();
>            IfThenElseStack.pop_back();
> -          CounterPropagateAddr(IfOrElseInst, CfCount + 1);
> -          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
> -              getHWInstrDesc(CF_POP))
> -              .addImm(CfCount + 1)
> -              .addImm(1);
> -          (void)MIb;
> -          DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
> +          CounterPropagateAddr(IfOrElseInst, CfCount);
> +          IfOrElseInst->getOperand(1).setImm(1);
> +          LastAlu.pop_back();
>            MI->eraseFromParent();
> -          CfCount++;
>            break;
>          }
>          case AMDGPU::PREDICATED_BREAK: {
> @@ -484,6 +499,21 @@ public:
>            break;
>          }
>        }
> +      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
> +        MachineInstr *Alu = ToPopAfter[i];
> +        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
> +            TII->get(AMDGPU::CF_ALU_POP_AFTER))
> +            .addImm(Alu->getOperand(0).getImm())
> +            .addImm(Alu->getOperand(1).getImm())
> +            .addImm(Alu->getOperand(2).getImm())
> +            .addImm(Alu->getOperand(3).getImm())
> +            .addImm(Alu->getOperand(4).getImm())
> +            .addImm(Alu->getOperand(5).getImm())
> +            .addImm(Alu->getOperand(6).getImm())
> +            .addImm(Alu->getOperand(7).getImm())
> +            .addImm(Alu->getOperand(8).getImm());
> +        Alu->eraseFromParent();
> +      }
>        MFI->StackSize = getHWStackSize(MaxStack, HasPush);
>      }
>  
> diff --git a/lib/Target/R600/R600Instructions.td b/lib/Target/R600/R600Instructions.td
> index df5c438..3652c89 100644
> --- a/lib/Target/R600/R600Instructions.td
> +++ b/lib/Target/R600/R600Instructions.td
> @@ -624,6 +624,7 @@ ins, AsmPrint, [] >, CF_WORD0_EG, CF_WORD1_EG {
>  
>  def CF_ALU : ALU_CLAUSE<8, "ALU">;
>  def CF_ALU_PUSH_BEFORE : ALU_CLAUSE<9, "ALU_PUSH_BEFORE">;
> +def CF_ALU_POP_AFTER : ALU_CLAUSE<10, "ALU_POP_AFTER">;
>  
>  def FETCH_CLAUSE : AMDGPUInst <(outs),
>  (ins i32imm:$addr), "Fetch clause starting at $addr:", [] > {
> diff --git a/test/CodeGen/R600/jump-address.ll b/test/CodeGen/R600/jump-address.ll
> index 9a5f1bc..26c298b 100644
> --- a/test/CodeGen/R600/jump-address.ll
> +++ b/test/CodeGen/R600/jump-address.ll
> @@ -1,6 +1,6 @@
>  ;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s
>  
> -; CHECK: JUMP @7
> +; CHECK: JUMP @5
>  ; CHECK: EXPORT
>  ; CHECK-NOT: EXPORT
>  
> diff --git a/test/CodeGen/R600/loop-address.ll b/test/CodeGen/R600/loop-address.ll
> index 8a5458b..23be327 100644
> --- a/test/CodeGen/R600/loop-address.ll
> +++ b/test/CodeGen/R600/loop-address.ll
> @@ -2,12 +2,11 @@
>  
>  ;CHECK: TEX
>  ;CHECK: ALU_PUSH
> -;CHECK: JUMP @4
> -;CHECK: ELSE @16
> +;CHECK: JUMP @15
>  ;CHECK: TEX
> -;CHECK: LOOP_START_DX10 @15
> -;CHECK: LOOP_BREAK @14
> -;CHECK: POP @16
> +;CHECK: LOOP_START_DX10 @14
> +;CHECK: LOOP_BREAK @13
> +;CHECK: POP @15
>  
>  target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024-v2048:2048:2048-n32:64"
>  target triple = "r600--"
> diff --git a/test/CodeGen/R600/r600cfg.ll b/test/CodeGen/R600/r600cfg.ll
> new file mode 100644
> index 0000000..895ad5e
> --- /dev/null
> +++ b/test/CodeGen/R600/r600cfg.ll
> @@ -0,0 +1,124 @@
> +;RUN: llc < %s -march=r600 -mcpu=redwood
> +;REQUIRES: asserts
> +
> +define void @main() #0 {
> +main_body:
> +  %0 = call float @llvm.R600.load.input(i32 4)
> +  %1 = call float @llvm.R600.load.input(i32 5)
> +  %2 = call float @llvm.R600.load.input(i32 6)
> +  %3 = call float @llvm.R600.load.input(i32 7)
> +  %4 = bitcast float %0 to i32
> +  %5 = icmp eq i32 %4, 0
> +  %6 = sext i1 %5 to i32
> +  %7 = bitcast i32 %6 to float
> +  %8 = bitcast float %7 to i32
> +  %9 = icmp ne i32 %8, 0
> +  %. = select i1 %9, float 0x36A0000000000000, float %0
> +  br label %LOOP
> +
> +LOOP:                                             ; preds = %LOOP47, %main_body
> +  %temp12.0 = phi float [ 0x36A0000000000000, %main_body ], [ %temp12.1, %LOOP47 ]
> +  %temp8.0 = phi float [ 0.000000e+00, %main_body ], [ %38, %LOOP47 ]
> +  %temp4.1 = phi float [ %., %main_body ], [ %52, %LOOP47 ]
> +  %10 = bitcast float %temp4.1 to i32
> +  %11 = icmp eq i32 %10, 1
> +  %12 = sext i1 %11 to i32
> +  %13 = bitcast i32 %12 to float
> +  %14 = bitcast float %13 to i32
> +  %15 = icmp ne i32 %14, 0
> +  br i1 %15, label %IF41, label %ENDIF40
> +
> +IF41:                                             ; preds = %LOOP
> +  %16 = insertelement <4 x float> undef, float %0, i32 0
> +  %17 = insertelement <4 x float> %16, float %temp8.0, i32 1
> +  %18 = insertelement <4 x float> %17, float %temp12.0, i32 2
> +  %19 = insertelement <4 x float> %18, float 0.000000e+00, i32 3
> +  call void @llvm.R600.store.stream.output(<4 x float> %19, i32 0, i32 0, i32 1)
> +  %20 = insertelement <4 x float> undef, float %0, i32 0
> +  %21 = insertelement <4 x float> %20, float %temp8.0, i32 1
> +  %22 = insertelement <4 x float> %21, float %temp12.0, i32 2
> +  %23 = insertelement <4 x float> %22, float 0.000000e+00, i32 3
> +  call void @llvm.R600.store.stream.output(<4 x float> %23, i32 0, i32 0, i32 2)
> +  %24 = insertelement <4 x float> undef, float %0, i32 0
> +  %25 = insertelement <4 x float> %24, float %temp8.0, i32 1
> +  %26 = insertelement <4 x float> %25, float %temp12.0, i32 2
> +  %27 = insertelement <4 x float> %26, float 0.000000e+00, i32 3
> +  call void @llvm.R600.store.stream.output(<4 x float> %27, i32 0, i32 0, i32 4)
> +  %28 = insertelement <4 x float> undef, float 0.000000e+00, i32 0
> +  %29 = insertelement <4 x float> %28, float 0.000000e+00, i32 1
> +  %30 = insertelement <4 x float> %29, float 0.000000e+00, i32 2
> +  %31 = insertelement <4 x float> %30, float 0.000000e+00, i32 3
> +  call void @llvm.R600.store.swizzle(<4 x float> %31, i32 60, i32 1)
> +  %32 = insertelement <4 x float> undef, float %0, i32 0
> +  %33 = insertelement <4 x float> %32, float %temp8.0, i32 1
> +  %34 = insertelement <4 x float> %33, float %temp12.0, i32 2
> +  %35 = insertelement <4 x float> %34, float 0.000000e+00, i32 3
> +  call void @llvm.R600.store.swizzle(<4 x float> %35, i32 0, i32 2)
> +  ret void
> +
> +ENDIF40:                                          ; preds = %LOOP
> +  %36 = bitcast float %temp8.0 to i32
> +  %37 = add i32 %36, 1
> +  %38 = bitcast i32 %37 to float
> +  %39 = bitcast float %temp4.1 to i32
> +  %40 = urem i32 %39, 2
> +  %41 = bitcast i32 %40 to float
> +  %42 = bitcast float %41 to i32
> +  %43 = icmp eq i32 %42, 0
> +  %44 = sext i1 %43 to i32
> +  %45 = bitcast i32 %44 to float
> +  %46 = bitcast float %45 to i32
> +  %47 = icmp ne i32 %46, 0
> +  %48 = bitcast float %temp4.1 to i32
> +  br i1 %47, label %IF44, label %ELSE45
> +
> +IF44:                                             ; preds = %ENDIF40
> +  %49 = udiv i32 %48, 2
> +  br label %ENDIF43
> +
> +ELSE45:                                           ; preds = %ENDIF40
> +  %50 = mul i32 3, %48
> +  %51 = add i32 %50, 1
> +  br label %ENDIF43
> +
> +ENDIF43:                                          ; preds = %ELSE45, %IF44
> +  %.sink = phi i32 [ %49, %IF44 ], [ %51, %ELSE45 ]
> +  %52 = bitcast i32 %.sink to float
> +  %53 = load <4 x float> addrspace(8)* null
> +  %54 = extractelement <4 x float> %53, i32 0
> +  %55 = bitcast float %54 to i32
> +  br label %LOOP47
> +
> +LOOP47:                                           ; preds = %ENDIF48, %ENDIF43
> +  %temp12.1 = phi float [ %temp12.0, %ENDIF43 ], [ %67, %ENDIF48 ]
> +  %temp28.0 = phi float [ 0.000000e+00, %ENDIF43 ], [ %70, %ENDIF48 ]
> +  %56 = bitcast float %temp28.0 to i32
> +  %57 = icmp uge i32 %56, %55
> +  %58 = sext i1 %57 to i32
> +  %59 = bitcast i32 %58 to float
> +  %60 = bitcast float %59 to i32
> +  %61 = icmp ne i32 %60, 0
> +  br i1 %61, label %LOOP, label %ENDIF48
> +
> +ENDIF48:                                          ; preds = %LOOP47
> +  %62 = bitcast float %temp12.1 to i32
> +  %63 = mul i32 %62, 2
> +  %64 = bitcast i32 %63 to float
> +  %65 = bitcast float %64 to i32
> +  %66 = urem i32 %65, 2147483647
> +  %67 = bitcast i32 %66 to float
> +  %68 = bitcast float %temp28.0 to i32
> +  %69 = add i32 %68, 1
> +  %70 = bitcast i32 %69 to float
> +  br label %LOOP47
> +}
> +
> +; Function Attrs: readnone
> +declare float @llvm.R600.load.input(i32) #1
> +
> +declare void @llvm.R600.store.stream.output(<4 x float>, i32, i32, i32)
> +
> +declare void @llvm.R600.store.swizzle(<4 x float>, i32, i32)
> +
> +attributes #0 = { "ShaderType"="1" }
> +attributes #1 = { readnone }
> -- 
> 1.8.3.1
> 

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits





More information about the llvm-commits mailing list