[PATCH] R600: Clean if/then/else handling code in AMDILCFGStructurizer

Tom Stellard tom at stellard.net
Mon Nov 18 12:09:15 PST 2013


Hi Vincent,

I've rebased your patch on top of the current master branch (which
includes my patches to enable the IR Structurizer), and attached it to
this mail.

Your patch regresses a test case I added in:

r195030
R600: Fix a crash in the AMDILCFGStrucurizer

I think the problem may be that not all of the if statements nested
inside the loop are being matched by the ifPatternMatch() function.

The other question I have about this patch is: Why is it safe to remove
the code for handling branch into IF?

-Tom

On Fri, Nov 15, 2013 at 09:16:13AM -0800, Vincent Lejeune wrote:
> Further simplify the pass and fix some shadertoy's sample crashes.
> The pass is no longer able to copy block in the jump into if situation and rely on structurizeCFG pass.
> 
> http://llvm-reviews.chandlerc.com/D2188
> 
> Files:
>   lib/Target/R600/AMDILCFGStructurizer.cpp

> Index: lib/Target/R600/AMDILCFGStructurizer.cpp
> ===================================================================
> --- lib/Target/R600/AMDILCFGStructurizer.cpp
> +++ lib/Target/R600/AMDILCFGStructurizer.cpp
> @@ -133,18 +133,15 @@
>  
>    AMDGPUCFGStructurizer(TargetMachine &tm) :
>        MachineFunctionPass(ID), TM(tm),
> -      TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
> -      TRI(&TII->getRegisterInfo()) { }
> +      MLI(0), TII(0), TRI(0), FuncRep(0) { }
>  
>     const char *getPassName() const {
>      return "AMD IL Control Flow Graph structurizer Pass";
>    }
>  
>    void getAnalysisUsage(AnalysisUsage &AU) const {
>      AU.addPreserved<MachineFunctionAnalysis>();
>      AU.addRequired<MachineFunctionAnalysis>();
> -    AU.addRequired<MachineDominatorTree>();
> -    AU.addRequired<MachinePostDominatorTree>();
>      AU.addRequired<MachineLoopInfo>();
>    }
>  
> @@ -161,21 +158,17 @@
>      OrderedBlks.clear();
>      FuncRep = &MF;
>      MLI = &getAnalysis<MachineLoopInfo>();
> +    TII = static_cast<const R600InstrInfo *>(TM.getInstrInfo());
> +    TRI = &TII->getRegisterInfo();
>      DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
> -    MDT = &getAnalysis<MachineDominatorTree>();
> -    DEBUG(MDT->print(dbgs(), (const llvm::Module*)0););
> -    PDT = &getAnalysis<MachinePostDominatorTree>();
> -    DEBUG(PDT->print(dbgs()););
>      prepare();
>      run();
>      DEBUG(MF.dump(););
>      return true;
>    }
>  
>  protected:
>    TargetMachine &TM;
> -  MachineDominatorTree *MDT;
> -  MachinePostDominatorTree *PDT;
>    MachineLoopInfo *MLI;
>    const R600InstrInfo *TII;
>    const AMDGPURegisterInfo *TRI;
> @@ -208,12 +201,8 @@
>    bool hasBackEdge(MachineBasicBlock *MBB) const;
>    static unsigned getLoopDepth(MachineLoop *LoopRep);
>    bool isRetiredBlock(MachineBasicBlock *MBB) const;
> -  bool isActiveLoophead(MachineBasicBlock *MBB) const;
> -  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
> -      bool AllowSideEntry = true) const;
>    int countActiveBlock(MBBVector::const_iterator It,
>        MBBVector::const_iterator E) const;
> -  bool needMigrateBlock(MachineBasicBlock *MBB) const;
>  
>    // Utility Functions
>    void reversePredicateSetter(MachineBasicBlock::iterator I);
> @@ -264,33 +253,16 @@
>  
>  
>    int patternMatch(MachineBasicBlock *MBB);
> -  int patternMatchGroup(MachineBasicBlock *MBB);
> -  int serialPatternMatch(MachineBasicBlock *MBB);
> -  int ifPatternMatch(MachineBasicBlock *MBB);
> +  bool patternMatchGroup(MachineBasicBlock *MBB);
> +  bool serialPatternMatch(MachineBasicBlock *MBB);
> +  bool ifPatternMatch(MachineBasicBlock *MBB);
>    int loopendPatternMatch();
>    int mergeLoop(MachineLoop *LoopRep);
>    int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
>  
>    void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
>        MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
>        MachineLoop *ContLoop);
> -  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
> -  /// the same loop with LoopLandInfo without explicitly keeping track of
> -  /// loopContBlks and loopBreakBlks, this is a method to get the information.
> -  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
> -      MachineBasicBlock *Src2MBB);
> -  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
> -      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
> -  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
> -      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
> -  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> -      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> -      MachineBasicBlock **LandMBBPtr);
> -  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> -      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> -      MachineBasicBlock *LandMBB, bool Detail = false);
> -  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
> -      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
>    void mergeSerialBlock(MachineBasicBlock *DstMBB,
>        MachineBasicBlock *SrcMBB);
>  
> @@ -326,18 +298,10 @@
>    void removeSuccessor(MachineBasicBlock *MBB);
>    MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
>        MachineBasicBlock *PredMBB);
> -  void migrateInstruction(MachineBasicBlock *SrcMBB,
> -      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
>    void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
>    void retireBlock(MachineBasicBlock *MBB);
>    void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL);
>  
> -  MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
> -  /// This is work around solution for findNearestCommonDominator not avaiable
> -  /// to post dom a proper fix should go to Dominators.h.
> -  MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
> -      MachineBasicBlock *MBB2);
> -
>  private:
>    MBBInfoMap BlockInfoMap;
>    LoopLandInfoMap LLInfoMap;
> @@ -380,36 +344,6 @@
>    return (*It).second->IsRetired;
>  }
>  
> -bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
> -  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
> -  while (LoopRep && LoopRep->getHeader() == MBB) {
> -    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
> -    if(!LoopLand)
> -      return true;
> -    if (!isRetiredBlock(LoopLand))
> -      return true;
> -    LoopRep = LoopRep->getParentLoop();
> -  }
> -  return false;
> -}
> -AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
> -    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
> -    bool AllowSideEntry) const {
> -  assert(DstMBB);
> -  if (SrcMBB == DstMBB)
> -    return SinglePath_InPath;
> -  while (SrcMBB && SrcMBB->succ_size() == 1) {
> -    SrcMBB = *SrcMBB->succ_begin();
> -    if (SrcMBB == DstMBB)
> -      return SinglePath_InPath;
> -    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
> -      return Not_SinglePath;
> -  }
> -  if (SrcMBB && SrcMBB->succ_size()==0)
> -    return SinglePath_NotInPath;
> -  return Not_SinglePath;
> -}
> -
>  int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
>      MBBVector::const_iterator E) const {
>    int Count = 0;
> @@ -421,18 +355,6 @@
>    return Count;
>  }
>  
> -bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
> -  unsigned BlockSizeThreshold = 30;
> -  unsigned CloneInstrThreshold = 100;
> -  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
> -
> -  if(!MultiplePreds)
> -    return false;
> -  unsigned BlkSize = MBB->size();
> -  return ((BlkSize > BlockSizeThreshold) &&
> -      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
> -}
> -
>  void AMDGPUCFGStructurizer::reversePredicateSetter(
>      MachineBasicBlock::iterator I) {
>    while (I--) {
> @@ -800,6 +722,7 @@
>    bool MakeProgress = false;
>    int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
>                                          OrderedBlks.end());
> +  loopendPatternMatch();
>  
>    do {
>      ++NumIter;
> @@ -972,103 +895,96 @@
>    return NumMatch;
>  }
>  
> -int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
> -  int NumMatch = 0;
> -  NumMatch += loopendPatternMatch();
> -  NumMatch += serialPatternMatch(MBB);
> -  NumMatch += ifPatternMatch(MBB);
> -  return NumMatch;
> +bool AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
> +  bool ChangedInSingleIteration, Changed = false;
> +  do {
> +      ChangedInSingleIteration = false;
> +      DEBUG(dbgs() << "Pattern matching starting from BB#" << MBB->getNumber()
> +          << "\n";);
> +      ChangedInSingleIteration |= serialPatternMatch(MBB);
> +      ChangedInSingleIteration |= ifPatternMatch(MBB);
> +      Changed |= ChangedInSingleIteration;
> +  } while (ChangedInSingleIteration);
> +
> +  return Changed;
>  }
>  
>  
> -int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
> +bool AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
>    if (MBB->succ_size() != 1)
> -    return 0;
> +    return false;
>  
>    MachineBasicBlock *childBlk = *MBB->succ_begin();
> -  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
> -    return 0;
> +  if (childBlk->pred_size() != 1)
> +    return false;
>  
>    mergeSerialBlock(MBB, childBlk);
>    ++numSerialPatternMatch;
> -  return 1;
> +  return true;
>  }
>  
> -int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
> -  //two edges
> +bool AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
>    if (MBB->succ_size() != 2)
> -    return 0;
> +    return false;
>    if (hasBackEdge(MBB))
> -    return 0;
> +    return false;
>    MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
>    if (!BranchMI)
> -    return 0;
> +    return false;
>  
>    assert(isCondBranch(BranchMI));
>  
> +  bool Changed = false;
>    MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
> -  serialPatternMatch(TrueMBB);
> -  ifPatternMatch(TrueMBB);
>    MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
> -  serialPatternMatch(FalseMBB);
> -  ifPatternMatch(FalseMBB);
> +  DEBUG(dbgs() << "if pattern starting at BB#" << MBB->getNumber() <<
> +      ", True branch to BB#" << TrueMBB->getNumber() <<
> +      ", False branch to BB#" << FalseMBB->getNumber() << "\n");
> +
> +  Changed |= patternMatchGroup(TrueMBB);
> +  Changed |= patternMatchGroup(FalseMBB);
> +  DEBUG(
> +    dbgs() << "BB#" << TrueMBB->getNumber() << "'successors :";
> +    for (MachineBasicBlock::succ_iterator I = TrueMBB->succ_begin(),
> +        E = TrueMBB->succ_end(); I != E; ++I)
> +      dbgs() << "BB#" << (*I)->getNumber() <<", ";
> +    dbgs() << "\n";
> +    dbgs() << "BB#" << FalseMBB->getNumber() << "'successors :";
> +    for (MachineBasicBlock::succ_iterator I = FalseMBB->succ_begin(),
> +        E = FalseMBB->succ_end(); I != E; ++I)
> +      dbgs() << "BB#" << (*I)->getNumber() << ", ";
> +    dbgs() << "\n";
> +);
>    MachineBasicBlock *LandBlk;
> -  int Cloned = 0;
>  
>    assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
> -  // TODO: Simplify
>    if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
> -    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
> +    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()
> +    && TrueMBB->pred_size() == 1 && FalseMBB->pred_size() == 1) {
>      // Diamond pattern
>      LandBlk = *TrueMBB->succ_begin();
> -  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
> +  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB &&
> +      TrueMBB->pred_size() == 1) {
>      // Triangle pattern, false is empty
>      LandBlk = FalseMBB;
>      FalseMBB = NULL;
> -  } else if (FalseMBB->succ_size() == 1
> +  } else if (FalseMBB->succ_size() == 1 && FalseMBB->pred_size() == 1
>               && *FalseMBB->succ_begin() == TrueMBB) {
>      // Triangle pattern, true is empty
>      // We reverse the predicate to make a triangle, empty false pattern;
>      std::swap(TrueMBB, FalseMBB);
>      reversePredicateSetter(MBB->end());
>      LandBlk = FalseMBB;
>      FalseMBB = NULL;
> -  } else if (FalseMBB->succ_size() == 1
> -             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
> -    LandBlk = *FalseMBB->succ_begin();
> -  } else if (TrueMBB->succ_size() == 1
> -    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
> -    LandBlk = *TrueMBB->succ_begin();
> -  } else {
> -    return handleJumpintoIf(MBB, TrueMBB, FalseMBB);
> -  }
> -
> -  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
> -  // new BB created for landBlk==NULL may introduce new challenge to the
> -  // reduction process.
> -  if (LandBlk &&
> -      ((TrueMBB && TrueMBB->pred_size() > 1)
> -      || (FalseMBB && FalseMBB->pred_size() > 1))) {
> -     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
> -  }
> +  } else
> +    return Changed;
>  
> -  if (TrueMBB && TrueMBB->pred_size() > 1) {
> +  if (TrueMBB && TrueMBB->pred_size() > 1)
>      TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
> -    ++Cloned;
> -  }
> -
> -  if (FalseMBB && FalseMBB->pred_size() > 1) {
> +  if (FalseMBB && FalseMBB->pred_size() > 1)
>      FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
> -    ++Cloned;
> -  }
> -
>    mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
> -
> -  ++numIfPatternMatch;
> -
> -  numClonedBlock += Cloned;
> -
> -  return 1 + Cloned;
> +  return true;
>  }
>  
>  int AMDGPUCFGStructurizer::loopendPatternMatch() {
> @@ -1129,11 +1045,7 @@
>    for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
>      settleLoopcontBlock(LatchBlks[i], LoopHeader);
>    int Match = 0;
> -  do {
> -    Match = 0;
> -    Match += serialPatternMatch(LoopHeader);
> -    Match += ifPatternMatch(LoopHeader);
> -  } while (Match > 0);
> +  patternMatchGroup(LoopHeader);
>    mergeLooplandBlock(LoopHeader, ExitBlk);
>    MachineLoop *ParentLoop = LoopRep->getParentLoop();
>    if (ParentLoop)
> @@ -1171,302 +1083,6 @@
>    return NumCont;
>  }
>  
> -
> -bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
> -    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
> -  if (Src1MBB->succ_size() == 0) {
> -    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
> -    if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
> -      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
> -      if (TheEntry) {
> -        DEBUG(
> -          dbgs() << "isLoopContBreakBlock yes src1 = BB"
> -                 << Src1MBB->getNumber()
> -                 << " src2 = BB" << Src2MBB->getNumber() << "\n";
> -        );
> -        return true;
> -      }
> -    }
> -  }
> -  return false;
> -}
> -
> -int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
> -    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
> -  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
> -  if (Num == 0) {
> -    DEBUG(
> -      dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
> -    );
> -    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
> -  }
> -  return Num;
> -}
> -
> -int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
> -    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
> -  int Num = 0;
> -  MachineBasicBlock *DownBlk;
> -
> -  //trueBlk could be the common post dominator
> -  DownBlk = TrueMBB;
> -
> -  DEBUG(
> -    dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
> -           << " true = BB" << TrueMBB->getNumber()
> -           << ", numSucc=" << TrueMBB->succ_size()
> -           << " false = BB" << FalseMBB->getNumber() << "\n";
> -  );
> -
> -  while (DownBlk) {
> -    DEBUG(
> -      dbgs() << "check down = BB" << DownBlk->getNumber();
> -    );
> -
> -    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
> -      DEBUG(
> -        dbgs() << " working\n";
> -      );
> -
> -      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
> -      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
> -
> -      numClonedBlock += Num;
> -      Num += serialPatternMatch(*HeadMBB->succ_begin());
> -      Num += serialPatternMatch(*llvm::next(HeadMBB->succ_begin()));
> -      Num += ifPatternMatch(HeadMBB);
> -      assert(Num > 0);
> -
> -      break;
> -    }
> -    DEBUG(
> -      dbgs() << " not working\n";
> -    );
> -    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
> -  } // walk down the postDomTree
> -
> -  return Num;
> -}
> -
> -void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
> -    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
> -    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
> -  dbgs() << "head = BB" << HeadMBB->getNumber()
> -         << " size = " << HeadMBB->size();
> -  if (Detail) {
> -    dbgs() << "\n";
> -    HeadMBB->print(dbgs());
> -    dbgs() << "\n";
> -  }
> -
> -  if (TrueMBB) {
> -    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
> -           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
> -    if (Detail) {
> -      dbgs() << "\n";
> -      TrueMBB->print(dbgs());
> -      dbgs() << "\n";
> -    }
> -  }
> -  if (FalseMBB) {
> -    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
> -           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
> -    if (Detail) {
> -      dbgs() << "\n";
> -      FalseMBB->print(dbgs());
> -      dbgs() << "\n";
> -    }
> -  }
> -  if (LandMBB) {
> -    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
> -           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
> -    if (Detail) {
> -      dbgs() << "\n";
> -      LandMBB->print(dbgs());
> -      dbgs() << "\n";
> -    }
> -  }
> -
> -    dbgs() << "\n";
> -}
> -
> -int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
> -    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
> -    MachineBasicBlock **LandMBBPtr) {
> -  bool MigrateTrue = false;
> -  bool MigrateFalse = false;
> -
> -  MachineBasicBlock *LandBlk = *LandMBBPtr;
> -
> -  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
> -         && (!FalseMBB || FalseMBB->succ_size() <= 1));
> -
> -  if (TrueMBB == FalseMBB)
> -    return 0;
> -
> -  MigrateTrue = needMigrateBlock(TrueMBB);
> -  MigrateFalse = needMigrateBlock(FalseMBB);
> -
> -  if (!MigrateTrue && !MigrateFalse)
> -    return 0;
> -
> -  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
> -  // have more than one predecessors.  without doing this, its predecessor
> -  // rather than headBlk will have undefined value in initReg.
> -  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
> -    MigrateTrue = true;
> -  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
> -    MigrateFalse = true;
> -
> -  DEBUG(
> -    dbgs() << "before improveSimpleJumpintoIf: ";
> -    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
> -  );
> -
> -  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
> -  //
> -  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
> -  //      {initReg = 0; org falseBlk branch }
> -  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
> -  //      => org landBlk
> -  //      if landBlk->pred_size() > 2, put the about if-else inside
> -  //      if (initReg !=2) {...}
> -  //
> -  // add initReg = initVal to headBlk
> -
> -  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
> -  if (!MigrateTrue || !MigrateFalse) {
> -    // XXX: We have an opportunity here to optimize the "branch into if" case
> -    // here.  Branch into if looks like this:
> -    //                        entry
> -    //                       /     |
> -    //           diamond_head       branch_from
> -    //             /      \           |
> -    // diamond_false        diamond_true
> -    //             \      /
> -    //               done
> -    //
> -    // The diamond_head block begins the "if" and the diamond_true block
> -    // is the block being "branched into".
> -    //
> -    // If MigrateTrue is true, then TrueBB is the block being "branched into"
> -    // and if MigrateFalse is true, then FalseBB is the block being
> -    // "branched into"
> -    // 
> -    // Here is the pseudo code for how I think the optimization should work:
> -    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
> -    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
> -    // 3. Move the branch instruction from diamond_head into its own basic
> -    //    block (new_block).
> -    // 4. Add an unconditional branch from diamond_head to new_block
> -    // 5. Replace the branch instruction in branch_from with an unconditional
> -    //    branch to new_block.  If branch_from has multiple predecessors, then
> -    //    we need to replace the True/False block in the branch
> -    //    instruction instead of replacing it.
> -    // 6. Change the condition of the branch instruction in new_block from
> -    //    COND to (COND || GPR0)
> -    //
> -    // In order insert these MOV instruction, we will need to use the
> -    // RegisterScavenger.  Usually liveness stops being tracked during
> -    // the late machine optimization passes, however if we implement
> -    // bool TargetRegisterInfo::requiresRegisterScavenging(
> -    //                                                const MachineFunction &MF)
> -    // and have it return true, liveness will be tracked correctly 
> -    // by generic optimization passes.  We will also need to make sure that
> -    // all of our target-specific passes that run after regalloc and before
> -    // the CFGStructurizer track liveness and we will need to modify this pass
> -    // to correctly track liveness.
> -    //
> -    // After the above changes, the new CFG should look like this:
> -    //                        entry
> -    //                       /     |
> -    //           diamond_head       branch_from
> -    //                       \     /
> -    //                      new_block
> -    //                      /      |
> -    //         diamond_false        diamond_true
> -    //                      \      /
> -    //                        done
> -    //
> -    // Without this optimization, we are forced to duplicate the diamond_true
> -    // block and we will end up with a CFG like this:
> -    //
> -    //                        entry
> -    //                       /     |
> -    //           diamond_head       branch_from
> -    //             /      \                   |
> -    // diamond_false        diamond_true      diamond_true (duplicate)
> -    //             \      /                   |
> -    //               done --------------------|
> -    //
> -    // Duplicating diamond_true can be very costly especially if it has a
> -    // lot of instructions.
> -    return 0;
> -  }
> -
> -  int NumNewBlk = 0;
> -
> -  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
> -
> -  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
> -  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
> -
> -  if (LandBlkHasOtherPred) {
> -    llvm_unreachable("Extra register needed to handle CFG");
> -    unsigned CmpResReg =
> -      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
> -    llvm_unreachable("Extra compare instruction needed to handle CFG");
> -    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
> -        CmpResReg, DebugLoc());
> -  }
> -
> -  // XXX: We are running this after RA, so creating virtual registers will
> -  // cause an assertion failure in the PostRA scheduling pass.
> -  unsigned InitReg =
> -    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
> -  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
> -      DebugLoc());
> -
> -  if (MigrateTrue) {
> -    migrateInstruction(TrueMBB, LandBlk, I);
> -    // need to uncondionally insert the assignment to ensure a path from its
> -    // predecessor rather than headBlk has valid value in initReg if
> -    // (initVal != 1).
> -    llvm_unreachable("Extra register needed to handle CFG");
> -  }
> -  insertInstrBefore(I, AMDGPU::ELSE);
> -
> -  if (MigrateFalse) {
> -    migrateInstruction(FalseMBB, LandBlk, I);
> -    // need to uncondionally insert the assignment to ensure a path from its
> -    // predecessor rather than headBlk has valid value in initReg if
> -    // (initVal != 0)
> -    llvm_unreachable("Extra register needed to handle CFG");
> -  }
> -
> -  if (LandBlkHasOtherPred) {
> -    // add endif
> -    insertInstrBefore(I, AMDGPU::ENDIF);
> -
> -    // put initReg = 2 to other predecessors of landBlk
> -    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
> -         PE = LandBlk->pred_end(); PI != PE; ++PI) {
> -      MachineBasicBlock *MBB = *PI;
> -      if (MBB != TrueMBB && MBB != FalseMBB)
> -        llvm_unreachable("Extra register needed to handle CFG");
> -    }
> -  }
> -  DEBUG(
> -    dbgs() << "result from improveSimpleJumpintoIf: ";
> -    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
> -  );
> -
> -  // update landBlk
> -  *LandMBBPtr = LandBlk;
> -
> -  return NumNewBlk;
> -}
> -
>  void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
>      MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
>      MachineLoop *ContLoop) {
> @@ -1637,24 +1253,6 @@
>    }
>  }
>  
> -int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
> -    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
> -  int Cloned = 0;
> -  assert(PreMBB->isSuccessor(SrcMBB));
> -  while (SrcMBB && SrcMBB != DstMBB) {
> -    assert(SrcMBB->succ_size() == 1);
> -    if (SrcMBB->pred_size() > 1) {
> -      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
> -      ++Cloned;
> -    }
> -
> -    PreMBB = SrcMBB;
> -    SrcMBB = *SrcMBB->succ_begin();
> -  }
> -
> -  return Cloned;
> -}
> -
>  MachineBasicBlock *
>  AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
>      MachineBasicBlock *PredMBB) {
> @@ -1683,37 +1281,6 @@
>    return CloneMBB;
>  }
>  
> -void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
> -    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
> -  MachineBasicBlock::iterator SpliceEnd;
> -  //look for the input branchinstr, not the AMDGPU branchinstr
> -  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
> -  if (!BranchMI) {
> -    DEBUG(
> -      dbgs() << "migrateInstruction don't see branch instr\n" ;
> -    );
> -    SpliceEnd = SrcMBB->end();
> -  } else {
> -    DEBUG(
> -      dbgs() << "migrateInstruction see branch instr\n" ;
> -      BranchMI->dump();
> -    );
> -    SpliceEnd = BranchMI;
> -  }
> -  DEBUG(
> -    dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
> -      << "srcSize = " << SrcMBB->size() << "\n";
> -  );
> -
> -  //splice insert before insertPos
> -  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
> -
> -  DEBUG(
> -    dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
> -      << "srcSize = " << SrcMBB->size() << "\n";
> -  );
> -}
> -
>  MachineBasicBlock *
>  AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
>    MachineBasicBlock *LoopHeader = LoopRep->getHeader();
> @@ -1839,60 +1406,6 @@
>    );
>  }
>  
> -MachineBasicBlock *
> -AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
> -    MachineBasicBlock *MBB2) {
> -
> -  if (PDT->dominates(MBB1, MBB2))
> -    return MBB1;
> -  if (PDT->dominates(MBB2, MBB1))
> -    return MBB2;
> -
> -  MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
> -  MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
> -
> -  // Handle newly cloned node.
> -  if (!Node1 && MBB1->succ_size() == 1)
> -    return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
> -  if (!Node2 && MBB2->succ_size() == 1)
> -    return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
> -
> -  if (!Node1 || !Node2)
> -    return NULL;
> -
> -  Node1 = Node1->getIDom();
> -  while (Node1) {
> -    if (PDT->dominates(Node1, Node2))
> -      return Node1->getBlock();
> -    Node1 = Node1->getIDom();
> -  }
> -
> -  return NULL;
> -}
> -
> -MachineBasicBlock *
> -AMDGPUCFGStructurizer::findNearestCommonPostDom(
> -    std::set<MachineBasicBlock *> &MBBs) {
> -  MachineBasicBlock *CommonDom;
> -  std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
> -  std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
> -  for (CommonDom = *It; It != E && CommonDom; ++It) {
> -    MachineBasicBlock *MBB = *It;
> -    if (MBB != CommonDom)
> -      CommonDom = findNearestCommonPostDom(MBB, CommonDom);
> -  }
> -
> -  DEBUG(
> -    dbgs() << "Common post dominator for exit blocks is ";
> -    if (CommonDom)
> -          dbgs() << "BB" << CommonDom->getNumber() << "\n";
> -    else
> -      dbgs() << "NULL\n";
> -  );
> -
> -  return CommonDom;
> -}
> -
>  char AMDGPUCFGStructurizer::ID = 0;
>  
>  } // end anonymous namespace

> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits

-------------- next part --------------
>From cb567865c2489b8e38d13734760b2511b15dab32 Mon Sep 17 00:00:00 2001
From: Tom Stellard <thomas.stellard at amd.com>
Date: Mon, 18 Nov 2013 11:52:09 -0800
Subject: [PATCH] R600: Clean if/then/else handling code in
 AMDILCFGStructurizer v2

v2:
  - Rebased on top of IR Structurizer enable patches.
---
 lib/Target/R600/AMDILCFGStructurizer.cpp | 604 +++----------------------------
 1 file changed, 58 insertions(+), 546 deletions(-)

diff --git a/lib/Target/R600/AMDILCFGStructurizer.cpp b/lib/Target/R600/AMDILCFGStructurizer.cpp
index 507570f..b040ad7 100644
--- a/lib/Target/R600/AMDILCFGStructurizer.cpp
+++ b/lib/Target/R600/AMDILCFGStructurizer.cpp
@@ -133,8 +133,7 @@ public:
 
   AMDGPUCFGStructurizer(TargetMachine &tm) :
       MachineFunctionPass(ID), TM(tm),
-      TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
-      TRI(&TII->getRegisterInfo()) { }
+      MLI(0), TII(0), TRI(0), FuncRep(0) { }
 
    const char *getPassName() const {
     return "AMD IL Control Flow Graph structurizer Pass";
@@ -143,8 +142,6 @@ public:
   void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addPreserved<MachineFunctionAnalysis>();
     AU.addRequired<MachineFunctionAnalysis>();
-    AU.addRequired<MachineDominatorTree>();
-    AU.addRequired<MachinePostDominatorTree>();
     AU.addRequired<MachineLoopInfo>();
   }
 
@@ -161,11 +158,9 @@ public:
     OrderedBlks.clear();
     FuncRep = &MF;
     MLI = &getAnalysis<MachineLoopInfo>();
+    TII = static_cast<const R600InstrInfo *>(TM.getInstrInfo());
+    TRI = &TII->getRegisterInfo();
     DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
-    MDT = &getAnalysis<MachineDominatorTree>();
-    DEBUG(MDT->print(dbgs(), (const llvm::Module*)0););
-    PDT = &getAnalysis<MachinePostDominatorTree>();
-    DEBUG(PDT->print(dbgs()););
     prepare();
     run();
     DEBUG(MF.dump(););
@@ -174,8 +169,6 @@ public:
 
 protected:
   TargetMachine &TM;
-  MachineDominatorTree *MDT;
-  MachinePostDominatorTree *PDT;
   MachineLoopInfo *MLI;
   const R600InstrInfo *TII;
   const AMDGPURegisterInfo *TRI;
@@ -208,12 +201,8 @@ protected:
   bool hasBackEdge(MachineBasicBlock *MBB) const;
   static unsigned getLoopDepth(MachineLoop *LoopRep);
   bool isRetiredBlock(MachineBasicBlock *MBB) const;
-  bool isActiveLoophead(MachineBasicBlock *MBB) const;
-  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
-      bool AllowSideEntry = true) const;
   int countActiveBlock(MBBVector::const_iterator It,
       MBBVector::const_iterator E) const;
-  bool needMigrateBlock(MachineBasicBlock *MBB) const;
 
   // Utility Functions
   void reversePredicateSetter(MachineBasicBlock::iterator I);
@@ -264,9 +253,9 @@ protected:
 
 
   int patternMatch(MachineBasicBlock *MBB);
-  int patternMatchGroup(MachineBasicBlock *MBB);
-  int serialPatternMatch(MachineBasicBlock *MBB);
-  int ifPatternMatch(MachineBasicBlock *MBB);
+  bool patternMatchGroup(MachineBasicBlock *MBB);
+  bool serialPatternMatch(MachineBasicBlock *MBB);
+  bool ifPatternMatch(MachineBasicBlock *MBB);
   int loopendPatternMatch();
   int mergeLoop(MachineLoop *LoopRep);
   int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
@@ -274,23 +263,6 @@ protected:
   void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
       MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
       MachineLoop *ContLoop);
-  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
-  /// the same loop with LoopLandInfo without explicitly keeping track of
-  /// loopContBlks and loopBreakBlks, this is a method to get the information.
-  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
-      MachineBasicBlock *Src2MBB);
-  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
-      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
-  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
-      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
-  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
-      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
-      MachineBasicBlock **LandMBBPtr);
-  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
-      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
-      MachineBasicBlock *LandMBB, bool Detail = false);
-  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
-      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
   void mergeSerialBlock(MachineBasicBlock *DstMBB,
       MachineBasicBlock *SrcMBB);
 
@@ -326,18 +298,10 @@ protected:
   void removeSuccessor(MachineBasicBlock *MBB);
   MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
       MachineBasicBlock *PredMBB);
-  void migrateInstruction(MachineBasicBlock *SrcMBB,
-      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
   void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
   void retireBlock(MachineBasicBlock *MBB);
   void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL);
 
-  MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
-  /// This is work around solution for findNearestCommonDominator not avaiable
-  /// to post dom a proper fix should go to Dominators.h.
-  MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
-      MachineBasicBlock *MBB2);
-
 private:
   MBBInfoMap BlockInfoMap;
   LoopLandInfoMap LLInfoMap;
@@ -380,36 +344,6 @@ bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
   return (*It).second->IsRetired;
 }
 
-bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
-  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
-  while (LoopRep && LoopRep->getHeader() == MBB) {
-    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
-    if(!LoopLand)
-      return true;
-    if (!isRetiredBlock(LoopLand))
-      return true;
-    LoopRep = LoopRep->getParentLoop();
-  }
-  return false;
-}
-AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
-    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
-    bool AllowSideEntry) const {
-  assert(DstMBB);
-  if (SrcMBB == DstMBB)
-    return SinglePath_InPath;
-  while (SrcMBB && SrcMBB->succ_size() == 1) {
-    SrcMBB = *SrcMBB->succ_begin();
-    if (SrcMBB == DstMBB)
-      return SinglePath_InPath;
-    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
-      return Not_SinglePath;
-  }
-  if (SrcMBB && SrcMBB->succ_size()==0)
-    return SinglePath_NotInPath;
-  return Not_SinglePath;
-}
-
 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
     MBBVector::const_iterator E) const {
   int Count = 0;
@@ -421,18 +355,6 @@ int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
   return Count;
 }
 
-bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
-  unsigned BlockSizeThreshold = 30;
-  unsigned CloneInstrThreshold = 100;
-  bool MultiplePreds = MBB && (MBB->pred_size() > 1);
-
-  if(!MultiplePreds)
-    return false;
-  unsigned BlkSize = MBB->size();
-  return ((BlkSize > BlockSizeThreshold) &&
-      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
-}
-
 void AMDGPUCFGStructurizer::reversePredicateSetter(
     MachineBasicBlock::iterator I) {
   while (I--) {
@@ -800,6 +722,7 @@ bool AMDGPUCFGStructurizer::run() {
   bool MakeProgress = false;
   int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
                                         OrderedBlks.end());
+  loopendPatternMatch();
 
   do {
     ++NumIter;
@@ -972,61 +895,80 @@ int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
   return NumMatch;
 }
 
-int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
-  int NumMatch = 0;
-  NumMatch += loopendPatternMatch();
-  NumMatch += serialPatternMatch(MBB);
-  NumMatch += ifPatternMatch(MBB);
-  return NumMatch;
+bool AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
+  bool ChangedInSingleIteration, Changed = false;
+  do {
+      ChangedInSingleIteration = false;
+      DEBUG(dbgs() << "Pattern matching starting from BB#" << MBB->getNumber()
+          << "\n";);
+      ChangedInSingleIteration |= serialPatternMatch(MBB);
+      ChangedInSingleIteration |= ifPatternMatch(MBB);
+      Changed |= ChangedInSingleIteration;
+  } while (ChangedInSingleIteration);
+
+  return Changed;
 }
 
 
-int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
+bool AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
   if (MBB->succ_size() != 1)
-    return 0;
+    return false;
 
   MachineBasicBlock *childBlk = *MBB->succ_begin();
-  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
-    return 0;
+  if (childBlk->pred_size() != 1)
+    return false;
 
   mergeSerialBlock(MBB, childBlk);
   ++numSerialPatternMatch;
-  return 1;
+  return true;
 }
 
-int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
-  //two edges
+bool AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
   if (MBB->succ_size() != 2)
-    return 0;
+    return false;
   if (hasBackEdge(MBB))
-    return 0;
+    return false;
   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
   if (!BranchMI)
-    return 0;
+    return false;
 
   assert(isCondBranch(BranchMI));
-  int NumMatch = 0;
 
+  bool Changed = false;
   MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
-  NumMatch += serialPatternMatch(TrueMBB);
-  NumMatch += ifPatternMatch(TrueMBB);
   MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
-  NumMatch += serialPatternMatch(FalseMBB);
-  NumMatch += ifPatternMatch(FalseMBB);
+  DEBUG(dbgs() << "if pattern starting at BB#" << MBB->getNumber() <<
+      ", True branch to BB#" << TrueMBB->getNumber() <<
+      ", False branch to BB#" << FalseMBB->getNumber() << "\n");
+
+  Changed |= patternMatchGroup(TrueMBB);
+  Changed |= patternMatchGroup(FalseMBB);
+  DEBUG(
+    dbgs() << "BB#" << TrueMBB->getNumber() << "'successors :";
+    for (MachineBasicBlock::succ_iterator I = TrueMBB->succ_begin(),
+        E = TrueMBB->succ_end(); I != E; ++I)
+      dbgs() << "BB#" << (*I)->getNumber() <<", ";
+    dbgs() << "\n";
+    dbgs() << "BB#" << FalseMBB->getNumber() << "'successors :";
+    for (MachineBasicBlock::succ_iterator I = FalseMBB->succ_begin(),
+        E = FalseMBB->succ_end(); I != E; ++I)
+      dbgs() << "BB#" << (*I)->getNumber() << ", ";
+    dbgs() << "\n";
+);
   MachineBasicBlock *LandBlk;
-  int Cloned = 0;
 
   assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
-  // TODO: Simplify
   if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
-    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
+    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()
+    && TrueMBB->pred_size() == 1 && FalseMBB->pred_size() == 1) {
     // Diamond pattern
     LandBlk = *TrueMBB->succ_begin();
-  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
+  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB &&
+      TrueMBB->pred_size() == 1) {
     // Triangle pattern, false is empty
     LandBlk = FalseMBB;
     FalseMBB = NULL;
-  } else if (FalseMBB->succ_size() == 1
+  } else if (FalseMBB->succ_size() == 1 && FalseMBB->pred_size() == 1
              && *FalseMBB->succ_begin() == TrueMBB) {
     // Triangle pattern, true is empty
     // We reverse the predicate to make a triangle, empty false pattern;
@@ -1034,42 +976,15 @@ int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
     reversePredicateSetter(MBB->end());
     LandBlk = FalseMBB;
     FalseMBB = NULL;
-  } else if (FalseMBB->succ_size() == 1
-             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
-    LandBlk = *FalseMBB->succ_begin();
-  } else if (TrueMBB->succ_size() == 1
-    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
-    LandBlk = *TrueMBB->succ_begin();
-  } else {
-    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
-  }
+  } else
+    return Changed;
 
-  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
-  // new BB created for landBlk==NULL may introduce new challenge to the
-  // reduction process.
-  if (LandBlk &&
-      ((TrueMBB && TrueMBB->pred_size() > 1)
-      || (FalseMBB && FalseMBB->pred_size() > 1))) {
-     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
-  }
-
-  if (TrueMBB && TrueMBB->pred_size() > 1) {
+  if (TrueMBB && TrueMBB->pred_size() > 1)
     TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
-    ++Cloned;
-  }
-
-  if (FalseMBB && FalseMBB->pred_size() > 1) {
+  if (FalseMBB && FalseMBB->pred_size() > 1)
     FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
-    ++Cloned;
-  }
-
   mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
-
-  ++numIfPatternMatch;
-
-  numClonedBlock += Cloned;
-
-  return 1 + Cloned + NumMatch;
+  return true;
 }
 
 int AMDGPUCFGStructurizer::loopendPatternMatch() {
@@ -1130,11 +1045,7 @@ int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
   for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
     settleLoopcontBlock(LatchBlks[i], LoopHeader);
   int Match = 0;
-  do {
-    Match = 0;
-    Match += serialPatternMatch(LoopHeader);
-    Match += ifPatternMatch(LoopHeader);
-  } while (Match > 0);
+  patternMatchGroup(LoopHeader);
   mergeLooplandBlock(LoopHeader, ExitBlk);
   MachineLoop *ParentLoop = LoopRep->getParentLoop();
   if (ParentLoop)
@@ -1172,302 +1083,6 @@ int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
   return NumCont;
 }
 
-
-bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
-    MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
-  if (Src1MBB->succ_size() == 0) {
-    MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
-    if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
-      MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
-      if (TheEntry) {
-        DEBUG(
-          dbgs() << "isLoopContBreakBlock yes src1 = BB"
-                 << Src1MBB->getNumber()
-                 << " src2 = BB" << Src2MBB->getNumber() << "\n";
-        );
-        return true;
-      }
-    }
-  }
-  return false;
-}
-
-int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
-    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
-  int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
-  if (Num == 0) {
-    DEBUG(
-      dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
-    );
-    Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
-  }
-  return Num;
-}
-
-int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
-    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
-  int Num = 0;
-  MachineBasicBlock *DownBlk;
-
-  //trueBlk could be the common post dominator
-  DownBlk = TrueMBB;
-
-  DEBUG(
-    dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
-           << " true = BB" << TrueMBB->getNumber()
-           << ", numSucc=" << TrueMBB->succ_size()
-           << " false = BB" << FalseMBB->getNumber() << "\n";
-  );
-
-  while (DownBlk) {
-    DEBUG(
-      dbgs() << "check down = BB" << DownBlk->getNumber();
-    );
-
-    if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
-      DEBUG(
-        dbgs() << " working\n";
-      );
-
-      Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
-      Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
-
-      numClonedBlock += Num;
-      Num += serialPatternMatch(*HeadMBB->succ_begin());
-      Num += serialPatternMatch(*llvm::next(HeadMBB->succ_begin()));
-      Num += ifPatternMatch(HeadMBB);
-      assert(Num > 0);
-
-      break;
-    }
-    DEBUG(
-      dbgs() << " not working\n";
-    );
-    DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
-  } // walk down the postDomTree
-
-  return Num;
-}
-
-void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
-    MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
-    MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
-  dbgs() << "head = BB" << HeadMBB->getNumber()
-         << " size = " << HeadMBB->size();
-  if (Detail) {
-    dbgs() << "\n";
-    HeadMBB->print(dbgs());
-    dbgs() << "\n";
-  }
-
-  if (TrueMBB) {
-    dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
-           << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
-    if (Detail) {
-      dbgs() << "\n";
-      TrueMBB->print(dbgs());
-      dbgs() << "\n";
-    }
-  }
-  if (FalseMBB) {
-    dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
-           << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
-    if (Detail) {
-      dbgs() << "\n";
-      FalseMBB->print(dbgs());
-      dbgs() << "\n";
-    }
-  }
-  if (LandMBB) {
-    dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
-           << LandMBB->size() << " numPred = " << LandMBB->pred_size();
-    if (Detail) {
-      dbgs() << "\n";
-      LandMBB->print(dbgs());
-      dbgs() << "\n";
-    }
-  }
-
-    dbgs() << "\n";
-}
-
-int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
-    MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
-    MachineBasicBlock **LandMBBPtr) {
-  bool MigrateTrue = false;
-  bool MigrateFalse = false;
-
-  MachineBasicBlock *LandBlk = *LandMBBPtr;
-
-  assert((!TrueMBB || TrueMBB->succ_size() <= 1)
-         && (!FalseMBB || FalseMBB->succ_size() <= 1));
-
-  if (TrueMBB == FalseMBB)
-    return 0;
-
-  MigrateTrue = needMigrateBlock(TrueMBB);
-  MigrateFalse = needMigrateBlock(FalseMBB);
-
-  if (!MigrateTrue && !MigrateFalse)
-    return 0;
-
-  // If we need to migrate either trueBlk and falseBlk, migrate the rest that
-  // have more than one predecessors.  without doing this, its predecessor
-  // rather than headBlk will have undefined value in initReg.
-  if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
-    MigrateTrue = true;
-  if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
-    MigrateFalse = true;
-
-  DEBUG(
-    dbgs() << "before improveSimpleJumpintoIf: ";
-    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
-  );
-
-  // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
-  //
-  // new: headBlk => if () {initReg = 1; org trueBlk branch} else
-  //      {initReg = 0; org falseBlk branch }
-  //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
-  //      => org landBlk
-  //      if landBlk->pred_size() > 2, put the about if-else inside
-  //      if (initReg !=2) {...}
-  //
-  // add initReg = initVal to headBlk
-
-  const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
-  if (!MigrateTrue || !MigrateFalse) {
-    // XXX: We have an opportunity here to optimize the "branch into if" case
-    // here.  Branch into if looks like this:
-    //                        entry
-    //                       /     |
-    //           diamond_head       branch_from
-    //             /      \           |
-    // diamond_false        diamond_true
-    //             \      /
-    //               done
-    //
-    // The diamond_head block begins the "if" and the diamond_true block
-    // is the block being "branched into".
-    //
-    // If MigrateTrue is true, then TrueBB is the block being "branched into"
-    // and if MigrateFalse is true, then FalseBB is the block being
-    // "branched into"
-    // 
-    // Here is the pseudo code for how I think the optimization should work:
-    // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head.
-    // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from.
-    // 3. Move the branch instruction from diamond_head into its own basic
-    //    block (new_block).
-    // 4. Add an unconditional branch from diamond_head to new_block
-    // 5. Replace the branch instruction in branch_from with an unconditional
-    //    branch to new_block.  If branch_from has multiple predecessors, then
-    //    we need to replace the True/False block in the branch
-    //    instruction instead of replacing it.
-    // 6. Change the condition of the branch instruction in new_block from
-    //    COND to (COND || GPR0)
-    //
-    // In order insert these MOV instruction, we will need to use the
-    // RegisterScavenger.  Usually liveness stops being tracked during
-    // the late machine optimization passes, however if we implement
-    // bool TargetRegisterInfo::requiresRegisterScavenging(
-    //                                                const MachineFunction &MF)
-    // and have it return true, liveness will be tracked correctly 
-    // by generic optimization passes.  We will also need to make sure that
-    // all of our target-specific passes that run after regalloc and before
-    // the CFGStructurizer track liveness and we will need to modify this pass
-    // to correctly track liveness.
-    //
-    // After the above changes, the new CFG should look like this:
-    //                        entry
-    //                       /     |
-    //           diamond_head       branch_from
-    //                       \     /
-    //                      new_block
-    //                      /      |
-    //         diamond_false        diamond_true
-    //                      \      /
-    //                        done
-    //
-    // Without this optimization, we are forced to duplicate the diamond_true
-    // block and we will end up with a CFG like this:
-    //
-    //                        entry
-    //                       /     |
-    //           diamond_head       branch_from
-    //             /      \                   |
-    // diamond_false        diamond_true      diamond_true (duplicate)
-    //             \      /                   |
-    //               done --------------------|
-    //
-    // Duplicating diamond_true can be very costly especially if it has a
-    // lot of instructions.
-    return 0;
-  }
-
-  int NumNewBlk = 0;
-
-  bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
-
-  //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
-  MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
-
-  if (LandBlkHasOtherPred) {
-    llvm_unreachable("Extra register needed to handle CFG");
-    unsigned CmpResReg =
-      HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
-    llvm_unreachable("Extra compare instruction needed to handle CFG");
-    insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
-        CmpResReg, DebugLoc());
-  }
-
-  // XXX: We are running this after RA, so creating virtual registers will
-  // cause an assertion failure in the PostRA scheduling pass.
-  unsigned InitReg =
-    HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
-  insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
-      DebugLoc());
-
-  if (MigrateTrue) {
-    migrateInstruction(TrueMBB, LandBlk, I);
-    // need to uncondionally insert the assignment to ensure a path from its
-    // predecessor rather than headBlk has valid value in initReg if
-    // (initVal != 1).
-    llvm_unreachable("Extra register needed to handle CFG");
-  }
-  insertInstrBefore(I, AMDGPU::ELSE);
-
-  if (MigrateFalse) {
-    migrateInstruction(FalseMBB, LandBlk, I);
-    // need to uncondionally insert the assignment to ensure a path from its
-    // predecessor rather than headBlk has valid value in initReg if
-    // (initVal != 0)
-    llvm_unreachable("Extra register needed to handle CFG");
-  }
-
-  if (LandBlkHasOtherPred) {
-    // add endif
-    insertInstrBefore(I, AMDGPU::ENDIF);
-
-    // put initReg = 2 to other predecessors of landBlk
-    for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
-         PE = LandBlk->pred_end(); PI != PE; ++PI) {
-      MachineBasicBlock *MBB = *PI;
-      if (MBB != TrueMBB && MBB != FalseMBB)
-        llvm_unreachable("Extra register needed to handle CFG");
-    }
-  }
-  DEBUG(
-    dbgs() << "result from improveSimpleJumpintoIf: ";
-    showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
-  );
-
-  // update landBlk
-  *LandMBBPtr = LandBlk;
-
-  return NumNewBlk;
-}
-
 void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
     MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
     MachineLoop *ContLoop) {
@@ -1638,24 +1253,6 @@ void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
   }
 }
 
-int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
-    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
-  int Cloned = 0;
-  assert(PreMBB->isSuccessor(SrcMBB));
-  while (SrcMBB && SrcMBB != DstMBB) {
-    assert(SrcMBB->succ_size() == 1);
-    if (SrcMBB->pred_size() > 1) {
-      SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
-      ++Cloned;
-    }
-
-    PreMBB = SrcMBB;
-    SrcMBB = *SrcMBB->succ_begin();
-  }
-
-  return Cloned;
-}
-
 MachineBasicBlock *
 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
     MachineBasicBlock *PredMBB) {
@@ -1684,37 +1281,6 @@ AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
   return CloneMBB;
 }
 
-void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
-    MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
-  MachineBasicBlock::iterator SpliceEnd;
-  //look for the input branchinstr, not the AMDGPU branchinstr
-  MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
-  if (!BranchMI) {
-    DEBUG(
-      dbgs() << "migrateInstruction don't see branch instr\n" ;
-    );
-    SpliceEnd = SrcMBB->end();
-  } else {
-    DEBUG(
-      dbgs() << "migrateInstruction see branch instr\n" ;
-      BranchMI->dump();
-    );
-    SpliceEnd = BranchMI;
-  }
-  DEBUG(
-    dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
-      << "srcSize = " << SrcMBB->size() << "\n";
-  );
-
-  //splice insert before insertPos
-  DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
-
-  DEBUG(
-    dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
-      << "srcSize = " << SrcMBB->size() << "\n";
-  );
-}
-
 MachineBasicBlock *
 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
@@ -1840,60 +1406,6 @@ void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
   );
 }
 
-MachineBasicBlock *
-AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
-    MachineBasicBlock *MBB2) {
-
-  if (PDT->dominates(MBB1, MBB2))
-    return MBB1;
-  if (PDT->dominates(MBB2, MBB1))
-    return MBB2;
-
-  MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
-  MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
-
-  // Handle newly cloned node.
-  if (!Node1 && MBB1->succ_size() == 1)
-    return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
-  if (!Node2 && MBB2->succ_size() == 1)
-    return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
-
-  if (!Node1 || !Node2)
-    return NULL;
-
-  Node1 = Node1->getIDom();
-  while (Node1) {
-    if (PDT->dominates(Node1, Node2))
-      return Node1->getBlock();
-    Node1 = Node1->getIDom();
-  }
-
-  return NULL;
-}
-
-MachineBasicBlock *
-AMDGPUCFGStructurizer::findNearestCommonPostDom(
-    std::set<MachineBasicBlock *> &MBBs) {
-  MachineBasicBlock *CommonDom;
-  std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
-  std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
-  for (CommonDom = *It; It != E && CommonDom; ++It) {
-    MachineBasicBlock *MBB = *It;
-    if (MBB != CommonDom)
-      CommonDom = findNearestCommonPostDom(MBB, CommonDom);
-  }
-
-  DEBUG(
-    dbgs() << "Common post dominator for exit blocks is ";
-    if (CommonDom)
-          dbgs() << "BB" << CommonDom->getNumber() << "\n";
-    else
-      dbgs() << "NULL\n";
-  );
-
-  return CommonDom;
-}
-
 char AMDGPUCFGStructurizer::ID = 0;
 
 } // end anonymous namespace
-- 
1.8.1.4



More information about the llvm-commits mailing list