[llvm] r279168 - CodeGen: If Convert blocks that would form a diamond when tail-merged.

Diana Picus via llvm-commits llvm-commits at lists.llvm.org
Fri Aug 19 01:27:47 PDT 2016


Hi Kyle,

I'm getting the same failure on the same buildbot again. I've attached
a reproducer to https://llvm.org/bugs/show_bug.cgi?id=28949

HTH,
Diana

On 19 August 2016 at 01:09, Kyle Butt via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: iteratee
> Date: Thu Aug 18 17:09:27 2016
> New Revision: 279168
>
> URL: http://llvm.org/viewvc/llvm-project?rev=279168&view=rev
> Log:
> CodeGen: If Convert blocks that would form a diamond when tail-merged.
>
> The following function currently relies on tail-merging for if
> conversion to succeed. The common tail of cond_true and cond_false is
> extracted, and this then forms a diamond pattern that can be
> successfully if converted.
>
> If this block does not get extracted, either because tail-merging is
> disabled or the threshold is higher, we should still recognize this
> pattern and if-convert it.
>
> Fixed a regression in the original commit. Need to un-reverse branches after
> reversing them, or other conversions go awry.
>
> Regression on self-hosting bots with no obvious explanation. Tidied up range
> handling to be more obviously correct, but there was no smoking gun.
>
> define i32 @t2(i32 %a, i32 %b) nounwind {
> entry:
>         %tmp1434 = icmp eq i32 %a, %b           ; <i1> [#uses=1]
>         br i1 %tmp1434, label %bb17, label %bb.outer
>
> bb.outer:               ; preds = %cond_false, %entry
>         %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]
>         %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]
>         br label %bb
>
> bb:             ; preds = %cond_true, %bb.outer
>         %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]
>         %tmp. = sub i32 0, %b_addr.021.0.ph
>         %tmp.40 = mul i32 %indvar, %tmp.
>         %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph
>         %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph
>         br i1 %tmp3, label %cond_true, label %cond_false
>
> cond_true:              ; preds = %bb
>         %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph
>         %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph
>         %indvar.next = add i32 %indvar, 1
>         br i1 %tmp1437, label %bb17, label %bb
>
> cond_false:             ; preds = %bb
>         %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0
>         %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10
>         br i1 %tmp14, label %bb17, label %bb.outer
>
> bb17:           ; preds = %cond_false, %cond_true, %entry
>         %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]
>         ret i32 %a_addr.026.1
> }
>
> Without tail-merging or diamond-tail if conversion:
> LBB1_1:                                 @ %bb
>                                         @ =>This Inner Loop Header: Depth=1
>         cmp     r0, r1
>         ble     LBB1_3
> @ BB#2:                                 @ %cond_true
>                                         @   in Loop: Header=BB1_1 Depth=1
>         subs    r0, r0, r1
>         cmp     r1, r0
>         it      ne
>         cmpne   r0, r1
>         bgt     LBB1_4
> LBB1_3:                                 @ %cond_false
>                                         @   in Loop: Header=BB1_1 Depth=1
>         subs    r1, r1, r0
>         cmp     r1, r0
>         bne     LBB1_1
> LBB1_4:                                 @ %bb17
>         bx      lr
>
> With diamond-tail if conversion, but without tail-merging:
> @ BB#0:                                 @ %entry
>         cmp     r0, r1
>         it      eq
>         bxeq    lr
> LBB1_1:                                 @ %bb
>                                         @ =>This Inner Loop Header: Depth=1
>         cmp     r0, r1
>         ite     le
>         suble   r1, r1, r0
>         subgt   r0, r0, r1
>         cmp     r1, r0
>         bne     LBB1_1
> @ BB#2:                                 @ %bb17
>         bx      lr
>
> Added:
>     llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll
> Modified:
>     llvm/trunk/lib/CodeGen/IfConversion.cpp
>     llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
>
> Modified: llvm/trunk/lib/CodeGen/IfConversion.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/IfConversion.cpp?rev=279168&r1=279167&r2=279168&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/IfConversion.cpp (original)
> +++ llvm/trunk/lib/CodeGen/IfConversion.cpp Thu Aug 18 17:09:27 2016
> @@ -59,6 +59,8 @@ static cl::opt<bool> DisableTriangleFR("
>                                         cl::init(false), cl::Hidden);
>  static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
>                                      cl::init(false), cl::Hidden);
> +static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
> +                                        cl::init(false), cl::Hidden);
>  static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
>                                       cl::init(true), cl::Hidden);
>
> @@ -69,6 +71,7 @@ STATISTIC(NumTriangleRev,  "Number of tr
>  STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
>  STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
>  STATISTIC(NumDiamonds,     "Number of diamond if-conversions performed");
> +STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");
>  STATISTIC(NumIfConvBBs,    "Number of if-converted blocks");
>  STATISTIC(NumDupBBs,       "Number of duplicated blocks");
>  STATISTIC(NumUnpred,       "Number of true blocks of diamonds unpredicated");
> @@ -83,7 +86,9 @@ namespace {
>        ICTriangleRev,   // Same as ICTriangle, but true path rev condition.
>        ICTriangleFalse, // Same as ICTriangle, but on the false path.
>        ICTriangle,      // BB is entry of a triangle sub-CFG.
> -      ICDiamond        // BB is entry of a diamond sub-CFG.
> +      ICDiamond,       // BB is entry of a diamond sub-CFG.
> +      ICForkedDiamond  // BB is entry of an almost diamond sub-CFG, with a
> +                       // common tail that can be shared.
>      };
>
>      /// One per MachineBasicBlock, this is used to cache the result
> @@ -115,6 +120,7 @@ namespace {
>        bool IsAnalyzed      : 1;
>        bool IsEnqueued      : 1;
>        bool IsBrAnalyzable  : 1;
> +      bool IsBrReversible  : 1;
>        bool HasFallThrough  : 1;
>        bool IsUnpredicable  : 1;
>        bool CannotBeCopied  : 1;
> @@ -129,9 +135,10 @@ namespace {
>        SmallVector<MachineOperand, 4> Predicate;
>        BBInfo() : IsDone(false), IsBeingAnalyzed(false),
>                   IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
> -                 HasFallThrough(false), IsUnpredicable(false),
> -                 CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
> -                 ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
> +                 IsBrReversible(false), HasFallThrough(false),
> +                 IsUnpredicable(false), CannotBeCopied(false),
> +                 ClobbersPred(false), NonPredSize(0), ExtraCost(0),
> +                 ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
>                   FalseBB(nullptr) {}
>      };
>
> @@ -209,10 +216,14 @@ namespace {
>      bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
>                        unsigned &Dups1, unsigned &Dups2,
>                        BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
> +    bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
> +                            unsigned &Dups1, unsigned &Dups2,
> +                            BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
>      void AnalyzeBranches(BBInfo &BBI);
>      void ScanInstructions(BBInfo &BBI,
>                            MachineBasicBlock::iterator &Begin,
> -                          MachineBasicBlock::iterator &End) const;
> +                          MachineBasicBlock::iterator &End,
> +                          bool BranchUnpredicable = false) const;
>      bool RescanInstructions(
>          MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
>          MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
> @@ -228,9 +239,17 @@ namespace {
>      void RemoveExtraEdges(BBInfo &BBI);
>      bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
>      bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
> +    bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
> +                                unsigned NumDups1, unsigned NumDups2,
> +                                bool TClobbersPred, bool FClobbersPred,
> +                                bool RemoveTrueBranch, bool RemoveFalseBranch,
> +                                bool MergeAddEdges);
>      bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
>                            unsigned NumDups1, unsigned NumDups2,
>                            bool TClobbers, bool FClobbers);
> +    bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind,
> +                              unsigned NumDups1, unsigned NumDups2,
> +                              bool TClobbers, bool FClobbers);
>      void PredicateBlock(BBInfo &BBI,
>                          MachineBasicBlock::iterator E,
>                          SmallVectorImpl<MachineOperand> &Cond,
> @@ -424,6 +443,19 @@ bool IfConverter::runOnMachineFunction(M
>          if (RetVal) ++NumDiamonds;
>          break;
>        }
> +      case ICForkedDiamond: {
> +        if (DisableForkedDiamond) break;
> +        DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#"
> +                     << BBI.BB->getNumber() << " (T:"
> +                     << BBI.TrueBB->getNumber() << ",F:"
> +                     << BBI.FalseBB->getNumber() << ") ");
> +        RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
> +                                      Token->TClobbersPred,
> +                                      Token->FClobbersPred);
> +        DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
> +        if (RetVal) ++NumForkedDiamonds;
> +        break;
> +      }
>        }
>
>        Change |= RetVal;
> @@ -626,9 +658,6 @@ static void countDuplicatedInstructions(
>      ++FIB;
>    }
>
> -  // If both blocks are returning don't skip the branches, since they will
> -  // likely be both identical return instructions. In such cases the return
> -  // can be left unpredicated.
>    // Check for already containing all of the block.
>    if (TIB == TIE || FIB == FIE)
>      return;
> @@ -703,10 +732,11 @@ bool IfConverter::RescanInstructions(
>      MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
>      MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
>      BBInfo &TrueBBI, BBInfo &FalseBBI) const {
> -  ScanInstructions(TrueBBI, TIB, TIE);
> +  bool BranchUnpredicable = true;
> +  ScanInstructions(TrueBBI, TIB, TIE, BranchUnpredicable);
>    if (TrueBBI.IsUnpredicable)
>      return false;
> -  ScanInstructions(FalseBBI, FIB, FIE);
> +  ScanInstructions(FalseBBI, FIB, FIE, BranchUnpredicable);
>    if (FalseBBI.IsUnpredicable)
>      return false;
>    if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)
> @@ -714,6 +744,95 @@ bool IfConverter::RescanInstructions(
>    return true;
>  }
>
> +/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along
> +/// with their common predecessor) form a diamond if a common tail block is
> +/// extracted.
> +/// While not strictly a diamond, this pattern would form a diamond if
> +/// tail-merging had merged the shared tails.
> +///           EBB
> +///         _/   \_
> +///         |     |
> +///        TBB   FBB
> +///        /  \ /   \
> +///  FalseBB TrueBB FalseBB
> +/// Currently only handles analyzable branches.
> +/// Specifically excludes actual diamonds to avoid overlap.
> +bool IfConverter::ValidForkedDiamond(
> +    BBInfo &TrueBBI, BBInfo &FalseBBI,
> +    unsigned &Dups1, unsigned &Dups2,
> +    BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
> +  Dups1 = Dups2 = 0;
> +  if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
> +      FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
> +    return false;
> +
> +  if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable)
> +    return false;
> +  // Don't IfConvert blocks that can't be folded into their predecessor.
> +  if  (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
> +    return false;
> +
> +  // This function is specifically looking for conditional tails, as
> +  // unconditional tails are already handled by the standard diamond case.
> +  if (TrueBBI.BrCond.size() == 0 ||
> +      FalseBBI.BrCond.size() == 0)
> +    return false;
> +
> +  MachineBasicBlock *TT = TrueBBI.TrueBB;
> +  MachineBasicBlock *TF = TrueBBI.FalseBB;
> +  MachineBasicBlock *FT = FalseBBI.TrueBB;
> +  MachineBasicBlock *FF = FalseBBI.FalseBB;
> +
> +  if (!TT)
> +    TT = getNextBlock(*TrueBBI.BB);
> +  if (!TF)
> +    TF = getNextBlock(*TrueBBI.BB);
> +  if (!FT)
> +    FT = getNextBlock(*FalseBBI.BB);
> +  if (!FF)
> +    FF = getNextBlock(*FalseBBI.BB);
> +
> +  if (!TT || !TF)
> +    return false;
> +
> +  // Check successors. If they don't match, bail.
> +  if (!((TT == FT && TF == FF) || (TF == FT && TT == FF)))
> +    return false;
> +
> +  bool FalseReversed = false;
> +  if (TF == FT && TT == FF) {
> +    // If the branches are opposing, but we can't reverse, don't do it.
> +    if (!FalseBBI.IsBrReversible)
> +      return false;
> +    FalseReversed = true;
> +    ReverseBranchCondition(FalseBBI);
> +  }
> +  auto UnReverseOnExit = make_scope_exit([&]() {
> +    if (FalseReversed)
> +      ReverseBranchCondition(FalseBBI);
> +  });
> +
> +  // Count duplicate instructions at the beginning of the true and false blocks.
> +  MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
> +  MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
> +  MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
> +  MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
> +  countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
> +                              *TrueBBI.BB, *FalseBBI.BB,
> +                              /* SkipConditionalBranches */ false);
> +
> +  TrueBBICalc.BB = TrueBBI.BB;
> +  FalseBBICalc.BB = FalseBBI.BB;
> +  if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
> +    return false;
> +  // The size is used to decide whether to if-convert, and the shared portions
> +  // are subtracted off. Because of the subtraction, we just use the size that
> +  // was calculated by the original ScanInstructions, as it is correct.
> +  TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
> +  FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
> +  return true;
> +}
> +
>  /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
>  /// with their common predecessor) forms a valid diamond shape for ifcvt.
>  bool IfConverter::ValidDiamond(
> @@ -775,6 +894,9 @@ void IfConverter::AnalyzeBranches(BBInfo
>    BBI.BrCond.clear();
>    BBI.IsBrAnalyzable =
>        !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
> +  SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
> +  BBI.IsBrReversible = (RevCond.size() == 0) ||
> +      !TII->ReverseBranchCondition(RevCond);
>    BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
>
>    if (BBI.BrCond.size()) {
> @@ -796,7 +918,8 @@ void IfConverter::AnalyzeBranches(BBInfo
>  /// If so, the block is not predicable unless it's the last instruction.
>  void IfConverter::ScanInstructions(BBInfo &BBI,
>                                     MachineBasicBlock::iterator &Begin,
> -                                   MachineBasicBlock::iterator &End) const {
> +                                   MachineBasicBlock::iterator &End,
> +                                   bool BranchUnpredicable) const {
>    if (BBI.IsDone || BBI.IsUnpredicable)
>      return;
>
> @@ -846,6 +969,11 @@ void IfConverter::ScanInstructions(BBInf
>      bool isPredicated = TII->isPredicated(MI);
>      bool isCondBr = BBI.IsBrAnalyzable && MI.isConditionalBranch();
>
> +    if (BranchUnpredicable && MI.isBranch()) {
> +      BBI.IsUnpredicable = true;
> +      return;
> +    }
> +
>      // A conditional branch is not predicable, but it may be eliminated.
>      if (isCondBr)
>        continue;
> @@ -1027,34 +1155,56 @@ void IfConverter::AnalyzeBlock(
>
>      if (CanRevCond) {
>        BBInfo TrueBBICalc, FalseBBICalc;
> +      auto feasibleDiamond = [&]() {
> +        bool MeetsSize = MeetIfcvtSizeLimit(
> +            *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
> +                          TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
> +            *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
> +                           FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
> +            Prediction);
> +        bool TrueFeasible = FeasibilityAnalysis(TrueBBI, BBI.BrCond,
> +                                                /* IsTriangle */ false, /* RevCond */ false,
> +                                                /* hasCommonTail */ true);
> +        bool FalseFeasible = FeasibilityAnalysis(FalseBBI, RevCond,
> +                                                 /* IsTriangle */ false, /* RevCond */ false,
> +                                                 /* hasCommonTail */ true);
> +        return MeetsSize && TrueFeasible && FalseFeasible;
> +      };
> +
>        if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
> -                       TrueBBICalc, FalseBBICalc) &&
> -        MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
> -                                         TrueBBICalc.ExtraCost),
> -                           TrueBBICalc.ExtraCost2,
> -                           *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
> -                                          FalseBBICalc.ExtraCost),
> -                           FalseBBICalc.ExtraCost2,
> -                           Prediction) &&
> -        FeasibilityAnalysis(TrueBBI, BBI.BrCond,
> -                            /* IsTriangle */ false, /* RevCond */ false,
> -                            /* hasCommonTail */ true) &&
> -        FeasibilityAnalysis(FalseBBI, RevCond,
> -                            /* IsTriangle */ false, /* RevCond */ false,
> -                            /* hasCommonTail */ true)) {
> -        // Diamond:
> -        //   EBB
> -        //   / \_
> -        //  |   |
> -        // TBB FBB
> -        //   \ /
> -        //  TailBB
> -        // Note TailBB can be empty.
> -        Tokens.push_back(llvm::make_unique<IfcvtToken>(
> -                BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2,
> -                (bool) TrueBBICalc.ClobbersPred,
> -                (bool) FalseBBICalc.ClobbersPred));
> -        Enqueued = true;
> +                       TrueBBICalc, FalseBBICalc)) {
> +        if (feasibleDiamond()) {
> +          // Diamond:
> +          //   EBB
> +          //   / \_
> +          //  |   |
> +          // TBB FBB
> +          //   \ /
> +          //  TailBB
> +          // Note TailBB can be empty.
> +          Tokens.push_back(llvm::make_unique<IfcvtToken>(
> +              BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2,
> +              (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
> +          Enqueued = true;
> +        }
> +      } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
> +                                    TrueBBICalc, FalseBBICalc)) {
> +        if (feasibleDiamond()) {
> +          // ForkedDiamond:
> +          // if TBB and FBB have a common tail that includes their conditional
> +          // branch instructions, then we can If Convert this pattern.
> +          //          EBB
> +          //         _/ \_
> +          //         |   |
> +          //        TBB  FBB
> +          //        / \ /   \
> +          //  FalseBB TrueBB FalseBB
> +          //
> +          Tokens.push_back(llvm::make_unique<IfcvtToken>(
> +              BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
> +              (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
> +          Enqueued = true;
> +        }
>        }
>      }
>
> @@ -1499,32 +1649,28 @@ bool IfConverter::IfConvertTriangle(BBIn
>    return true;
>  }
>
> -/// If convert a diamond sub-CFG.
> -/// \p BBI is the head of the diamond
> -/// \p NumDups1 - number of shared instructions at the beginning of TrueBBI and
> -///               FalseBBI
> -/// \p NumDups2 - number of shared instructions at the end of TrueBBI and
> -///               FalseBBI
> -/// \p TClobbersPred - True if the true block clobbers the predicate in the
> -///                    non-shared portion.
> -/// \p TClobbersPred - True if the false block clobbers the predicate in the
> -///                    non-shared portion.
> -bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
> -                                   unsigned NumDups1, unsigned NumDups2,
> -                                   bool TClobbersPred, bool FClobbersPred) {
> -  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
> -  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
> -  MachineBasicBlock *TailBB = TrueBBI.TrueBB;
> -  // True block must fall through or end with an unanalyzable terminator.
> -  if (!TailBB) {
> -    if (blockAlwaysFallThrough(TrueBBI))
> -      TailBB = FalseBBI.TrueBB;
> -    assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
> -  }
> +/// Common code shared between diamond conversions.
> +/// \p BBI, \p TrueBBI, and \p FalseBBI form the diamond shape.
> +/// \p NumDups1 - number of shared instructions at the beginning of \p TrueBBI
> +///               and FalseBBI
> +/// \p NumDups2 - number of shared instructions at the end of \p TrueBBI
> +///               and \p FalseBBI
> +/// \p RemoveTrueBranch - Remove the branch of the true block before predicating.
> +///                       Only false for unanalyzable fallthrough cases.
> +/// \p RemoveFalseBranch - Remove the branch of the false block before
> +///                        predicating. Only false for unanalyzable fallthrough
> +///                        cases.
> +/// \p MergeAddEdges - Add successor edges when merging blocks. False when the
> +///                    diamond has a known tail block (TailBB is non-null).
> +bool IfConverter::IfConvertDiamondCommon(
> +    BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
> +    unsigned NumDups1, unsigned NumDups2,
> +    bool TClobbersPred, bool FClobbersPred,
> +    bool RemoveTrueBranch, bool RemoveFalseBranch,
> +    bool MergeAddEdges) {
>
>    if (TrueBBI.IsDone || FalseBBI.IsDone ||
> -      TrueBBI.BB->pred_size() > 1 ||
> -      FalseBBI.BB->pred_size() > 1) {
> +      TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) {
>      // Something has changed. It's no longer safe to predicate these blocks.
>      BBI.IsAnalyzed = false;
>      TrueBBI.IsAnalyzed = false;
> @@ -1558,6 +1704,7 @@ bool IfConverter::IfConvertDiamond(BBInf
>    if (DoSwap) {
>      std::swap(BBI1, BBI2);
>      std::swap(Cond1, Cond2);
> +    std::swap(RemoveTrueBranch, RemoveFalseBranch);
>    }
>
>    // Remove the conditional branch from entry to the blocks.
> @@ -1610,12 +1757,8 @@ bool IfConverter::IfConvertDiamond(BBInf
>    BBI.BB->splice(BBI.BB->end(), &MBB1, MBB1.begin(), DI1);
>    MBB2.erase(MBB2.begin(), DI2);
>
> -  // Remove branch from the 'true' block, unless it was not analyzable.
> -  // Non-analyzable branches need to be preserved, since in such cases,
> -  // the CFG structure is not an actual diamond (the join block may not
> -  // be present).
> -  if (BBI1->IsBrAnalyzable)
> -    BBI1->NonPredSize -= TII->RemoveBranch(MBB1);
> +  if (RemoveTrueBranch)
> +    BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
>    // Remove duplicated instructions.
>    DI1 = MBB1.end();
>    for (unsigned i = 0; i != NumDups2; ) {
> @@ -1633,11 +1776,11 @@ bool IfConverter::IfConvertDiamond(BBInf
>    // must be removed.
>    RemoveKills(MBB1.begin(), MBB1.end(), DontKill, *TRI);
>
> -  // Remove 'false' block branch (unless it was not analyzable), and find
> -  // the last instruction to predicate.
> -  if (BBI2->IsBrAnalyzable)
> -    BBI2->NonPredSize -= TII->RemoveBranch(MBB2);
> -  DI2 = MBB2.end();
> +  // Remove 'false' block branch, and find the last instruction to predicate.
> +  // Save the debug location.
> +  if (RemoveFalseBranch)
> +    BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
> +  DI2 = BBI2->BB->end();
>    while (NumDups2 != 0) {
>      // NumDups2 only counted non-dbg_value instructions, so this won't
>      // run off the head of the list.
> @@ -1709,8 +1852,74 @@ bool IfConverter::IfConvertDiamond(BBInf
>    PredicateBlock(*BBI2, DI2, *Cond2);
>
>    // Merge the true block into the entry of the diamond.
> -  MergeBlocks(BBI, *BBI1, TailBB == nullptr);
> -  MergeBlocks(BBI, *BBI2, TailBB == nullptr);
> +  MergeBlocks(BBI, *BBI1, MergeAddEdges);
> +  MergeBlocks(BBI, *BBI2, MergeAddEdges);
> +  return true;
> +}
> +
> +/// If convert an almost-diamond sub-CFG where the true
> +/// and false blocks share a common tail.
> +bool IfConverter::IfConvertForkedDiamond(
> +    BBInfo &BBI, IfcvtKind Kind,
> +    unsigned NumDups1, unsigned NumDups2,
> +    bool TClobbersPred, bool FClobbersPred) {
> +  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
> +  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
> +
> +  // Save the debug location for later.
> +  DebugLoc dl;
> +  MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator();
> +  if (TIE != TrueBBI.BB->end())
> +    dl = TIE->getDebugLoc();
> +  // Removing branches from both blocks is safe, because we have already
> +  // determined that both blocks have the same branch instructions. The branch
> +  // will be added back at the end, unpredicated.
> +  if (!IfConvertDiamondCommon(
> +      BBI, TrueBBI, FalseBBI,
> +      NumDups1, NumDups2,
> +      TClobbersPred, FClobbersPred,
> +      /* RemoveTrueBranch */ true, /* RemoveFalseBranch */ true,
> +      /* MergeAddEdges */ true))
> +    return false;
> +
> +  // Add back the branch.
> +  // Debug location saved above from the first terminator of the true block.
> +  TII->InsertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
> +                    TrueBBI.BrCond, dl);
> +
> +  RemoveExtraEdges(BBI);
> +
> +  // Update block info.
> +  BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
> +  InvalidatePreds(*BBI.BB);
> +
> +  // FIXME: Must maintain LiveIns.
> +  return true;
> +}
> +
> +/// If convert a diamond sub-CFG.
> +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
> +                                   unsigned NumDups1, unsigned NumDups2,
> +                                   bool TClobbersPred, bool FClobbersPred) {
> +  BBInfo &TrueBBI  = BBAnalysis[BBI.TrueBB->getNumber()];
> +  BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
> +  MachineBasicBlock *TailBB = TrueBBI.TrueBB;
> +
> +  // True block must fall through or end with an unanalyzable terminator.
> +  if (!TailBB) {
> +    if (blockAlwaysFallThrough(TrueBBI))
> +      TailBB = FalseBBI.TrueBB;
> +    assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
> +  }
> +
> +  if (!IfConvertDiamondCommon(
> +      BBI, TrueBBI, FalseBBI,
> +      NumDups1, NumDups2,
> +      TrueBBI.ClobbersPred, FalseBBI.ClobbersPred,
> +      /* RemoveTrueBranch */ TrueBBI.IsBrAnalyzable,
> +      /* RemoveFalseBranch */ FalseBBI.IsBrAnalyzable,
> +      /* MergeAddEdges */ TailBB == nullptr))
> +    return false;
>
>    // If the if-converted block falls through or unconditionally branches into
>    // the tail block, and the tail block does not have other predecessors, then
> @@ -1733,7 +1942,7 @@ bool IfConverter::IfConvertDiamond(BBInf
>        CanMergeTail = false;
>      else if (NumPreds == 1 && CanMergeTail) {
>        MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
> -      if (*PI != &MBB1 && *PI != &MBB2)
> +      if (*PI != TrueBBI.BB && *PI != FalseBBI.BB)
>          CanMergeTail = false;
>      }
>      if (CanMergeTail) {
> @@ -1749,8 +1958,8 @@ bool IfConverter::IfConvertDiamond(BBInf
>    // RemoveExtraEdges won't work if the block has an unanalyzable branch,
>    // which can happen here if TailBB is unanalyzable and is merged, so
>    // explicitly remove BBI1 and BBI2 as successors.
> -  BBI.BB->removeSuccessor(&MBB1);
> -  BBI.BB->removeSuccessor(&MBB2, true);
> +  BBI.BB->removeSuccessor(TrueBBI.BB);
> +  BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);
>    RemoveExtraEdges(BBI);
>
>    // Update block info.
>
> Added: llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll?rev=279168&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll (added)
> +++ llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll Thu Aug 18 17:09:27 2016
> @@ -0,0 +1,36 @@
> +; ModuleID = 'bugpoint-reduced-instructions.bc'
> +; RUN: llc -O2 -o - %s | FileCheck %s
> +source_filename = "bugpoint-output-9ad75f8.bc"
> +target datalayout = "e-m:e-i64:64-n32:64"
> +target triple = "powerpc64le-unknown-linux-gnu"
> +
> +; Function Attrs: nounwind uwtable
> +define hidden void @_ZN11__sanitizer25MaybeStartBackgroudThreadEv() local_unnamed_addr #0 {
> +entry:
> +  br i1 undef, label %land.lhs.true, label %if.end
> +
> +; CHECK: # %land.lhs.true
> +; CHECK-NEXT: bclr
> +; CHECK-NEXT: # %if.end4
> +land.lhs.true:                                    ; preds = %entry
> +  br i1 undef, label %return, label %if.end4
> +
> +if.end:                                           ; preds = %entry
> +  br i1 icmp ne (i32 (i8*, i8*, i8* (i8*)*, i8*)* @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_, i32 (i8*, i8*, i8* (i8*)*, i8*)* null), label %if.end4, label %return
> +
> +if.end4:                                          ; preds = %if.end, %land.lhs.true
> +  %call5 = tail call i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)* nonnull @_ZN11__sanitizer16BackgroundThreadEPv, i8* null) #7
> +  unreachable
> +
> +return:                                           ; preds = %if.end, %land.lhs.true
> +  ret void
> +}
> +
> +declare extern_weak signext i32 @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_(i8*, i8*, i8* (i8*)*, i8*) #2
> +
> +declare i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)*, i8*) local_unnamed_addr #2
> +
> +declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5
> +
> +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
> +attributes #7 = { nobuiltin nounwind }
>
> Modified: llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll?rev=279168&r1=279167&r2=279168&view=diff
> ==============================================================================
> --- llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll (original)
> +++ llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll Thu Aug 18 17:09:27 2016
> @@ -1,6 +1,7 @@
>  ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
>  ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s
> -; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s
> +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it | FileCheck %s
> +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 | FileCheck %s
>  define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
>  ; CHECK-LABEL: t1:
>  ; CHECK: ittt ne
> @@ -25,9 +26,9 @@ cond_next:
>  define i32 @t2(i32 %a, i32 %b) nounwind {
>  entry:
>  ; CHECK-LABEL: t2:
> -; CHECK: ite gt
> -; CHECK: subgt
> -; CHECK: suble
> +; CHECK: ite {{gt|le}}
> +; CHECK-DAG: suble
> +; CHECK-DAG: subgt
>         %tmp1434 = icmp eq i32 %a, %b           ; <i1> [#uses=1]
>         br i1 %tmp1434, label %bb17, label %bb.outer
>
> @@ -56,6 +57,44 @@ cond_false:          ; preds = %bb
>         br i1 %tmp14, label %bb17, label %bb.outer
>
>  bb17:          ; preds = %cond_false, %cond_true, %entry
> +       %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]           ; <i32> [#uses=1]
> +       ret i32 %a_addr.026.1
> +}
> +
> +define i32 @t2_nomerge(i32 %a, i32 %b) nounwind {
> +entry:
> +; CHECK-LABEL: t2_nomerge:
> +; CHECK-NOT: ite {{gt|le}}
> +; CHECK-NOT: suble
> +; CHECK-NOT: subgt
> +       %tmp1434 = icmp eq i32 %a, %b           ; <i1> [#uses=1]
> +       br i1 %tmp1434, label %bb17, label %bb.outer
> +
> +bb.outer:              ; preds = %cond_false, %entry
> +       %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]              ; <i32> [#uses=5]
> +       %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]               ; <i32> [#uses=1]
> +       br label %bb
> +
> +bb:            ; preds = %cond_true, %bb.outer
> +       %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]                ; <i32> [#uses=2]
> +       %tmp. = sub i32 0, %b_addr.021.0.ph             ; <i32> [#uses=1]
> +       %tmp.40 = mul i32 %indvar, %tmp.                ; <i32> [#uses=1]
> +       %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph               ; <i32> [#uses=6]
> +       %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph            ; <i1> [#uses=1]
> +       br i1 %tmp3, label %cond_true, label %cond_false
> +
> +cond_true:             ; preds = %bb
> +       %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph         ; <i32> [#uses=2]
> +       %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph          ; <i1> [#uses=1]
> +       %indvar.next = add i32 %indvar, 1               ; <i32> [#uses=1]
> +       br i1 %tmp1437, label %bb17, label %bb
> +
> +cond_false:            ; preds = %bb
> +       %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0                ; <i32> [#uses=2]
> +       %tmp14 = icmp eq i32 %b_addr.021.0.ph, %tmp10           ; <i1> [#uses=1]
> +       br i1 %tmp14, label %bb17, label %bb.outer
> +
> +bb17:          ; preds = %cond_false, %cond_true, %entry
>         %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]           ; <i32> [#uses=1]
>         ret i32 %a_addr.026.1
>  }
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits


More information about the llvm-commits mailing list