[llvm] r277905 - CodeGen: If Convert blocks that would form a diamond when tail-merged.
Kyle Butt via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 5 18:52:37 PDT 2016
Author: iteratee
Date: Fri Aug 5 20:52:37 2016
New Revision: 277905
URL: http://llvm.org/viewvc/llvm-project?rev=277905&view=rev
Log:
CodeGen: If Convert blocks that would form a diamond when tail-merged.
The following function currently relies on tail-merging for if
conversion to succeed. The common tail of cond_true and cond_false is
extracted, and this then forms a diamond pattern that can be
successfully if converted.
If this block does not get extracted, either because tail-merging is
disabled or the threshold is higher, we should still recognize this
pattern and if-convert it.
define i32 @t2(i32 %a, i32 %b) nounwind {
entry:
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp1434, label %bb17, label %bb.outer
bb.outer: ; preds = %cond_false, %entry
%b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]
%a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]
br label %bb
bb: ; preds = %cond_true, %bb.outer
%indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]
%tmp. = sub i32 0, %b_addr.021.0.ph
%tmp.40 = mul i32 %indvar, %tmp.
%a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph
%tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph
br i1 %tmp3, label %cond_true, label %cond_false
cond_true: ; preds = %bb
%tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph
%tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph
%indvar.next = add i32 %indvar, 1
br i1 %tmp1437, label %bb17, label %bb
cond_false: ; preds = %bb
%tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0
%tmp14 = icmp eq i32 %a_addr.026.0, %tmp10
br i1 %tmp14, label %bb17, label %bb.outer
bb17: ; preds = %cond_false, %cond_true, %entry
%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]
ret i32 %a_addr.026.1
}
Without tail-merging or diamond-tail if conversion:
LBB1_1: @ %bb
@ =>This Inner Loop Header: Depth=1
cmp r0, r1
ble LBB1_3
@ BB#2: @ %cond_true
@ in Loop: Header=BB1_1 Depth=1
subs r0, r0, r1
cmp r1, r0
it ne
cmpne r0, r1
bgt LBB1_4
LBB1_3: @ %cond_false
@ in Loop: Header=BB1_1 Depth=1
subs r1, r1, r0
cmp r1, r0
bne LBB1_1
LBB1_4: @ %bb17
bx lr
With diamond-tail if conversion, but without tail-merging:
@ BB#0: @ %entry
cmp r0, r1
it eq
bxeq lr
LBB1_1: @ %bb
@ =>This Inner Loop Header: Depth=1
cmp r0, r1
ite le
suble r1, r1, r0
subgt r0, r0, r1
cmp r1, r0
bne LBB1_1
@ BB#2: @ %bb17
bx lr
Modified:
llvm/trunk/lib/CodeGen/IfConversion.cpp
llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
Modified: llvm/trunk/lib/CodeGen/IfConversion.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/IfConversion.cpp?rev=277905&r1=277904&r2=277905&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/IfConversion.cpp (original)
+++ llvm/trunk/lib/CodeGen/IfConversion.cpp Fri Aug 5 20:52:37 2016
@@ -58,6 +58,8 @@ static cl::opt<bool> DisableTriangleFR("
cl::init(false), cl::Hidden);
static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
cl::init(false), cl::Hidden);
+static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
+ cl::init(false), cl::Hidden);
static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
cl::init(true), cl::Hidden);
@@ -68,6 +70,7 @@ STATISTIC(NumTriangleRev, "Number of tr
STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
+STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");
STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
STATISTIC(NumDupBBs, "Number of duplicated blocks");
STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
@@ -82,7 +85,9 @@ namespace {
ICTriangleRev, // Same as ICTriangle, but true path rev condition.
ICTriangleFalse, // Same as ICTriangle, but on the false path.
ICTriangle, // BB is entry of a triangle sub-CFG.
- ICDiamond // BB is entry of a diamond sub-CFG.
+ ICDiamond, // BB is entry of a diamond sub-CFG.
+ ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a
+ // common tail that can be shared.
};
/// BBInfo - One per MachineBasicBlock, this is used to cache the result
@@ -114,6 +119,7 @@ namespace {
bool IsAnalyzed : 1;
bool IsEnqueued : 1;
bool IsBrAnalyzable : 1;
+ bool IsBrReversible : 1;
bool HasFallThrough : 1;
bool IsUnpredicable : 1;
bool CannotBeCopied : 1;
@@ -128,9 +134,10 @@ namespace {
SmallVector<MachineOperand, 4> Predicate;
BBInfo() : IsDone(false), IsBeingAnalyzed(false),
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
- HasFallThrough(false), IsUnpredicable(false),
- CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
- ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
+ IsBrReversible(false), HasFallThrough(false),
+ IsUnpredicable(false), CannotBeCopied(false),
+ ClobbersPred(false), NonPredSize(0), ExtraCost(0),
+ ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
FalseBB(nullptr) {}
};
@@ -148,11 +155,15 @@ namespace {
struct IfcvtToken {
BBInfo &BBI;
IfcvtKind Kind;
- bool NeedSubsumption;
unsigned NumDups;
unsigned NumDups2;
- IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
- : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
+ bool NeedSubsumption : 1;
+ bool TClobbersPred : 1;
+ bool FClobbersPred : 1;
+ IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
+ bool tc = false, bool fc = false)
+ : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
+ TClobbersPred(tc), FClobbersPred(fc) {}
};
/// BBAnalysis - Results of if-conversion feasibility analysis indexed by
@@ -202,23 +213,40 @@ namespace {
bool FalseBranch, unsigned &Dups,
BranchProbability Prediction) const;
bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
- unsigned &Dups1, unsigned &Dups2) const;
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
+ bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
void AnalyzeBranches(BBInfo &BBI);
void ScanInstructions(BBInfo &BBI,
MachineBasicBlock::iterator &Begin,
MachineBasicBlock::iterator &End) const;
+ bool RescanInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ BBInfo &TrueBBI, BBInfo &FalseBBI) const;
void AnalyzeBlock(MachineBasicBlock *MBB,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
- bool isTriangle = false, bool RevBranch = false);
+ bool isTriangle = false, bool RevBranch = false,
+ bool hasCommonTail = false);
void AnalyzeBlocks(MachineFunction &MF,
std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
void InvalidatePreds(MachineBasicBlock *BB);
void RemoveExtraEdges(BBInfo &BBI);
bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
+ bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred,
+ bool RemoveTrueBranch, bool RemoveFalseBranch,
+ bool MergeAddEdges);
bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
unsigned NumDups1, unsigned NumDups2);
+ bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbers, bool FClobbers);
void PredicateBlock(BBInfo &BBI,
MachineBasicBlock::iterator E,
SmallVectorImpl<MachineOperand> &Cond,
@@ -410,6 +438,19 @@ bool IfConverter::runOnMachineFunction(M
if (RetVal) ++NumDiamonds;
break;
}
+ case ICForkedDiamond: {
+ if (DisableForkedDiamond) break;
+ DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#"
+ << BBI.BB->getNumber() << " (T:"
+ << BBI.TrueBB->getNumber() << ",F:"
+ << BBI.FalseBB->getNumber() << ") ");
+ RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
+ Token->TClobbersPred,
+ Token->FClobbersPred);
+ DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
+ if (RetVal) ++NumForkedDiamonds;
+ break;
+ }
}
Change |= RetVal;
@@ -627,8 +668,12 @@ static void countDuplicatedInstructions(
// If Dups1 includes all of a block, then don't count duplicate
// instructions at the end of the blocks.
- if (TIB == TIE || FIB == FIE)
+ if (TIB == TIE || FIB == FIE) {
+ // Same reason as below. We need end to point just after the last non-shared
+ // instruction.
+ ++TIE; ++FIE;
return;
+ }
// Count duplicate instructions at the ends of the blocks.
while (TIE != TIB && FIE != FIB) {
@@ -646,12 +691,124 @@ static void countDuplicatedInstructions(
--TIE;
--FIE;
}
+ // TIE and FIE both now point at the last non-shared instruction, move them
+ // forward.
+ ++TIE; ++FIE;
+}
+
+/// RescanInstructions - Run ScanInstructions on a pair of blocks.
+/// @param TIB - True Iterator Begin, points to first non-shared instruction
+/// @param FIB - False Iterator Begin, points to first non-shared instruction
+/// @param TIE - True Iterator End, points past last non-shared instruction
+/// @param FIE - False Iterator End, points past last non-shared instruction
+/// @param TrueBBI - BBInfo to update for the true block.
+/// @param FalseBBI - BBInfo to update for the false block.
+/// @returns - false if either block cannot be predicated or if both blocks end
+/// with a predicate-clobbering instruction.
+bool IfConverter::RescanInstructions(
+ MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
+ MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
+ BBInfo &TrueBBI, BBInfo &FalseBBI) const {
+ ScanInstructions(TrueBBI, TIB, TIE);
+ if (TrueBBI.IsUnpredicable)
+ return false;
+ ScanInstructions(FalseBBI, FIB, FIE);
+ if (FalseBBI.IsUnpredicable)
+ return false;
+ if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)
+ return false;
+ return true;
+}
+
+/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along
+/// with their common predecessor) form a diamond if a common tail block is
+/// extracted.
+/// While not strictly a diamond, this pattern would form a diamond if
+/// tail-merging had merged the shared tails.
+/// EBB
+/// _/ \_
+/// | |
+/// TBB FBB
+/// / \ / \
+/// FalseBB TrueBB FalseBB
+/// Currently only handles analyzable branches.
+/// Specifically excludes actual diamonds to avoid overlap.
+bool IfConverter::ValidForkedDiamond(
+ BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
+ Dups1 = Dups2 = 0;
+ if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
+ FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
+ return false;
+
+ if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable)
+ return false;
+ // Don't IfConvert blocks that can't be folded into their predecessor.
+ if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
+ return false;
+
+ // This function is specifically looking for conditional tails, as
+ // unconditional tails are already handled by the standard diamond case.
+ if (TrueBBI.BrCond.size() == 0 ||
+ FalseBBI.BrCond.size() == 0)
+ return false;
+
+ MachineBasicBlock *TT = TrueBBI.TrueBB;
+ MachineBasicBlock *TF = TrueBBI.FalseBB;
+ MachineBasicBlock *FT = FalseBBI.TrueBB;
+ MachineBasicBlock *FF = FalseBBI.FalseBB;
+
+ if (!TT)
+ TT = getNextBlock(TrueBBI.BB);
+ if (!TF)
+ TF = getNextBlock(TrueBBI.BB);
+ if (!FT)
+ FT = getNextBlock(FalseBBI.BB);
+ if (!FF)
+ FF = getNextBlock(FalseBBI.BB);
+
+ if (!TT || !TF)
+ return false;
+
+ // Check successors. If they don't match, bail.
+ if (!((TT == FT && TF == FF) || (TF == FT && TT == FF)))
+ return false;
+
+ if (TF == FT && TT == FF) {
+ // If the branches are opposing, but we can't reverse, don't do it.
+ if (!FalseBBI.IsBrReversible)
+ return false;
+ ReverseBranchCondition(FalseBBI);
+ }
+
+ // Count duplicate instructions at the beginning of the true and false blocks.
+ MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
+ MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
+ MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
+ countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
+ *TrueBBI.BB, *FalseBBI.BB,
+ /* SkipConditionalBranches */ false);
+
+ TrueBBICalc.BB = TrueBBI.BB;
+ FalseBBICalc.BB = FalseBBI.BB;
+ if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
+ return false;
+ // The size is used to decide whether to if-convert, and the shared portions
+ // are subtracted off. Because of the subtraction, we just use the size that
+ // was calculated by the original ScanInstructions, as it is correct.
+ TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
+ FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
+ return true;
}
/// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
/// with their common predecessor) forms a valid diamond shape for ifcvt.
-bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
- unsigned &Dups1, unsigned &Dups2) const {
+bool IfConverter::ValidDiamond(
+ BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned &Dups1, unsigned &Dups2,
+ BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
Dups1 = Dups2 = 0;
if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
@@ -672,8 +829,7 @@ bool IfConverter::ValidDiamond(BBInfo &T
return false;
// FIXME: Allow true block to have an early exit?
- if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
- (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
+ if (TrueBBI.FalseBB || FalseBBI.FalseBB)
return false;
// Count duplicate instructions at the beginning and end of the true and
@@ -685,6 +841,16 @@ bool IfConverter::ValidDiamond(BBInfo &T
countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
*TrueBBI.BB, *FalseBBI.BB,
/* SkipConditionalBranches */ true);
+
+ TrueBBICalc.BB = TrueBBI.BB;
+ FalseBBICalc.BB = FalseBBI.BB;
+ if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
+ return false;
+ // The size is used to decide whether to if-convert, and the shared portions
+ // are subtracted off. Because of the subtraction, we just use the size that
+ // was calculated by the original ScanInstructions, as it is correct.
+ TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
+ FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
return true;
}
@@ -698,6 +864,9 @@ void IfConverter::AnalyzeBranches(BBInfo
BBI.BrCond.clear();
BBI.IsBrAnalyzable =
!TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
+ SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
+ BBI.IsBrReversible = (RevCond.size() == 0) ||
+ !TII->ReverseBranchCondition(RevCond);
BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
if (BBI.BrCond.size()) {
@@ -813,11 +982,22 @@ void IfConverter::ScanInstructions(BBInf
/// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
/// predicated by the specified predicate.
+/// @param BBI BBInfo for the block to check
+/// @param Pred Predicate array for the branch that leads to BBI
+/// @param isTriangle true if the Analysis is for a triangle
+/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false
+/// case
+/// @param hasCommonTail true if BBI shares a tail with a sibling block that
+/// contains any instruction that would make the block unpredicable.
bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
SmallVectorImpl<MachineOperand> &Pred,
- bool isTriangle, bool RevBranch) {
+ bool isTriangle, bool RevBranch,
+ bool hasCommonTail) {
// If the block is dead or unpredicable, then it cannot be predicated.
- if (BBI.IsDone || BBI.IsUnpredicable)
+ // Two blocks may share a common unpredicable tail, but this doesn't prevent
+ // them from being if-converted. The non-shared portion is assumed to have
+ // been checked
+ if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail))
return false;
// If it is already predicated but we couldn't analyze its terminator, the
@@ -831,7 +1011,7 @@ bool IfConverter::FeasibilityAnalysis(BB
if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
return false;
- if (BBI.BrCond.size()) {
+ if (!hasCommonTail && BBI.BrCond.size()) {
if (!isTriangle)
return false;
@@ -939,25 +1119,58 @@ void IfConverter::AnalyzeBlock(
BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
- if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
- MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
- TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
- *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
- FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
- Prediction) &&
- FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
- FeasibilityAnalysis(FalseBBI, RevCond)) {
- // Diamond:
- // EBB
- // / \_
- // | |
- // TBB FBB
- // \ /
- // TailBB
- // Note TailBB can be empty.
- Tokens.push_back(llvm::make_unique<IfcvtToken>(
- BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
- Enqueued = true;
+ if (CanRevCond) {
+ BBInfo TrueBBICalc, FalseBBICalc;
+ auto feasibleDiamond = [&]() {
+ return (
+ MeetIfcvtSizeLimit(
+ *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
+ TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
+ *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
+ FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
+ Prediction) &&
+ FeasibilityAnalysis(TrueBBI, BBI.BrCond,
+ /* IsTriangle */ false, /* RevCond */ false,
+ /* hasCommonTail */ true) &&
+ FeasibilityAnalysis(FalseBBI, RevCond,
+ /* IsTriangle */ false, /* RevCond */ false,
+ /* hasCommonTail */ true));
+ };
+
+ if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
+ TrueBBICalc, FalseBBICalc)) {
+ if (feasibleDiamond()) {
+ // Diamond:
+ // EBB
+ // / \_
+ // | |
+ // TBB FBB
+ // \ /
+ // TailBB
+ // Note TailBB can be empty.
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
+ Enqueued = true;
+ }
+ } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
+ TrueBBICalc, FalseBBICalc)) {
+ if (feasibleDiamond()) {
+ // ForkedDiamond:
+ // if TBB and FBB have a common tail that includes their conditional
+ // branch instructions, then we can If Convert this pattern.
+ // EBB
+ // _/ \_
+ // | |
+ // TBB FBB
+ // / \ / \
+ // FalseBB TrueBB FalseBB
+ //
+ Tokens.push_back(llvm::make_unique<IfcvtToken>(
+ BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
+ (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
+ Enqueued = true;
+ }
+ }
}
if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
@@ -1410,23 +1623,26 @@ bool IfConverter::IfConvertTriangle(BBIn
return true;
}
-/// IfConvertDiamond - If convert a diamond sub-CFG.
-///
-bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
- unsigned NumDups1, unsigned NumDups2) {
- BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
- BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
- MachineBasicBlock *TailBB = TrueBBI.TrueBB;
- // True block must fall through or end with an unanalyzable terminator.
- if (!TailBB) {
- if (blockAlwaysFallThrough(TrueBBI))
- TailBB = FalseBBI.TrueBB;
- assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
- }
+/// IfConvertDiamondCommon - Common code shared between diamond conversions.
+/// BBI, TrueBBI, and FalseBBI form the diamond shape.
+/// NumDups1 - number of shared instructions at the beginning of TrueBBI and
+/// FalseBBI
+/// NumDups2 - number of shared instructions at the end of TrueBBI and FalseBBI
+/// RemoveTrueBranch - Remove the branch of the true block before predicating
+/// Only false for unanalyzable fallthrough cases.
+/// RemoveFalseBranch - Remove the branch of the false block before predicating
+/// Only false for unanalyzable fallthrough cases.
+/// MergeAddEdges - Add successor edges when merging blocks. Only false for
+/// unanalyzable fallthrough
+bool IfConverter::IfConvertDiamondCommon(
+ BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred,
+ bool RemoveTrueBranch, bool RemoveFalseBranch,
+ bool MergeAddEdges) {
if (TrueBBI.IsDone || FalseBBI.IsDone ||
- TrueBBI.BB->pred_size() > 1 ||
- FalseBBI.BB->pred_size() > 1) {
+ TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) {
// Something has changed. It's no longer safe to predicate these blocks.
BBI.IsAnalyzed = false;
TrueBBI.IsAnalyzed = false;
@@ -1451,15 +1667,16 @@ bool IfConverter::IfConvertDiamond(BBInf
// Figure out the more profitable ordering.
bool DoSwap = false;
- if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
+ if (TClobbersPred && !FClobbersPred)
DoSwap = true;
- else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
+ else if (TClobbersPred == FClobbersPred) {
if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
DoSwap = true;
}
if (DoSwap) {
std::swap(BBI1, BBI2);
std::swap(Cond1, Cond2);
+ std::swap(RemoveTrueBranch, RemoveFalseBranch);
}
// Remove the conditional branch from entry to the blocks.
@@ -1506,11 +1723,7 @@ bool IfConverter::IfConvertDiamond(BBInf
BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
BBI2->BB->erase(BBI2->BB->begin(), DI2);
- // Remove branch from the 'true' block, unless it was not analyzable.
- // Non-analyzable branches need to be preserved, since in such cases,
- // the CFG structure is not an actual diamond (the join block may not
- // be present).
- if (BBI1->IsBrAnalyzable)
+ if (RemoveTrueBranch)
BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
// Remove duplicated instructions.
DI1 = BBI1->BB->end();
@@ -1529,9 +1742,9 @@ bool IfConverter::IfConvertDiamond(BBInf
// must be removed.
RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);
- // Remove 'false' block branch (unless it was not analyzable), and find
- // the last instruction to predicate.
- if (BBI2->IsBrAnalyzable)
+ // Remove 'false' block branch, and find the last instruction to predicate.
+ // Save the debug location.
+ if (RemoveFalseBranch)
BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
DI2 = BBI2->BB->end();
while (NumDups2 != 0) {
@@ -1607,8 +1820,74 @@ bool IfConverter::IfConvertDiamond(BBInf
PredicateBlock(*BBI2, DI2, *Cond2);
// Merge the true block into the entry of the diamond.
- MergeBlocks(BBI, *BBI1, TailBB == nullptr);
- MergeBlocks(BBI, *BBI2, TailBB == nullptr);
+ MergeBlocks(BBI, *BBI1, MergeAddEdges);
+ MergeBlocks(BBI, *BBI2, MergeAddEdges);
+ return true;
+}
+
+/// IfConvertForkedDiamond - If convert an almost-diamond sub-CFG where the true
+/// and false blocks share a common tail.
+bool IfConverter::IfConvertForkedDiamond(
+ BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2,
+ bool TClobbersPred, bool FClobbersPred) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+
+ // Save the debug location for later.
+ DebugLoc dl;
+ MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator();
+ if (TIE != TrueBBI.BB->end())
+ dl = TIE->getDebugLoc();
+ // Removing branches from both blocks is safe, because we have already
+ // determined that both blocks have the same branch instructions. The branch
+ // will be added back at the end, unpredicated.
+ if (!IfConvertDiamondCommon(
+ BBI, TrueBBI, FalseBBI,
+ NumDups1, NumDups2,
+ TClobbersPred, FClobbersPred,
+ /* RemoveTrueBranch */ true, /* RemoveFalseBranch */ true,
+ /* MergeAddEdges */ true))
+ return false;
+
+ // Add back the branch.
+ // Debug location saved above when removing the branch from BBI2
+ TII->InsertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
+ TrueBBI.BrCond, dl);
+
+ RemoveExtraEdges(BBI);
+
+ // Update block info.
+ BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
+ InvalidatePreds(BBI.BB);
+
+ // FIXME: Must maintain LiveIns.
+ return true;
+}
+
+/// IfConvertDiamond - If convert a diamond sub-CFG.
+///
+bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
+ unsigned NumDups1, unsigned NumDups2) {
+ BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
+ BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
+ MachineBasicBlock *TailBB = TrueBBI.TrueBB;
+
+ // True block must fall through or end with an unanalyzable terminator.
+ if (!TailBB) {
+ if (blockAlwaysFallThrough(TrueBBI))
+ TailBB = FalseBBI.TrueBB;
+ assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
+ }
+
+ if (!IfConvertDiamondCommon(
+ BBI, TrueBBI, FalseBBI,
+ NumDups1, NumDups2,
+ TrueBBI.ClobbersPred, FalseBBI.ClobbersPred,
+ /* RemoveTrueBranch */ TrueBBI.IsBrAnalyzable,
+ /* RemoveFalseBranch */ FalseBBI.IsBrAnalyzable,
+ /* MergeAddEdges */ TailBB == nullptr))
+ return false;
// If the if-converted block falls through or unconditionally branches into
// the tail block, and the tail block does not have other predecessors, then
@@ -1631,7 +1910,7 @@ bool IfConverter::IfConvertDiamond(BBInf
CanMergeTail = false;
else if (NumPreds == 1 && CanMergeTail) {
MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
- if (*PI != BBI1->BB && *PI != BBI2->BB)
+ if (*PI != TrueBBI.BB && *PI != FalseBBI.BB)
CanMergeTail = false;
}
if (CanMergeTail) {
@@ -1647,8 +1926,8 @@ bool IfConverter::IfConvertDiamond(BBInf
// RemoveExtraEdges won't work if the block has an unanalyzable branch,
// which can happen here if TailBB is unanalyzable and is merged, so
// explicitly remove BBI1 and BBI2 as successors.
- BBI.BB->removeSuccessor(BBI1->BB);
- BBI.BB->removeSuccessor(BBI2->BB, true);
+ BBI.BB->removeSuccessor(TrueBBI.BB);
+ BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);
RemoveExtraEdges(BBI);
// Update block info.
Modified: llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll?rev=277905&r1=277904&r2=277905&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll (original)
+++ llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll Fri Aug 5 20:52:37 2016
@@ -1,6 +1,7 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s
-; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 | FileCheck %s
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
; CHECK-LABEL: t1:
; CHECK: ittt ne
@@ -25,9 +26,9 @@ cond_next:
define i32 @t2(i32 %a, i32 %b) nounwind {
entry:
; CHECK-LABEL: t2:
-; CHECK: ite gt
-; CHECK: subgt
-; CHECK: suble
+; CHECK: ite {{gt|le}}
+; CHECK-DAG: suble
+; CHECK-DAG: subgt
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp1434, label %bb17, label %bb.outer
@@ -56,6 +57,44 @@ cond_false: ; preds = %bb
br i1 %tmp14, label %bb17, label %bb.outer
bb17: ; preds = %cond_false, %cond_true, %entry
+ %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ ret i32 %a_addr.026.1
+}
+
+define i32 @t2_nomerge(i32 %a, i32 %b) nounwind {
+entry:
+; CHECK-LABEL: t2_nomerge:
+; CHECK-NOT: ite {{gt|le}}
+; CHECK-NOT: suble
+; CHECK-NOT: subgt
+ %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
+ br i1 %tmp1434, label %bb17, label %bb.outer
+
+bb.outer: ; preds = %cond_false, %entry
+ %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
+ %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
+ br label %bb
+
+bb: ; preds = %cond_true, %bb.outer
+ %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
+ %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
+ %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
+ %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
+ %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
+ br i1 %tmp3, label %cond_true, label %cond_false
+
+cond_true: ; preds = %bb
+ %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
+ %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
+ %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
+ br i1 %tmp1437, label %bb17, label %bb
+
+cond_false: ; preds = %bb
+ %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
+ %tmp14 = icmp eq i32 %b_addr.021.0.ph, %tmp10 ; <i1> [#uses=1]
+ br i1 %tmp14, label %bb17, label %bb.outer
+
+bb17: ; preds = %cond_false, %cond_true, %entry
%a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
ret i32 %a_addr.026.1
}
More information about the llvm-commits
mailing list