[llvm] r278287 - CodeGen: If Convert blocks that would form a diamond when tail-merged.
Diana Picus via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 14 10:29:50 PDT 2016
Looks like this was also causing failures on other selfhost bots.
I've attached the log to https://llvm.org/bugs/show_bug.cgi?id=28950
Diana
On 14 August 2016 at 05:22, Diana Picus <diana.picus at linaro.org> wrote:
> Hi Kyle,
>
> This broke one of the buildbots. I reverted both this and r278288 in
> r278620 and r278621.
> See https://llvm.org/bugs/show_bug.cgi?id=28949
>
> Sorry it took so long to point it out. Please let me know if I can
> help you figure out what's upsetting the bot.
>
> Regards,
> Diana
>
>
> On 10 August 2016 at 23:45, Kyle Butt via llvm-commits
> <llvm-commits at lists.llvm.org> wrote:
>> Author: iteratee
>> Date: Wed Aug 10 15:45:56 2016
>> New Revision: 278287
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=278287&view=rev
>> Log:
>> CodeGen: If Convert blocks that would form a diamond when tail-merged.
>>
>> The following function currently relies on tail-merging for if
>> conversion to succeed. The common tail of cond_true and cond_false is
>> extracted, and this then forms a diamond pattern that can be
>> successfully if converted.
>>
>> If this block does not get extracted, either because tail-merging is
>> disabled or the threshold is higher, we should still recognize this
>> pattern and if-convert it.
>>
>> Fixed a regression in the original commit. Need to un-reverse branches after
>> reversing them, or other conversions go awry.
>>
>> define i32 @t2(i32 %a, i32 %b) nounwind {
>> entry:
>> %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
>> br i1 %tmp1434, label %bb17, label %bb.outer
>>
>> bb.outer: ; preds = %cond_false, %entry
>> %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ]
>> %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ]
>> br label %bb
>>
>> bb: ; preds = %cond_true, %bb.outer
>> %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ]
>> %tmp. = sub i32 0, %b_addr.021.0.ph
>> %tmp.40 = mul i32 %indvar, %tmp.
>> %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph
>> %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph
>> br i1 %tmp3, label %cond_true, label %cond_false
>>
>> cond_true: ; preds = %bb
>> %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph
>> %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph
>> %indvar.next = add i32 %indvar, 1
>> br i1 %tmp1437, label %bb17, label %bb
>>
>> cond_false: ; preds = %bb
>> %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0
>> %tmp14 = icmp eq i32 %a_addr.026.0, %tmp10
>> br i1 %tmp14, label %bb17, label %bb.outer
>>
>> bb17: ; preds = %cond_false, %cond_true, %entry
>> %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ]
>> ret i32 %a_addr.026.1
>> }
>>
>> Without tail-merging or diamond-tail if conversion:
>> LBB1_1: @ %bb
>> @ =>This Inner Loop Header: Depth=1
>> cmp r0, r1
>> ble LBB1_3
>> @ BB#2: @ %cond_true
>> @ in Loop: Header=BB1_1 Depth=1
>> subs r0, r0, r1
>> cmp r1, r0
>> it ne
>> cmpne r0, r1
>> bgt LBB1_4
>> LBB1_3: @ %cond_false
>> @ in Loop: Header=BB1_1 Depth=1
>> subs r1, r1, r0
>> cmp r1, r0
>> bne LBB1_1
>> LBB1_4: @ %bb17
>> bx lr
>>
>> With diamond-tail if conversion, but without tail-merging:
>> @ BB#0: @ %entry
>> cmp r0, r1
>> it eq
>> bxeq lr
>> LBB1_1: @ %bb
>> @ =>This Inner Loop Header: Depth=1
>> cmp r0, r1
>> ite le
>> suble r1, r1, r0
>> subgt r0, r0, r1
>> cmp r1, r0
>> bne LBB1_1
>> @ BB#2: @ %bb17
>> bx lr
>>
>> Added:
>> llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll
>> Modified:
>> llvm/trunk/lib/CodeGen/IfConversion.cpp
>> llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/IfConversion.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/IfConversion.cpp?rev=278287&r1=278286&r2=278287&view=diff
>> ==============================================================================
>> --- llvm/trunk/lib/CodeGen/IfConversion.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/IfConversion.cpp Wed Aug 10 15:45:56 2016
>> @@ -15,6 +15,7 @@
>> #include "llvm/CodeGen/Passes.h"
>> #include "BranchFolding.h"
>> #include "llvm/ADT/STLExtras.h"
>> +#include "llvm/ADT/ScopeExit.h"
>> #include "llvm/ADT/SmallSet.h"
>> #include "llvm/ADT/Statistic.h"
>> #include "llvm/CodeGen/LivePhysRegs.h"
>> @@ -58,6 +59,8 @@ static cl::opt<bool> DisableTriangleFR("
>> cl::init(false), cl::Hidden);
>> static cl::opt<bool> DisableDiamond("disable-ifcvt-diamond",
>> cl::init(false), cl::Hidden);
>> +static cl::opt<bool> DisableForkedDiamond("disable-ifcvt-forked-diamond",
>> + cl::init(false), cl::Hidden);
>> static cl::opt<bool> IfCvtBranchFold("ifcvt-branch-fold",
>> cl::init(true), cl::Hidden);
>>
>> @@ -68,6 +71,7 @@ STATISTIC(NumTriangleRev, "Number of tr
>> STATISTIC(NumTriangleFalse,"Number of triangle (F) if-conversions performed");
>> STATISTIC(NumTriangleFRev, "Number of triangle (F/R) if-conversions performed");
>> STATISTIC(NumDiamonds, "Number of diamond if-conversions performed");
>> +STATISTIC(NumForkedDiamonds, "Number of forked-diamond if-conversions performed");
>> STATISTIC(NumIfConvBBs, "Number of if-converted blocks");
>> STATISTIC(NumDupBBs, "Number of duplicated blocks");
>> STATISTIC(NumUnpred, "Number of true blocks of diamonds unpredicated");
>> @@ -82,7 +86,9 @@ namespace {
>> ICTriangleRev, // Same as ICTriangle, but true path rev condition.
>> ICTriangleFalse, // Same as ICTriangle, but on the false path.
>> ICTriangle, // BB is entry of a triangle sub-CFG.
>> - ICDiamond // BB is entry of a diamond sub-CFG.
>> + ICDiamond, // BB is entry of a diamond sub-CFG.
>> + ICForkedDiamond // BB is entry of an almost diamond sub-CFG, with a
>> + // common tail that can be shared.
>> };
>>
>> /// BBInfo - One per MachineBasicBlock, this is used to cache the result
>> @@ -114,6 +120,7 @@ namespace {
>> bool IsAnalyzed : 1;
>> bool IsEnqueued : 1;
>> bool IsBrAnalyzable : 1;
>> + bool IsBrReversible : 1;
>> bool HasFallThrough : 1;
>> bool IsUnpredicable : 1;
>> bool CannotBeCopied : 1;
>> @@ -128,9 +135,10 @@ namespace {
>> SmallVector<MachineOperand, 4> Predicate;
>> BBInfo() : IsDone(false), IsBeingAnalyzed(false),
>> IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
>> - HasFallThrough(false), IsUnpredicable(false),
>> - CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
>> - ExtraCost(0), ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
>> + IsBrReversible(false), HasFallThrough(false),
>> + IsUnpredicable(false), CannotBeCopied(false),
>> + ClobbersPred(false), NonPredSize(0), ExtraCost(0),
>> + ExtraCost2(0), BB(nullptr), TrueBB(nullptr),
>> FalseBB(nullptr) {}
>> };
>>
>> @@ -148,11 +156,15 @@ namespace {
>> struct IfcvtToken {
>> BBInfo &BBI;
>> IfcvtKind Kind;
>> - bool NeedSubsumption;
>> unsigned NumDups;
>> unsigned NumDups2;
>> - IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0)
>> - : BBI(b), Kind(k), NeedSubsumption(s), NumDups(d), NumDups2(d2) {}
>> + bool NeedSubsumption : 1;
>> + bool TClobbersPred : 1;
>> + bool FClobbersPred : 1;
>> + IfcvtToken(BBInfo &b, IfcvtKind k, bool s, unsigned d, unsigned d2 = 0,
>> + bool tc = false, bool fc = false)
>> + : BBI(b), Kind(k), NumDups(d), NumDups2(d2), NeedSubsumption(s),
>> + TClobbersPred(tc), FClobbersPred(fc) {}
>> };
>>
>> /// BBAnalysis - Results of if-conversion feasibility analysis indexed by
>> @@ -202,23 +214,40 @@ namespace {
>> bool FalseBranch, unsigned &Dups,
>> BranchProbability Prediction) const;
>> bool ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
>> - unsigned &Dups1, unsigned &Dups2) const;
>> + unsigned &Dups1, unsigned &Dups2,
>> + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
>> + bool ValidForkedDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
>> + unsigned &Dups1, unsigned &Dups2,
>> + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const;
>> void AnalyzeBranches(BBInfo &BBI);
>> void ScanInstructions(BBInfo &BBI,
>> MachineBasicBlock::iterator &Begin,
>> MachineBasicBlock::iterator &End) const;
>> + bool RescanInstructions(
>> + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
>> + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
>> + BBInfo &TrueBBI, BBInfo &FalseBBI) const;
>> void AnalyzeBlock(MachineBasicBlock *MBB,
>> std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
>> bool FeasibilityAnalysis(BBInfo &BBI, SmallVectorImpl<MachineOperand> &Cond,
>> - bool isTriangle = false, bool RevBranch = false);
>> + bool isTriangle = false, bool RevBranch = false,
>> + bool hasCommonTail = false);
>> void AnalyzeBlocks(MachineFunction &MF,
>> std::vector<std::unique_ptr<IfcvtToken>> &Tokens);
>> void InvalidatePreds(MachineBasicBlock *BB);
>> void RemoveExtraEdges(BBInfo &BBI);
>> bool IfConvertSimple(BBInfo &BBI, IfcvtKind Kind);
>> bool IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind);
>> + bool IfConvertDiamondCommon(BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
>> + unsigned NumDups1, unsigned NumDups2,
>> + bool TClobbersPred, bool FClobbersPred,
>> + bool RemoveTrueBranch, bool RemoveFalseBranch,
>> + bool MergeAddEdges);
>> bool IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
>> unsigned NumDups1, unsigned NumDups2);
>> + bool IfConvertForkedDiamond(BBInfo &BBI, IfcvtKind Kind,
>> + unsigned NumDups1, unsigned NumDups2,
>> + bool TClobbers, bool FClobbers);
>> void PredicateBlock(BBInfo &BBI,
>> MachineBasicBlock::iterator E,
>> SmallVectorImpl<MachineOperand> &Cond,
>> @@ -410,6 +439,19 @@ bool IfConverter::runOnMachineFunction(M
>> if (RetVal) ++NumDiamonds;
>> break;
>> }
>> + case ICForkedDiamond: {
>> + if (DisableForkedDiamond) break;
>> + DEBUG(dbgs() << "Ifcvt (Forked Diamond): BB#"
>> + << BBI.BB->getNumber() << " (T:"
>> + << BBI.TrueBB->getNumber() << ",F:"
>> + << BBI.FalseBB->getNumber() << ") ");
>> + RetVal = IfConvertForkedDiamond(BBI, Kind, NumDups, NumDups2,
>> + Token->TClobbersPred,
>> + Token->FClobbersPred);
>> + DEBUG(dbgs() << (RetVal ? "succeeded!" : "failed!") << "\n");
>> + if (RetVal) ++NumForkedDiamonds;
>> + break;
>> + }
>> }
>>
>> Change |= RetVal;
>> @@ -611,6 +653,13 @@ static void countDuplicatedInstructions(
>> return;
>> --TIE;
>> --FIE;
>> + // Upon exit TIE and FIE will both point at the last non-shared instruction.
>> + // They need to be moved forward to point past the last non-shared
>> + // instruction.
>> + auto IncrementEndIteratorsOnExit = make_scope_exit([&]() {
>> + ++TIE; ++FIE;
>> + });
>> +
>> if (!TBB.succ_empty() || !FBB.succ_empty()) {
>> if (SkipConditionalBranches) {
>> while (TIE != TIB && TIE->isBranch())
>> @@ -648,10 +697,125 @@ static void countDuplicatedInstructions(
>> }
>> }
>>
>> +/// RescanInstructions - Run ScanInstructions on a pair of blocks.
>> +/// @param TIB - True Iterator Begin, points to first non-shared instruction
>> +/// @param FIB - False Iterator Begin, points to first non-shared instruction
>> +/// @param TIE - True Iterator End, points past last non-shared instruction
>> +/// @param FIE - False Iterator End, points past last non-shared instruction
>> +/// @param TrueBBI - BBInfo to update for the true block.
>> +/// @param FalseBBI - BBInfo to update for the false block.
>> +/// @returns - false if either block cannot be predicated or if both blocks end
>> +/// with a predicate-clobbering instruction.
>> +bool IfConverter::RescanInstructions(
>> + MachineBasicBlock::iterator &TIB, MachineBasicBlock::iterator &FIB,
>> + MachineBasicBlock::iterator &TIE, MachineBasicBlock::iterator &FIE,
>> + BBInfo &TrueBBI, BBInfo &FalseBBI) const {
>> + ScanInstructions(TrueBBI, TIB, TIE);
>> + if (TrueBBI.IsUnpredicable)
>> + return false;
>> + ScanInstructions(FalseBBI, FIB, FIE);
>> + if (FalseBBI.IsUnpredicable)
>> + return false;
>> + if (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred)
>> + return false;
>> + return true;
>> +}
>> +
>> +/// ValidForkedDiamond - Returns true if the 'true' and 'false' blocks (along
>> +/// with their common predecessor) form a diamond if a common tail block is
>> +/// extracted.
>> +/// While not strictly a diamond, this pattern would form a diamond if
>> +/// tail-merging had merged the shared tails.
>> +/// EBB
>> +/// _/ \_
>> +/// | |
>> +/// TBB FBB
>> +/// / \ / \
>> +/// FalseBB TrueBB FalseBB
>> +/// Currently only handles analyzable branches.
>> +/// Specifically excludes actual diamonds to avoid overlap.
>> +bool IfConverter::ValidForkedDiamond(
>> + BBInfo &TrueBBI, BBInfo &FalseBBI,
>> + unsigned &Dups1, unsigned &Dups2,
>> + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
>> + Dups1 = Dups2 = 0;
>> + if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
>> + FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
>> + return false;
>> +
>> + if (!TrueBBI.IsBrAnalyzable || !FalseBBI.IsBrAnalyzable)
>> + return false;
>> + // Don't IfConvert blocks that can't be folded into their predecessor.
>> + if (TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1)
>> + return false;
>> +
>> + // This function is specifically looking for conditional tails, as
>> + // unconditional tails are already handled by the standard diamond case.
>> + if (TrueBBI.BrCond.size() == 0 ||
>> + FalseBBI.BrCond.size() == 0)
>> + return false;
>> +
>> + MachineBasicBlock *TT = TrueBBI.TrueBB;
>> + MachineBasicBlock *TF = TrueBBI.FalseBB;
>> + MachineBasicBlock *FT = FalseBBI.TrueBB;
>> + MachineBasicBlock *FF = FalseBBI.FalseBB;
>> +
>> + if (!TT)
>> + TT = getNextBlock(TrueBBI.BB);
>> + if (!TF)
>> + TF = getNextBlock(TrueBBI.BB);
>> + if (!FT)
>> + FT = getNextBlock(FalseBBI.BB);
>> + if (!FF)
>> + FF = getNextBlock(FalseBBI.BB);
>> +
>> + if (!TT || !TF)
>> + return false;
>> +
>> + // Check successors. If they don't match, bail.
>> + if (!((TT == FT && TF == FF) || (TF == FT && TT == FF)))
>> + return false;
>> +
>> + bool FalseReversed = false;
>> + if (TF == FT && TT == FF) {
>> + // If the branches are opposing, but we can't reverse, don't do it.
>> + if (!FalseBBI.IsBrReversible)
>> + return false;
>> + FalseReversed = true;
>> + ReverseBranchCondition(FalseBBI);
>> + }
>> + auto UnReverseOnExit = make_scope_exit([&]() {
>> + if (FalseReversed)
>> + ReverseBranchCondition(FalseBBI);
>> + });
>> +
>> + // Count duplicate instructions at the beginning of the true and false blocks.
>> + MachineBasicBlock::iterator TIB = TrueBBI.BB->begin();
>> + MachineBasicBlock::iterator FIB = FalseBBI.BB->begin();
>> + MachineBasicBlock::iterator TIE = TrueBBI.BB->end();
>> + MachineBasicBlock::iterator FIE = FalseBBI.BB->end();
>> + countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
>> + *TrueBBI.BB, *FalseBBI.BB,
>> + /* SkipConditionalBranches */ false);
>> +
>> + TrueBBICalc.BB = TrueBBI.BB;
>> + FalseBBICalc.BB = FalseBBI.BB;
>> + if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
>> + return false;
>> + // The size is used to decide whether to if-convert, and the shared portions
>> + // are subtracted off. Because of the subtraction, we just use the size that
>> + // was calculated by the original ScanInstructions, as it is correct.
>> + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
>> + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
>> + return true;
>> +}
>> +
>> /// ValidDiamond - Returns true if the 'true' and 'false' blocks (along
>> /// with their common predecessor) forms a valid diamond shape for ifcvt.
>> -bool IfConverter::ValidDiamond(BBInfo &TrueBBI, BBInfo &FalseBBI,
>> - unsigned &Dups1, unsigned &Dups2) const {
>> +bool IfConverter::ValidDiamond(
>> + BBInfo &TrueBBI, BBInfo &FalseBBI,
>> + unsigned &Dups1, unsigned &Dups2,
>> + BBInfo &TrueBBICalc, BBInfo &FalseBBICalc) const {
>> Dups1 = Dups2 = 0;
>> if (TrueBBI.IsBeingAnalyzed || TrueBBI.IsDone ||
>> FalseBBI.IsBeingAnalyzed || FalseBBI.IsDone)
>> @@ -672,8 +836,7 @@ bool IfConverter::ValidDiamond(BBInfo &T
>> return false;
>>
>> // FIXME: Allow true block to have an early exit?
>> - if (TrueBBI.FalseBB || FalseBBI.FalseBB ||
>> - (TrueBBI.ClobbersPred && FalseBBI.ClobbersPred))
>> + if (TrueBBI.FalseBB || FalseBBI.FalseBB)
>> return false;
>>
>> // Count duplicate instructions at the beginning and end of the true and
>> @@ -685,6 +848,16 @@ bool IfConverter::ValidDiamond(BBInfo &T
>> countDuplicatedInstructions(TIB, FIB, TIE, FIE, Dups1, Dups2,
>> *TrueBBI.BB, *FalseBBI.BB,
>> /* SkipConditionalBranches */ true);
>> +
>> + TrueBBICalc.BB = TrueBBI.BB;
>> + FalseBBICalc.BB = FalseBBI.BB;
>> + if (!RescanInstructions(TIB, FIB, TIE, FIE, TrueBBICalc, FalseBBICalc))
>> + return false;
>> + // The size is used to decide whether to if-convert, and the shared portions
>> + // are subtracted off. Because of the subtraction, we just use the size that
>> + // was calculated by the original ScanInstructions, as it is correct.
>> + TrueBBICalc.NonPredSize = TrueBBI.NonPredSize;
>> + FalseBBICalc.NonPredSize = FalseBBI.NonPredSize;
>> return true;
>> }
>>
>> @@ -698,6 +871,9 @@ void IfConverter::AnalyzeBranches(BBInfo
>> BBI.BrCond.clear();
>> BBI.IsBrAnalyzable =
>> !TII->analyzeBranch(*BBI.BB, BBI.TrueBB, BBI.FalseBB, BBI.BrCond);
>> + SmallVector<MachineOperand, 4> RevCond(BBI.BrCond.begin(), BBI.BrCond.end());
>> + BBI.IsBrReversible = (RevCond.size() == 0) ||
>> + !TII->ReverseBranchCondition(RevCond);
>> BBI.HasFallThrough = BBI.IsBrAnalyzable && BBI.FalseBB == nullptr;
>>
>> if (BBI.BrCond.size()) {
>> @@ -813,11 +989,22 @@ void IfConverter::ScanInstructions(BBInf
>>
>> /// FeasibilityAnalysis - Determine if the block is a suitable candidate to be
>> /// predicated by the specified predicate.
>> +/// @param BBI BBInfo for the block to check
>> +/// @param Pred Predicate array for the branch that leads to BBI
>> +/// @param isTriangle true if the Analysis is for a triangle
>> +/// @param RevBranch true if Reverse(Pred) leads to BBI (e.g. BBI is the false
>> +/// case)
>> +/// @param hasCommonTail true if BBI shares a tail with a sibling block that
>> +/// contains any instruction that would make the block unpredicable.
>> bool IfConverter::FeasibilityAnalysis(BBInfo &BBI,
>> SmallVectorImpl<MachineOperand> &Pred,
>> - bool isTriangle, bool RevBranch) {
>> + bool isTriangle, bool RevBranch,
>> + bool hasCommonTail) {
>> // If the block is dead or unpredicable, then it cannot be predicated.
>> - if (BBI.IsDone || BBI.IsUnpredicable)
>> + // Two blocks may share a common unpredicable tail, but this doesn't prevent
>> + // them from being if-converted. The non-shared portion is assumed to have
>> + // been checked
>> + if (BBI.IsDone || (BBI.IsUnpredicable && !hasCommonTail))
>> return false;
>>
>> // If it is already predicated but we couldn't analyze its terminator, the
>> @@ -831,7 +1018,7 @@ bool IfConverter::FeasibilityAnalysis(BB
>> if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate))
>> return false;
>>
>> - if (BBI.BrCond.size()) {
>> + if (!hasCommonTail && BBI.BrCond.size()) {
>> if (!isTriangle)
>> return false;
>>
>> @@ -939,25 +1126,58 @@ void IfConverter::AnalyzeBlock(
>>
>> BranchProbability Prediction = MBPI->getEdgeProbability(BB, TrueBBI.BB);
>>
>> - if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
>> - MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
>> - TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
>> - *FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
>> - FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
>> - Prediction) &&
>> - FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
>> - FeasibilityAnalysis(FalseBBI, RevCond)) {
>> - // Diamond:
>> - // EBB
>> - // / \_
>> - // | |
>> - // TBB FBB
>> - // \ /
>> - // TailBB
>> - // Note TailBB can be empty.
>> - Tokens.push_back(llvm::make_unique<IfcvtToken>(
>> - BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
>> - Enqueued = true;
>> + if (CanRevCond) {
>> + BBInfo TrueBBICalc, FalseBBICalc;
>> + auto feasibleDiamond = [&]() {
>> + return (
>> + MeetIfcvtSizeLimit(
>> + *TrueBBI.BB, (TrueBBICalc.NonPredSize - (Dups + Dups2) +
>> + TrueBBICalc.ExtraCost), TrueBBICalc.ExtraCost2,
>> + *FalseBBI.BB, (FalseBBICalc.NonPredSize - (Dups + Dups2) +
>> + FalseBBICalc.ExtraCost), FalseBBICalc.ExtraCost2,
>> + Prediction) &&
>> + FeasibilityAnalysis(TrueBBI, BBI.BrCond,
>> + /* IsTriangle */ false, /* RevCond */ false,
>> + /* hasCommonTail */ true) &&
>> + FeasibilityAnalysis(FalseBBI, RevCond,
>> + /* IsTriangle */ false, /* RevCond */ false,
>> + /* hasCommonTail */ true));
>> + };
>> +
>> + if (ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2,
>> + TrueBBICalc, FalseBBICalc)) {
>> + if (feasibleDiamond()) {
>> + // Diamond:
>> + // EBB
>> + // / \_
>> + // | |
>> + // TBB FBB
>> + // \ /
>> + // TailBB
>> + // Note TailBB can be empty.
>> + Tokens.push_back(llvm::make_unique<IfcvtToken>(
>> + BBI, ICDiamond, TNeedSub | FNeedSub, Dups, Dups2));
>> + Enqueued = true;
>> + }
>> + } else if (ValidForkedDiamond(TrueBBI, FalseBBI, Dups, Dups2,
>> + TrueBBICalc, FalseBBICalc)) {
>> + if (feasibleDiamond()) {
>> + // ForkedDiamond:
>> + // if TBB and FBB have a common tail that includes their conditional
>> + // branch instructions, then we can If Convert this pattern.
>> + // EBB
>> + // _/ \_
>> + // | |
>> + // TBB FBB
>> + // / \ / \
>> + // FalseBB TrueBB FalseBB
>> + //
>> + Tokens.push_back(llvm::make_unique<IfcvtToken>(
>> + BBI, ICForkedDiamond, TNeedSub | FNeedSub, Dups, Dups2,
>> + (bool) TrueBBICalc.ClobbersPred, (bool) FalseBBICalc.ClobbersPred));
>> + Enqueued = true;
>> + }
>> + }
>> }
>>
>> if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction) &&
>> @@ -1410,23 +1630,26 @@ bool IfConverter::IfConvertTriangle(BBIn
>> return true;
>> }
>>
>> -/// IfConvertDiamond - If convert a diamond sub-CFG.
>> -///
>> -bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
>> - unsigned NumDups1, unsigned NumDups2) {
>> - BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
>> - BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
>> - MachineBasicBlock *TailBB = TrueBBI.TrueBB;
>> - // True block must fall through or end with an unanalyzable terminator.
>> - if (!TailBB) {
>> - if (blockAlwaysFallThrough(TrueBBI))
>> - TailBB = FalseBBI.TrueBB;
>> - assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
>> - }
>> +/// IfConvertDiamondCommon - Common code shared between diamond conversions.
>> +/// BBI, TrueBBI, and FalseBBI form the diamond shape.
>> +/// NumDups1 - number of shared instructions at the beginning of TrueBBI and
>> +/// FalseBBI
>> +/// NumDups2 - number of shared instructions at the end of TrueBBI and FalseBBI
>> +/// RemoveTrueBranch - Remove the branch of the true block before predicating
>> +/// Only false for unanalyzable fallthrough cases.
>> +/// RemoveFalseBranch - Remove the branch of the false block before predicating
>> +/// Only false for unanalyzable fallthrough cases.
>> +/// MergeAddEdges - Add successor edges when merging blocks. Only false for
>> +/// unanalyzable fallthrough
>> +bool IfConverter::IfConvertDiamondCommon(
>> + BBInfo &BBI, BBInfo &TrueBBI, BBInfo &FalseBBI,
>> + unsigned NumDups1, unsigned NumDups2,
>> + bool TClobbersPred, bool FClobbersPred,
>> + bool RemoveTrueBranch, bool RemoveFalseBranch,
>> + bool MergeAddEdges) {
>>
>> if (TrueBBI.IsDone || FalseBBI.IsDone ||
>> - TrueBBI.BB->pred_size() > 1 ||
>> - FalseBBI.BB->pred_size() > 1) {
>> + TrueBBI.BB->pred_size() > 1 || FalseBBI.BB->pred_size() > 1) {
>> // Something has changed. It's no longer safe to predicate these blocks.
>> BBI.IsAnalyzed = false;
>> TrueBBI.IsAnalyzed = false;
>> @@ -1451,15 +1674,16 @@ bool IfConverter::IfConvertDiamond(BBInf
>>
>> // Figure out the more profitable ordering.
>> bool DoSwap = false;
>> - if (TrueBBI.ClobbersPred && !FalseBBI.ClobbersPred)
>> + if (TClobbersPred && !FClobbersPred)
>> DoSwap = true;
>> - else if (TrueBBI.ClobbersPred == FalseBBI.ClobbersPred) {
>> + else if (TClobbersPred == FClobbersPred) {
>> if (TrueBBI.NonPredSize > FalseBBI.NonPredSize)
>> DoSwap = true;
>> }
>> if (DoSwap) {
>> std::swap(BBI1, BBI2);
>> std::swap(Cond1, Cond2);
>> + std::swap(RemoveTrueBranch, RemoveFalseBranch);
>> }
>>
>> // Remove the conditional branch from entry to the blocks.
>> @@ -1506,11 +1730,7 @@ bool IfConverter::IfConvertDiamond(BBInf
>> BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1);
>> BBI2->BB->erase(BBI2->BB->begin(), DI2);
>>
>> - // Remove branch from the 'true' block, unless it was not analyzable.
>> - // Non-analyzable branches need to be preserved, since in such cases,
>> - // the CFG structure is not an actual diamond (the join block may not
>> - // be present).
>> - if (BBI1->IsBrAnalyzable)
>> + if (RemoveTrueBranch)
>> BBI1->NonPredSize -= TII->RemoveBranch(*BBI1->BB);
>> // Remove duplicated instructions.
>> DI1 = BBI1->BB->end();
>> @@ -1529,9 +1749,9 @@ bool IfConverter::IfConvertDiamond(BBInf
>> // must be removed.
>> RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI);
>>
>> - // Remove 'false' block branch (unless it was not analyzable), and find
>> - // the last instruction to predicate.
>> - if (BBI2->IsBrAnalyzable)
>> + // Remove 'false' block branch, and find the last instruction to predicate.
>> + // Save the debug location.
>> + if (RemoveFalseBranch)
>> BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB);
>> DI2 = BBI2->BB->end();
>> while (NumDups2 != 0) {
>> @@ -1607,8 +1827,74 @@ bool IfConverter::IfConvertDiamond(BBInf
>> PredicateBlock(*BBI2, DI2, *Cond2);
>>
>> // Merge the true block into the entry of the diamond.
>> - MergeBlocks(BBI, *BBI1, TailBB == nullptr);
>> - MergeBlocks(BBI, *BBI2, TailBB == nullptr);
>> + MergeBlocks(BBI, *BBI1, MergeAddEdges);
>> + MergeBlocks(BBI, *BBI2, MergeAddEdges);
>> + return true;
>> +}
>> +
>> +/// IfConvertForkedDiamond - If convert an almost-diamond sub-CFG where the true
>> +/// and false blocks share a common tail.
>> +bool IfConverter::IfConvertForkedDiamond(
>> + BBInfo &BBI, IfcvtKind Kind,
>> + unsigned NumDups1, unsigned NumDups2,
>> + bool TClobbersPred, bool FClobbersPred) {
>> + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
>> + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
>> +
>> + // Save the debug location for later.
>> + DebugLoc dl;
>> + MachineBasicBlock::iterator TIE = TrueBBI.BB->getFirstTerminator();
>> + if (TIE != TrueBBI.BB->end())
>> + dl = TIE->getDebugLoc();
>> + // Removing branches from both blocks is safe, because we have already
>> + // determined that both blocks have the same branch instructions. The branch
>> + // will be added back at the end, unpredicated.
>> + if (!IfConvertDiamondCommon(
>> + BBI, TrueBBI, FalseBBI,
>> + NumDups1, NumDups2,
>> + TClobbersPred, FClobbersPred,
>> + /* RemoveTrueBranch */ true, /* RemoveFalseBranch */ true,
>> + /* MergeAddEdges */ true))
>> + return false;
>> +
>> + // Add back the branch.
>> + // Debug location saved above when removing the branch from BBI2
>> + TII->InsertBranch(*BBI.BB, TrueBBI.TrueBB, TrueBBI.FalseBB,
>> + TrueBBI.BrCond, dl);
>> +
>> + RemoveExtraEdges(BBI);
>> +
>> + // Update block info.
>> + BBI.IsDone = TrueBBI.IsDone = FalseBBI.IsDone = true;
>> + InvalidatePreds(BBI.BB);
>> +
>> + // FIXME: Must maintain LiveIns.
>> + return true;
>> +}
>> +
>> +/// IfConvertDiamond - If convert a diamond sub-CFG.
>> +///
>> +bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind,
>> + unsigned NumDups1, unsigned NumDups2) {
>> + BBInfo &TrueBBI = BBAnalysis[BBI.TrueBB->getNumber()];
>> + BBInfo &FalseBBI = BBAnalysis[BBI.FalseBB->getNumber()];
>> + MachineBasicBlock *TailBB = TrueBBI.TrueBB;
>> +
>> + // True block must fall through or end with an unanalyzable terminator.
>> + if (!TailBB) {
>> + if (blockAlwaysFallThrough(TrueBBI))
>> + TailBB = FalseBBI.TrueBB;
>> + assert((TailBB || !TrueBBI.IsBrAnalyzable) && "Unexpected!");
>> + }
>> +
>> + if (!IfConvertDiamondCommon(
>> + BBI, TrueBBI, FalseBBI,
>> + NumDups1, NumDups2,
>> + TrueBBI.ClobbersPred, FalseBBI.ClobbersPred,
>> + /* RemoveTrueBranch */ TrueBBI.IsBrAnalyzable,
>> + /* RemoveFalseBranch */ FalseBBI.IsBrAnalyzable,
>> + /* MergeAddEdges */ TailBB == nullptr))
>> + return false;
>>
>> // If the if-converted block falls through or unconditionally branches into
>> // the tail block, and the tail block does not have other predecessors, then
>> @@ -1631,7 +1917,7 @@ bool IfConverter::IfConvertDiamond(BBInf
>> CanMergeTail = false;
>> else if (NumPreds == 1 && CanMergeTail) {
>> MachineBasicBlock::pred_iterator PI = TailBB->pred_begin();
>> - if (*PI != BBI1->BB && *PI != BBI2->BB)
>> + if (*PI != TrueBBI.BB && *PI != FalseBBI.BB)
>> CanMergeTail = false;
>> }
>> if (CanMergeTail) {
>> @@ -1647,8 +1933,8 @@ bool IfConverter::IfConvertDiamond(BBInf
>> // RemoveExtraEdges won't work if the block has an unanalyzable branch,
>> // which can happen here if TailBB is unanalyzable and is merged, so
>> // explicitly remove BBI1 and BBI2 as successors.
>> - BBI.BB->removeSuccessor(BBI1->BB);
>> - BBI.BB->removeSuccessor(BBI2->BB, true);
>> + BBI.BB->removeSuccessor(TrueBBI.BB);
>> + BBI.BB->removeSuccessor(FalseBBI.BB, /* NormalizeSuccessProbs */ true);
>> RemoveExtraEdges(BBI);
>>
>> // Update block info.
>>
>> Added: llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll?rev=278287&view=auto
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll (added)
>> +++ llvm/trunk/test/CodeGen/PowerPC/ifcvt-forked-bug-2016-08-08.ll Wed Aug 10 15:45:56 2016
>> @@ -0,0 +1,36 @@
>> +; ModuleID = 'bugpoint-reduced-instructions.bc'
>> +; RUN: llc -O2 -o - %s | FileCheck %s
>> +source_filename = "bugpoint-output-9ad75f8.bc"
>> +target datalayout = "e-m:e-i64:64-n32:64"
>> +target triple = "powerpc64le-unknown-linux-gnu"
>> +
>> +; Function Attrs: nounwind uwtable
>> +define hidden void @_ZN11__sanitizer25MaybeStartBackgroudThreadEv() local_unnamed_addr #0 {
>> +entry:
>> + br i1 undef, label %land.lhs.true, label %if.end
>> +
>> +; CHECK: # %land.lhs.true
>> +; CHECK-NEXT: bclr
>> +; CHECK-NEXT: # %if.end4
>> +land.lhs.true: ; preds = %entry
>> + br i1 undef, label %return, label %if.end4
>> +
>> +if.end: ; preds = %entry
>> + br i1 icmp ne (i32 (i8*, i8*, i8* (i8*)*, i8*)* @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_, i32 (i8*, i8*, i8* (i8*)*, i8*)* null), label %if.end4, label %return
>> +
>> +if.end4: ; preds = %if.end, %land.lhs.true
>> + %call5 = tail call i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)* nonnull @_ZN11__sanitizer16BackgroundThreadEPv, i8* null) #7
>> + unreachable
>> +
>> +return: ; preds = %if.end, %land.lhs.true
>> + ret void
>> +}
>> +
>> +declare extern_weak signext i32 @_ZN11__sanitizer19real_pthread_createEPvS0_PFS0_S0_ES0_(i8*, i8*, i8* (i8*)*, i8*) #2
>> +
>> +declare i8* @_ZN11__sanitizer21internal_start_threadEPFvPvES0_(void (i8*)*, i8*) local_unnamed_addr #2
>> +
>> +declare hidden void @_ZN11__sanitizer16BackgroundThreadEPv(i8* nocapture readnone) #5
>> +
>> +attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="ppc64le" "target-features"="+altivec,+bpermd,+crypto,+direct-move,+extdiv,+power8-vector,+vsx,-qpx" "unsafe-fp-math"="false" "use-soft-float"="false" }
>> +attributes #7 = { nobuiltin nounwind }
>>
>> Modified: llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll?rev=278287&r1=278286&r2=278287&view=diff
>> ==============================================================================
>> --- llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll (original)
>> +++ llvm/trunk/test/CodeGen/Thumb2/thumb2-ifcvt1.ll Wed Aug 10 15:45:56 2016
>> @@ -1,6 +1,7 @@
>> ; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
>> ; RUN: llc < %s -mtriple=thumbv7-apple-darwin -arm-default-it | FileCheck %s
>> -; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it |FileCheck %s
>> +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it | FileCheck %s
>> +; RUN: llc < %s -mtriple=thumbv8 -arm-no-restrict-it -enable-tail-merge=0 | FileCheck %s
>> define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
>> ; CHECK-LABEL: t1:
>> ; CHECK: ittt ne
>> @@ -25,9 +26,9 @@ cond_next:
>> define i32 @t2(i32 %a, i32 %b) nounwind {
>> entry:
>> ; CHECK-LABEL: t2:
>> -; CHECK: ite gt
>> -; CHECK: subgt
>> -; CHECK: suble
>> +; CHECK: ite {{gt|le}}
>> +; CHECK-DAG: suble
>> +; CHECK-DAG: subgt
>> %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
>> br i1 %tmp1434, label %bb17, label %bb.outer
>>
>> @@ -56,6 +57,44 @@ cond_false: ; preds = %bb
>> br i1 %tmp14, label %bb17, label %bb.outer
>>
>> bb17: ; preds = %cond_false, %cond_true, %entry
>> + %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
>> + ret i32 %a_addr.026.1
>> +}
>> +
>> +define i32 @t2_nomerge(i32 %a, i32 %b) nounwind {
>> +entry:
>> +; CHECK-LABEL: t2_nomerge:
>> +; CHECK-NOT: ite {{gt|le}}
>> +; CHECK-NOT: suble
>> +; CHECK-NOT: subgt
>> + %tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
>> + br i1 %tmp1434, label %bb17, label %bb.outer
>> +
>> +bb.outer: ; preds = %cond_false, %entry
>> + %b_addr.021.0.ph = phi i32 [ %b, %entry ], [ %tmp10, %cond_false ] ; <i32> [#uses=5]
>> + %a_addr.026.0.ph = phi i32 [ %a, %entry ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
>> + br label %bb
>> +
>> +bb: ; preds = %cond_true, %bb.outer
>> + %indvar = phi i32 [ 0, %bb.outer ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
>> + %tmp. = sub i32 0, %b_addr.021.0.ph ; <i32> [#uses=1]
>> + %tmp.40 = mul i32 %indvar, %tmp. ; <i32> [#uses=1]
>> + %a_addr.026.0 = add i32 %tmp.40, %a_addr.026.0.ph ; <i32> [#uses=6]
>> + %tmp3 = icmp sgt i32 %a_addr.026.0, %b_addr.021.0.ph ; <i1> [#uses=1]
>> + br i1 %tmp3, label %cond_true, label %cond_false
>> +
>> +cond_true: ; preds = %bb
>> + %tmp7 = sub i32 %a_addr.026.0, %b_addr.021.0.ph ; <i32> [#uses=2]
>> + %tmp1437 = icmp eq i32 %tmp7, %b_addr.021.0.ph ; <i1> [#uses=1]
>> + %indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
>> + br i1 %tmp1437, label %bb17, label %bb
>> +
>> +cond_false: ; preds = %bb
>> + %tmp10 = sub i32 %b_addr.021.0.ph, %a_addr.026.0 ; <i32> [#uses=2]
>> + %tmp14 = icmp eq i32 %b_addr.021.0.ph, %tmp10 ; <i1> [#uses=1]
>> + br i1 %tmp14, label %bb17, label %bb.outer
>> +
>> +bb17: ; preds = %cond_false, %cond_true, %entry
>> %a_addr.026.1 = phi i32 [ %a, %entry ], [ %tmp7, %cond_true ], [ %a_addr.026.0, %cond_false ] ; <i32> [#uses=1]
>> ret i32 %a_addr.026.1
>> }
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list