[llvm] r271925 - [MBP] Reduce code size by running tail merging in MBP.
Diana Picus via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 7 09:11:29 PDT 2016
On 7 June 2016 at 18:24, Haicheng Wu <haicheng at codeaurora.org> wrote:
> Hi Diana,
>
> Sorry for this. I revert the patch in r272017 . If possible, would you please send me a reduced test case? Thank you.
Thanks, I'm currently reducing it on a machine similar to the bot, but
it might take a while. I'll get back to you when it's done.
>
> Best,
>
> Haicheng
>
> -----Original Message-----
> From: Diana Picus [mailto:diana.picus at linaro.org]
> Sent: Tuesday, June 07, 2016 10:00 AM
> To: Haicheng Wu <haicheng at codeaurora.org>
> Cc: llvm-commits <llvm-commits at lists.llvm.org>; Renato Golin <renato.golin at linaro.org>
> Subject: Re: [llvm] r271925 - [MBP] Reduce code size by running tail merging in MBP.
>
> Hi,
>
> We've done some investigations and it seems this commit broke our thumb selfhosting bot:
> http://lab.llvm.org:8011/builders/clang-cmake-thumbv7-a15-full-sh/builds/3602
>
> For some reason, we can't revert it. Can you help us fix this?
>
> I've attached the source file + script to reproduce the issue. I can work on a smaller reproducer if that helps.
>
> Thanks,
> Diana
>
> On 6 June 2016 at 21:36, Haicheng Wu via llvm-commits <llvm-commits at lists.llvm.org> wrote:
>> Author: haicheng
>> Date: Mon Jun 6 13:36:07 2016
>> New Revision: 271925
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=271925&view=rev
>> Log:
>> [MBP] Reduce code size by running tail merging in MBP.
>>
>> The code layout that TailMerging (inside BranchFolding) works on is
>> not the final layout optimized based on the branch probability.
>> Generally, after BlockPlacement, many new merging opportunities emerge.
>>
>> This patch calls Tail Merging after MBP and calls MBP again if Tail
>> Merging merges anything.
>>
>> Differential Revision: http://reviews.llvm.org/D20276
>>
>> Added:
>> llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll
>> Modified:
>> llvm/trunk/lib/CodeGen/BranchFolding.cpp
>> llvm/trunk/lib/CodeGen/BranchFolding.h
>> llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>> llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFoldi
>> ng.cpp?rev=271925&r1=271924&r2=271925&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/CodeGen/BranchFolding.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/BranchFolding.cpp Mon Jun 6 13:36:07 2016
>> @@ -27,6 +27,7 @@
>> #include "llvm/CodeGen/MachineFunctionPass.h"
>> #include "llvm/CodeGen/MachineJumpTableInfo.h"
>> #include "llvm/CodeGen/MachineMemOperand.h"
>> +#include "llvm/CodeGen/MachineLoopInfo.h"
>> #include "llvm/CodeGen/MachineModuleInfo.h"
>> #include "llvm/CodeGen/MachineRegisterInfo.h"
>> #include "llvm/CodeGen/Passes.h"
>> @@ -137,6 +138,8 @@ void BranchFolder::RemoveDeadBlock(Machi
>> // Remove the block.
>> MF->erase(MBB);
>> FuncletMembership.erase(MBB);
>> + if (MLI)
>> + MLI->removeBlock(MBB);
>> }
>>
>> /// OptimizeImpDefsBlock - If a basic block is just a bunch of
>> implicit_def @@ -193,18 +196,22 @@ bool
>> BranchFolder::OptimizeImpDefsBlock(
>> }
>>
>> /// OptimizeFunction - Perhaps branch folding, tail merging and other
>> -/// CFG optimizations on the given function.
>> +/// CFG optimizations on the given function. Block placement changes
>> +the layout /// and may create new tail merging opportunities.
>> bool BranchFolder::OptimizeFunction(MachineFunction &MF,
>> const TargetInstrInfo *tii,
>> const TargetRegisterInfo *tri,
>> - MachineModuleInfo *mmi) {
>> + MachineModuleInfo *mmi,
>> + MachineLoopInfo *mli, bool
>> + AfterPlacement) {
>> if (!tii) return false;
>>
>> TriedMerging.clear();
>>
>> + AfterBlockPlacement = AfterPlacement;
>> TII = tii;
>> TRI = tri;
>> MMI = mmi;
>> + MLI = mli;
>> RS = nullptr;
>>
>> // Use a RegScavenger to help update liveness when required.
>> @@ -230,7 +237,10 @@ bool BranchFolder::OptimizeFunction(Mach
>> bool MadeChangeThisIteration = true;
>> while (MadeChangeThisIteration) {
>> MadeChangeThisIteration = TailMergeBlocks(MF);
>> - MadeChangeThisIteration |= OptimizeBranches(MF);
>> + // No need to clean up if tail merging does not change anything after the
>> + // block placement.
>> + if (!AfterBlockPlacement || MadeChangeThisIteration)
>> + MadeChangeThisIteration |= OptimizeBranches(MF);
>> if (EnableHoistCommonCode)
>> MadeChangeThisIteration |= HoistCommonCode(MF);
>> MadeChange |= MadeChangeThisIteration; @@ -447,6 +457,11 @@
>> MachineBasicBlock *BranchFolder::SplitMB
>> // Splice the code over.
>> NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
>>
>> + // NewMBB belongs to the same loop as CurMBB.
>> + if (MLI)
>> + if (MachineLoop *ML = MLI->getLoopFor(&CurMBB))
>> + ML->addBasicBlockToLoop(NewMBB, MLI->getBase());
>> +
>> // NewMBB inherits CurMBB's block frequency.
>> MBBFreqInfo.setBlockFreq(NewMBB,
>> MBBFreqInfo.getBlockFreq(&CurMBB));
>>
>> @@ -934,23 +949,27 @@ bool BranchFolder::TailMergeBlocks(Machi
>> if (!EnableTailMerge) return MadeChange;
>>
>> // First find blocks with no successors.
>> - MergePotentials.clear();
>> - for (MachineBasicBlock &MBB : MF) {
>> + // Block placement does not create new tail merging opportunities
>> + for these // blocks.
>> + if (!AfterBlockPlacement) {
>> + MergePotentials.clear();
>> + for (MachineBasicBlock &MBB : MF) {
>> + if (MergePotentials.size() == TailMergeThreshold)
>> + break;
>> + if (!TriedMerging.count(&MBB) && MBB.succ_empty())
>> + MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
>> + }
>> +
>> + // If this is a large problem, avoid visiting the same basic blocks
>> + // multiple times.
>> if (MergePotentials.size() == TailMergeThreshold)
>> - break;
>> - if (!TriedMerging.count(&MBB) && MBB.succ_empty())
>> - MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(MBB), &MBB));
>> - }
>> + for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
>> + TriedMerging.insert(MergePotentials[i].getBlock());
>>
>> - // If this is a large problem, avoid visiting the same basic blocks
>> - // multiple times.
>> - if (MergePotentials.size() == TailMergeThreshold)
>> - for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
>> - TriedMerging.insert(MergePotentials[i].getBlock());
>> -
>> - // See if we can do any tail merging on those.
>> - if (MergePotentials.size() >= 2)
>> - MadeChange |= TryTailMergeBlocks(nullptr, nullptr);
>> + // See if we can do any tail merging on those.
>> + if (MergePotentials.size() >= 2)
>> + MadeChange |= TryTailMergeBlocks(nullptr, nullptr); }
>>
>> // Look at blocks (IBB) with multiple predecessors (PBB).
>> // We change each predecessor to a canonical form, by @@ -997,6
>> +1016,17 @@ bool BranchFolder::TailMergeBlocks(Machi
>> if (PBB->hasEHPadSuccessor())
>> continue;
>>
>> + // Bail out if the loop header (IBB) is not the top of the loop chain
>> + // after the block placement. Otherwise, the common tail of IBB's
>> + // predecessors may become the loop top if block placement is called again
>> + // and the predecessors may branch to this common tail.
>> + // FIXME: Relaxed this check if the algorithm of finding loop top is
>> + // changed in MBP.
>> + if (AfterBlockPlacement && MLI)
>> + if (MachineLoop *ML = MLI->getLoopFor(IBB))
>> + if (IBB == ML->getHeader() && ML == MLI->getLoopFor(PBB))
>> + continue;
>> +
>> MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
>> SmallVector<MachineOperand, 4> Cond;
>> if (!TII->AnalyzeBranch(*PBB, TBB, FBB, Cond, true)) {
>>
>> Modified: llvm/trunk/lib/CodeGen/BranchFolding.h
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFoldi
>> ng.h?rev=271925&r1=271924&r2=271925&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/CodeGen/BranchFolding.h (original)
>> +++ llvm/trunk/lib/CodeGen/BranchFolding.h Mon Jun 6 13:36:07 2016
>> @@ -20,6 +20,7 @@ namespace llvm {
>> class MachineBranchProbabilityInfo;
>> class MachineFunction;
>> class MachineModuleInfo;
>> + class MachineLoopInfo;
>> class RegScavenger;
>> class TargetInstrInfo;
>> class TargetRegisterInfo;
>> @@ -32,10 +33,11 @@ namespace llvm {
>> MBFIWrapper &MBFI,
>> const MachineBranchProbabilityInfo &MBPI);
>>
>> - bool OptimizeFunction(MachineFunction &MF,
>> - const TargetInstrInfo *tii,
>> - const TargetRegisterInfo *tri,
>> - MachineModuleInfo *mmi);
>> + bool OptimizeFunction(MachineFunction &MF, const TargetInstrInfo *tii,
>> + const TargetRegisterInfo *tri, MachineModuleInfo *mmi,
>> + MachineLoopInfo *mli = nullptr,
>> + bool AfterPlacement = false);
>> +
>> private:
>> class MergePotentialsElt {
>> unsigned Hash;
>> @@ -93,11 +95,13 @@ namespace llvm {
>> };
>> std::vector<SameTailElt> SameTails;
>>
>> + bool AfterBlockPlacement;
>> bool EnableTailMerge;
>> bool EnableHoistCommonCode;
>> const TargetInstrInfo *TII;
>> const TargetRegisterInfo *TRI;
>> MachineModuleInfo *MMI;
>> + MachineLoopInfo *MLI;
>> RegScavenger *RS;
>>
>> public:
>>
>> Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBloc
>> kPlacement.cpp?rev=271925&r1=271924&r2=271925&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Mon Jun 6
>> +++ 13:36:07 2016
>> @@ -26,6 +26,8 @@
>>
>> //===-----------------------------------------------------------------
>> -----===//
>>
>> #include "llvm/CodeGen/Passes.h"
>> +#include "llvm/CodeGen/TargetPassConfig.h"
>> +#include "BranchFolding.h"
>> #include "llvm/ADT/DenseMap.h"
>> #include "llvm/ADT/SmallPtrSet.h"
>> #include "llvm/ADT/SmallVector.h"
>> @@ -116,6 +118,12 @@ static cl::opt<unsigned> JumpInstCost("j
>> cl::desc("Cost of jump instructions."),
>> cl::init(1), cl::Hidden);
>>
>> +static cl::opt<bool>
>> +BranchFoldPlacement("branch-fold-placement",
>> + cl::desc("Perform branch folding during placement. "
>> + "Reduces code size."),
>> + cl::init(true), cl::Hidden);
>> +
>> extern cl::opt<unsigned> StaticLikelyProb;
>>
>> namespace {
>> @@ -232,10 +240,10 @@ class MachineBlockPlacement : public Mac
>> const MachineBranchProbabilityInfo *MBPI;
>>
>> /// \brief A handle to the function-wide block frequency pass.
>> - const MachineBlockFrequencyInfo *MBFI;
>> + std::unique_ptr<BranchFolder::MBFIWrapper> MBFI;
>>
>> /// \brief A handle to the loop info.
>> - const MachineLoopInfo *MLI;
>> + MachineLoopInfo *MLI;
>>
>> /// \brief A handle to the target's instruction info.
>> const TargetInstrInfo *TII;
>> @@ -323,6 +331,7 @@ public:
>> AU.addRequired<MachineBlockFrequencyInfo>();
>> AU.addRequired<MachineDominatorTree>();
>> AU.addRequired<MachineLoopInfo>();
>> + AU.addRequired<TargetPassConfig>();
>> MachineFunctionPass::getAnalysisUsage(AU);
>> }
>> };
>> @@ -1462,7 +1471,8 @@ bool MachineBlockPlacement::runOnMachine
>> return false;
>>
>> MBPI = &getAnalysis<MachineBranchProbabilityInfo>();
>> - MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
>> + MBFI = llvm::make_unique<BranchFolder::MBFIWrapper>(
>> + getAnalysis<MachineBlockFrequencyInfo>());
>> MLI = &getAnalysis<MachineLoopInfo>();
>> TII = F.getSubtarget().getInstrInfo();
>> TLI = F.getSubtarget().getTargetLowering();
>> @@ -1470,6 +1480,29 @@ bool MachineBlockPlacement::runOnMachine
>> assert(BlockToChain.empty());
>>
>> buildCFGChains(F);
>> +
>> + // Changing the layout can create new tail merging opportunities.
>> + TargetPassConfig *PassConfig = &getAnalysis<TargetPassConfig>();
>> + // TailMerge can create jump into if branches that make CFG
>> + irreducible for // HW that requires structurized CFG.
>> + bool EnableTailMerge = !F.getTarget().requiresStructuredCFG() &&
>> + PassConfig->getEnableTailMerge() &&
>> + BranchFoldPlacement; // No tail merging
>> + opportunities if the block number is less than four.
>> + if (F.size() > 3 && EnableTailMerge) {
>> + BranchFolder BF(/*EnableTailMerge=*/true, /*CommonHoist=*/false, *MBFI,
>> + *MBPI);
>> +
>> + if (BF.OptimizeFunction(F, TII, F.getSubtarget().getRegisterInfo(),
>> + getAnalysisIfAvailable<MachineModuleInfo>(), MLI,
>> + /*AfterBlockPlacement=*/true)) {
>> + // Redo the layout if tail merging creates/removes/moves blocks.
>> + BlockToChain.clear();
>> + ChainAllocator.DestroyAll();
>> + buildCFGChains(F);
>> + }
>> + }
>> +
>> optimizeBranches(F);
>> alignBlocks(F);
>>
>>
>> Added: llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/AArch64/ta
>> ilmerging_in_mbp.ll?rev=271925&view=auto
>> ======================================================================
>> ========
>> --- llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll (added)
>> +++ llvm/trunk/test/CodeGen/AArch64/tailmerging_in_mbp.ll Mon Jun 6
>> +++ 13:36:07 2016
>> @@ -0,0 +1,63 @@
>> +; RUN: llc <%s -march=aarch64 | FileCheck %s
>> +
>> +; CHECK-LABEL: test:
>> +; CHECK: .LBB0_7
>> +; CHECK: b.hi .LBB0_2
>> +; CHECK-NEXT: b .LBB0_9
>> +; CHECK-NEXT: .LBB0_8
>> +; CHECK-NEXT: mov x8, x9
>> +; CHECK-NEXT: .LBB0_9
>> +define i64 @test(i64 %n, i64* %a, i64* %b, i64* %c, i64* %d, i64* %e,
>> +i64* %f) {
>> +entry:
>> + %cmp28 = icmp sgt i64 %n, 1
>> + br i1 %cmp28, label %for.body, label %for.end
>> +
>> +for.body: ; preds = %for.body.lr.ph, %if.end
>> + %j = phi i64 [ %n, %entry ], [ %div, %if.end ]
>> + %div = lshr i64 %j, 1
>> + %a.arrayidx = getelementptr inbounds i64, i64* %a, i64 %div
>> + %a.j = load i64, i64* %a.arrayidx
>> + %b.arrayidx = getelementptr inbounds i64, i64* %b, i64 %div
>> + %b.j = load i64, i64* %b.arrayidx
>> + %cmp.i = icmp slt i64 %a.j, %b.j
>> + br i1 %cmp.i, label %for.end.loopexit, label %cond.false.i
>> +
>> +cond.false.i: ; preds = %for.body
>> + %cmp4.i = icmp sgt i64 %a.j, %b.j
>> + br i1 %cmp4.i, label %if.end, label %cond.false6.i
>> +
>> +cond.false6.i: ; preds = %cond.false.i
>> + %c.arrayidx = getelementptr inbounds i64, i64* %c, i64 %div
>> + %c.j = load i64, i64* %c.arrayidx
>> + %d.arrayidx = getelementptr inbounds i64, i64* %d, i64 %div
>> + %d.j = load i64, i64* %d.arrayidx
>> + %cmp9.i = icmp slt i64 %c.j, %d.j
>> + br i1 %cmp9.i, label %for.end.loopexit, label %cond.false11.i
>> +
>> +cond.false11.i: ; preds = %cond.false6.i
>> + %cmp14.i = icmp sgt i64 %c.j, %d.j
>> + br i1 %cmp14.i, label %if.end, label %cond.false12.i
>> +
>> +cond.false12.i: ; preds = %cond.false11.i
>> + %e.arrayidx = getelementptr inbounds i64, i64* %e, i64 %div
>> + %e.j = load i64, i64* %e.arrayidx
>> + %f.arrayidx = getelementptr inbounds i64, i64* %f, i64 %div
>> + %f.j = load i64, i64* %f.arrayidx
>> + %cmp19.i = icmp sgt i64 %e.j, %f.j
>> + br i1 %cmp19.i, label %if.end, label %for.end.loopexit
>> +
>> +if.end: ; preds = %cond.false12.i, %cond.false11.i, %cond.false.i
>> + %cmp = icmp ugt i64 %j, 3
>> + br i1 %cmp, label %for.body, label %for.end.loopexit
>> +
>> +for.end.loopexit: ; preds = %cond.false12.i, %cond.false6.i, %for.body, %if.end
>> + %j.0.lcssa.ph = phi i64 [ %j, %cond.false12.i ], [ %j,
>> +%cond.false6.i ], [ %j, %for.body ], [ %div, %if.end ]
>> + br label %for.end
>> +
>> +for.end: ; preds = %for.end.loopexit, %entry
>> + %j.0.lcssa = phi i64 [ %n, %entry ], [ %j.0.lcssa.ph,
>> +%for.end.loopexit ]
>> + %j.2 = add i64 %j.0.lcssa, %n
>> + %j.3 = mul i64 %j.2, %n
>> + %j.4 = add i64 %j.3, 10
>> + ret i64 %j.4
>> +}
>>
>> Modified: llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll
>> URL:
>> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/ARM/arm-an
>> d-tst-peephole.ll?rev=271925&r1=271924&r2=271925&view=diff
>> ======================================================================
>> ========
>> --- llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll (original)
>> +++ llvm/trunk/test/CodeGen/ARM/arm-and-tst-peephole.ll Mon Jun 6
>> +++ 13:36:07 2016
>> @@ -49,7 +49,7 @@ tailrecurse.switch:
>> ; V8-NEXT: beq
>> ; V8-NEXT: %tailrecurse.switch
>> ; V8: cmp
>> -; V8-NEXT: bne
>> +; V8-NEXT: beq
>> ; V8-NEXT: b
>> ; The trailing space in the last line checks that the branch is unconditional
>> switch i32 %and, label %sw.epilog [
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list