[llvm] ac8da31 - [PGO][PGSO] Handle MBFIWrapper
Hiroshi Yamauchi via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 31 09:37:17 PST 2020
Author: Hiroshi Yamauchi
Date: 2020-01-31T09:36:55-08:00
New Revision: ac8da31a0f9895685e34e661d44075f46b559fbe
URL: https://github.com/llvm/llvm-project/commit/ac8da31a0f9895685e34e661d44075f46b559fbe
DIFF: https://github.com/llvm/llvm-project/commit/ac8da31a0f9895685e34e661d44075f46b559fbe.diff
LOG: [PGO][PGSO] Handle MBFIWrapper
Some code gen passes use MBFIWrapper to keep track of the frequency of new
blocks. This was not taken into account and could lead to incorrect frequencies
as MBFI silently returns zero frequency for unknown/new blocks.
Add a variant for MBFIWrapper in the PGSO query interface.
Depends on D73494.
Added:
Modified:
llvm/include/llvm/CodeGen/MachineSizeOpts.h
llvm/include/llvm/CodeGen/TailDuplicator.h
llvm/include/llvm/Transforms/Utils/SizeOpts.h
llvm/lib/CodeGen/BranchFolding.cpp
llvm/lib/CodeGen/MachineBlockPlacement.cpp
llvm/lib/CodeGen/MachineSizeOpts.cpp
llvm/lib/CodeGen/TailDuplication.cpp
llvm/lib/CodeGen/TailDuplicator.cpp
llvm/lib/Transforms/Utils/SizeOpts.cpp
llvm/test/CodeGen/X86/tail-opts.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/MachineSizeOpts.h b/llvm/include/llvm/CodeGen/MachineSizeOpts.h
index 3b02d0860ea1..07bbbad8d9c9 100644
--- a/llvm/include/llvm/CodeGen/MachineSizeOpts.h
+++ b/llvm/include/llvm/CodeGen/MachineSizeOpts.h
@@ -21,6 +21,7 @@ class ProfileSummaryInfo;
class MachineBasicBlock;
class MachineBlockFrequencyInfo;
class MachineFunction;
+class MBFIWrapper;
/// Returns true if machine function \p MF is suggested to be size-optimized
/// based on the profile.
@@ -33,6 +34,12 @@ bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType = PGSOQueryType::Other);
+/// Returns true if machine basic block \p MBB is suggested to be size-optimized
+/// based on the profile.
+bool shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ MBFIWrapper *MBFIWrapper,
+ PGSOQueryType QueryType = PGSOQueryType::Other);
} // end namespace llvm
diff --git a/llvm/include/llvm/CodeGen/TailDuplicator.h b/llvm/include/llvm/CodeGen/TailDuplicator.h
index e0623a3193e5..5b2de1c32072 100644
--- a/llvm/include/llvm/CodeGen/TailDuplicator.h
+++ b/llvm/include/llvm/CodeGen/TailDuplicator.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include <utility>
#include <vector>
@@ -42,7 +43,7 @@ class TailDuplicator {
const MachineModuleInfo *MMI;
MachineRegisterInfo *MRI;
MachineFunction *MF;
- const MachineBlockFrequencyInfo *MBFI;
+ MBFIWrapper *MBFI;
ProfileSummaryInfo *PSI;
bool PreRegAlloc;
bool LayoutMode;
@@ -69,7 +70,7 @@ class TailDuplicator {
/// default implies using the command line value TailDupSize.
void initMF(MachineFunction &MF, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPI,
- const MachineBlockFrequencyInfo *MBFI,
+ MBFIWrapper *MBFI,
ProfileSummaryInfo *PSI,
bool LayoutMode, unsigned TailDupSize = 0);
diff --git a/llvm/include/llvm/Transforms/Utils/SizeOpts.h b/llvm/include/llvm/Transforms/Utils/SizeOpts.h
index 1137c89195a6..daead1443353 100644
--- a/llvm/include/llvm/Transforms/Utils/SizeOpts.h
+++ b/llvm/include/llvm/Transforms/Utils/SizeOpts.h
@@ -63,10 +63,9 @@ bool shouldFuncOptimizeForSizeImpl(const FuncT *F, ProfileSummaryInfo *PSI,
F, PSI, *BFI);
}
-template<typename AdapterT, typename BlockT, typename BFIT>
-bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
+template<typename AdapterT, typename BlockTOrBlockFreq, typename BFIT>
+bool shouldOptimizeForSizeImpl(BlockTOrBlockFreq BBOrBlockFreq, ProfileSummaryInfo *PSI,
BFIT *BFI, PGSOQueryType QueryType) {
- assert(BB);
if (!PSI || !BFI || !PSI->hasProfileSummary())
return false;
if (ForcePGSO)
@@ -81,11 +80,11 @@ bool shouldOptimizeForSizeImpl(const BlockT *BB, ProfileSummaryInfo *PSI,
if (PGSOColdCodeOnly ||
(PGSOLargeWorkingSetSizeOnly && !PSI->hasLargeWorkingSetSize())) {
// Even if the working set size isn't large, size-optimize cold code.
- return AdapterT::isColdBlock(BB, PSI, BFI);
+ return AdapterT::isColdBlock(BBOrBlockFreq, PSI, BFI);
}
return !AdapterT::isHotBlockNthPercentile(
PSI->hasSampleProfile() ? PgsoCutoffSampleProf : PgsoCutoffInstrProf,
- BB, PSI, BFI);
+ BBOrBlockFreq, PSI, BFI);
}
/// Returns true if function \p F is suggested to be size-optimized based on the
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index eb4e076a06a4..e355e16e7768 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -655,8 +655,8 @@ ProfitableToMerge(MachineBasicBlock *MBB1, MachineBasicBlock *MBB2,
MachineFunction *MF = MBB1->getParent();
bool OptForSize =
MF->getFunction().hasOptSize() ||
- (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo.getMBFI()) &&
- llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo.getMBFI()));
+ (llvm::shouldOptimizeForSize(MBB1, PSI, &MBBFreqInfo) &&
+ llvm::shouldOptimizeForSize(MBB2, PSI, &MBBFreqInfo));
return EffectiveTailLen >= 2 && OptForSize &&
(FullBlockTail1 || FullBlockTail2);
}
@@ -1511,7 +1511,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool OptForSize =
MF.getFunction().hasOptSize() ||
- llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo.getMBFI());
+ llvm::shouldOptimizeForSize(MBB, PSI, &MBBFreqInfo);
if (!IsEmptyBlock(MBB) && MBB->pred_size() == 1 && OptForSize) {
// Changing "Jcc foo; foo: jmp bar;" into "Jcc bar;" might change the branch
// direction, thereby defeating careful block placement and regressing
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 9dfa8d76e09a..06e2f46f892f 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -2082,8 +2082,7 @@ MachineBlockPlacement::findBestLoopTop(const MachineLoop &L,
// In practice this never happens though: there always seems to be a preheader
// that can fallthrough and that is also placed before the header.
bool OptForSize = F->getFunction().hasOptSize() ||
- llvm::shouldOptimizeForSize(L.getHeader(), PSI,
- &MBFI->getMBFI());
+ llvm::shouldOptimizeForSize(L.getHeader(), PSI, MBFI.get());
if (OptForSize)
return L.getHeader();
@@ -2841,7 +2840,7 @@ void MachineBlockPlacement::alignBlocks() {
continue;
// If the global profiles indicates so, don't align it.
- if (llvm::shouldOptimizeForSize(ChainBB, PSI, &MBFI->getMBFI()) &&
+ if (llvm::shouldOptimizeForSize(ChainBB, PSI, MBFI.get()) &&
!TLI->alignLoopsWithOptSize())
continue;
@@ -3088,7 +3087,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
if (OptForSize)
TailDupSize = 1;
bool PreRegAlloc = false;
- TailDup.initMF(MF, PreRegAlloc, MBPI, &MBFI->getMBFI(), PSI,
+ TailDup.initMF(MF, PreRegAlloc, MBPI, MBFI.get(), PSI,
/* LayoutMode */ true, TailDupSize);
precomputeTriangleChains();
}
diff --git a/llvm/lib/CodeGen/MachineSizeOpts.cpp b/llvm/lib/CodeGen/MachineSizeOpts.cpp
index aff67f9cfd55..a181b540d180 100644
--- a/llvm/lib/CodeGen/MachineSizeOpts.cpp
+++ b/llvm/lib/CodeGen/MachineSizeOpts.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/CodeGen/MachineSizeOpts.h"
+#include "llvm/CodeGen/MBFIWrapper.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
@@ -33,6 +34,13 @@ bool isColdBlock(const MachineBasicBlock *MBB,
return Count && PSI->isColdCount(*Count);
}
+bool isColdBlock(BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+ return Count && PSI->isColdCount(*Count);
+}
+
/// Like ProfileSummaryInfo::isHotBlockNthPercentile but for MachineBasicBlock.
static bool isHotBlockNthPercentile(int PercentileCutoff,
const MachineBasicBlock *MBB,
@@ -42,6 +50,14 @@ static bool isHotBlockNthPercentile(int PercentileCutoff,
return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
}
+static bool isHotBlockNthPercentile(int PercentileCutoff,
+ BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ auto Count = MBFI->getProfileCountFromFreq(BlockFreq.getFrequency());
+ return Count && PSI->isHotCountNthPercentile(PercentileCutoff, *Count);
+}
+
/// Like ProfileSummaryInfo::isFunctionColdInCallGraph but for
/// MachineFunction.
bool isFunctionColdInCallGraph(
@@ -95,6 +111,11 @@ struct MachineBasicBlockBFIAdapter {
const MachineBlockFrequencyInfo *MBFI) {
return machine_size_opts_detail::isColdBlock(MBB, PSI, MBFI);
}
+ static bool isColdBlock(BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isColdBlock(BlockFreq, PSI, MBFI);
+ }
static bool isHotBlockNthPercentile(int CutOff,
const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
@@ -102,6 +123,13 @@ struct MachineBasicBlockBFIAdapter {
return machine_size_opts_detail::isHotBlockNthPercentile(
CutOff, MBB, PSI, MBFI);
}
+ static bool isHotBlockNthPercentile(int CutOff,
+ BlockFrequency BlockFreq,
+ ProfileSummaryInfo *PSI,
+ const MachineBlockFrequencyInfo *MBFI) {
+ return machine_size_opts_detail::isHotBlockNthPercentile(
+ CutOff, BlockFreq, PSI, MBFI);
+ }
};
} // end anonymous namespace
@@ -117,6 +145,19 @@ bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
ProfileSummaryInfo *PSI,
const MachineBlockFrequencyInfo *MBFI,
PGSOQueryType QueryType) {
+ assert(MBB);
return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
MBB, PSI, MBFI, QueryType);
}
+
+bool llvm::shouldOptimizeForSize(const MachineBasicBlock *MBB,
+ ProfileSummaryInfo *PSI,
+ MBFIWrapper *MBFIW,
+ PGSOQueryType QueryType) {
+ assert(MBB);
+ if (!PSI || !MBFIW)
+ return false;
+ BlockFrequency BlockFreq = MBFIW->getBlockFreq(MBB);
+ return shouldOptimizeForSizeImpl<MachineBasicBlockBFIAdapter>(
+ BlockFreq, PSI, &MBFIW->getMBFI(), QueryType);
+}
diff --git a/llvm/lib/CodeGen/TailDuplication.cpp b/llvm/lib/CodeGen/TailDuplication.cpp
index 648bf48b7d17..20892a79d35f 100644
--- a/llvm/lib/CodeGen/TailDuplication.cpp
+++ b/llvm/lib/CodeGen/TailDuplication.cpp
@@ -31,6 +31,7 @@ namespace {
class TailDuplicateBase : public MachineFunctionPass {
TailDuplicator Duplicator;
+ std::unique_ptr<MBFIWrapper> MBFIW;
bool PreRegAlloc;
public:
TailDuplicateBase(char &PassID, bool PreRegAlloc)
@@ -88,7 +89,10 @@ bool TailDuplicateBase::runOnMachineFunction(MachineFunction &MF) {
auto *MBFI = (PSI && PSI->hasProfileSummary()) ?
&getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI() :
nullptr;
- Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI, PSI, /*LayoutMode=*/false);
+ if (MBFI)
+ MBFIW = std::make_unique<MBFIWrapper>(*MBFI);
+ Duplicator.initMF(MF, PreRegAlloc, MBPI, MBFI ? MBFIW.get() : nullptr, PSI,
+ /*LayoutMode=*/false);
bool MadeChange = false;
while (Duplicator.tailDuplicateBlocks())
diff --git a/llvm/lib/CodeGen/TailDuplicator.cpp b/llvm/lib/CodeGen/TailDuplicator.cpp
index 45eef7d84db6..87db1647034b 100644
--- a/llvm/lib/CodeGen/TailDuplicator.cpp
+++ b/llvm/lib/CodeGen/TailDuplicator.cpp
@@ -80,7 +80,7 @@ static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
const MachineBranchProbabilityInfo *MBPIin,
- const MachineBlockFrequencyInfo *MBFIin,
+ MBFIWrapper *MBFIin,
ProfileSummaryInfo *PSIin,
bool LayoutModeIn, unsigned TailDupSizeIn) {
MF = &MFin;
diff --git a/llvm/lib/Transforms/Utils/SizeOpts.cpp b/llvm/lib/Transforms/Utils/SizeOpts.cpp
index d2a400027d4b..a959cc6ce1a4 100644
--- a/llvm/lib/Transforms/Utils/SizeOpts.cpp
+++ b/llvm/lib/Transforms/Utils/SizeOpts.cpp
@@ -84,6 +84,7 @@ bool llvm::shouldOptimizeForSize(const Function *F, ProfileSummaryInfo *PSI,
bool llvm::shouldOptimizeForSize(const BasicBlock *BB, ProfileSummaryInfo *PSI,
BlockFrequencyInfo *BFI,
PGSOQueryType QueryType) {
+ assert(BB);
return shouldOptimizeForSizeImpl<BasicBlockBFIAdapter>(BB, PSI, BFI,
QueryType);
}
diff --git a/llvm/test/CodeGen/X86/tail-opts.ll b/llvm/test/CodeGen/X86/tail-opts.ll
index 32ad04a65fae..89c26df49b87 100644
--- a/llvm/test/CodeGen/X86/tail-opts.ll
+++ b/llvm/test/CodeGen/X86/tail-opts.ll
@@ -846,6 +846,72 @@ cont4:
ret void
}
+; This triggers a situation where a new block (bb4 is split) is created and then
+; would be passed to the PGSO interface llvm::shouldOptimizeForSize().
+ at GV = global i32 0
+define void @bfi_new_block_pgso(i32 %c) nounwind {
+; CHECK-LABEL: bfi_new_block_pgso:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: testl %edi, %edi
+; CHECK-NEXT: je .LBB14_4
+; CHECK-NEXT: # %bb.1: # %bb1
+; CHECK-NEXT: pushq %rax
+; CHECK-NEXT: cmpl $16, %edi
+; CHECK-NEXT: je .LBB14_6
+; CHECK-NEXT: # %bb.2: # %bb1
+; CHECK-NEXT: cmpl $17, %edi
+; CHECK-NEXT: je .LBB14_7
+; CHECK-NEXT: # %bb.3: # %bb4
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: jmp tail_call_me # TAILCALL
+; CHECK-NEXT: .LBB14_4: # %bb5
+; CHECK-NEXT: cmpl $128, %edi
+; CHECK-NEXT: jne .LBB14_8
+; CHECK-NEXT: # %bb.5: # %return
+; CHECK-NEXT: retq
+; CHECK-NEXT: .LBB14_6: # %bb3
+; CHECK-NEXT: movl $0, {{.*}}(%rip)
+; CHECK-NEXT: .LBB14_7: # %bb4
+; CHECK-NEXT: callq func
+; CHECK-NEXT: popq %rax
+; CHECK-NEXT: .LBB14_8: # %bb6
+; CHECK-NEXT: jmp tail_call_me # TAILCALL
+entry:
+ %0 = icmp eq i32 %c, 0
+ br i1 %0, label %bb5, label %bb1
+
+bb1:
+ switch i32 %c, label %bb4 [
+ i32 16, label %bb3
+ i32 17, label %bb2
+ ]
+
+bb2:
+ call void @func()
+ br label %bb4
+
+bb3:
+ store i32 0, i32* @GV
+ call void @func()
+ br label %bb4
+
+bb4:
+ tail call void @tail_call_me()
+ br label %return
+
+bb5:
+ switch i32 %c, label %bb6 [
+ i32 128, label %return
+ ]
+
+bb6:
+ tail call void @tail_call_me()
+ br label %return
+
+return:
+ ret void
+}
+
!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
More information about the llvm-commits
mailing list