[llvm] [CodeGen][NPM] Port MachineBlockPlacement to NPM (PR #129828)
Akshat Oke via llvm-commits
llvm-commits at lists.llvm.org
Mon Mar 10 23:42:02 PDT 2025
https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/129828
>From 0f98512f573eae3288726bfe7495c1443e374cae Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 5 Mar 2025 05:00:50 +0000
Subject: [PATCH 1/4] [CodeGen][NPM] Port MachineBlockPlacement to NPM
---
.../llvm/CodeGen/MachineBlockPlacement.h | 30 +++++
llvm/include/llvm/InitializePasses.h | 2 +-
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 1 +
.../llvm/Passes/MachinePassRegistry.def | 15 ++-
llvm/lib/CodeGen/CodeGen.cpp | 2 +-
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 119 +++++++++++++-----
llvm/lib/Passes/PassBuilder.cpp | 1 +
.../AArch64/pauthlr-prologue-duplication.mir | 1 +
.../CodeGen/AMDGPU/loop_header_nopred.mir | 2 +
llvm/test/CodeGen/X86/block-placement.mir | 1 +
10 files changed, 138 insertions(+), 36 deletions(-)
create mode 100644 llvm/include/llvm/CodeGen/MachineBlockPlacement.h
diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
new file mode 100644
index 0000000000000..8003b52fa6a3c
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
@@ -0,0 +1,30 @@
+//===- llvm/CodeGen/MachineBlockPlacement.h ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
+#define LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class MachineBlockPlacementPass
+ : public PassInfoMixin<MachineBlockPlacementPass> {
+
+ bool AllowTailMerge = true;
+
+public:
+ MachineBlockPlacementPass(bool AllowTailMerge)
+ : AllowTailMerge(AllowTailMerge) {}
+ PreservedAnalyses run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index a05e876806ab5..a27b5630b308e 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -184,7 +184,7 @@ void initializeMIRCanonicalizerPass(PassRegistry &);
void initializeMIRNamerPass(PassRegistry &);
void initializeMIRPrintingPassPass(PassRegistry &);
void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
-void initializeMachineBlockPlacementPass(PassRegistry &);
+void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
void initializeMachineBlockPlacementStatsPass(PassRegistry &);
void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
void initializeMachineCFGPrinterPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 25899d04dc664..d5abc672dc02b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -46,6 +46,7 @@
#include "llvm/CodeGen/LocalStackSlotAllocation.h"
#include "llvm/CodeGen/LowerEmuTLS.h"
#include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineBlockPlacement.h"
#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/CodeGen/MachineCopyPropagation.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index f99a5f2c74bf3..11b8ff81211d9 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -193,6 +193,20 @@ MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", MachineTraceMetricsVerifi
#define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER, \
PARAMS)
#endif
+
+MACHINE_FUNCTION_PASS_WITH_PARAMS(
+ "block-placement", "MachineBlockPlacementPass",
+ [](bool NoTailMerge) {
+ // Tail merging is enabled by default, so this option
+ // is to disable it.
+ return MachineBlockPlacementPass(!NoTailMerge);
+ },
+ [](StringRef Params) {
+ return parseSinglePassOption(Params, "no-tail-merge",
+ "MachineBlockPlacementPass");
+ },
+ "no-tail-merge")
+
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"machine-sink", "MachineSinkingPass",
[](bool EnableSinkAndFold) {
@@ -242,7 +256,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", StripDebugMachineModulePass)
#endif
DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass)
DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", BasicBlockSectionsProfileReaderPass)
-DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass)
DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass)
DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass)
DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index beb7fb284a376..daa31073e7151 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -72,7 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
initializeMIRNamerPass(Registry);
initializeMIRProfileLoaderPassPass(Registry);
initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
- initializeMachineBlockPlacementPass(Registry);
+ initializeMachineBlockPlacementLegacyPass(Registry);
initializeMachineBlockPlacementStatsPass(Registry);
initializeMachineCFGPrinterPass(Registry);
initializeMachineCSELegacyPass(Registry);
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 9ccfadc318fa4..e968693e8e1ff 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -24,6 +24,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/MachineBlockPlacement.h"
#include "BranchFolding.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
@@ -357,7 +358,7 @@ class BlockChain {
unsigned UnscheduledPredecessors = 0;
};
-class MachineBlockPlacement : public MachineFunctionPass {
+class MachineBlockPlacement {
/// A type for a block filter set.
using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
@@ -409,7 +410,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
ProfileSummaryInfo *PSI = nullptr;
- TargetPassConfig *PassConfig = nullptr;
+ // Tail merging is also determined based on
+ // whether structured CFG is required.
+ bool AllowTailMerge;
+
+ CodeGenOptLevel OptLevel;
/// Duplicator used to duplicate tails during placement.
///
@@ -608,18 +613,48 @@ class MachineBlockPlacement : public MachineFunctionPass {
/// Create a single CFG chain from the current block order.
void createCFGChainExtTsp();
+public:
+ MachineBlockPlacement(const MachineBranchProbabilityInfo *MBPI,
+ MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
+ std::unique_ptr<MBFIWrapper> MBFI,
+ MachinePostDominatorTree *MPDT, bool AllowTailMerge)
+ : MBPI(MBPI), MBFI(std::move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
+ AllowTailMerge(AllowTailMerge) {};
+
+ bool run(MachineFunction &F);
+
+ static bool allowTailDupPlacement(MachineFunction &MF) {
+ return TailDupPlacement && !MF.getTarget().requiresStructuredCFG();
+ }
+};
+
+class MachineBlockPlacementLegacy : public MachineFunctionPass {
public:
static char ID; // Pass identification, replacement for typeid
- MachineBlockPlacement() : MachineFunctionPass(ID) {
- initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+ MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {
+ initializeMachineBlockPlacementLegacyPass(*PassRegistry::getPassRegistry());
}
- bool runOnMachineFunction(MachineFunction &F) override;
+ bool runOnMachineFunction(MachineFunction &MF) override {
+ if (skipFunction(MF.getFunction()))
+ return false;
- bool allowTailDupPlacement() const {
- assert(F);
- return TailDupPlacement && !F->getTarget().requiresStructuredCFG();
+ auto *MBPI =
+ &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+ auto MBFI = std::make_unique<MBFIWrapper>(
+ getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
+ auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
+ ? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
+ .getPostDomTree()
+ : nullptr;
+ auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+ auto *PassConfig = &getAnalysis<TargetPassConfig>();
+ bool AllowTailMerge = PassConfig->getEnableTailMerge();
+ return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
+ AllowTailMerge)
+ .run(MF);
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -636,18 +671,18 @@ class MachineBlockPlacement : public MachineFunctionPass {
} // end anonymous namespace
-char MachineBlockPlacement::ID = 0;
+char MachineBlockPlacementLegacy::ID = 0;
-char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+char &llvm::MachineBlockPlacementID = MachineBlockPlacementLegacy::ID;
-INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementLegacy, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
+INITIALIZE_PASS_END(MachineBlockPlacementLegacy, DEBUG_TYPE,
"Branch Probability Basic Block Placement", false, false)
#ifndef NDEBUG
@@ -1130,7 +1165,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
MachineBasicBlock *Succ1 = BestA.Dest;
MachineBasicBlock *Succ2 = BestB.Dest;
// Check to see if tail-duplication would be profitable.
- if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
+ if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ2) &&
canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
Chain, BlockFilter)) {
@@ -1655,7 +1690,7 @@ MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,
if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
Chain, BlockFilter)) {
// If tail duplication would make Succ profitable, place it.
- if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
+ if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ))
DupCandidates.emplace_back(SuccProb, Succ);
continue;
}
@@ -1883,7 +1918,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
MachineBasicBlock *BestSucc = Result.BB;
bool ShouldTailDup = Result.ShouldTailDup;
- if (allowTailDupPlacement())
+ if (allowTailDupPlacement(*F))
ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
BB, BestSucc, Chain, BlockFilter));
@@ -1910,7 +1945,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
// Placement may have changed tail duplication opportunities.
// Check for that now.
- if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
+ if (allowTailDupPlacement(*F) && BestSucc && ShouldTailDup) {
repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
BlockFilter, PrevUnplacedBlockIt,
PrevUnplacedBlockInFilterIt);
@@ -3466,7 +3501,7 @@ void MachineBlockPlacement::initTailDupThreshold() {
// For aggressive optimization, we can adjust some thresholds to be less
// conservative.
- if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) {
+ if (OptLevel >= CodeGenOptLevel::Aggressive) {
// At O3 we should be more willing to copy blocks for tail duplication. This
// increases size pressure, so we only do it at O3
// Do this unless only the regular threshold is explicitly set.
@@ -3478,29 +3513,48 @@ void MachineBlockPlacement::initTailDupThreshold() {
// If there's no threshold provided through options, query the target
// information for a threshold instead.
if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
- (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive ||
+ (OptLevel < CodeGenOptLevel::Aggressive ||
TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
- TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
+ TailDupSize = TII->getTailDuplicateSize(OptLevel);
}
-bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
- if (skipFunction(MF.getFunction()))
- return false;
+PreservedAnalyses
+MachineBlockPlacementPass::run(MachineFunction &MF,
+ MachineFunctionAnalysisManager &MFAM) {
+ auto *MBPI = &MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
+ auto MBFI = std::make_unique<MBFIWrapper>(
+ MFAM.getResult<MachineBlockFrequencyAnalysis>(MF));
+ auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
+ auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
+ ? &MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF)
+ : nullptr;
+ auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
+ .getCachedResult<ProfileSummaryAnalysis>(
+ *MF.getFunction().getParent());
+ if (!PSI)
+ report_fatal_error("MachineBlockPlacement requires ProfileSummaryAnalysis",
+ false);
+
+ MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
+ AllowTailMerge);
+
+ if (!MBP.run(MF))
+ return PreservedAnalyses::all();
+
+ return getMachineFunctionPassPreservedAnalyses();
+}
+
+bool MachineBlockPlacement::run(MachineFunction &MF) {
// Check for single-block functions and skip them.
if (std::next(MF.begin()) == MF.end())
return false;
F = &MF;
- MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
- MBFI = std::make_unique<MBFIWrapper>(
- getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
- MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+ OptLevel = F->getTarget().getOptLevel();
+
TII = MF.getSubtarget().getInstrInfo();
TLI = MF.getSubtarget().getTargetLowering();
- MPDT = nullptr;
- PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
- PassConfig = &getAnalysis<TargetPassConfig>();
// Initialize PreferredLoopExit to nullptr here since it may never be set if
// there are no MachineLoops.
@@ -3529,8 +3583,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
}
// Apply tail duplication.
- if (allowTailDupPlacement()) {
- MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
+ if (allowTailDupPlacement(*F)) {
if (OptForSize)
TailDupSize = 1;
const bool PreRegAlloc = false;
@@ -3548,8 +3601,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
// TailMerge can create jump into if branches that make CFG irreducible for
// HW that requires structured CFG.
const bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
- PassConfig->getEnableTailMerge() &&
- BranchFoldPlacement && MF.size() > 3;
+ AllowTailMerge && BranchFoldPlacement &&
+ MF.size() > 3;
// No tail merging opportunities if the block number is less than four.
if (EnableTailMerge) {
const unsigned TailMergeSize = TailDupSize + 1;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8080059f0bb03..0e01462cfc97e 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -110,6 +110,7 @@
#include "llvm/CodeGen/LowerEmuTLS.h"
#include "llvm/CodeGen/MIRPrinter.h"
#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBlockPlacement.h"
#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
#include "llvm/CodeGen/MachineCSE.h"
#include "llvm/CodeGen/MachineCopyPropagation.h"
diff --git a/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir b/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir
index 5e57604263793..7b107f8a24045 100644
--- a/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir
+++ b/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir
@@ -1,5 +1,6 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -mtriple aarch64-none-elf -run-pass=block-placement -O3 -o - %s | FileCheck %s
+# RUN: llc -mtriple aarch64-none-elf -passes='require<profile-summary>,function(machine-function(block-placement))' -O3 -o - %s | FileCheck %s
## Check that block-placement does not perform tail duplication on the
## PAUTH_EPILOGUE instruction. If that happened, the two prologues would use
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index efa24a9bee7de..f8ce9882b7a8e 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -2,6 +2,8 @@
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
+# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
+
# Used to fail with
# Assertion `Out && "Header of loop has no predecessors from outside loop?"
diff --git a/llvm/test/CodeGen/X86/block-placement.mir b/llvm/test/CodeGen/X86/block-placement.mir
index 3f69ca0a40ad3..de7a80718a297 100644
--- a/llvm/test/CodeGen/X86/block-placement.mir
+++ b/llvm/test/CodeGen/X86/block-placement.mir
@@ -1,4 +1,5 @@
# RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -run-pass=block-placement -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -passes='require<profile-summary>,function(machine-function(block-placement))' -o - %s | FileCheck %s
--- |
; ModuleID = 'test.ll'
>From ad832b49decbac1146e534f87cb64ff8c4112643 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 5 Mar 2025 07:07:42 +0000
Subject: [PATCH 2/4] fix param in codegenpassbuilder
---
llvm/include/llvm/Passes/CodeGenPassBuilder.h | 2 +-
llvm/include/llvm/Target/CGPassBuilderOption.h | 1 +
2 files changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index d5abc672dc02b..1dfbb6c7eec2e 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1224,7 +1224,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineLateOptimization(
template <typename Derived, typename TargetMachineT>
void CodeGenPassBuilder<Derived, TargetMachineT>::addBlockPlacement(
AddMachinePass &addPass) const {
- addPass(MachineBlockPlacementPass());
+ addPass(MachineBlockPlacementPass(Opt.EnableTailMerge));
// Run a separate pass to collect block placement statistics.
if (Opt.EnableBlockPlacementStats)
addPass(MachineBlockPlacementStatsPass());
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index f006ef1fcb40b..51f25c1360b87 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -50,6 +50,7 @@ struct CGPassBuilderOption {
bool EnableGlobalMergeFunc = false;
bool EnableMachineFunctionSplitter = false;
bool EnableSinkAndFold = false;
+ bool EnableTailMerge = true;
bool MISchedPostRA = false;
bool EarlyLiveIntervals = false;
bool GCEmptyBlocks = false;
>From 02c4c9587a80473f9c06912f818c6797eb871f82 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 5 Mar 2025 08:45:53 +0000
Subject: [PATCH 3/4] add options no-tail-merge and enable-tail-merge
---
llvm/include/llvm/Passes/MachinePassRegistry.def | 13 ++++---------
llvm/lib/Passes/PassBuilder.cpp | 13 +++++++++++++
llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir | 2 +-
3 files changed, 18 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 11b8ff81211d9..0156cf47e5881 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -196,16 +196,11 @@ MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", MachineTraceMetricsVerifi
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"block-placement", "MachineBlockPlacementPass",
- [](bool NoTailMerge) {
- // Tail merging is enabled by default, so this option
- // is to disable it.
- return MachineBlockPlacementPass(!NoTailMerge);
+ [](bool AllowTailMerge) {
+ // Default is true.
+ return MachineBlockPlacementPass(AllowTailMerge);
},
- [](StringRef Params) {
- return parseSinglePassOption(Params, "no-tail-merge",
- "MachineBlockPlacementPass");
- },
- "no-tail-merge")
+ parseMachineBlockPlacementPassOptions, "no-tail-merge;enable-tail-merge")
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"machine-sink", "MachineSinkingPass",
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 0e01462cfc97e..66472d706bea9 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1440,6 +1440,19 @@ Expected<bool> parseMachineSinkingPassOptions(StringRef Params) {
"MachineSinkingPass");
}
+Expected<bool> parseMachineBlockPlacementPassOptions(StringRef Params) {
+ bool AllowTailMerge = true;
+ if (Params == "no-tail-merge")
+ AllowTailMerge = false;
+ else if (!Params.empty() && Params != "enable-tail-merge")
+ return make_error<StringError>(
+ formatv("invalid MachineBlockPlacementPass parameter '{0}' ", Params)
+ .str(),
+ inconvertibleErrorCode());
+
+ return AllowTailMerge;
+}
+
} // namespace
/// Tests whether a pass name starts with a valid prefix for a default pipeline
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index f8ce9882b7a8e..6a1f82aed5fb9 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
-# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
+# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement<enable-tail-merge>))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
# Used to fail with
# Assertion `Out && "Header of loop has no predecessors from outside loop?"
>From e18d4c24ecef127aa0bb34c96c8f5b736a5132c0 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 11 Mar 2025 05:03:51 +0000
Subject: [PATCH 4/4] change pass option style
---
.../include/llvm/CodeGen/MachineBlockPlacement.h | 5 +++++
llvm/include/llvm/Passes/MachinePassRegistry.def | 2 +-
llvm/lib/CodeGen/MachineBlockPlacement.cpp | 8 ++++++++
llvm/lib/Passes/PassBuilder.cpp | 16 ++++++++--------
llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir | 2 +-
llvm/test/tools/llc/new-pm/option-parsing.mir | 13 +++++++++++++
6 files changed, 36 insertions(+), 10 deletions(-)
create mode 100644 llvm/test/tools/llc/new-pm/option-parsing.mir
diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
index 8003b52fa6a3c..733d24ab719a8 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
@@ -23,6 +23,11 @@ class MachineBlockPlacementPass
: AllowTailMerge(AllowTailMerge) {}
PreservedAnalyses run(MachineFunction &MF,
MachineFunctionAnalysisManager &MFAM);
+ static bool isRequired() { return true; }
+
+ void
+ printPipeline(raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) const;
};
} // namespace llvm
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 0156cf47e5881..517401b3f4ebe 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -200,7 +200,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
// Default is true.
return MachineBlockPlacementPass(AllowTailMerge);
},
- parseMachineBlockPlacementPassOptions, "no-tail-merge;enable-tail-merge")
+ parseMachineBlockPlacementPassOptions, "no-tail-merge;tail-merge")
MACHINE_FUNCTION_PASS_WITH_PARAMS(
"machine-sink", "MachineSinkingPass",
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e968693e8e1ff..40edc47f3e6bb 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3544,6 +3544,14 @@ MachineBlockPlacementPass::run(MachineFunction &MF,
return getMachineFunctionPassPreservedAnalyses();
}
+void MachineBlockPlacementPass::printPipeline(
+ raw_ostream &OS,
+ function_ref<StringRef(StringRef)> MapClassName2PassName) const {
+ OS << MapClassName2PassName(name());
+ if (!AllowTailMerge)
+ OS << "<no-tail-merge>";
+}
+
bool MachineBlockPlacement::run(MachineFunction &MF) {
// Check for single-block functions and skip them.
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 66472d706bea9..555349cbe0398 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1442,14 +1442,14 @@ Expected<bool> parseMachineSinkingPassOptions(StringRef Params) {
Expected<bool> parseMachineBlockPlacementPassOptions(StringRef Params) {
bool AllowTailMerge = true;
- if (Params == "no-tail-merge")
- AllowTailMerge = false;
- else if (!Params.empty() && Params != "enable-tail-merge")
- return make_error<StringError>(
- formatv("invalid MachineBlockPlacementPass parameter '{0}' ", Params)
- .str(),
- inconvertibleErrorCode());
-
+ if (!Params.empty()) {
+ AllowTailMerge = !Params.consume_front("no-");
+ if (Params != "tail-merge")
+ return make_error<StringError>(
+ formatv("invalid MachineBlockPlacementPass parameter '{0}' ", Params)
+ .str(),
+ inconvertibleErrorCode());
+ }
return AllowTailMerge;
}
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index 6a1f82aed5fb9..05cfe53224582 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -2,7 +2,7 @@
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s
# RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
-# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement<enable-tail-merge>))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
+# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement<tail-merge>))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
# Used to fail with
# Assertion `Out && "Header of loop has no predecessors from outside loop?"
diff --git a/llvm/test/tools/llc/new-pm/option-parsing.mir b/llvm/test/tools/llc/new-pm/option-parsing.mir
new file mode 100644
index 0000000000000..f0353d72cb9ec
--- /dev/null
+++ b/llvm/test/tools/llc/new-pm/option-parsing.mir
@@ -0,0 +1,13 @@
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -passes="block-placement<tail-merge>,block-placement<no-tail-merge>" -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK
+
+# RUN: not llc -mtriple=x86_64-unknown-linux-gnu -passes="block-placement<invalid-opt>" -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK-NOT
+
+# CHECK: block-placement,block-placement<no-tail-merge>
+# CHECK-NOT: invalid MachineBlockPlacementPass parameter 'invalid-opt'
+
+---
+name: f
+body: |
+ bb.0:
+ RET 0
+...
More information about the llvm-commits
mailing list