[llvm] [CodeGen][NPM] Port MachineBlockPlacement to NPM (PR #129828)

Akshat Oke via llvm-commits llvm-commits at lists.llvm.org
Mon Mar 10 23:42:02 PDT 2025


https://github.com/optimisan updated https://github.com/llvm/llvm-project/pull/129828

>From 0f98512f573eae3288726bfe7495c1443e374cae Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 5 Mar 2025 05:00:50 +0000
Subject: [PATCH 1/4] [CodeGen][NPM] Port MachineBlockPlacement to NPM

---
 .../llvm/CodeGen/MachineBlockPlacement.h      |  30 +++++
 llvm/include/llvm/InitializePasses.h          |   2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |   1 +
 .../llvm/Passes/MachinePassRegistry.def       |  15 ++-
 llvm/lib/CodeGen/CodeGen.cpp                  |   2 +-
 llvm/lib/CodeGen/MachineBlockPlacement.cpp    | 119 +++++++++++++-----
 llvm/lib/Passes/PassBuilder.cpp               |   1 +
 .../AArch64/pauthlr-prologue-duplication.mir  |   1 +
 .../CodeGen/AMDGPU/loop_header_nopred.mir     |   2 +
 llvm/test/CodeGen/X86/block-placement.mir     |   1 +
 10 files changed, 138 insertions(+), 36 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/MachineBlockPlacement.h

diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
new file mode 100644
index 0000000000000..8003b52fa6a3c
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
@@ -0,0 +1,30 @@
+//===- llvm/CodeGen/MachineBlockPlacement.h ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
+#define LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class MachineBlockPlacementPass
+    : public PassInfoMixin<MachineBlockPlacementPass> {
+
+  bool AllowTailMerge = true;
+
+public:
+  MachineBlockPlacementPass(bool AllowTailMerge)
+      : AllowTailMerge(AllowTailMerge) {}
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MACHINEBLOCKPLACEMENT_H
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index a05e876806ab5..a27b5630b308e 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -184,7 +184,7 @@ void initializeMIRCanonicalizerPass(PassRegistry &);
 void initializeMIRNamerPass(PassRegistry &);
 void initializeMIRPrintingPassPass(PassRegistry &);
 void initializeMachineBlockFrequencyInfoWrapperPassPass(PassRegistry &);
-void initializeMachineBlockPlacementPass(PassRegistry &);
+void initializeMachineBlockPlacementLegacyPass(PassRegistry &);
 void initializeMachineBlockPlacementStatsPass(PassRegistry &);
 void initializeMachineBranchProbabilityInfoWrapperPassPass(PassRegistry &);
 void initializeMachineCFGPrinterPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 25899d04dc664..d5abc672dc02b 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -46,6 +46,7 @@
 #include "llvm/CodeGen/LocalStackSlotAllocation.h"
 #include "llvm/CodeGen/LowerEmuTLS.h"
 #include "llvm/CodeGen/MIRPrinter.h"
+#include "llvm/CodeGen/MachineBlockPlacement.h"
 #include "llvm/CodeGen/MachineCSE.h"
 #include "llvm/CodeGen/MachineCopyPropagation.h"
 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index f99a5f2c74bf3..11b8ff81211d9 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -193,6 +193,20 @@ MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", MachineTraceMetricsVerifi
 #define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER,    \
                                           PARAMS)
 #endif
+
+MACHINE_FUNCTION_PASS_WITH_PARAMS(
+    "block-placement", "MachineBlockPlacementPass",
+    [](bool NoTailMerge) {
+      // Tail merging is enabled by default, so this option
+      // is to disable it.
+      return MachineBlockPlacementPass(!NoTailMerge);
+    },
+    [](StringRef Params) {
+      return parseSinglePassOption(Params, "no-tail-merge",
+                                   "MachineBlockPlacementPass");
+    },
+    "no-tail-merge")
+
 MACHINE_FUNCTION_PASS_WITH_PARAMS(
     "machine-sink", "MachineSinkingPass",
     [](bool EnableSinkAndFold) {
@@ -242,7 +256,6 @@ DUMMY_MACHINE_MODULE_PASS("mir-strip-debug", StripDebugMachineModulePass)
 #endif
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass)
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", BasicBlockSectionsProfileReaderPass)
-DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass)
 DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass)
 DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass)
 DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass)
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index beb7fb284a376..daa31073e7151 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -72,7 +72,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeMIRNamerPass(Registry);
   initializeMIRProfileLoaderPassPass(Registry);
   initializeMachineBlockFrequencyInfoWrapperPassPass(Registry);
-  initializeMachineBlockPlacementPass(Registry);
+  initializeMachineBlockPlacementLegacyPass(Registry);
   initializeMachineBlockPlacementStatsPass(Registry);
   initializeMachineCFGPrinterPass(Registry);
   initializeMachineCSELegacyPass(Registry);
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index 9ccfadc318fa4..e968693e8e1ff 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -24,6 +24,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/CodeGen/MachineBlockPlacement.h"
 #include "BranchFolding.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
@@ -357,7 +358,7 @@ class BlockChain {
   unsigned UnscheduledPredecessors = 0;
 };
 
-class MachineBlockPlacement : public MachineFunctionPass {
+class MachineBlockPlacement {
   /// A type for a block filter set.
   using BlockFilterSet = SmallSetVector<const MachineBasicBlock *, 16>;
 
@@ -409,7 +410,11 @@ class MachineBlockPlacement : public MachineFunctionPass {
 
   ProfileSummaryInfo *PSI = nullptr;
 
-  TargetPassConfig *PassConfig = nullptr;
+  // Tail merging is also determined based on
+  // whether structured CFG is required.
+  bool AllowTailMerge;
+
+  CodeGenOptLevel OptLevel;
 
   /// Duplicator used to duplicate tails during placement.
   ///
@@ -608,18 +613,48 @@ class MachineBlockPlacement : public MachineFunctionPass {
   /// Create a single CFG chain from the current block order.
   void createCFGChainExtTsp();
 
+public:
+  MachineBlockPlacement(const MachineBranchProbabilityInfo *MBPI,
+                        MachineLoopInfo *MLI, ProfileSummaryInfo *PSI,
+                        std::unique_ptr<MBFIWrapper> MBFI,
+                        MachinePostDominatorTree *MPDT, bool AllowTailMerge)
+      : MBPI(MBPI), MBFI(std::move(MBFI)), MLI(MLI), MPDT(MPDT), PSI(PSI),
+        AllowTailMerge(AllowTailMerge) {}
+
+  bool run(MachineFunction &F);
+
+  static bool allowTailDupPlacement(MachineFunction &MF) {
+    return TailDupPlacement && !MF.getTarget().requiresStructuredCFG();
+  }
+};
+
+class MachineBlockPlacementLegacy : public MachineFunctionPass {
 public:
   static char ID; // Pass identification, replacement for typeid
 
-  MachineBlockPlacement() : MachineFunctionPass(ID) {
-    initializeMachineBlockPlacementPass(*PassRegistry::getPassRegistry());
+  MachineBlockPlacementLegacy() : MachineFunctionPass(ID) {
+    initializeMachineBlockPlacementLegacyPass(*PassRegistry::getPassRegistry());
   }
 
-  bool runOnMachineFunction(MachineFunction &F) override;
+  bool runOnMachineFunction(MachineFunction &MF) override {
+    if (skipFunction(MF.getFunction()))
+      return false;
 
-  bool allowTailDupPlacement() const {
-    assert(F);
-    return TailDupPlacement && !F->getTarget().requiresStructuredCFG();
+    auto *MBPI =
+        &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
+    auto MBFI = std::make_unique<MBFIWrapper>(
+        getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
+    auto *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+    auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
+                     ? &getAnalysis<MachinePostDominatorTreeWrapperPass>()
+                            .getPostDomTree()
+                     : nullptr;
+    auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    auto *PassConfig = &getAnalysis<TargetPassConfig>();
+    bool AllowTailMerge = PassConfig->getEnableTailMerge();
+    return MachineBlockPlacement(MBPI, MLI, PSI, std::move(MBFI), MPDT,
+                                 AllowTailMerge)
+        .run(MF);
   }
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
@@ -636,18 +671,18 @@ class MachineBlockPlacement : public MachineFunctionPass {
 
 } // end anonymous namespace
 
-char MachineBlockPlacement::ID = 0;
+char MachineBlockPlacementLegacy::ID = 0;
 
-char &llvm::MachineBlockPlacementID = MachineBlockPlacement::ID;
+char &llvm::MachineBlockPlacementID = MachineBlockPlacementLegacy::ID;
 
-INITIALIZE_PASS_BEGIN(MachineBlockPlacement, DEBUG_TYPE,
+INITIALIZE_PASS_BEGIN(MachineBlockPlacementLegacy, DEBUG_TYPE,
                       "Branch Probability Basic Block Placement", false, false)
 INITIALIZE_PASS_DEPENDENCY(MachineBranchProbabilityInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTreeWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
-INITIALIZE_PASS_END(MachineBlockPlacement, DEBUG_TYPE,
+INITIALIZE_PASS_END(MachineBlockPlacementLegacy, DEBUG_TYPE,
                     "Branch Probability Basic Block Placement", false, false)
 
 #ifndef NDEBUG
@@ -1130,7 +1165,7 @@ MachineBlockPlacement::getBestTrellisSuccessor(
     MachineBasicBlock *Succ1 = BestA.Dest;
     MachineBasicBlock *Succ2 = BestB.Dest;
     // Check to see if tail-duplication would be profitable.
-    if (allowTailDupPlacement() && shouldTailDuplicate(Succ2) &&
+    if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ2) &&
         canTailDuplicateUnplacedPreds(BB, Succ2, Chain, BlockFilter) &&
         isProfitableToTailDup(BB, Succ2, MBPI->getEdgeProbability(BB, Succ1),
                               Chain, BlockFilter)) {
@@ -1655,7 +1690,7 @@ MachineBlockPlacement::selectBestSuccessor(const MachineBasicBlock *BB,
     if (hasBetterLayoutPredecessor(BB, Succ, SuccChain, SuccProb, RealSuccProb,
                                    Chain, BlockFilter)) {
       // If tail duplication would make Succ profitable, place it.
-      if (allowTailDupPlacement() && shouldTailDuplicate(Succ))
+      if (allowTailDupPlacement(*F) && shouldTailDuplicate(Succ))
         DupCandidates.emplace_back(SuccProb, Succ);
       continue;
     }
@@ -1883,7 +1918,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
     auto Result = selectBestSuccessor(BB, Chain, BlockFilter);
     MachineBasicBlock *BestSucc = Result.BB;
     bool ShouldTailDup = Result.ShouldTailDup;
-    if (allowTailDupPlacement())
+    if (allowTailDupPlacement(*F))
       ShouldTailDup |= (BestSucc && canTailDuplicateUnplacedPreds(
                                         BB, BestSucc, Chain, BlockFilter));
 
@@ -1910,7 +1945,7 @@ void MachineBlockPlacement::buildChain(const MachineBasicBlock *HeadBB,
 
     // Placement may have changed tail duplication opportunities.
     // Check for that now.
-    if (allowTailDupPlacement() && BestSucc && ShouldTailDup) {
+    if (allowTailDupPlacement(*F) && BestSucc && ShouldTailDup) {
       repeatedlyTailDuplicateBlock(BestSucc, BB, LoopHeaderBB, Chain,
                                    BlockFilter, PrevUnplacedBlockIt,
                                    PrevUnplacedBlockInFilterIt);
@@ -3466,7 +3501,7 @@ void MachineBlockPlacement::initTailDupThreshold() {
 
   // For aggressive optimization, we can adjust some thresholds to be less
   // conservative.
-  if (PassConfig->getOptLevel() >= CodeGenOptLevel::Aggressive) {
+  if (OptLevel >= CodeGenOptLevel::Aggressive) {
     // At O3 we should be more willing to copy blocks for tail duplication. This
     // increases size pressure, so we only do it at O3
     // Do this unless only the regular threshold is explicitly set.
@@ -3478,29 +3513,48 @@ void MachineBlockPlacement::initTailDupThreshold() {
   // If there's no threshold provided through options, query the target
   // information for a threshold instead.
   if (TailDupPlacementThreshold.getNumOccurrences() == 0 &&
-      (PassConfig->getOptLevel() < CodeGenOptLevel::Aggressive ||
+      (OptLevel < CodeGenOptLevel::Aggressive ||
        TailDupPlacementAggressiveThreshold.getNumOccurrences() == 0))
-    TailDupSize = TII->getTailDuplicateSize(PassConfig->getOptLevel());
+    TailDupSize = TII->getTailDuplicateSize(OptLevel);
 }
 
-bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
-  if (skipFunction(MF.getFunction()))
-    return false;
+PreservedAnalyses
+MachineBlockPlacementPass::run(MachineFunction &MF,
+                               MachineFunctionAnalysisManager &MFAM) {
+  auto *MBPI = &MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
+  auto MBFI = std::make_unique<MBFIWrapper>(
+      MFAM.getResult<MachineBlockFrequencyAnalysis>(MF));
+  auto *MLI = &MFAM.getResult<MachineLoopAnalysis>(MF);
+  auto *MPDT = MachineBlockPlacement::allowTailDupPlacement(MF)
+                   ? &MFAM.getResult<MachinePostDominatorTreeAnalysis>(MF)
+                   : nullptr;
+  auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
+                  .getCachedResult<ProfileSummaryAnalysis>(
+                      *MF.getFunction().getParent());
+  if (!PSI)
+    report_fatal_error("MachineBlockPlacement requires ProfileSummaryAnalysis",
+                       false);
+
+  MachineBlockPlacement MBP(MBPI, MLI, PSI, std::move(MBFI), MPDT,
+                            AllowTailMerge);
+
+  if (!MBP.run(MF))
+    return PreservedAnalyses::all();
+
+  return getMachineFunctionPassPreservedAnalyses();
+}
+
+bool MachineBlockPlacement::run(MachineFunction &MF) {
 
   // Check for single-block functions and skip them.
   if (std::next(MF.begin()) == MF.end())
     return false;
 
   F = &MF;
-  MBPI = &getAnalysis<MachineBranchProbabilityInfoWrapperPass>().getMBPI();
-  MBFI = std::make_unique<MBFIWrapper>(
-      getAnalysis<MachineBlockFrequencyInfoWrapperPass>().getMBFI());
-  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
+  OptLevel = F->getTarget().getOptLevel();
+
   TII = MF.getSubtarget().getInstrInfo();
   TLI = MF.getSubtarget().getTargetLowering();
-  MPDT = nullptr;
-  PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-  PassConfig = &getAnalysis<TargetPassConfig>();
 
   // Initialize PreferredLoopExit to nullptr here since it may never be set if
   // there are no MachineLoops.
@@ -3529,8 +3583,7 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   }
 
   // Apply tail duplication.
-  if (allowTailDupPlacement()) {
-    MPDT = &getAnalysis<MachinePostDominatorTreeWrapperPass>().getPostDomTree();
+  if (allowTailDupPlacement(*F)) {
     if (OptForSize)
       TailDupSize = 1;
     const bool PreRegAlloc = false;
@@ -3548,8 +3601,8 @@ bool MachineBlockPlacement::runOnMachineFunction(MachineFunction &MF) {
   // TailMerge can create jump into if branches that make CFG irreducible for
   // HW that requires structured CFG.
   const bool EnableTailMerge = !MF.getTarget().requiresStructuredCFG() &&
-                               PassConfig->getEnableTailMerge() &&
-                               BranchFoldPlacement && MF.size() > 3;
+                               AllowTailMerge && BranchFoldPlacement &&
+                               MF.size() > 3;
   // No tail merging opportunities if the block number is less than four.
   if (EnableTailMerge) {
     const unsigned TailMergeSize = TailDupSize + 1;
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 8080059f0bb03..0e01462cfc97e 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -110,6 +110,7 @@
 #include "llvm/CodeGen/LowerEmuTLS.h"
 #include "llvm/CodeGen/MIRPrinter.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBlockPlacement.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineCSE.h"
 #include "llvm/CodeGen/MachineCopyPropagation.h"
diff --git a/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir b/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir
index 5e57604263793..7b107f8a24045 100644
--- a/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir
+++ b/llvm/test/CodeGen/AArch64/pauthlr-prologue-duplication.mir
@@ -1,5 +1,6 @@
 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
 # RUN: llc -mtriple aarch64-none-elf -run-pass=block-placement -O3 -o - %s | FileCheck %s
+# RUN: llc -mtriple aarch64-none-elf -passes='require<profile-summary>,function(machine-function(block-placement))' -O3 -o - %s | FileCheck %s
 
 ## Check that block-placement does not perform tail duplication on the
 ## PAUTH_EPILOGUE instruction. If that happened, the two prologues would use
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index efa24a9bee7de..f8ce9882b7a8e 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -2,6 +2,8 @@
 # RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s
 # RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
 
+# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
+
 # Used to fail with
 # Assertion `Out && "Header of loop has no predecessors from outside loop?"
 
diff --git a/llvm/test/CodeGen/X86/block-placement.mir b/llvm/test/CodeGen/X86/block-placement.mir
index 3f69ca0a40ad3..de7a80718a297 100644
--- a/llvm/test/CodeGen/X86/block-placement.mir
+++ b/llvm/test/CodeGen/X86/block-placement.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -run-pass=block-placement -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-apple-macosx10.12.0 -O3 -passes='require<profile-summary>,function(machine-function(block-placement))' -o - %s | FileCheck %s
 
 --- |
   ; ModuleID = 'test.ll'

>From ad832b49decbac1146e534f87cb64ff8c4112643 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 5 Mar 2025 07:07:42 +0000
Subject: [PATCH 2/4] fix param in codegenpassbuilder

---
 llvm/include/llvm/Passes/CodeGenPassBuilder.h  | 2 +-
 llvm/include/llvm/Target/CGPassBuilderOption.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index d5abc672dc02b..1dfbb6c7eec2e 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -1224,7 +1224,7 @@ void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineLateOptimization(
 template <typename Derived, typename TargetMachineT>
 void CodeGenPassBuilder<Derived, TargetMachineT>::addBlockPlacement(
     AddMachinePass &addPass) const {
-  addPass(MachineBlockPlacementPass());
+  addPass(MachineBlockPlacementPass(Opt.EnableTailMerge));
   // Run a separate pass to collect block placement statistics.
   if (Opt.EnableBlockPlacementStats)
     addPass(MachineBlockPlacementStatsPass());
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index f006ef1fcb40b..51f25c1360b87 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -50,6 +50,7 @@ struct CGPassBuilderOption {
   bool EnableGlobalMergeFunc = false;
   bool EnableMachineFunctionSplitter = false;
   bool EnableSinkAndFold = false;
+  bool EnableTailMerge = true;
   bool MISchedPostRA = false;
   bool EarlyLiveIntervals = false;
   bool GCEmptyBlocks = false;

>From 02c4c9587a80473f9c06912f818c6797eb871f82 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 5 Mar 2025 08:45:53 +0000
Subject: [PATCH 3/4] add options no-tail-merge and enable-tail-merge

---
 llvm/include/llvm/Passes/MachinePassRegistry.def | 13 ++++---------
 llvm/lib/Passes/PassBuilder.cpp                  | 13 +++++++++++++
 llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir  |  2 +-
 3 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 11b8ff81211d9..0156cf47e5881 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -196,16 +196,11 @@ MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", MachineTraceMetricsVerifi
 
 MACHINE_FUNCTION_PASS_WITH_PARAMS(
     "block-placement", "MachineBlockPlacementPass",
-    [](bool NoTailMerge) {
-      // Tail merging is enabled by default, so this option
-      // is to disable it.
-      return MachineBlockPlacementPass(!NoTailMerge);
+    [](bool AllowTailMerge) {
+      // Default is true.
+      return MachineBlockPlacementPass(AllowTailMerge);
     },
-    [](StringRef Params) {
-      return parseSinglePassOption(Params, "no-tail-merge",
-                                   "MachineBlockPlacementPass");
-    },
-    "no-tail-merge")
+    parseMachineBlockPlacementPassOptions, "no-tail-merge;enable-tail-merge")
 
 MACHINE_FUNCTION_PASS_WITH_PARAMS(
     "machine-sink", "MachineSinkingPass",
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 0e01462cfc97e..66472d706bea9 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1440,6 +1440,19 @@ Expected<bool> parseMachineSinkingPassOptions(StringRef Params) {
                                             "MachineSinkingPass");
 }
 
+Expected<bool> parseMachineBlockPlacementPassOptions(StringRef Params) {
+  bool AllowTailMerge = true;
+  if (Params == "no-tail-merge")
+    AllowTailMerge = false;
+  else if (!Params.empty() && Params != "enable-tail-merge")
+    return make_error<StringError>(
+        formatv("invalid MachineBlockPlacementPass parameter '{0}' ", Params)
+            .str(),
+        inconvertibleErrorCode());
+
+  return AllowTailMerge;
+}
+
 } // namespace
 
 /// Tests whether a pass name starts with a valid prefix for a default pipeline
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index f8ce9882b7a8e..6a1f82aed5fb9 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s
 # RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
 
-# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
+# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement<enable-tail-merge>))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
 
 # Used to fail with
 # Assertion `Out && "Header of loop has no predecessors from outside loop?"

>From e18d4c24ecef127aa0bb34c96c8f5b736a5132c0 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Tue, 11 Mar 2025 05:03:51 +0000
Subject: [PATCH 4/4] change pass option style

---
 .../include/llvm/CodeGen/MachineBlockPlacement.h |  5 +++++
 llvm/include/llvm/Passes/MachinePassRegistry.def |  2 +-
 llvm/lib/CodeGen/MachineBlockPlacement.cpp       |  8 ++++++++
 llvm/lib/Passes/PassBuilder.cpp                  | 16 ++++++++--------
 llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir  |  2 +-
 llvm/test/tools/llc/new-pm/option-parsing.mir    | 13 +++++++++++++
 6 files changed, 36 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/tools/llc/new-pm/option-parsing.mir

diff --git a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
index 8003b52fa6a3c..733d24ab719a8 100644
--- a/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
+++ b/llvm/include/llvm/CodeGen/MachineBlockPlacement.h
@@ -23,6 +23,11 @@ class MachineBlockPlacementPass
       : AllowTailMerge(AllowTailMerge) {}
   PreservedAnalyses run(MachineFunction &MF,
                         MachineFunctionAnalysisManager &MFAM);
+  static bool isRequired() { return true; }
+
+  void
+  printPipeline(raw_ostream &OS,
+                function_ref<StringRef(StringRef)> MapClassName2PassName) const;
 };
 
 } // namespace llvm
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 0156cf47e5881..517401b3f4ebe 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -200,7 +200,7 @@ MACHINE_FUNCTION_PASS_WITH_PARAMS(
       // Default is true.
       return MachineBlockPlacementPass(AllowTailMerge);
     },
-    parseMachineBlockPlacementPassOptions, "no-tail-merge;enable-tail-merge")
+    parseMachineBlockPlacementPassOptions, "no-tail-merge;tail-merge")
 
 MACHINE_FUNCTION_PASS_WITH_PARAMS(
     "machine-sink", "MachineSinkingPass",
diff --git a/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
index e968693e8e1ff..40edc47f3e6bb 100644
--- a/llvm/lib/CodeGen/MachineBlockPlacement.cpp
+++ b/llvm/lib/CodeGen/MachineBlockPlacement.cpp
@@ -3544,6 +3544,14 @@ MachineBlockPlacementPass::run(MachineFunction &MF,
   return getMachineFunctionPassPreservedAnalyses();
 }
 
+void MachineBlockPlacementPass::printPipeline(
+    raw_ostream &OS,
+    function_ref<StringRef(StringRef)> MapClassName2PassName) const {
+  OS << MapClassName2PassName(name());
+  if (!AllowTailMerge)
+    OS << "<no-tail-merge>";
+}
+
 bool MachineBlockPlacement::run(MachineFunction &MF) {
 
   // Check for single-block functions and skip them.
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 66472d706bea9..555349cbe0398 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1442,14 +1442,14 @@ Expected<bool> parseMachineSinkingPassOptions(StringRef Params) {
 
 Expected<bool> parseMachineBlockPlacementPassOptions(StringRef Params) {
   bool AllowTailMerge = true;
-  if (Params == "no-tail-merge")
-    AllowTailMerge = false;
-  else if (!Params.empty() && Params != "enable-tail-merge")
-    return make_error<StringError>(
-        formatv("invalid MachineBlockPlacementPass parameter '{0}' ", Params)
-            .str(),
-        inconvertibleErrorCode());
-
+  if (!Params.empty()) {
+    AllowTailMerge = Params != "no-tail-merge";
+    if (AllowTailMerge && Params != "tail-merge")
+      return make_error<StringError>(
+          formatv("invalid MachineBlockPlacementPass parameter '{0}' ", Params)
+              .str(),
+          inconvertibleErrorCode());
+  }
   return AllowTailMerge;
 }
 
diff --git a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
index 6a1f82aed5fb9..05cfe53224582 100644
--- a/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
+++ b/llvm/test/CodeGen/AMDGPU/loop_header_nopred.mir
@@ -2,7 +2,7 @@
 # RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1010 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX10 %s
 # RUN: llc -mtriple=amdgcn -o - -run-pass=block-placement -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
 
-# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement<enable-tail-merge>))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
+# RUN: llc -mtriple=amdgcn -o - -passes='require<profile-summary>,function(machine-function(block-placement<tail-merge>))' -mcpu=gfx1100 -mattr=-inst-fwd-prefetch-bug -verify-machineinstrs %s | FileCheck -check-prefixes=GFX11 %s
 
 # Used to fail with
 # Assertion `Out && "Header of loop has no predecessors from outside loop?"
diff --git a/llvm/test/tools/llc/new-pm/option-parsing.mir b/llvm/test/tools/llc/new-pm/option-parsing.mir
new file mode 100644
index 0000000000000..f0353d72cb9ec
--- /dev/null
+++ b/llvm/test/tools/llc/new-pm/option-parsing.mir
@@ -0,0 +1,13 @@
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -passes="block-placement<tail-merge>,block-placement<no-tail-merge>" -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=CHECK
+
+# RUN: not llc -mtriple=x86_64-unknown-linux-gnu -passes="block-placement<invalid-opt>" -print-pipeline-passes -o /dev/null %s 2>&1 | FileCheck %s --check-prefix=INVALID
+
+# CHECK: block-placement,block-placement<no-tail-merge>
+# INVALID: invalid MachineBlockPlacementPass parameter 'invalid-opt'
+
+---
+name: f
+body: |
+  bb.0:
+    RET 0
+...



More information about the llvm-commits mailing list