[llvm] [CodeGen][NPM] Port BranchFolder to NPM (PR #128858)

Akshat Oke via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 26 03:09:44 PST 2025


https://github.com/optimisan created https://github.com/llvm/llvm-project/pull/128858

EnableTailMerge is false by default and is handled by the pass builder. Passes are independent of target pipeline options.

>From a2d434f8a443c2d4548c7dbc0ed149aa8e9bacd2 Mon Sep 17 00:00:00 2001
From: Akshat Oke <Akshat.Oke at amd.com>
Date: Wed, 26 Feb 2025 11:04:03 +0000
Subject: [PATCH] [CodeGen][NPM] Port BranchFolder to NPM

Note: EnableTailMerge is false by default and is handled by the pass
builder. Passes are independent of target pipeline options.
---
 llvm/include/llvm/CodeGen/BranchFoldingPass.h | 31 ++++++++
 llvm/include/llvm/InitializePasses.h          |  2 +-
 llvm/include/llvm/Passes/CodeGenPassBuilder.h |  3 +-
 .../llvm/Passes/MachinePassRegistry.def       | 10 ++-
 .../include/llvm/Target/CGPassBuilderOption.h |  1 +
 llvm/lib/CodeGen/BranchFolding.cpp            | 73 +++++++++++++------
 llvm/lib/CodeGen/BranchFolding.h              |  1 +
 llvm/lib/CodeGen/CodeGen.cpp                  |  2 +-
 llvm/lib/Passes/PassBuilder.cpp               |  1 +
 .../CodeGen/AArch64/branch-folder-oneinst.mir |  1 +
 .../AMDGPU/branch-folder-requires-no-phis.mir |  6 +-
 .../Hexagon/branchfolder-insert-impdef.mir    |  1 +
 .../MIR/X86/branch-folder-with-label.mir      |  1 +
 llvm/test/CodeGen/X86/branchfolding-ehpad.mir |  1 +
 14 files changed, 104 insertions(+), 30 deletions(-)
 create mode 100644 llvm/include/llvm/CodeGen/BranchFoldingPass.h

diff --git a/llvm/include/llvm/CodeGen/BranchFoldingPass.h b/llvm/include/llvm/CodeGen/BranchFoldingPass.h
new file mode 100644
index 0000000000000..6ebef47252d25
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/BranchFoldingPass.h
@@ -0,0 +1,31 @@
+//===- llvm/CodeGen/BranchFoldingPass.h -------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CODEGEN_BRANCHFOLDINGPASS_H
+#define LLVM_CODEGEN_BRANCHFOLDINGPASS_H
+
+#include "llvm/CodeGen/MachinePassManager.h"
+
+namespace llvm {
+
+class BranchFolderPass : public PassInfoMixin<BranchFolderPass> {
+  bool EnableTailMerge;
+
+public:
+  BranchFolderPass(bool EnableTailMerge) : EnableTailMerge(EnableTailMerge) {}
+  PreservedAnalyses run(MachineFunction &MF,
+                        MachineFunctionAnalysisManager &MFAM);
+
+  MachineFunctionProperties getRequiredProperties() const {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoPHIs);
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_BRANCHFOLDINGPASS_H
diff --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 0dfb46a210c7e..a0a1bf36338d3 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -59,7 +59,7 @@ void initializeBasicBlockSectionsPass(PassRegistry &);
 void initializeBarrierNoopPass(PassRegistry &);
 void initializeBasicAAWrapperPassPass(PassRegistry &);
 void initializeBlockFrequencyInfoWrapperPassPass(PassRegistry &);
-void initializeBranchFolderPassPass(PassRegistry &);
+void initializeBranchFolderLegacyPass(PassRegistry &);
 void initializeBranchProbabilityInfoWrapperPassPass(PassRegistry &);
 void initializeBranchRelaxationPass(PassRegistry &);
 void initializeBreakCriticalEdgesPass(PassRegistry &);
diff --git a/llvm/include/llvm/Passes/CodeGenPassBuilder.h b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
index 30f0742fd2c26..8d39815c302a6 100644
--- a/llvm/include/llvm/Passes/CodeGenPassBuilder.h
+++ b/llvm/include/llvm/Passes/CodeGenPassBuilder.h
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/TypeBasedAliasAnalysis.h"
 #include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
+#include "llvm/CodeGen/BranchFoldingPass.h"
 #include "llvm/CodeGen/CallBrPrepare.h"
 #include "llvm/CodeGen/CodeGenPrepare.h"
 #include "llvm/CodeGen/DeadMachineInstructionElim.h"
@@ -1174,7 +1175,7 @@ template <typename Derived, typename TargetMachineT>
 void CodeGenPassBuilder<Derived, TargetMachineT>::addMachineLateOptimization(
     AddMachinePass &addPass) const {
   // Branch folding must be run after regalloc and prolog/epilog insertion.
-  addPass(BranchFolderPass());
+  addPass(BranchFolderPass(Opt.EnableTailMerge));
 
   // Tail duplication.
   // Note that duplicating tail just increases code size and degrades
diff --git a/llvm/include/llvm/Passes/MachinePassRegistry.def b/llvm/include/llvm/Passes/MachinePassRegistry.def
index 67eab42b014c9..6c4834f5f8176 100644
--- a/llvm/include/llvm/Passes/MachinePassRegistry.def
+++ b/llvm/include/llvm/Passes/MachinePassRegistry.def
@@ -188,6 +188,15 @@ MACHINE_FUNCTION_PASS("verify<machine-trace-metrics>", MachineTraceMetricsVerifi
 #define MACHINE_FUNCTION_PASS_WITH_PARAMS(NAME, CLASS, CREATE_PASS, PARSER,    \
                                           PARAMS)
 #endif
+MACHINE_FUNCTION_PASS_WITH_PARAMS(
+    "branch-folder", "BranchFolderPass",
+    [](bool EnableTailMerge) { return BranchFolderPass(EnableTailMerge); },
+    [](StringRef Params) {
+      return parseSinglePassOption(Params, "enable-tail-merge",
+                                   "BranchFolderPass");
+    },
+    "enable-tail-merge")
+
 MACHINE_FUNCTION_PASS_WITH_PARAMS(
     "regallocfast", "RegAllocFastPass",
     [](RegAllocFastPass::Options Opts) { return RegAllocFastPass(Opts); },
@@ -223,7 +232,6 @@ DUMMY_MACHINE_FUNCTION_PASS("bbsections-prepare", BasicBlockSectionsPass)
 DUMMY_MACHINE_FUNCTION_PASS("bbsections-profile-reader", BasicBlockSectionsProfileReaderPass)
 DUMMY_MACHINE_FUNCTION_PASS("block-placement", MachineBlockPlacementPass)
 DUMMY_MACHINE_FUNCTION_PASS("block-placement-stats", MachineBlockPlacementStatsPass)
-DUMMY_MACHINE_FUNCTION_PASS("branch-folder", BranchFolderPass)
 DUMMY_MACHINE_FUNCTION_PASS("break-false-deps", BreakFalseDepsPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfguard-longjmp", CFGuardLongjmpPass)
 DUMMY_MACHINE_FUNCTION_PASS("cfi-fixup", CFIFixupPass)
diff --git a/llvm/include/llvm/Target/CGPassBuilderOption.h b/llvm/include/llvm/Target/CGPassBuilderOption.h
index d3d19c8a7dc9f..813ded01a0c08 100644
--- a/llvm/include/llvm/Target/CGPassBuilderOption.h
+++ b/llvm/include/llvm/Target/CGPassBuilderOption.h
@@ -33,6 +33,7 @@ struct CGPassBuilderOption {
   bool EnableBlockPlacementStats = false;
   bool EnableGlobalMergeFunc = false;
   bool EnableMachineFunctionSplitter = false;
+  bool EnableTailMerge = true;
   bool MISchedPostRA = false;
   bool EarlyLiveIntervals = false;
   bool GCEmptyBlocks = false;
diff --git a/llvm/lib/CodeGen/BranchFolding.cpp b/llvm/lib/CodeGen/BranchFolding.cpp
index 29a3076b57e20..06fedf8d321ac 100644
--- a/llvm/lib/CodeGen/BranchFolding.cpp
+++ b/llvm/lib/CodeGen/BranchFolding.cpp
@@ -24,6 +24,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/Analysis.h"
+#include "llvm/CodeGen/BranchFoldingPass.h"
 #include "llvm/CodeGen/MBFIWrapper.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
@@ -88,38 +89,62 @@ TailMergeSize("tail-merge-size",
 namespace {
 
   /// BranchFolderPass - Wrap branch folder in a machine function pass.
-  class BranchFolderPass : public MachineFunctionPass {
-  public:
-    static char ID;
+class BranchFolderLegacy : public MachineFunctionPass {
+public:
+  static char ID;
 
-    explicit BranchFolderPass(): MachineFunctionPass(ID) {}
+  explicit BranchFolderLegacy() : MachineFunctionPass(ID) {}
 
-    bool runOnMachineFunction(MachineFunction &MF) override;
+  bool runOnMachineFunction(MachineFunction &MF) override;
 
-    void getAnalysisUsage(AnalysisUsage &AU) const override {
-      AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
-      AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
-      AU.addRequired<ProfileSummaryInfoWrapperPass>();
-      AU.addRequired<TargetPassConfig>();
-      MachineFunctionPass::getAnalysisUsage(AU);
-    }
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
+    AU.addRequired<MachineBranchProbabilityInfoWrapperPass>();
+    AU.addRequired<ProfileSummaryInfoWrapperPass>();
+    AU.addRequired<TargetPassConfig>();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
 
-    MachineFunctionProperties getRequiredProperties() const override {
-      return MachineFunctionProperties().set(
-          MachineFunctionProperties::Property::NoPHIs);
-    }
-  };
+  MachineFunctionProperties getRequiredProperties() const override {
+    return MachineFunctionProperties().set(
+        MachineFunctionProperties::Property::NoPHIs);
+  }
+};
 
 } // end anonymous namespace
 
-char BranchFolderPass::ID = 0;
-
-char &llvm::BranchFolderPassID = BranchFolderPass::ID;
-
-INITIALIZE_PASS(BranchFolderPass, DEBUG_TYPE,
-                "Control Flow Optimizer", false, false)
+char BranchFolderLegacy::ID = 0;
+
+char &llvm::BranchFolderPassID = BranchFolderLegacy::ID;
+
+INITIALIZE_PASS(BranchFolderLegacy, DEBUG_TYPE, "Control Flow Optimizer", false,
+                false)
+
+PreservedAnalyses BranchFolderPass::run(MachineFunction &MF,
+                                        MachineFunctionAnalysisManager &MFAM) {
+  MFPropsModifier _(*this, MF);
+  bool EnableTailMerge =
+      !MF.getTarget().requiresStructuredCFG() && this->EnableTailMerge;
+
+  auto &MBPI = MFAM.getResult<MachineBranchProbabilityAnalysis>(MF);
+  auto *PSI = MFAM.getResult<ModuleAnalysisManagerMachineFunctionProxy>(MF)
+                  .getCachedResult<ProfileSummaryAnalysis>(
+                      *MF.getFunction().getParent());
+  if (!PSI)
+    report_fatal_error(
+        "ProfileSummaryAnalysis is required for BranchFoldingPass", false);
+
+  auto &MBFI = MFAM.getResult<MachineBlockFrequencyAnalysis>(MF);
+  MBFIWrapper MBBFreqInfo(MBFI);
+  BranchFolder Folder(EnableTailMerge, /*CommonHoist=*/true, MBBFreqInfo, MBPI,
+                      PSI);
+  if (!Folder.OptimizeFunction(MF, MF.getSubtarget().getInstrInfo(),
+                               MF.getSubtarget().getRegisterInfo()))
+    return PreservedAnalyses::all();
+  return getMachineFunctionPassPreservedAnalyses();
+}
 
-bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
+bool BranchFolderLegacy::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
 
diff --git a/llvm/lib/CodeGen/BranchFolding.h b/llvm/lib/CodeGen/BranchFolding.h
index ff2bbe06c0488..63284e2c56b4a 100644
--- a/llvm/lib/CodeGen/BranchFolding.h
+++ b/llvm/lib/CodeGen/BranchFolding.h
@@ -13,6 +13,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachinePassManager.h"
 #include "llvm/Support/Compiler.h"
 #include <vector>
 
diff --git a/llvm/lib/CodeGen/CodeGen.cpp b/llvm/lib/CodeGen/CodeGen.cpp
index 046a3ee42dd6b..5ac673a5fbffa 100644
--- a/llvm/lib/CodeGen/CodeGen.cpp
+++ b/llvm/lib/CodeGen/CodeGen.cpp
@@ -22,7 +22,7 @@ void llvm::initializeCodeGen(PassRegistry &Registry) {
   initializeAtomicExpandLegacyPass(Registry);
   initializeBasicBlockPathCloningPass(Registry);
   initializeBasicBlockSectionsPass(Registry);
-  initializeBranchFolderPassPass(Registry);
+  initializeBranchFolderLegacyPass(Registry);
   initializeBranchRelaxationPass(Registry);
   initializeBreakFalseDepsPass(Registry);
   initializeCallBrPreparePass(Registry);
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index c9825dfc89d3d..a5ad50d625af0 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -80,6 +80,7 @@
 #include "llvm/CodeGen/AssignmentTrackingAnalysis.h"
 #include "llvm/CodeGen/AtomicExpand.h"
 #include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
+#include "llvm/CodeGen/BranchFoldingPass.h"
 #include "llvm/CodeGen/CallBrPrepare.h"
 #include "llvm/CodeGen/CodeGenPrepare.h"
 #include "llvm/CodeGen/ComplexDeinterleavingPass.h"
diff --git a/llvm/test/CodeGen/AArch64/branch-folder-oneinst.mir b/llvm/test/CodeGen/AArch64/branch-folder-oneinst.mir
index cfb6da3e9ae7a..f1224783c93ea 100644
--- a/llvm/test/CodeGen/AArch64/branch-folder-oneinst.mir
+++ b/llvm/test/CodeGen/AArch64/branch-folder-oneinst.mir
@@ -1,4 +1,5 @@
 # RUN: llc -o - %s -mtriple=aarch64 -run-pass branch-folder -verify-machineinstrs | FileCheck %s
+# RUN: llc -o - %s -mtriple=aarch64 -passes="require<profile-summary>,function(machine-function(branch-folder<enable-tail-merge>))" -verify-machineinstrs | FileCheck %s
 # Check that BranchFolding pass is able to hoist a common instruction into a block with a single branch instruction.
 name: func
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/branch-folder-requires-no-phis.mir b/llvm/test/CodeGen/AMDGPU/branch-folder-requires-no-phis.mir
index a5b5098795265..2d6b60e7133ab 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-folder-requires-no-phis.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-folder-requires-no-phis.mir
@@ -1,9 +1,11 @@
 # REQUIRES: asserts
-# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=branch-folder -o /dev/null %s 2>&1 | FileCheck %s
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -run-pass=branch-folder -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=LEGACY-CHECK,CHECK
+# RUN: not --crash llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes="require<profile-summary>,function(machine-function(branch-folder<enable-tail-merge>))" -o /dev/null %s 2>&1 | FileCheck %s --check-prefixes=NPM-CHECK,CHECK
 
 # BranchFolding breaks this function due to phis
 
-# CHECK: MachineFunctionProperties required by Control Flow Optimizer pass are not met by function func.
+# LEGACY-CHECK: MachineFunctionProperties required by Control Flow Optimizer pass are not met by function func.
+# NPM-CHECK: MachineFunctionProperties required by BranchFolderPass pass are not met by function func.
 # CHECK-NEXT: Required properties: NoPHIs
 # CHECK-NEXT: Current properties: IsSSA, TracksLiveness{{$}}
 ---
diff --git a/llvm/test/CodeGen/Hexagon/branchfolder-insert-impdef.mir b/llvm/test/CodeGen/Hexagon/branchfolder-insert-impdef.mir
index 97f57fcc97e98..5e80432471750 100644
--- a/llvm/test/CodeGen/Hexagon/branchfolder-insert-impdef.mir
+++ b/llvm/test/CodeGen/Hexagon/branchfolder-insert-impdef.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=hexagon -run-pass branch-folder %s -o - -verify-machineinstrs | FileCheck %s
+# RUN: llc -mtriple=hexagon -passes="require<profile-summary>,function(machine-function(branch-folder<enable-tail-merge>))" %s -o - -verify-machineinstrs | FileCheck %s
 
 # Branch folding will perform tail merging of bb.1 and bb.2, and bb.2 will
 # become the common tail. The use of R0 in bb.2 is <undef> while the
diff --git a/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir b/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir
index ce225d4567e91..8251558caccce 100644
--- a/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir
+++ b/llvm/test/CodeGen/MIR/X86/branch-folder-with-label.mir
@@ -39,6 +39,7 @@
 # }
 #
 # RUN: llc -o - %s -mtriple=x86_64-- -run-pass=branch-folder | FileCheck %s
+# TODO: llc -o - %s -mtriple=x86_64-- -passes="require<profile-summary>,function(machine-function(branch-folder<enable-tail-merge>))" | FileCheck %s
 --- |
   ; ModuleID = 'test.ll'
   source_filename = "test.ll"
diff --git a/llvm/test/CodeGen/X86/branchfolding-ehpad.mir b/llvm/test/CodeGen/X86/branchfolding-ehpad.mir
index d445cd2068030..e15905c07282d 100644
--- a/llvm/test/CodeGen/X86/branchfolding-ehpad.mir
+++ b/llvm/test/CodeGen/X86/branchfolding-ehpad.mir
@@ -1,4 +1,5 @@
 # RUN: llc -mtriple=x86_64-windows-msvc -verify-machineinstrs -run-pass branch-folder -o - %s | FileCheck %s
+# RUN: llc -mtriple=x86_64-windows-msvc -verify-machineinstrs -passes="require<profile-summary>,function(machine-function(branch-folder<enable-tail-merge>))" -o - %s | FileCheck %s
 
 # Check that branch-folder does not create a fallthrough to a landing pad.
 # Also make sure that the landing pad still can be tail merged.



More information about the llvm-commits mailing list