[llvm] [BasicBlockSections] Allow mixing of -basic-block-sections with MFS. (PR #117076)

Rahman Lavaee via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 21 14:47:58 PST 2024


https://github.com/rlavaee updated https://github.com/llvm/llvm-project/pull/117076

>From 001dc94f7b74b9e7b4ecc6542aed1e84f77b182f Mon Sep 17 00:00:00 2001
From: Rahman Lavaee <rahmanl at google.com>
Date: Wed, 20 Nov 2024 22:04:07 +0000
Subject: [PATCH] [BasicBlockSections] Allow mixing of -basic-block-sections
 with MFS.

---
 llvm/lib/CodeGen/MachineFunctionSplitter.cpp  | 12 ++++
 llvm/lib/CodeGen/TargetPassConfig.cpp         | 17 ++---
 .../Generic/machine-function-splitter.ll      | 69 ++++++++++++++++++-
 3 files changed, 89 insertions(+), 9 deletions(-)

diff --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index ba0015d3ddacb6..dc0f233f1b5e83 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -28,6 +28,7 @@
 #include "llvm/Analysis/EHUtils.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/BasicBlockSectionUtils.h"
+#include "llvm/CodeGen/BasicBlockSectionsProfileReader.h"
 #include "llvm/CodeGen/MachineBasicBlock.h"
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -128,6 +129,9 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
 }
 
 bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
+  // Do not split functions when -basic-block-sections=all is specified.
+  if (MF.getTarget().getBBSectionsType() == llvm::BasicBlockSection::All)
+    return false;
   // We target functions with profile data. Static information in the form
   // of exception handling code may be split to cold if user passes the
   // mfs-split-ehcode flag.
@@ -139,6 +143,13 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
   if (!TII.isFunctionSafeToSplit(MF))
     return false;
 
+  // Do not split functions with BasicBlockSections profiles as they will
+  // be split according to that profile by the BasicBlockSections pass.
+  auto BBSPRWP =
+      getAnalysisIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
+  if (BBSPRWP != nullptr && BBSPRWP->getBBSPR().isFunctionHot(MF.getName()))
+    return false;
+
   // Renumbering blocks here preserves the order of the blocks as
   // sortBasicBlocksAndUpdateBranches uses the numeric identifier to sort
   // blocks. Preserving the order of blocks is essential to retaining decisions
@@ -201,6 +212,7 @@ void MachineFunctionSplitter::getAnalysisUsage(AnalysisUsage &AU) const {
   AU.addRequired<MachineModuleInfoWrapperPass>();
   AU.addRequired<MachineBlockFrequencyInfoWrapperPass>();
   AU.addRequired<ProfileSummaryInfoWrapperPass>();
+  AU.addUsedIfAvailable<BasicBlockSectionsProfileReaderWrapperPass>();
 }
 
 char MachineFunctionSplitter::ID = 0;
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index a6159a38753cf5..d407e9f0871d4c 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -1235,13 +1235,13 @@ void TargetPassConfig::addMachinePasses() {
     addPass(createMIRAddFSDiscriminatorsPass(
         sampleprof::FSDiscriminatorPass::PassLast));
 
-  bool NeedsBBSections =
-      TM->getBBSectionsType() != llvm::BasicBlockSection::None;
-  // Machine function splitter uses the basic block sections feature. Both
-  // cannot be enabled at the same time. We do not apply machine function
-  // splitter if -basic-block-sections is requested.
-  if (!NeedsBBSections && (TM->Options.EnableMachineFunctionSplitter ||
-                           EnableMachineFunctionSplitter)) {
+  // Machine function splitter uses the basic block sections feature.
+  // When used along with `-basic-block-sections=`, the basic-block-sections
+  // feature takes precedence. This means functions eligible for
+  // basic-block-sections optimizations (`=all`, or `=list=` with function
+  // included in the list profile) will get that optimization instead.
+  if (TM->Options.EnableMachineFunctionSplitter ||
+      EnableMachineFunctionSplitter) {
     const std::string ProfileFile = getFSProfileFile(TM);
     if (!ProfileFile.empty()) {
       if (EnableFSDiscriminator) {
@@ -1260,7 +1260,8 @@ void TargetPassConfig::addMachinePasses() {
   }
   // We run the BasicBlockSections pass if either we need BB sections or BB
   // address map (or both).
-  if (NeedsBBSections || TM->Options.BBAddrMap) {
+  if (TM->getBBSectionsType() != llvm::BasicBlockSection::None ||
+      TM->Options.BBAddrMap) {
     if (TM->getBBSectionsType() == llvm::BasicBlockSection::List) {
       addPass(llvm::createBasicBlockSectionsProfileReaderWrapperPass(
           TM->getBBSectionsFuncListBuf()));
diff --git a/llvm/test/CodeGen/Generic/machine-function-splitter.ll b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
index 2097523a61c5f9..5768e7809dcca8 100644
--- a/llvm/test/CodeGen/Generic/machine-function-splitter.ll
+++ b/llvm/test/CodeGen/Generic/machine-function-splitter.ll
@@ -2,12 +2,21 @@
 ; REQUIRES: x86-registered-target
 
 ; COM: Machine function splitting with FDO profiles
-; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-X86,MFS-NOBBSECTIONS
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-X86
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefixes=MFS-EH-SPLIT,MFS-EH-SPLIT-X86
 ; RUN: llc < %s -mtriple=x86_64 -split-machine-functions -O0 -mfs-psi-cutoff=0 -mfs-count-threshold=10000 | FileCheck %s -check-prefixes=MFS-O0,MFS-O0-X86
 
+; COM: Machine function splitting along with -basic-block-sections profile
+; RUN: echo 'v1' > %t
+; RUN: echo 'ffoo21' >> %t
+; RUN: echo 'c0' >> %t
+; RUN: echo 'ffoo22' >> %t
+; RUN: echo 'c0 1' >> %t
+; RUN: echo 'c2' >> %t
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -basic-block-sections=%t -split-machine-functions | FileCheck %s --check-prefixes=MFS-BBSECTIONS
+
 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions | FileCheck %s -check-prefixes=MFS-DEFAULTS,MFS-DEFAULTS-AARCH64
 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefixes=MFS-OPTS1,MFS-OPTS1-AARCH64
 ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -aarch64-min-jump-table-entries=4 -enable-split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefixes=MFS-OPTS2,MFS-OPTS2-AARCH64
@@ -20,6 +29,7 @@
 ; RUN: llc < %t.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS
 ; RUN: llc < %t.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS2
 
+
 define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
 ;; Check that cold block is moved to .text.split.
 ; MFS-DEFAULTS-LABEL:         foo1
@@ -610,6 +620,63 @@ cold_asm_target:
   ret void
 }
 
+define void @foo21(i1 zeroext %0) {
+;; Check that a function with basic-block-sections profile is properly split
+;; when the profile is used along with mfs.
+; MFS-BBSECTIONS:            .section   .text.hot.foo21
+; MFS-NOBBSECTIONS-NOT:      .section   .text.hot.foo21
+; MFS-BBSECTIONS-LABEL:      foo21:
+; MFS-NOBBSECTIONS-NOT:      foo21.cold:
+; MFS-BBSECTIONS:            .section	.text.split.foo21
+; MFS-BBSECTIONS:            foo21.cold
+  %2 = alloca i8, align 1
+  %3 = zext i1 %0 to i8
+  store i8 %3, ptr %2, align 1
+  %4 = load i8, ptr %2, align 1
+  %5 = trunc i8 %4 to i1
+  br i1 %5, label %6, label %8
+
+6:                                                ; preds = %1
+  %7 = call i32 @bar()
+  br label %10
+
+8:                                                ; preds = %1
+  %9 = call i32 @baz()
+  br label %10
+
+10:                                               ; preds = %8, %6
+  ret void
+}
+
+define void @foo22(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
+;; Check that when function has both basic-block-section and pgo profiles,
+;; then basic-block-section profile is applied.
+
+;; Check that we create two hot sections with -basic-block-sections.
+; MFS-BBSECTIONS:             .section        .text.hot.foo22
+; MFS-BBSECTIONS-LABEL:       foo22:
+; MFS-BBSECTIONS:             callq  bar
+; MFS-BBSECTIONS:             .section        .text.hot.foo22
+; MFS-BBSECTIONS-NEXT:        foo22.__part.1:
+; MFS-BBSECTIONS:             callq  baz
+; MFS-BBSECTIONS-NOT:         .section        .text.split.foo22
+  br i1 %0, label %2, label %4, !prof !17
+
+2:                                                ; preds = %1
+  %3 = call i32 @bar()
+  br label %6
+
+4:                                                ; preds = %1
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  ret void
+}
+
+
+
 declare i32 @bar()
 declare i32 @baz()
 declare i32 @bam()



More information about the llvm-commits mailing list