[llvm] e170d95 - Split EH code by default

via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 17 12:42:46 PDT 2022


Author: Archit Saxena
Date: 2022-08-17T12:40:31-07:00
New Revision: e170d955fe5717bfcf231b009b62e4f9d25b0354

URL: https://github.com/llvm/llvm-project/commit/e170d955fe5717bfcf231b009b62e4f9d25b0354
DIFF: https://github.com/llvm/llvm-project/commit/e170d955fe5717bfcf231b009b62e4f9d25b0354.diff

LOG: Split EH code by default

The current machine function splitter is reliant on profile data to do profile summary analysis to split blocks into cold section. This may sometimes limit the usage of machine function splitter especially in cases where we could do some form of static analysis to split out cold blocks if profile data is absent or profile data which may be faulty (Consider Sample PGO).

Of all code that could statically be marked cold Exception handling blocks are one of them (In fact BFI framework also tends to mark them as cold), and the most in size contribution. In my experiments I found out Exception handling pads and all code reachable from there account for up to 6-8% of the .text section on modern production binaries. This patch introduces a flag to split out all Exception handling blocks and blocks only reachable from Exceptional Handling pad to cold section. This flag has shown to give a performance win of up to 0.1% in terms of average cycles and instructions executed on internal facebook search service.

Reviewed By: snehasish

Differential Revision: https://reviews.llvm.org/D131824

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineFunctionSplitter.cpp
    llvm/test/CodeGen/X86/machine-function-splitter.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 3e1aace855a5..e8d233385161 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -57,6 +57,11 @@ static cl::opt<unsigned> ColdCountThreshold(
         "Minimum number of times a block must be executed to be retained."),
     cl::init(1), cl::Hidden);
 
+static cl::opt<bool> SplitAllEHCode(
+    "mfs-split-ehcode",
+    cl::desc("Splits all EH code and it's descendants by default."),
+    cl::init(false), cl::Hidden);
+
 namespace {
 
 class MachineFunctionSplitter : public MachineFunctionPass {
@@ -76,6 +81,79 @@ class MachineFunctionSplitter : public MachineFunctionPass {
 };
 } // end anonymous namespace
 
+/// setDescendantEHBlocksCold - This splits all EH pads and blocks reachable
+/// only by EH pad as cold. This will help mark EH pads statically cold instead
+/// of relying on profile data.
+static void
+setDescendantEHBlocksCold(SmallVectorImpl<MachineBasicBlock *> &EHBlocks,
+                          MachineFunction &MF) {
+  MachineBasicBlock *StartBlock = &MF.front();
+  // A block can be unknown if its not reachable from anywhere
+  // EH if its only reachable from start blocks via some path through EH pads
+  // NonEH if it's reachable from Non EH blocks as well.
+  enum Status { Unknown = 0, EH = 1, NonEH = 2 };
+  DenseSet<MachineBasicBlock *> WorkList;
+  DenseMap<MachineBasicBlock *, Status> Statuses;
+
+  auto getStatus = [&](MachineBasicBlock *MBB) {
+    if (Statuses.find(MBB) != Statuses.end())
+      return Statuses[MBB];
+    else
+      return Unknown;
+  };
+
+  auto checkPredecessors = [&](MachineBasicBlock *MBB, Status Stat) {
+    for (auto *PredMBB : MBB->predecessors()) {
+      Status PredStatus = getStatus(PredMBB);
+      // If status of predecessor block has gone above current block
+      // we update current blocks status.
+      if (PredStatus > Stat)
+        Stat = PredStatus;
+    }
+    return Stat;
+  };
+
+  auto addSuccesors = [&](MachineBasicBlock *MBB) {
+    for (auto *SuccMBB : MBB->successors()) {
+      if (!SuccMBB->isEHPad())
+        WorkList.insert(SuccMBB);
+    }
+  };
+
+  // Insert the successors of start block
+  // and landing pads successor.
+  Statuses[StartBlock] = NonEH;
+  addSuccesors(StartBlock);
+  for (auto *LP : EHBlocks) {
+    addSuccesors(LP);
+    Statuses[LP] = EH;
+  }
+
+  // Worklist iterative algorithm.
+  while (!WorkList.empty()) {
+    auto *MBB = *WorkList.begin();
+    WorkList.erase(MBB);
+
+    Status OldStatus = getStatus(MBB);
+
+    // Check on predecessors and check for
+    // Status update.
+    Status NewStatus = checkPredecessors(MBB, OldStatus);
+
+    // Did the block status change?
+    bool changed = OldStatus != NewStatus;
+    if (changed) {
+      addSuccesors(MBB);
+      Statuses[MBB] = NewStatus;
+    }
+  }
+
+  for (auto Entry : Statuses) {
+    if (Entry.second == EH)
+      Entry.first->setSectionID(MBBSectionID::ColdSectionID);
+  }
+}
+
 static bool isColdBlock(const MachineBasicBlock &MBB,
                         const MachineBlockFrequencyInfo *MBFI,
                         ProfileSummaryInfo *PSI) {
@@ -90,9 +168,11 @@ static bool isColdBlock(const MachineBasicBlock &MBB,
 }
 
 bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
-  // TODO: We only target functions with profile data. Static information may
-  // also be considered but we don't see performance improvements yet.
-  if (!MF.getFunction().hasProfileData())
+  // We target functions with profile data. Static information in the form
+  // of exception handling code may be split to cold if user passes the
+  // mfs-split-ehcode flag.
+  bool UseProfileData = MF.getFunction().hasProfileData();
+  if (!UseProfileData && !SplitAllEHCode)
     return false;
 
   // TODO: We don't split functions where a section attribute has been set
@@ -117,8 +197,13 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
   // made by prior passes such as MachineBlockPlacement.
   MF.RenumberBlocks();
   MF.setBBSectionsType(BasicBlockSection::Preset);
-  auto *MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
-  auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+
+  MachineBlockFrequencyInfo *MBFI = nullptr;
+  ProfileSummaryInfo *PSI = nullptr;
+  if (UseProfileData) {
+    MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+    PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+  }
 
   SmallVector<MachineBasicBlock *, 2> LandingPads;
   for (auto &MBB : MF) {
@@ -127,21 +212,25 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
 
     if (MBB.isEHPad())
       LandingPads.push_back(&MBB);
-    else if (isColdBlock(MBB, MBFI, PSI))
+    else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
       MBB.setSectionID(MBBSectionID::ColdSectionID);
   }
 
+  // Split all EH code and it's descendant statically by default.
+  if (SplitAllEHCode)
+    setDescendantEHBlocksCold(LandingPads, MF);
   // We only split out eh pads if all of them are cold.
-  bool HasHotLandingPads = false;
-  for (const MachineBasicBlock *LP : LandingPads) {
-    if (!isColdBlock(*LP, MBFI, PSI))
-      HasHotLandingPads = true;
+  else {
+    bool HasHotLandingPads = false;
+    for (const MachineBasicBlock *LP : LandingPads) {
+      if (!isColdBlock(*LP, MBFI, PSI))
+        HasHotLandingPads = true;
+    }
+    if (!HasHotLandingPads) {
+      for (MachineBasicBlock *LP : LandingPads)
+        LP->setSectionID(MBBSectionID::ColdSectionID);
+    }
   }
-  if (!HasHotLandingPads) {
-    for (MachineBasicBlock *LP : LandingPads)
-      LP->setSectionID(MBBSectionID::ColdSectionID);
-  }
-
   auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
     return X.getSectionID().Type < Y.getSectionID().Type;
   };

diff  --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll
index 8e428515a4b5..70f47037d118 100644
--- a/llvm/test/CodeGen/X86/machine-function-splitter.ll
+++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll
@@ -1,7 +1,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s -check-prefix=MFS-DEFAULTS
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefix=MFS-OPTS1
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2
-
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefix=MFS-EH-SPLIT
 define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
 ;; Check that cold block is moved to .text.split.
 ; MFS-DEFAULTS-LABEL: foo1
@@ -190,6 +190,14 @@ define i32 @foo8(i1 zeroext %0) personality ptr @__gxx_personality_v0 !prof !14
 ; MFS-DEFAULTS:       .section        .text.split.foo8,"ax", at progbits
 ; MFS-DEFAULTS-NEXT:  foo8.cold:
 ; MFS-DEFAULTS:       callq   baz
+
+;; Check that all ehpads are by default treated as cold with -mfs-split-ehcode.
+; MFS-EH-SPLIT-LABEL: foo8
+; MFS-EH-SPLIT:       callq   baz
+; MFS-EH-SPLIT:       .section        .text.split.foo8,"ax", at progbits
+; MFS-EH-SPLIT-NEXT:  foo8.cold:
+; MFS-EH-SPLIT:       callq   _Unwind_Resume at PLT
+; MFS-EH-SPLIT:       callq   _Unwind_Resume at PLT
 entry:
   invoke void @_Z1fv()
           to label %try.cont unwind label %lpad1
@@ -265,6 +273,101 @@ try.cont:
   ret i32 %2
 }
 
+define void @foo11(i1 zeroext %0) personality ptr @__gxx_personality_v0 {
+;; Check that function having landing pads are split with mfs-split-ehcode
+;; even in the absence of profile data
+; MFS-EH-SPLIT-LABEL: foo11
+; MFS-EH-SPLIT:       .section        .text.split.foo11,"ax", at progbits
+; MFS-EH-SPLIT-NEXT:  foo11.cold:
+; MFS-EH-SPLIT:       nop
+; MFS-EH-SPLIT:       callq   _Unwind_Resume at PLT
+entry:
+  invoke void @_Z1fv()
+        to label %2 unwind label %lpad
+
+lpad:
+  %1 = landingpad { ptr, i32 }
+          cleanup
+          catch ptr @_ZTIi
+  resume { ptr, i32 } %1
+
+2:                                                ; preds = entry
+  %3 = tail call i32 @qux()
+  ret void
+}
+
+define i32 @foo12(i1 zeroext %0) personality ptr @__gxx_personality_v0 !prof !14 {
+;; Check that all code reachable from ehpad is split out with cycles.
+; MFS-EH-SPLIT-LABEL: foo12
+; MFS-EH-SPLIT:       .section        .text.split.foo12,"ax", at progbits
+; MFS-EH-SPLIT-NEXT:  foo12.cold:
+; MFS-EH-SPLIT:       callq   bar
+; MFS-EH-SPLIT:       callq   baz
+; MFS-EH-SPLIT:       callq   qux
+entry:
+  invoke void @_Z1fv()
+          to label %8 unwind label %lpad
+
+lpad:
+  %1 = landingpad { ptr, i32 }
+          cleanup
+          catch ptr @_ZTIi
+  br label %2
+
+2:                                                ; preds = lpad
+  %3 = call i32 @bar()
+  br i1 %0, label %4, label %6
+
+4:                                                ; preds = lpad
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  br i1 %0, label %2, label %8
+
+8:                                                ; preds = %6
+  ret i32 0
+}
+
+define i32 @foo13(i1 zeroext %0) personality ptr @__gxx_personality_v0 !prof !14{
+;; Check that all code reachable from EH
+;; that is also reachable from outside EH pad
+;; is not touched.
+; MFS-EH-SPLIT-LABEL: foo13
+; MFS-EH-SPLIT:       callq   bam
+; MFS-EH-SPLIT:       .section        .text.split.foo13,"ax", at progbits
+; MFS-EH-SPLIT-NEXT:  foo13.cold:
+; MFS-EH-SPLIT:       callq   baz
+; MFS-EH-SPLIT:       callq   bar
+; MFS-EH-SPLIT:       callq   qux
+entry:
+  invoke void @_Z1fv()
+          to label %try.cont unwind label %lpad, !prof !17
+
+lpad:
+  %1 = landingpad { ptr, i32 }
+          cleanup
+          catch ptr @_ZTIi
+  br i1 %0, label %2, label %4, !prof !17
+
+2:                                                ; preds = lpad
+  %3 = call i32 @bar()
+  br label %6
+
+4:                                                ; preds = lpad
+  %5 = call i32 @baz()
+  br label %6
+
+6:                                                ; preds = %4, %2
+  %7 = tail call i32 @qux()
+  br i1 %0, label %2, label %try.cont, !prof !17
+
+try.cont:                                        ; preds = %entry
+  %8 = call i32 @bam()
+  ret i32 %8
+}
+
 declare i32 @bar()
 declare i32 @baz()
 declare i32 @bam()


        


More information about the llvm-commits mailing list