[llvm] 8df7596 - [CodeGen] Fine tune MachineFunctionSplitPass (MFS) for FSAFDO.

Han Shen via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 10 16:02:27 PDT 2023


Author: Han Shen
Date: 2023-07-10T16:00:30-07:00
New Revision: 8df75969ae7023b6ee527f9f9e0c8aaca02eee86

URL: https://github.com/llvm/llvm-project/commit/8df75969ae7023b6ee527f9f9e0c8aaca02eee86
DIFF: https://github.com/llvm/llvm-project/commit/8df75969ae7023b6ee527f9f9e0c8aaca02eee86.diff

LOG: [CodeGen] Fine tune MachineFunctionSplitPass (MFS) for FSAFDO.

The original MFS work D85368 shows good performance improvement with
Instrumented FDO. However, AutoFDO or Flow-Sensitive AutoFDO (FSAFDO)
does not show performance gain. This is mainly caused by a less
accurate profile compared to the iFDO profile.

For the past few months, we have been working to improve FSAFDO
quality, like in D145171. Taking advantage of this improvement, MFS
now shows performance improvements over FSAFDO profiles.

That being said, 2 minor changes need to be made, 1) An FS-AutoFDO
profile generation pass needs to be added right before MFS pass and an
FSAFDO profile load pass is needed when FS-AutoFDO is enabled and the
MFS flag is present. 2) MFS only applies to hot functions, because we
believe (and experiment also shows) FS-AutoFDO is more accurate about
functions that have plenty of samples than those with no or very few
samples.

With this improvement, we see a 1.2% performance improvement in clang
benchmark, 0.9% QPS improvement in our internal search benchmark, and
3%-5% improvement in internal storage benchmark.

This is #1 of the two patches that enables the improvement.

Reviewed By: wenlei, snehasish, xur

Differential Revision: https://reviews.llvm.org/D152399

Added: 
    

Modified: 
    llvm/lib/CodeGen/MachineFunctionSplitter.cpp
    llvm/test/CodeGen/X86/machine-function-splitter.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
index 1b939a518f1682..3a3b9a6e5e69a2 100644
--- a/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
+++ b/llvm/lib/CodeGen/MachineFunctionSplitter.cpp
@@ -24,6 +24,8 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/EHUtils.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/CodeGen/BasicBlockSectionUtils.h"
@@ -94,16 +96,26 @@ static void setDescendantEHBlocksCold(MachineFunction &MF) {
   }
 }
 
+static void finishAdjustingBasicBlocksAndLandingPads(MachineFunction &MF) {
+  auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
+    return X.getSectionID().Type < Y.getSectionID().Type;
+  };
+  llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
+  llvm::avoidZeroOffsetLandingPad(MF);
+}
+
 static bool isColdBlock(const MachineBasicBlock &MBB,
                         const MachineBlockFrequencyInfo *MBFI,
-                        ProfileSummaryInfo *PSI) {
+                        ProfileSummaryInfo *PSI, bool HasAccurateProfile) {
   std::optional<uint64_t> Count = MBFI->getBlockProfileCount(&MBB);
+  // If using accurate profile, no count means cold.
+  // If no accurate profile, no count means "do not judge
+  // coldness".
   if (!Count)
-    return true;
+    return HasAccurateProfile;
 
-  if (PercentileCutoff > 0) {
+  if (PercentileCutoff > 0)
     return PSI->isColdCountNthPercentile(PercentileCutoff, *Count);
-  }
   return (*Count < ColdCountThreshold);
 }
 
@@ -140,9 +152,28 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
 
   MachineBlockFrequencyInfo *MBFI = nullptr;
   ProfileSummaryInfo *PSI = nullptr;
+  // Whether this pass is using FSAFDO profile (not accurate) or IRPGO
+  // (accurate). HasAccurateProfile is only used when UseProfileData is true,
+  // but giving it a default value to silent any possible warning.
+  bool HasAccurateProfile = false;
   if (UseProfileData) {
     MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
     PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
+    // "HasAccurateProfile" is false for FSAFDO, true when using IRPGO
+    // (traditional instrumented FDO) or CSPGO profiles.
+    HasAccurateProfile =
+        PSI->hasInstrumentationProfile() || PSI->hasCSInstrumentationProfile();
+    // If HasAccurateProfile is false, we only trust hot functions,
+    // which have many samples, and consider them as split
+    // candidates. On the other hand, if HasAccurateProfile (likeIRPGO), we
+    // trust both cold and hot functions.
+    if (!HasAccurateProfile && !PSI->isFunctionHotInCallGraph(&MF, *MBFI)) {
+      // Split all EH code and it's descendant statically by default.
+      if (SplitAllEHCode)
+        setDescendantEHBlocksCold(MF);
+      finishAdjustingBasicBlocksAndLandingPads(MF);
+      return true;
+    }
   }
 
   SmallVector<MachineBasicBlock *, 2> LandingPads;
@@ -152,7 +183,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
 
     if (MBB.isEHPad())
       LandingPads.push_back(&MBB);
-    else if (UseProfileData && isColdBlock(MBB, MBFI, PSI) && !SplitAllEHCode)
+    else if (UseProfileData &&
+             isColdBlock(MBB, MBFI, PSI, HasAccurateProfile) && !SplitAllEHCode)
       MBB.setSectionID(MBBSectionID::ColdSectionID);
   }
 
@@ -161,9 +193,10 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
     setDescendantEHBlocksCold(MF);
   // We only split out eh pads if all of them are cold.
   else {
+    // Here we have UseProfileData == true.
     bool HasHotLandingPads = false;
     for (const MachineBasicBlock *LP : LandingPads) {
-      if (!isColdBlock(*LP, MBFI, PSI))
+      if (!isColdBlock(*LP, MBFI, PSI, HasAccurateProfile))
         HasHotLandingPads = true;
     }
     if (!HasHotLandingPads) {
@@ -171,11 +204,8 @@ bool MachineFunctionSplitter::runOnMachineFunction(MachineFunction &MF) {
         LP->setSectionID(MBBSectionID::ColdSectionID);
     }
   }
-  auto Comparator = [](const MachineBasicBlock &X, const MachineBasicBlock &Y) {
-    return X.getSectionID().Type < Y.getSectionID().Type;
-  };
-  llvm::sortBasicBlocksAndUpdateBranches(MF, Comparator);
-  llvm::avoidZeroOffsetLandingPad(MF);
+
+  finishAdjustingBasicBlocksAndLandingPads(MF);
   return true;
 }
 

diff  --git a/llvm/test/CodeGen/X86/machine-function-splitter.ll b/llvm/test/CodeGen/X86/machine-function-splitter.ll
index 70f47037d118de..03c0f93b342d70 100644
--- a/llvm/test/CodeGen/X86/machine-function-splitter.ll
+++ b/llvm/test/CodeGen/X86/machine-function-splitter.ll
@@ -2,6 +2,10 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=0 -mfs-count-threshold=2000 | FileCheck %s --dump-input=always -check-prefix=MFS-OPTS1
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-psi-cutoff=950000 | FileCheck %s -check-prefix=MFS-OPTS2
 ; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -split-machine-functions -mfs-split-ehcode | FileCheck %s -check-prefix=MFS-EH-SPLIT
+; RUN: sed 's/InstrProf/SampleProfile/g' %s > %t.ll
+; RUN: llc < %t.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS
+; RUN: llc < %t.ll -mtriple=x86_64-unknown-linux-gnu -split-machine-functions | FileCheck %s --check-prefix=FSAFDO-MFS2
+
 define void @foo1(i1 zeroext %0) nounwind !prof !14 !section_prefix !15 {
 ;; Check that cold block is moved to .text.split.
 ; MFS-DEFAULTS-LABEL: foo1
@@ -368,6 +372,66 @@ try.cont:                                        ; preds = %entry
   ret i32 %8
 }
 
+define void @foo14(i1 zeroext %0, i1 zeroext %1) nounwind !prof !24 {
+; FSAFDO-MFS: .section	.text.split.foo14,"ax"
+; FSAFDO-MFS: foo14.cold:
+  br i1 %0, label %3, label %7, !prof !25
+
+3:
+  %4 = call i32 @bar()
+  br label %7
+
+5:
+  %6 = call i32 @baz()
+  br label %7
+
+7:
+  br i1 %1, label %8, label %10, !prof !26
+
+8:
+  %9 = call i32 @bam()
+  br label %12
+
+10:
+  %11 = call i32 @baz()
+  br label %12
+
+12:
+  %13 = tail call i32 @qux()
+  ret void
+}
+
+define void @foo15(i1 zeroext %0, i1 zeroext %1) nounwind !prof !27 {
+;; HasAccurateProfile is false, foo15 is hot, but no profile data for
+;; blocks, no split should happen.
+; FSAFDO-MFS2-NOT: .section	.text.split.foo15,"ax"
+; FSAFDO-MFS2-NOT: foo15.cold:
+  br i1 %0, label %3, label %7
+
+3:
+  %4 = call i32 @bar()
+  br label %7
+
+5:
+  %6 = call i32 @baz()
+  br label %7
+
+7:
+  br i1 %1, label %8, label %10
+
+8:
+  %9 = call i32 @bam()
+  br label %12
+
+10:
+  %11 = call i32 @baz()
+  br label %12
+
+12:
+  %13 = tail call i32 @qux()
+  ret void
+}
+
 declare i32 @bar()
 declare i32 @baz()
 declare i32 @bam()
@@ -404,3 +468,7 @@ attributes #0 = { "implicit-section-name"="nosplit" }
 !21 = !{!"branch_weights", i32 6000, i32 4000}
 !22 = !{!"branch_weights", i32 80, i32 9920}
 !23 = !{!"function_entry_count", i64 7}
+!24 = !{!"function_entry_count", i64 10000}
+!25 = !{!"branch_weights", i32 0, i32 7000}
+!26 = !{!"branch_weights", i32 1000, i32 6000}
+!27 = !{!"function_entry_count", i64 10000}


        


More information about the llvm-commits mailing list