[llvm] 65ef4d4 - [CodeGen] Part II of "Fine tune MachineFunctionSplitPass (MFS) for FSAFDO".

Han Shen via llvm-commits llvm-commits at lists.llvm.org
Tue Jul 11 22:41:52 PDT 2023


Author: Han Shen
Date: 2023-07-11T22:40:25-07:00
New Revision: 65ef4d43577dc39ea3eb003a0237e91665788a9c

URL: https://github.com/llvm/llvm-project/commit/65ef4d43577dc39ea3eb003a0237e91665788a9c
DIFF: https://github.com/llvm/llvm-project/commit/65ef4d43577dc39ea3eb003a0237e91665788a9c.diff

LOG: [CodeGen] Part II of "Fine tune MachineFunctionSplitPass (MFS) for FSAFDO".

This CL adds a new discriminator pass. Also adds a new sample profile
loading pass when MFS is enabled.

Differential Revision: https://reviews.llvm.org/D152577

Added: 
    llvm/test/CodeGen/X86/Inputs/fsloader-mfs.afdo
    llvm/test/CodeGen/X86/load-sample-profile-2.ll
    llvm/test/CodeGen/X86/load-sample-profile.ll

Modified: 
    llvm/lib/CodeGen/TargetPassConfig.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index 8ece4c764f6191..5d418c6d97bc0e 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -43,6 +43,7 @@
 #include "llvm/Support/SaveAndRestore.h"
 #include "llvm/Support/Threading.h"
 #include "llvm/Support/VirtualFileSystem.h"
+#include "llvm/Support/WithColor.h"
 #include "llvm/Target/CGPassBuilderOption.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Transforms/Scalar.h"
@@ -172,12 +173,6 @@ static cl::opt<GlobalISelAbortMode> EnableGlobalISelAbort(
         clEnumValN(GlobalISelAbortMode::DisableWithDiag, "2",
                    "Disable the abort but emit a diagnostic on failure")));
 
-// An option that disables inserting FS-AFDO discriminators before emit.
-// This is mainly for debugging and tuning purpose.
-static cl::opt<bool>
-    FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
-                     cl::desc("Do not insert FS-AFDO discriminators before "
-                              "emit."));
 // Disable MIRProfileLoader before RegAlloc. This is for for debugging and
 // tuning purpose.
 static cl::opt<bool> DisableRAFSProfileLoader(
@@ -1225,14 +1220,6 @@ void TargetPassConfig::addMachinePasses() {
   addPass(&XRayInstrumentationID);
   addPass(&PatchableFunctionID);
 
-  if (EnableFSDiscriminator && !FSNoFinalDiscrim)
-    // Add FS discriminators here so that all the instruction duplicates
-    // in different BBs get their own discriminators. With this, we can "sum"
-    // the SampleFDO counters instead of using MAX. This will improve the
-    // SampleFDO profile quality.
-    addPass(createMIRAddFSDiscriminatorsPass(
-        sampleprof::FSDiscriminatorPass::PassLast));
-
   addPreEmitPass();
 
   if (TM->Options.EnableIPRA)
@@ -1258,6 +1245,10 @@ void TargetPassConfig::addMachinePasses() {
       addPass(createMachineOutlinerPass(RunOnAllFunctions));
   }
 
+  if (EnableFSDiscriminator)
+    addPass(createMIRAddFSDiscriminatorsPass(
+        sampleprof::FSDiscriminatorPass::PassLast));
+
   // Machine function splitter uses the basic block sections feature. Both
   // cannot be enabled at the same time. Basic block sections takes precedence.
   // FIXME: In principle, BasicBlockSection::Labels and splitting can used
@@ -1270,6 +1261,20 @@ void TargetPassConfig::addMachinePasses() {
     addPass(llvm::createBasicBlockSectionsPass());
   } else if (TM->Options.EnableMachineFunctionSplitter ||
              EnableMachineFunctionSplitter) {
+    const std::string ProfileFile = getFSProfileFile(TM);
+    if (!ProfileFile.empty()) {
+      if (EnableFSDiscriminator) {
+        addPass(createMIRProfileLoaderPass(
+            ProfileFile, getFSRemappingFile(TM),
+            sampleprof::FSDiscriminatorPass::PassLast, nullptr));
+      } else {
+        // Sample profile is given, but FSDiscriminator is not
+        // enabled, this may result in performance regression.
+        WithColor::warning()
+            << "Using AutoFDO without FSDiscriminator for MFS may regress "
+               "performance.";
+      }
+    }
     addPass(createMachineFunctionSplitterPass());
   }
 

diff --git a/llvm/test/CodeGen/X86/Inputs/fsloader-mfs.afdo b/llvm/test/CodeGen/X86/Inputs/fsloader-mfs.afdo
new file mode 100644
index 00000000000000..1f9faf5ddfa686
Binary files /dev/null and b/llvm/test/CodeGen/X86/Inputs/fsloader-mfs.afdo differ

diff --git a/llvm/test/CodeGen/X86/load-sample-profile-2.ll b/llvm/test/CodeGen/X86/load-sample-profile-2.ll
new file mode 100644
index 00000000000000..f608d22843d568
--- /dev/null
+++ b/llvm/test/CodeGen/X86/load-sample-profile-2.ll
@@ -0,0 +1,35 @@
+;;; MFS with sample profile fails when no -enable-fs-discriminator=true.
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -fs-profile-file=%S/Inputs/fsloader-mfs.afdo -split-machine-functions -enable-fs-discriminator=false 2>&1 | FileCheck %s --check-prefix=NODISCRIMINATOR
+; NODISCRIMINATOR: warning: Using AutoFDO without FSDiscriminator for MFS may regress performance.
+
+define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind {
+  br i1 %0, label %3, label %7
+
+3:
+  %4 = call i32 @bar()
+  br label %7
+
+5:
+  %6 = call i32 @baz()
+  br label %7
+
+7:
+  br i1 %1, label %8, label %10
+
+8:
+  %9 = call i32 @bam()
+  br label %12
+
+10:
+  %11 = call i32 @baz()
+  br label %12
+
+12:
+  %13 = tail call i32 @qux()
+  ret void
+}
+
+declare i32 @bar()
+declare i32 @baz()
+declare i32 @bam()
+declare i32 @qux()

diff --git a/llvm/test/CodeGen/X86/load-sample-profile.ll b/llvm/test/CodeGen/X86/load-sample-profile.ll
new file mode 100644
index 00000000000000..2e3f8809d3882e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/load-sample-profile.ll
@@ -0,0 +1,61 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -debug-pass=Structure -enable-fs-discriminator=true -improved-fs-discriminator=true 2>&1 | FileCheck %s --check-prefix=NOPROFILE
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -debug-pass=Structure -fs-profile-file=%S/Inputs/fsloader-mfs.afdo -enable-fs-discriminator=true -improved-fs-discriminator=true 2>&1 | FileCheck %s --check-prefix=PROFILE-NOMFS
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -debug-pass=Structure -fs-profile-file=%S/Inputs/fsloader-mfs.afdo -split-machine-functions -enable-fs-discriminator=true -improved-fs-discriminator=true 2>&1 | FileCheck %s --check-prefix=PROFILE-MFS
+
+;; No profile is specified, no load passes.
+; NOPROFILE: Add FS discriminators in MIR
+; NO-NOPROFILE: SampleFDO loader in MIR
+; NOPROFILE: Add FS discriminators in MIR
+; NO-NOPROFILE: SampleFDO loader in MIR
+; NOPROFILE: Add FS discriminators in MIR
+; NO-NOPROFILE: SampleFDO loader in MIR
+
+;; Profile is specified, so we have first 2 load passes.
+; PROFILE-NOMFS: Add FS discriminators in MIR
+; PROFILE-NOMFS: SampleFDO loader in MIR
+; PROFILE-NOMFS: Add FS discriminators in MIR
+; PROFILE-NOMFS: SampleFDO loader in MIR
+; PROFILE-NOMFS: Add FS discriminators in MIR
+;; But mfs is not specified, so no "SampleFDO loader should be created"
+; NO-PROFILE-NOMFS: SampleFDO loader in MIR
+
+;; Profile is specified with mfs, so we have 3 load passes.
+; PROFILE-MFS: Add FS discriminators in MIR
+; PROFILE-MFS: SampleFDO loader in MIR
+; PROFILE-MFS: Add FS discriminators in MIR
+; PROFILE-MFS: SampleFDO loader in MIR
+; PROFILE-MFS: Add FS discriminators in MIR
+; PROFILE-MFS: SampleFDO loader in MIR
+; PROFILE-MFS: Machine Function Splitter Transformation
+
+define void @foo4(i1 zeroext %0, i1 zeroext %1) nounwind {
+  br i1 %0, label %3, label %7
+
+3:
+  %4 = call i32 @bar()
+  br label %7
+
+5:
+  %6 = call i32 @baz()
+  br label %7
+
+7:
+  br i1 %1, label %8, label %10
+
+8:
+  %9 = call i32 @bam()
+  br label %12
+
+10:
+  %11 = call i32 @baz()
+  br label %12
+
+12:
+  %13 = tail call i32 @qux()
+  ret void
+}
+
+declare i32 @bar()
+declare i32 @baz()
+declare i32 @bam()
+declare i32 @qux()


        


More information about the llvm-commits mailing list