[llvm] 5fdaaf7 - [SampleFDO] Flow Sensitive Sample FDO (FSAFDO) profile loader

Rong Xu via llvm-commits llvm-commits at lists.llvm.org
Wed Aug 18 18:44:46 PDT 2021


Author: Rong Xu
Date: 2021-08-18T18:37:35-07:00
New Revision: 5fdaaf7fd8f35ac9c9de50a45b09e29c7b0d48c4

URL: https://github.com/llvm/llvm-project/commit/5fdaaf7fd8f35ac9c9de50a45b09e29c7b0d48c4
DIFF: https://github.com/llvm/llvm-project/commit/5fdaaf7fd8f35ac9c9de50a45b09e29c7b0d48c4.diff

LOG: [SampleFDO] Flow Sensitive Sample FDO (FSAFDO) profile loader

This patch implements the Flow Sensitive Sample FDO (FSAFDO) profile
loader. There are two profile loaders for the FS profile:
one before RegAlloc and one before BlockPlacement.

To enable it, when -fprofile-sample-use=<profile> is specified,
add "-enable-fs-discriminator=true \
     -disable-ra-fsprofile-loader=false \
     -disable-layout-fsprofile-loader=false"
to turn on the FS profile loaders.

Differential Revision: https://reviews.llvm.org/D107878

Added: 
    llvm/include/llvm/CodeGen/MIRSampleProfile.h
    llvm/include/llvm/Support/PGOOptions.h
    llvm/lib/CodeGen/MIRSampleProfile.cpp
    llvm/test/CodeGen/X86/Inputs/fsloader.afdo

Modified: 
    clang/lib/CodeGen/BackendUtil.cpp
    llvm/include/llvm/CodeGen/MachineDominators.h
    llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
    llvm/include/llvm/CodeGen/Passes.h
    llvm/include/llvm/IR/DebugInfoMetadata.h
    llvm/include/llvm/InitializePasses.h
    llvm/include/llvm/Passes/PassBuilder.h
    llvm/include/llvm/Target/TargetMachine.h
    llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
    llvm/lib/CodeGen/CMakeLists.txt
    llvm/lib/CodeGen/TargetPassConfig.cpp
    llvm/lib/LTO/LTOBackend.cpp
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/test/CodeGen/X86/fsafdo_test2.ll
    llvm/tools/opt/NewPMDriver.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp
index 9e4e40a36e056..404d2680fac35 100644
--- a/clang/lib/CodeGen/BackendUtil.cpp
+++ b/clang/lib/CodeGen/BackendUtil.cpp
@@ -1261,6 +1261,8 @@ void EmitAssemblyHelper::EmitAssemblyWithNewPassManager(
                           "", PGOOptions::NoAction, PGOOptions::CSIRInstr,
                           CodeGenOpts.DebugInfoForProfiling);
   }
+  if (TM)
+    TM->setPGOOption(PGOOpt);
 
   PipelineTuningOptions PTO;
   PTO.LoopUnrolling = CodeGenOpts.UnrollLoops;

diff  --git a/llvm/include/llvm/CodeGen/MIRSampleProfile.h b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
new file mode 100644
index 0000000000000..7872daf3a54c6
--- /dev/null
+++ b/llvm/include/llvm/CodeGen/MIRSampleProfile.h
@@ -0,0 +1,81 @@
+//===----- MIRSampleProfile.h: SampleFDO Support in MIR ---*- c++ -*-------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the supporting functions for machine level Sample FDO
+// loader. This is used in Flow Sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+#define LLVM_CODEGEN_MIRSAMPLEPROFILE_H
+
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
+#include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
+#include "llvm/CodeGen/MachinePostDominators.h"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Module.h"
+#include "llvm/InitializePasses.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
+#include "llvm/ProfileData/SampleProfReader.h"
+
+#include <cassert>
+
+namespace llvm {
+
+using namespace sampleprof;
+
+class MIRProfileLoader;
+class MIRProfileLoaderPass : public MachineFunctionPass {
+  MachineFunction *MF;
+  std::string ProfileFileName;
+  FSDiscriminatorPass P;
+  unsigned LowBit;
+  unsigned HighBit;
+
+public:
+  static char ID;
+  /// FS bits will only use the '1' bits in the Mask.
+  MIRProfileLoaderPass(std::string FileName = "",
+                       std::string RemappingFileName = "",
+                       FSDiscriminatorPass P = FSDiscriminatorPass::Pass1)
+      : MachineFunctionPass(ID), ProfileFileName(FileName), P(P),
+        MIRSampleLoader(
+            std::make_unique<MIRProfileLoader>(FileName, RemappingFileName)) {
+    LowBit = getFSPassBitBegin(P);
+    HighBit = getFSPassBitEnd(P);
+    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
+  }
+
+  /// getMachineFunction - Return the last machine function computed.
+  const MachineFunction *getMachineFunction() const { return MF; }
+
+private:
+  void init(MachineFunction &MF);
+  bool runOnMachineFunction(MachineFunction &) override;
+  bool doInitialization(Module &M) override;
+  void getAnalysisUsage(AnalysisUsage &AU) const override;
+
+  std::unique_ptr<MIRProfileLoader> MIRSampleLoader;
+  /// Hold the information of the basic block frequency.
+  MachineBlockFrequencyInfo *MBFI;
+};
+
+} // namespace llvm
+
+#endif // LLVM_CODEGEN_MIRSAMPLEPROFILE_H

diff  --git a/llvm/include/llvm/CodeGen/MachineDominators.h b/llvm/include/llvm/CodeGen/MachineDominators.h
index 46bf73cdd7b63..00bfa1a269456 100644
--- a/llvm/include/llvm/CodeGen/MachineDominators.h
+++ b/llvm/include/llvm/CodeGen/MachineDominators.h
@@ -112,6 +112,12 @@ class MachineDominatorTree : public MachineFunctionPass {
     return DT->dominates(A, B);
   }
 
+  void getDescendants(MachineBasicBlock *A,
+                      SmallVectorImpl<MachineBasicBlock *> &Result) {
+    applySplitCriticalEdges();
+    DT->getDescendants(A, Result);
+  }
+
   bool dominates(const MachineBasicBlock *A, const MachineBasicBlock *B) const {
     applySplitCriticalEdges();
     return DT->dominates(A, B);

diff  --git a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
index 8cc5909c40b7f..285b858c96cb1 100644
--- a/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
+++ b/llvm/include/llvm/CodeGen/MachineOptimizationRemarkEmitter.h
@@ -118,6 +118,12 @@ class MachineOptimizationRemarkAnalysis : public DiagnosticInfoMIROptimization {
       : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
                                       PassName, RemarkName, Loc, MBB) {}
 
+  MachineOptimizationRemarkAnalysis(const char *PassName, StringRef RemarkName,
+                                    const MachineInstr *MI)
+      : DiagnosticInfoMIROptimization(DK_MachineOptimizationRemarkAnalysis,
+                                      PassName, RemarkName, MI->getDebugLoc(),
+                                      MI->getParent()) {}
+
   static bool classof(const DiagnosticInfo *DI) {
     return DI->getKind() == DK_MachineOptimizationRemarkAnalysis;
   }

diff  --git a/llvm/include/llvm/CodeGen/Passes.h b/llvm/include/llvm/CodeGen/Passes.h
index da1bab7189488..09aad65464400 100644
--- a/llvm/include/llvm/CodeGen/Passes.h
+++ b/llvm/include/llvm/CodeGen/Passes.h
@@ -171,6 +171,9 @@ namespace llvm {
   /// This pass adds flow sensitive discriminators.
   extern char &MIRAddFSDiscriminatorsID;
 
+  /// This pass reads flow sensitive profile.
+  extern char &MIRProfileLoaderPassID;
+
   /// FastRegisterAllocation Pass - This pass register allocates as fast as
   /// possible. It is best suited for debug code where live ranges are short.
   ///
@@ -513,6 +516,11 @@ namespace llvm {
   FunctionPass *
   createMIRAddFSDiscriminatorsPass(sampleprof::FSDiscriminatorPass P);
 
+  /// Read Flow Sensitive Profile.
+  FunctionPass *createMIRProfileLoaderPass(std::string File,
+                                           std::string RemappingFile,
+                                           sampleprof::FSDiscriminatorPass P);
+
   /// Creates MIR Debugify pass. \see MachineDebugify.cpp
   ModulePass *createDebugifyMachineModulePass();
 

diff  --git a/llvm/include/llvm/IR/DebugInfoMetadata.h b/llvm/include/llvm/IR/DebugInfoMetadata.h
index b59a7b3a46f3d..2f116ecc716ff 100644
--- a/llvm/include/llvm/IR/DebugInfoMetadata.h
+++ b/llvm/include/llvm/IR/DebugInfoMetadata.h
@@ -2212,7 +2212,8 @@ unsigned DILocation::getCopyIdentifier() const {
   return getCopyIdentifierFromDiscriminator(getDiscriminator());
 }
 
-Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D) const {
+Optional<const DILocation *>
+DILocation::cloneWithBaseDiscriminator(unsigned D) const {
   unsigned BD, DF, CI;
 
   if (EnableFSDiscriminator) {
@@ -2230,7 +2231,8 @@ Optional<const DILocation *> DILocation::cloneWithBaseDiscriminator(unsigned D)
   return None;
 }
 
-Optional<const DILocation *> DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
+Optional<const DILocation *>
+DILocation::cloneByMultiplyingDuplicationFactor(unsigned DF) const {
   assert(!EnableFSDiscriminator && "FSDiscriminator should not call this.");
 
   DF *= getDuplicationFactor();

diff  --git a/llvm/include/llvm/InitializePasses.h b/llvm/include/llvm/InitializePasses.h
index 365240de321ae..02e2e95dac944 100644
--- a/llvm/include/llvm/InitializePasses.h
+++ b/llvm/include/llvm/InitializePasses.h
@@ -64,6 +64,7 @@ void initializeAAEvalLegacyPassPass(PassRegistry&);
 void initializeAAResultsWrapperPassPass(PassRegistry&);
 void initializeADCELegacyPassPass(PassRegistry&);
 void initializeAddDiscriminatorsLegacyPassPass(PassRegistry&);
+void initializeAddFSDiscriminatorsPass(PassRegistry &);
 void initializeModuleAddressSanitizerLegacyPassPass(PassRegistry &);
 void initializeASanGlobalsMetadataWrapperPassPass(PassRegistry &);
 void initializeAddressSanitizerLegacyPassPass(PassRegistry &);
@@ -183,6 +184,7 @@ void initializeGlobalSplitPass(PassRegistry&);
 void initializeGlobalsAAWrapperPassPass(PassRegistry&);
 void initializeGuardWideningLegacyPassPass(PassRegistry&);
 void initializeHardwareLoopsPass(PassRegistry&);
+void initializeMIRProfileLoaderPassPass(PassRegistry &);
 void initializeMemProfilerLegacyPassPass(PassRegistry &);
 void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
 void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);

diff  --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h
index 9ab7bd4664f59..943ad316b082e 100644
--- a/llvm/include/llvm/Passes/PassBuilder.h
+++ b/llvm/include/llvm/Passes/PassBuilder.h
@@ -20,6 +20,7 @@
 #include "llvm/IR/PassManager.h"
 #include "llvm/Passes/OptimizationLevel.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/PGOOptions.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/IPO/Inliner.h"
 #include "llvm/Transforms/Instrumentation.h"
@@ -32,49 +33,6 @@ class AAManager;
 class TargetMachine;
 class ModuleSummaryIndex;
 
-/// A struct capturing PGO tunables.
-struct PGOOptions {
-  enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
-  enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
-  PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
-             std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
-             CSPGOAction CSAction = NoCSAction,
-             bool DebugInfoForProfiling = false,
-             bool PseudoProbeForProfiling = false)
-      : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
-        ProfileRemappingFile(ProfileRemappingFile), Action(Action),
-        CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
-                                                  (Action == SampleUse &&
-                                                   !PseudoProbeForProfiling)),
-        PseudoProbeForProfiling(PseudoProbeForProfiling) {
-    // Note, we do allow ProfileFile.empty() for Action=IRUse LTO can
-    // callback with IRUse action without ProfileFile.
-
-    // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
-    assert(this->CSAction == NoCSAction ||
-           (this->Action != IRInstr && this->Action != SampleUse));
-
-    // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
-    assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
-
-    // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
-    // a profile.
-    assert(this->CSAction != CSIRUse || this->Action == IRUse);
-
-    // If neither Action nor CSAction, DebugInfoForProfiling or
-    // PseudoProbeForProfiling needs to be true.
-    assert(this->Action != NoAction || this->CSAction != NoCSAction ||
-           this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
-  }
-  std::string ProfileFile;
-  std::string CSProfileGenFile;
-  std::string ProfileRemappingFile;
-  PGOAction Action;
-  CSPGOAction CSAction;
-  bool DebugInfoForProfiling;
-  bool PseudoProbeForProfiling;
-};
-
 /// Tunable parameters for passes in the default pipelines.
 class PipelineTuningOptions {
 public:

diff  --git a/llvm/include/llvm/Support/PGOOptions.h b/llvm/include/llvm/Support/PGOOptions.h
new file mode 100644
index 0000000000000..2141e2159c0c3
--- /dev/null
+++ b/llvm/include/llvm/Support/PGOOptions.h
@@ -0,0 +1,65 @@
+//===------ PGOOptions.h -- PGO option tunables ----------------*- C++ -*--===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Define option tunables for PGO.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_SUPPORT_PGOOPTIONS_H
+#define LLVM_SUPPORT_PGOOPTIONS_H
+
+#include "llvm/Support/Error.h"
+
+namespace llvm {
+
+/// A struct capturing PGO tunables.
+struct PGOOptions {
+  enum PGOAction { NoAction, IRInstr, IRUse, SampleUse };
+  enum CSPGOAction { NoCSAction, CSIRInstr, CSIRUse };
+  PGOOptions(std::string ProfileFile = "", std::string CSProfileGenFile = "",
+             std::string ProfileRemappingFile = "", PGOAction Action = NoAction,
+             CSPGOAction CSAction = NoCSAction,
+             bool DebugInfoForProfiling = false,
+             bool PseudoProbeForProfiling = false)
+      : ProfileFile(ProfileFile), CSProfileGenFile(CSProfileGenFile),
+        ProfileRemappingFile(ProfileRemappingFile), Action(Action),
+        CSAction(CSAction), DebugInfoForProfiling(DebugInfoForProfiling ||
+                                                  (Action == SampleUse &&
+                                                   !PseudoProbeForProfiling)),
+        PseudoProbeForProfiling(PseudoProbeForProfiling) {
+    // Note, we do allow ProfileFile.empty() for Action=IRUse because LTO can
+    // call back with the IRUse action without a ProfileFile.
+
+    // If there is a CSAction, PGOAction cannot be IRInstr or SampleUse.
+    assert(this->CSAction == NoCSAction ||
+           (this->Action != IRInstr && this->Action != SampleUse));
+
+    // For CSIRInstr, CSProfileGenFile also needs to be nonempty.
+    assert(this->CSAction != CSIRInstr || !this->CSProfileGenFile.empty());
+
+    // If CSAction is CSIRUse, PGOAction needs to be IRUse as they share
+    // a profile.
+    assert(this->CSAction != CSIRUse || this->Action == IRUse);
+
+    // If neither Action nor CSAction, DebugInfoForProfiling or
+    // PseudoProbeForProfiling needs to be true.
+    assert(this->Action != NoAction || this->CSAction != NoCSAction ||
+           this->DebugInfoForProfiling || this->PseudoProbeForProfiling);
+  }
+  std::string ProfileFile;
+  std::string CSProfileGenFile;
+  std::string ProfileRemappingFile;
+  PGOAction Action;
+  CSPGOAction CSAction;
+  bool DebugInfoForProfiling;
+  bool PseudoProbeForProfiling;
+};
+} // namespace llvm
+
+#endif

diff  --git a/llvm/include/llvm/Target/TargetMachine.h b/llvm/include/llvm/Target/TargetMachine.h
index dd17af4a642ac..9e8853853aa26 100644
--- a/llvm/include/llvm/Target/TargetMachine.h
+++ b/llvm/include/llvm/Target/TargetMachine.h
@@ -13,6 +13,7 @@
 #ifndef LLVM_TARGET_TARGETMACHINE_H
 #define LLVM_TARGET_TARGETMACHINE_H
 
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Triple.h"
 #include "llvm/IR/DataLayout.h"
@@ -20,6 +21,7 @@
 #include "llvm/Pass.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/PGOOptions.h"
 #include "llvm/Target/CGPassBuilderOption.h"
 #include "llvm/Target/TargetOptions.h"
 #include <string>
@@ -110,6 +112,9 @@ class TargetMachine {
   unsigned RequireStructuredCFG : 1;
   unsigned O0WantsFastISel : 1;
 
+  // PGO related tunables.
+  Optional<PGOOptions> PGOOption = None;
+
 public:
   const TargetOptions DefaultOptions;
   mutable TargetOptions Options;
@@ -303,6 +308,9 @@ class TargetMachine {
     return false;
   }
 
+  void setPGOOption(Optional<PGOOptions> PGOOpt) { PGOOption = PGOOpt; }
+  const Optional<PGOOptions> &getPGOOption() const { return PGOOption; }
+
   /// If the specified generic pointer could be assumed as a pointer to a
   /// specific address space, return that address space.
   ///

diff  --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index e0759d359dbe7..2a510e69cf45f 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -56,15 +56,20 @@ template <> struct IRTraits<BasicBlock> {
   using FunctionT = Function;
   using BlockFrequencyInfoT = BlockFrequencyInfo;
   using LoopT = Loop;
-  using LoopInfoT = LoopInfo;
+  using LoopInfoPtrT = std::unique_ptr<LoopInfo>;
+  using DominatorTreePtrT = std::unique_ptr<DominatorTree>;
+  using PostDominatorTreeT = PostDominatorTree;
+  using PostDominatorTreePtrT = std::unique_ptr<PostDominatorTree>;
   using OptRemarkEmitterT = OptimizationRemarkEmitter;
   using OptRemarkAnalysisT = OptimizationRemarkAnalysis;
-  using DominatorTreeT = DominatorTree;
-  using PostDominatorTreeT = PostDominatorTree;
+  using PredRangeT = pred_range;
+  using SuccRangeT = succ_range;
   static Function &getFunction(Function &F) { return F; }
   static const BasicBlock *getEntryBB(const Function *F) {
     return &F->getEntryBlock();
   }
+  static pred_range getPredecessors(BasicBlock *BB) { return predecessors(BB); }
+  static succ_range getSuccessors(BasicBlock *BB) { return successors(BB); }
 };
 
 } // end namespace afdo_detail
@@ -76,7 +81,8 @@ extern cl::opt<bool> NoWarnSampleUnused;
 
 template <typename BT> class SampleProfileLoaderBaseImpl {
 public:
-  SampleProfileLoaderBaseImpl(std::string Name) : Filename(Name) {}
+  SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName)
+      : Filename(Name), RemappingFilename(RemapName) {}
   void dump() { Reader->dump(); }
 
   using InstructionT = typename afdo_detail::IRTraits<BT>::InstructionT;
@@ -85,14 +91,19 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
       typename afdo_detail::IRTraits<BT>::BlockFrequencyInfoT;
   using FunctionT = typename afdo_detail::IRTraits<BT>::FunctionT;
   using LoopT = typename afdo_detail::IRTraits<BT>::LoopT;
-  using LoopInfoT = typename afdo_detail::IRTraits<BT>::LoopInfoT;
+  using LoopInfoPtrT = typename afdo_detail::IRTraits<BT>::LoopInfoPtrT;
+  using DominatorTreePtrT =
+      typename afdo_detail::IRTraits<BT>::DominatorTreePtrT;
+  using PostDominatorTreePtrT =
+      typename afdo_detail::IRTraits<BT>::PostDominatorTreePtrT;
+  using PostDominatorTreeT =
+      typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
   using OptRemarkEmitterT =
       typename afdo_detail::IRTraits<BT>::OptRemarkEmitterT;
   using OptRemarkAnalysisT =
       typename afdo_detail::IRTraits<BT>::OptRemarkAnalysisT;
-  using DominatorTreeT = typename afdo_detail::IRTraits<BT>::DominatorTreeT;
-  using PostDominatorTreeT =
-      typename afdo_detail::IRTraits<BT>::PostDominatorTreeT;
+  using PredRangeT = typename afdo_detail::IRTraits<BT>::PredRangeT;
+  using SuccRangeT = typename afdo_detail::IRTraits<BT>::SuccRangeT;
 
   using BlockWeightMap = DenseMap<const BasicBlockT *, uint64_t>;
   using EquivalenceClassMap =
@@ -112,6 +123,12 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
   const BasicBlockT *getEntryBB(const FunctionT *F) {
     return afdo_detail::IRTraits<BT>::getEntryBB(F);
   }
+  PredRangeT getPredecessors(BasicBlockT *BB) {
+    return afdo_detail::IRTraits<BT>::getPredecessors(BB);
+  }
+  SuccRangeT getSuccessors(BasicBlockT *BB) {
+    return afdo_detail::IRTraits<BT>::getSuccessors(BB);
+  }
 
   unsigned getFunctionLoc(FunctionT &Func);
   virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
@@ -129,12 +146,11 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
   void findEquivalencesFor(BasicBlockT *BB1,
                            ArrayRef<BasicBlockT *> Descendants,
                            PostDominatorTreeT *DomTree);
-
   void propagateWeights(FunctionT &F);
   uint64_t visitEdge(Edge E, unsigned *NumUnknownEdges, Edge *UnknownEdge);
   void buildEdges(FunctionT &F);
   bool propagateThroughEdges(FunctionT &F, bool UpdateBlockCount);
-  void clearFunctionData();
+  void clearFunctionData(bool ResetDT = true);
   void computeDominanceAndLoopInfo(FunctionT &F);
   bool
   computeAndPropagateWeights(FunctionT &F,
@@ -168,9 +184,9 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
   EquivalenceClassMap EquivalenceClass;
 
   /// Dominance, post-dominance and loop information.
-  std::unique_ptr<DominatorTreeT> DT;
-  std::unique_ptr<PostDominatorTreeT> PDT;
-  std::unique_ptr<LoopInfoT> LI;
+  DominatorTreePtrT DT;
+  PostDominatorTreePtrT PDT;
+  LoopInfoPtrT LI;
 
   /// Predecessors for each basic block in the CFG.
   BlockEdgeMap Predecessors;
@@ -190,6 +206,9 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
   /// Name of the profile file to load.
   std::string Filename;
 
+  /// Name of the profile remapping file to load.
+  std::string RemappingFilename;
+
   /// Profile Summary Info computed from sample profile.
   ProfileSummaryInfo *PSI = nullptr;
 
@@ -199,15 +218,17 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
 
 /// Clear all the per-function data used to load samples and propagate weights.
 template <typename BT>
-void SampleProfileLoaderBaseImpl<BT>::clearFunctionData() {
+void SampleProfileLoaderBaseImpl<BT>::clearFunctionData(bool ResetDT) {
   BlockWeights.clear();
   EdgeWeights.clear();
   VisitedBlocks.clear();
   VisitedEdges.clear();
   EquivalenceClass.clear();
-  DT = nullptr;
-  PDT = nullptr;
-  LI = nullptr;
+  if (ResetDT) {
+    DT = nullptr;
+    PDT = nullptr;
+    LI = nullptr;
+  }
   Predecessors.clear();
   Successors.clear();
   CoverageTracker.clear();
@@ -475,7 +496,7 @@ void SampleProfileLoaderBaseImpl<BT>::findEquivalenceClasses(FunctionT &F) {
     // class by making BB2's equivalence class be BB1.
     DominatedBBs.clear();
     DT->getDescendants(BB1, DominatedBBs);
-    findEquivalencesFor(BB1, DominatedBBs, PDT.get());
+    findEquivalencesFor(BB1, DominatedBBs, &*PDT);
 
     LLVM_DEBUG(printBlockEquivalence(dbgs(), BB1));
   }
@@ -692,7 +713,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
     SmallPtrSet<BasicBlockT *, 16> Visited;
     if (!Predecessors[B1].empty())
       llvm_unreachable("Found a stale predecessors list in a basic block.");
-    for (BasicBlockT *B2 : predecessors(B1))
+    for (auto *B2 : getPredecessors(B1))
       if (Visited.insert(B2).second)
         Predecessors[B1].push_back(B2);
 
@@ -700,7 +721,7 @@ void SampleProfileLoaderBaseImpl<BT>::buildEdges(FunctionT &F) {
     Visited.clear();
     if (!Successors[B1].empty())
       llvm_unreachable("Found a stale successors list in a basic block.");
-    for (BasicBlockT *B2 : successors(B1))
+    for (auto *B2 : getSuccessors(B1))
       if (Visited.insert(B2).second)
         Successors[B1].push_back(B2);
   }
@@ -911,12 +932,12 @@ unsigned SampleProfileLoaderBaseImpl<BT>::getFunctionLoc(FunctionT &F) {
 template <typename BT>
 void SampleProfileLoaderBaseImpl<BT>::computeDominanceAndLoopInfo(
     FunctionT &F) {
-  DT.reset(new DominatorTreeT);
+  DT.reset(new DominatorTree);
   DT->recalculate(F);
 
   PDT.reset(new PostDominatorTree(F));
 
-  LI.reset(new LoopInfoT);
+  LI.reset(new LoopInfo);
   LI->analyze(*DT);
 }
 

diff  --git a/llvm/lib/CodeGen/CMakeLists.txt b/llvm/lib/CodeGen/CMakeLists.txt
index 398253fbfce0c..605aee351ef76 100644
--- a/llvm/lib/CodeGen/CMakeLists.txt
+++ b/llvm/lib/CodeGen/CMakeLists.txt
@@ -108,6 +108,7 @@ add_llvm_component_library(LLVMCodeGen
   MachineTraceMetrics.cpp
   MachineVerifier.cpp
   MIRFSDiscriminator.cpp
+  MIRSampleProfile.cpp
   MIRYamlMapping.cpp
   ModuloSchedule.cpp
   MultiHazardRecognizer.cpp

diff  --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
new file mode 100644
index 0000000000000..f4f8d138515ee
--- /dev/null
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -0,0 +1,335 @@
+//===-------- MIRSampleProfile.cpp: MIRSampleFDO (For FSAFDO) -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the implementation of the MIRSampleProfile loader, mainly
+// for flow sensitive SampleFDO.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MIRSampleProfile.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/IR/Function.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+
+using namespace llvm;
+using namespace sampleprof;
+using namespace llvm::sampleprofutil;
+using ProfileCount = Function::ProfileCount;
+
+#define DEBUG_TYPE "fs-profile-loader"
+
+static cl::opt<bool> ShowFSBranchProb(
+    "show-fs-branchprob", cl::Hidden, cl::init(false),
+    cl::desc("Print setting flow sensitive branch probabilities"));
+static cl::opt<unsigned> FSProfileDebugProbDiffThreshold(
+    "fs-profile-debug-prob-diff-threshold", cl::init(10),
+    cl::desc("Only show debug message if the branch probility is greater than "
+             "this value (in percentage)."));
+
+static cl::opt<unsigned> FSProfileDebugBWThreshold(
+    "fs-profile-debug-bw-threshold", cl::init(10000),
+    cl::desc("Only show debug message if the source branch weight is greater "
+             " than this value."));
+
+static cl::opt<bool> ViewBFIBefore("fs-viewbfi-before", cl::Hidden,
+                                   cl::init(false),
+                                   cl::desc("View BFI before MIR loader"));
+static cl::opt<bool> ViewBFIAfter("fs-viewbfi-after", cl::Hidden,
+                                  cl::init(false),
+                                  cl::desc("View BFI after MIR loader"));
+
+char MIRProfileLoaderPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(MIRProfileLoaderPass, DEBUG_TYPE,
+                      "Load MIR Sample Profile",
+                      /* cfg = */ false, /* is_analysis = */ false)
+INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
+INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
+INITIALIZE_PASS_DEPENDENCY(MachineOptimizationRemarkEmitterPass)
+INITIALIZE_PASS_END(MIRProfileLoaderPass, DEBUG_TYPE, "Load MIR Sample Profile",
+                    /* cfg = */ false, /* is_analysis = */ false)
+
+char &llvm::MIRProfileLoaderPassID = MIRProfileLoaderPass::ID;
+
+FunctionPass *llvm::createMIRProfileLoaderPass(std::string File,
+                                               std::string RemappingFile,
+                                               FSDiscriminatorPass P) {
+  return new MIRProfileLoaderPass(File, RemappingFile, P);
+}
+
+namespace llvm {
+
+// Internal option used to control BFI display only after MBP pass.
+// Defined in CodeGen/MachineBlockFrequencyInfo.cpp:
+// -view-block-layout-with-bfi={none | fraction | integer | count}
+extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
+
+// Command line option to specify the name of the function for CFG dump
+// Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
+extern cl::opt<std::string> ViewBlockFreqFuncName;
+
+namespace afdo_detail {
+template <> struct IRTraits<MachineBasicBlock> {
+  using InstructionT = MachineInstr;
+  using BasicBlockT = MachineBasicBlock;
+  using FunctionT = MachineFunction;
+  using BlockFrequencyInfoT = MachineBlockFrequencyInfo;
+  using LoopT = MachineLoop;
+  using LoopInfoPtrT = MachineLoopInfo *;
+  using DominatorTreePtrT = MachineDominatorTree *;
+  using PostDominatorTreePtrT = MachinePostDominatorTree *;
+  using PostDominatorTreeT = MachinePostDominatorTree;
+  using OptRemarkEmitterT = MachineOptimizationRemarkEmitter;
+  using OptRemarkAnalysisT = MachineOptimizationRemarkAnalysis;
+  using PredRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+  using SuccRangeT = iterator_range<std::vector<MachineBasicBlock *>::iterator>;
+  static Function &getFunction(MachineFunction &F) { return F.getFunction(); }
+  static const MachineBasicBlock *getEntryBB(const MachineFunction *F) {
+    return GraphTraits<const MachineFunction *>::getEntryNode(F);
+  }
+  static PredRangeT getPredecessors(MachineBasicBlock *BB) {
+    return BB->predecessors();
+  }
+  static SuccRangeT getSuccessors(MachineBasicBlock *BB) {
+    return BB->successors();
+  }
+};
+} // namespace afdo_detail
+
+class MIRProfileLoader final
+    : public SampleProfileLoaderBaseImpl<MachineBasicBlock> {
+public:
+  void setInitVals(MachineDominatorTree *MDT, MachinePostDominatorTree *MPDT,
+                   MachineLoopInfo *MLI, MachineBlockFrequencyInfo *MBFI,
+                   MachineOptimizationRemarkEmitter *MORE) {
+    DT = MDT;
+    PDT = MPDT;
+    LI = MLI;
+    BFI = MBFI;
+    ORE = MORE;
+  }
+  void setFSPass(FSDiscriminatorPass Pass) {
+    P = Pass;
+    LowBit = getFSPassBitBegin(P);
+    HighBit = getFSPassBitEnd(P);
+    assert(LowBit < HighBit && "HighBit needs to be greater than Lowbit");
+  }
+
+  MIRProfileLoader(StringRef Name, StringRef RemapName)
+      : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)) {
+  }
+
+  void setBranchProbs(MachineFunction &F);
+  bool runOnFunction(MachineFunction &F);
+  bool doInitialization(Module &M);
+  bool isValid() const { return ProfileIsValid; }
+
+protected:
+  friend class SampleCoverageTracker;
+
+  /// Hold the information of the basic block frequency.
+  MachineBlockFrequencyInfo *BFI;
+
+  /// PassNum is the sequence number this pass is called, start from 1.
+  FSDiscriminatorPass P;
+
+  // LowBit in the FS discriminator used by this instance. Note the number is
+  // 0-based. Base discriminator uses bit 0 to bit 11.
+  unsigned LowBit;
+  // HighBit in the FS discriminator used by this instance. Note the number
+  // is 0-based.
+  unsigned HighBit;
+
+  bool ProfileIsValid = true;
+};
+
+template <>
+void SampleProfileLoaderBaseImpl<
+    MachineBasicBlock>::computeDominanceAndLoopInfo(MachineFunction &F) {}
+
+void MIRProfileLoader::setBranchProbs(MachineFunction &F) {
+  LLVM_DEBUG(dbgs() << "\nPropagation complete. Setting branch probs\n");
+  for (auto &BI : F) {
+    MachineBasicBlock *BB = &BI;
+    if (BB->succ_size() < 2)
+      continue;
+    const MachineBasicBlock *EC = EquivalenceClass[BB];
+    uint64_t BBWeight = BlockWeights[EC];
+    uint64_t SumEdgeWeight = 0;
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+                                          SE = BB->succ_end();
+         SI != SE; ++SI) {
+      MachineBasicBlock *Succ = *SI;
+      Edge E = std::make_pair(BB, Succ);
+      SumEdgeWeight += EdgeWeights[E];
+    }
+
+    if (BBWeight != SumEdgeWeight) {
+      LLVM_DEBUG(dbgs() << "BBweight is not equal to SumEdgeWeight: BBWWeight="
+                        << BBWeight << " SumEdgeWeight= " << SumEdgeWeight
+                        << "\n");
+      BBWeight = SumEdgeWeight;
+    }
+    if (BBWeight == 0) {
+      LLVM_DEBUG(dbgs() << "SKIPPED. All branch weights are zero.\n");
+      continue;
+    }
+
+#ifndef NDEBUG
+    uint64_t BBWeightOrig = BBWeight;
+#endif
+    uint32_t MaxWeight = std::numeric_limits<uint32_t>::max();
+    uint32_t Factor = 1;
+    if (BBWeight > MaxWeight) {
+      Factor = BBWeight / MaxWeight + 1;
+      BBWeight /= Factor;
+      LLVM_DEBUG(dbgs() << "Scaling weights by " << Factor << "\n");
+    }
+
+    for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+                                          SE = BB->succ_end();
+         SI != SE; ++SI) {
+      MachineBasicBlock *Succ = *SI;
+      Edge E = std::make_pair(BB, Succ);
+      uint64_t EdgeWeight = EdgeWeights[E];
+      EdgeWeight /= Factor;
+
+      assert(BBWeight >= EdgeWeight &&
+             "BBweight is larger than EdgeWeight -- should not happen.\n");
+
+      BranchProbability OldProb = BFI->getMBPI()->getEdgeProbability(BB, SI);
+      BranchProbability NewProb(EdgeWeight, BBWeight);
+      if (OldProb == NewProb)
+        continue;
+      BB->setSuccProbability(SI, NewProb);
+#ifndef NDEBUG
+      if (!ShowFSBranchProb)
+        continue;
+      bool Show = false;
+      BranchProbability Diff;
+      if (OldProb > NewProb)
+        Diff = OldProb - NewProb;
+      else
+        Diff = NewProb - OldProb;
+      Show = (Diff >= BranchProbability(FSProfileDebugProbDiffThreshold, 100));
+      Show &= (BBWeightOrig >= FSProfileDebugBWThreshold);
+
+      auto DIL = BB->findBranchDebugLoc();
+      auto SuccDIL = Succ->findBranchDebugLoc();
+      if (Show) {
+        dbgs() << "Set branch fs prob: MBB (" << BB->getNumber() << " -> "
+               << Succ->getNumber() << "): ";
+        if (DIL)
+          dbgs() << DIL->getFilename() << ":" << DIL->getLine() << ":"
+                 << DIL->getColumn();
+        if (SuccDIL)
+          dbgs() << "-->" << SuccDIL->getFilename() << ":" << SuccDIL->getLine()
+                 << ":" << SuccDIL->getColumn();
+        dbgs() << " W=" << BBWeightOrig << "  " << OldProb << " --> " << NewProb
+               << "\n";
+      }
+#endif
+    }
+  }
+}
+
+bool MIRProfileLoader::doInitialization(Module &M) {
+  auto &Ctx = M.getContext();
+
+  auto ReaderOrErr = sampleprof::SampleProfileReader::create(Filename, Ctx, P,
+                                                             RemappingFilename);
+  if (std::error_code EC = ReaderOrErr.getError()) {
+    std::string Msg = "Could not open profile: " + EC.message();
+    Ctx.diagnose(DiagnosticInfoSampleProfile(Filename, Msg));
+    return false;
+  }
+
+  Reader = std::move(ReaderOrErr.get());
+  Reader->setModule(&M);
+  ProfileIsValid = (Reader->read() == sampleprof_error::success);
+  Reader->getSummary();
+
+  return true;
+}
+
+bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
+  Function &Func = MF.getFunction();
+  clearFunctionData(false);
+  Samples = Reader->getSamplesFor(Func);
+  if (!Samples || Samples->empty())
+    return false;
+
+  if (getFunctionLoc(MF) == 0)
+    return false;
+
+  DenseSet<GlobalValue::GUID> InlinedGUIDs;
+  bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);
+
+  // Set the new BPI, BFI.
+  setBranchProbs(MF);
+
+  return Changed;
+}
+
+} // namespace llvm
+
+bool MIRProfileLoaderPass::runOnMachineFunction(MachineFunction &MF) {
+  if (!MIRSampleLoader->isValid())
+    return false;
+
+  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Func: "
+                    << MF.getFunction().getName() << "\n");
+  MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
+  MIRSampleLoader->setInitVals(
+      &getAnalysis<MachineDominatorTree>(),
+      &getAnalysis<MachinePostDominatorTree>(), &getAnalysis<MachineLoopInfo>(),
+      MBFI, &getAnalysis<MachineOptimizationRemarkEmitterPass>().getORE());
+
+  MF.RenumberBlocks();
+  if (ViewBFIBefore && ViewBlockLayoutWithBFI != GVDT_None &&
+      (ViewBlockFreqFuncName.empty() ||
+       MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+    MBFI->view("MIR_Prof_loader_b." + MF.getName(), false);
+  }
+
+  bool Changed = MIRSampleLoader->runOnFunction(MF);
+
+  if (ViewBFIAfter && ViewBlockLayoutWithBFI != GVDT_None &&
+      (ViewBlockFreqFuncName.empty() ||
+       MF.getFunction().getName().equals(ViewBlockFreqFuncName))) {
+    MBFI->view("MIR_prof_loader_a." + MF.getName(), false);
+  }
+
+  return Changed;
+}
+
+bool MIRProfileLoaderPass::doInitialization(Module &M) {
+  LLVM_DEBUG(dbgs() << "MIRProfileLoader pass working on Module " << M.getName()
+                    << "\n");
+
+  MIRSampleLoader->setFSPass(P);
+  return MIRSampleLoader->doInitialization(M);
+}
+
+void MIRProfileLoaderPass::getAnalysisUsage(AnalysisUsage &AU) const {
+  AU.setPreservesAll();
+  AU.addRequired<MachineBlockFrequencyInfo>();
+  AU.addRequired<MachineDominatorTree>();
+  AU.addRequired<MachinePostDominatorTree>();
+  AU.addRequiredTransitive<MachineLoopInfo>();
+  AU.addRequired<MachineOptimizationRemarkEmitterPass>();
+  MachineFunctionPass::getAnalysisUsage(AU);
+}

diff  --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp
index c31deceb9ae35..2a90c3154bb41 100644
--- a/llvm/lib/CodeGen/TargetPassConfig.cpp
+++ b/llvm/lib/CodeGen/TargetPassConfig.cpp
@@ -172,6 +172,24 @@ static cl::opt<bool>
     FSNoFinalDiscrim("fs-no-final-discrim", cl::init(false), cl::Hidden,
                      cl::desc("Do not insert FS-AFDO discriminators before "
                               "emit."));
+// Disable MIRProfileLoader before RegAlloc. This is for debugging and
+// tuning purposes.
+static cl::opt<bool> DisableRAFSProfileLoader(
+    "disable-ra-fsprofile-loader", cl::init(true), cl::Hidden,
+    cl::desc("Disable MIRProfileLoader before RegAlloc"));
+// Disable MIRProfileLoader before BlockPlacement. This is for debugging
+// and tuning purposes.
+static cl::opt<bool> DisableLayoutFSProfileLoader(
+    "disable-layout-fsprofile-loader", cl::init(true), cl::Hidden,
+    cl::desc("Disable MIRProfileLoader before BlockPlacement"));
+// Specify FSProfile file name.
+static cl::opt<std::string>
+    FSProfileFile("fs-profile-file", cl::init(""), cl::value_desc("filename"),
+                  cl::desc("Flow Sensitive profile file name."), cl::Hidden);
+// Specify Remapping file for FSProfile.
+static cl::opt<std::string> FSRemappingFile(
+    "fs-remapping-file", cl::init(""), cl::value_desc("filename"),
+    cl::desc("Flow Sensitive profile remapping file name."), cl::Hidden);
 
 // Temporary option to allow experimenting with MachineScheduler as a post-RA
 // scheduler. Targets can "properly" enable this with
@@ -308,6 +326,28 @@ static IdentifyingPassPtr overridePass(AnalysisID StandardID,
   return TargetID;
 }
 
+// Find the FSProfile file name. The internal option takes precedence
+// over the value obtained from TargetMachine.
+static const std::string getFSProfileFile(const TargetMachine *TM) {
+  if (!FSProfileFile.empty())
+    return FSProfileFile.getValue();
+  const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+  if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+    return std::string();
+  return PGOOpt->ProfileFile;
+}
+
+// Find the Profile remapping file name. The internal option takes
+// precedence over the value obtained from TargetMachine.
+static const std::string getFSRemappingFile(const TargetMachine *TM) {
+  if (!FSRemappingFile.empty())
+    return FSRemappingFile.getValue();
+  const Optional<PGOOptions> &PGOOpt = TM->getPGOOption();
+  if (PGOOpt == None || PGOOpt->Action != PGOOptions::SampleUse)
+    return std::string();
+  return PGOOpt->ProfileRemappingFile;
+}
+
 //===---------------------------------------------------------------------===//
 /// TargetPassConfig
 //===---------------------------------------------------------------------===//
@@ -1115,9 +1155,15 @@ void TargetPassConfig::addMachinePasses() {
 
   // Add a FSDiscriminator pass right before RA, so that we could get
   // more precise SampleFDO profile for RA.
-  if (EnableFSDiscriminator)
+  if (EnableFSDiscriminator) {
     addPass(createMIRAddFSDiscriminatorsPass(
         sampleprof::FSDiscriminatorPass::Pass1));
+    const std::string ProfileFile = getFSProfileFile(TM);
+    if (!ProfileFile.empty() && !DisableRAFSProfileLoader)
+      addPass(
+          createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+                                     sampleprof::FSDiscriminatorPass::Pass1));
+  }
 
   // Run register allocation and passes that are tightly coupled with it,
   // including phi elimination and scheduling.
@@ -1471,9 +1517,15 @@ bool TargetPassConfig::addGCPasses() {
 
 /// Add standard basic block placement passes.
 void TargetPassConfig::addBlockPlacement() {
-  if (EnableFSDiscriminator)
+  if (EnableFSDiscriminator) {
     addPass(createMIRAddFSDiscriminatorsPass(
         sampleprof::FSDiscriminatorPass::Pass2));
+    const std::string ProfileFile = getFSProfileFile(TM);
+    if (!ProfileFile.empty() && !DisableLayoutFSProfileLoader)
+      addPass(
+          createMIRProfileLoaderPass(ProfileFile, getFSRemappingFile(TM),
+                                     sampleprof::FSDiscriminatorPass::Pass2));
+  }
   if (addPass(&MachineBlockPlacementID)) {
     // Run a separate pass to collect block placement statistics.
     if (EnableBlockPlacementStats)

diff  --git a/llvm/lib/LTO/LTOBackend.cpp b/llvm/lib/LTO/LTOBackend.cpp
index 6563af0d6c74b..73a48a7fcf223 100644
--- a/llvm/lib/LTO/LTOBackend.cpp
+++ b/llvm/lib/LTO/LTOBackend.cpp
@@ -230,6 +230,8 @@ static void runNewPMPasses(const Config &Conf, Module &Mod, TargetMachine *TM,
     PGOOpt = PGOOptions("", "", "", PGOOptions::NoAction,
                         PGOOptions::NoCSAction, true);
   }
+  if (TM)
+    TM->setPGOOption(PGOOpt);
 
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;

diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 8e9c79fc7bbba..e3e06a21ad561 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -358,10 +358,10 @@ class SampleProfileLoader final
       std::function<AssumptionCache &(Function &)> GetAssumptionCache,
       std::function<TargetTransformInfo &(Function &)> GetTargetTransformInfo,
       std::function<const TargetLibraryInfo &(Function &)> GetTLI)
-      : SampleProfileLoaderBaseImpl(std::string(Name)),
+      : SampleProfileLoaderBaseImpl(std::string(Name), std::string(RemapName)),
         GetAC(std::move(GetAssumptionCache)),
         GetTTI(std::move(GetTargetTransformInfo)), GetTLI(std::move(GetTLI)),
-        RemappingFilename(std::string(RemapName)), LTOPhase(LTOPhase) {}
+        LTOPhase(LTOPhase) {}
 
   bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr);
   bool runOnModule(Module &M, ModuleAnalysisManager *AM,
@@ -417,9 +417,6 @@ class SampleProfileLoader final
   /// Profile tracker for different context.
   std::unique_ptr<SampleContextTracker> ContextTracker;
 
-  /// Name of the profile remapping file to load.
-  std::string RemappingFilename;
-
   /// Flag indicating whether input profile is context-sensitive
   bool ProfileIsCS = false;
 

diff  --git a/llvm/test/CodeGen/X86/Inputs/fsloader.afdo b/llvm/test/CodeGen/X86/Inputs/fsloader.afdo
new file mode 100644
index 0000000000000..debfcd3589579
--- /dev/null
+++ b/llvm/test/CodeGen/X86/Inputs/fsloader.afdo
@@ -0,0 +1,35 @@
+work:42380966:1346190
+ 1: 1246499
+ 5: 1246499
+foo:28798256:4267
+ 0: 4267
+ 2.1: 255999
+ 4: 264627 bar:250018
+ 4.512: 269485 bar:278102
+ 4.4608: 280297 bar:280933
+ 4.12288: 278916 bar:267752
+ 5: 264627
+ 5.4096: 269485
+ 5.8192: 260670
+ 5.8704: 278916
+ 6: 11541
+ 6.3584: 278916 work:284547
+ 6.4096: 260670 work:249428
+ 6.8704: 11541
+ 7: 272442
+ 7.512: 283590
+ 7.4608: 234082
+ 7.9728: 279149
+ 8: 11541
+ 8.11776: 283590 work:305061
+ 8.12288: 279149 work:281368
+ 8.13824: 234082 work:225786
+ 10: 4050
+bar:9504180:1076805
+ 2: 1056020
+ 3: 1056020
+main:20360:0
+ 0: 0
+ 2.1: 4045
+ 3: 4156 foo:4267
+ 5: 0

diff  --git a/llvm/test/CodeGen/X86/fsafdo_test2.ll b/llvm/test/CodeGen/X86/fsafdo_test2.ll
index 7695f3e373dc6..35e4d46f1cce3 100644
--- a/llvm/test/CodeGen/X86/fsafdo_test2.ll
+++ b/llvm/test/CodeGen/X86/fsafdo_test2.ll
@@ -1,4 +1,7 @@
 ; RUN: llc -enable-fs-discriminator < %s | FileCheck %s
+; RUN: llvm-profdata merge --sample -profile-isfs -o %t.afdo %S/Inputs/fsloader.afdo
+; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefix=LOADER
+;
 ;;
 ;; C source code for the test (compiler at -O3):
 ;; // A test case for loop unroll.
@@ -50,6 +53,25 @@
 ; CHECK: .byte   1
 ; CHECK: .size   __llvm_fs_discriminator__, 1
 
+;; Check that new branch probs are generated.
+; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
+; LOADER: Set branch fs prob: MBB (1 -> 2): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
+; LOADER: Set branch fs prob: MBB (3 -> 5): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x7aca7894 / 0x80000000 = 95.93%
+; LOADER: Set branch fs prob: MBB (3 -> 4): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x0535876c / 0x80000000 = 4.07%
+; LOADER: Set branch fs prob: MBB (5 -> 8): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x021c112e / 0x80000000 = 1.65%
+; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7de3eed2 / 0x80000000 = 98.35%
+; LOADER: Set branch fs prob: MBB (8 -> 10): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x00000000 / 0x80000000 = 0.00%
+; LOADER: Set branch fs prob: MBB (8 -> 9): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x80000000 / 0x80000000 = 100.00%
+; LOADER: Set branch fs prob: MBB (10 -> 12): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x7aca7894 / 0x80000000 = 95.93%
+; LOADER: Set branch fs prob: MBB (10 -> 11): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0535876c / 0x80000000 = 4.07%
+; LOADER: Set branch fs prob: MBB (12 -> 14): unroll.c:24:11-->unroll.c:22:11 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x02012507 / 0x80000000 = 1.57%
+; LOADER: Set branch fs prob: MBB (12 -> 13): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x7dfedaf9 / 0x80000000 = 98.43%
+; LOADER: Set branch fs prob: MBB (14 -> 16): unroll.c:22:11-->unroll.c:24:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x0a5856e1 / 0x80000000 = 8.08%
+; LOADER: Set branch fs prob: MBB (14 -> 15): unroll.c:22:11 W=283590  0x40000000 / 0x80000000 = 50.00% --> 0x75a7a91f / 0x80000000 = 91.92%
+; LOADER: Set branch fs prob: MBB (16 -> 18): unroll.c:24:11-->unroll.c:19:3 W=283590  0x30000000 / 0x80000000 = 37.50% --> 0x16588166 / 0x80000000 = 17.46%
+; LOADER: Set branch fs prob: MBB (16 -> 17): unroll.c:24:11 W=283590  0x50000000 / 0x80000000 = 62.50% --> 0x69a77e9a / 0x80000000 = 82.54%
+
+
 target triple = "x86_64-unknown-linux-gnu"
 
 @sum = dso_local local_unnamed_addr global i32 0, align 4

diff  --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp
index d88636a4d784c..6c11da5648353 100644
--- a/llvm/tools/opt/NewPMDriver.cpp
+++ b/llvm/tools/opt/NewPMDriver.cpp
@@ -284,6 +284,9 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM,
       P->CSAction = PGOOptions::CSIRUse;
     }
   }
+  if (TM)
+    TM->setPGOOption(P);
+
   LoopAnalysisManager LAM;
   FunctionAnalysisManager FAM;
   CGSCCAnalysisManager CGAM;


        


More information about the llvm-commits mailing list