[llvm] b7d9322 - [FS-AFDO] Load pseudo probe profile on MIR

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Wed May 10 11:34:09 PDT 2023


Author: Hongtao Yu
Date: 2023-05-10T11:29:37-07:00
New Revision: b7d9322b4963e620dfd12246816e6f7b2da5fd88

URL: https://github.com/llvm/llvm-project/commit/b7d9322b4963e620dfd12246816e6f7b2da5fd88
DIFF: https://github.com/llvm/llvm-project/commit/b7d9322b4963e620dfd12246816e6f7b2da5fd88.diff

LOG: [FS-AFDO] Load pseudo probe profile on MIR

This change enables loading pseudo-probe-based profiles on MIR. Unlike the IR profile loader, the MIR profile loader excludes callsites, since they are not assigned an FS discriminator. Using zero as the discriminator is not accurate and would undo the distribution work done by the IR loader based on the pseudo probe distribution factor. We rely on block probes only for FS profile loading.

Some refactoring is done to the IR profile loader so that `getProbeWeight` can be shared by both loaders.

Reviewed By: wenlei

Differential Revision: https://reviews.llvm.org/D148584

Added: 
    llvm/test/CodeGen/X86/Inputs/fsloader-probe.afdo
    llvm/test/CodeGen/X86/fsafdo_probe2.ll

Modified: 
    llvm/include/llvm/IR/PseudoProbe.h
    llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
    llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
    llvm/lib/CodeGen/MIRSampleProfile.cpp
    llvm/lib/IR/PseudoProbe.cpp
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
    llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index 60b1be169f87d..d29192276b3d6 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -21,6 +21,7 @@
 namespace llvm {
 
 class Instruction;
+class DILocation;
 
 constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
 
@@ -78,10 +79,22 @@ struct PseudoProbeDwarfDiscriminator {
   constexpr static uint8_t FullDistributionFactor = 100;
 };
 
+class PseudoProbeDescriptor {
+  uint64_t FunctionGUID;
+  uint64_t FunctionHash;
+
+public:
+  PseudoProbeDescriptor(uint64_t GUID, uint64_t Hash)
+      : FunctionGUID(GUID), FunctionHash(Hash) {}
+  uint64_t getFunctionGUID() const { return FunctionGUID; }
+  uint64_t getFunctionHash() const { return FunctionHash; }
+};
+
 struct PseudoProbe {
   uint32_t Id;
   uint32_t Type;
   uint32_t Attr;
+  uint32_t Discriminator;
   // Distribution factor that estimates the portion of the real execution count.
   // A saturated distribution factor stands for 1.0 or 100%. A pesudo probe has
   // a factor with the value ranged from 0.0 to 1.0.

diff  --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index bd511f7609820..601fe6ce8a2e2 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -40,16 +40,6 @@ using ProbeFactorMap = std::unordered_map<std::pair<uint64_t, uint64_t>, float,
                                           pair_hash<uint64_t, uint64_t>>;
 using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
 
-class PseudoProbeDescriptor {
-  uint64_t FunctionGUID;
-  uint64_t FunctionHash;
-
-public:
-  PseudoProbeDescriptor(uint64_t GUID, uint64_t Hash)
-      : FunctionGUID(GUID), FunctionHash(Hash) {}
-  uint64_t getFunctionGUID() const { return FunctionGUID; }
-  uint64_t getFunctionHash() const { return FunctionHash; }
-};
 
 // A pseudo probe verifier that can be run after each IR passes to detect the
 // violation of updating probe factors. In principle, the sum of distribution
@@ -78,20 +68,6 @@ class PseudoProbeVerifier {
                           const ProbeFactorMap &ProbeFactors);
 };
 
-// This class serves sample counts correlation for SampleProfileLoader by
-// analyzing pseudo probes and their function descriptors injected by
-// SampleProfileProber.
-class PseudoProbeManager {
-  DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap;
-
-  const PseudoProbeDescriptor *getDesc(const Function &F) const;
-
-public:
-  PseudoProbeManager(const Module &M);
-  bool moduleIsProbed(const Module &M) const;
-  bool profileIsValid(const Function &F, const FunctionSamples &Samples) const;
-};
-
 /// Sample profile pseudo prober.
 ///
 /// Insert pseudo probes for block sampling and value sampling.

diff  --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 502867da3c504..6e819c6568eab 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -34,6 +34,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
+#include "llvm/IR/PseudoProbe.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/ProfileData/SampleProfReader.h"
 #include "llvm/Support/CommandLine.h"
@@ -80,6 +81,55 @@ template <> struct IRTraits<BasicBlock> {
 
 } // end namespace afdo_detail
 
+// This class serves sample counts correlation for SampleProfileLoader by
+// analyzing pseudo probes and their function descriptors injected by
+// SampleProfileProber.
+class PseudoProbeManager {
+  DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap;
+
+  const PseudoProbeDescriptor *getDesc(const Function &F) const {
+    auto I = GUIDToProbeDescMap.find(
+        Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
+    return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
+  }
+
+public:
+  PseudoProbeManager(const Module &M) {
+    if (NamedMDNode *FuncInfo =
+            M.getNamedMetadata(PseudoProbeDescMetadataName)) {
+      for (const auto *Operand : FuncInfo->operands()) {
+        const auto *MD = cast<MDNode>(Operand);
+        auto GUID = mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))
+                        ->getZExtValue();
+        auto Hash = mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))
+                        ->getZExtValue();
+        GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
+      }
+    }
+  }
+
+  bool moduleIsProbed(const Module &M) const {
+    return M.getNamedMetadata(PseudoProbeDescMetadataName);
+  }
+
+  bool profileIsValid(const Function &F, const FunctionSamples &Samples) const {
+    const auto *Desc = getDesc(F);
+    if (!Desc) {
+      LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function "
+                        << F.getName() << "\n");
+      return false;
+    }
+    if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
+      LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
+                        << "\n");
+      return false;
+    }
+    return true;
+  }
+};
+
+
+
 extern cl::opt<bool> SampleProfileUseProfi;
 
 template <typename BT> class SampleProfileLoaderBaseImpl {
@@ -137,6 +187,7 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
   unsigned getFunctionLoc(FunctionT &Func);
   virtual ErrorOr<uint64_t> getInstWeight(const InstructionT &Inst);
   ErrorOr<uint64_t> getInstWeightImpl(const InstructionT &Inst);
+  virtual ErrorOr<uint64_t> getProbeWeight(const InstructionT &Inst);
   ErrorOr<uint64_t> getBlockWeight(const BasicBlockT *BB);
   mutable DenseMap<const DILocation *, const FunctionSamples *>
       DILocation2SampleMap;
@@ -212,6 +263,9 @@ template <typename BT> class SampleProfileLoaderBaseImpl {
   /// Profile reader object.
   std::unique_ptr<SampleProfileReader> Reader;
 
+  // A pseudo probe helper to correlate the imported sample counts.
+  std::unique_ptr<PseudoProbeManager> ProbeManager;
+
   /// Samples collected for the body of this function.
   FunctionSamples *Samples = nullptr;
 
@@ -299,6 +353,8 @@ void SampleProfileLoaderBaseImpl<BT>::printBlockWeight(
 template <typename BT>
 ErrorOr<uint64_t>
 SampleProfileLoaderBaseImpl<BT>::getInstWeight(const InstructionT &Inst) {
+  if (FunctionSamples::ProfileIsProbeBased)
+    return getProbeWeight(Inst);
   return getInstWeightImpl(Inst);
 }
 
@@ -346,6 +402,65 @@ SampleProfileLoaderBaseImpl<BT>::getInstWeightImpl(const InstructionT &Inst) {
   return R;
 }
 
+// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
+// of non-probe instruction. So if all instructions of the BB give error_code,
+// tell the inference algorithm to infer the BB weight.
+template <typename BT>
+ErrorOr<uint64_t>
+SampleProfileLoaderBaseImpl<BT>::getProbeWeight(const InstructionT &Inst) {
+  assert(FunctionSamples::ProfileIsProbeBased &&
+         "Profile is not pseudo probe based");
+  std::optional<PseudoProbe> Probe = extractProbe(Inst);
+  // Ignore the non-probe instruction. If none of the instruction in the BB is
+  // probe, we choose to infer the BB's weight.
+  if (!Probe)
+    return std::error_code();
+
+  const FunctionSamples *FS = findFunctionSamples(Inst);
+  // If none of the instruction has FunctionSample, we choose to return zero
+  // value sample to indicate the BB is cold. This could happen when the
+  // instruction is from inlinee and no profile data is found.
+  // FIXME: This should not be affected by the source drift issue as 1) if the
+  // newly added function is top-level inliner, it won't match the CFG checksum
+  // in the function profile or 2) if it's the inlinee, the inlinee should have
+  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
+  // we can improve it by adding a switch for profile-sample-block-accurate for
+  // block level counts in the future.
+  if (!FS)
+    return 0;
+
+  auto R = FS->findSamplesAt(Probe->Id, Probe->Discriminator);
+  if (R) {
+    uint64_t Samples = R.get() * Probe->Factor;
+    bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
+    if (FirstMark) {
+      ORE->emit([&]() {
+        OptRemarkAnalysisT Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
+        Remark << "Applied " << ore::NV("NumSamples", Samples);
+        Remark << " samples from profile (ProbeId=";
+        Remark << ore::NV("ProbeId", Probe->Id);
+        if (Probe->Discriminator) {
+          Remark << ".";
+          Remark << ore::NV("Discriminator", Probe->Discriminator);
+        }
+        Remark << ", Factor=";
+        Remark << ore::NV("Factor", Probe->Factor);
+        Remark << ", OriginalSamples=";
+        Remark << ore::NV("OriginalSamples", R.get());
+        Remark << ")";
+        return Remark;
+      });
+    }
+    LLVM_DEBUG({dbgs() << "    " << Probe->Id;
+      if (Probe->Discriminator)
+        dbgs() << "." << Probe->Discriminator;
+      dbgs() << ":" << Inst << " - weight: " << R.get()
+             << " - factor: " << format("%0.2f", Probe->Factor) << ")\n";});
+    return Samples;
+  }
+  return R;
+}
+
 /// Compute the weight of a basic block.
 ///
 /// The weight of basic block \p BB is the maximum weight of all the

diff  --git a/llvm/lib/CodeGen/MIRSampleProfile.cpp b/llvm/lib/CodeGen/MIRSampleProfile.cpp
index 2eb29a7860eb7..6d8b3b8cabf6c 100644
--- a/llvm/lib/CodeGen/MIRSampleProfile.cpp
+++ b/llvm/lib/CodeGen/MIRSampleProfile.cpp
@@ -18,11 +18,13 @@
 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/IR/Function.h"
+#include "llvm/IR/PseudoProbe.h"
 #include "llvm/InitializePasses.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
@@ -30,6 +32,7 @@
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
 #include "llvm/Transforms/Utils/SampleProfileLoaderBaseUtil.h"
+#include <optional>
 
 using namespace llvm;
 using namespace sampleprof;
@@ -92,6 +95,22 @@ extern cl::opt<GVDAGType> ViewBlockLayoutWithBFI;
 // Defined in Analysis/BlockFrequencyInfo.cpp:  -view-bfi-func-name=
 extern cl::opt<std::string> ViewBlockFreqFuncName;
 
+std::optional<PseudoProbe> extractProbe(const MachineInstr &MI) {
+  if (MI.isPseudoProbe()) {
+    PseudoProbe Probe;
+    Probe.Id = MI.getOperand(1).getImm();
+    Probe.Type = MI.getOperand(2).getImm();
+    Probe.Attr = MI.getOperand(3).getImm();
+    Probe.Factor = 1;
+    DILocation *DebugLoc = MI.getDebugLoc();
+    Probe.Discriminator = DebugLoc ? DebugLoc->getDiscriminator() : 0;
+    return Probe;
+  }
+
+  // Ignore callsite probes since they do not have FS discriminators.
+  return std::nullopt;
+}
+
 namespace afdo_detail {
 template <> struct IRTraits<MachineBasicBlock> {
   using InstructionT = MachineInstr;
@@ -167,6 +186,8 @@ class MIRProfileLoader final
 
   bool ProfileIsValid = true;
   ErrorOr<uint64_t> getInstWeight(const MachineInstr &MI) override {
+    if (FunctionSamples::ProfileIsProbeBased)
+      return getProbeWeight(MI);
     if (ImprovedFSDiscriminator && MI.isMetaInstruction())
       return std::error_code();
     return getInstWeightImpl(MI);
@@ -275,6 +296,14 @@ bool MIRProfileLoader::doInitialization(Module &M) {
   Reader->setModule(&M);
   ProfileIsValid = (Reader->read() == sampleprof_error::success);
 
+  // Load pseudo probe descriptors for probe-based function samples.
+  if (Reader->profileIsProbeBased()) {
+    ProbeManager = std::make_unique<PseudoProbeManager>(M);
+    if (!ProbeManager->moduleIsProbed(M)) {
+      return false;
+    }
+  }
+
   return true;
 }
 
@@ -285,8 +314,13 @@ bool MIRProfileLoader::runOnFunction(MachineFunction &MF) {
   if (!Samples || Samples->empty())
     return false;
 
-  if (getFunctionLoc(MF) == 0)
-    return false;
+  if (FunctionSamples::ProfileIsProbeBased) {
+    if (!ProbeManager->profileIsValid(MF.getFunction(), *Samples))
+      return false;
+  } else {
+    if (getFunctionLoc(MF) == 0)
+      return false;
+  }
 
   DenseSet<GlobalValue::GUID> InlinedGUIDs;
   bool Changed = computeAndPropagateWeights(MF, InlinedGUIDs);

diff  --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index f3802af26a614..b3dc5ae32023f 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -22,12 +22,8 @@ using namespace llvm;
 namespace llvm {
 
 std::optional<PseudoProbe>
-extractProbeFromDiscriminator(const Instruction &Inst) {
-  assert(isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst) &&
-         "Only call instructions should have pseudo probe encodes as their "
-         "Dwarf discriminators");
-  if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
-    const DILocation *DIL = DLoc;
+extractProbeFromDiscriminator(const DILocation *DIL) {
+  if (DIL) {
     auto Discriminator = DIL->getDiscriminator();
     if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
       PseudoProbe Probe;
@@ -40,12 +36,23 @@ extractProbeFromDiscriminator(const Instruction &Inst) {
       Probe.Factor =
           PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
           (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+      Probe.Discriminator = 0;
       return Probe;
     }
   }
   return std::nullopt;
 }
 
+std::optional<PseudoProbe>
+extractProbeFromDiscriminator(const Instruction &Inst) {
+  assert(isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst) &&
+         "Only call instructions should have pseudo probe encodes as their "
+         "Dwarf discriminators");
+  if (const DebugLoc &DLoc = Inst.getDebugLoc())
+    return extractProbeFromDiscriminator(DLoc);
+  return std::nullopt;
+}
+
 std::optional<PseudoProbe> extractProbe(const Instruction &Inst) {
   if (const auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
     PseudoProbe Probe;
@@ -54,6 +61,11 @@ std::optional<PseudoProbe> extractProbe(const Instruction &Inst) {
     Probe.Attr = II->getAttributes()->getZExtValue();
     Probe.Factor = II->getFactor()->getZExtValue() /
                    (float)PseudoProbeFullDistributionFactor;
+    Probe.Discriminator = 0;
+    if (const DebugLoc &DLoc = Inst.getDebugLoc())
+      Probe.Discriminator = DLoc->getDiscriminator();
+    assert(Probe.Discriminator == 0 &&
+           "Unexpected non-zero FS-discriminator for IR pseudo probes");
     return Probe;
   }
 

diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 25ebff8ac1681..b06b9480bbc02 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -532,7 +532,6 @@ class SampleProfileLoader final
   bool runOnFunction(Function &F, ModuleAnalysisManager *AM);
   bool emitAnnotations(Function &F);
   ErrorOr<uint64_t> getInstWeight(const Instruction &I) override;
-  ErrorOr<uint64_t> getProbeWeight(const Instruction &I);
   const FunctionSamples *findCalleeFunctionSamples(const CallBase &I) const;
   const FunctionSamples *
   findFunctionSamples(const Instruction &I) const override;
@@ -628,9 +627,6 @@ class SampleProfileLoader final
   // External inline advisor used to replay inline decision from remarks.
   std::unique_ptr<InlineAdvisor> ExternalInlineAdvisor;
 
-  // A pseudo probe helper to correlate the imported sample counts.
-  std::unique_ptr<PseudoProbeManager> ProbeManager;
-
   // A helper to implement the sample profile matching algorithm.
   std::unique_ptr<SampleProfileMatcher> MatchingManager;
 
@@ -669,68 +665,6 @@ ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
   return getInstWeightImpl(Inst);
 }
 
-// Here use error_code to represent: 1) The dangling probe. 2) Ignore the weight
-// of non-probe instruction. So if all instructions of the BB give error_code,
-// tell the inference algorithm to infer the BB weight.
-ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
-  assert(FunctionSamples::ProfileIsProbeBased &&
-         "Profile is not pseudo probe based");
-  std::optional<PseudoProbe> Probe = extractProbe(Inst);
-  // Ignore the non-probe instruction. If none of the instruction in the BB is
-  // probe, we choose to infer the BB's weight.
-  if (!Probe)
-    return std::error_code();
-
-  const FunctionSamples *FS = findFunctionSamples(Inst);
-  // If none of the instruction has FunctionSample, we choose to return zero
-  // value sample to indicate the BB is cold. This could happen when the
-  // instruction is from inlinee and no profile data is found.
-  // FIXME: This should not be affected by the source drift issue as 1) if the
-  // newly added function is top-level inliner, it won't match the CFG checksum
-  // in the function profile or 2) if it's the inlinee, the inlinee should have
-  // a profile, otherwise it wouldn't be inlined. For non-probe based profile,
-  // we can improve it by adding a switch for profile-sample-block-accurate for
-  // block level counts in the future.
-  if (!FS)
-    return 0;
-
-  // For non-CS profile, If a direct call/invoke instruction is inlined in
-  // profile (findCalleeFunctionSamples returns non-empty result), but not
-  // inlined here, it means that the inlined callsite has no sample, thus the
-  // call instruction should have 0 count.
-  // For CS profile, the callsite count of previously inlined callees is
-  // populated with the entry count of the callees.
-  if (!FunctionSamples::ProfileIsCS)
-    if (const auto *CB = dyn_cast<CallBase>(&Inst))
-      if (!CB->isIndirectCall() && findCalleeFunctionSamples(*CB))
-        return 0;
-
-  const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
-  if (R) {
-    uint64_t Samples = R.get() * Probe->Factor;
-    bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
-    if (FirstMark) {
-      ORE->emit([&]() {
-        OptimizationRemarkAnalysis Remark(DEBUG_TYPE, "AppliedSamples", &Inst);
-        Remark << "Applied " << ore::NV("NumSamples", Samples);
-        Remark << " samples from profile (ProbeId=";
-        Remark << ore::NV("ProbeId", Probe->Id);
-        Remark << ", Factor=";
-        Remark << ore::NV("Factor", Probe->Factor);
-        Remark << ", OriginalSamples=";
-        Remark << ore::NV("OriginalSamples", R.get());
-        Remark << ")";
-        return Remark;
-      });
-    }
-    LLVM_DEBUG(dbgs() << "    " << Probe->Id << ":" << Inst
-                      << " - weight: " << R.get() << " - factor: "
-                      << format("%0.2f", Probe->Factor) << ")\n");
-    return Samples;
-  }
-  return R;
-}
-
 /// Get the FunctionSamples for a call instruction.
 ///
 /// The FunctionSamples of a call/invoke instruction \p Inst is the inlined

diff  --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 5097b619f0971..6ca794f9d38d4 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -166,47 +166,6 @@ void PseudoProbeVerifier::verifyProbeFactors(
   }
 }
 
-PseudoProbeManager::PseudoProbeManager(const Module &M) {
-  if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
-    for (const auto *Operand : FuncInfo->operands()) {
-      const auto *MD = cast<MDNode>(Operand);
-      auto GUID =
-          mdconst::dyn_extract<ConstantInt>(MD->getOperand(0))->getZExtValue();
-      auto Hash =
-          mdconst::dyn_extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
-      GUIDToProbeDescMap.try_emplace(GUID, PseudoProbeDescriptor(GUID, Hash));
-    }
-  }
-}
-
-const PseudoProbeDescriptor *
-PseudoProbeManager::getDesc(const Function &F) const {
-  auto I = GUIDToProbeDescMap.find(
-      Function::getGUID(FunctionSamples::getCanonicalFnName(F)));
-  return I == GUIDToProbeDescMap.end() ? nullptr : &I->second;
-}
-
-bool PseudoProbeManager::moduleIsProbed(const Module &M) const {
-  return M.getNamedMetadata(PseudoProbeDescMetadataName);
-}
-
-bool PseudoProbeManager::profileIsValid(const Function &F,
-                                        const FunctionSamples &Samples) const {
-  const auto *Desc = getDesc(F);
-  if (!Desc) {
-    LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function " << F.getName()
-                      << "\n");
-    return false;
-  } else {
-    if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
-      LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
-                        << "\n");
-      return false;
-    }
-  }
-  return true;
-}
-
 SampleProfileProber::SampleProfileProber(Function &Func,
                                          const std::string &CurModuleUniqueId)
     : F(&Func), CurModuleUniqueId(CurModuleUniqueId) {

diff  --git a/llvm/test/CodeGen/X86/Inputs/fsloader-probe.afdo b/llvm/test/CodeGen/X86/Inputs/fsloader-probe.afdo
new file mode 100644
index 0000000000000..07785a196d2ce
--- /dev/null
+++ b/llvm/test/CodeGen/X86/Inputs/fsloader-probe.afdo
@@ -0,0 +1,40 @@
+foo:884430:431
+ 1: 431
+ 2: 431
+ 2.2048: 19368
+ 4: 19332
+ 4.512: 24813
+ 4.4608: 20867
+ 4.9216: 19368
+ 4.491520: 24782
+ 5: 19332
+ 5.2560: 24813
+ 5.6144: 20867
+ 5.14336: 24782
+ 6: 0
+ 6.4608: 0
+ 6.15872: 26051
+ 6.98304: 25893
+ 7: 24465
+ 7.1024: 25581
+ 7.9216: 26128
+ 7.11264: 24371
+ 8: 0
+ 8.11776: 26128
+ 8.12288: 25581
+ 8.13824: 24371
+ 9: 24782
+ 9.7168: 19368
+ 9.10752: 20867
+ 9.14848: 24813
+ 10: 24782
+ 10.512: 24813
+ 10.4608: 20867
+ 10.9216: 19368
+ 11: 19368
+ 12: 19368
+ 13: 461
+ 14: 98698 bar:98698
+ 15: 51957 work:51957
+ 16: 76609 work:76609
+ !CFGChecksum: 844700110938769

diff  --git a/llvm/test/CodeGen/X86/fsafdo_probe2.ll b/llvm/test/CodeGen/X86/fsafdo_probe2.ll
new file mode 100644
index 0000000000000..2b02eed358d02
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fsafdo_probe2.ll
@@ -0,0 +1,322 @@
+; RUN: llvm-profdata merge --sample -profile-isfs --extbinary -o %t.afdo %S/Inputs/fsloader-probe.afdo
+; RUN: llc -enable-fs-discriminator -fs-profile-file=%t.afdo -show-fs-branchprob -disable-ra-fsprofile-loader=false -disable-layout-fsprofile-loader=false < %s 2>&1 | FileCheck %s --check-prefix=LOADER
+;
+;;
+;; C source code for the test.
+;; Compiled with clang -O3 -g -fdebug-info-for-profiling -fpseudo-probe-for-profiling -mllvm --enable-fs-discriminator
+;; // A test case for loop unroll.
+;;
+;; __attribute__((noinline)) int bar(int i){
+;;   volatile int j;
+;;   j = i;
+;;   return j;
+;; }
+;;
+;; unsigned sum;
+;; __attribute__((noinline)) void work(int i){
+;;   if (sum % 7)
+;;     sum += i;
+;;   else
+;;     sum -= i;
+;; }
+;;
+;; __attribute__((noinline)) void foo(){
+;;   int i, j;
+;;   for (j = 0; j < 48; j++)
+;;     for (i = 0; i < 4; i++) {
+;;       int ii = bar(i+j*48);
+;;       if (ii % 2)
+;;         work(ii*2);
+;;       if (ii % 4)
+;;         work(ii*3);
+;;   }
+;; }
+;;
+;; int main() {
+;;   int i;
+;;   for (i = 0; i < 10000000; i++) {
+;;     foo();
+;;   }
+;; }
+;;
+;;
+
+;; Check that new branch probs are generated.
+
+; LOADER: Set branch fs prob: MBB (3 -> 5): unroll.c:22:12-->unroll.c:20:12 W=44114  0x30000000 / 0x80000000 = 37.50% --> 0x80000000 / 0x80000000 = 100.00%
+; LOADER: Set branch fs prob: MBB (3 -> 4): unroll.c:22:12 W=44114  0x50000000 / 0x80000000 = 62.50% --> 0x00000000 / 0x80000000 = 0.00%
+; LOADER: Set branch fs prob: MBB (9 -> 11): unroll.c:20:12-->unroll.c:22:12 W=44114  0x40000000 / 0x80000000 = 50.00% --> 0x80000000 / 0x80000000 = 100.00%
+; LOADER: Set branch fs prob: MBB (9 -> 10): unroll.c:20:12 W=44114  0x40000000 / 0x80000000 = 50.00% --> 0x00000000 / 0x80000000 = 0.00%
+; LOADER: Set branch fs prob: MBB (1 -> 3): unroll.c:20:12-->unroll.c:22:12 W=26128  0x34de9bd3 / 0x80000000 = 41.30% --> 0x80000000 / 0x80000000 = 100.00%
+; LOADER: Set branch fs prob: MBB (1 -> 2): unroll.c:20:12 W=26128  0x4b21642d / 0x80000000 = 58.70% --> 0x00000000 / 0x80000000 = 0.00%
+; LOADER: Set branch fs prob: MBB (5 -> 7): unroll.c:20:12-->unroll.c:22:12 W=26128  0x34693ef1 / 0x80000000 = 40.95% --> 0x0060917b / 0x80000000 = 0.29%
+; LOADER: Set branch fs prob: MBB (5 -> 6): unroll.c:20:12 W=26128  0x4b96c10f / 0x80000000 = 59.05% --> 0x7f9f6e85 / 0x80000000 = 99.71%
+; LOADER: Set branch fs prob: MBB (7 -> 9): unroll.c:22:12-->unroll.c:20:12 W=26128  0x34300cd0 / 0x80000000 = 40.77% --> 0x00000000 / 0x80000000 = 0.00%
+; LOADER: Set branch fs prob: MBB (7 -> 8): unroll.c:22:12 W=26128  0x4bcff330 / 0x80000000 = 59.23% --> 0x80000000 / 0x80000000 = 100.00%
+; LOADER: Set branch fs prob: MBB (11 -> 13): unroll.c:22:12-->unroll.c:20:12 W=26128  0x35c65cf7 / 0x80000000 = 42.01% --> 0x02ae02d2 / 0x80000000 = 2.09%
+; LOADER: Set branch fs prob: MBB (11 -> 12): unroll.c:22:12 W=26128  0x4a39a309 / 0x80000000 = 57.99% --> 0x7d51fd2e / 0x80000000 = 97.91%
+; LOADER: Set branch fs prob: MBB (13 -> 15): unroll.c:20:12-->unroll.c:22:12 W=26128  0x34de9bd3 / 0x80000000 = 41.30% --> 0x0126b8ac / 0x80000000 = 0.90%
+; LOADER: Set branch fs prob: MBB (13 -> 14): unroll.c:20:12 W=26128  0x4b21642d / 0x80000000 = 58.70% --> 0x7ed94754 / 0x80000000 = 99.10%
+; LOADER: Set branch fs prob: MBB (15 -> 17): unroll.c:22:12-->unroll.c:17:4 W=26128  0x3949278b / 0x80000000 = 44.75% --> 0x089b8337 / 0x80000000 = 6.72%
+; LOADER: Set branch fs prob: MBB (15 -> 16): unroll.c:22:12 W=26128  0x46b6d875 / 0x80000000 = 55.25% --> 0x77647cc9 / 0x80000000 = 93.28%
+
+
+
+target triple = "x86_64-unknown-linux-gnu"
+
+
+@sum = dso_local local_unnamed_addr global i32 0, align 4, !dbg !0
+@__llvm_fs_discriminator__ = weak_odr constant i1 true
+@llvm.used = appending global [1 x ptr] [ptr @__llvm_fs_discriminator__], section "llvm.metadata"
+
+; Function Attrs: nofree noinline nounwind memory(inaccessiblemem: readwrite) uwtable
+declare dso_local i32 @bar(i32 noundef %i) local_unnamed_addr #0
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: none, inaccessiblemem: none) uwtable
+declare dso_local void @work(i32 noundef %i) local_unnamed_addr #3
+
+; Function Attrs: nofree noinline nounwind uwtable
+define dso_local void @foo() local_unnamed_addr #4 !dbg !47 {
+entry:
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg !59
+  call void @llvm.dbg.value(metadata i32 0, metadata !52, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !61
+  br label %for.cond1.preheader, !dbg !63
+
+for.cond1.preheader:                              ; preds = %entry, %if.end9.3
+  %lsr.iv = phi i32 [ 3, %entry ], [ %lsr.iv.next, %if.end9.3 ]
+  call void @llvm.dbg.value(metadata i32 %lsr.iv, metadata !52, metadata !DIExpression(DW_OP_consts, 3, DW_OP_minus, DW_OP_consts, 48, DW_OP_div, DW_OP_stack_value)), !dbg !60
+  call void @llvm.dbg.value(metadata i32 0, metadata !51, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !65
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 5, i32 0, i64 -1), !dbg !67
+  %0 = add i32 %lsr.iv, -3, !dbg !65
+  call void @llvm.dbg.value(metadata i32 0, metadata !51, metadata !DIExpression()), !dbg !60
+  %call = tail call i32 @bar(i32 noundef %0), !dbg !68
+  call void @llvm.dbg.value(metadata i32 %call, metadata !53, metadata !DIExpression()), !dbg !70
+  %1 = and i32 %call, 1, !dbg !71
+  %tobool.not = icmp eq i32 %1, 0, !dbg !71
+  br i1 %tobool.not, label %if.end, label %if.then, !dbg !73
+
+if.then:                                          ; preds = %for.cond1.preheader
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1), !dbg !74
+  %mul4 = shl nsw i32 %call, 1, !dbg !75
+  tail call void @work(i32 noundef %mul4), !dbg !76
+  br label %if.end, !dbg !78
+
+if.end:                                           ; preds = %if.then, %for.cond1.preheader
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !79
+  %2 = and i32 %call, 3, !dbg !81
+  %tobool6.not = icmp eq i32 %2, 0, !dbg !81
+  br i1 %tobool6.not, label %if.end9, label %if.then7, !dbg !82
+
+if.then7:                                         ; preds = %if.end
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 8, i32 0, i64 -1), !dbg !83
+  %mul8 = mul nsw i32 %call, 3, !dbg !84
+  tail call void @work(i32 noundef %mul8), !dbg !85
+  br label %if.end9, !dbg !87
+
+if.end9:                                          ; preds = %if.then7, %if.end
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 9, i32 0, i64 -1), !dbg !88
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 10, i32 0, i64 -1), !dbg !89
+  call void @llvm.dbg.value(metadata i32 1, metadata !51, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !65
+  call void @llvm.dbg.value(metadata i32 1, metadata !51, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 5, i32 0, i64 -1), !dbg !67
+  %3 = add i32 %lsr.iv, -2, !dbg !68
+  %call.1 = tail call i32 @bar(i32 noundef %3), !dbg !68
+  call void @llvm.dbg.value(metadata i32 %call.1, metadata !53, metadata !DIExpression()), !dbg !70
+  %4 = and i32 %call.1, 1, !dbg !71
+  %tobool.not.1 = icmp eq i32 %4, 0, !dbg !71
+  br i1 %tobool.not.1, label %if.end.1, label %if.then.1, !dbg !73
+
+if.then.1:                                        ; preds = %if.end9
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1), !dbg !74
+  %mul4.1 = shl nsw i32 %call.1, 1, !dbg !75
+  tail call void @work(i32 noundef %mul4.1), !dbg !76
+  br label %if.end.1, !dbg !78
+
+if.end.1:                                         ; preds = %if.then.1, %if.end9
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !79
+  %5 = and i32 %call.1, 3, !dbg !81
+  %tobool6.not.1 = icmp eq i32 %5, 0, !dbg !81
+  br i1 %tobool6.not.1, label %if.end9.1, label %if.then7.1, !dbg !82
+
+if.then7.1:                                       ; preds = %if.end.1
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 8, i32 0, i64 -1), !dbg !83
+  %mul8.1 = mul nsw i32 %call.1, 3, !dbg !84
+  tail call void @work(i32 noundef %mul8.1), !dbg !85
+  br label %if.end9.1, !dbg !87
+
+if.end9.1:                                        ; preds = %if.then7.1, %if.end.1
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 9, i32 0, i64 -1), !dbg !88
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 10, i32 0, i64 -1), !dbg !89
+  call void @llvm.dbg.value(metadata i32 2, metadata !51, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !65
+  call void @llvm.dbg.value(metadata i32 2, metadata !51, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 5, i32 0, i64 -1), !dbg !67
+  %6 = add i32 %lsr.iv, -1, !dbg !68
+  %call.2 = tail call i32 @bar(i32 noundef %6), !dbg !68
+  call void @llvm.dbg.value(metadata i32 %call.2, metadata !53, metadata !DIExpression()), !dbg !70
+  %7 = and i32 %call.2, 1, !dbg !71
+  %tobool.not.2 = icmp eq i32 %7, 0, !dbg !71
+  br i1 %tobool.not.2, label %if.end.2, label %if.then.2, !dbg !73
+
+if.then.2:                                        ; preds = %if.end9.1
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1), !dbg !74
+  %mul4.2 = shl nsw i32 %call.2, 1, !dbg !75
+  tail call void @work(i32 noundef %mul4.2), !dbg !76
+  br label %if.end.2, !dbg !78
+
+if.end.2:                                         ; preds = %if.then.2, %if.end9.1
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !79
+  %8 = and i32 %call.2, 3, !dbg !81
+  %tobool6.not.2 = icmp eq i32 %8, 0, !dbg !81
+  br i1 %tobool6.not.2, label %if.end9.2, label %if.then7.2, !dbg !82
+
+if.then7.2:                                       ; preds = %if.end.2
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 8, i32 0, i64 -1), !dbg !83
+  %mul8.2 = mul nsw i32 %call.2, 3, !dbg !84
+  tail call void @work(i32 noundef %mul8.2), !dbg !85
+  br label %if.end9.2, !dbg !87
+
+if.end9.2:                                        ; preds = %if.then7.2, %if.end.2
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 9, i32 0, i64 -1), !dbg !88
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 10, i32 0, i64 -1), !dbg !89
+  call void @llvm.dbg.value(metadata i32 3, metadata !51, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !65
+  call void @llvm.dbg.value(metadata i32 3, metadata !51, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 5, i32 0, i64 -1), !dbg !67
+  %call.3 = tail call i32 @bar(i32 noundef %lsr.iv), !dbg !68
+  call void @llvm.dbg.value(metadata i32 %call.3, metadata !53, metadata !DIExpression()), !dbg !70
+  %9 = and i32 %call.3, 1, !dbg !71
+  %tobool.not.3 = icmp eq i32 %9, 0, !dbg !71
+  br i1 %tobool.not.3, label %if.end.3, label %if.then.3, !dbg !73
+
+if.then.3:                                        ; preds = %if.end9.2
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1), !dbg !74
+  %mul4.3 = shl nsw i32 %call.3, 1, !dbg !75
+  tail call void @work(i32 noundef %mul4.3), !dbg !76
+  br label %if.end.3, !dbg !78
+
+if.end.3:                                         ; preds = %if.then.3, %if.end9.2
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !79
+  %10 = and i32 %call.3, 3, !dbg !81
+  %tobool6.not.3 = icmp eq i32 %10, 0, !dbg !81
+  br i1 %tobool6.not.3, label %if.end9.3, label %if.then7.3, !dbg !82
+
+if.then7.3:                                       ; preds = %if.end.3
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 8, i32 0, i64 -1), !dbg !83
+  %mul8.3 = mul nsw i32 %call.3, 3, !dbg !84
+  tail call void @work(i32 noundef %mul8.3), !dbg !85
+  br label %if.end9.3, !dbg !87
+
+if.end9.3:                                        ; preds = %if.then7.3, %if.end.3
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 9, i32 0, i64 -1), !dbg !88
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 10, i32 0, i64 -1), !dbg !89
+  call void @llvm.dbg.value(metadata i32 4, metadata !51, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !65
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 11, i32 0, i64 -1), !dbg !90
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 12, i32 0, i64 -1), !dbg !92
+  call void @llvm.dbg.value(metadata i32 %lsr.iv, metadata !52, metadata !DIExpression(DW_OP_consts, 3, DW_OP_minus, DW_OP_consts, 48, DW_OP_div, DW_OP_consts, 1, DW_OP_plus, DW_OP_stack_value)), !dbg !60
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg !61
+  %lsr.iv.next = add nuw nsw i32 %lsr.iv, 48, !dbg !93
+  %exitcond.not = icmp eq i32 %lsr.iv.next, 2307, !dbg !93
+  br i1 %exitcond.not, label %for.end12, label %for.cond1.preheader, !dbg !63, !llvm.loop !95
+
+for.end12:                                        ; preds = %if.end9.3
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 13, i32 0, i64 -1), !dbg !99
+  ret void, !dbg !99
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #6
+
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.value(metadata, metadata, metadata) #7
+
+attributes #0 = { nofree noinline nounwind memory(inaccessiblemem: readwrite) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(readwrite, argmem: none, inaccessiblemem: none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #4 = { nofree noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #5 = { nofree nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #6 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+attributes #7 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!6, !7, !8, !9}
+!llvm.ident = !{!10}
+!llvm.pseudo_probe_desc = !{!11, !12, !13, !14}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "sum", scope: !2, file: !3, line: 7, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 17.0.0 (https://github.com/llvm/llvm-project.git fb16df500443aa5129f4a5e4dc4d9dcac613a809)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "unroll.c", directory: "/home/hoy/build/llvm-github", checksumkind: CSK_MD5, checksum: "11508da575b4d414f8b2f39cf4d90184")
+!4 = !{!0}
+!5 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned)
+!6 = !{i32 7, !"Dwarf Version", i32 5}
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i32 1, !"wchar_size", i32 4}
+!9 = !{i32 7, !"uwtable", i32 2}
+!10 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git fb16df500443aa5129f4a5e4dc4d9dcac613a809)"}
+!11 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
+!12 = !{i64 9204417991963109735, i64 72617220756, !"work"}
+!13 = !{i64 6699318081062747564, i64 844700110938769, !"foo"}
+!14 = !{i64 -2624081020897602054, i64 281563657672557, !"main"}
+!18 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!47 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 15, type: !48, scopeLine: 15, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !50)
+!48 = !DISubroutineType(types: !49)
+!49 = !{null}
+!50 = !{!51, !52, !53}
+!51 = !DILocalVariable(name: "i", scope: !47, file: !3, line: 16, type: !18)
+!52 = !DILocalVariable(name: "j", scope: !47, file: !3, line: 16, type: !18)
+!53 = !DILocalVariable(name: "ii", scope: !54, file: !3, line: 19, type: !18)
+!54 = distinct !DILexicalBlock(scope: !55, file: !3, line: 18, column: 30)
+!55 = distinct !DILexicalBlock(scope: !56, file: !3, line: 18, column: 6)
+!56 = distinct !DILexicalBlock(scope: !57, file: !3, line: 18, column: 6)
+!57 = distinct !DILexicalBlock(scope: !58, file: !3, line: 17, column: 4)
+!58 = distinct !DILexicalBlock(scope: !47, file: !3, line: 17, column: 4)
+!59 = !DILocation(line: 17, column: 11, scope: !58)
+!60 = !DILocation(line: 0, scope: !47)
+!61 = !DILocation(line: 17, column: 16, scope: !62)
+!62 = !DILexicalBlockFile(scope: !57, file: !3, discriminator: 0)
+!63 = !DILocation(line: 17, column: 4, scope: !64)
+!64 = !DILexicalBlockFile(scope: !58, file: !3, discriminator: 1)
+!65 = !DILocation(line: 18, column: 18, scope: !66)
+!66 = !DILexicalBlockFile(scope: !55, file: !3, discriminator: 0)
+!67 = !DILocation(line: 19, column: 21, scope: !54)
+!68 = !DILocation(line: 19, column: 17, scope: !69)
+!69 = !DILexicalBlockFile(scope: !54, file: !3, discriminator: 186646647)
+!70 = !DILocation(line: 0, scope: !54)
+!71 = !DILocation(line: 20, column: 15, scope: !72)
+!72 = distinct !DILexicalBlock(scope: !54, file: !3, line: 20, column: 12)
+!73 = !DILocation(line: 20, column: 12, scope: !54)
+!74 = !DILocation(line: 21, column: 15, scope: !72)
+!75 = !DILocation(line: 21, column: 17, scope: !72)
+!76 = !DILocation(line: 21, column: 10, scope: !77)
+!77 = !DILexicalBlockFile(scope: !72, file: !3, discriminator: 186646655)
+!78 = !DILocation(line: 21, column: 10, scope: !72)
+!79 = !DILocation(line: 22, column: 12, scope: !80)
+!80 = distinct !DILexicalBlock(scope: !54, file: !3, line: 22, column: 12)
+!81 = !DILocation(line: 22, column: 15, scope: !80)
+!82 = !DILocation(line: 22, column: 12, scope: !54)
+!83 = !DILocation(line: 23, column: 15, scope: !80)
+!84 = !DILocation(line: 23, column: 17, scope: !80)
+!85 = !DILocation(line: 23, column: 10, scope: !86)
+!86 = !DILexicalBlockFile(scope: !80, file: !3, discriminator: 186646663)
+!87 = !DILocation(line: 23, column: 10, scope: !80)
+!88 = !DILocation(line: 24, column: 4, scope: !54)
+!89 = !DILocation(line: 18, column: 26, scope: !66)
+!90 = !DILocation(line: 24, column: 4, scope: !91)
+!91 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 0)
+!92 = !DILocation(line: 17, column: 25, scope: !62)
+!93 = !DILocation(line: 17, column: 18, scope: !94)
+!94 = !DILexicalBlockFile(scope: !57, file: !3, discriminator: 1)
+!95 = distinct !{!95, !96, !97, !98}
+!96 = !DILocation(line: 17, column: 4, scope: !58)
+!97 = !DILocation(line: 24, column: 4, scope: !58)
+!98 = !{!"llvm.loop.mustprogress"}
+!99 = !DILocation(line: 25, column: 2, scope: !47)

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
index 54dbc1d50d419..34a494f702346 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
@@ -1,6 +1,6 @@
 ; REQUIRES: x86_64-linux
 ; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl 2>&1 | FileCheck %s
 
 ; The profiled source code:
 


        


More information about the llvm-commits mailing list