[llvm] 3d89b3c - [CSSPGO] Introducing distribution factor for pseudo probe.

Hongtao Yu via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 2 11:55:17 PST 2021


Author: Hongtao Yu
Date: 2021-02-02T11:55:01-08:00
New Revision: 3d89b3cbec230633e8228787819b15116c1a1730

URL: https://github.com/llvm/llvm-project/commit/3d89b3cbec230633e8228787819b15116c1a1730
DIFF: https://github.com/llvm/llvm-project/commit/3d89b3cbec230633e8228787819b15116c1a1730.diff

LOG: [CSSPGO] Introducing distribution factor for pseudo probe.

Sample re-annotation is required at LTO time to achieve a reasonable post-inline profile quality. However, we have seen that such LTO-time re-annotation degrades profile quality. This is mainly caused by pre-LTO code duplication that is done by passes such as loop unrolling, jump threading, indirect call promotion, etc., where samples corresponding to a source location are aggregated multiple times due to the duplicates. In this change we are introducing the concept of a distribution factor for pseudo probes so that samples can be distributed among duplicated probes, scaled by a factor. We rely on optimizations that duplicate code to maintain the block frequency information (BFI) well, since the probe distribution factors are calculated from it. Distribution factors are updated at the end of the pre-LTO pipeline to reflect an estimated portion of the real execution count.

This change also introduces a pseudo probe verifier that can be run after each IR pass to detect duplicated pseudo probes.

A saturated distribution factor stands for 1.0. A pseudo probe will carry a factor with a value ranging from 0.0 to 1.0. A 64-bit integral distribution factor field that represents [0.0, 1.0] is associated with each block probe. Unfortunately this cannot be done for callsite probes due to the size limitation of a 32-bit DWARF discriminator. A 7-bit distribution factor is used instead.

Changes are also needed to the sample profile inliner to deal with prorated callsite counts. Call sites duplicated by pre-LTO passes, when later inlined at LTO time, should have the callee's probes prorated based on the prelink-computed distribution factors. The distribution factors should also be taken into account when computing hotness for inline candidates. Also, indirect call promotion results in multiple callsites. The original samples should be distributed across them. This is fixed by adjusting the callsites' distribution factors.

Reviewed By: wmi

Differential Revision: https://reviews.llvm.org/D93264

Added: 
    llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
    llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll

Modified: 
    clang/test/CodeGen/pseudo-probe-emit.c
    llvm/include/llvm/IR/IntrinsicInst.h
    llvm/include/llvm/IR/Intrinsics.td
    llvm/include/llvm/IR/PseudoProbe.h
    llvm/include/llvm/Passes/StandardInstrumentations.h
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
    llvm/lib/IR/PseudoProbe.cpp
    llvm/lib/Passes/PassBuilder.cpp
    llvm/lib/Passes/PassRegistry.def
    llvm/lib/Passes/StandardInstrumentations.cpp
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
    llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll

Removed: 
    


################################################################################
diff  --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c
index 059673b6992e..fccc8f04844d 100644
--- a/clang/test/CodeGen/pseudo-probe-emit.c
+++ b/clang/test/CodeGen/pseudo-probe-emit.c
@@ -6,12 +6,12 @@ void bar();
 void go();
 
 void foo(int x) {
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   if (x == 0)
-    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0)
+    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
     bar();
   else
-    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0)
+    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
     go();
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0)
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
 }

diff  --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 9d68f3fdde6c..df3a1d568756 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -981,12 +981,16 @@ class PseudoProbeInst : public IntrinsicInst {
     return cast<ConstantInt>(const_cast<Value *>(getArgOperand(0)));
   }
 
+  ConstantInt *getIndex() const {
+    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+  }
+
   ConstantInt *getAttributes() const {
     return cast<ConstantInt>(const_cast<Value *>(getArgOperand(2)));
   }
 
-  ConstantInt *getIndex() const {
-    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(1)));
+  ConstantInt *getFactor() const {
+    return cast<ConstantInt>(const_cast<Value *>(getArgOperand(3)));
   }
 };
 

diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index dea20bb0583a..58f0f86cae63 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1299,7 +1299,7 @@ def int_sideeffect : DefaultAttrsIntrinsic<[], [], [IntrInaccessibleMemOnly, Int
 // Like the sideeffect intrinsic defined above, this intrinsic is treated by the 
 // optimizer as having opaque side effects so that it won't be get rid of or moved 
 // out of the block it probes.
-def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
+def int_pseudoprobe : Intrinsic<[], [llvm_i64_ty, llvm_i64_ty, llvm_i32_ty, llvm_i64_ty],
                                     [IntrInaccessibleMemOnly, IntrWillReturn]>;
 
 // Intrinsics to support half precision floating point format

diff  --git a/llvm/include/llvm/IR/PseudoProbe.h b/llvm/include/llvm/IR/PseudoProbe.h
index e0370c264102..5165e80caa2d 100644
--- a/llvm/include/llvm/IR/PseudoProbe.h
+++ b/llvm/include/llvm/IR/PseudoProbe.h
@@ -16,28 +16,39 @@
 #include "llvm/ADT/Optional.h"
 #include <cassert>
 #include <cstdint>
+#include <limits>
 
 namespace llvm {
 
 class Instruction;
+class BasicBlock;
 
 constexpr const char *PseudoProbeDescMetadataName = "llvm.pseudo_probe_desc";
 
 enum class PseudoProbeType { Block = 0, IndirectCall, DirectCall };
 
+// The saturated distrution factor representing 100% for block probes.
+constexpr static uint64_t PseudoProbeFullDistributionFactor =
+    std::numeric_limits<uint64_t>::max();
+
 struct PseudoProbeDwarfDiscriminator {
+public:
   // The following APIs encodes/decodes per-probe information to/from a
   // 32-bit integer which is organized as:
   //  [2:0] - 0x7, this is reserved for regular discriminator,
   //          see DWARF discriminator encoding rule
   //  [18:3] - probe id
-  //  [25:19] - reserved
+  //  [25:19] - probe distribution factor
   //  [28:26] - probe type, see PseudoProbeType
   //  [31:29] - reserved for probe attributes
-  static uint32_t packProbeData(uint32_t Index, uint32_t Type) {
+  static uint32_t packProbeData(uint32_t Index, uint32_t Type, uint32_t Flags,
+                                uint32_t Factor) {
     assert(Index <= 0xFFFF && "Probe index too big to encode, exceeding 2^16");
     assert(Type <= 0x7 && "Probe type too big to encode, exceeding 7");
-    return (Index << 3) | (Type << 26) | 0x7;
+    assert(Flags <= 0x7);
+    assert(Factor <= 100 &&
+           "Probe distribution factor too big to encode, exceeding 100");
+    return (Index << 3) | (Factor << 19) | (Type << 26) | 0x7;
   }
 
   static uint32_t extractProbeIndex(uint32_t Value) {
@@ -51,16 +62,26 @@ struct PseudoProbeDwarfDiscriminator {
   static uint32_t extractProbeAttributes(uint32_t Value) {
     return (Value >> 29) & 0x7;
   }
+
+  static uint32_t extractProbeFactor(uint32_t Value) {
+    return (Value >> 19) & 0x7F;
+  }
+
+  // The saturated distrution factor representing 100% for callsites.
+  constexpr static uint8_t FullDistributionFactor = 100;
 };
 
 struct PseudoProbe {
   uint32_t Id;
   uint32_t Type;
   uint32_t Attr;
+  float Factor;
 };
 
 Optional<PseudoProbe> extractProbe(const Instruction &Inst);
 
+void setProbeDistributionFactor(Instruction &Inst, float Factor);
+
 } // end namespace llvm
 
 #endif // LLVM_IR_PSEUDOPROBE_H

diff  --git a/llvm/include/llvm/Passes/StandardInstrumentations.h b/llvm/include/llvm/Passes/StandardInstrumentations.h
index 795a980878e2..61c86b0468f2 100644
--- a/llvm/include/llvm/Passes/StandardInstrumentations.h
+++ b/llvm/include/llvm/Passes/StandardInstrumentations.h
@@ -22,6 +22,7 @@
 #include "llvm/IR/PassTimingInfo.h"
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/IPO/SampleProfileProbe.h"
 
 #include <string>
 #include <utility>
@@ -273,6 +274,7 @@ class StandardInstrumentations {
   OptBisectInstrumentation OptBisect;
   PreservedCFGCheckerInstrumentation PreservedCFGChecker;
   IRChangedPrinter PrintChangedIR;
+  PseudoProbeVerifier PseudoProbeVerification;
   VerifyInstrumentation Verify;
 
   bool VerifyEach;

diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 346bc4c81d86..25d5b2376c11 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -347,6 +347,16 @@ class SampleRecord {
     return SortedTargets;
   }
 
+  /// Prorate call targets by a distribution factor.
+  static const CallTargetMap adjustCallTargets(const CallTargetMap &Targets,
+                                               float DistributionFactor) {
+    CallTargetMap AdjustedTargets;
+    for (const auto &I : Targets) {
+      AdjustedTargets[I.first()] = I.second * DistributionFactor;
+    }
+    return AdjustedTargets;
+  }
+
   /// Merge the samples in \p Other into this record.
   /// Optionally scale sample counts by \p Weight.
   sampleprof_error merge(const SampleRecord &Other, uint64_t Weight = 1) {

diff  --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 78117fd4a9c2..cab893b50d19 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -16,6 +16,10 @@
 #define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H
 
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/Analysis/CallGraphSCCPass.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/IR/PassInstrumentation.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/IR/PseudoProbe.h"
 #include "llvm/ProfileData/SampleProf.h"
@@ -29,6 +33,8 @@ class Module;
 using namespace sampleprof;
 using BlockIdMap = std::unordered_map<BasicBlock *, uint32_t>;
 using InstructionIdMap = std::unordered_map<Instruction *, uint32_t>;
+using ProbeFactorMap = std::unordered_map<uint64_t, float>;
+using FuncProbeFactorMap = StringMap<ProbeFactorMap>;
 
 enum class PseudoProbeReservedId { Invalid = 0, Last = Invalid };
 
@@ -43,6 +49,33 @@ class PseudoProbeDescriptor {
   uint64_t getFunctionHash() const { return FunctionHash; }
 };
 
+// A pseudo probe verifier that can be run after each IR passes to detect the
+// violation of updating probe factors. In principle, the sum of distribution
+// factor for a probe should be identical before and after a pass. For a
+// function pass, the factor sum for a probe would be typically 100%.
+class PseudoProbeVerifier {
+public:
+  void registerCallbacks(PassInstrumentationCallbacks &PIC);
+
+  // Implementation of pass instrumentation callbacks for new pass manager.
+  void runAfterPass(StringRef PassID, Any IR);
+
+private:
+  // Allow a little bias due the rounding to integral factors.
+  constexpr static float DistributionFactorVariance = 0.02;
+  // Distribution factors from last pass.
+  FuncProbeFactorMap FunctionProbeFactors;
+
+  void collectProbeFactors(const BasicBlock *BB, ProbeFactorMap &ProbeFactors);
+  void runAfterPass(const Module *M);
+  void runAfterPass(const LazyCallGraph::SCC *C);
+  void runAfterPass(const Function *F);
+  void runAfterPass(const Loop *L);
+  bool shouldVerifyFunction(const Function *F);
+  void verifyProbeFactors(const Function *F,
+                          const ProbeFactorMap &ProbeFactors);
+};
+
 // This class serves sample counts correlation for SampleProfileLoader by
 // analyzing pseudo probes and their function descriptors injected by
 // SampleProfileProber.
@@ -102,5 +135,13 @@ class SampleProfileProbePass : public PassInfoMixin<SampleProfileProbePass> {
   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
 };
 
+class PseudoProbeUpdatePass : public PassInfoMixin<PseudoProbeUpdatePass> {
+  void runOnFunction(Function &F, FunctionAnalysisManager &FAM);
+
+public:
+  PseudoProbeUpdatePass() {}
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
+};
+
 } // end namespace llvm
 #endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEPROBE_H

diff  --git a/llvm/lib/IR/PseudoProbe.cpp b/llvm/lib/IR/PseudoProbe.cpp
index 804214f06e7a..80d2963938d4 100644
--- a/llvm/lib/IR/PseudoProbe.cpp
+++ b/llvm/lib/IR/PseudoProbe.cpp
@@ -35,6 +35,9 @@ Optional<PseudoProbe> extractProbeFromDiscriminator(const Instruction &Inst) {
           PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
       Probe.Attr =
           PseudoProbeDwarfDiscriminator::extractProbeAttributes(Discriminator);
+      Probe.Factor =
+          PseudoProbeDwarfDiscriminator::extractProbeFactor(Discriminator) /
+          (float)PseudoProbeDwarfDiscriminator::FullDistributionFactor;
       return Probe;
     }
   }
@@ -47,6 +50,8 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
     Probe.Id = II->getIndex()->getZExtValue();
     Probe.Type = (uint32_t)PseudoProbeType::Block;
     Probe.Attr = II->getAttributes()->getZExtValue();
+    Probe.Factor = II->getFactor()->getZExtValue() /
+                   (float)PseudoProbeFullDistributionFactor;
     return Probe;
   }
 
@@ -55,4 +60,40 @@ Optional<PseudoProbe> extractProbe(const Instruction &Inst) {
 
   return None;
 }
+
+void setProbeDistributionFactor(Instruction &Inst, float Factor) {
+  assert(Factor >= 0 && Factor <= 1 &&
+         "Distribution factor must be in [0, 1.0]");
+  if (auto *II = dyn_cast<PseudoProbeInst>(&Inst)) {
+    IRBuilder<> Builder(&Inst);
+    uint64_t IntFactor = PseudoProbeFullDistributionFactor;
+    if (Factor < 1)
+      IntFactor *= Factor;
+    auto OrigFactor = II->getFactor()->getZExtValue();
+    if (IntFactor != OrigFactor)
+      II->replaceUsesOfWith(II->getFactor(), Builder.getInt64(IntFactor));
+  } else if (isa<CallBase>(&Inst) && !isa<IntrinsicInst>(&Inst)) {
+    if (const DebugLoc &DLoc = Inst.getDebugLoc()) {
+      const DILocation *DIL = DLoc;
+      auto Discriminator = DIL->getDiscriminator();
+      if (DILocation::isPseudoProbeDiscriminator(Discriminator)) {
+        auto Index =
+            PseudoProbeDwarfDiscriminator::extractProbeIndex(Discriminator);
+        auto Type =
+            PseudoProbeDwarfDiscriminator::extractProbeType(Discriminator);
+        auto Attr = PseudoProbeDwarfDiscriminator::extractProbeAttributes(
+            Discriminator);
+        // Round small factors to 0 to avoid over-counting.
+        uint32_t IntFactor =
+            PseudoProbeDwarfDiscriminator::FullDistributionFactor;
+        if (Factor < 1)
+          IntFactor *= Factor;
+        uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+            Index, Type, Attr, IntFactor);
+        DIL = DIL->cloneWithDiscriminator(V);
+        Inst.setDebugLoc(DIL);
+      }
+    }
+  }
+}
 } // namespace llvm

diff  --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 3af3ddf8ad43..f801bcd879d3 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -1428,6 +1428,9 @@ PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
   // Now add the optimization pipeline.
   MPM.addPass(buildModuleOptimizationPipeline(Level, LTOPreLink));
 
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
 
@@ -1482,6 +1485,9 @@ PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level) {
   if (PTO.Coroutines)
     MPM.addPass(createModuleToFunctionPassAdaptor(CoroCleanupPass()));
 
+  if (PGOOpt && PGOOpt->PseudoProbeForProfiling)
+    MPM.addPass(PseudoProbeUpdatePass());
+
   // Emit annotation remarks.
   addAnnotationRemarksPass(MPM);
 

diff  --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index 860bfade733d..877cb9ed13b3 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -119,6 +119,7 @@ MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, f
 MODULE_PASS("sancov-module", ModuleSanitizerCoveragePass())
 MODULE_PASS("memprof-module", ModuleMemProfilerPass())
 MODULE_PASS("poison-checking", PoisonCheckingPass())
+MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass())
 #undef MODULE_PASS
 
 #ifndef CGSCC_ANALYSIS

diff  --git a/llvm/lib/Passes/StandardInstrumentations.cpp b/llvm/lib/Passes/StandardInstrumentations.cpp
index a8bfe02d4432..6795aed7b04e 100644
--- a/llvm/lib/Passes/StandardInstrumentations.cpp
+++ b/llvm/lib/Passes/StandardInstrumentations.cpp
@@ -882,6 +882,7 @@ void StandardInstrumentations::registerCallbacks(
   OptBisect.registerCallbacks(PIC);
   PreservedCFGChecker.registerCallbacks(PIC);
   PrintChangedIR.registerCallbacks(PIC);
+  PseudoProbeVerification.registerCallbacks(PIC);
   if (VerifyEach)
     Verify.registerCallbacks(PIC);
 }

diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2cfefd3a18ea..b2a9127773c3 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -108,6 +108,8 @@ STATISTIC(NumCSNotInlined,
 STATISTIC(NumMismatchedProfile,
           "Number of functions with CFG mismatched profile");
 STATISTIC(NumMatchedProfile, "Number of functions with CFG matched profile");
+STATISTIC(NumDuplicatedInlinesite,
+          "Number of inlined callsites with a partial distribution factor");
 
 STATISTIC(NumCSInlinedHitMinLimit,
           "Number of functions with FDO inline stopped due to min size limit");
@@ -358,7 +360,14 @@ class GUIDToFuncNameMapper {
 struct InlineCandidate {
   CallBase *CallInstr;
   const FunctionSamples *CalleeSamples;
+  // Prorated callsite count, which will be used to guide inlining. For example,
+  // if a callsite is duplicated in LTO prelink, then in LTO postlink the two
+  // copies will get their own distribution factors and their prorated counts
+  // will be used to decide if they should be inlined independently.
   uint64_t CallsiteCount;
+  // Call site distribution factor to prorate the profile samples for a
+  // duplicated callsite. Default value is 1.0.
+  float CallsiteDistribution;
 };
 
 // Inline candidate comparer using call site weight
@@ -418,8 +427,8 @@ class SampleProfileLoader {
   const FunctionSamples *findFunctionSamples(const Instruction &I) const;
   // Attempt to promote indirect call and also inline the promoted call
   bool tryPromoteAndInlineCandidate(
-      Function &F, InlineCandidate &Candidate, uint64_t &Sum,
-      DenseSet<Instruction *> &PromotedInsns,
+      Function &F, InlineCandidate &Candidate, uint64_t SumOrigin,
+      uint64_t &Sum, DenseSet<Instruction *> &PromotedInsns,
       SmallVector<CallBase *, 8> *InlinedCallSites = nullptr);
   bool inlineHotFunctions(Function &F,
                           DenseSet<GlobalValue::GUID> &InlinedGUIDs);
@@ -886,7 +895,7 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
 
   const ErrorOr<uint64_t> &R = FS->findSamplesAt(Probe->Id, 0);
   if (R) {
-    uint64_t Samples = R.get();
+    uint64_t Samples = R.get() * Probe->Factor;
     bool FirstMark = CoverageTracker.markSamplesUsed(FS, Probe->Id, 0, Samples);
     if (FirstMark) {
       ORE->emit([&]() {
@@ -894,13 +903,17 @@ ErrorOr<uint64_t> SampleProfileLoader::getProbeWeight(const Instruction &Inst) {
         Remark << "Applied " << ore::NV("NumSamples", Samples);
         Remark << " samples from profile (ProbeId=";
         Remark << ore::NV("ProbeId", Probe->Id);
+        Remark << ", Factor=";
+        Remark << ore::NV("Factor", Probe->Factor);
+        Remark << ", OriginalSamples=";
+        Remark << ore::NV("OriginalSamples", R.get());
         Remark << ")";
         return Remark;
       });
     }
-
     LLVM_DEBUG(dbgs() << "    " << Probe->Id << ":" << Inst
-                      << " - weight: " << R.get() << ")\n");
+                      << " - weight: " << R.get() << " - factor: "
+                      << format("%0.2f", Probe->Factor) << ")\n");
     return Samples;
   }
   return R;
@@ -1085,7 +1098,7 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const {
 /// \param InlinedCallSite  Output vector for new call sites exposed after
 /// inlining.
 bool SampleProfileLoader::tryPromoteAndInlineCandidate(
-    Function &F, InlineCandidate &Candidate, uint64_t &Sum,
+    Function &F, InlineCandidate &Candidate, uint64_t SumOrigin, uint64_t &Sum,
     DenseSet<Instruction *> &PromotedInsns,
     SmallVector<CallBase *, 8> *InlinedCallSite) {
   const char *Reason = "Callee function not available";
@@ -1106,10 +1119,28 @@ bool SampleProfileLoader::tryPromoteAndInlineCandidate(
                                   Candidate.CallsiteCount, Sum, false, ORE);
     if (DI) {
       Sum -= Candidate.CallsiteCount;
+      // Prorate the indirect callsite distribution.
+      // Do not update the promoted direct callsite distribution at this
+      // point since the original distribution combined with the callee
+      // profile will be used to prorate callsites from the callee if
+      // inlined. Once not inlined, the direct callsite distribution should
+      // be prorated so that the it will reflect the real callsite counts.
+      setProbeDistributionFactor(*Candidate.CallInstr,
+                                 Candidate.CallsiteDistribution * Sum /
+                                     SumOrigin);
       PromotedInsns.insert(Candidate.CallInstr);
       Candidate.CallInstr = DI;
-      if (isa<CallInst>(DI) || isa<InvokeInst>(DI))
-        return tryInlineCandidate(Candidate, InlinedCallSite);
+      if (isa<CallInst>(DI) || isa<InvokeInst>(DI)) {
+        bool Inlined = tryInlineCandidate(Candidate, InlinedCallSite);
+        if (!Inlined) {
+          // Prorate the direct callsite distribution so that it reflects real
+          // callsite counts.
+          setProbeDistributionFactor(*DI, Candidate.CallsiteDistribution *
+                                              Candidate.CallsiteCount /
+                                              SumOrigin);
+        }
+        return Inlined;
+      }
     }
   } else {
     LLVM_DEBUG(dbgs() << "\nFailed to promote indirect call to "
@@ -1216,11 +1247,11 @@ bool SampleProfileLoader::inlineHotFunctions(
     }
     for (CallBase *I : CIS) {
       Function *CalledFunction = I->getCalledFunction();
-      InlineCandidate Candidate = {I,
-                                   LocalNotInlinedCallSites.count(I)
-                                       ? LocalNotInlinedCallSites[I]
-                                       : nullptr,
-                                   0 /* dummy count */};
+      InlineCandidate Candidate = {
+          I,
+          LocalNotInlinedCallSites.count(I) ? LocalNotInlinedCallSites[I]
+                                            : nullptr,
+          0 /* dummy count */, 1.0 /* dummy distribution factor */};
       // Do not inline recursive calls.
       if (CalledFunction == &F)
         continue;
@@ -1229,6 +1260,7 @@ bool SampleProfileLoader::inlineHotFunctions(
           continue;
         uint64_t Sum;
         for (const auto *FS : findIndirectCallFunctionSamples(*I, Sum)) {
+          uint64_t SumOrigin = Sum;
           if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
             FS->findInlinedFunctions(InlinedGUIDs, F.getParent(),
                                      PSI->getOrCompHotCountThreshold());
@@ -1237,8 +1269,9 @@ bool SampleProfileLoader::inlineHotFunctions(
           if (!callsiteIsHot(FS, PSI))
             continue;
 
-          Candidate = {I, FS, FS->getEntrySamples()};
-          if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns)) {
+          Candidate = {I, FS, FS->getEntrySamples(), 1.0};
+          if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+                                           PromotedInsns)) {
             LocalNotInlinedCallSites.erase(I);
             LocalChanged = true;
           }
@@ -1343,6 +1376,23 @@ bool SampleProfileLoader::tryInlineCandidate(
     if (ProfileIsCS)
       ContextTracker->markContextSamplesInlined(Candidate.CalleeSamples);
     ++NumCSInlined;
+
+    // Prorate inlined probes for a duplicated inlining callsite which probably
+    // has a distribution less than 100%. Samples for an inlinee should be
+    // distributed among the copies of the original callsite based on each
+    // callsite's distribution factor for counts accuracy. Note that an inlined
+    // probe may come with its own distribution factor if it has been duplicated
+    // in the inlinee body. The two factor are multiplied to reflect the
+    // aggregation of duplication.
+    if (Candidate.CallsiteDistribution < 1) {
+      for (auto &I : IFI.InlinedCallSites) {
+        if (Optional<PseudoProbe> Probe = extractProbe(*I))
+          setProbeDistributionFactor(*I, Probe->Factor *
+                                             Candidate.CallsiteDistribution);
+      }
+      NumDuplicatedInlinesite++;
+    }
+
     return true;
   }
   return false;
@@ -1360,14 +1410,19 @@ bool SampleProfileLoader::getInlineCandidate(InlineCandidate *NewCandidate,
   if (!CalleeSamples)
     return false;
 
+  float Factor = 1.0;
+  if (Optional<PseudoProbe> Probe = extractProbe(*CB))
+    Factor = Probe->Factor;
+
   uint64_t CallsiteCount = 0;
   ErrorOr<uint64_t> Weight = getBlockWeight(CB->getParent());
   if (Weight)
     CallsiteCount = Weight.get();
   if (CalleeSamples)
-    CallsiteCount = std::max(CallsiteCount, CalleeSamples->getEntrySamples());
+    CallsiteCount = std::max(
+        CallsiteCount, uint64_t(CalleeSamples->getEntrySamples() * Factor));
 
-  *NewCandidate = {CB, CalleeSamples, CallsiteCount};
+  *NewCandidate = {CB, CalleeSamples, CallsiteCount, Factor};
   return true;
 }
 
@@ -1479,6 +1534,7 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
       uint64_t Sum;
       auto CalleeSamples = findIndirectCallFunctionSamples(*I, Sum);
       uint64_t SumOrigin = Sum;
+      Sum *= Candidate.CallsiteDistribution;
       for (const auto *FS : CalleeSamples) {
         // TODO: Consider disable pre-lTO ICP for MonoLTO as well
         if (LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink) {
@@ -1486,7 +1542,8 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
                                    PSI->getOrCompHotCountThreshold());
           continue;
         }
-        uint64_t EntryCountDistributed = FS->getEntrySamples();
+        uint64_t EntryCountDistributed =
+            FS->getEntrySamples() * Candidate.CallsiteDistribution;
         // In addition to regular inline cost check, we also need to make sure
         // ICP isn't introducing excessive speculative checks even if individual
         // target looks beneficial to promote and inline. That means we should
@@ -1505,9 +1562,10 @@ bool SampleProfileLoader::inlineHotFunctionsWithPriority(
         SmallVector<CallBase *, 8> InlinedCallSites;
         // Attach function profile for promoted indirect callee, and update
         // call site count for the promoted inline candidate too.
-        Candidate = {I, FS, EntryCountDistributed};
-        if (tryPromoteAndInlineCandidate(F, Candidate, Sum, PromotedInsns,
-                                         &InlinedCallSites)) {
+        Candidate = {I, FS, EntryCountDistributed,
+                     Candidate.CallsiteDistribution};
+        if (tryPromoteAndInlineCandidate(F, Candidate, SumOrigin, Sum,
+                                         PromotedInsns, &InlinedCallSites)) {
           for (auto *CB : InlinedCallSites) {
             if (getInlineCandidate(&NewCandidate, CB))
               CQueue.emplace(NewCandidate);
@@ -1965,6 +2023,14 @@ void SampleProfileLoader::propagateWeights(Function &F) {
           auto T = FS->findCallTargetMapAt(CallSite);
           if (!T || T.get().empty())
             continue;
+          // Prorate the callsite counts to reflect what is already done to the
+          // callsite, such as ICP or calliste cloning.
+          if (FunctionSamples::ProfileIsProbeBased) {
+            if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+              if (Probe->Factor < 1)
+                T = SampleRecord::adjustCallTargets(T.get(), Probe->Factor);
+            }
+          }
           SmallVector<InstrProfValueData, 2> SortedCallTargets =
               GetSortedValueDataFromCallTargets(T.get());
           uint64_t Sum;

diff  --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 7cecd20b78d8..a885c3ee4ded 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -12,6 +12,7 @@
 
 #include "llvm/Transforms/IPO/SampleProfileProbe.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
@@ -25,8 +26,10 @@
 #include "llvm/IR/MDBuilder.h"
 #include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/CRC.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <unordered_set>
 #include <vector>
 
 using namespace llvm;
@@ -35,6 +38,115 @@ using namespace llvm;
 STATISTIC(ArtificialDbgLine,
           "Number of probes that have an artificial debug line");
 
+static cl::opt<bool>
+    VerifyPseudoProbe("verify-pseudo-probe", cl::init(false), cl::Hidden,
+                      cl::desc("Do pseudo probe verification"));
+
+static cl::list<std::string> VerifyPseudoProbeFuncList(
+    "verify-pseudo-probe-funcs", cl::Hidden,
+    cl::desc("The option to specify the name of the functions to verify."));
+
+static cl::opt<bool>
+    UpdatePseudoProbe("update-pseudo-probe", cl::init(true), cl::Hidden,
+                      cl::desc("Update pseudo probe distribution factor"));
+
+bool PseudoProbeVerifier::shouldVerifyFunction(const Function *F) {
+  // Skip function declaration.
+  if (F->isDeclaration())
+    return false;
+  // Skip function that will not be emitted into object file. The prevailing
+  // definition will be verified instead.
+  if (F->hasAvailableExternallyLinkage())
+    return false;
+  // Do a name matching.
+  static std::unordered_set<std::string> VerifyFuncNames(
+      VerifyPseudoProbeFuncList.begin(), VerifyPseudoProbeFuncList.end());
+  return VerifyFuncNames.empty() || VerifyFuncNames.count(F->getName().str());
+}
+
+void PseudoProbeVerifier::registerCallbacks(PassInstrumentationCallbacks &PIC) {
+  if (VerifyPseudoProbe) {
+    PIC.registerAfterPassCallback(
+        [this](StringRef P, Any IR, const PreservedAnalyses &) {
+          this->runAfterPass(P, IR);
+        });
+  }
+}
+
+// Callback to run after each transformation for the new pass manager.
+void PseudoProbeVerifier::runAfterPass(StringRef PassID, Any IR) {
+  std::string Banner =
+      "\n*** Pseudo Probe Verification After " + PassID.str() + " ***\n";
+  dbgs() << Banner;
+  if (any_isa<const Module *>(IR))
+    runAfterPass(any_cast<const Module *>(IR));
+  else if (any_isa<const Function *>(IR))
+    runAfterPass(any_cast<const Function *>(IR));
+  else if (any_isa<const LazyCallGraph::SCC *>(IR))
+    runAfterPass(any_cast<const LazyCallGraph::SCC *>(IR));
+  else if (any_isa<const Loop *>(IR))
+    runAfterPass(any_cast<const Loop *>(IR));
+  else
+    llvm_unreachable("Unknown IR unit");
+}
+
+void PseudoProbeVerifier::runAfterPass(const Module *M) {
+  for (const Function &F : *M)
+    runAfterPass(&F);
+}
+
+void PseudoProbeVerifier::runAfterPass(const LazyCallGraph::SCC *C) {
+  for (const LazyCallGraph::Node &N : *C)
+    runAfterPass(&N.getFunction());
+}
+
+void PseudoProbeVerifier::runAfterPass(const Function *F) {
+  if (!shouldVerifyFunction(F))
+    return;
+  ProbeFactorMap ProbeFactors;
+  for (const auto &BB : *F)
+    collectProbeFactors(&BB, ProbeFactors);
+  verifyProbeFactors(F, ProbeFactors);
+}
+
+void PseudoProbeVerifier::runAfterPass(const Loop *L) {
+  const Function *F = L->getHeader()->getParent();
+  runAfterPass(F);
+}
+
+void PseudoProbeVerifier::collectProbeFactors(const BasicBlock *Block,
+                                              ProbeFactorMap &ProbeFactors) {
+  for (const auto &I : *Block) {
+    if (Optional<PseudoProbe> Probe = extractProbe(I))
+      ProbeFactors[Probe->Id] += Probe->Factor;
+  }
+}
+
+void PseudoProbeVerifier::verifyProbeFactors(
+    const Function *F, const ProbeFactorMap &ProbeFactors) {
+  bool BannerPrinted = false;
+  auto &PrevProbeFactors = FunctionProbeFactors[F->getName()];
+  for (const auto &I : ProbeFactors) {
+    float CurProbeFactor = I.second;
+    if (PrevProbeFactors.count(I.first)) {
+      float PrevProbeFactor = PrevProbeFactors[I.first];
+      if (std::abs(CurProbeFactor - PrevProbeFactor) >
+          DistributionFactorVariance) {
+        if (!BannerPrinted) {
+          dbgs() << "Function " << F->getName() << ":\n";
+          BannerPrinted = true;
+        }
+        dbgs() << "Probe " << I.first << "\tprevious factor "
+               << format("%0.2f", PrevProbeFactor) << "\tcurrent factor "
+               << format("%0.2f", CurProbeFactor) << "\n";
+      }
+    }
+
+    // Update
+    PrevProbeFactors[I.first] = I.second;
+  }
+}
+
 PseudoProbeManager::PseudoProbeManager(const Module &M) {
   if (NamedMDNode *FuncInfo = M.getNamedMetadata(PseudoProbeDescMetadataName)) {
     for (const auto *Operand : FuncInfo->operands()) {
@@ -201,7 +313,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
     Function *ProbeFn =
         llvm::Intrinsic::getDeclaration(M, Intrinsic::pseudoprobe);
     Value *Args[] = {Builder.getInt64(Guid), Builder.getInt64(Index),
-                     Builder.getInt32(0)};
+                     Builder.getInt32(0),
+                     Builder.getInt64(PseudoProbeFullDistributionFactor)};
     auto *Probe = Builder.CreateCall(ProbeFn, Args);
     AssignDebugLoc(Probe);
   }
@@ -219,7 +332,8 @@ void SampleProfileProber::instrumentOneFunc(Function &F, TargetMachine *TM) {
     // Levarge the 32-bit discriminator field of debug data to store the ID and
     // type of a callsite probe. This gets rid of the dependency on plumbing a
     // customized metadata through the codegen pipeline.
-    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(Index, Type);
+    uint32_t V = PseudoProbeDwarfDiscriminator::packProbeData(
+        Index, Type, 0, PseudoProbeDwarfDiscriminator::FullDistributionFactor);
     if (auto DIL = Call->getDebugLoc()) {
       DIL = DIL->cloneWithDiscriminator(V);
       Call->setDebugLoc(DIL);
@@ -274,3 +388,47 @@ PreservedAnalyses SampleProfileProbePass::run(Module &M,
 
   return PreservedAnalyses::none();
 }
+
+void PseudoProbeUpdatePass::runOnFunction(Function &F,
+                                          FunctionAnalysisManager &FAM) {
+  BlockFrequencyInfo &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
+  auto BBProfileCount = [&BFI](BasicBlock *BB) {
+    return BFI.getBlockProfileCount(BB)
+               ? BFI.getBlockProfileCount(BB).getValue()
+               : 0;
+  };
+
+  // Collect the sum of execution weight for each probe.
+  ProbeFactorMap ProbeFactors;
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I))
+        ProbeFactors[Probe->Id] += BBProfileCount(&Block);
+    }
+  }
+
+  // Fix up over-counted probes.
+  for (auto &Block : F) {
+    for (auto &I : Block) {
+      if (Optional<PseudoProbe> Probe = extractProbe(I)) {
+        float Sum = ProbeFactors[Probe->Id];
+        if (Sum != 0)
+          setProbeDistributionFactor(I, BBProfileCount(&Block) / Sum);
+      }
+    }
+  }
+}
+
+PreservedAnalyses PseudoProbeUpdatePass::run(Module &M,
+                                             ModuleAnalysisManager &AM) {
+  if (UpdatePseudoProbe) {
+    for (auto &F : M) {
+      if (F.isDeclaration())
+        continue;
+      FunctionAnalysisManager &FAM =
+          AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+      runOnFunction(F, FAM);
+    }
+  }
+  return PreservedAnalyses::none();
+}

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
new file mode 100644
index 000000000000..62f9bd5992e7
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
@@ -0,0 +1,8 @@
+foo:3200:13
+ 1: 13
+ 2: 7
+ 3: 6
+ 4: 13
+ 5: 7
+ 6: 6
+ !CFGChecksum: 844530426352218

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
index 7e3c7e8deda2..4f730ba09a3a 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit-inline.ll
@@ -11,14 +11,14 @@
 ; RUN: llvm-objdump --section-headers  %t4 | FileCheck %s --check-prefix=CHECK-OBJ
 
 define dso_local void @foo2() !dbg !7 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID1:]] 1 0 0
   ret void, !dbg !10
 }
 
 define dso_local void @foo() #0 !dbg !11 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0), !dbg ![[#]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL1:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL1:]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID2:]] 1 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID1]] 1 0 0 @ [[#GUID2]]:2
   call void @foo2(), !dbg !12
@@ -26,9 +26,9 @@ define dso_local void @foo() #0 !dbg !11 {
 }
 
 define dso_local i32 @entry() !dbg !14 {
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0), !dbg ![[#]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0), !dbg ![[#DL2:]]
-; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0), !dbg ![[#DL3:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID3:]], i64 1, i32 0, i64 -1), !dbg ![[#]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1), !dbg ![[#DL2:]]
+; CHECK-IL:  call void @llvm.pseudoprobe(i64 [[#GUID1]], i64 1, i32 0, i64 -1), !dbg ![[#DL3:]]
 ; CHECK-ASM: .pseudoprobe	[[#GUID3:]] 1 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID2]] 1 0 0 @ [[#GUID3]]:2
 ; CHECK-ASM: .pseudoprobe	[[#GUID1]] 1 0 0 @ [[#GUID3]]:2 @ [[#GUID2]]:2
@@ -41,13 +41,13 @@ define dso_local i32 @entry() !dbg !14 {
 ; CHECK-IL: ![[#SCOPE2:]] = distinct !DISubprogram(name: "foo"
 ; CHECK-IL: ![[#DL1]] = !DILocation(line: 3, column: 1,  scope: ![[#SCOPE1]], inlinedAt: ![[#INL1:]])
 ; CHECK-IL: ![[#INL1]] = distinct !DILocation(line: 7, column: 3, scope: ![[#BL1:]])
-;; A discriminator of 134217751 which is 0x8000017 in hexdecimal, stands for a direct call probe
-;; with an index of 2.
-; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 134217751)
+;; A discriminator of 186646551 which is 0xb200017 in hexadecimal, stands for a direct call probe
+;; with an index of 2 and a scale of 100%.
+; CHECK-IL: ![[#BL1]] = !DILexicalBlockFile(scope: ![[#SCOPE2]], file: !1, discriminator: 186646551)
 ; CHECK-IL: ![[#SCOPE3:]] = distinct !DISubprogram(name: "entry"
 ; CHECK-IL: ![[#DL2]] = !DILocation(line: 7, column: 3,  scope: ![[#SCOPE2]], inlinedAt: ![[#INL2:]])
 ; CHECK-IL: ![[#INL2]] = distinct !DILocation(line: 11, column: 3, scope: ![[#BL2:]])
-; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 134217751)
+; CHECK-IL: ![[#BL2]] = !DILexicalBlockFile(scope: ![[#SCOPE3]], file: !1, discriminator: 186646551)
 ; CHECK-IL: ![[#DL3]] = !DILocation(line: 3, column: 1,  scope: ![[#SCOPE1]], inlinedAt: ![[#INL3:]])
 ; CHECK-IL: ![[#INL3]] = distinct !DILocation(line: 7, column: 3,  scope: ![[#BL1]], inlinedAt: ![[#INL2]])
 

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
index 2074b708380f..da5d46a32287 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-emit.ll
@@ -11,32 +11,36 @@
 
 ;; Check the generation of pseudoprobe intrinsic call.
 
+@a = dso_local global i32 0, align 4
+
 define void @foo(i32 %x) !dbg !3 {
 bb0:
   %cmp = icmp eq i32 %x, 0
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0), !dbg ![[#FAKELINE:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1), !dbg ![[#FAKELINE:]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID:]] 1 0 0
   br i1 %cmp, label %bb1, label %bb2
 
 bb1:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0), !dbg ![[#FAKELINE]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1), !dbg ![[#FAKELINE]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 3, 0, 0
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 3 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 4 0 0
+  store i32 6, i32* @a, align 4
   br label %bb3
 
 bb2:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0), !dbg ![[#FAKELINE]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1), !dbg ![[#FAKELINE]]
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 2, 0, 0
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID]], 4, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 2 0 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID]] 4 0 0
+  store i32 8, i32* @a, align 4
   br label %bb3
 
 bb3:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0), !dbg ![[#REALLINE:]]
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1), !dbg ![[#REALLINE:]]
   ret void, !dbg !12
 }
 
@@ -44,7 +48,7 @@ declare void @bar(i32 %x)
 
 define internal void @foo2(void (i32)* %f) !dbg !4 {
 entry:
-; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK-IL: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
 ; CHECK-MIR: PSEUDO_PROBE [[#GUID2:]], 1, 0, 0
 ; CHECK-ASM: .pseudoprobe	[[#GUID2:]] 1 0 0
 ; Check pseudo_probe metadata attached to the indirect call instruction.
@@ -64,13 +68,13 @@ entry:
 ; CHECK-IL: ![[#FAKELINE]] = !DILocation(line: 0, scope: ![[#FOO]])
 ; CHECK-IL: ![[#REALLINE]] = !DILocation(line: 2, scope: ![[#FOO]])
 ; CHECK-IL: ![[#PROBE0]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE0:]])
-;; A discriminator of 67108887 which is 0x4000017 in hexdecimal, stands for a direct call probe
+;; A discriminator of 119537687 which is 0x7200017 in hexadecimal, stands for an indirect call probe
 ;; with an index of 2.
-; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108887)
+; CHECK-IL: ![[#SCOPE0]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537687)
 ; CHECK-IL: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
-;; A discriminator of 134217759 which is 0x800001f in hexdecimal, stands for a direct call probe
+;; A discriminator of 186646559 which is 0xb20001f in hexadecimal, stands for a direct call probe
 ;; with an index of 3.
-; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 134217759)
+; CHECK-IL: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646559)
 
 ; Check the generation of .pseudo_probe_desc section
 ; CHECK-ASM: .section .pseudo_probe_desc,"G",@progbits,.pseudo_probe_desc_foo,comdat

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
index 5359fd4da067..055d41792290 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-inline.ll
@@ -12,18 +12,18 @@ target triple = "x86_64-unknown-linux-gnu"
 
 define dso_local i32 @foo(i32 %x) #0 !dbg !12 {
 entry:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID1:]], i64 1, i32 0, i64 -1)
   %add = add nsw i32 %x, 100000, !dbg !19
 ;; Check zen is fully inlined so there's no call to zen anymore.
 ;; Check code from the inlining of zen is properly annotated here.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2:]], i64 1, i32 0, i64 -1)
 ; CHECK: br i1 %cmp.i, label %while.cond.i, label %while.cond2.i, !dbg ![[#]], !prof ![[PD1:[0-9]+]]
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
 ; CHECK: br i1 %cmp1.i, label %while.body.i, label %zen.exit, !dbg ![[#]], !prof ![[PD2:[0-9]+]]
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
 ; CHECK-NOT: call i32 @zen
   %call = call i32 @zen(i32 %add), !dbg !20
   ret i32 %call, !dbg !21
@@ -32,36 +32,36 @@ entry:
 ; CHECK: define dso_local i32 @zen
 define dso_local i32 @zen(i32 %x) #0 !dbg !22 {
 entry:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 1, i32 0, i64 -1)
   %cmp = icmp sgt i32 %x, 0, !dbg !26
   br i1 %cmp, label %while.cond, label %while.cond2, !dbg !28
 
 while.cond:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 2, i32 0, i64 -1)
   %x.addr.0 = phi i32 [ %x, %entry ], [ %sub, %while.body ]
   %cmp1 = icmp sgt i32 %x.addr.0, 0, !dbg !29
   br i1 %cmp1, label %while.body, label %if.end, !dbg !31
 
 while.body:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 3, i32 0, i64 -1)
   %0 = load volatile i32, i32* @factor, align 4, !dbg !32
   %sub = sub nsw i32 %x.addr.0, %0, !dbg !39
   br label %while.cond, !dbg !31
 
 while.cond2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 4, i32 0, i64 -1)
   %x.addr.1 = phi i32 [ %x, %entry ], [ %add, %while.body4 ]
   %cmp3 = icmp slt i32 %x.addr.1, 0, !dbg !42
   br i1 %cmp3, label %while.body4, label %if.end, !dbg !44
 
 while.body4:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 5, i32 0, i64 -1)
   %1 = load volatile i32, i32* @factor, align 4, !dbg !45
   %add = add nsw i32 %x.addr.1, %1, !dbg !48
   br label %while.cond2, !dbg !44
 
 if.end:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID2]], i64 6, i32 0, i64 -1)
   %x.addr.2 = phi i32 [ %x.addr.0, %while.cond ], [ %x.addr.1, %while.cond2 ]
   ret i32 %x.addr.2, !dbg !51
 }
@@ -109,6 +109,10 @@ if.end:
 ;YAML-NEXT:    - NumSamples:      '23'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '23'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 ;YAML:  --- !Analysis
@@ -121,6 +125,10 @@ if.end:
 ;YAML-NEXT:    - NumSamples:      '23'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '23'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 ;YAML:  --- !Analysis
@@ -133,6 +141,10 @@ if.end:
 ;YAML-NEXT:    - NumSamples:      '382920'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '2'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '382920'
 ;YAML-NEXT:    - String:          ')'
 ;YAML-NEXT:  ...
 

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
index 25fd04e9d710..34629a3743eb 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
@@ -8,26 +8,26 @@ entry:
   store i32 %x, i32* %x.addr, align 4
   %0 = load i32, i32* %x.addr, align 4
   %cmp = icmp eq i32 %0, 0
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   br i1 %cmp, label %if.then, label %if.else
   ; CHECK: br i1 %cmp, label %if.then, label %if.else, !prof ![[PD1:[0-9]+]]
 
 if.then:
   ; CHECK: call {{.*}}, !dbg ![[#PROBE1:]], !prof ![[PROF1:[0-9]+]]
   call void %f(i32 1)
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
   store i32 1, i32* %retval, align 4
   br label %return
 
 if.else:
   ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
   call void %f(i32 2)
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
   store i32 2, i32* %retval, align 4
   br label %return
 
 return:
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
   %1 = load i32, i32* %retval, align 4
   ret i32 %1
 }
@@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"}
 
 ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
 ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
-;; A discriminator of 119537711 which is 0x400002f in hexdecimal, stands for an indirect call probe
+;; A discriminator of 119537711 which is 0x720002f in hexadecimal, stands for an indirect call probe
 ;; with an index of 5.
-; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108911)
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
 ; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
-;; A discriminator of 119537719 which is 0x4000037 in hexdecimal, stands for an indirect call probe
+;; A discriminator of 119537719 which is 0x7200037 in hexadecimal, stands for an indirect call probe
 ;; with an index of 6.
-; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 67108919)
+; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
+; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
 ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
 
 !llvm.module.flags = !{!9, !10}
@@ -69,6 +69,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '13'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '1'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '13'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -80,6 +84,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '7'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '5'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '7'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -91,6 +99,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '7'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '2'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '7'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -102,6 +114,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '6'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '6'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -113,6 +129,10 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '3'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '6'
 ;YAML-NEXT:    - String:          ')'
 ;YAML:  --- !Analysis
 ;YAML-NEXT:  Pass:            sample-profile
@@ -124,4 +144,8 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - NumSamples:      '13'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
 ;YAML-NEXT:    - ProbeId:         '4'
+;YAML-NEXT:    - String:          ', Factor='
+;YAML-NEXT:    - Factor:          '1.000000e+00'
+;YAML-NEXT:    - String:          ', OriginalSamples='
+;YAML-NEXT:    - OriginalSamples: '13'
 ;YAML-NEXT:    - String:          ')'

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
new file mode 100644
index 000000000000..992afedd14f7
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -passes='pseudo-probe,sample-profile,jump-threading,pseudo-probe-update' -sample-profile-file=%S/Inputs/pseudo-probe-update.prof -S  | FileCheck %s
+
+declare i32 @f1()
+declare i32 @f2()
+declare void @f3()
+
+
+;; This tests that the branch in 'merge' can be cloned up into T1.
+define i32 @foo(i1 %cond, i1 %cond2) #0 {
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
+	br i1 %cond, label %T1, label %F1
+T1:
+; CHECK: %v1 = call i32 @f1(), !prof ![[#PROF1:]]
+	%v1 = call i32 @f1()
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
+;; The distribution factor -8513881372706734080 stands for 53.85%, which is from 7/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+    %cond3 = icmp eq i32 %v1, 412
+	br label %Merge
+F1:
+; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
+	%v2 = call i32 @f2()
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+;; The distribution factor 8513881922462547968 stands for 46.15%, which is from 6/(6+7).
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
+	br label %Merge
+Merge:
+
+	%A = phi i1 [%cond3, %T1], [%cond2, %F1]
+	%B = phi i32 [%v1, %T1], [%v2, %F1]
+	br i1 %A, label %T2, label %F2
+T2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+	call void @f3()
+	ret i32 %B
+F2:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
+	ret i32 %B
+}
+
+; CHECK: ![[#PROF1]] = !{!"branch_weights", i32 7}
+; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
+
+attributes #0 = {"use-sample-profile"}
+

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
new file mode 100644
index 000000000000..fd57dd8bc526
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
@@ -0,0 +1,77 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %s -passes='pseudo-probe,loop-unroll-full' -verify-pseudo-probe -S -o %t 2>&1 | FileCheck %s --check-prefix=VERIFY
+; RUN: FileCheck %s < %t
+
+; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
+; VERIFY: Function foo:
+; VERIFY-DAG: Probe 6	previous factor 1.00	current factor 5.00
+; VERIFY-DAG: Probe 4	previous factor 1.00	current factor 5.00
+
+declare void @foo2() nounwind
+
+define void @foo(i32 %x) {
+bb:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
+  %tmp = alloca [5 x i32*], align 16
+  br label %bb7.preheader
+
+bb3.loopexit:
+  %spec.select.lcssa = phi i32 [ %spec.select, %bb10 ]
+  %tmp5.not = icmp eq i32 %spec.select.lcssa, 0
+  br i1 %tmp5.not, label %bb24, label %bb7.preheader
+
+bb7.preheader:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+  %tmp1.06 = phi i32 [ 5, %bb ], [ %spec.select.lcssa, %bb3.loopexit ]
+  br label %bb10
+
+bb10:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
+  %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
+  %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
+  %tmp13 = getelementptr inbounds [5 x i32*], [5 x i32*]* %tmp, i64 0, i64 %indvars.iv
+  %tmp14 = load i32*, i32** %tmp13, align 8
+  %tmp15.not = icmp ne i32* %tmp14, null
+  %tmp18 = sext i1 %tmp15.not to i32
+  %spec.select = add nsw i32 %tmp1.14, %tmp18
+  call void @foo2(), !dbg !12
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, 5
+  br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
+
+bb24:
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+  ret void
+}
+
+;; A discriminator of 186646583 which is 0xb200037 in hexadecimal, stands for a direct call probe
+;; with an index of 6 and a scale of 100%.
+; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
+; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2)
+!1 = !DIFile(filename: "test.c", directory: "")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, retainedNodes: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!9 = !{i32 2, !"Dwarf Version", i32 4}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{!"clang version 3.9.0"}
+!12 = !DILocation(line: 2, column: 20, scope: !4)
+!13 = distinct !{!13, !14}
+!14 = !{!"llvm.loop.unroll.full"}


        


More information about the llvm-commits mailing list