[llvm] 106ec64 - [PGO] Add memcmp/bcmp size value profiling.

Hiroshi Yamauchi via llvm-commits llvm-commits at lists.llvm.org
Tue May 26 10:28:19 PDT 2020


Author: Hiroshi Yamauchi
Date: 2020-05-26T10:28:04-07:00
New Revision: 106ec64fbc7fb5ef28d0368fb1dca18e67e75adf

URL: https://github.com/llvm/llvm-project/commit/106ec64fbc7fb5ef28d0368fb1dca18e67e75adf
DIFF: https://github.com/llvm/llvm-project/commit/106ec64fbc7fb5ef28d0368fb1dca18e67e75adf.diff

LOG: [PGO] Add memcmp/bcmp size value profiling.

Summary: This adds support for memcmp/bcmp to the existing memcpy/memset value profiling.

Reviewers: davidxl

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D79751

Added: 
    

Modified: 
    llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
    llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
    llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
    llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
    llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext
    llvm/test/Transforms/PGOProfile/memop_size_annotation.ll
    llvm/test/Transforms/PGOProfile/memop_size_opt.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 5fcb0b27d46f..72eb5cd61b00 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -377,6 +377,7 @@ class PGOInstrumentationGenLegacyPass : public ModulePass {
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
   }
 };
 
@@ -405,6 +406,7 @@ class PGOInstrumentationUseLegacyPass : public ModulePass {
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.addRequired<ProfileSummaryInfoWrapperPass>();
     AU.addRequired<BlockFrequencyInfoWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
   }
 };
 
@@ -437,6 +439,7 @@ INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
                       "PGO instrumentation.", false, false)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
                     "PGO instrumentation.", false, false)
 
@@ -566,11 +569,11 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
   }
 
   FuncPGOInstrumentation(
-      Function &Func,
+      Function &Func, TargetLibraryInfo &TLI,
       std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
       bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
       BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
-      : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func),
+      : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
         ValueSites(IPVK_Last + 1), SIVisitor(Func), MST(F, BPI, BFI) {
     // This should be done before CFG hash computation.
     SIVisitor.countSelects(Func);
@@ -834,15 +837,16 @@ populateEHOperandBundle(VPCandidateInfo &Cand,
 // Visit all edge and instrument the edges not in MST, and do value profiling.
 // Critical edges will be split.
 static void instrumentOneFunc(
-    Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
+    Function &F, Module *M, TargetLibraryInfo &TLI, BranchProbabilityInfo *BPI,
+    BlockFrequencyInfo *BFI,
     std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
     bool IsCS) {
   // Split indirectbr critical edges here before computing the MST rather than
   // later in getInstrBB() to avoid invalidating it.
   SplitIndirectBrCriticalEdges(F, BPI, BFI);
 
-  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
-                                                   BFI, IsCS);
+  FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, TLI, ComdatMembers, true,
+                                                   BPI, BFI, IsCS);
   std::vector<BasicBlock *> InstrumentBBs;
   FuncInfo.getInstrumentBBs(InstrumentBBs);
   unsigned NumCounters =
@@ -997,12 +1001,12 @@ namespace {
 
 class PGOUseFunc {
 public:
-  PGOUseFunc(Function &Func, Module *Modu,
+  PGOUseFunc(Function &Func, Module *Modu, TargetLibraryInfo &TLI,
              std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
              BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFIin,
              ProfileSummaryInfo *PSI, bool IsCS)
       : F(Func), M(Modu), BFI(BFIin), PSI(PSI),
-        FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
+        FuncInfo(Func, TLI, ComdatMembers, false, BPI, BFIin, IsCS),
         FreqAttr(FFA_Normal), IsCS(IsCS) {}
 
   // Read counts for the instrumented BB from profile.
@@ -1504,7 +1508,8 @@ static void collectComdatMembers(
 }
 
 static bool InstrumentAllFunctions(
-    Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
+    Module &M, function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
+    function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
   // For the context-sensitve instrumentation, we should have a separated pass
   // (before LTO/ThinLTO linking) to create these variables.
@@ -1516,9 +1521,10 @@ static bool InstrumentAllFunctions(
   for (auto &F : M) {
     if (F.isDeclaration())
       continue;
+    auto &TLI = LookupTLI(F);
     auto *BPI = LookupBPI(F);
     auto *BFI = LookupBFI(F);
-    instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
+    instrumentOneFunc(F, &M, TLI, BPI, BFI, ComdatMembers, IsCS);
   }
   return true;
 }
@@ -1534,27 +1540,32 @@ bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
   if (skipModule(M))
     return false;
 
+  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
+    return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  };
   auto LookupBPI = [this](Function &F) {
     return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
   };
   auto LookupBFI = [this](Function &F) {
     return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
   };
-  return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS);
+  return InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS);
 }
 
 PreservedAnalyses PGOInstrumentationGen::run(Module &M,
                                              ModuleAnalysisManager &AM) {
   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
   auto LookupBPI = [&FAM](Function &F) {
     return &FAM.getResult<BranchProbabilityAnalysis>(F);
   };
-
   auto LookupBFI = [&FAM](Function &F) {
     return &FAM.getResult<BlockFrequencyAnalysis>(F);
   };
 
-  if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS))
+  if (!InstrumentAllFunctions(M, LookupTLI, LookupBPI, LookupBFI, IsCS))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
@@ -1562,6 +1573,7 @@ PreservedAnalyses PGOInstrumentationGen::run(Module &M,
 
 static bool annotateAllFunctions(
     Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
+    function_ref<TargetLibraryInfo &(Function &)> LookupTLI,
     function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
     function_ref<BlockFrequencyInfo *(Function &)> LookupBFI,
     ProfileSummaryInfo *PSI, bool IsCS) {
@@ -1609,12 +1621,13 @@ static bool annotateAllFunctions(
   for (auto &F : M) {
     if (F.isDeclaration())
       continue;
+    auto &TLI = LookupTLI(F);
     auto *BPI = LookupBPI(F);
     auto *BFI = LookupBFI(F);
     // Split indirectbr critical edges here before computing the MST rather than
     // later in getInstrBB() to avoid invalidating it.
     SplitIndirectBrCriticalEdges(F, BPI, BFI);
-    PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, PSI, IsCS);
+    PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS);
     bool AllZeros = false;
     if (!Func.readCounters(PGOReader.get(), AllZeros))
       continue;
@@ -1695,10 +1708,12 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
                                              ModuleAnalysisManager &AM) {
 
   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
+  auto LookupTLI = [&FAM](Function &F) -> TargetLibraryInfo & {
+    return FAM.getResult<TargetLibraryAnalysis>(F);
+  };
   auto LookupBPI = [&FAM](Function &F) {
     return &FAM.getResult<BranchProbabilityAnalysis>(F);
   };
-
   auto LookupBFI = [&FAM](Function &F) {
     return &FAM.getResult<BlockFrequencyAnalysis>(F);
   };
@@ -1706,7 +1721,7 @@ PreservedAnalyses PGOInstrumentationUse::run(Module &M,
   auto *PSI = &AM.getResult<ProfileSummaryAnalysis>(M);
 
   if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
-                            LookupBPI, LookupBFI, PSI, IsCS))
+                            LookupTLI, LookupBPI, LookupBFI, PSI, IsCS))
     return PreservedAnalyses::all();
 
   return PreservedAnalyses::none();
@@ -1716,6 +1731,9 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
   if (skipModule(M))
     return false;
 
+  auto LookupTLI = [this](Function &F) -> TargetLibraryInfo & {
+    return this->getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  };
   auto LookupBPI = [this](Function &F) {
     return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
   };
@@ -1724,8 +1742,8 @@ bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
   };
 
   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
-  return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI, PSI,
-                              IsCS);
+  return annotateAllFunctions(M, ProfileFileName, "", LookupTLI, LookupBPI,
+                              LookupBFI, PSI, IsCS);
 }
 
 static std::string getSimpleNodeName(const BasicBlock *Node) {

diff  --git a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index 9767fda82f3d..bef0e0257f02 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -95,6 +95,11 @@ extern cl::opt<std::string> MemOPSizeRange;
 // This option sets the value that groups large memop sizes
 extern cl::opt<unsigned> MemOPSizeLarge;
 
+static cl::opt<bool>
+    MemOPOptMemcmpBcmp("pgo-memop-optimize-memcmp-bcmp", cl::init(false),
+                       cl::Hidden,
+                       cl::desc("Size-specialize memcmp and bcmp calls"));
+
 namespace {
 class PGOMemOPSizeOptLegacyPass : public FunctionPass {
 public:
@@ -113,6 +118,7 @@ class PGOMemOPSizeOptLegacyPass : public FunctionPass {
     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
     AU.addPreserved<GlobalsAAWrapperPass>();
     AU.addPreserved<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
   }
 };
 } // end anonymous namespace
@@ -122,6 +128,7 @@ INITIALIZE_PASS_BEGIN(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
                       "Optimize memory intrinsic using its size value profile",
                       false, false)
 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
 INITIALIZE_PASS_END(PGOMemOPSizeOptLegacyPass, "pgo-memop-opt",
                     "Optimize memory intrinsic using its size value profile",
                     false, false)
@@ -131,11 +138,90 @@ FunctionPass *llvm::createPGOMemOPSizeOptLegacyPass() {
 }
 
 namespace {
+
+static const char *getMIName(const MemIntrinsic *MI) {
+  switch (MI->getIntrinsicID()) {
+  case Intrinsic::memcpy:
+    return "memcpy";
+  case Intrinsic::memmove:
+    return "memmove";
+  case Intrinsic::memset:
+    return "memset";
+  default:
+    return "unknown";
+  }
+}
+
+// A class that abstracts a memop (memcpy, memmove, memset, memcmp and bcmp).
+struct MemOp {
+  Instruction *I;
+  MemOp(MemIntrinsic *MI) : I(MI) {}
+  MemOp(CallInst *CI) : I(CI) {}
+  MemIntrinsic *asMI() { return dyn_cast<MemIntrinsic>(I); }
+  CallInst *asCI() { return cast<CallInst>(I); }
+  MemOp clone() {
+    if (auto MI = asMI())
+      return MemOp(cast<MemIntrinsic>(MI->clone()));
+    return MemOp(cast<CallInst>(asCI()->clone()));
+  }
+  Value *getLength() {
+    if (auto MI = asMI())
+      return MI->getLength();
+    return asCI()->getArgOperand(2);
+  }
+  void setLength(Value *Length) {
+    if (auto MI = asMI())
+      return MI->setLength(Length);
+    asCI()->setArgOperand(2, Length);
+  }
+  StringRef getFuncName() {
+    if (auto MI = asMI())
+      return MI->getCalledFunction()->getName();
+    return asCI()->getCalledFunction()->getName();
+  }
+  bool isMemmove() {
+    if (auto MI = asMI())
+      if (MI->getIntrinsicID() == Intrinsic::memmove)
+        return true;
+    return false;
+  }
+  bool isMemcmp(TargetLibraryInfo &TLI) {
+    LibFunc Func;
+    if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
+        Func == LibFunc_memcmp) {
+      return true;
+    }
+    return false;
+  }
+  bool isBcmp(TargetLibraryInfo &TLI) {
+    LibFunc Func;
+    if (asMI() == nullptr && TLI.getLibFunc(*asCI(), Func) &&
+        Func == LibFunc_bcmp) {
+      return true;
+    }
+    return false;
+  }
+  const char *getName(TargetLibraryInfo &TLI) {
+    if (auto MI = asMI())
+      return getMIName(MI);
+    LibFunc Func;
+    if (TLI.getLibFunc(*asCI(), Func)) {
+      if (Func == LibFunc_memcmp)
+        return "memcmp";
+      if (Func == LibFunc_bcmp)
+        return "bcmp";
+    }
+    llvm_unreachable("Must be MemIntrinsic or memcmp/bcmp CallInst");
+    return nullptr;
+  }
+};
+
 class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
 public:
   MemOPSizeOpt(Function &Func, BlockFrequencyInfo &BFI,
-               OptimizationRemarkEmitter &ORE, DominatorTree *DT)
-      : Func(Func), BFI(BFI), ORE(ORE), DT(DT), Changed(false) {
+               OptimizationRemarkEmitter &ORE, DominatorTree *DT,
+               TargetLibraryInfo &TLI)
+      : Func(Func), BFI(BFI), ORE(ORE), DT(DT), TLI(TLI), Changed(false) {
     ValueDataArray =
         std::make_unique<InstrProfValueData[]>(MemOPMaxVersion + 2);
     // Get the MemOPSize range information from option MemOPSizeRange,
@@ -147,13 +233,12 @@ class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
     WorkList.clear();
     visit(Func);
 
-    for (auto &MI : WorkList) {
+    for (auto &MO : WorkList) {
       ++NumOfPGOMemOPAnnotate;
-      if (perform(MI)) {
+      if (perform(MO)) {
         Changed = true;
         ++NumOfPGOMemOPOpt;
-        LLVM_DEBUG(dbgs() << "MemOP call: "
-                          << MI->getCalledFunction()->getName()
+        LLVM_DEBUG(dbgs() << "MemOP call: " << MO.getFuncName()
                           << "is Transformed.\n");
       }
     }
@@ -164,7 +249,16 @@ class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
     // Not perform on constant length calls.
     if (dyn_cast<ConstantInt>(Length))
       return;
-    WorkList.push_back(&MI);
+    WorkList.push_back(MemOp(&MI));
+  }
+
+  void visitCallInst(CallInst &CI) {
+    LibFunc Func;
+    if (TLI.getLibFunc(CI, Func) &&
+        (Func == LibFunc_memcmp || Func == LibFunc_bcmp) &&
+        !dyn_cast<ConstantInt>(CI.getArgOperand(2))) {
+      WorkList.push_back(MemOp(&CI));
+    }
   }
 
 private:
@@ -172,15 +266,16 @@ class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
   BlockFrequencyInfo &BFI;
   OptimizationRemarkEmitter &ORE;
   DominatorTree *DT;
+  TargetLibraryInfo &TLI;
   bool Changed;
-  std::vector<MemIntrinsic *> WorkList;
+  std::vector<MemOp> WorkList;
   // Start of the previse range.
   int64_t PreciseRangeStart;
   // Last value of the previse range.
   int64_t PreciseRangeLast;
   // The space to read the profile annotation.
   std::unique_ptr<InstrProfValueData[]> ValueDataArray;
-  bool perform(MemIntrinsic *MI);
+  bool perform(MemOp MO);
 
   // This kind shows which group the value falls in. For PreciseValue, we have
   // the profile count for that value. LargeGroup groups the values that are in
@@ -196,19 +291,6 @@ class MemOPSizeOpt : public InstVisitor<MemOPSizeOpt> {
   }
 };
 
-static const char *getMIName(const MemIntrinsic *MI) {
-  switch (MI->getIntrinsicID()) {
-  case Intrinsic::memcpy:
-    return "memcpy";
-  case Intrinsic::memmove:
-    return "memmove";
-  case Intrinsic::memset:
-    return "memset";
-  default:
-    return "unknown";
-  }
-}
-
 static bool isProfitable(uint64_t Count, uint64_t TotalCount) {
   assert(Count <= TotalCount);
   if (Count < MemOPCountThreshold)
@@ -227,21 +309,23 @@ static inline uint64_t getScaledCount(uint64_t Count, uint64_t Num,
   return ScaleCount / Denom;
 }
 
-bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
-  assert(MI);
-  if (MI->getIntrinsicID() == Intrinsic::memmove)
+bool MemOPSizeOpt::perform(MemOp MO) {
+  assert(MO.I);
+  if (MO.isMemmove())
+    return false;
+  if (!MemOPOptMemcmpBcmp && (MO.isMemcmp(TLI) || MO.isBcmp(TLI)))
     return false;
 
   uint32_t NumVals, MaxNumPromotions = MemOPMaxVersion + 2;
   uint64_t TotalCount;
-  if (!getValueProfDataFromInst(*MI, IPVK_MemOPSize, MaxNumPromotions,
+  if (!getValueProfDataFromInst(*MO.I, IPVK_MemOPSize, MaxNumPromotions,
                                 ValueDataArray.get(), NumVals, TotalCount))
     return false;
 
   uint64_t ActualCount = TotalCount;
   uint64_t SavedTotalCount = TotalCount;
   if (MemOPScaleCount) {
-    auto BBEdgeCount = BFI.getBlockProfileCount(MI->getParent());
+    auto BBEdgeCount = BFI.getBlockProfileCount(MO.I->getParent());
     if (!BBEdgeCount)
       return false;
     ActualCount = *BBEdgeCount;
@@ -333,13 +417,13 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
   // }
   // merge_bb:
 
-  BasicBlock *BB = MI->getParent();
+  BasicBlock *BB = MO.I->getParent();
   LLVM_DEBUG(dbgs() << "\n\n== Basic Block Before ==\n");
   LLVM_DEBUG(dbgs() << *BB << "\n");
   auto OrigBBFreq = BFI.getBlockFreq(BB);
 
-  BasicBlock *DefaultBB = SplitBlock(BB, MI, DT);
-  BasicBlock::iterator It(*MI);
+  BasicBlock *DefaultBB = SplitBlock(BB, MO.I, DT);
+  BasicBlock::iterator It(*MO.I);
   ++It;
   assert(It != DefaultBB->end());
   BasicBlock *MergeBB = SplitBlock(DefaultBB, &(*It), DT);
@@ -351,15 +435,24 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
   auto &Ctx = Func.getContext();
   IRBuilder<> IRB(BB);
   BB->getTerminator()->eraseFromParent();
-  Value *SizeVar = MI->getLength();
+  Value *SizeVar = MO.getLength();
   SwitchInst *SI = IRB.CreateSwitch(SizeVar, DefaultBB, SizeIds.size());
+  Type *MemOpTy = MO.I->getType();
+  PHINode *PHI = nullptr;
+  if (!MemOpTy->isVoidTy()) {
+    // Insert a phi for the return values at the merge block.
+    IRBuilder<> IRBM(MergeBB->getFirstNonPHI());
+    PHI = IRBM.CreatePHI(MemOpTy, SizeIds.size() + 1, "MemOP.RVMerge");
+    MO.I->replaceAllUsesWith(PHI);
+    PHI->addIncoming(MO.I, DefaultBB);
+  }
 
   // Clear the value profile data.
-  MI->setMetadata(LLVMContext::MD_prof, nullptr);
+  MO.I->setMetadata(LLVMContext::MD_prof, nullptr);
   // If all promoted, we don't need the MD.prof metadata.
   if (SavedRemainCount > 0 || Version != NumVals)
     // Otherwise we need update with the un-promoted records back.
-    annotateValueSite(*Func.getParent(), *MI, VDs.slice(Version),
+    annotateValueSite(*Func.getParent(), *MO.I, VDs.slice(Version),
                       SavedRemainCount, IPVK_MemOPSize, NumVals);
 
   LLVM_DEBUG(dbgs() << "\n\n== Basic Block After==\n");
@@ -371,17 +464,18 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
   for (uint64_t SizeId : SizeIds) {
     BasicBlock *CaseBB = BasicBlock::Create(
         Ctx, Twine("MemOP.Case.") + Twine(SizeId), &Func, DefaultBB);
-    Instruction *NewInst = MI->clone();
+    MemOp NewMO = MO.clone();
     // Fix the argument.
-    auto *MemI = cast<MemIntrinsic>(NewInst);
-    auto *SizeType = dyn_cast<IntegerType>(MemI->getLength()->getType());
+    auto *SizeType = dyn_cast<IntegerType>(NewMO.getLength()->getType());
     assert(SizeType && "Expected integer type size argument.");
     ConstantInt *CaseSizeId = ConstantInt::get(SizeType, SizeId);
-    MemI->setLength(CaseSizeId);
-    CaseBB->getInstList().push_back(NewInst);
+    NewMO.setLength(CaseSizeId);
+    CaseBB->getInstList().push_back(NewMO.I);
     IRBuilder<> IRBCase(CaseBB);
     IRBCase.CreateBr(MergeBB);
     SI->addCase(CaseSizeId, CaseBB);
+    if (!MemOpTy->isVoidTy())
+      PHI->addIncoming(NewMO.I, CaseBB);
     if (DT) {
       Updates.push_back({DominatorTree::Insert, CaseBB, MergeBB});
       Updates.push_back({DominatorTree::Insert, BB, CaseBB});
@@ -399,11 +493,10 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
 
   ORE.emit([&]() {
     using namespace ore;
-    return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MI)
-             << "optimized " << NV("Intrinsic", StringRef(getMIName(MI)))
-             << " with count " << NV("Count", SumForOpt) << " out of "
-             << NV("Total", TotalCount) << " for " << NV("Versions", Version)
-             << " versions";
+    return OptimizationRemark(DEBUG_TYPE, "memopt-opt", MO.I)
+           << "optimized " << NV("Memop", MO.getName(TLI)) << " with count "
+           << NV("Count", SumForOpt) << " out of " << NV("Total", TotalCount)
+           << " for " << NV("Versions", Version) << " versions";
   });
 
   return true;
@@ -412,13 +505,13 @@ bool MemOPSizeOpt::perform(MemIntrinsic *MI) {
 
 static bool PGOMemOPSizeOptImpl(Function &F, BlockFrequencyInfo &BFI,
                                 OptimizationRemarkEmitter &ORE,
-                                DominatorTree *DT) {
+                                DominatorTree *DT, TargetLibraryInfo &TLI) {
   if (DisableMemOPOPT)
     return false;
 
   if (F.hasFnAttribute(Attribute::OptimizeForSize))
     return false;
-  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT);
+  MemOPSizeOpt MemOPSizeOpt(F, BFI, ORE, DT, TLI);
   MemOPSizeOpt.perform();
   return MemOPSizeOpt.isChanged();
 }
@@ -429,7 +522,9 @@ bool PGOMemOPSizeOptLegacyPass::runOnFunction(Function &F) {
   auto &ORE = getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
   auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
   DominatorTree *DT = DTWP ? &DTWP->getDomTree() : nullptr;
-  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
+  TargetLibraryInfo &TLI =
+      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
+  return PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
 }
 
 namespace llvm {
@@ -440,7 +535,8 @@ PreservedAnalyses PGOMemOPSizeOpt::run(Function &F,
   auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
   auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
-  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT);
+  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
+  bool Changed = PGOMemOPSizeOptImpl(F, BFI, ORE, DT, TLI);
   if (!Changed)
     return PreservedAnalyses::all();
   auto PA = PreservedAnalyses();

diff  --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
index 604726d4f40f..cd4f636ff132 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.cpp
@@ -38,7 +38,7 @@ using PluginChainFinal = PluginChain<VP_PLUGIN_LIST>;
 
 template <> class PluginChain<> {
 public:
-  PluginChain(Function &F) {}
+  PluginChain(Function &F, TargetLibraryInfo &TLI) {}
   void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {}
 };
 
@@ -48,7 +48,8 @@ class PluginChain<PluginT, Ts...> : public PluginChain<Ts...> {
   using Base = PluginChain<Ts...>;
 
 public:
-  PluginChain(Function &F) : PluginChain<Ts...>(F), Plugin(F) {}
+  PluginChain(Function &F, TargetLibraryInfo &TLI)
+      : PluginChain<Ts...>(F, TLI), Plugin(F, TLI) {}
 
   void get(InstrProfValueKind K, std::vector<CandidateInfo> &Candidates) {
     if (K == PluginT::Kind)
@@ -65,8 +66,9 @@ class ValueProfileCollector::ValueProfileCollectorImpl : public PluginChainFinal
   using PluginChainFinal::PluginChainFinal;
 };
 
-ValueProfileCollector::ValueProfileCollector(Function &F)
-    : PImpl(new ValueProfileCollectorImpl(F)) {}
+ValueProfileCollector::ValueProfileCollector(Function &F,
+                                             TargetLibraryInfo &TLI)
+    : PImpl(new ValueProfileCollectorImpl(F, TLI)) {}
 
 ValueProfileCollector::~ValueProfileCollector() = default;
 

diff  --git a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
index ff883c8d0c77..c3f549c2e7cc 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfileCollector.h
@@ -16,6 +16,7 @@
 #ifndef LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
 #define LLVM_ANALYSIS_PROFILE_GEN_ANALYSIS_H
 
+#include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/PassManager.h"
 #include "llvm/Pass.h"
@@ -58,7 +59,7 @@ class ValueProfileCollector {
     Instruction *AnnotatedInst; // Where metadata is attached.
   };
 
-  ValueProfileCollector(Function &Fn);
+  ValueProfileCollector(Function &Fn, TargetLibraryInfo &TLI);
   ValueProfileCollector(ValueProfileCollector &&) = delete;
   ValueProfileCollector &operator=(ValueProfileCollector &&) = delete;
 

diff  --git a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
index 361035b178c8..b5dd9fab24a5 100644
--- a/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
+++ b/llvm/lib/Transforms/Instrumentation/ValueProfilePlugins.inc
@@ -23,12 +23,14 @@ using CandidateInfo = ValueProfileCollector::CandidateInfo;
 ///--------------------------- MemIntrinsicPlugin ------------------------------
 class MemIntrinsicPlugin : public InstVisitor<MemIntrinsicPlugin> {
   Function &F;
+  TargetLibraryInfo &TLI;
   std::vector<CandidateInfo> *Candidates;
 
 public:
   static constexpr InstrProfValueKind Kind = IPVK_MemOPSize;
 
-  MemIntrinsicPlugin(Function &Fn) : F(Fn), Candidates(nullptr) {}
+  MemIntrinsicPlugin(Function &Fn, TargetLibraryInfo &TLI)
+      : F(Fn), TLI(TLI), Candidates(nullptr) {}
 
   void run(std::vector<CandidateInfo> &Cs) {
     Candidates = &Cs;
@@ -45,6 +47,22 @@ public:
     Instruction *AnnotatedInst = &MI;
     Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst});
   }
+  void visitCallInst(CallInst &CI) {
+    auto *F = CI.getCalledFunction();
+    if (!F)
+      return;
+    LibFunc Func;
+    if (TLI.getLibFunc(CI, Func) &&
+        (Func == LibFunc_memcmp || Func == LibFunc_bcmp)) {
+      Value *Length = CI.getArgOperand(2);
+      // Not instrument constant length calls.
+      if (dyn_cast<ConstantInt>(Length))
+        return;
+      Instruction *InsertPt = &CI;
+      Instruction *AnnotatedInst = &CI;
+      Candidates->emplace_back(CandidateInfo{Length, InsertPt, AnnotatedInst});
+    }
+  }
 };
 
 ///------------------------ IndirectCallPromotionPlugin ------------------------
@@ -54,7 +72,7 @@ class IndirectCallPromotionPlugin {
 public:
   static constexpr InstrProfValueKind Kind = IPVK_IndirectCallTarget;
 
-  IndirectCallPromotionPlugin(Function &Fn) : F(Fn) {}
+  IndirectCallPromotionPlugin(Function &Fn, TargetLibraryInfo &TLI) : F(Fn) {}
 
   void run(std::vector<CandidateInfo> &Candidates) {
     std::vector<CallBase *> Result = findIndirectCalls(F);

diff  --git a/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext b/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext
index 400b29df3036..cce1a67a9457 100644
--- a/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext
+++ b/llvm/test/Transforms/PGOProfile/Inputs/memop_size_annotation.proftext
@@ -14,7 +14,27 @@ foo
 # ValueKind = IPVK_MemOPSize:
 1
 # NumValueSites:
-1
+3
+9
+7:33
+2:88
+9:72
+4:66
+1:99
+5:55
+6:44
+3:77
+8:22
+9
+7:33
+2:88
+9:72
+4:66
+1:99
+5:55
+6:44
+3:77
+8:22
 9
 7:33
 2:88

diff  --git a/llvm/test/Transforms/PGOProfile/memop_size_annotation.ll b/llvm/test/Transforms/PGOProfile/memop_size_annotation.ll
index a59988462ae6..5884a6ebbb25 100644
--- a/llvm/test/Transforms/PGOProfile/memop_size_annotation.ll
+++ b/llvm/test/Transforms/PGOProfile/memop_size_annotation.ll
@@ -33,6 +33,12 @@ for.body3:
   call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i1 false)
 ; MEMOP_ANNOTATION: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 %conv, i1 false)
 ; MEMOP_ANNOTATION-SAME: !prof ![[MEMOP_VALUESITE:[0-9]+]]
+  %memcmp = call i32 @memcmp(i8* %dst, i8* %src, i64 %conv)
+; MEMOP_ANNOTATION: call i32 @memcmp(i8* %dst, i8* %src, i64 %conv)
+; MEMOP_ANNOTATION-SAME: !prof ![[MEMOP_VALUESITE]]
+  %bcmp = call i32 @bcmp(i8* %dst, i8* %src, i64 %conv)
+; MEMOP_ANNOTATION: call i32 @bcmp(i8* %dst, i8* %src, i64 %conv)
+; MEMOP_ANNOTATION-SAME: !prof ![[MEMOP_VALUESITE]]
 ; MEMOP_ANNOTATION9: ![[MEMOP_VALUESITE]] = !{!"VP", i32 1, i64 556, i64 1, i64 99, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66, i64 5, i64 55, i64 6, i64 44, i64 7, i64 33, i64 8, i64 22}
 ; MEMOP_ANNOTATION4: ![[MEMOP_VALUESITE]] = !{!"VP", i32 1, i64 556, i64 1, i64 99, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72}
   br label %for.inc
@@ -56,4 +62,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture)
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
 
+declare i32 @memcmp(i8*, i8*, i64)
+declare i32 @bcmp(i8*, i8*, i64)
+
 declare void @llvm.lifetime.end(i64, i8* nocapture)

diff  --git a/llvm/test/Transforms/PGOProfile/memop_size_opt.ll b/llvm/test/Transforms/PGOProfile/memop_size_opt.ll
index 8d6215cf9252..bc79fbc3e37e 100644
--- a/llvm/test/Transforms/PGOProfile/memop_size_opt.ll
+++ b/llvm/test/Transforms/PGOProfile/memop_size_opt.ll
@@ -1,8 +1,8 @@
-; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
-; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -S | FileCheck %s --check-prefix=MEMOP_OPT
-; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pgo-memop-optimize-memcmp-bcmp -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 --pgo-memop-optimize-memcmp-bcmp -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pgo-memop-optimize-memcmp-bcmp -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
 ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
-; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
+; RUN: opt < %s -passes=pgo-memop-opt -verify-dom-info -pgo-memop-count-threshold=90 -pgo-memop-percent-threshold=15 -pgo-memop-optimize-memcmp-bcmp -pass-remarks-with-hotness -pass-remarks-output=%t.opt.yaml -S | FileCheck %s --check-prefix=MEMOP_OPT
 ; RUN: FileCheck %s -input-file=%t.opt.yaml --check-prefix=YAML
 
 
@@ -57,12 +57,6 @@ for.body3:
 ; MEMOP_OPT:   br label %[[MERGE_LABEL2]]
 ; MEMOP_OPT: [[MERGE_LABEL2]]:
 ; MEMOP_OPT:   br label %for.inc
-; MEMOP_OPT: [[SWITCH_BW]] = !{!"branch_weights", i32 457, i32 99}
-; Should be 457 total left (original total count 556, minus 99 from specialized
-; value 1, which is removed from VP array. Also, we only end up with 5 total
-; values, since the default max number of promotions is 5 and therefore
-; the rest of the values are ignored when extracting the VP metadata.
-; MEMOP_OPT: [[NEWVP]] = !{!"VP", i32 1, i64 457, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66}
 
 for.inc:
   %inc = add nsw i32 %j.0, 1
@@ -79,6 +73,83 @@ for.end6:
   ret void
 }
 
+declare void @consume(i32 %v1, i32 %v2)
+
+define void @foo_memcmp_bcmp(i8* %dst, i8* %src, i8* %dst2, i8* %src2, i32* %a, i32 %n) !prof !27 {
+entry:
+  br label %for.cond
+
+for.cond:
+  %i.0 = phi i32 [ 0, %entry ], [ %inc5, %for.inc4 ]
+  %cmp = icmp slt i32 %i.0, %n
+  br i1 %cmp, label %for.body, label %for.end6, !prof !28
+
+for.body:
+  br label %for.cond1
+
+for.cond1:
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %idx.ext = sext i32 %i.0 to i64
+  %add.ptr = getelementptr inbounds i32, i32* %a, i64 %idx.ext
+  %0 = load i32, i32* %add.ptr, align 4
+  %cmp2 = icmp slt i32 %j.0, %0
+  br i1 %cmp2, label %for.body3, label %for.end, !prof !29
+
+for.body3:
+  %add = add nsw i32 %i.0, 1
+  %conv = sext i32 %add to i64
+  %memcmp = call i32 @memcmp(i8* %dst, i8* %src, i64 %conv), !prof !30
+  %bcmp = call i32 @bcmp(i8* %dst2, i8* %src2, i64 %conv), !prof !31
+  call void @consume(i32 %memcmp, i32 %bcmp)
+  br label %for.inc
+
+; MEMOP_OPT:  switch i64 %conv, label %[[DEFAULT_LABEL:.*]] [
+; MEMOP_OPT:    i64 1, label %[[CASE_1_LABEL:.*]]
+; MEMOP_OPT:  ], !prof [[SWITCH_BW:![0-9]+]]
+; MEMOP_OPT: [[CASE_1_LABEL]]:
+; MEMOP_OPT:   %[[RV:.*]] = call i32 @memcmp(i8* %dst, i8* %src, i64 1)
+; MEMOP_OPT:   br label %[[MERGE_LABEL:.*]]
+; MEMOP_OPT: [[DEFAULT_LABEL]]:
+; MEMOP_OPT:   %[[RVD:.*]] = call i32 @memcmp(i8* %dst, i8* %src, i64 %conv), !prof [[NEWVP:![0-9]+]]
+; MEMOP_OPT:   br label %[[MERGE_LABEL]]
+; MEMOP_OPT: [[MERGE_LABEL]]:
+; MEMOP_OPT:  %[[PHI:.*]] = phi i32 [ %[[RVD]], %[[DEFAULT_LABEL]] ], [ %[[RV]], %[[CASE_1_LABEL]] ]
+; MEMOP_OPT:  switch i64 %conv, label %[[DEFAULT_LABEL2:.*]] [
+; MEMOP_OPT:    i64 1, label %[[CASE_1_LABEL2:.*]]
+; MEMOP_OPT:  ], !prof [[SWITCH_BW:![0-9]+]]
+; MEMOP_OPT: [[CASE_1_LABEL2]]:
+; MEMOP_OPT:   %[[RV2:.*]] = call i32 @bcmp(i8* %dst2, i8* %src2, i64 1)
+; MEMOP_OPT:   br label %[[MERGE_LABEL2:.*]]
+; MEMOP_OPT: [[DEFAULT_LABEL2]]:
+; MEMOP_OPT:   %[[RVD2:.*]] = call i32 @bcmp(i8* %dst2, i8* %src2, i64 %conv), !prof [[NEWVP]]
+; MEMOP_OPT:   br label %[[MERGE_LABEL2]]
+; MEMOP_OPT: [[MERGE_LABEL2]]:
+; MEMOP_OPT:   %[[PHI2:.*]] = phi i32 [ %[[RVD2]], %[[DEFAULT_LABEL2]] ], [ %[[RV2]], %[[CASE_1_LABEL2]] ]
+; MEMOP_OPT:   call void @consume(i32 %[[PHI]], i32 %[[PHI2]])
+; MEMOP_OPT:   br label %for.inc
+
+for.inc:
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:
+  br label %for.inc4
+
+for.inc4:
+  %inc5 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end6:
+  ret void
+}
+
+; MEMOP_OPT: [[SWITCH_BW]] = !{!"branch_weights", i32 457, i32 99}
+; Should be 457 total left (original total count 556, minus 99 from specialized
+; value 1, which is removed from VP array). Also, we only end up with 5 total
+; values, since the default max number of promotions is 5 and therefore
+; the rest of the values are ignored when extracting the VP metadata.
+; MEMOP_OPT: [[NEWVP]] = !{!"VP", i32 1, i64 457, i64 2, i64 88, i64 3, i64 77, i64 9, i64 72, i64 4, i64 66}
+
 !llvm.module.flags = !{!0}
 
 !0 = !{i32 1, !"ProfileSummary", !1}
@@ -118,6 +189,9 @@ declare void @llvm.lifetime.start(i64, i8* nocapture)
 
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
 
+declare i32 @memcmp(i8*, i8*, i64)
+declare i32 @bcmp(i8*, i8*, i64)
+
 declare void @llvm.lifetime.end(i64, i8* nocapture)
 
 ; YAML:      --- !Passed
@@ -127,7 +201,7 @@ declare void @llvm.lifetime.end(i64, i8* nocapture)
 ; YAML-NEXT: Hotness:         0
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'optimized '
-; YAML-NEXT:   - Intrinsic:       memcpy
+; YAML-NEXT:   - Memop:           memcpy
 ; YAML-NEXT:   - String:          ' with count '
 ; YAML-NEXT:   - Count:           '99'
 ; YAML-NEXT:   - String:          ' out of '
@@ -143,7 +217,39 @@ declare void @llvm.lifetime.end(i64, i8* nocapture)
 ; YAML-NEXT: Hotness:         0
 ; YAML-NEXT: Args:
 ; YAML-NEXT:   - String:          'optimized '
-; YAML-NEXT:   - Intrinsic:       memcpy
+; YAML-NEXT:   - Memop:           memcpy
+; YAML-NEXT:   - String:          ' with count '
+; YAML-NEXT:   - Count:           '99'
+; YAML-NEXT:   - String:          ' out of '
+; YAML-NEXT:   - Total:           '556'
+; YAML-NEXT:   - String:          ' for '
+; YAML-NEXT:   - Versions:        '1'
+; YAML-NEXT:   - String:          ' versions'
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Passed
+; YAML-NEXT: Pass:            pgo-memop-opt
+; YAML-NEXT: Name:            memopt-opt
+; YAML-NEXT: Function:        foo_memcmp_bcmp
+; YAML-NEXT: Hotness:         0
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'optimized '
+; YAML-NEXT:   - Memop:           memcmp
+; YAML-NEXT:   - String:          ' with count '
+; YAML-NEXT:   - Count:           '99'
+; YAML-NEXT:   - String:          ' out of '
+; YAML-NEXT:   - Total:           '556'
+; YAML-NEXT:   - String:          ' for '
+; YAML-NEXT:   - Versions:        '1'
+; YAML-NEXT:   - String:          ' versions'
+; YAML-NEXT: ...
+; YAML-NEXT: --- !Passed
+; YAML-NEXT: Pass:            pgo-memop-opt
+; YAML-NEXT: Name:            memopt-opt
+; YAML-NEXT: Function:        foo_memcmp_bcmp
+; YAML-NEXT: Hotness:         0
+; YAML-NEXT: Args:
+; YAML-NEXT:   - String:          'optimized '
+; YAML-NEXT:   - Memop:           bcmp
 ; YAML-NEXT:   - String:          ' with count '
 ; YAML-NEXT:   - Count:           '99'
 ; YAML-NEXT:   - String:          ' out of '


        


More information about the llvm-commits mailing list