[clang] b1926f3 - Restore "[MemProf] Memprof profile matching and annotation"

Teresa Johnson via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 23 11:39:05 PDT 2022


Author: Teresa Johnson
Date: 2022-09-23T11:38:47-07:00
New Revision: b1926f308f0939b365ee4940c7b1bd984b45e71a

URL: https://github.com/llvm/llvm-project/commit/b1926f308f0939b365ee4940c7b1bd984b45e71a
DIFF: https://github.com/llvm/llvm-project/commit/b1926f308f0939b365ee4940c7b1bd984b45e71a.diff

LOG: Restore "[MemProf] Memprof profile matching and annotation"

This reverts commit 794b7ea960ccc3222f2af582efadbc5e5c464292, and
thus restores commit a212d8da94d08e229aa8d65283e4b116310bba10, and
follow on fixes 0cd6763fa93159b84d70a5bb602c24996acaafaa,
e9ff53d42feac7fc157718523275619a8106f2f3, and
37c6a25e9ab230e5e21fa34e246d9fec55275df0.

Use a hash function (BLAKE3) instead of hash_combine/hash_code which are
not guaranteed to be stable across executions.

Additionally, it adds a "REQUIRES: x86_64-linux" to the tests that have
raw profile inputs to avoid failures on big endian bots.

Reviewers: snehasish, davidxl

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D128142

Added: 
    clang/test/CodeGen/Inputs/memprof.exe
    clang/test/CodeGen/Inputs/memprof.memprofraw
    clang/test/CodeGen/memprof.cpp
    llvm/test/Transforms/PGOProfile/Inputs/memprof.exe
    llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw
    llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw
    llvm/test/Transforms/PGOProfile/memprof.ll
    llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll

Modified: 
    clang/lib/Frontend/CompilerInvocation.cpp
    llvm/include/llvm/Analysis/MemoryBuiltins.h
    llvm/include/llvm/ProfileData/InstrProfReader.h
    llvm/lib/Analysis/MemoryBuiltins.cpp
    llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp

Removed: 
    


################################################################################
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 9f9241054b1ef..656e5950db988 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1306,7 +1306,10 @@ static void setPGOUseInstrumentor(CodeGenOptions &Opts,
   }
   std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader =
     std::move(ReaderOrErr.get());
-  if (PGOReader->isIRLevelProfile()) {
+  // Currently memprof profiles are only added at the IR level. Mark the profile
+  // type as IR in that case as well and the subsequent matching needs to detect
+  // which is available (might be one or both).
+  if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) {
     if (PGOReader->hasCSIRLevelProfile())
       Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr);
     else

diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe
new file mode 100755
index 0000000000000..955c0d6b0e87a
Binary files /dev/null and b/clang/test/CodeGen/Inputs/memprof.exe differ

diff --git a/clang/test/CodeGen/Inputs/memprof.memprofraw b/clang/test/CodeGen/Inputs/memprof.memprofraw
new file mode 100644
index 0000000000000..07a3310c122af
Binary files /dev/null and b/clang/test/CodeGen/Inputs/memprof.memprofraw differ

diff --git a/clang/test/CodeGen/memprof.cpp b/clang/test/CodeGen/memprof.cpp
new file mode 100644
index 0000000000000..b246d1f086942
--- /dev/null
+++ b/clang/test/CodeGen/memprof.cpp
@@ -0,0 +1,38 @@
+// Test if memprof instrumentation and use pass are invoked.
+//
+// Instrumentation:
+// Ensure Pass MemProfilerPass and ModuleMemProfilerPass are invoked.
+// RUN: %clang_cc1 -O2 -fmemory-profile %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=INSTRUMENT
+// INSTRUMENT: Running pass: MemProfilerPass on main
+// INSTRUMENT: Running pass: ModuleMemProfilerPass on [module]
+
+// Avoid failures on big-endian systems that can't read the raw profile properly
+// REQUIRES: x86_64-linux
+
+// TODO: Use text profile inputs once that is available for memprof.
+//
+// The following commands were used to compile the source to instrumented
+// executables and collect raw binary format profiles:
+//
+// # Collect memory profile:
+// $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \
+//      -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \
+//      -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \
+//      memprof.cpp -o memprof.exe -fmemory-profile
+// $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw
+//
+// RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata
+
+// Profile use:
+// Ensure Pass PGOInstrumentationUse is invoked with the memprof-only profile.
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.memprofdata %s -fdebug-pass-manager  -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE
+// USE: Running pass: PGOInstrumentationUse on [module]
+
+char *foo() {
+  return new char[10];
+}
+int main() {
+  char *a = foo();
+  delete[] a;
+  return 0;
+}

diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h
index 949fe7270821a..41dc0c4ff452e 100644
--- a/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -56,6 +56,10 @@ bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI);
 bool isAllocationFn(const Value *V,
                     function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
 
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory via new.
+bool isNewLikeFn(const Value *V, const TargetLibraryInfo *TLI);
+
 /// Tests if a value is a call or invoke to a library function that
 /// allocates memory similar to malloc or calloc.
 bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI);

diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 1d1b59bb6c469..9a6a6f2cbf28e 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -118,6 +118,9 @@ class InstrProfReader {
   /// Return true if the profile only instruments function entries.
   virtual bool functionEntryOnly() const = 0;
 
+  /// Return true if profile includes a memory profile.
+  virtual bool hasMemoryProfile() const = 0;
+
   /// Returns a BitsetEnum describing the attributes of the profile. To check
   /// individual attributes prefer using the helpers above.
   virtual InstrProfKind getProfileKind() const = 0;
@@ -233,6 +236,11 @@ class TextInstrProfReader : public InstrProfReader {
     return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly);
   }
 
+  bool hasMemoryProfile() const override {
+    // TODO: Add support for text format memory profiles.
+    return false;
+  }
+
   InstrProfKind getProfileKind() const override { return ProfileKind; }
 
   /// Read the header.
@@ -322,6 +330,12 @@ class RawInstrProfReader : public InstrProfReader {
     return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
   }
 
+  bool hasMemoryProfile() const override {
+    // Memory profiles have a separate raw format, so this should never be set.
+    assert(!(Version & VARIANT_MASK_MEMPROF));
+    return false;
+  }
+
   /// Returns a BitsetEnum describing the attributes of the raw instr profile.
   InstrProfKind getProfileKind() const override;
 
@@ -466,6 +480,7 @@ struct InstrProfReaderIndexBase {
   virtual bool instrEntryBBEnabled() const = 0;
   virtual bool hasSingleByteCoverage() const = 0;
   virtual bool functionEntryOnly() const = 0;
+  virtual bool hasMemoryProfile() const = 0;
   virtual InstrProfKind getProfileKind() const = 0;
   virtual Error populateSymtab(InstrProfSymtab &) = 0;
 };
@@ -532,6 +547,10 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase {
     return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
   }
 
+  bool hasMemoryProfile() const override {
+    return (FormatVersion & VARIANT_MASK_MEMPROF) != 0;
+  }
+
   InstrProfKind getProfileKind() const override;
 
   Error populateSymtab(InstrProfSymtab &Symtab) override {
@@ -605,6 +624,8 @@ class IndexedInstrProfReader : public InstrProfReader {
 
   bool functionEntryOnly() const override { return Index->functionEntryOnly(); }
 
+  bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); }
+
   /// Returns a BitsetEnum describing the attributes of the indexed instr
   /// profile.
   InstrProfKind getProfileKind() const override {

diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 31704c21358a5..351e81aac05db 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -303,6 +303,12 @@ bool llvm::isAllocationFn(
          checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc);
 }
 
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory via new.
+bool llvm::isNewLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
+  return getAllocationData(V, OpNewLike, TLI).has_value();
+}
+
 /// Tests if a value is a call or invoke to a library function that
 /// allocates uninitialized memory (such as malloc).
 static bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {

diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 310e83df34dfa..6bb29a0f34acf 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -65,6 +65,8 @@
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryProfileInfo.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/Analysis/ProfileSummaryInfo.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
@@ -97,6 +99,7 @@
 #include "llvm/IR/Value.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BLAKE3.h"
 #include "llvm/Support/BranchProbability.h"
 #include "llvm/Support/CRC.h"
 #include "llvm/Support/Casting.h"
@@ -106,6 +109,7 @@
 #include "llvm/Support/Error.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/HashBuilder.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Instrumentation.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -114,14 +118,17 @@
 #include <algorithm>
 #include <cassert>
 #include <cstdint>
+#include <map>
 #include <memory>
 #include <numeric>
+#include <set>
 #include <string>
 #include <unordered_map>
 #include <utility>
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::memprof;
 using ProfileCount = Function::ProfileCount;
 using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
 
@@ -136,6 +143,7 @@ STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
 STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
 STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
 STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
 STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
 STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
 STATISTIC(NumOfCSPGOSelectInsts,
@@ -296,6 +304,10 @@ static cl::opt<unsigned> PGOFunctionSizeThreshold(
     "pgo-function-size-threshold", cl::Hidden,
     cl::desc("Do not instrument functions smaller than this threshold"));
 
+static cl::opt<bool> MatchMemProf(
+    "pgo-match-memprof", cl::init(true), cl::Hidden,
+    cl::desc("Perform matching and annotation of memprof profiles."));
+
 namespace llvm {
 // Command line option to turn on CFG dot dump after profile annotation.
 // Defined in Analysis/BlockFrequencyInfo.cpp:  -pgo-view-counts
@@ -504,6 +516,7 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
   void renameComdatFunction();
 
 public:
+  const TargetLibraryInfo &TLI;
   std::vector<std::vector<VPCandidateInfo>> ValueSites;
   SelectInstVisitor SIVisitor;
   std::string FuncName;
@@ -542,7 +555,7 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
       BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
       bool InstrumentFuncEntry = true)
       : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
-        ValueSites(IPVK_Last + 1), SIVisitor(Func),
+        TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func),
         MST(F, InstrumentFuncEntry, BPI, BFI) {
     // This should be done before CFG hash computation.
     SIVisitor.countSelects(Func);
@@ -1021,6 +1034,9 @@ class PGOUseFunc {
   bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
                     InstrProfRecord::CountPseudoKind &PseudoKind);
 
+  // Read memprof data for the instrumented function from profile.
+  bool readMemprof(IndexedInstrProfReader *PGOReader);
+
   // Populate the counts for all BBs.
   void populateCounters();
 
@@ -1221,6 +1237,257 @@ static void annotateFunctionWithHashMismatch(Function &F,
   F.setMetadata(LLVMContext::MD_annotation, MD);
 }
 
+static void addCallsiteMetadata(Instruction &I,
+                                std::vector<uint64_t> &InlinedCallStack,
+                                LLVMContext &Ctx) {
+  I.setMetadata(LLVMContext::MD_callsite,
+                buildCallstackMetadata(InlinedCallStack, Ctx));
+}
+
+static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
+                               uint32_t Column) {
+  llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
+      HashBuilder;
+  HashBuilder.add(Function, LineOffset, Column);
+  llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+  uint64_t Id;
+  std::memcpy(&Id, Hash.data(), sizeof(Hash));
+  return Id;
+}
+
+static uint64_t computeStackId(const memprof::Frame &Frame) {
+  return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
+}
+
+static void addCallStack(CallStackTrie &AllocTrie,
+                         const AllocationInfo *AllocInfo) {
+  SmallVector<uint64_t> StackIds;
+  for (auto StackFrame : AllocInfo->CallStack)
+    StackIds.push_back(computeStackId(StackFrame));
+  auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(),
+                                AllocInfo->Info.getMinSize(),
+                                AllocInfo->Info.getMinLifetime());
+  AllocTrie.addCallStack(AllocType, StackIds);
+}
+
+// Helper to compare the InlinedCallStack computed from an instruction's debug
+// info to a list of Frames from profile data (either the allocation data or a
+// callsite). For callsites, the StartIndex to use in the Frame array may be
+// non-zero.
+static bool
+stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
+                                   ArrayRef<uint64_t> InlinedCallStack,
+                                   unsigned StartIndex = 0) {
+  auto StackFrame = ProfileCallStack.begin() + StartIndex;
+  auto InlCallStackIter = InlinedCallStack.begin();
+  for (; StackFrame != ProfileCallStack.end() &&
+         InlCallStackIter != InlinedCallStack.end();
+       ++StackFrame, ++InlCallStackIter) {
+    uint64_t StackId = computeStackId(*StackFrame);
+    if (StackId != *InlCallStackIter)
+      return false;
+  }
+  // Return true if we found and matched all stack ids from the call
+  // instruction.
+  return InlCallStackIter == InlinedCallStack.end();
+}
+
+bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) {
+  if (!MatchMemProf)
+    return true;
+
+  auto &Ctx = M->getContext();
+
+  auto FuncGUID = Function::getGUID(FuncInfo.FuncName);
+  Expected<memprof::MemProfRecord> MemProfResult =
+      PGOReader->getMemProfRecord(FuncGUID);
+  if (Error E = MemProfResult.takeError()) {
+    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+      auto Err = IPE.get();
+      bool SkipWarning = false;
+      LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+                        << FuncInfo.FuncName << ": ");
+      if (Err == instrprof_error::unknown_function) {
+        NumOfMemProfMissing++;
+        SkipWarning = !PGOWarnMissing;
+        LLVM_DEBUG(dbgs() << "unknown function");
+      } else if (Err == instrprof_error::hash_mismatch) {
+        SkipWarning =
+            NoPGOWarnMismatch ||
+            (NoPGOWarnMismatchComdatWeak &&
+             (F.hasComdat() ||
+              F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+        LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
+      }
+
+      if (SkipWarning)
+        return;
+
+      std::string Msg =
+          (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") +
+           std::to_string(FuncInfo.FunctionHash))
+              .str();
+
+      Ctx.diagnose(
+          DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+    });
+    return false;
+  }
+
+  // Build maps of the location hash to all profile data with that leaf location
+  // (allocation info and the callsites).
+  std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
+  // For the callsites we need to record the index of the associated frame in
+  // the frame array (see comments below where the map entries are added).
+  std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
+      LocHashToCallSites;
+  const auto MemProfRec = std::move(MemProfResult.get());
+  for (auto &AI : MemProfRec.AllocSites) {
+    // Associate the allocation info with the leaf frame. The later matching
+    // code will match any inlined call sequences in the IR with a longer prefix
+    // of call stack frames.
+    uint64_t StackId = computeStackId(AI.CallStack[0]);
+    LocHashToAllocInfo[StackId].insert(&AI);
+  }
+  for (auto &CS : MemProfRec.CallSites) {
+    // Need to record all frames from leaf up to and including this function,
+    // as any of these may or may not have been inlined at this point.
+    unsigned Idx = 0;
+    for (auto &StackFrame : CS) {
+      uint64_t StackId = computeStackId(StackFrame);
+      LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
+      // Once we find this function, we can stop recording.
+      if (StackFrame.Function == FuncGUID)
+        break;
+    }
+    assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
+  }
+
+  auto GetOffset = [](const DILocation *DIL) {
+    return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+           0xffff;
+  };
+
+  // Now walk the instructions, looking up the associated profile data using
+  // dbug locations.
+  for (auto &BB : F) {
+    for (auto &I : BB) {
+      if (I.isDebugOrPseudoInst())
+        continue;
+      // We are only interested in calls (allocation or interior call stack
+      // context calls).
+      auto *CI = dyn_cast<CallBase>(&I);
+      if (!CI)
+        continue;
+      auto *CalledFunction = CI->getCalledFunction();
+      if (CalledFunction && CalledFunction->isIntrinsic())
+        continue;
+      // List of call stack ids computed from the location hashes on debug
+      // locations (leaf to inlined at root).
+      std::vector<uint64_t> InlinedCallStack;
+      // Was the leaf location found in one of the profile maps?
+      bool LeafFound = false;
+      // If leaf was found in a map, iterators pointing to its location in both
+      // of the maps. It might exist in neither, one, or both (the latter case
+      // can happen because we don't currently have discriminators to
+      // distinguish the case when a single line/col maps to both an allocation
+      // and another callsite).
+      std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
+          AllocInfoIter;
+      std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
+                                            unsigned>>>::iterator CallSitesIter;
+      for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
+           DIL = DIL->getInlinedAt()) {
+        // Use C++ linkage name if possible. Need to compile with
+        // -fdebug-info-for-profiling to get linkage name.
+        StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
+        if (Name.empty())
+          Name = DIL->getScope()->getSubprogram()->getName();
+        auto CalleeGUID = Function::getGUID(Name);
+        auto StackId =
+            computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
+        // LeafFound will only be false on the first iteration, since we either
+        // set it true or break out of the loop below.
+        if (!LeafFound) {
+          AllocInfoIter = LocHashToAllocInfo.find(StackId);
+          CallSitesIter = LocHashToCallSites.find(StackId);
+          // Check if the leaf is in one of the maps. If not, no need to look
+          // further at this call.
+          if (AllocInfoIter == LocHashToAllocInfo.end() &&
+              CallSitesIter == LocHashToCallSites.end())
+            break;
+          LeafFound = true;
+        }
+        InlinedCallStack.push_back(StackId);
+      }
+      // If leaf not in either of the maps, skip inst.
+      if (!LeafFound)
+        continue;
+
+      // First add !memprof metadata from allocation info, if we found the
+      // instruction's leaf location in that map, and if the rest of the
+      // instruction's locations match the prefix Frame locations on an
+      // allocation context with the same leaf.
+      if (AllocInfoIter != LocHashToAllocInfo.end()) {
+        // Only consider allocations via new, to reduce unnecessary metadata,
+        // since those are the only allocations that will be targeted initially.
+        if (!isNewLikeFn(CI, &FuncInfo.TLI))
+          continue;
+        // We may match this instruction's location list to multiple MIB
+        // contexts. Add them to a Trie specialized for trimming the contexts to
+        // the minimal needed to disambiguate contexts with unique behavior.
+        CallStackTrie AllocTrie;
+        for (auto *AllocInfo : AllocInfoIter->second) {
+          // Check the full inlined call stack against this one.
+          // If we found and thus matched all frames on the call, include
+          // this MIB.
+          if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
+                                                 InlinedCallStack))
+            addCallStack(AllocTrie, AllocInfo);
+        }
+        // We might not have matched any to the full inlined call stack.
+        // But if we did, create and attach metadata, or a function attribute if
+        // all contexts have identical profiled behavior.
+        if (!AllocTrie.empty()) {
+          // MemprofMDAttached will be false if a function attribute was
+          // attached.
+          bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
+          assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
+          if (MemprofMDAttached) {
+            // Add callsite metadata for the instruction's location list so that
+            // it simpler later on to identify which part of the MIB contexts
+            // are from this particular instruction (including during inlining,
+            // when the callsite metdata will be updated appropriately).
+            // FIXME: can this be changed to strip out the matching stack
+            // context ids from the MIB contexts and not add any callsite
+            // metadata here to save space?
+            addCallsiteMetadata(I, InlinedCallStack, Ctx);
+          }
+        }
+        continue;
+      }
+
+      // Otherwise, add callsite metadata. If we reach here then we found the
+      // instruction's leaf location in the callsites map and not the allocation
+      // map.
+      assert(CallSitesIter != LocHashToCallSites.end());
+      for (auto CallStackIdx : CallSitesIter->second) {
+        // If we found and thus matched all frames on the call, create and
+        // attach call stack metadata.
+        if (stackFrameIncludesInlinedCallStack(
+                *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
+          addCallsiteMetadata(I, InlinedCallStack, Ctx);
+          // Only need to find one with a matching call stack and add a single
+          // callsite metadata.
+          break;
+        }
+      }
+    }
+  }
+
+  return true;
+}
+
 // Read the profile from ProfileFileName and assign the value to the
 // instrumented BB and the edges. This function also updates ProgramMaxCount.
 // Return true if the profile are successfully read, and false on errors.
@@ -1774,7 +2041,7 @@ static bool annotateAllFunctions(
     return false;
 
   // TODO: might need to change the warning once the clang option is finalized.
-  if (!PGOReader->isIRLevelProfile()) {
+  if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) {
     Ctx.diagnose(DiagnosticInfoPGOProfile(
         ProfileFileName.data(), "Not an IR level instrumentation profile"));
     return false;
@@ -1821,6 +2088,14 @@ static bool annotateAllFunctions(
     SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
     PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
                     InstrumentFuncEntry);
+    // Read and match memprof first since we do this via debug info and can
+    // match even if there is an IR mismatch detected for regular PGO below.
+    if (PGOReader->hasMemoryProfile())
+      Func.readMemprof(PGOReader.get());
+
+    if (!PGOReader->isIRLevelProfile())
+      continue;
+
     // When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
     // it means the profile for the function is unrepresentative and this
     // function is actually hot / warm. We will reset the function hot / cold

diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe
new file mode 100755
index 0000000000000..b6fe0fab30001
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe differ

diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw
new file mode 100644
index 0000000000000..fc638b2c51000
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw differ

diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw
new file mode 100644
index 0000000000000..87f1cded0988f
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw differ

diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
new file mode 100644
index 0000000000000..a000453628d78
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -0,0 +1,489 @@
+;; Tests memprof profile matching (with and without instrumentation profiles).
+
+;; Several requirements due to using raw profile inputs:
+;; PGO profile uses zlib compression
+; REQUIRES: zlib
+;; Avoid failures on big-endian systems that can't read the profile properly
+; REQUIRES: x86_64-linux
+
+;; TODO: Use text profile inputs once that is available for memprof.
+
+;; The input IR and raw profiles have been generated from the following source:
+;;
+;; #include <stdlib.h>
+;; #include <string.h>
+;; #include <unistd.h>
+;; char *foo() {
+;;   return new char[10];
+;; }
+;; char *foo2() {
+;;   return foo();
+;; }
+;; char *bar() {
+;;   return foo2();
+;; }
+;; char *baz() {
+;;   return foo2();
+;; }
+;; char *recurse(unsigned n) {
+;;   if (!n)
+;;     return foo();
+;;   return recurse(n-1);
+;; }
+;; int main(int argc, char **argv) {
+;;   // Test allocations with different combinations of stack contexts and
+;;   // coldness (based on lifetime, since they are all accessed a single time
+;;   // per byte via the memset).
+;;   char *a = new char[10];
+;;   char *b = new char[10];
+;;   char *c = foo();
+;;   char *d = foo();
+;;   char *e = bar();
+;;   char *f = baz();
+;;   memset(a, 0, 10);
+;;   memset(b, 0, 10);
+;;   memset(c, 0, 10);
+;;   memset(d, 0, 10);
+;;   memset(e, 0, 10);
+;;   memset(f, 0, 10);
+;;   // a and c have short lifetimes
+;;   delete[] a;
+;;   delete[] c;
+;;   // b, d, e, and f have long lifetimes and will be detected as cold by default.
+;;   sleep(200);
+;;   delete[] b;
+;;   delete[] d;
+;;   delete[] e;
+;;   delete[] f;
+;;   // Loop ensures the two calls to recurse have stack contexts that only differ
+;;   // in one level of recursion. We should get two stack contexts reflecting the
+;;   // different levels of recursion and different allocation behavior (since the
+;;   // first has a very long lifetime and the second has a short lifetime).
+;;   for (unsigned i = 0; i < 2; i++) {
+;;     char *g = recurse(i + 3);
+;;     memset(g, 0, 10);
+;;     if (!i)
+;;       sleep(200);
+;;     delete[] g;
+;;   }
+;;   return 0;
+;; }
+;;
+;; The following commands were used to compile the source to instrumented
+;; executables and collect raw binary format profiles:
+;;
+;; # Collect memory profile:
+;; $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \
+;; 	-fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \
+;;	-fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \
+;; 	memprof.cc -o memprof.exe -fmemory-profile
+;; $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw
+;;
+;; # Collect IR PGO profile:
+;; $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \
+;; 	-fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \
+;;	-fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \
+;; 	memprof.cc -o pgo.exe -fprofile-generate=.
+;; $ ./pgo.exe
+;; $ mv default_*.profraw memprof_pgo.profraw
+;;
+;; # Generate below LLVM IR for use in matching:
+;; $ clang++ -gmlt -fdebug-info-for-profiling -fno-omit-frame-pointer \
+;;	-fno-optimize-sibling-calls memprof.cc -S -emit-llvm
+
+;; Generate indexed profiles of all combinations:
+; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata
+; RUN: llvm-profdata merge %S/Inputs/memprof_pgo.profraw %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.pgomemprofdata
+; RUN: llvm-profdata merge %S/Inputs/memprof_pgo.profraw -o %t.pgoprofdata
+
+;; In all below cases we should not get any messages about missing profile data
+;; for any functions. Either we are not performing any matching for a particular
+;; profile type or we are performing the matching and it should be successful.
+; ALL-NOT: memprof record not found for function hash
+; ALL-NOT: no profile data available for function
+
+;; Feed back memprof-only profile
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
+; There should not be any PGO metadata
+; MEMPROFONLY-NOT: !prof
+
+;; Feed back pgo-only profile
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgoprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY
+; There should not be any memprof related metadata
+; PGOONLY-NOT: !memprof
+; PGOONLY-NOT: !callsite
+
+;; Feed back pgo+memprof-only profile
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,PGO,ALL
+
+; ModuleID = 'memprof.cc'
+source_filename = "memprof.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress noinline optnone uwtable
+; ALL-LABEL: define dso_local noundef ptr @_Z3foov()
+; There should be some PGO metadata
+; PGO: !prof
+define dso_local noundef ptr @_Z3foov() #0 !dbg !10 {
+entry:
+  ; MEMPROF: call {{.*}} @_Znam{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !13
+  ret ptr %call, !dbg !14
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znam(i64 noundef) #1
+
+; Function Attrs: mustprogress noinline optnone uwtable
+; ALL-LABEL: define dso_local noundef ptr @_Z4foo2v()
+define dso_local noundef ptr @_Z4foo2v() #0 !dbg !15 {
+entry:
+  ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C2:[0-9]+]]
+  %call = call noundef ptr @_Z3foov(), !dbg !16
+  ret ptr %call, !dbg !17
+}
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define dso_local noundef ptr @_Z3barv() #0 !dbg !18 {
+entry:
+  ; MEMPROF: call {{.*}} @_Z4foo2v{{.*}} !callsite ![[C3:[0-9]+]]
+  %call = call noundef ptr @_Z4foo2v(), !dbg !19
+  ret ptr %call, !dbg !20
+}
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define dso_local noundef ptr @_Z3bazv() #0 !dbg !21 {
+entry:
+  ; MEMPROF: call {{.*}} @_Z4foo2v{{.*}} !callsite ![[C4:[0-9]+]]
+  %call = call noundef ptr @_Z4foo2v(), !dbg !22
+  ret ptr %call, !dbg !23
+}
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define dso_local noundef ptr @_Z7recursej(i32 noundef %n) #0 !dbg !24 {
+entry:
+  %retval = alloca ptr, align 8
+  %n.addr = alloca i32, align 4
+  store i32 %n, ptr %n.addr, align 4
+  %0 = load i32, ptr %n.addr, align 4, !dbg !25
+  %tobool = icmp ne i32 %0, 0, !dbg !25
+  br i1 %tobool, label %if.end, label %if.then, !dbg !26
+
+if.then:                                          ; preds = %entry
+  ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C5:[0-9]+]]
+  %call = call noundef ptr @_Z3foov(), !dbg !27
+  store ptr %call, ptr %retval, align 8, !dbg !28
+  br label %return, !dbg !28
+
+if.end:                                           ; preds = %entry
+  %1 = load i32, ptr %n.addr, align 4, !dbg !29
+  %sub = sub i32 %1, 1, !dbg !30
+  ; MEMPROF: call {{.*}} @_Z7recursej{{.*}} !callsite ![[C6:[0-9]+]]
+  %call1 = call noundef ptr @_Z7recursej(i32 noundef %sub), !dbg !31
+  store ptr %call1, ptr %retval, align 8, !dbg !32
+  br label %return, !dbg !32
+
+return:                                           ; preds = %if.end, %if.then
+  %2 = load ptr, ptr %retval, align 8, !dbg !33
+  ret ptr %2, !dbg !33
+}
+
+; Function Attrs: mustprogress noinline norecurse optnone uwtable
+define dso_local noundef i32 @main(i32 noundef %argc, ptr noundef %argv) #2 !dbg !34 {
+entry:
+  %retval = alloca i32, align 4
+  %argc.addr = alloca i32, align 4
+  %argv.addr = alloca ptr, align 8
+  %a = alloca ptr, align 8
+  %b = alloca ptr, align 8
+  %c = alloca ptr, align 8
+  %d = alloca ptr, align 8
+  %e = alloca ptr, align 8
+  %f = alloca ptr, align 8
+  %i = alloca i32, align 4
+  %g = alloca ptr, align 8
+  store i32 0, ptr %retval, align 4
+  store i32 %argc, ptr %argc.addr, align 4
+  store ptr %argv, ptr %argv.addr, align 8
+  ; MEMPROF: call {{.*}} @_Znam{{.*}} #[[A1:[0-9]+]]
+  %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !35
+  store ptr %call, ptr %a, align 8, !dbg !36
+  ; MEMPROF: call {{.*}} @_Znam{{.*}} #[[A2:[0-9]+]]
+  %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !37
+  store ptr %call1, ptr %b, align 8, !dbg !38
+  ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C7:[0-9]+]]
+  %call2 = call noundef ptr @_Z3foov(), !dbg !39
+  store ptr %call2, ptr %c, align 8, !dbg !40
+  ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C8:[0-9]+]]
+  %call3 = call noundef ptr @_Z3foov(), !dbg !41
+  store ptr %call3, ptr %d, align 8, !dbg !42
+  ; MEMPROF: call {{.*}} @_Z3barv{{.*}} !callsite ![[C9:[0-9]+]]
+  %call4 = call noundef ptr @_Z3barv(), !dbg !43
+  store ptr %call4, ptr %e, align 8, !dbg !44
+  ; MEMPROF: call {{.*}} @_Z3bazv{{.*}} !callsite ![[C10:[0-9]+]]
+  %call5 = call noundef ptr @_Z3bazv(), !dbg !45
+  store ptr %call5, ptr %f, align 8, !dbg !46
+  %0 = load ptr, ptr %a, align 8, !dbg !47
+  call void @llvm.memset.p0.i64(ptr align 1 %0, i8 0, i64 10, i1 false), !dbg !48
+  %1 = load ptr, ptr %b, align 8, !dbg !49
+  call void @llvm.memset.p0.i64(ptr align 1 %1, i8 0, i64 10, i1 false), !dbg !50
+  %2 = load ptr, ptr %c, align 8, !dbg !51
+  call void @llvm.memset.p0.i64(ptr align 1 %2, i8 0, i64 10, i1 false), !dbg !52
+  %3 = load ptr, ptr %d, align 8, !dbg !53
+  call void @llvm.memset.p0.i64(ptr align 1 %3, i8 0, i64 10, i1 false), !dbg !54
+  %4 = load ptr, ptr %e, align 8, !dbg !55
+  call void @llvm.memset.p0.i64(ptr align 1 %4, i8 0, i64 10, i1 false), !dbg !56
+  %5 = load ptr, ptr %f, align 8, !dbg !57
+  call void @llvm.memset.p0.i64(ptr align 1 %5, i8 0, i64 10, i1 false), !dbg !58
+  %6 = load ptr, ptr %a, align 8, !dbg !59
+  %isnull = icmp eq ptr %6, null, !dbg !60
+  br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !60
+
+delete.notnull:                                   ; preds = %entry
+  call void @_ZdaPv(ptr noundef %6) #7, !dbg !61
+  br label %delete.end, !dbg !61
+
+delete.end:                                       ; preds = %delete.notnull, %entry
+  %7 = load ptr, ptr %c, align 8, !dbg !63
+  %isnull6 = icmp eq ptr %7, null, !dbg !64
+  br i1 %isnull6, label %delete.end8, label %delete.notnull7, !dbg !64
+
+delete.notnull7:                                  ; preds = %delete.end
+  call void @_ZdaPv(ptr noundef %7) #7, !dbg !65
+  br label %delete.end8, !dbg !65
+
+delete.end8:                                      ; preds = %delete.notnull7, %delete.end
+  %call9 = call i32 @sleep(i32 noundef 200), !dbg !66
+  %8 = load ptr, ptr %b, align 8, !dbg !67
+  %isnull10 = icmp eq ptr %8, null, !dbg !68
+  br i1 %isnull10, label %delete.end12, label %delete.notnull11, !dbg !68
+
+delete.notnull11:                                 ; preds = %delete.end8
+  call void @_ZdaPv(ptr noundef %8) #7, !dbg !69
+  br label %delete.end12, !dbg !69
+
+delete.end12:                                     ; preds = %delete.notnull11, %delete.end8
+  %9 = load ptr, ptr %d, align 8, !dbg !70
+  %isnull13 = icmp eq ptr %9, null, !dbg !71
+  br i1 %isnull13, label %delete.end15, label %delete.notnull14, !dbg !71
+
+delete.notnull14:                                 ; preds = %delete.end12
+  call void @_ZdaPv(ptr noundef %9) #7, !dbg !72
+  br label %delete.end15, !dbg !72
+
+delete.end15:                                     ; preds = %delete.notnull14, %delete.end12
+  %10 = load ptr, ptr %e, align 8, !dbg !73
+  %isnull16 = icmp eq ptr %10, null, !dbg !74
+  br i1 %isnull16, label %delete.end18, label %delete.notnull17, !dbg !74
+
+delete.notnull17:                                 ; preds = %delete.end15
+  call void @_ZdaPv(ptr noundef %10) #7, !dbg !75
+  br label %delete.end18, !dbg !75
+
+delete.end18:                                     ; preds = %delete.notnull17, %delete.end15
+  %11 = load ptr, ptr %f, align 8, !dbg !76
+  %isnull19 = icmp eq ptr %11, null, !dbg !77
+  br i1 %isnull19, label %delete.end21, label %delete.notnull20, !dbg !77
+
+delete.notnull20:                                 ; preds = %delete.end18
+  call void @_ZdaPv(ptr noundef %11) #7, !dbg !78
+  br label %delete.end21, !dbg !78
+
+delete.end21:                                     ; preds = %delete.notnull20, %delete.end18
+  store i32 0, ptr %i, align 4, !dbg !79
+  br label %for.cond, !dbg !80
+
+for.cond:                                         ; preds = %for.inc, %delete.end21
+  %12 = load i32, ptr %i, align 4, !dbg !81
+  %cmp = icmp ult i32 %12, 2, !dbg !82
+  br i1 %cmp, label %for.body, label %for.end, !dbg !83
+
+for.body:                                         ; preds = %for.cond
+  %13 = load i32, ptr %i, align 4, !dbg !84
+  %add = add i32 %13, 3, !dbg !85
+  ; MEMPROF: call {{.*}} @_Z7recursej{{.*}} !callsite ![[C11:[0-9]+]]
+  %call22 = call noundef ptr @_Z7recursej(i32 noundef %add), !dbg !86
+  store ptr %call22, ptr %g, align 8, !dbg !87
+  %14 = load ptr, ptr %g, align 8, !dbg !88
+  call void @llvm.memset.p0.i64(ptr align 1 %14, i8 0, i64 10, i1 false), !dbg !89
+  %15 = load i32, ptr %i, align 4, !dbg !90
+  %tobool = icmp ne i32 %15, 0, !dbg !90
+  br i1 %tobool, label %if.end, label %if.then, !dbg !91
+
+if.then:                                          ; preds = %for.body
+  %call23 = call i32 @sleep(i32 noundef 200), !dbg !92
+  br label %if.end, !dbg !92
+
+if.end:                                           ; preds = %if.then, %for.body
+  %16 = load ptr, ptr %g, align 8, !dbg !93
+  %isnull24 = icmp eq ptr %16, null, !dbg !94
+  br i1 %isnull24, label %delete.end26, label %delete.notnull25, !dbg !94
+
+delete.notnull25:                                 ; preds = %if.end
+  call void @_ZdaPv(ptr noundef %16) #7, !dbg !95
+  br label %delete.end26, !dbg !95
+
+delete.end26:                                     ; preds = %delete.notnull25, %if.end
+  br label %for.inc, !dbg !96
+
+for.inc:                                          ; preds = %delete.end26
+  %17 = load i32, ptr %i, align 4, !dbg !97
+  %inc = add i32 %17, 1, !dbg !97
+  store i32 %inc, ptr %i, align 4, !dbg !97
+  br label %for.cond, !dbg !99, !llvm.loop !100
+
+for.end:                                          ; preds = %for.cond
+  ret i32 0, !dbg !103
+}
+
+; MEMPROF: #[[A1]] = { builtin allocsize(0) "memprof"="notcold" }
+; MEMPROF: #[[A2]] = { builtin allocsize(0) "memprof"="cold" }
+; MEMPROF: ![[M1]] = !{![[MIB1:[0-9]+]], ![[MIB2:[0-9]+]], ![[MIB3:[0-9]+]], ![[MIB4:[0-9]+]], ![[MIB5:[0-9]+]]}
+; MEMPROF: ![[MIB1]] = !{![[STACK1:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK1]] = !{i64 2732490490862098848, i64 748269490701775343}
+; MEMPROF: ![[MIB2]] = !{![[STACK2:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK2]] = !{i64 2732490490862098848, i64 2104812325165620841, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934, i64 1544787832369987002}
+; MEMPROF: ![[MIB3]] = !{![[STACK3:[0-9]+]], !"notcold"}
+; MEMPROF: ![[STACK3]] = !{i64 2732490490862098848, i64 2104812325165620841, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934}
+; MEMPROF: ![[MIB4]] = !{![[STACK4:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK4]] = !{i64 2732490490862098848, i64 8467819354083268568}
+; MEMPROF: ![[MIB5]] = !{![[STACK5:[0-9]+]], !"notcold"}
+; MEMPROF: ![[STACK5]] = !{i64 2732490490862098848, i64 8690657650969109624}
+; MEMPROF: ![[C1]] = !{i64 2732490490862098848}
+; MEMPROF: ![[C2]] = !{i64 8467819354083268568}
+; MEMPROF: ![[C3]] = !{i64 9086428284934609951}
+; MEMPROF: ![[C4]] = !{i64 -5964873800580613432}
+; MEMPROF: ![[C5]] = !{i64 2104812325165620841}
+; MEMPROF: ![[C6]] = !{i64 6281715513834610934}
+; MEMPROF: ![[C7]] = !{i64 8690657650969109624}
+; MEMPROF: ![[C8]] = !{i64 748269490701775343}
+; MEMPROF: ![[C9]] = !{i64 -5747251260480066785}
+; MEMPROF: ![[C10]] = !{i64 2061451396820446691}
+; MEMPROF: ![[C11]] = !{i64 1544787832369987002}
+
+; Function Attrs: argmemonly nofree nounwind willreturn writeonly
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: nobuiltin nounwind
+declare void @_ZdaPv(ptr noundef) #4
+
+declare i32 @sleep(i32 noundef) #5
+
+attributes #0 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { mustprogress noinline norecurse optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { argmemonly nofree nounwind willreturn writeonly }
+attributes #4 = { nobuiltin nounwind "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #5 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #6 = { builtin allocsize(0) }
+attributes #7 = { builtin nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof.cc", directory: "/usr/local/google/home/tejohnson/llvm/tmp", checksumkind: CSK_MD5, checksum: "e8c40ebe4b21776b4d60e9632cbc13c2")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 7, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"frame-pointer", i32 2}
+!9 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)"}
+!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!11 = !DISubroutineType(types: !12)
+!12 = !{}
+!13 = !DILocation(line: 5, column: 10, scope: !10)
+!14 = !DILocation(line: 5, column: 3, scope: !10)
+!15 = distinct !DISubprogram(name: "foo2", linkageName: "_Z4foo2v", scope: !1, file: !1, line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!16 = !DILocation(line: 8, column: 10, scope: !15)
+!17 = !DILocation(line: 8, column: 3, scope: !15)
+!18 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 10, type: !11, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!19 = !DILocation(line: 11, column: 10, scope: !18)
+!20 = !DILocation(line: 11, column: 3, scope: !18)
+!21 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 13, type: !11, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!22 = !DILocation(line: 14, column: 10, scope: !21)
+!23 = !DILocation(line: 14, column: 3, scope: !21)
+!24 = distinct !DISubprogram(name: "recurse", linkageName: "_Z7recursej", scope: !1, file: !1, line: 16, type: !11, scopeLine: 16, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!25 = !DILocation(line: 17, column: 8, scope: !24)
+!26 = !DILocation(line: 17, column: 7, scope: !24)
+!27 = !DILocation(line: 18, column: 12, scope: !24)
+!28 = !DILocation(line: 18, column: 5, scope: !24)
+!29 = !DILocation(line: 19, column: 18, scope: !24)
+!30 = !DILocation(line: 19, column: 19, scope: !24)
+!31 = !DILocation(line: 19, column: 10, scope: !24)
+!32 = !DILocation(line: 19, column: 3, scope: !24)
+!33 = !DILocation(line: 20, column: 1, scope: !24)
+!34 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !11, scopeLine: 21, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!35 = !DILocation(line: 25, column: 13, scope: !34)
+!36 = !DILocation(line: 25, column: 9, scope: !34)
+!37 = !DILocation(line: 26, column: 13, scope: !34)
+!38 = !DILocation(line: 26, column: 9, scope: !34)
+!39 = !DILocation(line: 27, column: 13, scope: !34)
+!40 = !DILocation(line: 27, column: 9, scope: !34)
+!41 = !DILocation(line: 28, column: 13, scope: !34)
+!42 = !DILocation(line: 28, column: 9, scope: !34)
+!43 = !DILocation(line: 29, column: 13, scope: !34)
+!44 = !DILocation(line: 29, column: 9, scope: !34)
+!45 = !DILocation(line: 30, column: 13, scope: !34)
+!46 = !DILocation(line: 30, column: 9, scope: !34)
+!47 = !DILocation(line: 31, column: 10, scope: !34)
+!48 = !DILocation(line: 31, column: 3, scope: !34)
+!49 = !DILocation(line: 32, column: 10, scope: !34)
+!50 = !DILocation(line: 32, column: 3, scope: !34)
+!51 = !DILocation(line: 33, column: 10, scope: !34)
+!52 = !DILocation(line: 33, column: 3, scope: !34)
+!53 = !DILocation(line: 34, column: 10, scope: !34)
+!54 = !DILocation(line: 34, column: 3, scope: !34)
+!55 = !DILocation(line: 35, column: 10, scope: !34)
+!56 = !DILocation(line: 35, column: 3, scope: !34)
+!57 = !DILocation(line: 36, column: 10, scope: !34)
+!58 = !DILocation(line: 36, column: 3, scope: !34)
+!59 = !DILocation(line: 38, column: 12, scope: !34)
+!60 = !DILocation(line: 38, column: 3, scope: !34)
+!61 = !DILocation(line: 38, column: 3, scope: !62)
+!62 = !DILexicalBlockFile(scope: !34, file: !1, discriminator: 2)
+!63 = !DILocation(line: 39, column: 12, scope: !34)
+!64 = !DILocation(line: 39, column: 3, scope: !34)
+!65 = !DILocation(line: 39, column: 3, scope: !62)
+!66 = !DILocation(line: 41, column: 3, scope: !34)
+!67 = !DILocation(line: 42, column: 12, scope: !34)
+!68 = !DILocation(line: 42, column: 3, scope: !34)
+!69 = !DILocation(line: 42, column: 3, scope: !62)
+!70 = !DILocation(line: 43, column: 12, scope: !34)
+!71 = !DILocation(line: 43, column: 3, scope: !34)
+!72 = !DILocation(line: 43, column: 3, scope: !62)
+!73 = !DILocation(line: 44, column: 12, scope: !34)
+!74 = !DILocation(line: 44, column: 3, scope: !34)
+!75 = !DILocation(line: 44, column: 3, scope: !62)
+!76 = !DILocation(line: 45, column: 12, scope: !34)
+!77 = !DILocation(line: 45, column: 3, scope: !34)
+!78 = !DILocation(line: 45, column: 3, scope: !62)
+!79 = !DILocation(line: 51, column: 17, scope: !34)
+!80 = !DILocation(line: 51, column: 8, scope: !34)
+!81 = !DILocation(line: 51, column: 24, scope: !62)
+!82 = !DILocation(line: 51, column: 26, scope: !62)
+!83 = !DILocation(line: 51, column: 3, scope: !62)
+!84 = !DILocation(line: 52, column: 23, scope: !34)
+!85 = !DILocation(line: 52, column: 25, scope: !34)
+!86 = !DILocation(line: 52, column: 15, scope: !34)
+!87 = !DILocation(line: 52, column: 11, scope: !34)
+!88 = !DILocation(line: 53, column: 12, scope: !34)
+!89 = !DILocation(line: 53, column: 5, scope: !34)
+!90 = !DILocation(line: 54, column: 10, scope: !34)
+!91 = !DILocation(line: 54, column: 9, scope: !34)
+!92 = !DILocation(line: 55, column: 7, scope: !34)
+!93 = !DILocation(line: 56, column: 14, scope: !34)
+!94 = !DILocation(line: 56, column: 5, scope: !34)
+!95 = !DILocation(line: 56, column: 5, scope: !62)
+!96 = !DILocation(line: 57, column: 3, scope: !34)
+!97 = !DILocation(line: 51, column: 32, scope: !98)
+!98 = !DILexicalBlockFile(scope: !34, file: !1, discriminator: 4)
+!99 = !DILocation(line: 51, column: 3, scope: !98)
+!100 = distinct !{!100, !101, !96, !102}
+!101 = !DILocation(line: 51, column: 3, scope: !34)
+!102 = !{!"llvm.loop.mustprogress"}
+!103 = !DILocation(line: 58, column: 3, scope: !34)

diff  --git a/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll b/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
new file mode 100644
index 0000000000000..068c8c91aa3f4
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
@@ -0,0 +1,28 @@
+;; Tests that we get a missing memprof error for a function not in profile when
+;; using -pgo-warn-missing-function.
+
+;; Avoid failures on big-endian systems that can't read the raw profile properly
+; REQUIRES: x86_64-linux
+
+;; TODO: Use text profile inputs once that is available for memprof.
+
+;; The raw profiles have been generated from the source used for the memprof.ll
+;; test (see comments at the top of that file).
+
+; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata
+
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s
+
+; CHECK: memprof record not found for function hash {{.*}} _Z16funcnotinprofilev
+
+; ModuleID = 'memprofmissingfunc.cc'
+source_filename = "memprofmissingfunc.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress noinline nounwind optnone uwtable
+define dso_local void @_Z16funcnotinprofilev() {
+entry:
+  ret void
+}
+


        


More information about the cfe-commits mailing list