[clang] b1926f3 - Restore "[MemProf] Memprof profile matching and annotation"
Teresa Johnson via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 23 11:39:05 PDT 2022
Author: Teresa Johnson
Date: 2022-09-23T11:38:47-07:00
New Revision: b1926f308f0939b365ee4940c7b1bd984b45e71a
URL: https://github.com/llvm/llvm-project/commit/b1926f308f0939b365ee4940c7b1bd984b45e71a
DIFF: https://github.com/llvm/llvm-project/commit/b1926f308f0939b365ee4940c7b1bd984b45e71a.diff
LOG: Restore "[MemProf] Memprof profile matching and annotation"
This reverts commit 794b7ea960ccc3222f2af582efadbc5e5c464292, and
thus restores commit a212d8da94d08e229aa8d65283e4b116310bba10, and
follow-on fixes 0cd6763fa93159b84d70a5bb602c24996acaafaa,
e9ff53d42feac7fc157718523275619a8106f2f3, and
37c6a25e9ab230e5e21fa34e246d9fec55275df0.
Use a hash function (BLAKE3) instead of hash_combine/hash_code which are
not guaranteed to be stable across executions.
Additionally, it adds a "REQUIRES: x86_64-linux" to the tests that have
raw profile inputs to avoid failures on big endian bots.
Reviewers: snehasish, davidxl
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D128142
Added:
clang/test/CodeGen/Inputs/memprof.exe
clang/test/CodeGen/Inputs/memprof.memprofraw
clang/test/CodeGen/memprof.cpp
llvm/test/Transforms/PGOProfile/Inputs/memprof.exe
llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw
llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw
llvm/test/Transforms/PGOProfile/memprof.ll
llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
Modified:
clang/lib/Frontend/CompilerInvocation.cpp
llvm/include/llvm/Analysis/MemoryBuiltins.h
llvm/include/llvm/ProfileData/InstrProfReader.h
llvm/lib/Analysis/MemoryBuiltins.cpp
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Removed:
################################################################################
diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp
index 9f9241054b1ef..656e5950db988 100644
--- a/clang/lib/Frontend/CompilerInvocation.cpp
+++ b/clang/lib/Frontend/CompilerInvocation.cpp
@@ -1306,7 +1306,10 @@ static void setPGOUseInstrumentor(CodeGenOptions &Opts,
}
std::unique_ptr<llvm::IndexedInstrProfReader> PGOReader =
std::move(ReaderOrErr.get());
- if (PGOReader->isIRLevelProfile()) {
+ // Currently memprof profiles are only added at the IR level. Mark the profile
+ // type as IR in that case as well and the subsequent matching needs to detect
+ // which is available (might be one or both).
+ if (PGOReader->isIRLevelProfile() || PGOReader->hasMemoryProfile()) {
if (PGOReader->hasCSIRLevelProfile())
Opts.setProfileUse(CodeGenOptions::ProfileCSIRInstr);
else
diff --git a/clang/test/CodeGen/Inputs/memprof.exe b/clang/test/CodeGen/Inputs/memprof.exe
new file mode 100755
index 0000000000000..955c0d6b0e87a
Binary files /dev/null and b/clang/test/CodeGen/Inputs/memprof.exe differ
diff --git a/clang/test/CodeGen/Inputs/memprof.memprofraw b/clang/test/CodeGen/Inputs/memprof.memprofraw
new file mode 100644
index 0000000000000..07a3310c122af
Binary files /dev/null and b/clang/test/CodeGen/Inputs/memprof.memprofraw differ
diff --git a/clang/test/CodeGen/memprof.cpp b/clang/test/CodeGen/memprof.cpp
new file mode 100644
index 0000000000000..b246d1f086942
--- /dev/null
+++ b/clang/test/CodeGen/memprof.cpp
@@ -0,0 +1,38 @@
+// Test if memprof instrumentation and use pass are invoked.
+//
+// Instrumentation:
+// Ensure Pass MemProfilerPass and ModuleMemProfilerPass are invoked.
+// RUN: %clang_cc1 -O2 -fmemory-profile %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=INSTRUMENT
+// INSTRUMENT: Running pass: MemProfilerPass on main
+// INSTRUMENT: Running pass: ModuleMemProfilerPass on [module]
+
+// Avoid failures on big-endian systems that can't read the raw profile properly
+// REQUIRES: x86_64-linux
+
+// TODO: Use text profile inputs once that is available for memprof.
+//
+// The following commands were used to compile the source to instrumented
+// executables and collect raw binary format profiles:
+//
+// # Collect memory profile:
+// $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \
+// -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \
+// -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \
+// memprof.cpp -o memprof.exe -fmemory-profile
+// $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw
+//
+// RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata
+
+// Profile use:
+// Ensure Pass PGOInstrumentationUse is invoked with the memprof-only profile.
+// RUN: %clang_cc1 -O2 -fprofile-instrument-use-path=%t.memprofdata %s -fdebug-pass-manager -emit-llvm -o - 2>&1 | FileCheck %s -check-prefix=USE
+// USE: Running pass: PGOInstrumentationUse on [module]
+
+char *foo() {
+ return new char[10];
+}
+int main() {
+ char *a = foo();
+ delete[] a;
+ return 0;
+}
diff --git a/llvm/include/llvm/Analysis/MemoryBuiltins.h b/llvm/include/llvm/Analysis/MemoryBuiltins.h
index 949fe7270821a..41dc0c4ff452e 100644
--- a/llvm/include/llvm/Analysis/MemoryBuiltins.h
+++ b/llvm/include/llvm/Analysis/MemoryBuiltins.h
@@ -56,6 +56,10 @@ bool isAllocationFn(const Value *V, const TargetLibraryInfo *TLI);
bool isAllocationFn(const Value *V,
function_ref<const TargetLibraryInfo &(Function &)> GetTLI);
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory via new.
+bool isNewLikeFn(const Value *V, const TargetLibraryInfo *TLI);
+
/// Tests if a value is a call or invoke to a library function that
/// allocates memory similar to malloc or calloc.
bool isMallocOrCallocLikeFn(const Value *V, const TargetLibraryInfo *TLI);
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 1d1b59bb6c469..9a6a6f2cbf28e 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -118,6 +118,9 @@ class InstrProfReader {
/// Return true if the profile only instruments function entries.
virtual bool functionEntryOnly() const = 0;
+ /// Return true if profile includes a memory profile.
+ virtual bool hasMemoryProfile() const = 0;
+
/// Returns a BitsetEnum describing the attributes of the profile. To check
/// individual attributes prefer using the helpers above.
virtual InstrProfKind getProfileKind() const = 0;
@@ -233,6 +236,11 @@ class TextInstrProfReader : public InstrProfReader {
return static_cast<bool>(ProfileKind & InstrProfKind::FunctionEntryOnly);
}
+ bool hasMemoryProfile() const override {
+ // TODO: Add support for text format memory profiles.
+ return false;
+ }
+
InstrProfKind getProfileKind() const override { return ProfileKind; }
/// Read the header.
@@ -322,6 +330,12 @@ class RawInstrProfReader : public InstrProfReader {
return (Version & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
}
+ bool hasMemoryProfile() const override {
+ // Memory profiles have a separate raw format, so this should never be set.
+ assert(!(Version & VARIANT_MASK_MEMPROF));
+ return false;
+ }
+
/// Returns a BitsetEnum describing the attributes of the raw instr profile.
InstrProfKind getProfileKind() const override;
@@ -466,6 +480,7 @@ struct InstrProfReaderIndexBase {
virtual bool instrEntryBBEnabled() const = 0;
virtual bool hasSingleByteCoverage() const = 0;
virtual bool functionEntryOnly() const = 0;
+ virtual bool hasMemoryProfile() const = 0;
virtual InstrProfKind getProfileKind() const = 0;
virtual Error populateSymtab(InstrProfSymtab &) = 0;
};
@@ -532,6 +547,10 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase {
return (FormatVersion & VARIANT_MASK_FUNCTION_ENTRY_ONLY) != 0;
}
+ bool hasMemoryProfile() const override {
+ return (FormatVersion & VARIANT_MASK_MEMPROF) != 0;
+ }
+
InstrProfKind getProfileKind() const override;
Error populateSymtab(InstrProfSymtab &Symtab) override {
@@ -605,6 +624,8 @@ class IndexedInstrProfReader : public InstrProfReader {
bool functionEntryOnly() const override { return Index->functionEntryOnly(); }
+ bool hasMemoryProfile() const override { return Index->hasMemoryProfile(); }
+
/// Returns a BitsetEnum describing the attributes of the indexed instr
/// profile.
InstrProfKind getProfileKind() const override {
diff --git a/llvm/lib/Analysis/MemoryBuiltins.cpp b/llvm/lib/Analysis/MemoryBuiltins.cpp
index 31704c21358a5..351e81aac05db 100644
--- a/llvm/lib/Analysis/MemoryBuiltins.cpp
+++ b/llvm/lib/Analysis/MemoryBuiltins.cpp
@@ -303,6 +303,12 @@ bool llvm::isAllocationFn(
checkFnAllocKind(V, AllocFnKind::Alloc | AllocFnKind::Realloc);
}
+/// Tests if a value is a call or invoke to a library function that
+/// allocates memory via new.
+bool llvm::isNewLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
+ return getAllocationData(V, OpNewLike, TLI).has_value();
+}
+
/// Tests if a value is a call or invoke to a library function that
/// allocates uninitialized memory (such as malloc).
static bool isMallocLikeFn(const Value *V, const TargetLibraryInfo *TLI) {
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 310e83df34dfa..6bb29a0f34acf 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -65,6 +65,8 @@
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/EHPersonalities.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -97,6 +99,7 @@
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
@@ -106,6 +109,7 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
+#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -114,14 +118,17 @@
#include <algorithm>
#include <cassert>
#include <cstdint>
+#include <map>
#include <memory>
#include <numeric>
+#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace llvm;
+using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;
using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
@@ -136,6 +143,7 @@ STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
+STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
STATISTIC(NumOfCSPGOSelectInsts,
@@ -296,6 +304,10 @@ static cl::opt<unsigned> PGOFunctionSizeThreshold(
"pgo-function-size-threshold", cl::Hidden,
cl::desc("Do not instrument functions smaller than this threshold"));
+static cl::opt<bool> MatchMemProf(
+ "pgo-match-memprof", cl::init(true), cl::Hidden,
+ cl::desc("Perform matching and annotation of memprof profiles."));
+
namespace llvm {
// Command line option to turn on CFG dot dump after profile annotation.
// Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
@@ -504,6 +516,7 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
void renameComdatFunction();
public:
+ const TargetLibraryInfo &TLI;
std::vector<std::vector<VPCandidateInfo>> ValueSites;
SelectInstVisitor SIVisitor;
std::string FuncName;
@@ -542,7 +555,7 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
BlockFrequencyInfo *BFI = nullptr, bool IsCS = false,
bool InstrumentFuncEntry = true)
: F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers), VPC(Func, TLI),
- ValueSites(IPVK_Last + 1), SIVisitor(Func),
+ TLI(TLI), ValueSites(IPVK_Last + 1), SIVisitor(Func),
MST(F, InstrumentFuncEntry, BPI, BFI) {
// This should be done before CFG hash computation.
SIVisitor.countSelects(Func);
@@ -1021,6 +1034,9 @@ class PGOUseFunc {
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
InstrProfRecord::CountPseudoKind &PseudoKind);
+ // Read memprof data for the instrumented function from profile.
+ bool readMemprof(IndexedInstrProfReader *PGOReader);
+
// Populate the counts for all BBs.
void populateCounters();
@@ -1221,6 +1237,257 @@ static void annotateFunctionWithHashMismatch(Function &F,
F.setMetadata(LLVMContext::MD_annotation, MD);
}
+static void addCallsiteMetadata(Instruction &I,
+ std::vector<uint64_t> &InlinedCallStack,
+ LLVMContext &Ctx) {
+ I.setMetadata(LLVMContext::MD_callsite,
+ buildCallstackMetadata(InlinedCallStack, Ctx));
+}
+
+static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
+ uint32_t Column) {
+ llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
+ HashBuilder;
+ HashBuilder.add(Function, LineOffset, Column);
+ llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+ uint64_t Id;
+ std::memcpy(&Id, Hash.data(), sizeof(Hash));
+ return Id;
+}
+
+static uint64_t computeStackId(const memprof::Frame &Frame) {
+ return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
+}
+
+static void addCallStack(CallStackTrie &AllocTrie,
+ const AllocationInfo *AllocInfo) {
+ SmallVector<uint64_t> StackIds;
+ for (auto StackFrame : AllocInfo->CallStack)
+ StackIds.push_back(computeStackId(StackFrame));
+ auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(),
+ AllocInfo->Info.getMinSize(),
+ AllocInfo->Info.getMinLifetime());
+ AllocTrie.addCallStack(AllocType, StackIds);
+}
+
+// Helper to compare the InlinedCallStack computed from an instruction's debug
+// info to a list of Frames from profile data (either the allocation data or a
+// callsite). For callsites, the StartIndex to use in the Frame array may be
+// non-zero.
+static bool
+stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
+ ArrayRef<uint64_t> InlinedCallStack,
+ unsigned StartIndex = 0) {
+ auto StackFrame = ProfileCallStack.begin() + StartIndex;
+ auto InlCallStackIter = InlinedCallStack.begin();
+ for (; StackFrame != ProfileCallStack.end() &&
+ InlCallStackIter != InlinedCallStack.end();
+ ++StackFrame, ++InlCallStackIter) {
+ uint64_t StackId = computeStackId(*StackFrame);
+ if (StackId != *InlCallStackIter)
+ return false;
+ }
+ // Return true if we found and matched all stack ids from the call
+ // instruction.
+ return InlCallStackIter == InlinedCallStack.end();
+}
+
+bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) {
+ if (!MatchMemProf)
+ return true;
+
+ auto &Ctx = M->getContext();
+
+ auto FuncGUID = Function::getGUID(FuncInfo.FuncName);
+ Expected<memprof::MemProfRecord> MemProfResult =
+ PGOReader->getMemProfRecord(FuncGUID);
+ if (Error E = MemProfResult.takeError()) {
+ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+ auto Err = IPE.get();
+ bool SkipWarning = false;
+ LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
+ << FuncInfo.FuncName << ": ");
+ if (Err == instrprof_error::unknown_function) {
+ NumOfMemProfMissing++;
+ SkipWarning = !PGOWarnMissing;
+ LLVM_DEBUG(dbgs() << "unknown function");
+ } else if (Err == instrprof_error::hash_mismatch) {
+ SkipWarning =
+ NoPGOWarnMismatch ||
+ (NoPGOWarnMismatchComdatWeak &&
+ (F.hasComdat() ||
+ F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+ LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
+ }
+
+ if (SkipWarning)
+ return;
+
+ std::string Msg =
+ (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") +
+ std::to_string(FuncInfo.FunctionHash))
+ .str();
+
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
+ });
+ return false;
+ }
+
+ // Build maps of the location hash to all profile data with that leaf location
+ // (allocation info and the callsites).
+ std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
+ // For the callsites we need to record the index of the associated frame in
+ // the frame array (see comments below where the map entries are added).
+ std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
+ LocHashToCallSites;
+ const auto MemProfRec = std::move(MemProfResult.get());
+ for (auto &AI : MemProfRec.AllocSites) {
+ // Associate the allocation info with the leaf frame. The later matching
+ // code will match any inlined call sequences in the IR with a longer prefix
+ // of call stack frames.
+ uint64_t StackId = computeStackId(AI.CallStack[0]);
+ LocHashToAllocInfo[StackId].insert(&AI);
+ }
+ for (auto &CS : MemProfRec.CallSites) {
+ // Need to record all frames from leaf up to and including this function,
+ // as any of these may or may not have been inlined at this point.
+ unsigned Idx = 0;
+ for (auto &StackFrame : CS) {
+ uint64_t StackId = computeStackId(StackFrame);
+ LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
+ // Once we find this function, we can stop recording.
+ if (StackFrame.Function == FuncGUID)
+ break;
+ }
+ assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
+ }
+
+ auto GetOffset = [](const DILocation *DIL) {
+ return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+ 0xffff;
+ };
+
+ // Now walk the instructions, looking up the associated profile data using
+ // debug locations.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (I.isDebugOrPseudoInst())
+ continue;
+ // We are only interested in calls (allocation or interior call stack
+ // context calls).
+ auto *CI = dyn_cast<CallBase>(&I);
+ if (!CI)
+ continue;
+ auto *CalledFunction = CI->getCalledFunction();
+ if (CalledFunction && CalledFunction->isIntrinsic())
+ continue;
+ // List of call stack ids computed from the location hashes on debug
+ // locations (leaf to inlined at root).
+ std::vector<uint64_t> InlinedCallStack;
+ // Was the leaf location found in one of the profile maps?
+ bool LeafFound = false;
+ // If leaf was found in a map, iterators pointing to its location in both
+ // of the maps. It might exist in neither, one, or both (the latter case
+ // can happen because we don't currently have discriminators to
+ // distinguish the case when a single line/col maps to both an allocation
+ // and another callsite).
+ std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
+ AllocInfoIter;
+ std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
+ unsigned>>>::iterator CallSitesIter;
+ for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
+ DIL = DIL->getInlinedAt()) {
+ // Use C++ linkage name if possible. Need to compile with
+ // -fdebug-info-for-profiling to get linkage name.
+ StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
+ if (Name.empty())
+ Name = DIL->getScope()->getSubprogram()->getName();
+ auto CalleeGUID = Function::getGUID(Name);
+ auto StackId =
+ computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
+ // LeafFound will only be false on the first iteration, since we either
+ // set it true or break out of the loop below.
+ if (!LeafFound) {
+ AllocInfoIter = LocHashToAllocInfo.find(StackId);
+ CallSitesIter = LocHashToCallSites.find(StackId);
+ // Check if the leaf is in one of the maps. If not, no need to look
+ // further at this call.
+ if (AllocInfoIter == LocHashToAllocInfo.end() &&
+ CallSitesIter == LocHashToCallSites.end())
+ break;
+ LeafFound = true;
+ }
+ InlinedCallStack.push_back(StackId);
+ }
+ // If leaf not in either of the maps, skip inst.
+ if (!LeafFound)
+ continue;
+
+ // First add !memprof metadata from allocation info, if we found the
+ // instruction's leaf location in that map, and if the rest of the
+ // instruction's locations match the prefix Frame locations on an
+ // allocation context with the same leaf.
+ if (AllocInfoIter != LocHashToAllocInfo.end()) {
+ // Only consider allocations via new, to reduce unnecessary metadata,
+ // since those are the only allocations that will be targeted initially.
+ if (!isNewLikeFn(CI, &FuncInfo.TLI))
+ continue;
+ // We may match this instruction's location list to multiple MIB
+ // contexts. Add them to a Trie specialized for trimming the contexts to
+ // the minimal needed to disambiguate contexts with unique behavior.
+ CallStackTrie AllocTrie;
+ for (auto *AllocInfo : AllocInfoIter->second) {
+ // Check the full inlined call stack against this one.
+ // If we found and thus matched all frames on the call, include
+ // this MIB.
+ if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
+ InlinedCallStack))
+ addCallStack(AllocTrie, AllocInfo);
+ }
+ // We might not have matched any to the full inlined call stack.
+ // But if we did, create and attach metadata, or a function attribute if
+ // all contexts have identical profiled behavior.
+ if (!AllocTrie.empty()) {
+ // MemprofMDAttached will be false if a function attribute was
+ // attached.
+ bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
+ assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
+ if (MemprofMDAttached) {
+ // Add callsite metadata for the instruction's location list so that
+ // it is simpler later on to identify which part of the MIB contexts
+ // are from this particular instruction (including during inlining,
+ // when the callsite metadata will be updated appropriately).
+ // FIXME: can this be changed to strip out the matching stack
+ // context ids from the MIB contexts and not add any callsite
+ // metadata here to save space?
+ addCallsiteMetadata(I, InlinedCallStack, Ctx);
+ }
+ }
+ continue;
+ }
+
+ // Otherwise, add callsite metadata. If we reach here then we found the
+ // instruction's leaf location in the callsites map and not the allocation
+ // map.
+ assert(CallSitesIter != LocHashToCallSites.end());
+ for (auto CallStackIdx : CallSitesIter->second) {
+ // If we found and thus matched all frames on the call, create and
+ // attach call stack metadata.
+ if (stackFrameIncludesInlinedCallStack(
+ *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
+ addCallsiteMetadata(I, InlinedCallStack, Ctx);
+ // Only need to find one with a matching call stack and add a single
+ // callsite metadata.
+ break;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
// Read the profile from ProfileFileName and assign the value to the
// instrumented BB and the edges. This function also updates ProgramMaxCount.
// Return true if the profile are successfully read, and false on errors.
@@ -1774,7 +2041,7 @@ static bool annotateAllFunctions(
return false;
// TODO: might need to change the warning once the clang option is finalized.
- if (!PGOReader->isIRLevelProfile()) {
+ if (!PGOReader->isIRLevelProfile() && !PGOReader->hasMemoryProfile()) {
Ctx.diagnose(DiagnosticInfoPGOProfile(
ProfileFileName.data(), "Not an IR level instrumentation profile"));
return false;
@@ -1821,6 +2088,14 @@ static bool annotateAllFunctions(
SplitIndirectBrCriticalEdges(F, /*IgnoreBlocksWithoutPHI=*/false, BPI, BFI);
PGOUseFunc Func(F, &M, TLI, ComdatMembers, BPI, BFI, PSI, IsCS,
InstrumentFuncEntry);
+ // Read and match memprof first since we do this via debug info and can
+ // match even if there is an IR mismatch detected for regular PGO below.
+ if (PGOReader->hasMemoryProfile())
+ Func.readMemprof(PGOReader.get());
+
+ if (!PGOReader->isIRLevelProfile())
+ continue;
+
// When PseudoKind is set to a vaule other than InstrProfRecord::NotPseudo,
// it means the profile for the function is unrepresentative and this
// function is actually hot / warm. We will reset the function hot / cold
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe
new file mode 100755
index 0000000000000..b6fe0fab30001
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.exe differ
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw
new file mode 100644
index 0000000000000..fc638b2c51000
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof.memprofraw differ
diff --git a/llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw b/llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw
new file mode 100644
index 0000000000000..87f1cded0988f
Binary files /dev/null and b/llvm/test/Transforms/PGOProfile/Inputs/memprof_pgo.profraw differ
diff --git a/llvm/test/Transforms/PGOProfile/memprof.ll b/llvm/test/Transforms/PGOProfile/memprof.ll
new file mode 100644
index 0000000000000..a000453628d78
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprof.ll
@@ -0,0 +1,489 @@
+;; Tests memprof profile matching (with and without instrumentation profiles).
+
+;; Several requirements due to using raw profile inputs:
+;; PGO profile uses zlib compression
+; REQUIRES: zlib
+;; Avoid failures on big-endian systems that can't read the profile properly
+; REQUIRES: x86_64-linux
+
+;; TODO: Use text profile inputs once that is available for memprof.
+
+;; The input IR and raw profiles have been generated from the following source:
+;;
+;; #include <stdlib.h>
+;; #include <string.h>
+;; #include <unistd.h>
+;; char *foo() {
+;; return new char[10];
+;; }
+;; char *foo2() {
+;; return foo();
+;; }
+;; char *bar() {
+;; return foo2();
+;; }
+;; char *baz() {
+;; return foo2();
+;; }
+;; char *recurse(unsigned n) {
+;; if (!n)
+;; return foo();
+;; return recurse(n-1);
+;; }
+;; int main(int argc, char **argv) {
+;; // Test allocations with different combinations of stack contexts and
+;; // coldness (based on lifetime, since they are all accessed a single time
+;; // per byte via the memset).
+;; char *a = new char[10];
+;; char *b = new char[10];
+;; char *c = foo();
+;; char *d = foo();
+;; char *e = bar();
+;; char *f = baz();
+;; memset(a, 0, 10);
+;; memset(b, 0, 10);
+;; memset(c, 0, 10);
+;; memset(d, 0, 10);
+;; memset(e, 0, 10);
+;; memset(f, 0, 10);
+;; // a and c have short lifetimes
+;; delete[] a;
+;; delete[] c;
+;; // b, d, e, and f have long lifetimes and will be detected as cold by default.
+;; sleep(200);
+;; delete[] b;
+;; delete[] d;
+;; delete[] e;
+;; delete[] f;
+;; // Loop ensures the two calls to recurse have stack contexts that only differ
+;; // in one level of recursion. We should get two stack contexts reflecting the
+;; // different levels of recursion and different allocation behavior (since the
+;; // first has a very long lifetime and the second has a short lifetime).
+;; for (unsigned i = 0; i < 2; i++) {
+;; char *g = recurse(i + 3);
+;; memset(g, 0, 10);
+;; if (!i)
+;; sleep(200);
+;; delete[] g;
+;; }
+;; return 0;
+;; }
+;;
+;; The following commands were used to compile the source to instrumented
+;; executables and collect raw binary format profiles:
+;;
+;; # Collect memory profile:
+;; $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \
+;; -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \
+;; -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \
+;; memprof.cc -o memprof.exe -fmemory-profile
+;; $ env MEMPROF_OPTIONS=log_path=stdout ./memprof.exe > memprof.memprofraw
+;;
+;; # Collect IR PGO profile:
+;; $ clang++ -fuse-ld=lld -no-pie -Wl,--no-rosegment -gmlt \
+;; -fdebug-info-for-profiling -mno-omit-leaf-frame-pointer \
+;; -fno-omit-frame-pointer -fno-optimize-sibling-calls -m64 -Wl,-build-id \
+;; memprof.cc -o pgo.exe -fprofile-generate=.
+;; $ ./pgo.exe
+;; $ mv default_*.profraw memprof_pgo.profraw
+;;
+;; # Generate below LLVM IR for use in matching:
+;; $ clang++ -gmlt -fdebug-info-for-profiling -fno-omit-frame-pointer \
+;; -fno-optimize-sibling-calls memprof.cc -S -emit-llvm
+
+;; Generate indexed profiles of all combinations:
+; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata
+; RUN: llvm-profdata merge %S/Inputs/memprof_pgo.profraw %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.pgomemprofdata
+; RUN: llvm-profdata merge %S/Inputs/memprof_pgo.profraw -o %t.pgoprofdata
+
+;; In all below cases we should not get any messages about missing profile data
+;; for any functions. Either we are not performing any matching for a particular
+;; profile type or we are performing the matching and it should be successful.
+; ALL-NOT: memprof record not found for function hash
+; ALL-NOT: no profile data available for function
+
+;; Feed back memprof-only profile
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,ALL,MEMPROFONLY
+; There should not be any PGO metadata
+; MEMPROFONLY-NOT: !prof
+
+;; Feed back pgo-only profile
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgoprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=PGO,ALL,PGOONLY
+; There should not be any memprof related metadata
+; PGOONLY-NOT: !memprof
+; PGOONLY-NOT: !callsite
+
+;; Feed back pgo+memprof-only profile
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.pgomemprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s --check-prefixes=MEMPROF,PGO,ALL
+
+; ModuleID = 'memprof.cc'
+source_filename = "memprof.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress noinline optnone uwtable
+; ALL-LABEL: define dso_local noundef ptr @_Z3foov()
+; There should be some PGO metadata
+; PGO: !prof
+define dso_local noundef ptr @_Z3foov() #0 !dbg !10 {
+entry:
+ ; MEMPROF: call {{.*}} @_Znam{{.*}} !memprof ![[M1:[0-9]+]], !callsite ![[C1:[0-9]+]]
+ %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !13
+ ret ptr %call, !dbg !14
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znam(i64 noundef) #1
+
+; Function Attrs: mustprogress noinline optnone uwtable
+; ALL-LABEL: define dso_local noundef ptr @_Z4foo2v()
+define dso_local noundef ptr @_Z4foo2v() #0 !dbg !15 {
+entry:
+ ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C2:[0-9]+]]
+ %call = call noundef ptr @_Z3foov(), !dbg !16
+ ret ptr %call, !dbg !17
+}
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define dso_local noundef ptr @_Z3barv() #0 !dbg !18 {
+entry:
+ ; MEMPROF: call {{.*}} @_Z4foo2v{{.*}} !callsite ![[C3:[0-9]+]]
+ %call = call noundef ptr @_Z4foo2v(), !dbg !19
+ ret ptr %call, !dbg !20
+}
+
+; Function Attrs: mustprogress noinline optnone uwtable
+define dso_local noundef ptr @_Z3bazv() #0 !dbg !21 {
+entry:
+ ; MEMPROF: call {{.*}} @_Z4foo2v{{.*}} !callsite ![[C4:[0-9]+]]
+ %call = call noundef ptr @_Z4foo2v(), !dbg !22
+ ret ptr %call, !dbg !23
+}
+
+; Function Attrs: mustprogress noinline optnone uwtable (group #0). recurse(n), memprof.cc line 16 per !24: returns foo() when n == 0 (block if.then), otherwise recurse(n - 1) (block if.end).
+define dso_local noundef ptr @_Z7recursej(i32 noundef %n) #0 !dbg !24 {
+entry:
+ %retval = alloca ptr, align 8
+ %n.addr = alloca i32, align 4
+ store i32 %n, ptr %n.addr, align 4
+ %0 = load i32, ptr %n.addr, align 4, !dbg !25
+ %tobool = icmp ne i32 %0, 0, !dbg !25
+ br i1 %tobool, label %if.end, label %if.then, !dbg !26
+
+if.then: ; preds = %entry
+ ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C5:[0-9]+]]
+ %call = call noundef ptr @_Z3foov(), !dbg !27
+ store ptr %call, ptr %retval, align 8, !dbg !28
+ br label %return, !dbg !28
+
+if.end: ; preds = %entry
+ %1 = load i32, ptr %n.addr, align 4, !dbg !29
+ %sub = sub i32 %1, 1, !dbg !30
+ ; MEMPROF: call {{.*}} @_Z7recursej{{.*}} !callsite ![[C6:[0-9]+]]
+ %call1 = call noundef ptr @_Z7recursej(i32 noundef %sub), !dbg !31
+ store ptr %call1, ptr %retval, align 8, !dbg !32
+ br label %return, !dbg !32
+
+return: ; preds = %if.end, %if.then
+ %2 = load ptr, ptr %retval, align 8, !dbg !33
+ ret ptr %2, !dbg !33
+}
+
+; Function Attrs: mustprogress noinline norecurse optnone uwtable (group #2). main(), memprof.cc line 21 per !34: makes two direct @_Znam(10) calls, calls foo twice, then bar and baz, then recurse(i + 3) for i = 0, 1; every returned pointer is memset and later passed to @_ZdaPv when non-null, with @sleep(200) called before some of the frees.
+define dso_local noundef i32 @main(i32 noundef %argc, ptr noundef %argv) #2 !dbg !34 {
+entry:
+ %retval = alloca i32, align 4
+ %argc.addr = alloca i32, align 4
+ %argv.addr = alloca ptr, align 8
+ %a = alloca ptr, align 8
+ %b = alloca ptr, align 8
+ %c = alloca ptr, align 8
+ %d = alloca ptr, align 8
+ %e = alloca ptr, align 8
+ %f = alloca ptr, align 8
+ %i = alloca i32, align 4
+ %g = alloca ptr, align 8
+ store i32 0, ptr %retval, align 4
+ store i32 %argc, ptr %argc.addr, align 4
+ store ptr %argv, ptr %argv.addr, align 8
+ ; MEMPROF: call {{.*}} @_Znam{{.*}} #[[A1:[0-9]+]]
+ %call = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !35
+ store ptr %call, ptr %a, align 8, !dbg !36
+ ; MEMPROF: call {{.*}} @_Znam{{.*}} #[[A2:[0-9]+]]
+ %call1 = call noalias noundef nonnull ptr @_Znam(i64 noundef 10) #6, !dbg !37
+ store ptr %call1, ptr %b, align 8, !dbg !38
+ ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C7:[0-9]+]]
+ %call2 = call noundef ptr @_Z3foov(), !dbg !39
+ store ptr %call2, ptr %c, align 8, !dbg !40
+ ; MEMPROF: call {{.*}} @_Z3foov{{.*}} !callsite ![[C8:[0-9]+]]
+ %call3 = call noundef ptr @_Z3foov(), !dbg !41
+ store ptr %call3, ptr %d, align 8, !dbg !42
+ ; MEMPROF: call {{.*}} @_Z3barv{{.*}} !callsite ![[C9:[0-9]+]]
+ %call4 = call noundef ptr @_Z3barv(), !dbg !43
+ store ptr %call4, ptr %e, align 8, !dbg !44
+ ; MEMPROF: call {{.*}} @_Z3bazv{{.*}} !callsite ![[C10:[0-9]+]]
+ %call5 = call noundef ptr @_Z3bazv(), !dbg !45
+ store ptr %call5, ptr %f, align 8, !dbg !46
+ %0 = load ptr, ptr %a, align 8, !dbg !47
+ call void @llvm.memset.p0.i64(ptr align 1 %0, i8 0, i64 10, i1 false), !dbg !48
+ %1 = load ptr, ptr %b, align 8, !dbg !49
+ call void @llvm.memset.p0.i64(ptr align 1 %1, i8 0, i64 10, i1 false), !dbg !50
+ %2 = load ptr, ptr %c, align 8, !dbg !51
+ call void @llvm.memset.p0.i64(ptr align 1 %2, i8 0, i64 10, i1 false), !dbg !52
+ %3 = load ptr, ptr %d, align 8, !dbg !53
+ call void @llvm.memset.p0.i64(ptr align 1 %3, i8 0, i64 10, i1 false), !dbg !54
+ %4 = load ptr, ptr %e, align 8, !dbg !55
+ call void @llvm.memset.p0.i64(ptr align 1 %4, i8 0, i64 10, i1 false), !dbg !56
+ %5 = load ptr, ptr %f, align 8, !dbg !57
+ call void @llvm.memset.p0.i64(ptr align 1 %5, i8 0, i64 10, i1 false), !dbg !58
+ %6 = load ptr, ptr %a, align 8, !dbg !59
+ %isnull = icmp eq ptr %6, null, !dbg !60
+ br i1 %isnull, label %delete.end, label %delete.notnull, !dbg !60
+
+delete.notnull: ; preds = %entry
+ call void @_ZdaPv(ptr noundef %6) #7, !dbg !61
+ br label %delete.end, !dbg !61
+
+delete.end: ; preds = %delete.notnull, %entry
+ %7 = load ptr, ptr %c, align 8, !dbg !63
+ %isnull6 = icmp eq ptr %7, null, !dbg !64
+ br i1 %isnull6, label %delete.end8, label %delete.notnull7, !dbg !64
+
+delete.notnull7: ; preds = %delete.end
+ call void @_ZdaPv(ptr noundef %7) #7, !dbg !65
+ br label %delete.end8, !dbg !65
+
+delete.end8: ; preds = %delete.notnull7, %delete.end
+ %call9 = call i32 @sleep(i32 noundef 200), !dbg !66
+ %8 = load ptr, ptr %b, align 8, !dbg !67
+ %isnull10 = icmp eq ptr %8, null, !dbg !68
+ br i1 %isnull10, label %delete.end12, label %delete.notnull11, !dbg !68
+
+delete.notnull11: ; preds = %delete.end8
+ call void @_ZdaPv(ptr noundef %8) #7, !dbg !69
+ br label %delete.end12, !dbg !69
+
+delete.end12: ; preds = %delete.notnull11, %delete.end8
+ %9 = load ptr, ptr %d, align 8, !dbg !70
+ %isnull13 = icmp eq ptr %9, null, !dbg !71
+ br i1 %isnull13, label %delete.end15, label %delete.notnull14, !dbg !71
+
+delete.notnull14: ; preds = %delete.end12
+ call void @_ZdaPv(ptr noundef %9) #7, !dbg !72
+ br label %delete.end15, !dbg !72
+
+delete.end15: ; preds = %delete.notnull14, %delete.end12
+ %10 = load ptr, ptr %e, align 8, !dbg !73
+ %isnull16 = icmp eq ptr %10, null, !dbg !74
+ br i1 %isnull16, label %delete.end18, label %delete.notnull17, !dbg !74
+
+delete.notnull17: ; preds = %delete.end15
+ call void @_ZdaPv(ptr noundef %10) #7, !dbg !75
+ br label %delete.end18, !dbg !75
+
+delete.end18: ; preds = %delete.notnull17, %delete.end15
+ %11 = load ptr, ptr %f, align 8, !dbg !76
+ %isnull19 = icmp eq ptr %11, null, !dbg !77
+ br i1 %isnull19, label %delete.end21, label %delete.notnull20, !dbg !77
+
+delete.notnull20: ; preds = %delete.end18
+ call void @_ZdaPv(ptr noundef %11) #7, !dbg !78
+ br label %delete.end21, !dbg !78
+
+delete.end21: ; preds = %delete.notnull20, %delete.end18
+ store i32 0, ptr %i, align 4, !dbg !79
+ br label %for.cond, !dbg !80
+
+for.cond: ; preds = %for.inc, %delete.end21
+ %12 = load i32, ptr %i, align 4, !dbg !81
+ %cmp = icmp ult i32 %12, 2, !dbg !82
+ br i1 %cmp, label %for.body, label %for.end, !dbg !83
+
+for.body: ; preds = %for.cond
+ %13 = load i32, ptr %i, align 4, !dbg !84
+ %add = add i32 %13, 3, !dbg !85
+ ; MEMPROF: call {{.*}} @_Z7recursej{{.*}} !callsite ![[C11:[0-9]+]]
+ %call22 = call noundef ptr @_Z7recursej(i32 noundef %add), !dbg !86
+ store ptr %call22, ptr %g, align 8, !dbg !87
+ %14 = load ptr, ptr %g, align 8, !dbg !88
+ call void @llvm.memset.p0.i64(ptr align 1 %14, i8 0, i64 10, i1 false), !dbg !89
+ %15 = load i32, ptr %i, align 4, !dbg !90
+ %tobool = icmp ne i32 %15, 0, !dbg !90
+ br i1 %tobool, label %if.end, label %if.then, !dbg !91
+
+if.then: ; preds = %for.body
+ %call23 = call i32 @sleep(i32 noundef 200), !dbg !92
+ br label %if.end, !dbg !92
+
+if.end: ; preds = %if.then, %for.body
+ %16 = load ptr, ptr %g, align 8, !dbg !93
+ %isnull24 = icmp eq ptr %16, null, !dbg !94
+ br i1 %isnull24, label %delete.end26, label %delete.notnull25, !dbg !94
+
+delete.notnull25: ; preds = %if.end
+ call void @_ZdaPv(ptr noundef %16) #7, !dbg !95
+ br label %delete.end26, !dbg !95
+
+delete.end26: ; preds = %delete.notnull25, %if.end
+ br label %for.inc, !dbg !96
+
+for.inc: ; preds = %delete.end26
+ %17 = load i32, ptr %i, align 4, !dbg !97
+ %inc = add i32 %17, 1, !dbg !97
+ store i32 %inc, ptr %i, align 4, !dbg !97
+ br label %for.cond, !dbg !99, !llvm.loop !100
+
+for.end: ; preds = %for.cond
+ ret i32 0, !dbg !103
+}
+
+; MEMPROF: #[[A1]] = { builtin allocsize(0) "memprof"="notcold" }
+; MEMPROF: #[[A2]] = { builtin allocsize(0) "memprof"="cold" }
+; MEMPROF: ![[M1]] = !{![[MIB1:[0-9]+]], ![[MIB2:[0-9]+]], ![[MIB3:[0-9]+]], ![[MIB4:[0-9]+]], ![[MIB5:[0-9]+]]}
+; MEMPROF: ![[MIB1]] = !{![[STACK1:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK1]] = !{i64 2732490490862098848, i64 748269490701775343}
+; MEMPROF: ![[MIB2]] = !{![[STACK2:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK2]] = !{i64 2732490490862098848, i64 2104812325165620841, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934, i64 1544787832369987002}
+; MEMPROF: ![[MIB3]] = !{![[STACK3:[0-9]+]], !"notcold"}
+; MEMPROF: ![[STACK3]] = !{i64 2732490490862098848, i64 2104812325165620841, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934, i64 6281715513834610934}
+; MEMPROF: ![[MIB4]] = !{![[STACK4:[0-9]+]], !"cold"}
+; MEMPROF: ![[STACK4]] = !{i64 2732490490862098848, i64 8467819354083268568}
+; MEMPROF: ![[MIB5]] = !{![[STACK5:[0-9]+]], !"notcold"}
+; MEMPROF: ![[STACK5]] = !{i64 2732490490862098848, i64 8690657650969109624}
+; MEMPROF: ![[C1]] = !{i64 2732490490862098848}
+; MEMPROF: ![[C2]] = !{i64 8467819354083268568}
+; MEMPROF: ![[C3]] = !{i64 9086428284934609951}
+; MEMPROF: ![[C4]] = !{i64 -5964873800580613432}
+; MEMPROF: ![[C5]] = !{i64 2104812325165620841}
+; MEMPROF: ![[C6]] = !{i64 6281715513834610934}
+; MEMPROF: ![[C7]] = !{i64 8690657650969109624}
+; MEMPROF: ![[C8]] = !{i64 748269490701775343}
+; MEMPROF: ![[C9]] = !{i64 -5747251260480066785}
+; MEMPROF: ![[C10]] = !{i64 2061451396820446691}
+; MEMPROF: ![[C11]] = !{i64 1544787832369987002}
+
+; Function Attrs: argmemonly nofree nounwind willreturn writeonly
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #3
+
+; Function Attrs: nobuiltin nounwind (group #4). Itanium-mangled operator delete[](void*); main() calls it on each non-null pointer it obtained.
+declare void @_ZdaPv(ptr noundef) #4
+
+declare i32 @sleep(i32 noundef) #5
+
+attributes #0 = { mustprogress noinline optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { mustprogress noinline norecurse optnone uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { argmemonly nofree nounwind willreturn writeonly }
+attributes #4 = { nobuiltin nounwind "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #5 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #6 = { builtin allocsize(0) }
+attributes #7 = { builtin nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+!llvm.ident = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "memprof.cc", directory: "/usr/local/google/home/tejohnson/llvm/tmp", checksumkind: CSK_MD5, checksum: "e8c40ebe4b21776b4d60e9632cbc13c2")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 7, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"frame-pointer", i32 2}
+!9 = !{!"clang version 15.0.0 (https://github.com/llvm/llvm-project.git 6cbe6284d1f0a088b5c6482ae27b738f03d82fe7)"}
+!10 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !11, scopeLine: 4, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!11 = !DISubroutineType(types: !12)
+!12 = !{}
+!13 = !DILocation(line: 5, column: 10, scope: !10)
+!14 = !DILocation(line: 5, column: 3, scope: !10)
+!15 = distinct !DISubprogram(name: "foo2", linkageName: "_Z4foo2v", scope: !1, file: !1, line: 7, type: !11, scopeLine: 7, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!16 = !DILocation(line: 8, column: 10, scope: !15)
+!17 = !DILocation(line: 8, column: 3, scope: !15)
+!18 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 10, type: !11, scopeLine: 10, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!19 = !DILocation(line: 11, column: 10, scope: !18)
+!20 = !DILocation(line: 11, column: 3, scope: !18)
+!21 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 13, type: !11, scopeLine: 13, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!22 = !DILocation(line: 14, column: 10, scope: !21)
+!23 = !DILocation(line: 14, column: 3, scope: !21)
+!24 = distinct !DISubprogram(name: "recurse", linkageName: "_Z7recursej", scope: !1, file: !1, line: 16, type: !11, scopeLine: 16, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!25 = !DILocation(line: 17, column: 8, scope: !24)
+!26 = !DILocation(line: 17, column: 7, scope: !24)
+!27 = !DILocation(line: 18, column: 12, scope: !24)
+!28 = !DILocation(line: 18, column: 5, scope: !24)
+!29 = !DILocation(line: 19, column: 18, scope: !24)
+!30 = !DILocation(line: 19, column: 19, scope: !24)
+!31 = !DILocation(line: 19, column: 10, scope: !24)
+!32 = !DILocation(line: 19, column: 3, scope: !24)
+!33 = !DILocation(line: 20, column: 1, scope: !24)
+!34 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !11, scopeLine: 21, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !12)
+!35 = !DILocation(line: 25, column: 13, scope: !34)
+!36 = !DILocation(line: 25, column: 9, scope: !34)
+!37 = !DILocation(line: 26, column: 13, scope: !34)
+!38 = !DILocation(line: 26, column: 9, scope: !34)
+!39 = !DILocation(line: 27, column: 13, scope: !34)
+!40 = !DILocation(line: 27, column: 9, scope: !34)
+!41 = !DILocation(line: 28, column: 13, scope: !34)
+!42 = !DILocation(line: 28, column: 9, scope: !34)
+!43 = !DILocation(line: 29, column: 13, scope: !34)
+!44 = !DILocation(line: 29, column: 9, scope: !34)
+!45 = !DILocation(line: 30, column: 13, scope: !34)
+!46 = !DILocation(line: 30, column: 9, scope: !34)
+!47 = !DILocation(line: 31, column: 10, scope: !34)
+!48 = !DILocation(line: 31, column: 3, scope: !34)
+!49 = !DILocation(line: 32, column: 10, scope: !34)
+!50 = !DILocation(line: 32, column: 3, scope: !34)
+!51 = !DILocation(line: 33, column: 10, scope: !34)
+!52 = !DILocation(line: 33, column: 3, scope: !34)
+!53 = !DILocation(line: 34, column: 10, scope: !34)
+!54 = !DILocation(line: 34, column: 3, scope: !34)
+!55 = !DILocation(line: 35, column: 10, scope: !34)
+!56 = !DILocation(line: 35, column: 3, scope: !34)
+!57 = !DILocation(line: 36, column: 10, scope: !34)
+!58 = !DILocation(line: 36, column: 3, scope: !34)
+!59 = !DILocation(line: 38, column: 12, scope: !34)
+!60 = !DILocation(line: 38, column: 3, scope: !34)
+!61 = !DILocation(line: 38, column: 3, scope: !62)
+!62 = !DILexicalBlockFile(scope: !34, file: !1, discriminator: 2)
+!63 = !DILocation(line: 39, column: 12, scope: !34)
+!64 = !DILocation(line: 39, column: 3, scope: !34)
+!65 = !DILocation(line: 39, column: 3, scope: !62)
+!66 = !DILocation(line: 41, column: 3, scope: !34)
+!67 = !DILocation(line: 42, column: 12, scope: !34)
+!68 = !DILocation(line: 42, column: 3, scope: !34)
+!69 = !DILocation(line: 42, column: 3, scope: !62)
+!70 = !DILocation(line: 43, column: 12, scope: !34)
+!71 = !DILocation(line: 43, column: 3, scope: !34)
+!72 = !DILocation(line: 43, column: 3, scope: !62)
+!73 = !DILocation(line: 44, column: 12, scope: !34)
+!74 = !DILocation(line: 44, column: 3, scope: !34)
+!75 = !DILocation(line: 44, column: 3, scope: !62)
+!76 = !DILocation(line: 45, column: 12, scope: !34)
+!77 = !DILocation(line: 45, column: 3, scope: !34)
+!78 = !DILocation(line: 45, column: 3, scope: !62)
+!79 = !DILocation(line: 51, column: 17, scope: !34)
+!80 = !DILocation(line: 51, column: 8, scope: !34)
+!81 = !DILocation(line: 51, column: 24, scope: !62)
+!82 = !DILocation(line: 51, column: 26, scope: !62)
+!83 = !DILocation(line: 51, column: 3, scope: !62)
+!84 = !DILocation(line: 52, column: 23, scope: !34)
+!85 = !DILocation(line: 52, column: 25, scope: !34)
+!86 = !DILocation(line: 52, column: 15, scope: !34)
+!87 = !DILocation(line: 52, column: 11, scope: !34)
+!88 = !DILocation(line: 53, column: 12, scope: !34)
+!89 = !DILocation(line: 53, column: 5, scope: !34)
+!90 = !DILocation(line: 54, column: 10, scope: !34)
+!91 = !DILocation(line: 54, column: 9, scope: !34)
+!92 = !DILocation(line: 55, column: 7, scope: !34)
+!93 = !DILocation(line: 56, column: 14, scope: !34)
+!94 = !DILocation(line: 56, column: 5, scope: !34)
+!95 = !DILocation(line: 56, column: 5, scope: !62)
+!96 = !DILocation(line: 57, column: 3, scope: !34)
+!97 = !DILocation(line: 51, column: 32, scope: !98)
+!98 = !DILexicalBlockFile(scope: !34, file: !1, discriminator: 4)
+!99 = !DILocation(line: 51, column: 3, scope: !98)
+!100 = distinct !{!100, !101, !96, !102}
+!101 = !DILocation(line: 51, column: 3, scope: !34)
+!102 = !{!"llvm.loop.mustprogress"}
+!103 = !DILocation(line: 58, column: 3, scope: !34)
diff --git a/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll b/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
new file mode 100644
index 0000000000000..068c8c91aa3f4
--- /dev/null
+++ b/llvm/test/Transforms/PGOProfile/memprofmissingfunc.ll
@@ -0,0 +1,28 @@
+;; Tests that we get a missing memprof error for a function not in profile when
+;; using -pgo-warn-missing-function.
+
+;; Avoid failures on big-endian systems that can't read the raw profile properly
+; REQUIRES: x86_64-linux
+
+;; TODO: Use text profile inputs once they are available for memprof.
+
+;; The raw profiles have been generated from the source used for the memprof.ll
+;; test (see comments at the top of that file).
+
+; RUN: llvm-profdata merge %S/Inputs/memprof.memprofraw --profiled-binary %S/Inputs/memprof.exe -o %t.memprofdata
+
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.memprofdata -pgo-warn-missing-function -S 2>&1 | FileCheck %s
+
+; CHECK: memprof record not found for function hash {{.*}} _Z16funcnotinprofilev
+
+; ModuleID = 'memprofmissingfunc.cc'
+source_filename = "memprofmissingfunc.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function that is not in the memprof profile; the FileCheck expectation above requires the "record not found" diagnostic to name it. It carries no attribute group.
+define dso_local void @_Z16funcnotinprofilev() {
+entry:
+ ret void
+}
+
More information about the cfe-commits
mailing list