[llvm] 9501405 - Restore "[MemProf] Refactor memory profile matching into MemProfiler (NFC)"
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Tue Jul 11 13:16:34 PDT 2023
Author: Teresa Johnson
Date: 2023-07-11T13:16:20-07:00
New Revision: 95014050dac528f53ab704d62a16a44ed0df032b
URL: https://github.com/llvm/llvm-project/commit/95014050dac528f53ab704d62a16a44ed0df032b
DIFF: https://github.com/llvm/llvm-project/commit/95014050dac528f53ab704d62a16a44ed0df032b.diff
LOG: Restore "[MemProf] Refactor memory profile matching into MemProfiler (NFC)"
This restores commit 29252fdd530f68d0de38a0cd26ed428bb2f5c16a, reverted
in 3498cf52ba1c23cbf8acdf99d649d2fa25291eef because it was thought to
cause a bot failure, which ended up being unrelated to this patch set.
Differential Revision: https://reviews.llvm.org/D154872
Added:
Modified:
llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
index 038f44644504fd..a05ed5f1b99710 100644
--- a/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
+++ b/llvm/include/llvm/Transforms/Instrumentation/MemProfiler.h
@@ -43,6 +43,13 @@ class ModuleMemProfilerPass : public PassInfoMixin<ModuleMemProfilerPass> {
static bool isRequired() { return true; }
};
+// TODO: Remove this declaration and make readMemprof static once the matching
+// is moved into its own pass.
+class IndexedInstrProfReader;
+class TargetLibraryInfo;
+void readMemprof(Module &M, Function &F, IndexedInstrProfReader *MemProfReader,
+ const TargetLibraryInfo &TLI);
+
} // namespace llvm
#endif
diff --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 2d384f4ff4418d..416060a847d443 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -18,9 +18,12 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
@@ -30,16 +33,28 @@
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/HashBuilder.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
+#include <map>
+#include <set>
using namespace llvm;
+using namespace llvm::memprof;
#define DEBUG_TYPE "memprof"
+namespace llvm {
+extern cl::opt<bool> PGOWarnMissing;
+extern cl::opt<bool> NoPGOWarnMismatch;
+extern cl::opt<bool> NoPGOWarnMismatchComdatWeak;
+} // namespace llvm
+
constexpr int LLVM_MEM_PROFILER_VERSION = 1;
// Size of memory mapped to a single shadow location.
@@ -128,6 +143,7 @@ STATISTIC(NumInstrumentedReads, "Number of instrumented reads");
STATISTIC(NumInstrumentedWrites, "Number of instrumented writes");
STATISTIC(NumSkippedStackReads, "Number of non-instrumented stack reads");
STATISTIC(NumSkippedStackWrites, "Number of non-instrumented stack writes");
+STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
namespace {
@@ -601,3 +617,251 @@ bool MemProfiler::instrumentFunction(Function &F) {
return FunctionModified;
}
+
+static void addCallsiteMetadata(Instruction &I,
+ std::vector<uint64_t> &InlinedCallStack,
+ LLVMContext &Ctx) {
+ I.setMetadata(LLVMContext::MD_callsite,
+ buildCallstackMetadata(InlinedCallStack, Ctx));
+}
+
+static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
+ uint32_t Column) {
+ llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
+ HashBuilder;
+ HashBuilder.add(Function, LineOffset, Column);
+ llvm::BLAKE3Result<8> Hash = HashBuilder.final();
+ uint64_t Id;
+ std::memcpy(&Id, Hash.data(), sizeof(Hash));
+ return Id;
+}
+
+static uint64_t computeStackId(const memprof::Frame &Frame) {
+ return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
+}
+
+static void addCallStack(CallStackTrie &AllocTrie,
+ const AllocationInfo *AllocInfo) {
+ SmallVector<uint64_t> StackIds;
+ for (const auto &StackFrame : AllocInfo->CallStack)
+ StackIds.push_back(computeStackId(StackFrame));
+ auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
+ AllocInfo->Info.getAllocCount(),
+ AllocInfo->Info.getTotalLifetime());
+ AllocTrie.addCallStack(AllocType, StackIds);
+}
+
+// Helper to compare the InlinedCallStack computed from an instruction's debug
+// info to a list of Frames from profile data (either the allocation data or a
+// callsite). For callsites, the StartIndex to use in the Frame array may be
+// non-zero.
+static bool
+stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
+ ArrayRef<uint64_t> InlinedCallStack,
+ unsigned StartIndex = 0) {
+ auto StackFrame = ProfileCallStack.begin() + StartIndex;
+ auto InlCallStackIter = InlinedCallStack.begin();
+ for (; StackFrame != ProfileCallStack.end() &&
+ InlCallStackIter != InlinedCallStack.end();
+ ++StackFrame, ++InlCallStackIter) {
+ uint64_t StackId = computeStackId(*StackFrame);
+ if (StackId != *InlCallStackIter)
+ return false;
+ }
+ // Return true if we found and matched all stack ids from the call
+ // instruction.
+ return InlCallStackIter == InlinedCallStack.end();
+}
+
+void llvm::readMemprof(Module &M, Function &F,
+ IndexedInstrProfReader *MemProfReader,
+ const TargetLibraryInfo &TLI) {
+ auto &Ctx = M.getContext();
+
+ auto FuncName = getPGOFuncName(F);
+ auto FuncGUID = Function::getGUID(FuncName);
+ Expected<memprof::MemProfRecord> MemProfResult =
+ MemProfReader->getMemProfRecord(FuncGUID);
+ if (Error E = MemProfResult.takeError()) {
+ handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+ auto Err = IPE.get();
+ bool SkipWarning = false;
+ LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
+ << ": ");
+ if (Err == instrprof_error::unknown_function) {
+ NumOfMemProfMissing++;
+ SkipWarning = !PGOWarnMissing;
+ LLVM_DEBUG(dbgs() << "unknown function");
+ } else if (Err == instrprof_error::hash_mismatch) {
+ SkipWarning =
+ NoPGOWarnMismatch ||
+ (NoPGOWarnMismatchComdatWeak &&
+ (F.hasComdat() ||
+ F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
+ LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
+ }
+
+ if (SkipWarning)
+ return;
+
+ std::string Msg = (IPE.message() + Twine(" ") + F.getName().str() +
+ Twine(" Hash = ") + std::to_string(FuncGUID))
+ .str();
+
+ Ctx.diagnose(
+ DiagnosticInfoPGOProfile(M.getName().data(), Msg, DS_Warning));
+ });
+ return;
+ }
+
+ // Build maps of the location hash to all profile data with that leaf location
+ // (allocation info and the callsites).
+ std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
+ // For the callsites we need to record the index of the associated frame in
+ // the frame array (see comments below where the map entries are added).
+ std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
+ LocHashToCallSites;
+ const auto MemProfRec = std::move(MemProfResult.get());
+ for (auto &AI : MemProfRec.AllocSites) {
+ // Associate the allocation info with the leaf frame. The later matching
+ // code will match any inlined call sequences in the IR with a longer prefix
+ // of call stack frames.
+ uint64_t StackId = computeStackId(AI.CallStack[0]);
+ LocHashToAllocInfo[StackId].insert(&AI);
+ }
+ for (auto &CS : MemProfRec.CallSites) {
+ // Need to record all frames from leaf up to and including this function,
+ // as any of these may or may not have been inlined at this point.
+ unsigned Idx = 0;
+ for (auto &StackFrame : CS) {
+ uint64_t StackId = computeStackId(StackFrame);
+ LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
+ // Once we find this function, we can stop recording.
+ if (StackFrame.Function == FuncGUID)
+ break;
+ }
+ assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
+ }
+
+ auto GetOffset = [](const DILocation *DIL) {
+ return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
+ 0xffff;
+ };
+
+ // Now walk the instructions, looking up the associated profile data using
+ // debug locations.
+ for (auto &BB : F) {
+ for (auto &I : BB) {
+ if (I.isDebugOrPseudoInst())
+ continue;
+ // We are only interested in calls (allocation or interior call stack
+ // context calls).
+ auto *CI = dyn_cast<CallBase>(&I);
+ if (!CI)
+ continue;
+ auto *CalledFunction = CI->getCalledFunction();
+ if (CalledFunction && CalledFunction->isIntrinsic())
+ continue;
+ // List of call stack ids computed from the location hashes on debug
+ // locations (leaf to inlined at root).
+ std::vector<uint64_t> InlinedCallStack;
+ // Was the leaf location found in one of the profile maps?
+ bool LeafFound = false;
+ // If leaf was found in a map, iterators pointing to its location in both
+ // of the maps. It might exist in neither, one, or both (the latter case
+ // can happen because we don't currently have discriminators to
+ // distinguish the case when a single line/col maps to both an allocation
+ // and another callsite).
+ std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
+ AllocInfoIter;
+ std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
+ unsigned>>>::iterator CallSitesIter;
+ for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
+ DIL = DIL->getInlinedAt()) {
+ // Use C++ linkage name if possible. Need to compile with
+ // -fdebug-info-for-profiling to get linkage name.
+ StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
+ if (Name.empty())
+ Name = DIL->getScope()->getSubprogram()->getName();
+ auto CalleeGUID = Function::getGUID(Name);
+ auto StackId =
+ computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
+ // LeafFound will only be false on the first iteration, since we either
+ // set it true or break out of the loop below.
+ if (!LeafFound) {
+ AllocInfoIter = LocHashToAllocInfo.find(StackId);
+ CallSitesIter = LocHashToCallSites.find(StackId);
+ // Check if the leaf is in one of the maps. If not, no need to look
+ // further at this call.
+ if (AllocInfoIter == LocHashToAllocInfo.end() &&
+ CallSitesIter == LocHashToCallSites.end())
+ break;
+ LeafFound = true;
+ }
+ InlinedCallStack.push_back(StackId);
+ }
+ // If leaf not in either of the maps, skip inst.
+ if (!LeafFound)
+ continue;
+
+ // First add !memprof metadata from allocation info, if we found the
+ // instruction's leaf location in that map, and if the rest of the
+ // instruction's locations match the prefix Frame locations on an
+ // allocation context with the same leaf.
+ if (AllocInfoIter != LocHashToAllocInfo.end()) {
+ // Only consider allocations via new, to reduce unnecessary metadata,
+ // since those are the only allocations that will be targeted initially.
+ if (!isNewLikeFn(CI, &TLI))
+ continue;
+ // We may match this instruction's location list to multiple MIB
+ // contexts. Add them to a Trie specialized for trimming the contexts to
+ // the minimal needed to disambiguate contexts with unique behavior.
+ CallStackTrie AllocTrie;
+ for (auto *AllocInfo : AllocInfoIter->second) {
+ // Check the full inlined call stack against this one.
+ // If we found and thus matched all frames on the call, include
+ // this MIB.
+ if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
+ InlinedCallStack))
+ addCallStack(AllocTrie, AllocInfo);
+ }
+ // We might not have matched any to the full inlined call stack.
+ // But if we did, create and attach metadata, or a function attribute if
+ // all contexts have identical profiled behavior.
+ if (!AllocTrie.empty()) {
+ // MemprofMDAttached will be false if a function attribute was
+ // attached.
+ bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
+ assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
+ if (MemprofMDAttached) {
+ // Add callsite metadata for the instruction's location list so that
+ // it is simpler later on to identify which part of the MIB contexts
+ // are from this particular instruction (including during inlining,
+ // when the callsite metadata will be updated appropriately).
+ // FIXME: can this be changed to strip out the matching stack
+ // context ids from the MIB contexts and not add any callsite
+ // metadata here to save space?
+ addCallsiteMetadata(I, InlinedCallStack, Ctx);
+ }
+ }
+ continue;
+ }
+
+ // Otherwise, add callsite metadata. If we reach here then we found the
+ // instruction's leaf location in the callsites map and not the allocation
+ // map.
+ assert(CallSitesIter != LocHashToCallSites.end());
+ for (auto CallStackIdx : CallSitesIter->second) {
+ // If we found and thus matched all frames on the call, create and
+ // attach call stack metadata.
+ if (stackFrameIncludesInlinedCallStack(
+ *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
+ addCallsiteMetadata(I, InlinedCallStack, Ctx);
+ // Only need to find one with a matching call stack and add a single
+ // callsite metadata.
+ break;
+ }
+ }
+ }
+ }
+}
diff --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 9953896e8cf0e5..1a67710f748e39 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -62,8 +62,6 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
@@ -97,7 +95,6 @@
#include "llvm/IR/Value.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/ProfileData/InstrProfReader.h"
-#include "llvm/Support/BLAKE3.h"
#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/CRC.h"
#include "llvm/Support/Casting.h"
@@ -107,31 +104,28 @@
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GraphWriter.h"
-#include "llvm/Support/HashBuilder.h"
#include "llvm/Support/VirtualFileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/TargetParser/Triple.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Instrumentation/BlockCoverageInference.h"
#include "llvm/Transforms/Instrumentation/CFGMST.h"
+#include "llvm/Transforms/Instrumentation/MemProfiler.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/MisExpect.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
-#include <map>
#include <memory>
#include <numeric>
#include <optional>
-#include <set>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>
using namespace llvm;
-using namespace llvm::memprof;
using ProfileCount = Function::ProfileCount;
using VPCandidateInfo = ValueProfileCollector::CandidateInfo;
@@ -146,7 +140,6 @@ STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
-STATISTIC(NumOfMemProfMissing, "Number of functions without memory profile.");
STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
STATISTIC(NumOfCSPGOSelectInsts,
@@ -203,31 +196,31 @@ static cl::opt<bool> DoComdatRenaming(
cl::desc("Append function hash to the name of COMDAT function to avoid "
"function hash mismatch due to the preinliner"));
+namespace llvm {
// Command line option to enable/disable the warning about missing profile
// information.
-static cl::opt<bool>
- PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
- cl::desc("Use this option to turn on/off "
- "warnings about missing profile data for "
- "functions."));
+cl::opt<bool> PGOWarnMissing("pgo-warn-missing-function", cl::init(false),
+ cl::Hidden,
+ cl::desc("Use this option to turn on/off "
+ "warnings about missing profile data for "
+ "functions."));
-namespace llvm {
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data.
cl::opt<bool>
NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
cl::desc("Use this option to turn off/on "
"warnings about profile cfg mismatch."));
-} // namespace llvm
// Command line option to enable/disable the warning about a hash mismatch in
// the profile data for Comdat functions, which often turns out to be false
// positive due to the pre-instrumentation inline.
-static cl::opt<bool> NoPGOWarnMismatchComdatWeak(
+cl::opt<bool> NoPGOWarnMismatchComdatWeak(
"no-pgo-warn-mismatch-comdat-weak", cl::init(true), cl::Hidden,
cl::desc("The option is used to turn on/off "
"warnings about hash mismatch for comdat "
"or weak functions."));
+} // namespace llvm
// Command line option to enable/disable select instruction instrumentation.
static cl::opt<bool>
@@ -321,10 +314,6 @@ static cl::opt<unsigned> PGOFunctionSizeThreshold(
"pgo-function-size-threshold", cl::Hidden,
cl::desc("Do not instrument functions smaller than this threshold."));
-static cl::opt<bool> MatchMemProf(
- "pgo-match-memprof", cl::init(true), cl::Hidden,
- cl::desc("Perform matching and annotation of memprof profiles."));
-
static cl::opt<unsigned> PGOFunctionCriticalEdgeThreshold(
"pgo-critical-edge-threshold", cl::init(20000), cl::Hidden,
cl::desc("Do not instrument functions with the number of critical edges "
@@ -1081,9 +1070,6 @@ class PGOUseFunc {
bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
InstrProfRecord::CountPseudoKind &PseudoKind);
- // Read memprof data for the instrumented function from profile.
- bool readMemprof(IndexedInstrProfReader *PGOReader);
-
// Populate the counts for all BBs.
void populateCounters();
@@ -1303,257 +1289,6 @@ static void annotateFunctionWithHashMismatch(Function &F, LLVMContext &ctx) {
F.setMetadata(LLVMContext::MD_annotation, MD);
}
-static void addCallsiteMetadata(Instruction &I,
- std::vector<uint64_t> &InlinedCallStack,
- LLVMContext &Ctx) {
- I.setMetadata(LLVMContext::MD_callsite,
- buildCallstackMetadata(InlinedCallStack, Ctx));
-}
-
-static uint64_t computeStackId(GlobalValue::GUID Function, uint32_t LineOffset,
- uint32_t Column) {
- llvm::HashBuilder<llvm::TruncatedBLAKE3<8>, llvm::support::endianness::little>
- HashBuilder;
- HashBuilder.add(Function, LineOffset, Column);
- llvm::BLAKE3Result<8> Hash = HashBuilder.final();
- uint64_t Id;
- std::memcpy(&Id, Hash.data(), sizeof(Hash));
- return Id;
-}
-
-static uint64_t computeStackId(const memprof::Frame &Frame) {
- return computeStackId(Frame.Function, Frame.LineOffset, Frame.Column);
-}
-
-static void addCallStack(CallStackTrie &AllocTrie,
- const AllocationInfo *AllocInfo) {
- SmallVector<uint64_t> StackIds;
- for (const auto &StackFrame : AllocInfo->CallStack)
- StackIds.push_back(computeStackId(StackFrame));
- auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
- AllocInfo->Info.getAllocCount(),
- AllocInfo->Info.getTotalLifetime());
- AllocTrie.addCallStack(AllocType, StackIds);
-}
-
-// Helper to compare the InlinedCallStack computed from an instruction's debug
-// info to a list of Frames from profile data (either the allocation data or a
-// callsite). For callsites, the StartIndex to use in the Frame array may be
-// non-zero.
-static bool
-stackFrameIncludesInlinedCallStack(ArrayRef<Frame> ProfileCallStack,
- ArrayRef<uint64_t> InlinedCallStack,
- unsigned StartIndex = 0) {
- auto StackFrame = ProfileCallStack.begin() + StartIndex;
- auto InlCallStackIter = InlinedCallStack.begin();
- for (; StackFrame != ProfileCallStack.end() &&
- InlCallStackIter != InlinedCallStack.end();
- ++StackFrame, ++InlCallStackIter) {
- uint64_t StackId = computeStackId(*StackFrame);
- if (StackId != *InlCallStackIter)
- return false;
- }
- // Return true if we found and matched all stack ids from the call
- // instruction.
- return InlCallStackIter == InlinedCallStack.end();
-}
-
-bool PGOUseFunc::readMemprof(IndexedInstrProfReader *PGOReader) {
- if (!MatchMemProf)
- return true;
-
- auto &Ctx = M->getContext();
-
- auto FuncGUID = Function::getGUID(FuncInfo.FuncName);
- Expected<memprof::MemProfRecord> MemProfResult =
- PGOReader->getMemProfRecord(FuncGUID);
- if (Error E = MemProfResult.takeError()) {
- handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
- auto Err = IPE.get();
- bool SkipWarning = false;
- LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
- << FuncInfo.FuncName << ": ");
- if (Err == instrprof_error::unknown_function) {
- NumOfMemProfMissing++;
- SkipWarning = !PGOWarnMissing;
- LLVM_DEBUG(dbgs() << "unknown function");
- } else if (Err == instrprof_error::hash_mismatch) {
- SkipWarning =
- NoPGOWarnMismatch ||
- (NoPGOWarnMismatchComdatWeak &&
- (F.hasComdat() ||
- F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
- LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
- }
-
- if (SkipWarning)
- return;
-
- std::string Msg =
- (IPE.message() + Twine(" ") + F.getName().str() + Twine(" Hash = ") +
- std::to_string(FuncInfo.FunctionHash))
- .str();
-
- Ctx.diagnose(
- DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
- });
- return false;
- }
-
- // Build maps of the location hash to all profile data with that leaf location
- // (allocation info and the callsites).
- std::map<uint64_t, std::set<const AllocationInfo *>> LocHashToAllocInfo;
- // For the callsites we need to record the index of the associated frame in
- // the frame array (see comments below where the map entries are added).
- std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
- LocHashToCallSites;
- const auto MemProfRec = std::move(MemProfResult.get());
- for (auto &AI : MemProfRec.AllocSites) {
- // Associate the allocation info with the leaf frame. The later matching
- // code will match any inlined call sequences in the IR with a longer prefix
- // of call stack frames.
- uint64_t StackId = computeStackId(AI.CallStack[0]);
- LocHashToAllocInfo[StackId].insert(&AI);
- }
- for (auto &CS : MemProfRec.CallSites) {
- // Need to record all frames from leaf up to and including this function,
- // as any of these may or may not have been inlined at this point.
- unsigned Idx = 0;
- for (auto &StackFrame : CS) {
- uint64_t StackId = computeStackId(StackFrame);
- LocHashToCallSites[StackId].insert(std::make_pair(&CS, Idx++));
- // Once we find this function, we can stop recording.
- if (StackFrame.Function == FuncGUID)
- break;
- }
- assert(Idx <= CS.size() && CS[Idx - 1].Function == FuncGUID);
- }
-
- auto GetOffset = [](const DILocation *DIL) {
- return (DIL->getLine() - DIL->getScope()->getSubprogram()->getLine()) &
- 0xffff;
- };
-
- // Now walk the instructions, looking up the associated profile data using
- // dbug locations.
- for (auto &BB : F) {
- for (auto &I : BB) {
- if (I.isDebugOrPseudoInst())
- continue;
- // We are only interested in calls (allocation or interior call stack
- // context calls).
- auto *CI = dyn_cast<CallBase>(&I);
- if (!CI)
- continue;
- auto *CalledFunction = CI->getCalledFunction();
- if (CalledFunction && CalledFunction->isIntrinsic())
- continue;
- // List of call stack ids computed from the location hashes on debug
- // locations (leaf to inlined at root).
- std::vector<uint64_t> InlinedCallStack;
- // Was the leaf location found in one of the profile maps?
- bool LeafFound = false;
- // If leaf was found in a map, iterators pointing to its location in both
- // of the maps. It might exist in neither, one, or both (the latter case
- // can happen because we don't currently have discriminators to
- // distinguish the case when a single line/col maps to both an allocation
- // and another callsite).
- std::map<uint64_t, std::set<const AllocationInfo *>>::iterator
- AllocInfoIter;
- std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *,
- unsigned>>>::iterator CallSitesIter;
- for (const DILocation *DIL = I.getDebugLoc(); DIL != nullptr;
- DIL = DIL->getInlinedAt()) {
- // Use C++ linkage name if possible. Need to compile with
- // -fdebug-info-for-profiling to get linkage name.
- StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName();
- if (Name.empty())
- Name = DIL->getScope()->getSubprogram()->getName();
- auto CalleeGUID = Function::getGUID(Name);
- auto StackId =
- computeStackId(CalleeGUID, GetOffset(DIL), DIL->getColumn());
- // LeafFound will only be false on the first iteration, since we either
- // set it true or break out of the loop below.
- if (!LeafFound) {
- AllocInfoIter = LocHashToAllocInfo.find(StackId);
- CallSitesIter = LocHashToCallSites.find(StackId);
- // Check if the leaf is in one of the maps. If not, no need to look
- // further at this call.
- if (AllocInfoIter == LocHashToAllocInfo.end() &&
- CallSitesIter == LocHashToCallSites.end())
- break;
- LeafFound = true;
- }
- InlinedCallStack.push_back(StackId);
- }
- // If leaf not in either of the maps, skip inst.
- if (!LeafFound)
- continue;
-
- // First add !memprof metadata from allocation info, if we found the
- // instruction's leaf location in that map, and if the rest of the
- // instruction's locations match the prefix Frame locations on an
- // allocation context with the same leaf.
- if (AllocInfoIter != LocHashToAllocInfo.end()) {
- // Only consider allocations via new, to reduce unnecessary metadata,
- // since those are the only allocations that will be targeted initially.
- if (!isNewLikeFn(CI, &FuncInfo.TLI))
- continue;
- // We may match this instruction's location list to multiple MIB
- // contexts. Add them to a Trie specialized for trimming the contexts to
- // the minimal needed to disambiguate contexts with unique behavior.
- CallStackTrie AllocTrie;
- for (auto *AllocInfo : AllocInfoIter->second) {
- // Check the full inlined call stack against this one.
- // If we found and thus matched all frames on the call, include
- // this MIB.
- if (stackFrameIncludesInlinedCallStack(AllocInfo->CallStack,
- InlinedCallStack))
- addCallStack(AllocTrie, AllocInfo);
- }
- // We might not have matched any to the full inlined call stack.
- // But if we did, create and attach metadata, or a function attribute if
- // all contexts have identical profiled behavior.
- if (!AllocTrie.empty()) {
- // MemprofMDAttached will be false if a function attribute was
- // attached.
- bool MemprofMDAttached = AllocTrie.buildAndAttachMIBMetadata(CI);
- assert(MemprofMDAttached == I.hasMetadata(LLVMContext::MD_memprof));
- if (MemprofMDAttached) {
- // Add callsite metadata for the instruction's location list so that
- // it simpler later on to identify which part of the MIB contexts
- // are from this particular instruction (including during inlining,
- // when the callsite metdata will be updated appropriately).
- // FIXME: can this be changed to strip out the matching stack
- // context ids from the MIB contexts and not add any callsite
- // metadata here to save space?
- addCallsiteMetadata(I, InlinedCallStack, Ctx);
- }
- }
- continue;
- }
-
- // Otherwise, add callsite metadata. If we reach here then we found the
- // instruction's leaf location in the callsites map and not the allocation
- // map.
- assert(CallSitesIter != LocHashToCallSites.end());
- for (auto CallStackIdx : CallSitesIter->second) {
- // If we found and thus matched all frames on the call, create and
- // attach call stack metadata.
- if (stackFrameIncludesInlinedCallStack(
- *CallStackIdx.first, InlinedCallStack, CallStackIdx.second)) {
- addCallsiteMetadata(I, InlinedCallStack, Ctx);
- // Only need to find one with a matching call stack and add a single
- // callsite metadata.
- break;
- }
- }
- }
- }
-
- return true;
-}
-
void PGOUseFunc::handleInstrProfError(Error Err, uint64_t MismatchedFuncSum) {
handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
auto &Ctx = M->getContext();
@@ -2310,7 +2045,7 @@ static bool annotateAllFunctions(
// Read and match memprof first since we do this via debug info and can
// match even if there is an IR mismatch detected for regular PGO below.
if (PGOReader->hasMemoryProfile())
- Func.readMemprof(PGOReader.get());
+ readMemprof(M, F, PGOReader.get(), TLI);
if (!PGOReader->isIRLevelProfile())
continue;
More information about the llvm-commits
mailing list