[llvm] 98ed423 - Restore "[MemProf] ThinLTO summary support" with fixes
Teresa Johnson via llvm-commits
llvm-commits at lists.llvm.org
Tue Nov 15 08:55:30 PST 2022
Author: Teresa Johnson
Date: 2022-11-15T08:55:17-08:00
New Revision: 98ed423361de2f9dc0113a31be2aa04524489ca9
URL: https://github.com/llvm/llvm-project/commit/98ed423361de2f9dc0113a31be2aa04524489ca9
DIFF: https://github.com/llvm/llvm-project/commit/98ed423361de2f9dc0113a31be2aa04524489ca9.diff
LOG: Restore "[MemProf] ThinLTO summary support" with fixes
This restores 47459455009db4790ffc3765a2ec0f8b4934c2a4, which was
reverted in commit 452a14efc84edf808d1e2953dad2c694972b312f, along with
fixes for a couple of bot failures.
Added:
llvm/test/Assembler/thinlto-memprof-summary.ll
llvm/test/ThinLTO/X86/memprof-summary.ll
Modified:
llvm/include/llvm/Analysis/MemoryProfileInfo.h
llvm/include/llvm/AsmParser/LLParser.h
llvm/include/llvm/AsmParser/LLToken.h
llvm/include/llvm/AsmParser/Parser.h
llvm/include/llvm/Bitcode/BitcodeReader.h
llvm/include/llvm/Bitcode/LLVMBitCodes.h
llvm/include/llvm/IR/ModuleSummaryIndex.h
llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
llvm/lib/Analysis/MemoryProfileInfo.cpp
llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
llvm/lib/AsmParser/LLLexer.cpp
llvm/lib/AsmParser/LLParser.cpp
llvm/lib/AsmParser/Parser.cpp
llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
llvm/lib/Bitcode/Reader/BitcodeReader.cpp
llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
llvm/lib/IR/AsmWriter.cpp
llvm/lib/LTO/LTO.cpp
llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index 1b12e78eaeba3..4758cbc23fe3c 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -17,18 +17,12 @@
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include <map>
namespace llvm {
namespace memprof {
-// Allocation type assigned to an allocation reached by a given context.
-// More can be added but initially this is just noncold and cold.
-// Values should be powers of two so that they can be ORed, in particular to
-// track allocations that have different behavior with different calling
-// contexts.
-enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 };
-
/// Return the allocation type for a given set of memory profile values.
AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize,
uint64_t MinLifetime);
@@ -106,6 +100,62 @@ class CallStackTrie {
bool buildAndAttachMIBMetadata(CallBase *CI);
};
+/// Helper class to iterate through stack ids in both metadata (memprof MIB and
+/// callsite) and the corresponding ThinLTO summary data structures
+/// (CallsiteInfo and MIBInfo). This simplifies implementation of client code
+/// which doesn't need to worry about whether we are operating with IR (Regular
+/// LTO), or summary (ThinLTO).
+template <class NodeT, class IteratorT> class CallStack {
+public:
+ CallStack(const NodeT *N = nullptr) : N(N) {}
+
+ // Implement minimum required methods for range-based for loop.
+ // The default implementation assumes we are operating on ThinLTO data
+ // structures, which have a vector of StackIdIndices. There are specialized
+ // versions provided to iterate through metadata.
+ struct CallStackIterator {
+ const NodeT *N = nullptr;
+ IteratorT Iter;
+ CallStackIterator(const NodeT *N, bool End) : N(N) {
+ if (!N)
+ return;
+ Iter = End ? N->StackIdIndices.end() : N->StackIdIndices.begin();
+ }
+ uint64_t operator*() {
+ assert(Iter != N->StackIdIndices.end());
+ return *Iter;
+ }
+ bool operator==(const CallStackIterator &rhs) { return Iter == rhs.Iter; }
+ bool operator!=(const CallStackIterator &rhs) { return !(*this == rhs); }
+ void operator++() { ++Iter; }
+ };
+
+ bool empty() const { return N == nullptr; }
+
+ CallStackIterator begin() const {
+ return CallStackIterator(N, /*End*/ false);
+ }
+ CallStackIterator end() const { return CallStackIterator(N, /*End*/ true); }
+
+ CallStackIterator beginAfterSharedPrefix(CallStack &Other) {
+ CallStackIterator Cur = begin();
+ for (CallStackIterator OtherCur = Other.begin();
+ Cur != end() && OtherCur != Other.end(); ++Cur, ++OtherCur)
+ assert(*Cur == *OtherCur);
+ return Cur;
+ }
+
+private:
+ const NodeT *N = nullptr;
+};
+
+/// Specializations for iterating through IR metadata stack contexts.
+template <>
+CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
+ const MDNode *N, bool End);
+template <>
+uint64_t CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*();
+
} // end namespace memprof
} // end namespace llvm
diff --git a/llvm/include/llvm/AsmParser/LLParser.h b/llvm/include/llvm/AsmParser/LLParser.h
index e9813c34ce373..d936c6f36b6f5 100644
--- a/llvm/include/llvm/AsmParser/LLParser.h
+++ b/llvm/include/llvm/AsmParser/LLParser.h
@@ -406,6 +406,10 @@ namespace llvm {
void addGlobalValueToIndex(std::string Name, GlobalValue::GUID,
GlobalValue::LinkageTypes Linkage, unsigned ID,
std::unique_ptr<GlobalValueSummary> Summary);
+ bool parseOptionalAllocs(std::vector<AllocInfo> &Allocs);
+ bool parseMemProfs(std::vector<MIBInfo> &MIBs);
+ bool parseAllocType(uint8_t &AllocType);
+ bool parseOptionalCallsites(std::vector<CallsiteInfo> &Callsites);
// Type Parsing.
bool parseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);
diff --git a/llvm/include/llvm/AsmParser/LLToken.h b/llvm/include/llvm/AsmParser/LLToken.h
index 5fc192f145aaf..8a679007a504f 100644
--- a/llvm/include/llvm/AsmParser/LLToken.h
+++ b/llvm/include/llvm/AsmParser/LLToken.h
@@ -406,6 +406,15 @@ enum Kind {
kw_byte,
kw_bit,
kw_varFlags,
+ // The following are used by MemProf summary info.
+ kw_callsites,
+ kw_clones,
+ kw_stackIds,
+ kw_allocs,
+ kw_versions,
+ kw_memProf,
+ kw_notcold,
+ kw_notcoldandcold,
// GV's with __attribute__((no_sanitize("address"))), or things in
// -fsanitize-ignorelist when built with ASan.
diff --git a/llvm/include/llvm/AsmParser/Parser.h b/llvm/include/llvm/AsmParser/Parser.h
index 6710ae6e358d4..336e95c2399f0 100644
--- a/llvm/include/llvm/AsmParser/Parser.h
+++ b/llvm/include/llvm/AsmParser/Parser.h
@@ -105,6 +105,17 @@ ParsedModuleAndIndex parseAssemblyFileWithIndexNoUpgradeDebugInfo(
std::unique_ptr<ModuleSummaryIndex>
parseSummaryIndexAssemblyFile(StringRef Filename, SMDiagnostic &Err);
+/// The function is a secondary interface to the LLVM Assembly Parser. It parses
+/// an ASCII string that (presumably) contains LLVM Assembly code for a module
+/// summary. It returns a a ModuleSummaryIndex with the corresponding features.
+/// Note that this does not verify that the generated Index is valid, so you
+/// should run the verifier after parsing the file to check that it is okay.
+/// Parse LLVM Assembly from a string
+/// \param AsmString The string containing assembly
+/// \param Err Error result info.
+std::unique_ptr<ModuleSummaryIndex>
+parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err);
+
/// parseAssemblyFile and parseAssemblyString are wrappers around this function.
/// Parse LLVM Assembly from a MemoryBuffer.
/// \param F The MemoryBuffer containing assembly
diff --git a/llvm/include/llvm/Bitcode/BitcodeReader.h b/llvm/include/llvm/Bitcode/BitcodeReader.h
index 39ea48c33fc36..bc1f88f732290 100644
--- a/llvm/include/llvm/Bitcode/BitcodeReader.h
+++ b/llvm/include/llvm/Bitcode/BitcodeReader.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Bitstream/BitCodeEnums.h"
+#include "llvm/IR/GlobalValue.h"
#include "llvm/Support/Endian.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/ErrorOr.h"
@@ -117,8 +118,10 @@ typedef llvm::function_ref<Optional<std::string>(StringRef)>
/// Parse the specified bitcode buffer and merge its module summary index
/// into CombinedIndex.
- Error readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
- uint64_t ModuleId);
+ Error
+ readSummary(ModuleSummaryIndex &CombinedIndex, StringRef ModulePath,
+ uint64_t ModuleId,
+ std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
};
struct BitcodeFileContents {
diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index 74a51d5ce6907..2b474b67425c8 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -301,6 +301,22 @@ enum GlobalValueSummarySymtabCodes {
// Range information for accessed offsets for every argument.
// [n x (paramno, range, numcalls, numcalls x (callee_guid, paramno, range))]
FS_PARAM_ACCESS = 25,
+ // Summary of per-module memprof callsite metadata.
+ // [valueid, n x stackidindex]
+ FS_PERMODULE_CALLSITE_INFO = 26,
+ // Summary of per-module allocation memprof metadata.
+ // [n x (alloc type, nummib, nummib x stackidindex)]
+ FS_PERMODULE_ALLOC_INFO = 27,
+ // Summary of combined index memprof callsite metadata.
+ // [valueid, numstackindices, numver,
+ // numstackindices x stackidindex, numver x version]
+ FS_COMBINED_CALLSITE_INFO = 28,
+ // Summary of combined index allocation memprof metadata.
+ // [nummib, numver,
+ // nummib x (alloc type, numstackids, numstackids x stackidindex),
+ // numver x version]
+ FS_COMBINED_ALLOC_INFO = 29,
+ FS_STACK_IDS = 30,
};
enum MetadataCodes {
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndex.h b/llvm/include/llvm/IR/ModuleSummaryIndex.h
index 9f25b5a1e5503..4bcf21edfecb4 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndex.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndex.h
@@ -19,6 +19,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
@@ -284,6 +285,79 @@ template <> struct DenseMapInfo<ValueInfo> {
static unsigned getHashValue(ValueInfo I) { return (uintptr_t)I.getRef(); }
};
+/// Summary of memprof callsite metadata.
+struct CallsiteInfo {
+ // Actual callee function.
+ ValueInfo Callee;
+
+ // Used to record whole program analysis cloning decisions.
+ // The ThinLTO backend will need to create as many clones as there are entries
+ // in the vector (it is expected and should be confirmed that all such
+ // summaries in the same FunctionSummary have the same number of entries).
+ // Each index records version info for the corresponding clone of this
+ // function. The value is the callee clone it calls (becomes the appended
+ // suffix id). Index 0 is the original version, and a value of 0 calls the
+ // original callee.
+ SmallVector<unsigned> Clones{0};
+
+ // Represents stack ids in this context, recorded as indices into the
+ // StackIds vector in the summary index, which in turn holds the full 64-bit
+ // stack ids. This reduces memory as there are in practice far fewer unique
+ // stack ids than stack id references.
+ SmallVector<unsigned> StackIdIndices;
+
+ CallsiteInfo(ValueInfo Callee, SmallVector<unsigned> StackIdIndices)
+ : Callee(Callee), StackIdIndices(std::move(StackIdIndices)) {}
+ CallsiteInfo(ValueInfo Callee, SmallVector<unsigned> Clones,
+ SmallVector<unsigned> StackIdIndices)
+ : Callee(Callee), Clones(std::move(Clones)),
+ StackIdIndices(std::move(StackIdIndices)) {}
+};
+
+// Allocation type assigned to an allocation reached by a given context.
+// More can be added but initially this is just noncold and cold.
+// Values should be powers of two so that they can be ORed, in particular to
+// track allocations that have different behavior with different calling
+// contexts.
+enum class AllocationType : uint8_t { None = 0, NotCold = 1, Cold = 2 };
+
+/// Summary of a single MIB in a memprof metadata on allocations.
+struct MIBInfo {
+ // The allocation type for this profiled context.
+ AllocationType AllocType;
+
+ // Represents stack ids in this context, recorded as indices into the
+ // StackIds vector in the summary index, which in turn holds the full 64-bit
+ // stack ids. This reduces memory as there are in practice far fewer unique
+ // stack ids than stack id references.
+ SmallVector<unsigned> StackIdIndices;
+
+ MIBInfo(AllocationType AllocType, SmallVector<unsigned> StackIdIndices)
+ : AllocType(AllocType), StackIdIndices(std::move(StackIdIndices)) {}
+};
+
+/// Summary of memprof metadata on allocations.
+struct AllocInfo {
+ // Used to record whole program analysis cloning decisions.
+ // The ThinLTO backend will need to create as many clones as there are entries
+ // in the vector (it is expected and should be confirmed that all such
+ // summaries in the same FunctionSummary have the same number of entries).
+ // Each index records version info for the corresponding clone of this
+ // function. The value is the allocation type of the corresponding allocation.
+ // Index 0 is the original version. Before cloning, index 0 may have more than
+ // one allocation type.
+ SmallVector<uint8_t> Versions;
+
+ // Vector of MIBs in this memprof metadata.
+ std::vector<MIBInfo> MIBs;
+
+ AllocInfo(std::vector<MIBInfo> MIBs) : MIBs(std::move(MIBs)) {
+ Versions.push_back(0);
+ }
+ AllocInfo(SmallVector<uint8_t> Versions, std::vector<MIBInfo> MIBs)
+ : Versions(std::move(Versions)), MIBs(std::move(MIBs)) {}
+};
+
/// Function and variable summary information to aid decisions and
/// implementation of importing.
class GlobalValueSummary {
@@ -678,7 +752,8 @@ class FunctionSummary : public GlobalValueSummary {
std::vector<FunctionSummary::VFuncId>(),
std::vector<FunctionSummary::ConstVCall>(),
std::vector<FunctionSummary::ConstVCall>(),
- std::vector<FunctionSummary::ParamAccess>());
+ std::vector<FunctionSummary::ParamAccess>(),
+ std::vector<CallsiteInfo>(), std::vector<AllocInfo>());
}
/// A dummy node to reference external functions that aren't in the index
@@ -706,6 +781,25 @@ class FunctionSummary : public GlobalValueSummary {
using ParamAccessesTy = std::vector<ParamAccess>;
std::unique_ptr<ParamAccessesTy> ParamAccesses;
+ /// Optional list of memprof callsite metadata summaries. The correspondence
+ /// between the callsite summary and the callsites in the function is implied
+ /// by the order in the vector (and can be validated by comparing the stack
+ /// ids in the CallsiteInfo to those in the instruction callsite metadata).
+ /// As a memory savings optimization, we only create these for the prevailing
+ /// copy of a symbol when creating the combined index during LTO.
+ using CallsitesTy = std::vector<CallsiteInfo>;
+ std::unique_ptr<CallsitesTy> Callsites;
+
+ /// Optional list of allocation memprof metadata summaries. The correspondence
+ /// between the alloc memprof summary and the allocation callsites in the
+ /// function is implied by the order in the vector (and can be validated by
+ /// comparing the stack ids in the AllocInfo to those in the instruction
+ /// memprof metadata).
+ /// As a memory savings optimization, we only create these for the prevailing
+ /// copy of a symbol when creating the combined index during LTO.
+ using AllocsTy = std::vector<AllocInfo>;
+ std::unique_ptr<AllocsTy> Allocs;
+
public:
FunctionSummary(GVFlags Flags, unsigned NumInsts, FFlags FunFlags,
uint64_t EntryCount, std::vector<ValueInfo> Refs,
@@ -715,7 +809,8 @@ class FunctionSummary : public GlobalValueSummary {
std::vector<VFuncId> TypeCheckedLoadVCalls,
std::vector<ConstVCall> TypeTestAssumeConstVCalls,
std::vector<ConstVCall> TypeCheckedLoadConstVCalls,
- std::vector<ParamAccess> Params)
+ std::vector<ParamAccess> Params, CallsitesTy CallsiteList,
+ AllocsTy AllocList)
: GlobalValueSummary(FunctionKind, Flags, std::move(Refs)),
InstCount(NumInsts), FunFlags(FunFlags), EntryCount(EntryCount),
CallGraphEdgeList(std::move(CGEdges)) {
@@ -729,6 +824,10 @@ class FunctionSummary : public GlobalValueSummary {
std::move(TypeCheckedLoadConstVCalls)});
if (!Params.empty())
ParamAccesses = std::make_unique<ParamAccessesTy>(std::move(Params));
+ if (!CallsiteList.empty())
+ Callsites = std::make_unique<CallsitesTy>(std::move(CallsiteList));
+ if (!AllocList.empty())
+ Allocs = std::make_unique<AllocsTy>(std::move(AllocList));
}
// Gets the number of readonly and writeonly refs in RefEdgeList
std::pair<unsigned, unsigned> specialRefCounts() const;
@@ -832,6 +931,18 @@ class FunctionSummary : public GlobalValueSummary {
const TypeIdInfo *getTypeIdInfo() const { return TIdInfo.get(); };
+ ArrayRef<CallsiteInfo> callsites() const {
+ if (Callsites)
+ return *Callsites;
+ return {};
+ }
+
+ ArrayRef<AllocInfo> allocs() const {
+ if (Allocs)
+ return *Allocs;
+ return {};
+ }
+
friend struct GraphTraits<ValueInfo>;
};
@@ -1163,6 +1274,16 @@ class ModuleSummaryIndex {
// the total number of basic blocks in the LTO unit in the combined index.
uint64_t BlockCount;
+ // List of unique stack ids (hashes). We use a 4B index of the id in the
+ // stack id lists on the alloc and callsite summaries for memory savings,
+ // since the number of unique ids is in practice much smaller than the
+ // number of stack id references in the summaries.
+ std::vector<uint64_t> StackIds;
+
+ // Temporary map while building StackIds list. Clear when index is completely
+ // built via releaseTemporaryMemory.
+ std::map<uint64_t, unsigned> StackIdToIndex;
+
// YAML I/O support.
friend yaml::MappingTraits<ModuleSummaryIndex>;
@@ -1205,6 +1326,31 @@ class ModuleSummaryIndex {
const_gvsummary_iterator end() const { return GlobalValueMap.end(); }
size_t size() const { return GlobalValueMap.size(); }
+ const std::vector<uint64_t> &stackIds() const { return StackIds; }
+
+ unsigned addOrGetStackIdIndex(uint64_t StackId) {
+ auto Inserted = StackIdToIndex.insert({StackId, StackIds.size()});
+ if (Inserted.second)
+ StackIds.push_back(StackId);
+ return Inserted.first->second;
+ }
+
+ uint64_t getStackIdAtIndex(unsigned Index) const {
+ assert(StackIds.size() > Index);
+ return StackIds[Index];
+ }
+
+ // Facility to release memory from data structures only needed during index
+ // construction (including while building combined index). Currently this only
+ // releases the temporary map used while constructing a correspondence between
+ // stack ids and their index in the StackIds vector. Mostly impactful when
+ // building a large combined index.
+ void releaseTemporaryMemory() {
+ assert(StackIdToIndex.size() == StackIds.size());
+ StackIdToIndex.clear();
+ StackIds.shrink_to_fit();
+ }
+
/// Convenience function for doing a DFS on a ValueInfo. Marks the function in
/// the FunctionHasParent map.
static void discoverNodes(ValueInfo V,
diff --git a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
index 74e92797f15ce..33e57e5f2102f 100644
--- a/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
+++ b/llvm/include/llvm/IR/ModuleSummaryIndexYAML.h
@@ -234,7 +234,8 @@ template <> struct CustomMappingTraits<GlobalValueSummaryMapTy> {
std::move(FSum.TypeCheckedLoadVCalls),
std::move(FSum.TypeTestAssumeConstVCalls),
std::move(FSum.TypeCheckedLoadConstVCalls),
- ArrayRef<FunctionSummary::ParamAccess>{}));
+ ArrayRef<FunctionSummary::ParamAccess>{}, ArrayRef<CallsiteInfo>{},
+ ArrayRef<AllocInfo>{}));
}
}
static void output(IO &io, GlobalValueSummaryMapTy &V) {
diff --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index f28ddbbfc8498..f42fff5f195f8 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -224,3 +224,21 @@ bool CallStackTrie::buildAndAttachMIBMetadata(CallBase *CI) {
CI->setMetadata(LLVMContext::MD_memprof, MDNode::get(Ctx, MIBNodes));
return true;
}
+
+template <>
+CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::CallStackIterator(
+ const MDNode *N, bool End)
+ : N(N) {
+ if (!N)
+ return;
+ Iter = End ? N->op_end() : N->op_begin();
+}
+
+template <>
+uint64_t
+CallStack<MDNode, MDNode::op_iterator>::CallStackIterator::operator*() {
+ assert(Iter != N->op_end());
+ ConstantInt *StackIdCInt = mdconst::dyn_extract<ConstantInt>(*Iter);
+ assert(StackIdCInt);
+ return StackIdCInt->getZExtValue();
+}
diff --git a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
index 52827c210b5ad..e8309f51a4987 100644
--- a/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
+++ b/llvm/lib/Analysis/ModuleSummaryAnalysis.cpp
@@ -24,6 +24,7 @@
#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryProfileInfo.h"
#include "llvm/Analysis/ProfileSummaryInfo.h"
#include "llvm/Analysis/StackSafetyAnalysis.h"
#include "llvm/Analysis/TypeMetadataUtils.h"
@@ -56,6 +57,7 @@
#include <vector>
using namespace llvm;
+using namespace llvm::memprof;
#define DEBUG_TYPE "module-summary-analysis"
@@ -275,6 +277,9 @@ static void computeFunctionSummary(
std::vector<const Instruction *> NonVolatileLoads;
std::vector<const Instruction *> NonVolatileStores;
+ std::vector<CallsiteInfo> Callsites;
+ std::vector<AllocInfo> Allocs;
+
bool HasInlineAsmMaybeReferencingInternal = false;
bool HasIndirBranchToBlockAddress = false;
bool HasUnknownCall = false;
@@ -417,6 +422,57 @@ static void computeFunctionSummary(
CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
.updateHotness(getHotness(Candidate.Count, PSI));
}
+
+ // TODO: Skip indirect calls for now. Need to handle these better, likely
+ // by creating multiple Callsites, one per target, then speculatively
+ // devirtualize while applying clone info in the ThinLTO backends. This
+ // will also be important because we will have a different set of clone
+ // versions per target. This handling needs to match that in the ThinLTO
+ // backend so we handle things consistently for matching of callsite
+ // summaries to instructions.
+ if (!CalledFunction)
+ continue;
+
+ // Compute the list of stack ids first (so we can trim them from the stack
+ // ids on any MIBs).
+ CallStack<MDNode, MDNode::op_iterator> InstCallsite(
+ I.getMetadata(LLVMContext::MD_callsite));
+ auto *MemProfMD = I.getMetadata(LLVMContext::MD_memprof);
+ if (MemProfMD) {
+ std::vector<MIBInfo> MIBs;
+ for (auto &MDOp : MemProfMD->operands()) {
+ auto *MIBMD = cast<const MDNode>(MDOp);
+ MDNode *StackNode = getMIBStackNode(MIBMD);
+ assert(StackNode);
+ SmallVector<unsigned> StackIdIndices;
+ CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
+ // Collapse out any on the allocation call (inlining).
+ for (auto ContextIter =
+ StackContext.beginAfterSharedPrefix(InstCallsite);
+ ContextIter != StackContext.end(); ++ContextIter) {
+ unsigned StackIdIdx = Index.addOrGetStackIdIndex(*ContextIter);
+ // If this is a direct recursion, simply skip the duplicate
+ // entries. If this is mutual recursion, handling is left to
+ // the LTO link analysis client.
+ if (StackIdIndices.empty() || StackIdIndices.back() != StackIdIdx)
+ StackIdIndices.push_back(StackIdIdx);
+ }
+ MIBs.push_back(
+ MIBInfo(getMIBAllocType(MIBMD), std::move(StackIdIndices)));
+ }
+ Allocs.push_back(AllocInfo(std::move(MIBs)));
+ } else if (!InstCallsite.empty()) {
+ SmallVector<unsigned> StackIdIndices;
+ for (auto StackId : InstCallsite)
+ StackIdIndices.push_back(Index.addOrGetStackIdIndex(StackId));
+ // Use the original CalledValue, in case it was an alias. We want
+ // to record the call edge to the alias in that case. Eventually
+ // an alias summary will be created to associate the alias and
+ // aliasee.
+ auto CalleeValueInfo =
+ Index.getOrInsertValueInfo(cast<GlobalValue>(CalledValue));
+ Callsites.push_back({CalleeValueInfo, StackIdIndices});
+ }
}
}
Index.addBlockCount(F.size());
@@ -508,7 +564,8 @@ static void computeFunctionSummary(
CallGraphEdges.takeVector(), TypeTests.takeVector(),
TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
TypeTestAssumeConstVCalls.takeVector(),
- TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses));
+ TypeCheckedLoadConstVCalls.takeVector(), std::move(ParamAccesses),
+ std::move(Callsites), std::move(Allocs));
if (NonRenamableLocal)
CantBePromoted.insert(F.getGUID());
Index.addGlobalValueSummary(F, std::move(FuncSummary));
@@ -757,7 +814,8 @@ ModuleSummaryIndex llvm::buildModuleSummaryIndex(
ArrayRef<FunctionSummary::VFuncId>{},
ArrayRef<FunctionSummary::ConstVCall>{},
ArrayRef<FunctionSummary::ConstVCall>{},
- ArrayRef<FunctionSummary::ParamAccess>{});
+ ArrayRef<FunctionSummary::ParamAccess>{},
+ ArrayRef<CallsiteInfo>{}, ArrayRef<AllocInfo>{});
Index.addGlobalValueSummary(*GV, std::move(Summary));
} else {
std::unique_ptr<GlobalVarSummary> Summary =
diff --git a/llvm/lib/AsmParser/LLLexer.cpp b/llvm/lib/AsmParser/LLLexer.cpp
index 2a171df168fca..c33dc9710f35a 100644
--- a/llvm/lib/AsmParser/LLLexer.cpp
+++ b/llvm/lib/AsmParser/LLLexer.cpp
@@ -772,6 +772,14 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(byte);
KEYWORD(bit);
KEYWORD(varFlags);
+ KEYWORD(callsites);
+ KEYWORD(clones);
+ KEYWORD(stackIds);
+ KEYWORD(allocs);
+ KEYWORD(versions);
+ KEYWORD(memProf);
+ KEYWORD(notcold);
+ KEYWORD(notcoldandcold);
#undef KEYWORD
diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp
index 7fc51682533f6..dde0672da8e48 100644
--- a/llvm/lib/AsmParser/LLParser.cpp
+++ b/llvm/lib/AsmParser/LLParser.cpp
@@ -8682,6 +8682,8 @@ bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
FunctionSummary::TypeIdInfo TypeIdInfo;
std::vector<FunctionSummary::ParamAccess> ParamAccesses;
std::vector<ValueInfo> Refs;
+ std::vector<CallsiteInfo> Callsites;
+ std::vector<AllocInfo> Allocs;
// Default is all-zeros (conservative values).
FunctionSummary::FFlags FFlags = {};
if (parseToken(lltok::colon, "expected ':' here") ||
@@ -8716,6 +8718,14 @@ bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
if (parseOptionalParamAccesses(ParamAccesses))
return true;
break;
+ case lltok::kw_allocs:
+ if (parseOptionalAllocs(Allocs))
+ return true;
+ break;
+ case lltok::kw_callsites:
+ if (parseOptionalCallsites(Callsites))
+ return true;
+ break;
default:
return error(Lex.getLoc(), "expected optional function summary field");
}
@@ -8731,7 +8741,7 @@ bool LLParser::parseFunctionSummary(std::string Name, GlobalValue::GUID GUID,
std::move(TypeIdInfo.TypeCheckedLoadVCalls),
std::move(TypeIdInfo.TypeTestAssumeConstVCalls),
std::move(TypeIdInfo.TypeCheckedLoadConstVCalls),
- std::move(ParamAccesses));
+ std::move(ParamAccesses), std::move(Callsites), std::move(Allocs));
FS->setModulePath(ModulePath);
@@ -9683,3 +9693,220 @@ bool LLParser::parseGVReference(ValueInfo &VI, unsigned &GVId) {
VI.setWriteOnly();
return false;
}
+
+/// OptionalAllocs
+/// := 'allocs' ':' '(' Alloc [',' Alloc]* ')'
+/// Alloc ::= '(' 'versions' ':' '(' Version [',' Version]* ')'
+/// ',' MemProfs ')'
+/// Version ::= UInt32
+bool LLParser::parseOptionalAllocs(std::vector<AllocInfo> &Allocs) {
+ assert(Lex.getKind() == lltok::kw_allocs);
+ Lex.Lex();
+
+ if (parseToken(lltok::colon, "expected ':' in allocs") ||
+ parseToken(lltok::lparen, "expected '(' in allocs"))
+ return true;
+
+ // parse each alloc
+ do {
+ if (parseToken(lltok::lparen, "expected '(' in alloc") ||
+ parseToken(lltok::kw_versions, "expected 'versions' in alloc") ||
+ parseToken(lltok::colon, "expected ':'") ||
+ parseToken(lltok::lparen, "expected '(' in versions"))
+ return true;
+
+ SmallVector<uint8_t> Versions;
+ do {
+ uint8_t V = 0;
+ if (parseAllocType(V))
+ return true;
+ Versions.push_back(V);
+ } while (EatIfPresent(lltok::comma));
+
+ if (parseToken(lltok::rparen, "expected ')' in versions") ||
+ parseToken(lltok::comma, "expected ',' in alloc"))
+ return true;
+
+ std::vector<MIBInfo> MIBs;
+ if (parseMemProfs(MIBs))
+ return true;
+
+ Allocs.push_back({Versions, MIBs});
+
+ if (parseToken(lltok::rparen, "expected ')' in alloc"))
+ return true;
+ } while (EatIfPresent(lltok::comma));
+
+ if (parseToken(lltok::rparen, "expected ')' in allocs"))
+ return true;
+
+ return false;
+}
+
+/// MemProfs
+/// := 'memProf' ':' '(' MemProf [',' MemProf]* ')'
+/// MemProf ::= '(' 'type' ':' AllocType
+/// ',' 'stackIds' ':' '(' StackId [',' StackId]* ')' ')'
+/// StackId ::= UInt64
+bool LLParser::parseMemProfs(std::vector<MIBInfo> &MIBs) {
+ assert(Lex.getKind() == lltok::kw_memProf);
+ Lex.Lex();
+
+ if (parseToken(lltok::colon, "expected ':' in memprof") ||
+ parseToken(lltok::lparen, "expected '(' in memprof"))
+ return true;
+
+ // parse each MIB
+ do {
+ if (parseToken(lltok::lparen, "expected '(' in memprof") ||
+ parseToken(lltok::kw_type, "expected 'type' in memprof") ||
+ parseToken(lltok::colon, "expected ':'"))
+ return true;
+
+ uint8_t AllocType;
+ if (parseAllocType(AllocType))
+ return true;
+
+ if (parseToken(lltok::comma, "expected ',' in memprof") ||
+ parseToken(lltok::kw_stackIds, "expected 'stackIds' in memprof") ||
+ parseToken(lltok::colon, "expected ':'") ||
+ parseToken(lltok::lparen, "expected '(' in stackIds"))
+ return true;
+
+ SmallVector<unsigned> StackIdIndices;
+ do {
+ uint64_t StackId = 0;
+ if (parseUInt64(StackId))
+ return true;
+ StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
+ } while (EatIfPresent(lltok::comma));
+
+ if (parseToken(lltok::rparen, "expected ')' in stackIds"))
+ return true;
+
+ MIBs.push_back({(AllocationType)AllocType, StackIdIndices});
+
+ if (parseToken(lltok::rparen, "expected ')' in memprof"))
+ return true;
+ } while (EatIfPresent(lltok::comma));
+
+ if (parseToken(lltok::rparen, "expected ')' in memprof"))
+ return true;
+
+ return false;
+}
+
+/// AllocType
+/// := ('none'|'notcold'|'cold'|'notcoldandcold')
+bool LLParser::parseAllocType(uint8_t &AllocType) {
+ switch (Lex.getKind()) {
+ case lltok::kw_none:
+ AllocType = (uint8_t)AllocationType::None;
+ break;
+ case lltok::kw_notcold:
+ AllocType = (uint8_t)AllocationType::NotCold;
+ break;
+ case lltok::kw_cold:
+ AllocType = (uint8_t)AllocationType::Cold;
+ break;
+ case lltok::kw_notcoldandcold:
+ AllocType =
+ (uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold;
+ break;
+ default:
+ return error(Lex.getLoc(), "invalid alloc type");
+ }
+ Lex.Lex();
+ return false;
+}
+
+/// OptionalCallsites
+/// := 'callsites' ':' '(' Callsite [',' Callsite]* ')'
+/// Callsite ::= '(' 'callee' ':' GVReference
+/// ',' 'clones' ':' '(' Version [',' Version]* ')'
+/// ',' 'stackIds' ':' '(' StackId [',' StackId]* ')' ')'
+/// Version ::= UInt32
+/// StackId ::= UInt64
+bool LLParser::parseOptionalCallsites(std::vector<CallsiteInfo> &Callsites) {
+ assert(Lex.getKind() == lltok::kw_callsites);
+ Lex.Lex();
+
+ if (parseToken(lltok::colon, "expected ':' in callsites") ||
+ parseToken(lltok::lparen, "expected '(' in callsites"))
+ return true;
+
+ IdToIndexMapType IdToIndexMap;
+ // parse each callsite
+ do {
+ if (parseToken(lltok::lparen, "expected '(' in callsite") ||
+ parseToken(lltok::kw_callee, "expected 'callee' in callsite") ||
+ parseToken(lltok::colon, "expected ':'"))
+ return true;
+
+ ValueInfo VI;
+ unsigned GVId = 0;
+ LocTy Loc = Lex.getLoc();
+ if (!EatIfPresent(lltok::kw_null)) {
+ if (parseGVReference(VI, GVId))
+ return true;
+ }
+
+ if (parseToken(lltok::comma, "expected ',' in callsite") ||
+ parseToken(lltok::kw_clones, "expected 'clones' in callsite") ||
+ parseToken(lltok::colon, "expected ':'") ||
+ parseToken(lltok::lparen, "expected '(' in clones"))
+ return true;
+
+ SmallVector<unsigned> Clones;
+ do {
+ unsigned V = 0;
+ if (parseUInt32(V))
+ return true;
+ Clones.push_back(V);
+ } while (EatIfPresent(lltok::comma));
+
+ if (parseToken(lltok::rparen, "expected ')' in clones") ||
+ parseToken(lltok::comma, "expected ',' in callsite") ||
+ parseToken(lltok::kw_stackIds, "expected 'stackIds' in callsite") ||
+ parseToken(lltok::colon, "expected ':'") ||
+ parseToken(lltok::lparen, "expected '(' in stackIds"))
+ return true;
+
+ SmallVector<unsigned> StackIdIndices;
+ do {
+ uint64_t StackId = 0;
+ if (parseUInt64(StackId))
+ return true;
+ StackIdIndices.push_back(Index->addOrGetStackIdIndex(StackId));
+ } while (EatIfPresent(lltok::comma));
+
+ if (parseToken(lltok::rparen, "expected ')' in stackIds"))
+ return true;
+
+ // Keep track of the Callsites array index needing a forward reference.
+ // We will save the location of the ValueInfo needing an update, but
+ // can only do so once the SmallVector is finalized.
+ if (VI.getRef() == FwdVIRef)
+ IdToIndexMap[GVId].push_back(std::make_pair(Callsites.size(), Loc));
+ Callsites.push_back({VI, Clones, StackIdIndices});
+
+ if (parseToken(lltok::rparen, "expected ')' in callsite"))
+ return true;
+ } while (EatIfPresent(lltok::comma));
+
+ // Now that the Callsites vector is finalized, it is safe to save the
+ // locations of any forward GV references that need updating later.
+ for (auto I : IdToIndexMap) {
+ auto &Infos = ForwardRefValueInfos[I.first];
+ for (auto P : I.second) {
+ assert(Callsites[P.first].Callee.getRef() == FwdVIRef &&
+ "Forward referenced ValueInfo expected to be empty");
+ Infos.emplace_back(&Callsites[P.first].Callee, P.second);
+ }
+ }
+
+ if (parseToken(lltok::rparen, "expected ')' in callsites"))
+ return true;
+
+ return false;
+}
diff --git a/llvm/lib/AsmParser/Parser.cpp b/llvm/lib/AsmParser/Parser.cpp
index 95b9079f0f9cc..4458ae757c160 100644
--- a/llvm/lib/AsmParser/Parser.cpp
+++ b/llvm/lib/AsmParser/Parser.cpp
@@ -177,6 +177,12 @@ llvm::parseSummaryIndexAssemblyFile(StringRef Filename, SMDiagnostic &Err) {
return parseSummaryIndexAssembly(FileOrErr.get()->getMemBufferRef(), Err);
}
+std::unique_ptr<ModuleSummaryIndex>
+llvm::parseSummaryIndexAssemblyString(StringRef AsmString, SMDiagnostic &Err) {
+ MemoryBufferRef F(AsmString, "<string>");
+ return parseSummaryIndexAssembly(F, Err);
+}
+
Constant *llvm::parseConstantValue(StringRef Asm, SMDiagnostic &Err,
const Module &M, const SlotMapping *Slots) {
SourceMgr SM;
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index dd3cac8b8a6fb..97bc828066a50 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -315,6 +315,11 @@ static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID,
STRINGIFY_CODE(FS, TYPE_ID_METADATA)
STRINGIFY_CODE(FS, BLOCK_COUNT)
STRINGIFY_CODE(FS, PARAM_ACCESS)
+ STRINGIFY_CODE(FS, PERMODULE_CALLSITE_INFO)
+ STRINGIFY_CODE(FS, PERMODULE_ALLOC_INFO)
+ STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO)
+ STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
+ STRINGIFY_CODE(FS, STACK_IDS)
}
case bitc::METADATA_ATTACHMENT_ID:
switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 0a2415cbfdefa..c4805602d1ad1 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -883,8 +883,10 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
// they are recorded in the summary index being built.
// We save a GUID which refers to the same global as the ValueInfo, but
// ignoring the linkage, i.e. for values other than local linkage they are
- // identical.
- DenseMap<unsigned, std::tuple<ValueInfo, GlobalValue::GUID>>
+ // identical (this is the second tuple member).
+ // The third tuple member is the real GUID of the ValueInfo.
+ DenseMap<unsigned,
+ std::tuple<ValueInfo, GlobalValue::GUID, GlobalValue::GUID>>
ValueIdToValueInfoMap;
/// Map populated during module path string table parsing, from the
@@ -904,10 +906,19 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
/// this module by the client.
unsigned ModuleId;
+ /// Callback to ask whether a symbol is the prevailing copy when invoked
+ /// during combined index building.
+ std::function<bool(GlobalValue::GUID)> IsPrevailing;
+
+ /// Saves the stack ids from the STACK_IDS record to consult when adding stack
+ /// ids from the lists in the callsite and alloc entries to the index.
+ std::vector<uint64_t> StackIds;
+
public:
- ModuleSummaryIndexBitcodeReader(BitstreamCursor Stream, StringRef Strtab,
- ModuleSummaryIndex &TheIndex,
- StringRef ModulePath, unsigned ModuleId);
+ ModuleSummaryIndexBitcodeReader(
+ BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
+ StringRef ModulePath, unsigned ModuleId,
+ std::function<bool(GlobalValue::GUID)> IsPrevailing = nullptr);
Error parseModule();
@@ -931,7 +942,8 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
std::vector<FunctionSummary::ParamAccess>
parseParamAccesses(ArrayRef<uint64_t> Record);
- std::tuple<ValueInfo, GlobalValue::GUID>
+ template <bool AllowNullValueInfo = false>
+ std::tuple<ValueInfo, GlobalValue::GUID, GlobalValue::GUID>
getValueInfoFromValueId(unsigned ValueId);
void addThisModule();
@@ -6643,9 +6655,10 @@ std::vector<StructType *> BitcodeReader::getIdentifiedStructTypes() const {
ModuleSummaryIndexBitcodeReader::ModuleSummaryIndexBitcodeReader(
BitstreamCursor Cursor, StringRef Strtab, ModuleSummaryIndex &TheIndex,
- StringRef ModulePath, unsigned ModuleId)
+ StringRef ModulePath, unsigned ModuleId,
+ std::function<bool(GlobalValue::GUID)> IsPrevailing)
: BitcodeReaderBase(std::move(Cursor), Strtab), TheIndex(TheIndex),
- ModulePath(ModulePath), ModuleId(ModuleId) {}
+ ModulePath(ModulePath), ModuleId(ModuleId), IsPrevailing(IsPrevailing) {}
void ModuleSummaryIndexBitcodeReader::addThisModule() {
TheIndex.addModule(ModulePath, ModuleId);
@@ -6656,10 +6669,15 @@ ModuleSummaryIndexBitcodeReader::getThisModule() {
return TheIndex.getModule(ModulePath);
}
-std::tuple<ValueInfo, GlobalValue::GUID>
+template <bool AllowNullValueInfo>
+std::tuple<ValueInfo, GlobalValue::GUID, GlobalValue::GUID>
ModuleSummaryIndexBitcodeReader::getValueInfoFromValueId(unsigned ValueId) {
auto VGI = ValueIdToValueInfoMap[ValueId];
- assert(std::get<0>(VGI));
+ // We can have a null value info for memprof callsite info records in
+ // distributed ThinLTO index files when the callee function summary is not
+ // included in the index. The bitcode writer records 0 in that case,
+ // and the caller of this helper will set AllowNullValueInfo to true.
+ assert(AllowNullValueInfo || std::get<0>(VGI));
return VGI;
}
@@ -6682,7 +6700,7 @@ void ModuleSummaryIndexBitcodeReader::setValueGUID(
ValueIdToValueInfoMap[ValueID] = std::make_tuple(
TheIndex.getOrInsertValueInfo(
ValueGUID, UseStrtab ? ValueName : TheIndex.saveString(ValueName)),
- OriginalNameID);
+ OriginalNameID, ValueGUID);
}
// Specialized value symbol table parser used when reading module index
@@ -6770,8 +6788,8 @@ Error ModuleSummaryIndexBitcodeReader::parseValueSymbolTable(
GlobalValue::GUID RefGUID = Record[1];
// The "original name", which is the second value of the pair will be
// overriden later by a FS_COMBINED_ORIGINAL_NAME in the combined index.
- ValueIdToValueInfoMap[ValueID] =
- std::make_tuple(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID);
+ ValueIdToValueInfoMap[ValueID] = std::make_tuple(
+ TheIndex.getOrInsertValueInfo(RefGUID), RefGUID, RefGUID);
break;
}
}
@@ -7116,6 +7134,9 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
PendingTypeCheckedLoadConstVCalls;
std::vector<FunctionSummary::ParamAccess> PendingParamAccesses;
+ std::vector<CallsiteInfo> PendingCallsites;
+ std::vector<AllocInfo> PendingAllocs;
+
while (true) {
Expected<BitstreamEntry> MaybeEntry = Stream.advanceSkippingSubblocks();
if (!MaybeEntry)
@@ -7154,8 +7175,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
case bitc::FS_VALUE_GUID: { // [valueid, refguid]
uint64_t ValueID = Record[0];
GlobalValue::GUID RefGUID = Record[1];
- ValueIdToValueInfoMap[ValueID] =
- std::make_tuple(TheIndex.getOrInsertValueInfo(RefGUID), RefGUID);
+ ValueIdToValueInfoMap[ValueID] = std::make_tuple(
+ TheIndex.getOrInsertValueInfo(RefGUID), RefGUID, RefGUID);
break;
}
// FS_PERMODULE: [valueid, flags, instcount, fflags, numrefs,
@@ -7207,6 +7228,13 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
ArrayRef<uint64_t>(Record).slice(CallGraphEdgeStartIndex),
IsOldProfileFormat, HasProfile, HasRelBF);
setSpecialRefs(Refs, NumRORefs, NumWORefs);
+ auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID);
+ // In order to save memory, only record the memprof summaries if this is
+ // the prevailing copy of a symbol.
+ if (IsPrevailing && !IsPrevailing(std::get<2>(VIAndOriginalGUID))) {
+ PendingCallsites.clear();
+ PendingAllocs.clear();
+ }
auto FS = std::make_unique<FunctionSummary>(
Flags, InstCount, getDecodedFFlags(RawFunFlags), /*EntryCount=*/0,
std::move(Refs), std::move(Calls), std::move(PendingTypeTests),
@@ -7214,8 +7242,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
std::move(PendingTypeCheckedLoadConstVCalls),
- std::move(PendingParamAccesses));
- auto VIAndOriginalGUID = getValueInfoFromValueId(ValueID);
+ std::move(PendingParamAccesses), std::move(PendingCallsites),
+ std::move(PendingAllocs));
FS->setModulePath(getThisModule()->first());
FS->setOriginalName(std::get<1>(VIAndOriginalGUID));
TheIndex.addGlobalValueSummary(std::get<0>(VIAndOriginalGUID),
@@ -7358,7 +7386,8 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
std::move(PendingTypeCheckedLoadVCalls),
std::move(PendingTypeTestAssumeConstVCalls),
std::move(PendingTypeCheckedLoadConstVCalls),
- std::move(PendingParamAccesses));
+ std::move(PendingParamAccesses), std::move(PendingCallsites),
+ std::move(PendingAllocs));
LastSeenSummary = FS.get();
LastSeenGUID = VI.getGUID();
FS->setModulePath(ModuleIdMap[ModuleId]);
@@ -7484,6 +7513,95 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
PendingParamAccesses = parseParamAccesses(Record);
break;
}
+
+ case bitc::FS_STACK_IDS: { // [n x stackid]
+ // Save stack ids in the reader to consult when adding stack ids from the
+ // lists in the callsite and alloc entries.
+ StackIds = ArrayRef<uint64_t>(Record);
+ break;
+ }
+
+ case bitc::FS_PERMODULE_CALLSITE_INFO: {
+ unsigned ValueID = Record[0];
+ SmallVector<unsigned> StackIdList;
+ for (auto R = Record.begin() + 1; R != Record.end(); R++) {
+ assert(*R < StackIds.size());
+ StackIdList.push_back(TheIndex.addOrGetStackIdIndex(StackIds[*R]));
+ }
+ ValueInfo VI = std::get<0>(getValueInfoFromValueId(ValueID));
+ PendingCallsites.push_back(CallsiteInfo({VI, std::move(StackIdList)}));
+ break;
+ }
+
+ case bitc::FS_COMBINED_CALLSITE_INFO: {
+ auto RecordIter = Record.begin();
+ unsigned ValueID = *RecordIter++;
+ unsigned NumStackIds = *RecordIter++;
+ unsigned NumVersions = *RecordIter++;
+ assert(Record.size() == 3 + NumStackIds + NumVersions);
+ SmallVector<unsigned> StackIdList;
+ for (unsigned J = 0; J < NumStackIds; J++) {
+ assert(*RecordIter < StackIds.size());
+ StackIdList.push_back(
+ TheIndex.addOrGetStackIdIndex(StackIds[*RecordIter++]));
+ }
+ SmallVector<unsigned> Versions;
+ for (unsigned J = 0; J < NumVersions; J++)
+ Versions.push_back(*RecordIter++);
+ ValueInfo VI = std::get<0>(
+ getValueInfoFromValueId</*AllowNullValueInfo*/ true>(ValueID));
+ PendingCallsites.push_back(
+ CallsiteInfo({VI, std::move(Versions), std::move(StackIdList)}));
+ break;
+ }
+
+ case bitc::FS_PERMODULE_ALLOC_INFO: {
+ unsigned I = 0;
+ std::vector<MIBInfo> MIBs;
+ while (I < Record.size()) {
+ assert(Record.size() - I >= 2);
+ AllocationType AllocType = (AllocationType)Record[I++];
+ unsigned NumStackEntries = Record[I++];
+ assert(Record.size() - I >= NumStackEntries);
+ SmallVector<unsigned> StackIdList;
+ for (unsigned J = 0; J < NumStackEntries; J++) {
+ assert(Record[I] < StackIds.size());
+ StackIdList.push_back(
+ TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
+ }
+ MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
+ }
+ PendingAllocs.push_back(AllocInfo(std::move(MIBs)));
+ break;
+ }
+
+ case bitc::FS_COMBINED_ALLOC_INFO: {
+ unsigned I = 0;
+ std::vector<MIBInfo> MIBs;
+ unsigned NumMIBs = Record[I++];
+ unsigned NumVersions = Record[I++];
+ unsigned MIBsRead = 0;
+ while (MIBsRead++ < NumMIBs) {
+ assert(Record.size() - I >= 2);
+ AllocationType AllocType = (AllocationType)Record[I++];
+ unsigned NumStackEntries = Record[I++];
+ assert(Record.size() - I >= NumStackEntries);
+ SmallVector<unsigned> StackIdList;
+ for (unsigned J = 0; J < NumStackEntries; J++) {
+ assert(Record[I] < StackIds.size());
+ StackIdList.push_back(
+ TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
+ }
+ MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
+ }
+ assert(Record.size() - I >= NumVersions);
+ SmallVector<uint8_t> Versions;
+ for (unsigned J = 0; J < NumVersions; J++)
+ Versions.push_back(Record[I++]);
+ PendingAllocs.push_back(
+ AllocInfo(std::move(Versions), std::move(MIBs)));
+ break;
+ }
}
}
llvm_unreachable("Exit infinite loop");
@@ -7803,14 +7921,15 @@ BitcodeModule::getLazyModule(LLVMContext &Context, bool ShouldLazyLoadMetadata,
// We don't use ModuleIdentifier here because the client may need to control the
// module path used in the combined summary (e.g. when reading summaries for
// regular LTO modules).
-Error BitcodeModule::readSummary(ModuleSummaryIndex &CombinedIndex,
- StringRef ModulePath, uint64_t ModuleId) {
+Error BitcodeModule::readSummary(
+ ModuleSummaryIndex &CombinedIndex, StringRef ModulePath, uint64_t ModuleId,
+ std::function<bool(GlobalValue::GUID)> IsPrevailing) {
BitstreamCursor Stream(Buffer);
if (Error JumpFailed = Stream.JumpToBit(ModuleBit))
return JumpFailed;
ModuleSummaryIndexBitcodeReader R(std::move(Stream), Strtab, CombinedIndex,
- ModulePath, ModuleId);
+ ModulePath, ModuleId, IsPrevailing);
return R.parseModule();
}
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 4bf881a479170..bc81afbc40646 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -211,12 +211,10 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
void writePerModuleGlobalValueSummary();
private:
- void writePerModuleFunctionSummaryRecord(SmallVector<uint64_t, 64> &NameVals,
- GlobalValueSummary *Summary,
- unsigned ValueID,
- unsigned FSCallsAbbrev,
- unsigned FSCallsProfileAbbrev,
- const Function &F);
+ void writePerModuleFunctionSummaryRecord(
+ SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
+ unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
+ unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F);
void writeModuleLevelReferences(const GlobalVariable &V,
SmallVector<uint64_t, 64> &NameVals,
unsigned FSModRefsAbbrev,
@@ -424,6 +422,11 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
/// index and a value id generated by this class to use in references.
std::map<GlobalValue::GUID, unsigned> GUIDToValueIdMap;
+ // The sorted stack id indices actually used in the summary entries being
+ // written, which will be a subset of those in the full index in the case of
+ // distributed indexes.
+ std::vector<unsigned> StackIdIndices;
+
/// Tracks the last value id recorded in the GUIDToValueMap.
unsigned GlobalValueId = 0;
@@ -441,9 +444,28 @@ class IndexBitcodeWriter : public BitcodeWriterBase {
// in writing out the call graph edges. Save the mapping from GUID
// to the new global value id to use when writing those edges, which
// are currently saved in the index in terms of GUID.
- forEachSummary([&](GVInfo I, bool) {
+ forEachSummary([&](GVInfo I, bool IsAliasee) {
GUIDToValueIdMap[I.first] = ++GlobalValueId;
+ if (IsAliasee)
+ return;
+ auto *FS = dyn_cast<FunctionSummary>(I.second);
+ if (!FS)
+ return;
+ // Record all stack id indices actually used in the summary entries being
+ // written, so that we can compact them in the case of distributed ThinLTO
+ // indexes.
+ for (auto &CI : FS->callsites())
+ for (auto Idx : CI.StackIdIndices)
+ StackIdIndices.push_back(Idx);
+ for (auto &AI : FS->allocs())
+ for (auto &MIB : AI.MIBs)
+ for (auto Idx : MIB.StackIdIndices)
+ StackIdIndices.push_back(Idx);
});
+ llvm::sort(StackIdIndices);
+ StackIdIndices.erase(
+ std::unique(StackIdIndices.begin(), StackIdIndices.end()),
+ StackIdIndices.end());
}
/// The below iterator returns the GUID and associated summary.
@@ -3888,11 +3910,64 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
}
}
+static void writeFunctionHeapProfileRecords(
+ BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
+ unsigned AllocAbbrev, bool PerModule,
+ std::function<unsigned(const ValueInfo &VI)> GetValueID,
+ std::function<unsigned(unsigned)> GetStackIndex) {
+ SmallVector<uint64_t> Record;
+
+ for (auto &CI : FS->callsites()) {
+ Record.clear();
+ // Per module callsite clones should always have a single entry of
+ // value 0.
+ assert(!PerModule || (CI.Clones.size() == 1 && CI.Clones[0] == 0));
+ Record.push_back(GetValueID(CI.Callee));
+ if (!PerModule) {
+ Record.push_back(CI.StackIdIndices.size());
+ Record.push_back(CI.Clones.size());
+ }
+ for (auto Id : CI.StackIdIndices)
+ Record.push_back(GetStackIndex(Id));
+ if (!PerModule) {
+ for (auto V : CI.Clones)
+ Record.push_back(V);
+ }
+ Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_CALLSITE_INFO
+ : bitc::FS_COMBINED_CALLSITE_INFO,
+ Record, CallsiteAbbrev);
+ }
+
+ for (auto &AI : FS->allocs()) {
+ Record.clear();
+ // Per module alloc versions should always have a single entry of
+ // value 0.
+ assert(!PerModule || (AI.Versions.size() == 1 && AI.Versions[0] == 0));
+ if (!PerModule) {
+ Record.push_back(AI.MIBs.size());
+ Record.push_back(AI.Versions.size());
+ }
+ for (auto &MIB : AI.MIBs) {
+ Record.push_back((uint8_t)MIB.AllocType);
+ Record.push_back(MIB.StackIdIndices.size());
+ for (auto Id : MIB.StackIdIndices)
+ Record.push_back(GetStackIndex(Id));
+ }
+ if (!PerModule) {
+ for (auto V : AI.Versions)
+ Record.push_back(V);
+ }
+ Stream.EmitRecord(PerModule ? bitc::FS_PERMODULE_ALLOC_INFO
+ : bitc::FS_COMBINED_ALLOC_INFO,
+ Record, AllocAbbrev);
+ }
+}
+
// Helper to emit a single function summary record.
void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
- const Function &F) {
+ unsigned CallsiteAbbrev, unsigned AllocAbbrev, const Function &F) {
NameVals.push_back(ValueID);
FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -3902,6 +3977,12 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
return {VE.getValueID(VI.getValue())};
});
+ writeFunctionHeapProfileRecords(
+ Stream, FS, CallsiteAbbrev, AllocAbbrev,
+ /*PerModule*/ true,
+ /*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
+ /*GetStackIndex*/ [&](unsigned I) { return I; });
+
auto SpecialRefCnts = FS->specialRefCounts();
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
NameVals.push_back(FS->instCount());
@@ -4013,6 +4094,16 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
ArrayRef<uint64_t>{GVI.second, GVI.first});
}
+ if (!Index->stackIds().empty()) {
+ auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
+ StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
+ // numids x stackid
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
+ Stream.EmitRecord(bitc::FS_STACK_IDS, Index->stackIds(), StackIdAbbvId);
+ }
+
// Abbrev for FS_PERMODULE_PROFILE.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_PROFILE));
@@ -4084,6 +4175,21 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
unsigned TypeIdCompatibleVtableAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_CALLSITE_INFO));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+ // n x stackidindex
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
+ // n x (alloc type, numstackids, numstackids x stackidindex)
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
SmallVector<uint64_t, 64> NameVals;
// Iterate over the list of functions instead of the Index to
// ensure the ordering is stable.
@@ -4102,7 +4208,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
}
auto *Summary = VI.getSummaryList()[0].get();
writePerModuleFunctionSummaryRecord(NameVals, Summary, VE.getValueID(&F),
- FSCallsAbbrev, FSCallsProfileAbbrev, F);
+ FSCallsAbbrev, FSCallsProfileAbbrev,
+ CallsiteAbbrev, AllocAbbrev, F);
}
// Capture references from GlobalVariable initializers, which are outside
@@ -4144,7 +4251,7 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
/// Emit the combined summary section into the combined index file.
void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
- Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 3);
+ Stream.EnterSubblock(bitc::GLOBALVAL_SUMMARY_BLOCK_ID, 4);
Stream.EmitRecord(
bitc::FS_VERSION,
ArrayRef<uint64_t>{ModuleSummaryIndex::BitcodeSummaryVersion});
@@ -4157,6 +4264,21 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
ArrayRef<uint64_t>{GVI.second, GVI.first});
}
+ if (!StackIdIndices.empty()) {
+ auto StackIdAbbv = std::make_shared<BitCodeAbbrev>();
+ StackIdAbbv->Add(BitCodeAbbrevOp(bitc::FS_STACK_IDS));
+ // numids x stackid
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ StackIdAbbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned StackIdAbbvId = Stream.EmitAbbrev(std::move(StackIdAbbv));
+ // Write the stack ids used by this index, which will be a subset of those in
+ // the full index in the case of distributed indexes.
+ std::vector<uint64_t> StackIds;
+ for (auto &I : StackIdIndices)
+ StackIds.push_back(Index.getStackIdAtIndex(I));
+ Stream.EmitRecord(bitc::FS_STACK_IDS, StackIds, StackIdAbbvId);
+ }
+
// Abbrev for FS_COMBINED.
auto Abbv = std::make_shared<BitCodeAbbrev>();
Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED));
@@ -4210,6 +4332,26 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
unsigned FSAliasAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_CALLSITE_INFO));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8)); // valueid
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numstackindices
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
+ // numstackindices x stackidindex, numver x version
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned CallsiteAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+ Abbv = std::make_shared<BitCodeAbbrev>();
+ Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
+ // nummib x (alloc type, numstackids, numstackids x stackidindex),
+ // numver x version
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+ Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+ unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
// The aliases are emitted as a post-pass, and will point to the value
// id of the aliasee. Save them in a vector for post-processing.
SmallVector<AliasSummary *, 64> Aliases;
@@ -4286,6 +4428,8 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
}
auto GetValueId = [&](const ValueInfo &VI) -> Optional<unsigned> {
+ if (!VI)
+ return None;
return getValueId(VI.getGUID());
};
@@ -4293,6 +4437,27 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
writeFunctionTypeMetadataRecords(Stream, FS, GetValueId);
getReferencedTypeIds(FS, ReferencedTypeIds);
+ writeFunctionHeapProfileRecords(
+ Stream, FS, CallsiteAbbrev, AllocAbbrev,
+ /*PerModule*/ false,
+ /*GetValueId*/ [&](const ValueInfo &VI) -> unsigned {
+ Optional<unsigned> ValueID = GetValueId(VI);
+ // This can happen in shared index files for distributed ThinLTO if
+ // the callee function summary is not included. Record 0 which we
+ // will have to deal with conservatively when doing any kind of
+ // validation in the ThinLTO backends.
+ if (!ValueID)
+ return 0;
+ return *ValueID;
+ },
+ /*GetStackIndex*/ [&](unsigned I) {
+ // Get the corresponding index into the list of StackIdIndices
+ // actually being written for this combined index (which may be a
+ // subset in the case of distributed indexes).
+ auto Lower = llvm::lower_bound(StackIdIndices, I);
+ return std::distance(StackIdIndices.begin(), Lower);
+ });
+
NameVals.push_back(*ValueId);
NameVals.push_back(Index.getModuleId(FS->modulePath()));
NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
diff --git a/llvm/lib/IR/AsmWriter.cpp b/llvm/lib/IR/AsmWriter.cpp
index 22e12b29843e0..55672320e1dfc 100644
--- a/llvm/lib/IR/AsmWriter.cpp
+++ b/llvm/lib/IR/AsmWriter.cpp
@@ -3192,6 +3192,84 @@ void AssemblyWriter::printFunctionSummary(const FunctionSummary *FS) {
if (const auto *TIdInfo = FS->getTypeIdInfo())
printTypeIdInfo(*TIdInfo);
+ // The AllocationType identifiers capture the profiled context behavior
+ // reaching a specific static allocation site (possibly cloned). Thus
+ // "notcoldandcold" implies there are multiple contexts which reach this site,
+ // some of which are cold and some of which are not, and that these need to
+ // be disambiguated via cloning or other context identification.
+ auto AllocTypeName = [](uint8_t Type) {
+ switch (Type) {
+ case (uint8_t)AllocationType::None:
+ return "none";
+ break;
+ case (uint8_t)AllocationType::NotCold:
+ return "notcold";
+ break;
+ case (uint8_t)AllocationType::Cold:
+ return "cold";
+ break;
+ case (uint8_t)AllocationType::NotCold | (uint8_t)AllocationType::Cold:
+ return "notcoldandcold";
+ break;
+ }
+ llvm_unreachable("Unexpected alloc type");
+ };
+
+ if (!FS->allocs().empty()) {
+ Out << ", allocs: (";
+ FieldSeparator AFS;
+ for (auto &AI : FS->allocs()) {
+ Out << AFS;
+ Out << "(versions: (";
+ FieldSeparator VFS;
+ for (auto V : AI.Versions) {
+ Out << VFS;
+ Out << AllocTypeName(V);
+ }
+ Out << "), memProf: (";
+ FieldSeparator MIBFS;
+ for (auto &MIB : AI.MIBs) {
+ Out << MIBFS;
+ Out << "(type: " << AllocTypeName((uint8_t)MIB.AllocType);
+ Out << ", stackIds: (";
+ FieldSeparator SIDFS;
+ for (auto Id : MIB.StackIdIndices) {
+ Out << SIDFS;
+ Out << TheIndex->getStackIdAtIndex(Id);
+ }
+ Out << "))";
+ }
+ Out << "))";
+ }
+ Out << ")";
+ }
+
+ if (!FS->callsites().empty()) {
+ Out << ", callsites: (";
+ FieldSeparator SNFS;
+ for (auto &CI : FS->callsites()) {
+ Out << SNFS;
+ if (CI.Callee)
+ Out << "(callee: ^" << Machine.getGUIDSlot(CI.Callee.getGUID());
+ else
+ Out << "(callee: null";
+ Out << ", clones: (";
+ FieldSeparator VFS;
+ for (auto V : CI.Clones) {
+ Out << VFS;
+ Out << V;
+ }
+ Out << "), stackIds: (";
+ FieldSeparator SIDFS;
+ for (auto Id : CI.StackIdIndices) {
+ Out << SIDFS;
+ Out << TheIndex->getStackIdAtIndex(Id);
+ }
+ Out << "))";
+ }
+ Out << ")";
+ }
+
auto PrintRange = [&](const ConstantRange &Range) {
Out << "[" << Range.getSignedMin() << ", " << Range.getSignedMax() << "]";
};
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index dc28b681a1515..9bfbabc17a08e 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -911,9 +911,25 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
const SymbolResolution *&ResI,
const SymbolResolution *ResE) {
+ const SymbolResolution *ResITmp = ResI;
+ for (const InputFile::Symbol &Sym : Syms) {
+ assert(ResITmp != ResE);
+ SymbolResolution Res = *ResITmp++;
+
+ if (!Sym.getIRName().empty()) {
+ auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
+ Sym.getIRName(), GlobalValue::ExternalLinkage, ""));
+ if (Res.Prevailing)
+ ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
+ }
+ }
+
if (Error Err =
BM.readSummary(ThinLTO.CombinedIndex, BM.getModuleIdentifier(),
- ThinLTO.ModuleMap.size()))
+ ThinLTO.ModuleMap.size(), [&](GlobalValue::GUID GUID) {
+ return ThinLTO.PrevailingModuleForGUID[GUID] ==
+ BM.getModuleIdentifier();
+ }))
return Err;
for (const InputFile::Symbol &Sym : Syms) {
@@ -924,7 +940,8 @@ Error LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
auto GUID = GlobalValue::getGUID(GlobalValue::getGlobalIdentifier(
Sym.getIRName(), GlobalValue::ExternalLinkage, ""));
if (Res.Prevailing) {
- ThinLTO.PrevailingModuleForGUID[GUID] = BM.getModuleIdentifier();
+ assert(ThinLTO.PrevailingModuleForGUID[GUID] ==
+ BM.getModuleIdentifier());
// For linker redefined symbols (via --wrap or --defsym) we want to
// switch the linkage to `weak` to prevent IPOs from happening.
@@ -1454,6 +1471,7 @@ ThinBackend lto::createWriteIndexesThinBackend(
Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
+ ThinLTO.CombinedIndex.releaseTemporaryMemory();
timeTraceProfilerBegin("ThinLink", StringRef(""));
auto TimeTraceScopeExit = llvm::make_scope_exit([]() {
if (llvm::timeTraceProfilerEnabled())
diff --git a/llvm/test/Assembler/thinlto-memprof-summary.ll b/llvm/test/Assembler/thinlto-memprof-summary.ll
new file mode 100644
index 0000000000000..92e085e5473be
--- /dev/null
+++ b/llvm/test/Assembler/thinlto-memprof-summary.ll
@@ -0,0 +1,24 @@
+;; Test memprof summary parsing (tests all types/fields in various combinations).
+; RUN: llvm-as %s -o - | llvm-dis -o - | FileCheck %s
+
+; ModuleID = 'thinlto-memprof-summary.thinlto.bc'
+
+^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
+;; Function with single alloc, multiple memprof MIBs, no versioning
+^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: notcoldandcold, stackIds: (23456789))))))))
+;; Function with callsite stack ids calling above function, no versioning
+^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789))))))
+;; Function with multiple allocs, multiple memprof MIBs, multiple versions
+^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
+;; Function with callsite stack ids calling above function, multiple versions
+^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789))))))
+;; Function with null callsite callee (can happen in distributed indexes if callee summary not included)
+^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414))))))
+
+; Make sure we get back from llvm-dis what we put in via llvm-as.
+; CHECK: ^0 = module: (path: "thinlto-memprof-summary.o", hash: (1369602428, 2747878711, 259090915, 2507395659, 1141468049))
+; CHECK: ^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (8632435727821051414)), (type: cold, stackIds: (15025054523792398438, 12345678)), (type: notcoldandcold, stackIds: (23456789))))))))
+; CHECK: ^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (8632435727821051414)), (callee: ^1, clones: (0), stackIds: (15025054523792398438, 12345678)), (callee: ^1, clones: (0), stackIds: (23456789))))))
+; CHECK: ^3 = gv: (guid: 26, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (cold, notcold), memProf: ((type: notcold, stackIds: (3456789)), (type: cold, stackIds: (456789)))), (versions: (notcold, cold), memProf: ((type: cold, stackIds: (3456789)), (type: notcold, stackIds: (456789))))))))
+; CHECK: ^4 = gv: (guid: 27, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^3)), callsites: ((callee: ^3, clones: (0, 1), stackIds: (3456789)), (callee: ^3, clones: (1, 1), stackIds: (456789))))))
+; CHECK: ^5 = gv: (guid: 28, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), callsites: ((callee: null, clones: (0), stackIds: (8632435727821051414))))))
diff --git a/llvm/test/ThinLTO/X86/memprof-summary.ll b/llvm/test/ThinLTO/X86/memprof-summary.ll
new file mode 100644
index 0000000000000..ca3b668484b92
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/memprof-summary.ll
@@ -0,0 +1,185 @@
+;; Check memprof summaries (per module, combined index, and distributed indexes)
+
+; RUN: split-file %s %t
+; RUN: opt -module-summary %t/a.ll -o %ta.bc
+; RUN: opt -module-summary %t/b.ll -o %tb.bc
+
+; RUN: llvm-dis -o - %ta.bc | FileCheck %s --check-prefix=PRELINKDISA
+; PRELINKDISA: gv: (name: "main", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438)))))) ; guid = 15822663052811949562
+
+; RUN: llvm-dis -o - %tb.bc | FileCheck %s --check-prefix=PRELINKDISB
+; PRELINKDISB: gv: (name: "_Z3foov", {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (2732490490862098848)))))) ; guid = 9191153033785521275
+; PRELINKDISB: gv: (name: "_Z3bazv", {{.*}} callsites: ((callee: ^3, clones: (0), stackIds: (12481870273128938184)))))) ; guid = 15176620447596392000
+; PRELINKDISB: gv: (name: "_Z3barv", {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438)))))))) ; guid = 17377440600225628772
+
+; RUN: llvm-bcanalyzer -dump %ta.bc | FileCheck %s --check-prefix=PRELINKBCANA
+; PRELINKBCANA: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178/>
+
+; RUN: llvm-bcanalyzer -dump %tb.bc | FileCheck %s --check-prefix=PRELINKBCANB
+; PRELINKBCANB: <STACK_IDS abbrevid=4 op0=-5964873800580613432 op1=2732490490862098848 op2=8632435727821051414 op3=-3421689549917153178/>
+
+; RUN: llvm-lto2 run %ta.bc %tb.bc -o %t -save-temps \
+; RUN: -thinlto-distributed-indexes \
+; RUN: -r=%ta.bc,main,plx \
+; RUN: -r=%ta.bc,_Z3foov, \
+; RUN: -r=%ta.bc,free, \
+; RUN: -r=%ta.bc,sleep, \
+; RUN: -r=%tb.bc,_Z3foov,pl \
+; RUN: -r=%tb.bc,_Znam, \
+; RUN: -r=%tb.bc,_Z3barv,pl \
+; RUN: -r=%tb.bc,_Z3bazv,pl
+
+; RUN: llvm-dis -o - %t.index.bc | FileCheck %s --check-prefix=COMBINEDDIS
+; COMBINEDDIS: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^3, clones: (0), stackIds: (2732490490862098848))))))
+; COMBINEDDIS: gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^5, clones: (0), stackIds: (12481870273128938184))))))
+; COMBINEDDIS: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438))))))
+; COMBINEDDIS: gv: (guid: 17377440600225628772, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
+
+; RUN: llvm-bcanalyzer -dump %t.index.bc | FileCheck %s --check-prefix=COMBINEDBCAN
+; COMBINEDBCAN: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=-5964873800580613432 op3=2732490490862098848/>
+
+; RUN: llvm-dis -o - %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISA
+; DISTRIBUTEDDISA: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: null, clones: (0), stackIds: (2732490490862098848))))))
+; DISTRIBUTEDDISA: gv: (guid: 15822663052811949562, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (8632435727821051414)), (callee: ^2, clones: (0), stackIds: (15025054523792398438))))))
+
+; RUN: llvm-dis -o - %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDDISB
+; DISTRIBUTEDDISB: gv: (guid: 9191153033785521275, {{.*}} callsites: ((callee: ^2, clones: (0), stackIds: (2732490490862098848))))))
+; DISTRIBUTEDDISB: gv: (guid: 15176620447596392000, {{.*}} callsites: ((callee: ^3, clones: (0), stackIds: (12481870273128938184))))))
+; DISTRIBUTEDDISB: gv: (guid: 17377440600225628772, {{.*}} allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (12481870273128938184, 2732490490862098848, 8632435727821051414)), (type: cold, stackIds: (12481870273128938184, 2732490490862098848, 15025054523792398438))))))))
+
+; RUN: llvm-bcanalyzer -dump %ta.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANA
+; DISTRIBUTEDBCANA: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=2732490490862098848/>
+
+; RUN: llvm-bcanalyzer -dump %tb.bc.thinlto.bc | FileCheck %s --check-prefix=DISTRIBUTEDBCANB
+; DISTRIBUTEDBCANB: <STACK_IDS abbrevid=4 op0=8632435727821051414 op1=-3421689549917153178 op2=-5964873800580613432 op3=2732490490862098848/>
+
+;--- a.ll
+; ModuleID = 'a.cc'
+source_filename = "a.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress norecurse uwtable
+define dso_local noundef i32 @main(i32 noundef %argc, ptr nocapture noundef readnone %argv) local_unnamed_addr #0 !dbg !39 {
+entry: ; two calls to @_Z3foov follow, each tagged with its own single-entry !callsite stack
+ %call = call noundef ptr @_Z3foov(), !dbg !42, !callsite !43 ; !43 holds stack id 8632435727821051414
+ %call1 = call noundef ptr @_Z3foov(), !dbg !44, !callsite !45 ; !45 holds stack id -3421689549917153178
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call, i8 0, i64 10, i1 false), !dbg !46
+ call void @llvm.memset.p0.i64(ptr noundef nonnull align 1 dereferenceable(10) %call1, i8 0, i64 10, i1 false), !dbg !47
+ call void @free(ptr noundef %call) #4, !dbg !48 ; first result is freed before the sleep call
+ %call2 = call i32 @sleep(i32 noundef 10), !dbg !49
+ call void @free(ptr noundef %call1) #4, !dbg !50 ; second result is freed after the sleep call
+ ret i32 0, !dbg !51
+}
+
+declare !dbg !52 noundef ptr @_Z3foov() local_unnamed_addr #1
+
+; Function Attrs: argmemonly mustprogress nocallback nofree nounwind willreturn writeonly
+declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1 immarg) #2
+
+; Function Attrs: inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free")
+declare void @free(ptr allocptr nocapture noundef) local_unnamed_addr #3
+
+declare !dbg !53 i32 @sleep(i32 noundef) local_unnamed_addr #1
+
+attributes #0 = { mustprogress norecurse uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { argmemonly mustprogress nocallback nofree nounwind willreturn writeonly }
+attributes #3 = { inaccessiblemem_or_argmemonly mustprogress nounwind willreturn allockind("free") "alloc-family"="malloc" "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git at github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "a.cc", directory: ".", checksumkind: CSK_MD5, checksum: "ebabd56909271a1d4a7cac81c10624d5")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"frame-pointer", i32 2}
+!39 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
+!40 = !DISubroutineType(types: !41)
+!41 = !{}
+!42 = !DILocation(line: 6, column: 13, scope: !39)
+!43 = !{i64 8632435727821051414}
+!44 = !DILocation(line: 7, column: 13, scope: !39)
+!45 = !{i64 -3421689549917153178}
+!46 = !DILocation(line: 8, column: 3, scope: !39)
+!47 = !DILocation(line: 9, column: 3, scope: !39)
+!48 = !DILocation(line: 10, column: 3, scope: !39)
+!49 = !DILocation(line: 11, column: 3, scope: !39)
+!50 = !DILocation(line: 12, column: 3, scope: !39)
+!51 = !DILocation(line: 13, column: 3, scope: !39)
+!52 = !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 4, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41)
+!53 = !DISubprogram(name: "sleep", scope: !54, file: !54, line: 453, type: !40, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized, retainedNodes: !41)
+!54 = !DIFile(filename: "include/unistd.h", directory: "/usr", checksumkind: CSK_MD5, checksum: "ee8f41a17f563f029d0e930ad871815a")
+
+;--- b.ll
+; ModuleID = 'b.cc'
+source_filename = "b.cc"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: mustprogress noinline uwtable
+define dso_local noalias noundef nonnull ptr @_Z3barv() local_unnamed_addr #0 !dbg !39 {
+entry: ; the only call in this module that carries !memprof metadata
+ %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10) #2, !dbg !42, !memprof !43, !callsite !48 ; !43 lists one notcold and one cold context; !48 is this call's own stack id
+ ret ptr %call, !dbg !49
+}
+
+; Function Attrs: nobuiltin allocsize(0)
+declare noundef nonnull ptr @_Znam(i64 noundef) local_unnamed_addr #1
+
+; Function Attrs: mustprogress noinline uwtable
+define dso_local noalias noundef nonnull ptr @_Z3bazv() local_unnamed_addr #0 !dbg !50 {
+entry: ; forwards to @_Z3barv and returns its result
+ %call = call noundef ptr @_Z3barv(), !dbg !51, !callsite !52 ; !52 holds stack id -5964873800580613432
+ ret ptr %call, !dbg !53
+}
+
+; Function Attrs: mustprogress uwtable
+define dso_local noalias noundef nonnull ptr @_Z3foov() local_unnamed_addr #3 !dbg !54 {
+entry: ; forwards to @_Z3bazv and returns its result
+ %call = call noundef ptr @_Z3bazv(), !dbg !55, !callsite !56 ; !56 holds stack id 2732490490862098848
+ ret ptr %call, !dbg !57
+}
+
+attributes #0 = { mustprogress noinline uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { nobuiltin allocsize(0) "disable-tail-calls"="true" "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { builtin allocsize(0) }
+attributes #3 = { mustprogress uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !1, producer: "clang version 16.0.0 (git at github.com:llvm/llvm-project.git ffecb643ee2c49e55e0689339b6d5921b5e6ff8b)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "b.cc", directory: ".", checksumkind: CSK_MD5, checksum: "335f81d275af57725cfc9ffc7be49bc2")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"frame-pointer", i32 2}
+!39 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !1, file: !1, line: 1, type: !40, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
+!40 = !DISubroutineType(types: !41)
+!41 = !{}
+!42 = !DILocation(line: 2, column: 10, scope: !39)
+!43 = !{!44, !46}
+!44 = !{!45, !"notcold"}
+!45 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
+!46 = !{!47, !"cold"}
+!47 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
+!48 = !{i64 9086428284934609951}
+!49 = !DILocation(line: 2, column: 3, scope: !39)
+!50 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 5, type: !40, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
+!51 = !DILocation(line: 6, column: 10, scope: !50)
+!52 = !{i64 -5964873800580613432}
+!53 = !DILocation(line: 6, column: 3, scope: !50)
+!54 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 9, type: !40, scopeLine: 9, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !41)
+!55 = !DILocation(line: 10, column: 10, scope: !54)
+!56 = !{i64 2732490490862098848}
+!57 = !DILocation(line: 10, column: 3, scope: !54)
diff --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
index a3549aca81e51..be3b04175d5ac 100644
--- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
+++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
@@ -11,6 +11,7 @@
#include "llvm/IR/Instructions.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
+#include "llvm/IR/ModuleSummaryIndex.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/SourceMgr.h"
#include "gtest/gtest.h"
@@ -34,6 +35,15 @@ class MemoryProfileInfoTest : public testing::Test {
return Mod;
}
+ std::unique_ptr<ModuleSummaryIndex> makeLLVMIndex(const char *Summary) { // Parses the textual summary in Summary into a ModuleSummaryIndex.
+ SMDiagnostic Err; // Filled in by the parser call below.
+ std::unique_ptr<ModuleSummaryIndex> Index =
+ parseSummaryIndexAssemblyString(Summary, Err);
+ if (!Index) // Parsing produced no index: report the diagnostic on errs().
+ Err.print("MemoryProfileInfoTest", errs());
+ return Index; // Null when parsing failed; callers must check before use.
+ }
+
// This looks for a call that has the given value name, which
// is the name of the value being assigned the call return value.
CallBase *findCall(Function &F, const char *Name = nullptr) {
@@ -359,4 +369,99 @@ declare dso_local noalias noundef i8* @malloc(i64 noundef)
}
}
+TEST_F(MemoryProfileInfoTest, CallStackTestIR) { // Iterates each !memprof MIB stack from past its prefix shared with !callsite and checks the ids seen.
+ LLVMContext C;
+ std::unique_ptr<Module> M = makeLLVMModule(C, // Module with one allocation call carrying !memprof and !callsite.
+ R"IR(
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+define ptr @test() {
+entry:
+ %call = call noalias noundef nonnull dereferenceable(10) ptr @_Znam(i64 noundef 10), !memprof !1, !callsite !6
+ ret ptr %call
+}
+declare noundef nonnull ptr @_Znam(i64 noundef)
+!1 = !{!2, !4}
+!2 = !{!3, !"notcold"}
+!3 = !{i64 1, i64 2, i64 3, i64 4}
+!4 = !{!5, !"cold"}
+!5 = !{i64 1, i64 2, i64 3, i64 5}
+!6 = !{i64 1}
+)IR");
+
+ Function *Func = M->getFunction("test");
+ CallBase *Call = findCall(*Func, "call");
+
+ CallStack<MDNode, MDNode::op_iterator> InstCallsite( // Call stack view over the call's !callsite node (!6 = {1}).
+ Call->getMetadata(LLVMContext::MD_callsite));
+
+ MDNode *MemProfMD = Call->getMetadata(LLVMContext::MD_memprof);
+ bool First = true; // Distinguishes the first MIB (!2) from the second (!4).
+ for (auto &MIBOp : MemProfMD->operands()) { // Visits the MIB nodes in operand order: !2, then !4.
+ auto *MIBMD = cast<const MDNode>(MIBOp);
+ MDNode *StackNode = getMIBStackNode(MIBMD);
+ CallStack<MDNode, MDNode::op_iterator> StackContext(StackNode);
+ std::vector<uint64_t> StackIds;
+ for (auto ContextIter = StackContext.beginAfterSharedPrefix(InstCallsite); // Start after the ids shared with the callsite stack.
+ ContextIter != StackContext.end(); ++ContextIter)
+ StackIds.push_back(*ContextIter);
+ if (First) {
+ std::vector<uint64_t> Expected = {2, 3, 4}; // !3 is {1, 2, 3, 4}; the leading 1 matches !6 and is not expected.
+ EXPECT_EQ(makeArrayRef(StackIds), makeArrayRef(Expected));
+ } else {
+ std::vector<uint64_t> Expected = {2, 3, 5}; // !5 is {1, 2, 3, 5}; the leading 1 matches !6 and is not expected.
+ EXPECT_EQ(makeArrayRef(StackIds), makeArrayRef(Expected));
+ }
+ First = false;
+ }
+}
+
+TEST_F(MemoryProfileInfoTest, CallStackTestSummary) { // Checks call stack iteration over callsite and alloc records parsed from a textual summary.
+ std::unique_ptr<ModuleSummaryIndex> Index = makeLLVMIndex(R"Summary(
+^0 = module: (path: "test.o", hash: (0, 0, 0, 0, 0))
+^1 = gv: (guid: 23, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 2, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 0, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), allocs: ((versions: (none), memProf: ((type: notcold, stackIds: (1, 2, 3, 4)), (type: cold, stackIds: (1, 2, 3, 5))))))))
+^2 = gv: (guid: 25, summaries: (function: (module: ^0, flags: (linkage: external, visibility: default, notEligibleToImport: 0, live: 0, dsoLocal: 1, canAutoHide: 0), insts: 22, funcFlags: (readNone: 0, readOnly: 0, noRecurse: 1, returnDoesNotAlias: 0, noInline: 1, alwaysInline: 0, noUnwind: 0, mayThrow: 0, hasUnknownCall: 0, mustBeUnreachable: 0), calls: ((callee: ^1)), callsites: ((callee: ^1, clones: (0), stackIds: (3, 4)), (callee: ^1, clones: (0), stackIds: (3, 5))))))
+)Summary");
+
+ ASSERT_NE(Index, nullptr); // Stop here if the summary text failed to parse.
+ auto *CallsiteSummary = // guid 25 is the function whose summary has the two callsites records.
+ cast<FunctionSummary>(Index->getGlobalValueSummary(/*guid=*/25));
+ bool First = true; // Distinguishes the first callsite record from the second.
+ for (auto &CI : CallsiteSummary->callsites()) {
+ CallStack<CallsiteInfo, SmallVector<unsigned>::const_iterator> InstCallsite(
+ &CI);
+ std::vector<uint64_t> StackIds;
+ for (auto StackIdIndex : InstCallsite) // Iteration yields indexes; the index maps each one to its stack id.
+ StackIds.push_back(Index->getStackIdAtIndex(StackIdIndex));
+ if (First) {
+ std::vector<uint64_t> Expected = {3, 4}; // Matches stackIds of the first callsite record.
+ EXPECT_EQ(makeArrayRef(StackIds), makeArrayRef(Expected));
+ } else {
+ std::vector<uint64_t> Expected = {3, 5}; // Matches stackIds of the second callsite record.
+ EXPECT_EQ(makeArrayRef(StackIds), makeArrayRef(Expected));
+ }
+ First = false;
+ }
+
+ auto *AllocSummary = // guid 23 is the function whose summary has the allocs record.
+ cast<FunctionSummary>(Index->getGlobalValueSummary(/*guid=*/23));
+ for (auto &AI : AllocSummary->allocs()) {
+ bool First = true; // Reset per alloc: tells the first MIB from the second.
+ for (auto &MIB : AI.MIBs) {
+ CallStack<MIBInfo, SmallVector<unsigned>::const_iterator> StackContext(
+ &MIB);
+ std::vector<uint64_t> StackIds;
+ for (auto StackIdIndex : StackContext) // Same index-to-id translation as for the callsite records.
+ StackIds.push_back(Index->getStackIdAtIndex(StackIdIndex));
+ if (First) {
+ std::vector<uint64_t> Expected = {1, 2, 3, 4}; // Full stack of the notcold MIB; nothing is skipped here.
+ EXPECT_EQ(makeArrayRef(StackIds), makeArrayRef(Expected));
+ } else {
+ std::vector<uint64_t> Expected = {1, 2, 3, 5}; // Full stack of the cold MIB; nothing is skipped here.
+ EXPECT_EQ(makeArrayRef(StackIds), makeArrayRef(Expected));
+ }
+ First = false;
+ }
+ }
+}
} // end anonymous namespace
More information about the llvm-commits
mailing list