[llvm-branch-commits] [InstrProf] Add vtables with type metadata into symtab to look it up with GUID (PR #81051)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Feb 7 15:46:34 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-pgo
Author: Mingming Liu (minglotus-6)
<details>
<summary>Changes</summary>
* The parent patch is https://github.com/llvm/llvm-project/pull/81024
---
Full diff: https://github.com/llvm/llvm-project/pull/81051.diff
3 Files Affected:
- (modified) llvm/include/llvm/ProfileData/InstrProf.h (+28)
- (modified) llvm/lib/ProfileData/InstrProf.cpp (+79-41)
- (modified) llvm/unittests/ProfileData/InstrProfTest.cpp (+47)
``````````diff
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 6cdceae5eeb96..d5d3c7f6b4b34 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -198,6 +198,15 @@ std::string getPGOFuncName(StringRef RawFuncName,
/// called from LTO optimization passes.
std::string getIRPGOFuncName(const Function &F, bool InLTO = false);
+/// Returns the PGO object name. This function has some special handling
+/// when called in LTO optimization:
+/// - In LTO mode, returns the name in `PGONameMetadata` if exists, otherwise
+/// returns the IRPGO name as if the GO has non-local linkage.
+/// - In non-LTO mode, returns the IRPGO name as it is.
+/// More rationale in the comment of function implementation.
+std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO = false,
+ MDNode *PGONameMetadata = nullptr);
+
/// \return the filename and the function name parsed from the output of
/// \c getIRPGOFuncName()
std::pair<StringRef, StringRef> getParsedIRPGOFuncName(StringRef IRPGOFuncName);
@@ -487,8 +496,25 @@ class InstrProfSymtab {
return "** External Symbol **";
}
+ // Returns the canonical name of the given PGOName by stripping the names
+ // suffixes that begins with ".". If MayHaveUniqueSuffix is true, ".__uniq."
+ // suffix is kept in the canonical name.
+ StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix);
+
+ // Add the function into the symbol table, by creating the following
+ // map entries:
+ // - <MD5Hash(PGOFuncName), PGOFuncName>
+ // - <MD5Hash(PGOFuncName), F>
+ // - <MD5Hash(getCanonicalName(PGOFuncName), F)
Error addFuncWithName(Function &F, StringRef PGOFuncName);
+ // Add the vtable into the symbol table, by creating the following
+ // map entries:
+ // - <MD5Hash(PGOVTableName), PGOVTableName>
+ // - <MD5Hash(PGOVTableName), V>
+ // - <MD5Hash(getCanonicalName(PGOVTableName), V)
+ Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
+
// If the symtab is created by a series of calls to \c addFuncName, \c
// finalizeSymtab needs to be called before looking up function names.
// This is required because the underlying map is a vector (for space
@@ -543,6 +569,7 @@ class InstrProfSymtab {
Error create(const FuncNameIterRange &FuncIterRange,
const VTableNameIterRange &VTableIterRange);
+ // Map the MD5 of the symbol name to the name.
Error addSymbolName(StringRef SymbolName) {
if (SymbolName.empty())
return make_error<InstrProfError>(instrprof_error::malformed,
@@ -665,6 +692,7 @@ void InstrProfSymtab::finalizeSymtab() {
if (Sorted)
return;
llvm::sort(MD5NameMap, less_first());
+ llvm::sort(MD5VTableMap, less_first());
llvm::sort(MD5FuncMap, less_first());
llvm::sort(AddrToMD5Map, less_first());
AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 91e79e8b2e9ad..3b05faf0cc5d3 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -325,21 +325,20 @@ static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) {
return {};
}
-// Returns the PGO object name. This function has some special handling
-// when called in LTO optimization. The following only applies when calling in
-// LTO passes (when \c InLTO is true): LTO's internalization privatizes many
-// global linkage symbols. This happens after value profile annotation, but
-// those internal linkage functions should not have a source prefix.
-// Additionally, for ThinLTO mode, exported internal functions are promoted
-// and renamed. We need to ensure that the original internal PGO name is
-// used when computing the GUID that is compared against the profiled GUIDs.
-// To differentiate compiler generated internal symbols from original ones,
-// PGOFuncName meta data are created and attached to the original internal
-// symbols in the value profile annotation step
-// (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
-// data, its original linkage must be non-internal.
-static std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO,
- MDNode *PGONameMetadata) {
+std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO,
+ MDNode *PGONameMetadata) {
+ // The following only applies when calling in
+ // LTO passes (when \c InLTO is true): LTO's internalization privatizes many
+ // global linkage symbols. This happens after value profile annotation, but
+ // those internal linkage functions should not have a source prefix.
+ // Additionally, for ThinLTO mode, exported internal functions are promoted
+ // and renamed. We need to ensure that the original internal PGO name is
+ // used when computing the GUID that is compared against the profiled GUIDs.
+ // To differentiate compiler generated internal symbols from original ones,
+ // PGOFuncName meta data are created and attached to the original internal
+ // symbols in the value profile annotation step
+ // (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
+ // data, its original linkage must be non-internal.
if (!InLTO) {
auto FileName = getStrippedSourceFileName(GO);
return getIRPGONameForGlobalObject(GO, GO.getLinkage(), FileName);
@@ -481,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
Types.clear();
G.getMetadata(LLVMContext::MD_type, Types);
if (!Types.empty()) {
- MD5VTableMap.emplace_back(G.getGUID(), &G);
+ if (Error E = addVTableWithName(
+ G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+ return E;
}
}
Sorted = false;
@@ -489,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
return Error::success();
}
+Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
+ StringRef VTablePGOName) {
+ if (Error E = addVTableName(VTablePGOName))
+ return E;
+
+ MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable);
+
+ // NOTE: `-funique-internal-linkage-names` doesn't uniqufy vtables, so no
+ // need to check ".__uniq."
+
+ // If a local-linkage vtable is promoted to have external linkage in ThinLTO,
+ // it will have `.llvm.` in its name. Use the name before externalization.
+ StringRef CanonicalName =
+ getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false);
+ if (CanonicalName != VTablePGOName) {
+ if (Error E = addVTableName(CanonicalName))
+ return E;
+
+ MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable);
+ }
+
+ return Error::success();
+}
+
/// \c NameStrings is a string composed of one of more possibly encoded
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
/// method decodes the string and calls `NameCallback` for each substring.
@@ -561,35 +586,51 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
}
-Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
- if (Error E = addFuncName(PGOFuncName))
- return E;
- MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName,
+ bool MayHaveUniqueSuffix) {
+ size_t pos = 0;
// In ThinLTO, local function may have been promoted to global and have
// suffix ".llvm." added to the function name. We need to add the
// stripped function name to the symbol table so that we can find a match
// from profile.
//
- // We may have other suffixes similar as ".llvm." which are needed to
- // be stripped before the matching, but ".__uniq." suffix which is used
- // to differentiate internal linkage functions in different modules
- // should be kept. Now this is the only suffix with the pattern ".xxx"
- // which is kept before matching.
- const std::string UniqSuffix = ".__uniq.";
- auto pos = PGOFuncName.find(UniqSuffix);
+ // ".__uniq." suffix is used to differentiate internal linkage functions in
+ // different modules and should be kept. Now this is the only suffix with the
+ // pattern ".xxx" which is kept before matching. In other words, other
+ // suffixes similar as
+ // ".llvm." will be stripped.
+ if (MayHaveUniqueSuffix) {
+ const std::string UniqSuffix = ".__uniq.";
+ pos = PGOName.find(UniqSuffix);
+ if (pos != StringRef::npos)
+ pos += UniqSuffix.length();
+ else
+ pos = 0;
+ }
+
// Search '.' after ".__uniq." if ".__uniq." exists, otherwise
// search '.' from the beginning.
- if (pos != std::string::npos)
- pos += UniqSuffix.length();
- else
- pos = 0;
- pos = PGOFuncName.find('.', pos);
- if (pos != std::string::npos && pos != 0) {
- StringRef OtherFuncName = PGOFuncName.substr(0, pos);
- if (Error E = addFuncName(OtherFuncName))
+ pos = PGOName.find('.', pos);
+ if (pos != StringRef::npos && pos != 0)
+ return PGOName.substr(0, pos);
+
+ return PGOName;
+}
+
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+ if (Error E = addFuncName(PGOFuncName))
+ return E;
+ MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+
+ StringRef CanonicalName =
+ getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true);
+
+ if (CanonicalName != PGOFuncName) {
+ if (Error E = addFuncName(CanonicalName))
return E;
- MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+ MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F);
}
+
return Error::success();
}
@@ -1257,11 +1298,8 @@ void annotateValueSite(Module &M, Instruction &Inst,
}
void annotateValueSite(Module &M, Instruction &Inst,
- ArrayRef<InstrProfValueData> VDs,
- uint64_t Sum, InstrProfValueKind ValueKind,
- uint32_t MaxMDCount) {
- if (VDs.empty())
- return;
+ ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
+ InstrProfValueKind ValueKind, uint32_t MaxMDCount) {
LLVMContext &Ctx = M.getContext();
MDBuilder MDHelper(Ctx);
SmallVector<Metadata *, 3> Vals;
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index b007a374c2cf2..f0281bdf1525d 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
@@ -1606,6 +1607,41 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());
+ ArrayType *VTableArrayType = ArrayType::get(
+ PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
+ 3);
+ Constant *Int32TyNull =
+ llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
+ SmallVector<llvm::Type *, 1> tys;
+ tys.push_back(VTableArrayType);
+ StructType *VTableType = llvm::StructType::get(Ctx, tys);
+
+ GlobalVariable *ExternalGV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true,
+ llvm::GlobalValue::ExternalLinkage,
+ llvm::ConstantStruct::get(
+ VTableType, {llvm::ConstantArray::get(
+ VTableArrayType,
+ {Int32TyNull, Int32TyNull,
+ Function::Create(FTy, Function::ExternalLinkage,
+ "VFuncInExternalGV", M.get())})}),
+ "ExternalGV");
+
+ GlobalVariable *PrivateGV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantStruct::get(
+ VTableType, {llvm::ConstantArray::get(
+ VTableArrayType,
+ {Int32TyNull, Int32TyNull,
+ Function::Create(FTy, Function::ExternalLinkage,
+ "VFuncInPrivateGV", M.get())})}),
+ "PrivateGV");
+
+ // Only vtables with type metadata are added to symtab.
+ ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV"));
+ PrivateGV->addTypeMetadata(16, MDString::get(Ctx, "PrivateGV"));
+
InstrProfSymtab ProfSymtab;
EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
@@ -1628,6 +1664,17 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
EXPECT_EQ(StringRef(PGOName), PGOFuncName);
EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
}
+
+ StringRef VTables[] = {"ExternalGV", "PrivateGV"};
+ for (size_t I = 0; I < std::size(VTables); I++) {
+ GlobalVariable *GV =
+ M->getGlobalVariable(VTables[I], /* AllowInternal=*/true);
+
+ std::string IRPGOName = getIRPGOObjectName(*GV);
+ EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(
+ IndexedInstrProf::ComputeHash(IRPGOName)));
+ EXPECT_EQ(VTables[I], getParsedIRPGOFuncName(IRPGOName).second);
+ }
}
// Testing symtab serialization and creator/deserialization interface
``````````
</details>
https://github.com/llvm/llvm-project/pull/81051
More information about the llvm-branch-commits
mailing list