[llvm-branch-commits] [llvm] [InstrProf] Add vtables with type metadata into symtab to look it up with GUID (PR #81051)
Mingming Liu via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Feb 7 23:09:55 PST 2024
https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/81051
>From 58f8fdbc9d2862a2579aece6c313bd19b4329e34 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Wed, 7 Feb 2024 23:07:36 -0800
Subject: [PATCH] [InstrProf] Add vtables with type metadata into symtab to
look it up with GUID
---
llvm/include/llvm/ProfileData/InstrProf.h | 19 ++++
llvm/lib/ProfileData/InstrProf.cpp | 94 ++++++++++++++------
llvm/unittests/ProfileData/InstrProfTest.cpp | 47 ++++++++++
3 files changed, 132 insertions(+), 28 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 53108a093bf4d..6e799cf8aa273 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -487,8 +487,25 @@ class InstrProfSymtab {
return "** External Symbol **";
}
+ // Returns the canonical name of the given PGOName by stripping the name
+ // suffixes that begin with ".". If MayHaveUniqueSuffix is true, the
+ // ".__uniq." suffix is kept in the canonical name.
+ StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix);
+
+ // Add the function into the symbol table, by creating the following
+ // map entries:
+ // - <MD5Hash(PGOFuncName), PGOFuncName>
+ // - <MD5Hash(PGOFuncName), F>
+ // - <MD5Hash(getCanonicalName(PGOFuncName)), F>
Error addFuncWithName(Function &F, StringRef PGOFuncName);
+ // Add the vtable into the symbol table, by creating the following
+ // map entries:
+ // - <MD5Hash(PGOVTableName), PGOVTableName>
+ // - <MD5Hash(PGOVTableName), V>
+ // - <MD5Hash(getCanonicalName(PGOVTableName)), V>
+ Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
+
// If the symtab is created by a series of calls to \c addFuncName, \c
// finalizeSymtab needs to be called before looking up function names.
// This is required because the underlying map is a vector (for space
@@ -543,6 +560,7 @@ class InstrProfSymtab {
Error create(const FuncNameIterRange &FuncIterRange,
const VTableNameIterRange &VTableIterRange);
+ // Map the MD5 of the symbol name to the name.
Error addSymbolName(StringRef SymbolName) {
if (SymbolName.empty())
return make_error<InstrProfError>(instrprof_error::malformed,
@@ -665,6 +683,7 @@ void InstrProfSymtab::finalizeSymtab() {
if (Sorted)
return;
llvm::sort(MD5NameMap, less_first());
+ llvm::sort(MD5VTableMap, less_first());
llvm::sort(MD5FuncMap, less_first());
llvm::sort(AddrToMD5Map, less_first());
AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 9ebcba10c860f..edd6bf2c2971d 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -480,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
Types.clear();
G.getMetadata(LLVMContext::MD_type, Types);
if (!Types.empty()) {
- MD5VTableMap.emplace_back(G.getGUID(), &G);
+ if (Error E = addVTableWithName(
+ G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+ return E;
}
}
Sorted = false;
@@ -488,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
return Error::success();
}
+Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
+ StringRef VTablePGOName) {
+ if (Error E = addVTableName(VTablePGOName))
+ return E;
+
+ MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable);
+
+ // NOTE: `-funique-internal-linkage-names` doesn't uniquify vtables, so no
+ // need to check ".__uniq."
+
+ // If a local-linkage vtable is promoted to have external linkage in ThinLTO,
+ // it will have `.llvm.` in its name. Use the name before externalization.
+ StringRef CanonicalName =
+ getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false);
+ if (CanonicalName != VTablePGOName) {
+ if (Error E = addVTableName(CanonicalName))
+ return E;
+
+ MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable);
+ }
+
+ return Error::success();
+}
+
/// \c NameStrings is a string composed of one of more possibly encoded
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
/// method decodes the string and calls `NameCallback` for each substring.
@@ -560,35 +586,50 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
}
-Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
- if (Error E = addFuncName(PGOFuncName))
- return E;
- MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName,
+ bool MayHaveUniqueSuffix) {
+ size_t pos = 0;
// In ThinLTO, local function may have been promoted to global and have
// suffix ".llvm." added to the function name. We need to add the
// stripped function name to the symbol table so that we can find a match
// from profile.
//
- // We may have other suffixes similar as ".llvm." which are needed to
- // be stripped before the matching, but ".__uniq." suffix which is used
- // to differentiate internal linkage functions in different modules
- // should be kept. Now this is the only suffix with the pattern ".xxx"
- // which is kept before matching.
- const std::string UniqSuffix = ".__uniq.";
- auto pos = PGOFuncName.find(UniqSuffix);
- // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
- // search '.' from the beginning.
- if (pos != std::string::npos)
- pos += UniqSuffix.length();
- else
- pos = 0;
- pos = PGOFuncName.find('.', pos);
- if (pos != std::string::npos && pos != 0) {
- StringRef OtherFuncName = PGOFuncName.substr(0, pos);
- if (Error E = addFuncName(OtherFuncName))
+ // ".__uniq." suffix is used to differentiate internal linkage functions in
+ // different modules and should be kept. Now this is the only suffix with the
+ // pattern ".xxx" which is kept before matching; other suffixes similar to
+ // ".llvm." will be stripped.
+ if (MayHaveUniqueSuffix) {
+ const std::string UniqSuffix = ".__uniq.";
+ pos = PGOName.find(UniqSuffix);
+ if (pos != StringRef::npos)
+ pos += UniqSuffix.length();
+ else
+ pos = 0;
+ }
+
+ // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from
+ // the beginning.
+ pos = PGOName.find('.', pos);
+ if (pos != StringRef::npos && pos != 0)
+ return PGOName.substr(0, pos);
+
+ return PGOName;
+}
+
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+ if (Error E = addFuncName(PGOFuncName))
+ return E;
+ MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+
+ StringRef CanonicalName =
+ getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true);
+
+ if (CanonicalName != PGOFuncName) {
+ if (Error E = addFuncName(CanonicalName))
return E;
- MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+ MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F);
}
+
return Error::success();
}
@@ -1256,11 +1297,8 @@ void annotateValueSite(Module &M, Instruction &Inst,
}
void annotateValueSite(Module &M, Instruction &Inst,
- ArrayRef<InstrProfValueData> VDs,
- uint64_t Sum, InstrProfValueKind ValueKind,
- uint32_t MaxMDCount) {
- if (VDs.empty())
- return;
+ ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
+ InstrProfValueKind ValueKind, uint32_t MaxMDCount) {
LLVMContext &Ctx = M.getContext();
MDBuilder MDHelper(Ctx);
SmallVector<Metadata *, 3> Vals;
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 4b99195c1b859..e858c5e2d1c62 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -6,6 +6,7 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
@@ -1605,6 +1606,41 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());
+ ArrayType *VTableArrayType = ArrayType::get(
+ PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
+ 3);
+ Constant *Int32TyNull =
+ llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
+ SmallVector<llvm::Type *, 1> tys;
+ tys.push_back(VTableArrayType);
+ StructType *VTableType = llvm::StructType::get(Ctx, tys);
+
+ GlobalVariable *ExternalGV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true,
+ llvm::GlobalValue::ExternalLinkage,
+ llvm::ConstantStruct::get(
+ VTableType, {llvm::ConstantArray::get(
+ VTableArrayType,
+ {Int32TyNull, Int32TyNull,
+ Function::Create(FTy, Function::ExternalLinkage,
+ "VFuncInExternalGV", M.get())})}),
+ "ExternalGV");
+
+ GlobalVariable *PrivateGV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantStruct::get(
+ VTableType, {llvm::ConstantArray::get(
+ VTableArrayType,
+ {Int32TyNull, Int32TyNull,
+ Function::Create(FTy, Function::ExternalLinkage,
+ "VFuncInPrivateGV", M.get())})}),
+ "PrivateGV");
+
+ // Only vtables with type metadata are added to symtab.
+ ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV"));
+ PrivateGV->addTypeMetadata(16, MDString::get(Ctx, "PrivateGV"));
+
InstrProfSymtab ProfSymtab;
EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
@@ -1626,6 +1662,17 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
EXPECT_EQ(StringRef(PGOName), PGOFuncName);
EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
}
+
+ StringRef VTables[] = {"ExternalGV", "PrivateGV"};
+ for (size_t I = 0; I < std::size(VTables); I++) {
+ GlobalVariable *GV =
+ M->getGlobalVariable(VTables[I], /* AllowInternal=*/true);
+
+ std::string IRPGOName = getPGOName(*GV);
+ EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(
+ IndexedInstrProf::ComputeHash(IRPGOName)));
+ EXPECT_EQ(VTables[I], getParsedIRPGOName(IRPGOName).second);
+ }
}
// Testing symtab serialization and creator/deserialization interface
More information about the llvm-branch-commits
mailing list