[llvm-branch-commits] [llvm] [InstrProf] Add vtables with type metadata into symtab to look it up with GUID (PR #81051)
Mingming Liu via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Tue Feb 13 13:00:00 PST 2024
https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/81051
>From 66dbbfef52bdc092cbd4ed619bba38c003f6063d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 8 Feb 2024 09:07:27 -0800
Subject: [PATCH 1/2] [InstrProf] Add vtables with type metadata into symtab to
look it up with GUID
---
llvm/include/llvm/ProfileData/InstrProf.h | 19 +++++
llvm/lib/ProfileData/InstrProf.cpp | 87 ++++++++++++++------
llvm/unittests/ProfileData/InstrProfTest.cpp | 55 +++++++++++++
3 files changed, 138 insertions(+), 23 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 53108a093bf4dd..6e799cf8aa273e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -487,8 +487,25 @@ class InstrProfSymtab {
return "** External Symbol **";
}
+ // Returns the canonical name of the given PGOName by stripping the names
+ // suffixes that begins with ".". If MayHaveUniqueSuffix is true, ".__uniq."
+ // suffix is kept in the canonical name.
+ StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix);
+
+ // Add the function into the symbol table, by creating the following
+ // map entries:
+ // - <MD5Hash(PGOFuncName), PGOFuncName>
+ // - <MD5Hash(PGOFuncName), F>
+ // - <MD5Hash(getCanonicalName(PGOFuncName), F)
Error addFuncWithName(Function &F, StringRef PGOFuncName);
+ // Add the vtable into the symbol table, by creating the following
+ // map entries:
+ // - <MD5Hash(PGOVTableName), PGOVTableName>
+ // - <MD5Hash(PGOVTableName), V>
+ // - <MD5Hash(getCanonicalName(PGOVTableName), V)
+ Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
+
// If the symtab is created by a series of calls to \c addFuncName, \c
// finalizeSymtab needs to be called before looking up function names.
// This is required because the underlying map is a vector (for space
@@ -543,6 +560,7 @@ class InstrProfSymtab {
Error create(const FuncNameIterRange &FuncIterRange,
const VTableNameIterRange &VTableIterRange);
+ // Map the MD5 of the symbol name to the name.
Error addSymbolName(StringRef SymbolName) {
if (SymbolName.empty())
return make_error<InstrProfError>(instrprof_error::malformed,
@@ -665,6 +683,7 @@ void InstrProfSymtab::finalizeSymtab() {
if (Sorted)
return;
llvm::sort(MD5NameMap, less_first());
+ llvm::sort(MD5VTableMap, less_first());
llvm::sort(MD5FuncMap, less_first());
llvm::sort(AddrToMD5Map, less_first());
AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 9ebcba10c860ff..a09a2bb0ba77cb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -480,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
Types.clear();
G.getMetadata(LLVMContext::MD_type, Types);
if (!Types.empty()) {
- MD5VTableMap.emplace_back(G.getGUID(), &G);
+ if (Error E = addVTableWithName(
+ G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+ return E;
}
}
Sorted = false;
@@ -488,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
return Error::success();
}
+Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
+ StringRef VTablePGOName) {
+ if (Error E = addVTableName(VTablePGOName))
+ return E;
+
+ MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable);
+
+ // NOTE: `-funique-internal-linkage-names` doesn't uniqufy vtables, so no
+ // need to check ".__uniq."
+
+ // If a local-linkage vtable is promoted to have external linkage in ThinLTO,
+ // it will have `.llvm.` in its name. Use the name before externalization.
+ StringRef CanonicalName =
+ getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false);
+ if (CanonicalName != VTablePGOName) {
+ if (Error E = addVTableName(CanonicalName))
+ return E;
+
+ MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable);
+ }
+
+ return Error::success();
+}
+
/// \c NameStrings is a string composed of one of more possibly encoded
/// sub-strings. The substrings are separated by 0 or more zero bytes. This
/// method decodes the string and calls `NameCallback` for each substring.
@@ -560,35 +586,50 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
}
-Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
- if (Error E = addFuncName(PGOFuncName))
- return E;
- MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName,
+ bool MayHaveUniqueSuffix) {
+ size_t pos = 0;
// In ThinLTO, local function may have been promoted to global and have
// suffix ".llvm." added to the function name. We need to add the
// stripped function name to the symbol table so that we can find a match
// from profile.
//
- // We may have other suffixes similar as ".llvm." which are needed to
- // be stripped before the matching, but ".__uniq." suffix which is used
- // to differentiate internal linkage functions in different modules
- // should be kept. Now this is the only suffix with the pattern ".xxx"
- // which is kept before matching.
- const std::string UniqSuffix = ".__uniq.";
- auto pos = PGOFuncName.find(UniqSuffix);
- // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
- // search '.' from the beginning.
- if (pos != std::string::npos)
- pos += UniqSuffix.length();
- else
- pos = 0;
- pos = PGOFuncName.find('.', pos);
- if (pos != std::string::npos && pos != 0) {
- StringRef OtherFuncName = PGOFuncName.substr(0, pos);
- if (Error E = addFuncName(OtherFuncName))
+ // ".__uniq." suffix is used to differentiate internal linkage functions in
+ // different modules and should be kept. Now this is the only suffix with the
+ // pattern ".xxx" which is kept before matching, other suffixes similar as
+ // ".llvm." will be stripped.
+ if (MayHaveUniqueSuffix) {
+ const std::string UniqSuffix = ".__uniq.";
+ pos = PGOName.find(UniqSuffix);
+ if (pos != StringRef::npos)
+ pos += UniqSuffix.length();
+ else
+ pos = 0;
+ }
+
+ // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from
+ // the beginning.
+ pos = PGOName.find('.', pos);
+ if (pos != StringRef::npos && pos != 0)
+ return PGOName.substr(0, pos);
+
+ return PGOName;
+}
+
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+ if (Error E = addFuncName(PGOFuncName))
+ return E;
+ MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+
+ StringRef CanonicalName =
+ getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true);
+
+ if (CanonicalName != PGOFuncName) {
+ if (Error E = addFuncName(CanonicalName))
return E;
- MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+ MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F);
}
+
return Error::success();
}
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 4b99195c1b859a..edde544660e454 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -6,6 +6,8 @@
//
//===----------------------------------------------------------------------===//
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
@@ -1605,6 +1607,44 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());
+ // [ptr, ptr, ptr]
+ ArrayType *VTableArrayType = ArrayType::get(
+ PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
+ 3);
+ Constant *Int32TyNull =
+ llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
+ SmallVector<llvm::Type *, 1> tys = {VTableArrayType};
+ StructType *VTableType = llvm::StructType::get(Ctx, tys);
+
+ // Create a vtable definition with external linkage.
+ GlobalVariable *ExternalGV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true,
+ llvm::GlobalValue::ExternalLinkage,
+ llvm::ConstantStruct::get(
+ VTableType, {llvm::ConstantArray::get(
+ VTableArrayType,
+ {Int32TyNull, Int32TyNull,
+ Function::Create(FTy, Function::ExternalLinkage,
+ "VFuncInExternalGV", M.get())})}),
+ "ExternalGV");
+
+ // Create a vtable definition for local-linkage function.
+ GlobalVariable *LocalGV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true,
+ llvm::GlobalValue::InternalLinkage,
+ llvm::ConstantStruct::get(
+ VTableType,
+ {llvm::ConstantArray::get(
+ VTableArrayType, {Int32TyNull, Int32TyNull,
+ Function::Create(FTy, Function::ExternalLinkage,
+ "VFuncInLocalGV", M.get())})}),
+ "LocalGV");
+
+ // Add type metadata for the test data, since vtables with type metadata are
+ // added to symtab.
+ ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV"));
+ LocalGV->addTypeMetadata(16, MDString::get(Ctx, "LocalGV"));
+
InstrProfSymtab ProfSymtab;
EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
@@ -1626,6 +1666,21 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
EXPECT_EQ(StringRef(PGOName), PGOFuncName);
EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
}
+
+ StringRef VTables[] = {"ExternalGV", "LocalGV"};
+ for (StringRef VTableName : VTables) {
+ GlobalVariable *GV =
+ M->getGlobalVariable(VTableName, /* AllowInternal=*/true);
+
+ // Test that ProfSymtab returns the expected name given a hash.
+ std::string IRPGOName = getPGOName(*GV);
+ uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName);
+ EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID));
+ EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second);
+
+ // Test that ProfSymtab returns the expected global variable
+ EXPECT_EQ(GV, ProfSymtab.getGlobalVariable(GUID));
+ }
}
// Testing symtab serialization and creator/deserialization interface
>From 003319d8d1f791dbe0c34bf82f0043f71b330d68 Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Tue, 13 Feb 2024 12:02:54 -0800
Subject: [PATCH 2/2] Use 'InstrProf::addSymbolName' to add vtables and
simplify unit test
---
llvm/include/llvm/ProfileData/InstrProf.h | 7 +--
llvm/lib/ProfileData/InstrProf.cpp | 7 ++-
llvm/unittests/ProfileData/InstrProfTest.cpp | 50 ++++++++------------
3 files changed, 30 insertions(+), 34 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index c18ffa9a5a5116..ca2260e52ff069 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -502,9 +502,10 @@ class InstrProfSymtab {
// Add the vtable into the symbol table, by creating the following
// map entries:
- // - <MD5Hash(PGOVTableName), PGOVTableName>
- // - <MD5Hash(PGOVTableName), V>
- // - <MD5Hash(getCanonicalName(PGOVTableName), V)
+ // name-set = {PGOName} + {getCanonicalName(PGOName)} if the canonical name
+ // is different from pgo name.
+ // - In MD5NameMap: <MD5Hash(name), name> for name in name-set
+ // - In MD5VTableMap: <MD5Hash(name), name> for name in name-set
Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
// If the symtab is created by a series of calls to \c addFuncName, \c
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 4832370ab41f15..4fe4abeaf9041b 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -492,9 +492,12 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
StringRef VTablePGOName) {
-
+
auto mapName = [&](StringRef Name) -> Error {
- if (Error E = addVTableName(Name))
+ // Use 'addSymbolName' rather than 'addVTableName' as 'VTableNames' is
+ // needed by in InstrProfWriter from llvm-profdata, but this function is
+ // called by compiler and tools with LLVM IR.
+ if (Error E = addSymbolName(Name))
return E;
MD5VTableMap.emplace_back(GlobalValue::getGUID(Name), &VTable);
return Error::success();
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index edde544660e454..775a10a1f14005 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -1616,34 +1616,24 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
SmallVector<llvm::Type *, 1> tys = {VTableArrayType};
StructType *VTableType = llvm::StructType::get(Ctx, tys);
- // Create a vtable definition with external linkage.
- GlobalVariable *ExternalGV = new llvm::GlobalVariable(
- *M, VTableType, /* isConstant= */ true,
- llvm::GlobalValue::ExternalLinkage,
- llvm::ConstantStruct::get(
- VTableType, {llvm::ConstantArray::get(
- VTableArrayType,
- {Int32TyNull, Int32TyNull,
- Function::Create(FTy, Function::ExternalLinkage,
- "VFuncInExternalGV", M.get())})}),
- "ExternalGV");
-
- // Create a vtable definition for local-linkage function.
- GlobalVariable *LocalGV = new llvm::GlobalVariable(
- *M, VTableType, /* isConstant= */ true,
- llvm::GlobalValue::InternalLinkage,
- llvm::ConstantStruct::get(
- VTableType,
- {llvm::ConstantArray::get(
- VTableArrayType, {Int32TyNull, Int32TyNull,
- Function::Create(FTy, Function::ExternalLinkage,
- "VFuncInLocalGV", M.get())})}),
- "LocalGV");
-
- // Add type metadata for the test data, since vtables with type metadata are
- // added to symtab.
- ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV"));
- LocalGV->addTypeMetadata(16, MDString::get(Ctx, "LocalGV"));
+ // Create two vtables in the module, one with external linkage and the other
+ // with local linkage.
+ for (auto [Name, Linkage] :
+ {std::pair{"ExternalGV", GlobalValue::ExternalLinkage},
+ {"LocalGV", GlobalValue::InternalLinkage}}) {
+ llvm::Twine FuncName(Name, StringRef("VFunc"));
+ Function *VFunc = Function::Create(FTy, Linkage, FuncName, M.get());
+ GlobalVariable *GV = new llvm::GlobalVariable(
+ *M, VTableType, /* isConstant= */ true, Linkage,
+ llvm::ConstantStruct::get(
+ VTableType,
+ {llvm::ConstantArray::get(VTableArrayType,
+ {Int32TyNull, Int32TyNull, VFunc})}),
+ Name);
+ // Add type metadata for the test data, since vtables with type metadata
+ // are added to symtab.
+ GV->addTypeMetadata(16, MDString::get(Ctx, Name));
+ }
InstrProfSymtab ProfSymtab;
EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
@@ -1668,12 +1658,14 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
}
StringRef VTables[] = {"ExternalGV", "LocalGV"};
- for (StringRef VTableName : VTables) {
+ for (auto [VTableName, PGOName] : {std::pair{"ExternalGV", "ExternalGV"},
+ {"LocalGV", "MyModule.cpp;LocalGV"}}) {
GlobalVariable *GV =
M->getGlobalVariable(VTableName, /* AllowInternal=*/true);
// Test that ProfSymtab returns the expected name given a hash.
std::string IRPGOName = getPGOName(*GV);
+ EXPECT_STREQ(IRPGOName.c_str(), PGOName);
uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName);
EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID));
EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second);
More information about the llvm-branch-commits
mailing list