[llvm-branch-commits] [InstrProf] Add vtables with type metadata into symtab to look it up with GUID (PR #81051)

via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Wed Feb 7 15:46:34 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-pgo

Author: Mingming Liu (minglotus-6)

<details>
<summary>Changes</summary>

* The parent patch is https://github.com/llvm/llvm-project/pull/81024

---
Full diff: https://github.com/llvm/llvm-project/pull/81051.diff


3 Files Affected:

- (modified) llvm/include/llvm/ProfileData/InstrProf.h (+28) 
- (modified) llvm/lib/ProfileData/InstrProf.cpp (+79-41) 
- (modified) llvm/unittests/ProfileData/InstrProfTest.cpp (+47) 


``````````diff
diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 6cdceae5eeb96..d5d3c7f6b4b34 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -198,6 +198,15 @@ std::string getPGOFuncName(StringRef RawFuncName,
 /// called from LTO optimization passes.
 std::string getIRPGOFuncName(const Function &F, bool InLTO = false);
 
+/// Returns the PGO object name. This function has some special handling
+/// when called in LTO optimization:
+/// - In LTO mode, returns the name in `PGONameMetadata` if exists, otherwise
+/// returns the IRPGO name as if the GO has non-local linkage.
+/// - In non-LTO mode, returns the IRPGO name as it is.
+/// More rationale in the comment of function implementation.
+std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO = false,
+                               MDNode *PGONameMetadata = nullptr);
+
 /// \return the filename and the function name parsed from the output of
 /// \c getIRPGOFuncName()
 std::pair<StringRef, StringRef> getParsedIRPGOFuncName(StringRef IRPGOFuncName);
@@ -487,8 +496,25 @@ class InstrProfSymtab {
     return "** External Symbol **";
   }
 
+  // Returns the canonical name of the given PGOName by stripping the names
+  // suffixes that begins with ".". If MayHaveUniqueSuffix is true, ".__uniq."
+  // suffix is kept in the canonical name.
+  StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix);
+
+  // Add the function into the symbol table, by creating the following
+  // map entries:
+  // - <MD5Hash(PGOFuncName), PGOFuncName>
+  // - <MD5Hash(PGOFuncName), F>
+  // - <MD5Hash(getCanonicalName(PGOFuncName), F)
   Error addFuncWithName(Function &F, StringRef PGOFuncName);
 
+  // Add the vtable into the symbol table, by creating the following
+  // map entries:
+  // - <MD5Hash(PGOVTableName), PGOVTableName>
+  // - <MD5Hash(PGOVTableName), V>
+  // - <MD5Hash(getCanonicalName(PGOVTableName), V)
+  Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
+
   // If the symtab is created by a series of calls to \c addFuncName, \c
   // finalizeSymtab needs to be called before looking up function names.
   // This is required because the underlying map is a vector (for space
@@ -543,6 +569,7 @@ class InstrProfSymtab {
   Error create(const FuncNameIterRange &FuncIterRange,
                const VTableNameIterRange &VTableIterRange);
 
+  // Map the MD5 of the symbol name to the name.
   Error addSymbolName(StringRef SymbolName) {
     if (SymbolName.empty())
       return make_error<InstrProfError>(instrprof_error::malformed,
@@ -665,6 +692,7 @@ void InstrProfSymtab::finalizeSymtab() {
   if (Sorted)
     return;
   llvm::sort(MD5NameMap, less_first());
+  llvm::sort(MD5VTableMap, less_first());
   llvm::sort(MD5FuncMap, less_first());
   llvm::sort(AddrToMD5Map, less_first());
   AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 91e79e8b2e9ad..3b05faf0cc5d3 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -325,21 +325,20 @@ static std::optional<std::string> lookupPGONameFromMetadata(MDNode *MD) {
   return {};
 }
 
-// Returns the PGO object name. This function has some special handling
-// when called in LTO optimization. The following only applies when calling in
-// LTO passes (when \c InLTO is true): LTO's internalization privatizes many
-// global linkage symbols. This happens after value profile annotation, but
-// those internal linkage functions should not have a source prefix.
-// Additionally, for ThinLTO mode, exported internal functions are promoted
-// and renamed. We need to ensure that the original internal PGO name is
-// used when computing the GUID that is compared against the profiled GUIDs.
-// To differentiate compiler generated internal symbols from original ones,
-// PGOFuncName meta data are created and attached to the original internal
-// symbols in the value profile annotation step
-// (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
-// data, its original linkage must be non-internal.
-static std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO,
-                                      MDNode *PGONameMetadata) {
+std::string getIRPGOObjectName(const GlobalObject &GO, bool InLTO,
+                               MDNode *PGONameMetadata) {
+  // The following only applies when calling in
+  // LTO passes (when \c InLTO is true): LTO's internalization privatizes many
+  // global linkage symbols. This happens after value profile annotation, but
+  // those internal linkage functions should not have a source prefix.
+  // Additionally, for ThinLTO mode, exported internal functions are promoted
+  // and renamed. We need to ensure that the original internal PGO name is
+  // used when computing the GUID that is compared against the profiled GUIDs.
+  // To differentiate compiler generated internal symbols from original ones,
+  // PGOFuncName meta data are created and attached to the original internal
+  // symbols in the value profile annotation step
+  // (PGOUseFunc::annotateIndirectCallSites). If a symbol does not have the meta
+  // data, its original linkage must be non-internal.
   if (!InLTO) {
     auto FileName = getStrippedSourceFileName(GO);
     return getIRPGONameForGlobalObject(GO, GO.getLinkage(), FileName);
@@ -481,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
     Types.clear();
     G.getMetadata(LLVMContext::MD_type, Types);
     if (!Types.empty()) {
-      MD5VTableMap.emplace_back(G.getGUID(), &G);
+      if (Error E = addVTableWithName(
+              G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+        return E;
     }
   }
   Sorted = false;
@@ -489,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
   return Error::success();
 }
 
+Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
+                                         StringRef VTablePGOName) {
+  if (Error E = addVTableName(VTablePGOName))
+    return E;
+
+  MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable);
+
+  // NOTE: `-funique-internal-linkage-names` doesn't uniqufy vtables, so no
+  // need to check ".__uniq."
+
+  // If a local-linkage vtable is promoted to have external linkage in ThinLTO,
+  // it will have `.llvm.` in its name. Use the name before externalization.
+  StringRef CanonicalName =
+      getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false);
+  if (CanonicalName != VTablePGOName) {
+    if (Error E = addVTableName(CanonicalName))
+      return E;
+
+    MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable);
+  }
+
+  return Error::success();
+}
+
 /// \c NameStrings is a string composed of one of more possibly encoded
 /// sub-strings. The substrings are separated by 0 or more zero bytes. This
 /// method decodes the string and calls `NameCallback` for each substring.
@@ -561,35 +586,51 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
       std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
 }
 
-Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
-  if (Error E = addFuncName(PGOFuncName))
-    return E;
-  MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName,
+                                            bool MayHaveUniqueSuffix) {
+  size_t pos = 0;
   // In ThinLTO, local function may have been promoted to global and have
   // suffix ".llvm." added to the function name. We need to add the
   // stripped function name to the symbol table so that we can find a match
   // from profile.
   //
-  // We may have other suffixes similar as ".llvm." which are needed to
-  // be stripped before the matching, but ".__uniq." suffix which is used
-  // to differentiate internal linkage functions in different modules
-  // should be kept. Now this is the only suffix with the pattern ".xxx"
-  // which is kept before matching.
-  const std::string UniqSuffix = ".__uniq.";
-  auto pos = PGOFuncName.find(UniqSuffix);
+  // ".__uniq." suffix is used  to differentiate internal linkage functions in
+  // different modules and should be kept. Now this is the only suffix with the
+  // pattern ".xxx" which is kept before matching. In other words,  other
+  // suffixes similar as
+  // ".llvm." will be stripped.
+  if (MayHaveUniqueSuffix) {
+    const std::string UniqSuffix = ".__uniq.";
+    pos = PGOName.find(UniqSuffix);
+    if (pos != StringRef::npos)
+      pos += UniqSuffix.length();
+    else
+      pos = 0;
+  }
+
   // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
   // search '.' from the beginning.
-  if (pos != std::string::npos)
-    pos += UniqSuffix.length();
-  else
-    pos = 0;
-  pos = PGOFuncName.find('.', pos);
-  if (pos != std::string::npos && pos != 0) {
-    StringRef OtherFuncName = PGOFuncName.substr(0, pos);
-    if (Error E = addFuncName(OtherFuncName))
+  pos = PGOName.find('.', pos);
+  if (pos != StringRef::npos && pos != 0)
+    return PGOName.substr(0, pos);
+
+  return PGOName;
+}
+
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+  if (Error E = addFuncName(PGOFuncName))
+    return E;
+  MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+
+  StringRef CanonicalName =
+      getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true);
+
+  if (CanonicalName != PGOFuncName) {
+    if (Error E = addFuncName(CanonicalName))
       return E;
-    MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+    MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F);
   }
+
   return Error::success();
 }
 
@@ -1257,11 +1298,8 @@ void annotateValueSite(Module &M, Instruction &Inst,
 }
 
 void annotateValueSite(Module &M, Instruction &Inst,
-                       ArrayRef<InstrProfValueData> VDs,
-                       uint64_t Sum, InstrProfValueKind ValueKind,
-                       uint32_t MaxMDCount) {
-  if (VDs.empty())
-    return;
+                       ArrayRef<InstrProfValueData> VDs, uint64_t Sum,
+                       InstrProfValueKind ValueKind, uint32_t MaxMDCount) {
   LLVMContext &Ctx = M.getContext();
   MDBuilder MDHelper(Ctx);
   SmallVector<Metadata *, 3> Vals;
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index b007a374c2cf2..f0281bdf1525d 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
@@ -1606,6 +1607,41 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
   Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
   Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());
 
+  ArrayType *VTableArrayType = ArrayType::get(
+      PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
+      3);
+  Constant *Int32TyNull =
+      llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
+  SmallVector<llvm::Type *, 1> tys;
+  tys.push_back(VTableArrayType);
+  StructType *VTableType = llvm::StructType::get(Ctx, tys);
+
+  GlobalVariable *ExternalGV = new llvm::GlobalVariable(
+      *M, VTableType, /* isConstant= */ true,
+      llvm::GlobalValue::ExternalLinkage,
+      llvm::ConstantStruct::get(
+          VTableType, {llvm::ConstantArray::get(
+                          VTableArrayType,
+                          {Int32TyNull, Int32TyNull,
+                           Function::Create(FTy, Function::ExternalLinkage,
+                                            "VFuncInExternalGV", M.get())})}),
+      "ExternalGV");
+
+  GlobalVariable *PrivateGV = new llvm::GlobalVariable(
+      *M, VTableType, /* isConstant= */ true,
+      llvm::GlobalValue::InternalLinkage,
+      llvm::ConstantStruct::get(
+          VTableType, {llvm::ConstantArray::get(
+                          VTableArrayType,
+                          {Int32TyNull, Int32TyNull,
+                           Function::Create(FTy, Function::ExternalLinkage,
+                                            "VFuncInPrivateGV", M.get())})}),
+      "PrivateGV");
+
+  // Only vtables with type metadata are added to symtab.
+  ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV"));
+  PrivateGV->addTypeMetadata(16, MDString::get(Ctx, "PrivateGV"));
+
   InstrProfSymtab ProfSymtab;
   EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
 
@@ -1628,6 +1664,17 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
     EXPECT_EQ(StringRef(PGOName), PGOFuncName);
     EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
   }
+
+  StringRef VTables[] = {"ExternalGV", "PrivateGV"};
+  for (size_t I = 0; I < std::size(VTables); I++) {
+    GlobalVariable *GV =
+        M->getGlobalVariable(VTables[I], /* AllowInternal=*/true);
+
+    std::string IRPGOName = getIRPGOObjectName(*GV);
+    EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(
+                             IndexedInstrProf::ComputeHash(IRPGOName)));
+    EXPECT_EQ(VTables[I], getParsedIRPGOFuncName(IRPGOName).second);
+  }
 }
 
 // Testing symtab serialization and creator/deserialization interface

``````````

</details>


https://github.com/llvm/llvm-project/pull/81051


More information about the llvm-branch-commits mailing list