[llvm-branch-commits] [llvm] [InstrProf] Add vtables with type metadata into symtab to look it up with GUID (PR #81051)

Mingming Liu via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Feb 13 11:34:58 PST 2024


https://github.com/minglotus-6 updated https://github.com/llvm/llvm-project/pull/81051

>From 66dbbfef52bdc092cbd4ed619bba38c003f6063d Mon Sep 17 00:00:00 2001
From: mingmingl <mingmingl at google.com>
Date: Thu, 8 Feb 2024 09:07:27 -0800
Subject: [PATCH] [InstrProf] Add vtables with type metadata into symtab to
 look it up with GUID

---
 llvm/include/llvm/ProfileData/InstrProf.h    | 19 +++++
 llvm/lib/ProfileData/InstrProf.cpp           | 87 ++++++++++++++------
 llvm/unittests/ProfileData/InstrProfTest.cpp | 55 +++++++++++++
 3 files changed, 138 insertions(+), 23 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index 53108a093bf4dd..6e799cf8aa273e 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -487,8 +487,25 @@ class InstrProfSymtab {
     return "** External Symbol **";
   }
 
+  // Returns the canonical name of the given PGOName by stripping the names
+  // suffixes that begins with ".". If MayHaveUniqueSuffix is true, ".__uniq."
+  // suffix is kept in the canonical name.
+  StringRef getCanonicalName(StringRef PGOName, bool MayHaveUniqueSuffix);
+
+  // Add the function into the symbol table, by creating the following
+  // map entries:
+  // - <MD5Hash(PGOFuncName), PGOFuncName>
+  // - <MD5Hash(PGOFuncName), F>
+  // - <MD5Hash(getCanonicalName(PGOFuncName), F)
   Error addFuncWithName(Function &F, StringRef PGOFuncName);
 
+  // Add the vtable into the symbol table, by creating the following
+  // map entries:
+  // - <MD5Hash(PGOVTableName), PGOVTableName>
+  // - <MD5Hash(PGOVTableName), V>
+  // - <MD5Hash(getCanonicalName(PGOVTableName), V)
+  Error addVTableWithName(GlobalVariable &V, StringRef PGOVTableName);
+
   // If the symtab is created by a series of calls to \c addFuncName, \c
   // finalizeSymtab needs to be called before looking up function names.
   // This is required because the underlying map is a vector (for space
@@ -543,6 +560,7 @@ class InstrProfSymtab {
   Error create(const FuncNameIterRange &FuncIterRange,
                const VTableNameIterRange &VTableIterRange);
 
+  // Map the MD5 of the symbol name to the name.
   Error addSymbolName(StringRef SymbolName) {
     if (SymbolName.empty())
       return make_error<InstrProfError>(instrprof_error::malformed,
@@ -665,6 +683,7 @@ void InstrProfSymtab::finalizeSymtab() {
   if (Sorted)
     return;
   llvm::sort(MD5NameMap, less_first());
+  llvm::sort(MD5VTableMap, less_first());
   llvm::sort(MD5FuncMap, less_first());
   llvm::sort(AddrToMD5Map, less_first());
   AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()),
diff --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 9ebcba10c860ff..a09a2bb0ba77cb 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -480,7 +480,9 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
     Types.clear();
     G.getMetadata(LLVMContext::MD_type, Types);
     if (!Types.empty()) {
-      MD5VTableMap.emplace_back(G.getGUID(), &G);
+      if (Error E = addVTableWithName(
+              G, getIRPGOObjectName(G, InLTO, /* PGONameMetadata */ nullptr)))
+        return E;
     }
   }
   Sorted = false;
@@ -488,6 +490,30 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
   return Error::success();
 }
 
+Error InstrProfSymtab::addVTableWithName(GlobalVariable &VTable,
+                                         StringRef VTablePGOName) {
+  if (Error E = addVTableName(VTablePGOName))
+    return E;
+
+  MD5VTableMap.emplace_back(GlobalValue::getGUID(VTablePGOName), &VTable);
+
+  // NOTE: `-funique-internal-linkage-names` doesn't uniqufy vtables, so no
+  // need to check ".__uniq."
+
+  // If a local-linkage vtable is promoted to have external linkage in ThinLTO,
+  // it will have `.llvm.` in its name. Use the name before externalization.
+  StringRef CanonicalName =
+      getCanonicalName(VTablePGOName, /* MayHaveUniqueSuffix= */ false);
+  if (CanonicalName != VTablePGOName) {
+    if (Error E = addVTableName(CanonicalName))
+      return E;
+
+    MD5VTableMap.emplace_back(GlobalValue::getGUID(CanonicalName), &VTable);
+  }
+
+  return Error::success();
+}
+
 /// \c NameStrings is a string composed of one of more possibly encoded
 /// sub-strings. The substrings are separated by 0 or more zero bytes. This
 /// method decodes the string and calls `NameCallback` for each substring.
@@ -560,35 +586,50 @@ Error InstrProfSymtab::initVTableNamesFromCompressedStrings(
       std::bind(&InstrProfSymtab::addVTableName, this, std::placeholders::_1));
 }
 
-Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
-  if (Error E = addFuncName(PGOFuncName))
-    return E;
-  MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+StringRef InstrProfSymtab::getCanonicalName(StringRef PGOName,
+                                            bool MayHaveUniqueSuffix) {
+  size_t pos = 0;
   // In ThinLTO, local function may have been promoted to global and have
   // suffix ".llvm." added to the function name. We need to add the
   // stripped function name to the symbol table so that we can find a match
   // from profile.
   //
-  // We may have other suffixes similar as ".llvm." which are needed to
-  // be stripped before the matching, but ".__uniq." suffix which is used
-  // to differentiate internal linkage functions in different modules
-  // should be kept. Now this is the only suffix with the pattern ".xxx"
-  // which is kept before matching.
-  const std::string UniqSuffix = ".__uniq.";
-  auto pos = PGOFuncName.find(UniqSuffix);
-  // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
-  // search '.' from the beginning.
-  if (pos != std::string::npos)
-    pos += UniqSuffix.length();
-  else
-    pos = 0;
-  pos = PGOFuncName.find('.', pos);
-  if (pos != std::string::npos && pos != 0) {
-    StringRef OtherFuncName = PGOFuncName.substr(0, pos);
-    if (Error E = addFuncName(OtherFuncName))
+  // ".__uniq." suffix is used to differentiate internal linkage functions in
+  // different modules and should be kept. Now this is the only suffix with the
+  // pattern ".xxx" which is kept before matching, other suffixes similar as
+  // ".llvm." will be stripped.
+  if (MayHaveUniqueSuffix) {
+    const std::string UniqSuffix = ".__uniq.";
+    pos = PGOName.find(UniqSuffix);
+    if (pos != StringRef::npos)
+      pos += UniqSuffix.length();
+    else
+      pos = 0;
+  }
+
+  // Search '.' after ".__uniq." if ".__uniq." exists, otherwise search '.' from
+  // the beginning.
+  pos = PGOName.find('.', pos);
+  if (pos != StringRef::npos && pos != 0)
+    return PGOName.substr(0, pos);
+
+  return PGOName;
+}
+
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+  if (Error E = addFuncName(PGOFuncName))
+    return E;
+  MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+
+  StringRef CanonicalName =
+      getCanonicalName(PGOFuncName, /* MayHaveUniqueSuffix= */ true);
+
+  if (CanonicalName != PGOFuncName) {
+    if (Error E = addFuncName(CanonicalName))
       return E;
-    MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+    MD5FuncMap.emplace_back(Function::getGUID(CanonicalName), &F);
   }
+
   return Error::success();
 }
 
diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 4b99195c1b859a..edde544660e454 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -6,6 +6,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/LLVMContext.h"
@@ -1605,6 +1607,44 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
   Function::Create(FTy, Function::WeakODRLinkage, "Wblah", M.get());
   Function::Create(FTy, Function::WeakODRLinkage, "Wbar", M.get());
 
+  // [ptr, ptr, ptr]
+  ArrayType *VTableArrayType = ArrayType::get(
+      PointerType::get(Ctx, M->getDataLayout().getDefaultGlobalsAddressSpace()),
+      3);
+  Constant *Int32TyNull =
+      llvm::ConstantExpr::getNullValue(PointerType::getUnqual(Ctx));
+  SmallVector<llvm::Type *, 1> tys = {VTableArrayType};
+  StructType *VTableType = llvm::StructType::get(Ctx, tys);
+
+  // Create a vtable definition with external linkage.
+  GlobalVariable *ExternalGV = new llvm::GlobalVariable(
+      *M, VTableType, /* isConstant= */ true,
+      llvm::GlobalValue::ExternalLinkage,
+      llvm::ConstantStruct::get(
+          VTableType, {llvm::ConstantArray::get(
+                          VTableArrayType,
+                          {Int32TyNull, Int32TyNull,
+                           Function::Create(FTy, Function::ExternalLinkage,
+                                            "VFuncInExternalGV", M.get())})}),
+      "ExternalGV");
+
+  // Create a vtable definition for local-linkage function.
+  GlobalVariable *LocalGV = new llvm::GlobalVariable(
+      *M, VTableType, /* isConstant= */ true,
+      llvm::GlobalValue::InternalLinkage,
+      llvm::ConstantStruct::get(
+          VTableType,
+          {llvm::ConstantArray::get(
+              VTableArrayType, {Int32TyNull, Int32TyNull,
+                                Function::Create(FTy, Function::ExternalLinkage,
+                                                 "VFuncInLocalGV", M.get())})}),
+      "LocalGV");
+
+  // Add type metadata for the test data, since vtables with type metadata are
+  // added to symtab.
+  ExternalGV->addTypeMetadata(16, MDString::get(Ctx, "ExternalGV"));
+  LocalGV->addTypeMetadata(16, MDString::get(Ctx, "LocalGV"));
+
   InstrProfSymtab ProfSymtab;
   EXPECT_THAT_ERROR(ProfSymtab.create(*M), Succeeded());
 
@@ -1626,6 +1666,21 @@ TEST(SymtabTest, instr_prof_symtab_module_test) {
     EXPECT_EQ(StringRef(PGOName), PGOFuncName);
     EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
   }
+
+  StringRef VTables[] = {"ExternalGV", "LocalGV"};
+  for (StringRef VTableName : VTables) {
+    GlobalVariable *GV =
+        M->getGlobalVariable(VTableName, /* AllowInternal=*/true);
+
+    // Test that ProfSymtab returns the expected name given a hash.
+    std::string IRPGOName = getPGOName(*GV);
+    uint64_t GUID = IndexedInstrProf::ComputeHash(IRPGOName);
+    EXPECT_EQ(IRPGOName, ProfSymtab.getFuncOrVarName(GUID));
+    EXPECT_EQ(VTableName, getParsedIRPGOName(IRPGOName).second);
+
+    // Test that ProfSymtab returns the expected global variable
+    EXPECT_EQ(GV, ProfSymtab.getGlobalVariable(GUID));
+  }
 }
 
 // Testing symtab serialization and creator/deserialization interface



More information about the llvm-branch-commits mailing list