[llvm] fe05193 - [InstrProf] Encode linkage names in IRPGO counter names

Ellis Hoag via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 7 10:15:27 PDT 2023


Author: Ellis Hoag
Date: 2023-08-07T10:15:08-07:00
New Revision: fe051934cbb0aaf25d960d7d45305135635d650b

URL: https://github.com/llvm/llvm-project/commit/fe051934cbb0aaf25d960d7d45305135635d650b
DIFF: https://github.com/llvm/llvm-project/commit/fe051934cbb0aaf25d960d7d45305135635d650b.diff

LOG: [InstrProf] Encode linkage names in IRPGO counter names

Prior to this diff, names in the `__llvm_prf_names` section had the format `[<filepath>:]<function-name>`, e.g., `main.cpp:foo`, `bar`. `<filepath>` is used to discriminate between possibly identical function names when linkage is local and `<function-name>` simply comes from `F.getName()`. This has two problems:
  * `:` is commonly found in Objective-C functions so that names like `main.mm:-[C foo::]` and `-[C bar::]` are difficult to parse
  * `<function-name>` might be different from the linkage name, so it cannot be used to pass a function order to the linker via `-symbol-ordering-file` or `-order_file` (see https://discourse.llvm.org/t/rfc-temporal-profiling-extension-for-irpgo/68068)

Instead, this diff changes the format to `[<filepath>;]<linkage-name>`, e.g., `main.cpp;_foo`, `_bar`. The hope is that `;` won't realistically be found in either `<filepath>` or `<linkage-name>`.

To prevent invalidating all prior IRPGO profiles, we also lookup the prior name format when a record is not found (see `InstrProfSymtab::create()`, `readMemprof()`, and `getInstrProfRecord()`). It seems that Swift and Clang FE-PGO rely on the original `getPGOFuncName()`, so we cannot simply replace it.

Reviewed By: MaskRay

Differential Revision: https://reviews.llvm.org/D156569

Added: 
    

Modified: 
    llvm/include/llvm/ProfileData/InstrProf.h
    llvm/include/llvm/ProfileData/InstrProfReader.h
    llvm/lib/ProfileData/InstrProf.cpp
    llvm/lib/ProfileData/InstrProfReader.cpp
    llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
    llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    llvm/test/Transforms/PGOProfile/comdat_internal.ll
    llvm/test/Transforms/PGOProfile/criticaledge.ll
    llvm/test/Transforms/PGOProfile/statics_counter_naming.ll
    llvm/tools/llvm-profdata/llvm-profdata.cpp
    llvm/unittests/ProfileData/InstrProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/InstrProf.h b/llvm/include/llvm/ProfileData/InstrProf.h
index f64d2e6cb73924..f9096b46157200 100644
--- a/llvm/include/llvm/ProfileData/InstrProf.h
+++ b/llvm/include/llvm/ProfileData/InstrProf.h
@@ -184,6 +184,15 @@ std::string getPGOFuncName(StringRef RawFuncName,
                            StringRef FileName,
                            uint64_t Version = INSTR_PROF_INDEX_VERSION);
 
+/// \return the modified name for function \c F suitable to be
+/// used as the key for IRPGO profile lookup. \c InLTO indicates if this is
+/// called from LTO optimization passes.
+std::string getIRPGOFuncName(const Function &F, bool InLTO = false);
+
+/// \return the filename and the function name parsed from the output of
+/// \c getIRPGOFuncName()
+std::pair<StringRef, StringRef> getParsedIRPGOFuncName(StringRef IRPGOFuncName);
+
 /// Return the name of the global variable used to store a function
 /// name in PGO instrumentation. \c FuncName is the name of the function
 /// returned by the \c getPGOFuncName call.
@@ -434,6 +443,8 @@ class InstrProfSymtab {
     return "** External Symbol **";
   }
 
+  Error addFuncWithName(Function &F, StringRef PGOFuncName);
+
   // If the symtab is created by a series of calls to \c addFuncName, \c
   // finalizeSymtab needs to be called before looking up function names.
   // This is required because the underlying map is a vector (for space
@@ -516,11 +527,6 @@ class InstrProfSymtab {
   /// Return function from the name's md5 hash. Return nullptr if not found.
   inline Function *getFunction(uint64_t FuncMD5Hash);
 
-  /// Return the function's original assembly name by stripping off
-  /// the prefix attached (to symbols with priviate linkage). For
-  /// global functions, it returns the same string as getFuncName.
-  inline StringRef getOrigFuncName(uint64_t FuncMD5Hash);
-
   /// Return the name section data.
   inline StringRef getNameData() const { return Data; }
 
@@ -586,16 +592,6 @@ Function* InstrProfSymtab::getFunction(uint64_t FuncMD5Hash) {
   return nullptr;
 }
 
-// See also getPGOFuncName implementation. These two need to be
-// matched.
-StringRef InstrProfSymtab::getOrigFuncName(uint64_t FuncMD5Hash) {
-  StringRef PGOName = getFuncName(FuncMD5Hash);
-  size_t S = PGOName.find_first_of(':');
-  if (S == StringRef::npos)
-    return PGOName;
-  return PGOName.drop_front(S + 1);
-}
-
 // To store the sums of profile count values, or the percentage of
 // the sums of the total count values.
 struct CountSumOrPercent {

diff  --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 80c5284d8a7dde..74e921e10c47b9 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -716,9 +716,12 @@ class IndexedInstrProfReader : public InstrProfReader {
   /// When return a hash_mismatch error and MismatchedFuncSum is not nullptr,
   /// the sum of all counters in the mismatched function will be set to
   /// MismatchedFuncSum. If there are multiple instances of mismatched
-  /// functions, MismatchedFuncSum returns the maximum.
+  /// functions, MismatchedFuncSum returns the maximum. If \c FuncName is not
+  /// found, try to lookup \c DeprecatedFuncName to handle profiles built by
+  /// older compilers.
   Expected<InstrProfRecord>
   getInstrProfRecord(StringRef FuncName, uint64_t FuncHash,
+                     StringRef DeprecatedFuncName = "",
                      uint64_t *MismatchedFuncSum = nullptr);
 
   /// Return the memprof record for the function identified by

diff  --git a/llvm/lib/ProfileData/InstrProf.cpp b/llvm/lib/ProfileData/InstrProf.cpp
index 0f9c33de3f5229..835dd697bc7b6a 100644
--- a/llvm/lib/ProfileData/InstrProf.cpp
+++ b/llvm/lib/ProfileData/InstrProf.cpp
@@ -27,6 +27,7 @@
 #include "llvm/IR/Instruction.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/IR/MDBuilder.h"
+#include "llvm/IR/Mangler.h"
 #include "llvm/IR/Metadata.h"
 #include "llvm/IR/Module.h"
 #include "llvm/IR/Type.h"
@@ -264,6 +265,67 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
   return PathNameStr.substr(LastPos);
 }
 
+static StringRef getStrippedSourceFileName(const Function &F) {
+  StringRef FileName(F.getParent()->getSourceFileName());
+  uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
+  if (StripLevel < StaticFuncStripDirNamePrefix)
+    StripLevel = StaticFuncStripDirNamePrefix;
+  if (StripLevel)
+    FileName = stripDirPrefix(FileName, StripLevel);
+  return FileName;
+}
+
+// The PGO name has the format [<filepath>;]<linkage-name> where <filepath>; is
+// provided if linkage is local and <linkage-name> is the mangled function
+// name. The filepath is used to discriminate possibly identical function names.
+// ; is used because it is unlikely to be found in either <filepath> or
+// <linkage-name>.
+//
+// Older compilers used getPGOFuncName() which has the format
+// [<filepath>:]<function-name>. <filepath> is used to discriminate between
+// possibly identical function names when linkage is local and <function-name>
+// simply comes from F.getName(). This caused trouble for Objective-C functions
+// which commonly have :'s in their names. Also, since <function-name> is not
+// mangled, they cannot be passed to Mach-O linkers via -order_file. We still
+// need to compute this name to lookup functions from profiles built by older
+// compilers.
+static std::string getIRPGOFuncName(const Function &F,
+                                    GlobalValue::LinkageTypes Linkage,
+                                    StringRef FileName) {
+  SmallString<64> Name;
+  if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
+    Name.append(FileName.empty() ? "<unknown>" : FileName);
+    Name.append(";");
+  }
+  Mangler().getNameWithPrefix(Name, &F, /*CannotUsePrivateLabel=*/true);
+  return Name.str().str();
+}
+
+static std::optional<std::string> lookupPGOFuncName(const Function &F) {
+  if (MDNode *MD = getPGOFuncNameMetadata(F)) {
+    StringRef S = cast<MDString>(MD->getOperand(0))->getString();
+    return S.str();
+  }
+  return {};
+}
+
+// See getPGOFuncName()
+std::string getIRPGOFuncName(const Function &F, bool InLTO) {
+  if (!InLTO) {
+    auto FileName = getStrippedSourceFileName(F);
+    return getIRPGOFuncName(F, F.getLinkage(), FileName);
+  }
+
+  // In LTO mode (when InLTO is true), first check if there is a meta data.
+  if (auto IRPGOFuncName = lookupPGOFuncName(F))
+    return *IRPGOFuncName;
+
+  // If there is no meta data, the function must be a global before the value
+  // profile annotation pass. Its current linkage may be internal if it is
+  // internalized in LTO mode.
+  return getIRPGOFuncName(F, GlobalValue::ExternalLinkage, "");
+}
+
 // Return the PGOFuncName. This function has some special handling when called
 // in LTO optimization. The following only applies when calling in LTO passes
 // (when \c InLTO is true): LTO's internalization privatizes many global linkage
@@ -279,20 +341,13 @@ static StringRef stripDirPrefix(StringRef PathNameStr, uint32_t NumPrefix) {
 // data, its original linkage must be non-internal.
 std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
   if (!InLTO) {
-    StringRef FileName(F.getParent()->getSourceFileName());
-    uint32_t StripLevel = StaticFuncFullModulePrefix ? 0 : (uint32_t)-1;
-    if (StripLevel < StaticFuncStripDirNamePrefix)
-      StripLevel = StaticFuncStripDirNamePrefix;
-    if (StripLevel)
-      FileName = stripDirPrefix(FileName, StripLevel);
+    auto FileName = getStrippedSourceFileName(F);
     return getPGOFuncName(F.getName(), F.getLinkage(), FileName, Version);
   }
 
   // In LTO mode (when InLTO is true), first check if there is a meta data.
-  if (MDNode *MD = getPGOFuncNameMetadata(F)) {
-    StringRef S = cast<MDString>(MD->getOperand(0))->getString();
-    return S.str();
-  }
+  if (auto PGOFuncName = lookupPGOFuncName(F))
+    return *PGOFuncName;
 
   // If there is no meta data, the function must be a global before the value
   // profile annotation pass. Its current linkage may be internal if it is
@@ -300,6 +355,15 @@ std::string getPGOFuncName(const Function &F, bool InLTO, uint64_t Version) {
   return getPGOFuncName(F.getName(), GlobalValue::ExternalLinkage, "");
 }
 
+// See getIRPGOFuncName() for a discription of the format.
+std::pair<StringRef, StringRef>
+getParsedIRPGOFuncName(StringRef IRPGOFuncName) {
+  auto [FileName, FuncName] = IRPGOFuncName.split(';');
+  if (FuncName.empty())
+    return std::make_pair(StringRef(), IRPGOFuncName);
+  return std::make_pair(FileName, FuncName);
+}
+
 StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName) {
   if (FileName.empty())
     return PGOFuncName;
@@ -320,7 +384,7 @@ std::string getPGOFuncNameVarName(StringRef FuncName,
     return VarName;
 
   // Now fix up illegal chars in local VarName that may upset the assembler.
-  const char *InvalidChars = "-:<>/\"'";
+  const char InvalidChars[] = "-:;<>/\"'";
   size_t found = VarName.find_first_of(InvalidChars);
   while (found != std::string::npos) {
     VarName[found] = '_';
@@ -366,41 +430,49 @@ Error InstrProfSymtab::create(Module &M, bool InLTO) {
     // Ignore in this case.
     if (!F.hasName())
       continue;
-    const std::string &PGOFuncName = getPGOFuncName(F, InLTO);
-    if (Error E = addFuncName(PGOFuncName))
+    if (Error E = addFuncWithName(F, getIRPGOFuncName(F, InLTO)))
+      return E;
+    // Also use getPGOFuncName() so that we can find records from older profiles
+    if (Error E = addFuncWithName(F, getPGOFuncName(F, InLTO)))
       return E;
-    MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
-    // In ThinLTO, local function may have been promoted to global and have
-    // suffix ".llvm." added to the function name. We need to add the
-    // stripped function name to the symbol table so that we can find a match
-    // from profile.
-    //
-    // We may have other suffixes similar as ".llvm." which are needed to
-    // be stripped before the matching, but ".__uniq." suffix which is used
-    // to 
diff erentiate internal linkage functions in 
diff erent modules
-    // should be kept. Now this is the only suffix with the pattern ".xxx"
-    // which is kept before matching.
-    const std::string UniqSuffix = ".__uniq.";
-    auto pos = PGOFuncName.find(UniqSuffix);
-    // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
-    // search '.' from the beginning.
-    if (pos != std::string::npos)
-      pos += UniqSuffix.length();
-    else
-      pos = 0;
-    pos = PGOFuncName.find('.', pos);
-    if (pos != std::string::npos && pos != 0) {
-      const std::string &OtherFuncName = PGOFuncName.substr(0, pos);
-      if (Error E = addFuncName(OtherFuncName))
-        return E;
-      MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
-    }
   }
   Sorted = false;
   finalizeSymtab();
   return Error::success();
 }
 
+Error InstrProfSymtab::addFuncWithName(Function &F, StringRef PGOFuncName) {
+  if (Error E = addFuncName(PGOFuncName))
+    return E;
+  MD5FuncMap.emplace_back(Function::getGUID(PGOFuncName), &F);
+  // In ThinLTO, local function may have been promoted to global and have
+  // suffix ".llvm." added to the function name. We need to add the
+  // stripped function name to the symbol table so that we can find a match
+  // from profile.
+  //
+  // We may have other suffixes similar as ".llvm." which are needed to
+  // be stripped before the matching, but ".__uniq." suffix which is used
+  // to 
diff erentiate internal linkage functions in 
diff erent modules
+  // should be kept. Now this is the only suffix with the pattern ".xxx"
+  // which is kept before matching.
+  const std::string UniqSuffix = ".__uniq.";
+  auto pos = PGOFuncName.find(UniqSuffix);
+  // Search '.' after ".__uniq." if ".__uniq." exists, otherwise
+  // search '.' from the beginning.
+  if (pos != std::string::npos)
+    pos += UniqSuffix.length();
+  else
+    pos = 0;
+  pos = PGOFuncName.find('.', pos);
+  if (pos != std::string::npos && pos != 0) {
+    StringRef OtherFuncName = PGOFuncName.substr(0, pos);
+    if (Error E = addFuncName(OtherFuncName))
+      return E;
+    MD5FuncMap.emplace_back(Function::getGUID(OtherFuncName), &F);
+  }
+  return Error::success();
+}
+
 uint64_t InstrProfSymtab::getFunctionHashFromAddress(uint64_t Address) {
   finalizeSymtab();
   auto It = partition_point(AddrToMD5Map, [=](std::pair<uint64_t, uint64_t> A) {

diff  --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 4160f7e6dfd557..7f856c4802dad7 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -1214,12 +1214,25 @@ InstrProfSymtab &IndexedInstrProfReader::getSymtab() {
 }
 
 Expected<InstrProfRecord> IndexedInstrProfReader::getInstrProfRecord(
-    StringRef FuncName, uint64_t FuncHash, uint64_t *MismatchedFuncSum) {
+    StringRef FuncName, uint64_t FuncHash, StringRef DeprecatedFuncName,
+    uint64_t *MismatchedFuncSum) {
   ArrayRef<NamedInstrProfRecord> Data;
   uint64_t FuncSum = 0;
-  Error Err = Remapper->getRecords(FuncName, Data);
-  if (Err)
-    return std::move(Err);
+  auto Err = Remapper->getRecords(FuncName, Data);
+  if (Err) {
+    // If we don't find FuncName, try DeprecatedFuncName to handle profiles
+    // built by older compilers.
+    auto Err2 =
+        handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
+          if (IE.get() != instrprof_error::unknown_function)
+            return make_error<InstrProfError>(IE);
+          if (auto Err = Remapper->getRecords(DeprecatedFuncName, Data))
+            return Err;
+          return Error::success();
+        });
+    if (Err2)
+      return std::move(Err2);
+  }
   // Found it. Look for counters with the right hash.
 
   // A flag to indicate if the records are from the same type

diff  --git a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
index 789ed005d03ddf..453dd00488b8ff 100644
--- a/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
+++ b/llvm/lib/Transforms/Instrumentation/MemProfiler.cpp
@@ -679,12 +679,26 @@ static void readMemprof(Module &M, Function &F,
                         const TargetLibraryInfo &TLI) {
   auto &Ctx = M.getContext();
 
-  auto FuncName = getPGOFuncName(F);
+  auto FuncName = getIRPGOFuncName(F);
   auto FuncGUID = Function::getGUID(FuncName);
-  Expected<memprof::MemProfRecord> MemProfResult =
-      MemProfReader->getMemProfRecord(FuncGUID);
-  if (Error E = MemProfResult.takeError()) {
-    handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
+  std::optional<memprof::MemProfRecord> MemProfRec;
+  auto Err = MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec);
+  if (Err) {
+    // If we don't find getIRPGOFuncName(), try getPGOFuncName() to handle
+    // profiles built by older compilers
+    Err = handleErrors(std::move(Err), [&](const InstrProfError &IE) -> Error {
+      if (IE.get() != instrprof_error::unknown_function)
+        return make_error<InstrProfError>(IE);
+      auto FuncName = getPGOFuncName(F);
+      auto FuncGUID = Function::getGUID(FuncName);
+      if (auto Err =
+              MemProfReader->getMemProfRecord(FuncGUID).moveInto(MemProfRec))
+        return Err;
+      return Error::success();
+    });
+  }
+  if (Err) {
+    handleAllErrors(std::move(Err), [&](const InstrProfError &IPE) {
       auto Err = IPE.get();
       bool SkipWarning = false;
       LLVM_DEBUG(dbgs() << "Error in reading profile for Func " << FuncName
@@ -722,15 +736,14 @@ static void readMemprof(Module &M, Function &F,
   // the frame array (see comments below where the map entries are added).
   std::map<uint64_t, std::set<std::pair<const SmallVector<Frame> *, unsigned>>>
       LocHashToCallSites;
-  const auto MemProfRec = std::move(MemProfResult.get());
-  for (auto &AI : MemProfRec.AllocSites) {
+  for (auto &AI : MemProfRec->AllocSites) {
     // Associate the allocation info with the leaf frame. The later matching
     // code will match any inlined call sequences in the IR with a longer prefix
     // of call stack frames.
     uint64_t StackId = computeStackId(AI.CallStack[0]);
     LocHashToAllocInfo[StackId].insert(&AI);
   }
-  for (auto &CS : MemProfRec.CallSites) {
+  for (auto &CS : MemProfRec->CallSites) {
     // Need to record all frames from leaf up to and including this function,
     // as any of these may or may not have been inlined at this point.
     unsigned Idx = 0;

diff  --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index 3c8f25d73c623d..1fe9c57e550a41 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -525,6 +525,7 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
   std::vector<std::vector<VPCandidateInfo>> ValueSites;
   SelectInstVisitor SIVisitor;
   std::string FuncName;
+  std::string DeprecatedFuncName;
   GlobalVariable *FuncNameVar;
 
   // CFG hash value for this function.
@@ -590,7 +591,8 @@ template <class Edge, class BBInfo> class FuncPGOInstrumentation {
       NumOfCSPGOBB += MST.BBInfos.size();
     }
 
-    FuncName = getPGOFuncName(F);
+    FuncName = getIRPGOFuncName(F);
+    DeprecatedFuncName = getPGOFuncName(F);
     computeCFGHash();
     if (!ComdatMembers.empty())
       renameComdatFunction();
@@ -1336,7 +1338,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
   auto &Ctx = M->getContext();
   uint64_t MismatchedFuncSum = 0;
   Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
-      FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+      FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
+      &MismatchedFuncSum);
   if (Error E = Result.takeError()) {
     handleInstrProfError(std::move(E), MismatchedFuncSum);
     return false;
@@ -1381,7 +1384,8 @@ bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros,
 void PGOUseFunc::populateCoverage(IndexedInstrProfReader *PGOReader) {
   uint64_t MismatchedFuncSum = 0;
   Expected<InstrProfRecord> Result = PGOReader->getInstrProfRecord(
-      FuncInfo.FuncName, FuncInfo.FunctionHash, &MismatchedFuncSum);
+      FuncInfo.FuncName, FuncInfo.FunctionHash, FuncInfo.DeprecatedFuncName,
+      &MismatchedFuncSum);
   if (auto Err = Result.takeError()) {
     handleInstrProfError(std::move(Err), MismatchedFuncSum);
     return;

diff  --git a/llvm/test/Transforms/PGOProfile/comdat_internal.ll b/llvm/test/Transforms/PGOProfile/comdat_internal.ll
index a1cf115f910ad7..1c44a274f3c047 100644
--- a/llvm/test/Transforms/PGOProfile/comdat_internal.ll
+++ b/llvm/test/Transforms/PGOProfile/comdat_internal.ll
@@ -13,7 +13,7 @@ $foo = comdat any
 ; CHECK: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
 ; CHECK-NOT: __profn__stdin__foo
 ; CHECK: @__profc__stdin__foo.[[#FOO_HASH]] = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat, align 8
-; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, ptr, ptr, i32, [2 x i16] } { i64 -5640069336071256030, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), ptr null
+; CHECK: @__profd__stdin__foo.[[#FOO_HASH]] = private global { i64, i64, i64, ptr, ptr, i32, [2 x i16] } { i64 {{.*}}, i64 [[#FOO_HASH]], i64 sub (i64 ptrtoint (ptr @__profc__stdin__foo.742261418966908927 to i64), i64 ptrtoint (ptr @__profd__stdin__foo.742261418966908927 to i64)), ptr null
 ; CHECK-NOT: @foo
 ; CHECK-SAME: , ptr null, i32 1, [2 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profc__stdin__foo.[[#FOO_HASH]]), align 8
 ; CHECK: @__llvm_prf_nm

diff  --git a/llvm/test/Transforms/PGOProfile/criticaledge.ll b/llvm/test/Transforms/PGOProfile/criticaledge.ll
index 616d0fa3a4cf2a..c24925c68fa32d 100644
--- a/llvm/test/Transforms/PGOProfile/criticaledge.ll
+++ b/llvm/test/Transforms/PGOProfile/criticaledge.ll
@@ -11,7 +11,7 @@ target triple = "x86_64-unknown-linux-gnu"
 ; GEN: $__llvm_profile_raw_version = comdat any
 ; GEN: @__llvm_profile_raw_version = hidden constant i64 {{[0-9]+}}, comdat
 ; GEN: @__profn_test_criticalEdge = private constant [17 x i8] c"test_criticalEdge"
-; GEN: @__profn__stdin__bar = private constant [11 x i8] c"<stdin>:bar"
+; GEN: @__profn__stdin__bar = private constant [11 x i8] c"<stdin>;bar"
 
 define i32 @test_criticalEdge(i32 %i, i32 %j) {
 entry:

diff  --git a/llvm/test/Transforms/PGOProfile/statics_counter_naming.ll b/llvm/test/Transforms/PGOProfile/statics_counter_naming.ll
index fe8df33324ab3e..4bffc87a93901d 100644
--- a/llvm/test/Transforms/PGOProfile/statics_counter_naming.ll
+++ b/llvm/test/Transforms/PGOProfile/statics_counter_naming.ll
@@ -4,8 +4,8 @@
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
 
-; NOPATH: @__profn_statics_counter_naming.ll_func = private constant [30 x i8] c"statics_counter_naming.ll:func"
-; HASPATH-NOT: @__profn_statics_counter_naming.ll_func = private constant [30 x i8] c"statics_counter_naming.ll:func"
+; NOPATH: @__profn_statics_counter_naming.ll_func = private constant [30 x i8] c"statics_counter_naming.ll;func"
+; HASPATH-NOT: @__profn_statics_counter_naming.ll_func = private constant [30 x i8] c"statics_counter_naming.ll;func"
 
 define internal i32 @func() {
 entry:

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index da10ddcc58c642..9da5368f5a4233 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -3069,17 +3069,11 @@ static int order_main(int argc, const char *argv[]) {
 
   WithColor::note() << "# Ordered " << Nodes.size() << " functions\n";
   for (auto &N : Nodes) {
-    auto FuncName = Reader->getSymtab().getFuncName(N.Id);
-    if (FuncName.contains(':')) {
-      // GlobalValue::getGlobalIdentifier() prefixes the filename if the symbol
-      // is local. This logic will break if there is a colon in the filename,
-      // but we cannot use rsplit() because ObjC symbols can have colons.
-      auto [Filename, ParsedFuncName] = FuncName.split(':');
-      // Emit a comment describing where this symbol came from
+    auto [Filename, ParsedFuncName] =
+        getParsedIRPGOFuncName(Reader->getSymtab().getFuncName(N.Id));
+    if (!Filename.empty())
       OS << "# " << Filename << "\n";
-      FuncName = ParsedFuncName;
-    }
-    OS << FuncName << "\n";
+    OS << ParsedFuncName << "\n";
   }
   return 0;
 }

diff  --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp
index 30c4dac0bd2f25..76f88c058424d0 100644
--- a/llvm/unittests/ProfileData/InstrProfTest.cpp
+++ b/llvm/unittests/ProfileData/InstrProfTest.cpp
@@ -17,11 +17,11 @@
 #include "llvm/Support/Compression.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Testing/Support/Error.h"
-#include "llvm/Testing/Support/SupportHelpers.h"
 #include "gtest/gtest.h"
 #include <cstdarg>
 
 using namespace llvm;
+using ::testing::EndsWith;
 using ::testing::IsSubsetOf;
 using ::testing::SizeIs;
 using ::testing::UnorderedElementsAre;
@@ -520,6 +520,119 @@ TEST_F(InstrProfTest, test_memprof_merge) {
   EXPECT_THAT(WantRecord, EqualsRecord(Record));
 }
 
+TEST_F(InstrProfTest, test_irpgo_function_name) {
+  LLVMContext Ctx;
+  auto M = std::make_unique<Module>("MyModule.cpp", Ctx);
+  // Use Mach-O mangling so that non-private symbols get a `_` prefix.
+  M->setDataLayout(DataLayout("m:o"));
+  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
+
+  std::vector<std::tuple<StringRef, Function::LinkageTypes, StringRef>> Data;
+  Data.emplace_back("ExternalFoo", Function::ExternalLinkage, "_ExternalFoo");
+  Data.emplace_back("InternalFoo", Function::InternalLinkage,
+                    "MyModule.cpp;_InternalFoo");
+  Data.emplace_back("PrivateFoo", Function::PrivateLinkage,
+                    "MyModule.cpp;l_PrivateFoo");
+  Data.emplace_back("WeakODRFoo", Function::WeakODRLinkage, "_WeakODRFoo");
+  // Test Objective-C symbols
+  Data.emplace_back("\01-[C dynamicFoo:]", Function::ExternalLinkage,
+                    "-[C dynamicFoo:]");
+  Data.emplace_back("-<C directFoo:>", Function::ExternalLinkage,
+                    "_-<C directFoo:>");
+  Data.emplace_back("\01-[C internalFoo:]", Function::InternalLinkage,
+                    "MyModule.cpp;-[C internalFoo:]");
+
+  for (auto &[Name, Linkage, ExpectedIRPGOFuncName] : Data)
+    Function::Create(FTy, Linkage, Name, M.get());
+
+  for (auto &[Name, Linkage, ExpectedIRPGOFuncName] : Data) {
+    auto *F = M->getFunction(Name);
+    auto IRPGOFuncName = getIRPGOFuncName(*F);
+    EXPECT_EQ(IRPGOFuncName, ExpectedIRPGOFuncName);
+
+    auto [Filename, ParsedIRPGOFuncName] =
+        getParsedIRPGOFuncName(IRPGOFuncName);
+    StringRef ExpectedParsedIRPGOFuncName = IRPGOFuncName;
+    if (ExpectedParsedIRPGOFuncName.consume_front("MyModule.cpp;")) {
+      EXPECT_EQ(Filename, "MyModule.cpp");
+    } else {
+      EXPECT_EQ(Filename, "");
+    }
+    EXPECT_EQ(ParsedIRPGOFuncName, ExpectedParsedIRPGOFuncName);
+  }
+}
+
+TEST_F(InstrProfTest, test_pgo_function_name) {
+  LLVMContext Ctx;
+  auto M = std::make_unique<Module>("MyModule.cpp", Ctx);
+  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
+
+  std::vector<std::tuple<StringRef, Function::LinkageTypes, StringRef>> Data;
+  Data.emplace_back("ExternalFoo", Function::ExternalLinkage, "ExternalFoo");
+  Data.emplace_back("InternalFoo", Function::InternalLinkage,
+                    "MyModule.cpp:InternalFoo");
+  Data.emplace_back("PrivateFoo", Function::PrivateLinkage,
+                    "MyModule.cpp:PrivateFoo");
+  Data.emplace_back("WeakODRFoo", Function::WeakODRLinkage, "WeakODRFoo");
+  // Test Objective-C symbols
+  Data.emplace_back("\01-[C externalFoo:]", Function::ExternalLinkage,
+                    "-[C externalFoo:]");
+  Data.emplace_back("\01-[C internalFoo:]", Function::InternalLinkage,
+                    "MyModule.cpp:-[C internalFoo:]");
+
+  for (auto &[Name, Linkage, ExpectedPGOFuncName] : Data)
+    Function::Create(FTy, Linkage, Name, M.get());
+
+  for (auto &[Name, Linkage, ExpectedPGOFuncName] : Data) {
+    auto *F = M->getFunction(Name);
+    EXPECT_EQ(getPGOFuncName(*F), ExpectedPGOFuncName);
+  }
+}
+
+TEST_F(InstrProfTest, test_irpgo_read_deprecated_names) {
+  LLVMContext Ctx;
+  auto M = std::make_unique<Module>("MyModule.cpp", Ctx);
+  // Use Mach-O mangling so that non-private symbols get a `_` prefix.
+  M->setDataLayout(DataLayout("m:o"));
+  auto *FTy = FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false);
+  auto *InternalFooF =
+      Function::Create(FTy, Function::InternalLinkage, "InternalFoo", M.get());
+  auto *ExternalFooF =
+      Function::Create(FTy, Function::ExternalLinkage, "ExternalFoo", M.get());
+
+  auto *InternalBarF =
+      Function::Create(FTy, Function::InternalLinkage, "InternalBar", M.get());
+  auto *ExternalBarF =
+      Function::Create(FTy, Function::ExternalLinkage, "ExternalBar", M.get());
+
+  Writer.addRecord({getIRPGOFuncName(*InternalFooF), 0x1234, {1}}, Err);
+  Writer.addRecord({getIRPGOFuncName(*ExternalFooF), 0x5678, {1}}, Err);
+  // Write a record with a deprecated name
+  Writer.addRecord({getPGOFuncName(*InternalBarF), 0x1111, {2}}, Err);
+  Writer.addRecord({getPGOFuncName(*ExternalBarF), 0x2222, {2}}, Err);
+
+  auto Profile = Writer.writeBuffer();
+  readProfile(std::move(Profile));
+
+  EXPECT_THAT_EXPECTED(
+      Reader->getInstrProfRecord(getIRPGOFuncName(*InternalFooF), 0x1234,
+                                 getPGOFuncName(*InternalFooF)),
+      Succeeded());
+  EXPECT_THAT_EXPECTED(
+      Reader->getInstrProfRecord(getIRPGOFuncName(*ExternalFooF), 0x5678,
+                                 getPGOFuncName(*ExternalFooF)),
+      Succeeded());
+  // Ensure we can still read this old record name
+  EXPECT_THAT_EXPECTED(
+      Reader->getInstrProfRecord(getIRPGOFuncName(*InternalBarF), 0x1111,
+                                 getPGOFuncName(*InternalBarF)),
+      Succeeded());
+  EXPECT_THAT_EXPECTED(
+      Reader->getInstrProfRecord(getIRPGOFuncName(*ExternalBarF), 0x2222,
+                                 getPGOFuncName(*ExternalBarF)),
+      Succeeded());
+}
+
 static const char callee1[] = "callee1";
 static const char callee2[] = "callee2";
 static const char callee3[] = "callee3";
@@ -1215,12 +1328,19 @@ TEST_P(MaybeSparseInstrProfTest, instr_prof_symtab_module_test) {
 
   for (unsigned I = 0; I < std::size(Funcs); I++) {
     Function *F = M->getFunction(Funcs[I]);
-    ASSERT_TRUE(F != nullptr);
+
+    std::string IRPGOName = getIRPGOFuncName(*F);
+    auto IRPGOFuncName =
+        ProfSymtab.getFuncName(IndexedInstrProf::ComputeHash(IRPGOName));
+    EXPECT_EQ(StringRef(IRPGOName), IRPGOFuncName);
+    EXPECT_EQ(StringRef(Funcs[I]),
+              getParsedIRPGOFuncName(IRPGOFuncName).second);
+    // Ensure we can still read this old record name.
     std::string PGOName = getPGOFuncName(*F);
-    uint64_t Key = IndexedInstrProf::ComputeHash(PGOName);
-    ASSERT_EQ(StringRef(PGOName),
-              ProfSymtab.getFuncName(Key));
-    ASSERT_EQ(StringRef(Funcs[I]), ProfSymtab.getOrigFuncName(Key));
+    auto PGOFuncName =
+        ProfSymtab.getFuncName(IndexedInstrProf::ComputeHash(PGOName));
+    EXPECT_EQ(StringRef(PGOName), PGOFuncName);
+    EXPECT_THAT(PGOFuncName.str(), EndsWith(Funcs[I].str()));
   }
 }
 


        


More information about the llvm-commits mailing list