[clang] db18f26 - [llvm-profdata] Handle internal linkage functions in profile supplementation

Rong Xu via cfe-commits cfe-commits at lists.llvm.org
Mon Aug 29 16:21:05 PDT 2022


Author: Rong Xu
Date: 2022-08-29T16:15:12-07:00
New Revision: db18f26567be47ec1da7fba48179a3001f0d34ab

URL: https://github.com/llvm/llvm-project/commit/db18f26567be47ec1da7fba48179a3001f0d34ab
DIFF: https://github.com/llvm/llvm-project/commit/db18f26567be47ec1da7fba48179a3001f0d34ab.diff

LOG: [llvm-profdata] Handle internal linkage functions in profile supplementation

This patch has the following changes:
(1) Handling of internal linkage functions (static functions)
Static functions in FDO have a prefix of source file name, while they do not
have one in SampleFDO. Current implementation does not handle this and we are
not updating the profile for static functions. This patch fixes this.

(2) Handling of -funique-internal-linakge-symbols
Again this is for the internal linkage functions. Option
-funique-internal-linakge-symbols can now be applied to both FDO and SampleFDO
compilation. When it is used, it demangles internal linkage function names and
adds a hash value as the postfix.

When both SampleFDO and FDO profiles use this option, or both
not use this option, changes in (1) should handle this.

Here we also handle when the SampleFDO profile using this option while FDO
profile not using this option, or vice versa.

There is one case where this patch won't work: If one of the profiles used
mangled name and the other does not. For example, if the SampleFDO profile
uses clang c-compiler and without -funique-internal-linakge-symbols, while
the FDO profile uses -funique-internal-linakge-symbols. The SampleFDO profile
contains unmangled names while the FDO profile contains mangled names. If
both profiles use c++ compiler, this won't happen. We think this use case
is rare and does not justify the effort to fix.

Differential Revision: https://reviews.llvm.org/D132600

Added: 
    llvm/test/tools/llvm-profdata/Inputs/FUnique.afdotext
    llvm/test/tools/llvm-profdata/Inputs/FUnique.proftext
    llvm/test/tools/llvm-profdata/Inputs/NoFUnique.afdotext
    llvm/test/tools/llvm-profdata/Inputs/NoFUnique.proftext
    llvm/test/tools/llvm-profdata/suppl-instr-with-sample-static-func.test

Modified: 
    clang/lib/CodeGen/CodeGenModule.cpp
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/tools/llvm-profdata/llvm-profdata.cpp

Removed: 
    


################################################################################
diff  --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp
index 62e906a87af48..bb0970e3e36fa 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -60,12 +60,12 @@
 #include "llvm/IR/Module.h"
 #include "llvm/IR/ProfileSummary.h"
 #include "llvm/ProfileData/InstrProfReader.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/CRC.h"
 #include "llvm/Support/CodeGen.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ConvertUTF.h"
 #include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/MD5.h"
 #include "llvm/Support/TimeProfiler.h"
 #include "llvm/Support/X86TargetParser.h"
 #include "llvm/Support/xxhash.h"
@@ -208,22 +208,7 @@ CodeGenModule::CodeGenModule(ASTContext &C,
         Path = Entry.second + Path.substr(Entry.first.size());
         break;
       }
-    llvm::MD5 Md5;
-    Md5.update(Path);
-    llvm::MD5::MD5Result R;
-    Md5.final(R);
-    SmallString<32> Str;
-    llvm::MD5::stringifyResult(R, Str);
-    // Convert MD5hash to Decimal. Demangler suffixes can either contain
-    // numbers or characters but not both.
-    llvm::APInt IntHash(128, Str.str(), 16);
-    // Prepend "__uniq" before the hash for tools like profilers to understand
-    // that this symbol is of internal linkage type.  The "__uniq" is the
-    // pre-determined prefix that is used to tell tools that this symbol was
-    // created with -funique-internal-linakge-symbols and the tools can strip or
-    // keep the prefix as needed.
-    ModuleNameHash = (Twine(".__uniq.") +
-        Twine(toString(IntHash, /* Radix = */ 10, /* Signed = */false))).str();
+    ModuleNameHash = llvm::getUniqueInternalLinkagePostfix(Path);
   }
 }
 

diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 1ad83c2f5b5af..a82b4544199fc 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -16,6 +16,7 @@
 
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/IR/Function.h"
@@ -1331,6 +1332,26 @@ template <> struct DenseMapInfo<SampleContext> {
     return LHS == RHS;
   }
 };
+
+// Prepend "__uniq" before the hash for tools like profilers to understand
+// that this symbol is of internal linkage type.  The "__uniq" is the
+// pre-determined prefix that is used to tell tools that this symbol was
+// created with -funique-internal-linakge-symbols and the tools can strip or
+// keep the prefix as needed.
+inline std::string getUniqueInternalLinkagePostfix(const StringRef &FName) {
+  llvm::MD5 Md5;
+  Md5.update(FName);
+  llvm::MD5::MD5Result R;
+  Md5.final(R);
+  SmallString<32> Str;
+  llvm::MD5::stringifyResult(R, Str);
+  // Convert MD5hash to Decimal. Demangler suffixes can either contain
+  // numbers or characters but not both.
+  llvm::APInt IntHash(128, Str.str(), 16);
+  return toString(IntHash, /* Radix = */ 10, /* Signed = */ false)
+      .insert(0, FunctionSamples::UniqSuffix);
+};
+
 } // end namespace llvm
 
 #endif // LLVM_PROFILEDATA_SAMPLEPROF_H

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/FUnique.afdotext b/llvm/test/tools/llvm-profdata/Inputs/FUnique.afdotext
new file mode 100644
index 0000000000000..fa3963346c8c1
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/FUnique.afdotext
@@ -0,0 +1,17 @@
+_ZL3foom.__uniq.276699478366846449772231447066107882794:14064855:0
+ 0: 0
+ 2.1: 0
+ 2.2: 290944
+ 2.3073: 290944
+ 3: 290944
+ 4: 256 bar:256
+ 5: 304048 bar:307919
+ 7: 0
+_Z3barmi:6447198:308175
+ 1: 302866
+ 2: 5551
+ 3: 296598
+ 3.13824: 5551
+ 3.2952803840: 296598
+ 4: 5551
+ 4.4160749568: 296598

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/FUnique.proftext b/llvm/test/tools/llvm-profdata/Inputs/FUnique.proftext
new file mode 100644
index 0000000000000..be2c27df9746c
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/FUnique.proftext
@@ -0,0 +1,30 @@
+# IR level Instrumentation Flag
+:ir
+_Z3barmi
+# Func Hash:
+784007056844089447
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+main
+# Func Hash:
+784007059655560962
+# Num Counters:
+2
+# Counter Values:
+1
+0
+
+test.c:_ZL3foom.__uniq.276699478366846449772231447066107882794
+# Func Hash:
+1124680652115249575
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/NoFUnique.afdotext b/llvm/test/tools/llvm-profdata/Inputs/NoFUnique.afdotext
new file mode 100644
index 0000000000000..34a9088da41b3
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/NoFUnique.afdotext
@@ -0,0 +1,17 @@
+_ZL3foom:14064855:0
+ 0: 0
+ 2.1: 0
+ 2.2: 290944
+ 2.3073: 290944
+ 3: 290944
+ 4: 256 bar:256
+ 5: 304048 bar:307919
+ 7: 0
+_Z3barmi:6447198:308175
+ 1: 302866
+ 2: 5551
+ 3: 296598
+ 3.13824: 5551
+ 3.2952803840: 296598
+ 4: 5551
+ 4.4160749568: 296598

diff  --git a/llvm/test/tools/llvm-profdata/Inputs/NoFUnique.proftext b/llvm/test/tools/llvm-profdata/Inputs/NoFUnique.proftext
new file mode 100644
index 0000000000000..7485a33875e11
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/Inputs/NoFUnique.proftext
@@ -0,0 +1,30 @@
+# IR level Instrumentation Flag
+:ir
+_Z3barmi
+# Func Hash:
+784007056844089447
+# Num Counters:
+2
+# Counter Values:
+0
+0
+
+main
+# Func Hash:
+784007059655560962
+# Num Counters:
+2
+# Counter Values:
+1
+0
+
+test.c:_ZL3foom
+# Func Hash:
+1124680652115249575
+# Num Counters:
+3
+# Counter Values:
+0
+0
+0
+

diff  --git a/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-static-func.test b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-static-func.test
new file mode 100644
index 0000000000000..4d4c724f24b2f
--- /dev/null
+++ b/llvm/test/tools/llvm-profdata/suppl-instr-with-sample-static-func.test
@@ -0,0 +1,15 @@
+Some basic tests for supplementing instrumentation profile with sample profile for static funcs.
+
+RUN: llvm-profdata merge -supplement-instr-with-sample=%p/Inputs/NoFUnique.afdotext -suppl-min-size-threshold=2 %p/Inputs/NoFUnique.proftext -o %t1
+RUN: llvm-profdata show -function=foo -counts %t1 | FileCheck %s
+
+RUN: llvm-profdata merge -supplement-instr-with-sample=%p/Inputs/FUnique.afdotext -suppl-min-size-threshold=2 %p/Inputs/FUnique.proftext -o %t2
+RUN: llvm-profdata show -function=foo -counts %t2 | FileCheck %s
+
+RUN: llvm-profdata merge -supplement-instr-with-sample=%p/Inputs/NoFUnique.afdotext -suppl-min-size-threshold=2 %p/Inputs/FUnique.proftext -o %t3
+RUN: llvm-profdata show -function=foo -counts %t3 | FileCheck %s
+
+RUN: llvm-profdata merge -supplement-instr-with-sample=%p/Inputs/FUnique.afdotext -suppl-min-size-threshold=2 %p/Inputs/NoFUnique.proftext -o %t4
+RUN: llvm-profdata show -function=foo -counts %t4 | FileCheck %s
+
+CHECK: Block counts: [18446744073709551615, 18446744073709551615, 18446744073709551615]

diff  --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index de34f39eb391d..df7b43a2c5655 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -31,6 +31,7 @@
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/InitLLVM.h"
+#include "llvm/Support/MD5.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/ThreadPool.h"
@@ -42,6 +43,10 @@
 
 using namespace llvm;
 
+// We use this string to indicate that there are
+// multiple static functions map to the same name.
+const std::string DuplicateNameStr = "----";
+
 enum ProfileFormat {
   PF_None = 0,
   PF_Text,
@@ -480,7 +485,7 @@ InstrProfileEntry::InstrProfileEntry(InstrProfRecord *Record) {
   NumEdgeCounters = CntNum;
 }
 
-/// Either set all the counters in the instr profile entry \p IFE to -1
+// Either set all the counters in the instr profile entry \p IFE to -1
 /// in order to drop the profile or scale up the counters in \p IFP to
 /// be above hot threshold. We use the ratio of zero counters in the
 /// profile of a function to decide the profile is helpful or harmful
@@ -541,7 +546,73 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
                    unsigned InstrProfColdThreshold) {
   // Function to its entry in instr profile.
   StringMap<InstrProfileEntry> InstrProfileMap;
+  StringMap<StringRef> StaticFuncMap;
   InstrProfSummaryBuilder IPBuilder(ProfileSummaryBuilder::DefaultCutoffs);
+
+  auto checkSampleProfileHasFUnique = [&Reader]() {
+    for (const auto &PD : Reader->getProfiles()) {
+      auto &FContext = PD.first;
+      if (FContext.toString().find(FunctionSamples::UniqSuffix) !=
+          std::string::npos) {
+        return true;
+      }
+    }
+    return false;
+  };
+
+  bool SampleProfileHasFUnique = checkSampleProfileHasFUnique();
+
+  auto buildStaticFuncMap = [&StaticFuncMap,
+                             SampleProfileHasFUnique](const StringRef Name) {
+    std::string Prefixes[] = {".cpp:", "cc:", ".c:", ".hpp:", ".h:"};
+    size_t PrefixPos = StringRef::npos;
+    for (auto &Prefix : Prefixes) {
+      PrefixPos = Name.find_insensitive(Prefix);
+      if (PrefixPos == StringRef::npos)
+        continue;
+      PrefixPos += Prefix.size();
+      break;
+    }
+
+    if (PrefixPos == StringRef::npos) {
+      return;
+    }
+
+    StringRef NewName = Name.drop_front(PrefixPos);
+    StringRef FName = Name.substr(0, PrefixPos - 1);
+    if (NewName.size() == 0) {
+      return;
+    }
+
+    // This name should have a static linkage.
+    size_t PostfixPos = NewName.find(FunctionSamples::UniqSuffix);
+    bool ProfileHasFUnique = (PostfixPos != StringRef::npos);
+
+    // If sample profile and instrumented profile do not agree on symbol
+    // uniqification.
+    if (SampleProfileHasFUnique != ProfileHasFUnique) {
+      // If instrumented profile uses -funique-internal-linakge-symbols,
+      // we need to trim the name.
+      if (ProfileHasFUnique) {
+        NewName = NewName.substr(0, PostfixPos);
+      } else {
+        // If sample profile uses -funique-internal-linakge-symbols,
+        // we build the map.
+        std::string NStr =
+            NewName.str() + getUniqueInternalLinkagePostfix(FName);
+        NewName = StringRef(NStr);
+        StaticFuncMap[NewName] = Name;
+        return;
+      }
+    }
+
+    if (StaticFuncMap.find(NewName) == StaticFuncMap.end()) {
+      StaticFuncMap[NewName] = Name;
+    } else {
+      StaticFuncMap[NewName] = DuplicateNameStr;
+    }
+  };
+
   for (auto &PD : WC->Writer.getProfileData()) {
     // Populate IPBuilder.
     for (const auto &PDV : PD.getValue()) {
@@ -555,7 +626,9 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
 
     // Initialize InstrProfileMap.
     InstrProfRecord *R = &PD.getValue().begin()->second;
-    InstrProfileMap[PD.getKey()] = InstrProfileEntry(R);
+    StringRef FullName = PD.getKey();
+    InstrProfileMap[FullName] = InstrProfileEntry(R);
+    buildStaticFuncMap(FullName);
   }
 
   ProfileSummary InstrPS = *IPBuilder.getSummary();
@@ -583,16 +656,28 @@ adjustInstrProfile(std::unique_ptr<WriterContext> &WC,
   // Find hot/warm functions in sample profile which is cold in instr profile
   // and adjust the profiles of those functions in the instr profile.
   for (const auto &PD : Reader->getProfiles()) {
-    auto &FContext = PD.first;
     const sampleprof::FunctionSamples &FS = PD.second;
+    if (FS.getMaxCountInside() <= ColdSampleThreshold)
+      continue;
+    auto &FContext = PD.first;
     auto It = InstrProfileMap.find(FContext.toString());
-    if (FS.getMaxCountInside() > ColdSampleThreshold &&
-        It != InstrProfileMap.end() &&
-        It->second.MaxCount <= ColdInstrThreshold &&
-        It->second.NumEdgeCounters >= SupplMinSizeThreshold) {
-      updateInstrProfileEntry(It->second, HotInstrThreshold,
-                              ZeroCounterThreshold);
+    if (It == InstrProfileMap.end()) {
+      auto NewName = StaticFuncMap.find(FContext.toString());
+      if (NewName != StaticFuncMap.end()) {
+        It = InstrProfileMap.find(NewName->second.str());
+        if (NewName->second == DuplicateNameStr) {
+          WithColor::warning()
+              << "Static function " << FContext.toString()
+              << " has multiple promoted names, cannot adjust profile.\n";
+        }
+      }
     }
+    if (It == InstrProfileMap.end() ||
+        It->second.MaxCount > ColdInstrThreshold ||
+        It->second.NumEdgeCounters < SupplMinSizeThreshold)
+      continue;
+    updateInstrProfileEntry(It->second, HotInstrThreshold,
+                            ZeroCounterThreshold);
   }
 }
 


        


More information about the cfe-commits mailing list