[PATCH] D159132: [memprof] Canonicalize the function name prior to hashing.

Snehasish Kumar via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 29 12:02:07 PDT 2023


snehasish created this revision.
snehasish added a reviewer: tejohnson.
Herald added a subscriber: hiraditya.
Herald added a project: All.
snehasish requested review of this revision.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Canonicalize the function name (strip suffixes etc) to ensure that
function name suffixes added by late stage passes do not cause
mismatches when memprof profile data is consumed.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D159132

Files:
  llvm/lib/ProfileData/MemProf.cpp
  llvm/lib/ProfileData/RawMemProfReader.cpp
  llvm/unittests/ProfileData/MemProfTest.cpp


Index: llvm/unittests/ProfileData/MemProfTest.cpp
===================================================================
--- llvm/unittests/ProfileData/MemProfTest.cpp
+++ llvm/unittests/ProfileData/MemProfTest.cpp
@@ -99,7 +99,7 @@
 MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") {
   const Frame &F = arg;
 
-  const uint64_t ExpectedHash = llvm::Function::getGUID(FunctionName);
+  const uint64_t ExpectedHash = IndexedMemProfRecord::getGUID(FunctionName);
   if (F.Function != ExpectedHash) {
     *result_listener << "Hash mismatch";
     return false;
@@ -147,7 +147,7 @@
                                                 specifier(), false))
       .Times(1)
       .WillRepeatedly(Return(makeInliningInfo({
-          {"xyz", 10, 5, 30},
+          {"xyz.llvm.123", 10, 5, 30},
           {"abc", 10, 5, 30},
       })));
 
Index: llvm/lib/ProfileData/RawMemProfReader.cpp
===================================================================
--- llvm/lib/ProfileData/RawMemProfReader.cpp
+++ llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
@@ -507,12 +508,16 @@
         const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                       // Only the last entry is not an inlined location.
                       I != NumFrames - 1);
-        // Here we retain a mapping from the GUID to symbol name instead of
-        // adding it to the frame object directly to reduce memory overhead.
-        // This is because there can be many unique frames, particularly for
-        // callsite frames.
-        if (KeepSymbolName)
-          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+        // Here we retain a mapping from the GUID to canonical symbol name
+        // instead of adding it to the frame object directly to reduce memory
+        // overhead. This is because there can be many unique frames,
+        // particularly for callsite frames.
+        if (KeepSymbolName) {
+          StringRef CanonicalName =
+              sampleprof::FunctionSamples::getCanonicalFnName(
+                  DIFrame.FunctionName);
+          GuidToSymbolName.insert({Guid, CanonicalName.str()});
+        }
 
         const FrameId Hash = F.hash();
         IdToFrame.insert({Hash, F});
Index: llvm/lib/ProfileData/MemProf.cpp
===================================================================
--- llvm/lib/ProfileData/MemProf.cpp
+++ llvm/lib/ProfileData/MemProf.cpp
@@ -2,6 +2,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Function.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
 
@@ -71,14 +72,19 @@
 }
 
 GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
-  const auto Pos = FunctionName.find(".llvm.");
+  // Canonicalize the function name to drop suffixes such as ".llvm.", ".uniq."
+  // etc. We can then match functions in the profile use phase prior to the
+  // addition of these suffixes. Note that this applies to both instrumented and
+  // sampled function names.
+  StringRef CanonicalName =
+      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);
 
   // We use the function guid which we expect to be a uint64_t. At
   // this time, it is the lower 64 bits of the md5 of the function
   // name. Any suffix with .llvm. is trimmed since these are added by
   // thinLTO global promotion. At the time the profile is consumed,
   // these suffixes will not be present.
-  return Function::getGUID(FunctionName.take_front(Pos));
+  return Function::getGUID(CanonicalName);
 }
 
 Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D159132.554449.patch
Type: text/x-patch
Size: 3982 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230829/e867cd75/attachment.bin>


More information about the llvm-commits mailing list