[PATCH] D159132: [memprof] Canonicalize the function name prior to hashing.

Snehasish Kumar via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 29 13:45:59 PDT 2023


This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rG0edc32fda5a7: [memprof] Canonicalize the function name prior to hashing. (authored by snehasish).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D159132/new/

https://reviews.llvm.org/D159132

Files:
  llvm/lib/ProfileData/MemProf.cpp
  llvm/lib/ProfileData/RawMemProfReader.cpp
  llvm/unittests/ProfileData/MemProfTest.cpp


Index: llvm/unittests/ProfileData/MemProfTest.cpp
===================================================================
--- llvm/unittests/ProfileData/MemProfTest.cpp
+++ llvm/unittests/ProfileData/MemProfTest.cpp
@@ -99,7 +99,7 @@
 MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") {
   const Frame &F = arg;
 
-  const uint64_t ExpectedHash = llvm::Function::getGUID(FunctionName);
+  const uint64_t ExpectedHash = IndexedMemProfRecord::getGUID(FunctionName);
   if (F.Function != ExpectedHash) {
     *result_listener << "Hash mismatch";
     return false;
@@ -147,7 +147,7 @@
                                                 specifier(), false))
       .Times(1)
       .WillRepeatedly(Return(makeInliningInfo({
-          {"xyz", 10, 5, 30},
+          {"xyz.llvm.123", 10, 5, 30},
           {"abc", 10, 5, 30},
       })));
 
Index: llvm/lib/ProfileData/RawMemProfReader.cpp
===================================================================
--- llvm/lib/ProfileData/RawMemProfReader.cpp
+++ llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
@@ -507,12 +508,16 @@
         const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                       // Only the last entry is not an inlined location.
                       I != NumFrames - 1);
-        // Here we retain a mapping from the GUID to symbol name instead of
-        // adding it to the frame object directly to reduce memory overhead.
-        // This is because there can be many unique frames, particularly for
-        // callsite frames.
-        if (KeepSymbolName)
-          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+        // Here we retain a mapping from the GUID to canonical symbol name
+        // instead of adding it to the frame object directly to reduce memory
+        // overhead. This is because there can be many unique frames,
+        // particularly for callsite frames.
+        if (KeepSymbolName) {
+          StringRef CanonicalName =
+              sampleprof::FunctionSamples::getCanonicalFnName(
+                  DIFrame.FunctionName);
+          GuidToSymbolName.insert({Guid, CanonicalName.str()});
+        }
 
         const FrameId Hash = F.hash();
         IdToFrame.insert({Hash, F});
Index: llvm/lib/ProfileData/MemProf.cpp
===================================================================
--- llvm/lib/ProfileData/MemProf.cpp
+++ llvm/lib/ProfileData/MemProf.cpp
@@ -2,6 +2,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Function.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
 
@@ -71,14 +72,17 @@
 }
 
 GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
-  const auto Pos = FunctionName.find(".llvm.");
+  // Canonicalize the function name to drop suffixes such as ".llvm." and
+  // ".uniq.". This lets us match functions in the profile use phase, before
+  // these suffixes are added. Note that this applies to both instrumented and
+  // sampled function names.
+  StringRef CanonicalName =
+      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);
 
   // We use the function guid which we expect to be a uint64_t. At
-  // this time, it is the lower 64 bits of the md5 of the function
-  // name. Any suffix with .llvm. is trimmed since these are added by
-  // thinLTO global promotion. At the time the profile is consumed,
-  // these suffixes will not be present.
-  return Function::getGUID(FunctionName.take_front(Pos));
+  // this time, it is the lower 64 bits of the md5 of the canonical
+  // function name.
+  return Function::getGUID(CanonicalName);
 }
 
 Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D159132.554482.patch
Type: text/x-patch
Size: 4072 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20230829/153556c7/attachment.bin>


More information about the llvm-commits mailing list