[llvm] 0edc32f - [memprof] Canonicalize the function name prior to hashing.

Snehasish Kumar via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 29 13:45:46 PDT 2023


Author: Snehasish Kumar
Date: 2023-08-29T20:45:39Z
New Revision: 0edc32fda5a75c6ea8719f709baa3ad493942512

URL: https://github.com/llvm/llvm-project/commit/0edc32fda5a75c6ea8719f709baa3ad493942512
DIFF: https://github.com/llvm/llvm-project/commit/0edc32fda5a75c6ea8719f709baa3ad493942512.diff

LOG: [memprof] Canonicalize the function name prior to hashing.

Canonicalize the function name (strip suffixes, etc.) to ensure that
function name suffixes added by late-stage passes do not cause
mismatches when memprof profile data is consumed.

Reviewed By: tejohnson

Differential Revision: https://reviews.llvm.org/D159132

Added: 
    

Modified: 
    llvm/lib/ProfileData/MemProf.cpp
    llvm/lib/ProfileData/RawMemProfReader.cpp
    llvm/unittests/ProfileData/MemProfTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 3d44cf0b4c37cc..3255cba4dd0ca8 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -2,6 +2,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/IR/Function.h"
 #include "llvm/ProfileData/InstrProf.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/EndianStream.h"
 
@@ -71,14 +72,17 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema,
 }
 
 GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) {
-  const auto Pos = FunctionName.find(".llvm.");
+  // Canonicalize the function name to drop suffixes such as ".llvm.", ".uniq."
+  // etc. We can then match functions in the profile use phase prior to the
+  // addition of these suffixes. Note that this applies to both instrumented and
+  // sampled function names.
+  StringRef CanonicalName =
+      sampleprof::FunctionSamples::getCanonicalFnName(FunctionName);
 
   // We use the function guid which we expect to be a uint64_t. At
-  // this time, it is the lower 64 bits of the md5 of the function
-  // name. Any suffix with .llvm. is trimmed since these are added by
-  // thinLTO global promotion. At the time the profile is consumed,
-  // these suffixes will not be present.
-  return Function::getGUID(FunctionName.take_front(Pos));
+  // this time, it is the lower 64 bits of the md5 of the canonical
+  // function name.
+  return Function::getGUID(CanonicalName);
 }
 
 Expected<MemProfSchema> readMemProfSchema(const unsigned char *&Buffer) {

diff  --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index bccb205fb24335..dbf67ecae6846b 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -34,6 +34,7 @@
 #include "llvm/ProfileData/MemProf.h"
 #include "llvm/ProfileData/MemProfData.inc"
 #include "llvm/ProfileData/RawMemProfReader.h"
+#include "llvm/ProfileData/SampleProf.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/Error.h"
@@ -507,12 +508,16 @@ Error RawMemProfReader::symbolizeAndFilterStackFrames() {
         const Frame F(Guid, DIFrame.Line - DIFrame.StartLine, DIFrame.Column,
                       // Only the last entry is not an inlined location.
                       I != NumFrames - 1);
-        // Here we retain a mapping from the GUID to symbol name instead of
-        // adding it to the frame object directly to reduce memory overhead.
-        // This is because there can be many unique frames, particularly for
-        // callsite frames.
-        if (KeepSymbolName)
-          GuidToSymbolName.insert({Guid, DIFrame.FunctionName});
+        // Here we retain a mapping from the GUID to canonical symbol name
+        // instead of adding it to the frame object directly to reduce memory
+        // overhead. This is because there can be many unique frames,
+        // particularly for callsite frames.
+        if (KeepSymbolName) {
+          StringRef CanonicalName =
+              sampleprof::FunctionSamples::getCanonicalFnName(
+                  DIFrame.FunctionName);
+          GuidToSymbolName.insert({Guid, CanonicalName.str()});
+        }
 
         const FrameId Hash = F.hash();
         IdToFrame.insert({Hash, F});

diff  --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp
index 840ba8ea0ac066..8c01e4d0593232 100644
--- a/llvm/unittests/ProfileData/MemProfTest.cpp
+++ b/llvm/unittests/ProfileData/MemProfTest.cpp
@@ -99,7 +99,7 @@ const DILineInfoSpecifier specifier() {
 MATCHER_P4(FrameContains, FunctionName, LineOffset, Column, Inline, "") {
   const Frame &F = arg;
 
-  const uint64_t ExpectedHash = llvm::Function::getGUID(FunctionName);
+  const uint64_t ExpectedHash = IndexedMemProfRecord::getGUID(FunctionName);
   if (F.Function != ExpectedHash) {
     *result_listener << "Hash mismatch";
     return false;
@@ -147,7 +147,7 @@ TEST(MemProf, FillsValue) {
                                                 specifier(), false))
       .Times(1)
       .WillRepeatedly(Return(makeInliningInfo({
-          {"xyz", 10, 5, 30},
+          {"xyz.llvm.123", 10, 5, 30},
           {"abc", 10, 5, 30},
       })));
 


        


More information about the llvm-commits mailing list