[llvm] [SPGO] Use std::hash instead of MD5 to avoid run time regression in llvm-profgen (PR #180581)

via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 9 12:04:38 PST 2026


https://github.com/HighW4y2H3ll updated https://github.com/llvm/llvm-project/pull/180581

>From e46e4ccd6fb6d3e8db7164e078efa97315018aa8 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 9 Feb 2026 10:21:51 -0800
Subject: [PATCH 1/2] [SPGO] Use std::hash instead of MD5 to avoid run time
 regression

---
 llvm/include/llvm/ProfileData/SampleProf.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index b75dffaff19f7..8766ab23ac1da 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -522,7 +522,9 @@ struct SampleContextFrame {
   }
 
   uint64_t getHashCode() const {
-    uint64_t NameHash = Func.getHashCode();
+    // Context frame hash is heavily used in llvm-profgen context-sensitive
+    // pre-inliner. Use a lightweight hashing here to avoid speed regression.
+    uint64_t NameHash = std::hash<std::string>{}(Func.str());
     uint64_t LocId = Location.getHashCode();
     return NameHash + (LocId << 5) + LocId;
   }

>From f68533c5d959c87e3b8e005da41549dfbc724190 Mon Sep 17 00:00:00 2001
From: h2h <h2h at meta.com>
Date: Mon, 9 Feb 2026 12:04:20 -0800
Subject: [PATCH 2/2] Don't recompute hash if FunctionId is MD5 already

---
 llvm/include/llvm/ProfileData/SampleProf.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 8766ab23ac1da..14243688d075a 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -524,7 +524,11 @@ struct SampleContextFrame {
   uint64_t getHashCode() const {
     // Context frame hash is heavily used in llvm-profgen context-sensitive
     // pre-inliner. Use a lightweight hashing here to avoid speed regression.
-    uint64_t NameHash = std::hash<std::string>{}(Func.str());
+    uint64_t NameHash = 0;
+    if (Func.isStringRef())
+      NameHash = std::hash<std::string>{}(Func.str());
+    else
+      NameHash = Func.getHashCode();
     uint64_t LocId = Location.getHashCode();
     return NameHash + (LocId << 5) + LocId;
   }



More information about the llvm-commits mailing list