[llvm] r260197 - Refactor PGO function naming and MD5 hashing support out of ProfileData

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 8 21:12:44 PST 2016


Author: tejohnson
Date: Mon Feb  8 23:12:44 2016
New Revision: 260197

URL: http://llvm.org/viewvc/llvm-project?rev=260197&view=rev
Log:
Refactor PGO function naming and MD5 hashing support out of ProfileData

Summary:
Move the function renaming logic into the Function class, and the
MD5Hash routine into the MD5 header.

This will enable these routines to be shared with ThinLTO, which
will be changed to store the MD5 hash instead of the full function name
in the combined index for significant size reductions. Using the same
function naming for locals in the function index also facilitates future
integration with indirect call value profiles.

Reviewers: davidxl

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D17006

Modified:
    llvm/trunk/include/llvm/IR/Function.h
    llvm/trunk/include/llvm/ProfileData/InstrProf.h
    llvm/trunk/include/llvm/Support/MD5.h
    llvm/trunk/lib/IR/Function.cpp
    llvm/trunk/lib/ProfileData/InstrProf.cpp

Modified: llvm/trunk/include/llvm/IR/Function.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/IR/Function.h?rev=260197&r1=260196&r2=260197&view=diff
==============================================================================
--- llvm/trunk/include/llvm/IR/Function.h (original)
+++ llvm/trunk/include/llvm/IR/Function.h Mon Feb  8 23:12:44 2016
@@ -639,6 +639,14 @@ public:
   /// to \a DISubprogram.
   DISubprogram *getSubprogram() const;
 
+  /// Return the modified name for a function suitable to be
+  /// used as the key for a global lookup (e.g. profile or ThinLTO).
+  /// The function's original name is \c FuncName and has linkage of type
+  /// \c Linkage. The function is defined in module \c FileName.
+  static std::string getGlobalIdentifier(StringRef FuncName,
+                                         GlobalValue::LinkageTypes Linkage,
+                                         StringRef FileName);
+
 private:
   void allocHungoffUselist();
   template<int Idx> void setHungoffOperand(Constant *C);

Modified: llvm/trunk/include/llvm/ProfileData/InstrProf.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ProfileData/InstrProf.h?rev=260197&r1=260196&r2=260197&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ProfileData/InstrProf.h (original)
+++ llvm/trunk/include/llvm/ProfileData/InstrProf.h Mon Feb  8 23:12:44 2016
@@ -626,21 +626,10 @@ enum class HashT : uint32_t {
   Last = MD5
 };
 
-static inline uint64_t MD5Hash(StringRef Str) {
-  MD5 Hash;
-  Hash.update(Str);
-  llvm::MD5::MD5Result Result;
-  Hash.final(Result);
-  // Return the least significant 8 bytes. Our MD5 implementation returns the
-  // result in little endian, so we may need to swap bytes.
-  using namespace llvm::support;
-  return endian::read<uint64_t, little, unaligned>(Result);
-}
-
 inline uint64_t ComputeHash(HashT Type, StringRef K) {
   switch (Type) {
   case HashT::MD5:
-    return IndexedInstrProf::MD5Hash(K);
+    return MD5Hash(K);
   }
   llvm_unreachable("Unhandled hash type");
 }

Modified: llvm/trunk/include/llvm/Support/MD5.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Support/MD5.h?rev=260197&r1=260196&r2=260197&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Support/MD5.h (original)
+++ llvm/trunk/include/llvm/Support/MD5.h Mon Feb  8 23:12:44 2016
@@ -31,6 +31,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/Support/DataTypes.h"
+#include "llvm/Support/Endian.h"
 
 namespace llvm {
 
@@ -65,6 +66,18 @@ private:
   const uint8_t *body(ArrayRef<uint8_t> Data);
 };
 
+/// Helper to compute and return a 64-bit MD5 Hash of a given string.
+inline uint64_t MD5Hash(StringRef Str) {
+  MD5 Hash;
+  Hash.update(Str);
+  llvm::MD5::MD5Result Result;
+  Hash.final(Result);
+  // Return the least significant 8 bytes. Our MD5 implementation returns the
+  // result in little endian, so we may need to swap bytes.
+  using namespace llvm::support;
+  return endian::read<uint64_t, little, unaligned>(Result);
+}
+
 }
 
 #endif

Modified: llvm/trunk/lib/IR/Function.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/IR/Function.cpp?rev=260197&r1=260196&r2=260197&view=diff
==============================================================================
--- llvm/trunk/lib/IR/Function.cpp (original)
+++ llvm/trunk/lib/IR/Function.cpp Mon Feb  8 23:12:44 2016
@@ -997,3 +997,27 @@ Optional<uint64_t> Function::getEntryCou
       }
   return None;
 }
+
+std::string Function::getGlobalIdentifier(StringRef FuncName,
+                                          GlobalValue::LinkageTypes Linkage,
+                                          StringRef FileName) {
+
+  // Function names may be prefixed with a binary '1' to indicate
+  // that the backend should not modify the symbols due to any platform
+  // naming convention. Do not include that '1' in the PGO profile name.
+  if (FuncName[0] == '\1')
+    FuncName = FuncName.substr(1);
+
+  std::string NewFuncName = FuncName;
+  if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
+    // For local symbols, prepend the main file name to distinguish them.
+    // Do not include the full path in the file name since there's no guarantee
+    // that it will stay the same, e.g., if the files are checked out from
+    // version control in different locations.
+    if (FileName.empty())
+      NewFuncName = NewFuncName.insert(0, "<unknown>:");
+    else
+      NewFuncName = NewFuncName.insert(0, FileName.str() + ":");
+  }
+  return NewFuncName;
+}

Modified: llvm/trunk/lib/ProfileData/InstrProf.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ProfileData/InstrProf.cpp?rev=260197&r1=260196&r2=260197&view=diff
==============================================================================
--- llvm/trunk/lib/ProfileData/InstrProf.cpp (original)
+++ llvm/trunk/lib/ProfileData/InstrProf.cpp Mon Feb  8 23:12:44 2016
@@ -80,25 +80,7 @@ std::string getPGOFuncName(StringRef Raw
                            GlobalValue::LinkageTypes Linkage,
                            StringRef FileName,
                            uint64_t Version LLVM_ATTRIBUTE_UNUSED) {
-
-  // Function names may be prefixed with a binary '1' to indicate
-  // that the backend should not modify the symbols due to any platform
-  // naming convention. Do not include that '1' in the PGO profile name.
-  if (RawFuncName[0] == '\1')
-    RawFuncName = RawFuncName.substr(1);
-
-  std::string FuncName = RawFuncName;
-  if (llvm::GlobalValue::isLocalLinkage(Linkage)) {
-    // For local symbols, prepend the main file name to distinguish them.
-    // Do not include the full path in the file name since there's no guarantee
-    // that it will stay the same, e.g., if the files are checked out from
-    // version control in different locations.
-    if (FileName.empty())
-      FuncName = FuncName.insert(0, "<unknown>:");
-    else
-      FuncName = FuncName.insert(0, FileName.str() + ":");
-  }
-  return FuncName;
+  return Function::getGlobalIdentifier(RawFuncName, Linkage, FileName);
 }
 
 std::string getPGOFuncName(const Function &F, uint64_t Version) {




More information about the llvm-commits mailing list