[llvm] a4bdb27 - [MemProf] Use profiled lifetime access density directly

Teresa Johnson via llvm-commits llvm-commits at lists.llvm.org
Tue May 2 15:19:49 PDT 2023


Author: Teresa Johnson
Date: 2023-05-02T15:19:34-07:00
New Revision: a4bdb27538c3bc5b757976e47d663e63880451e3

URL: https://github.com/llvm/llvm-project/commit/a4bdb27538c3bc5b757976e47d663e63880451e3
DIFF: https://github.com/llvm/llvm-project/commit/a4bdb27538c3bc5b757976e47d663e63880451e3.diff

LOG: [MemProf] Use profiled lifetime access density directly

Now that the runtime tracks the lifetime access density directly, we can
use it in the threshold checks instead of deriving a less accurate
estimate from other statistics.

Differential Revision: https://reviews.llvm.org/D149684

Added: 
    

Modified: 
    llvm/include/llvm/Analysis/MemoryProfileInfo.h
    llvm/lib/Analysis/MemoryProfileInfo.cpp
    llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
    llvm/test/ThinLTO/X86/memprof-basic.ll
    llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
    llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
    llvm/test/ThinLTO/X86/memprof-indirectcall.ll
    llvm/test/ThinLTO/X86/memprof-inlined.ll
    llvm/test/ThinLTO/X86/memprof-inlined2.ll
    llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
    llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
    llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
    llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
    llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
    llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
    llvm/unittests/Analysis/MemoryProfileInfoTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Analysis/MemoryProfileInfo.h b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
index 9a97c76a4dd04..bbade1b337916 100644
--- a/llvm/include/llvm/Analysis/MemoryProfileInfo.h
+++ b/llvm/include/llvm/Analysis/MemoryProfileInfo.h
@@ -24,8 +24,8 @@ namespace llvm {
 namespace memprof {
 
 /// Return the allocation type for a given set of memory profile values.
-AllocationType getAllocType(uint64_t MaxAccessCount, uint64_t MinSize,
-                            uint64_t MinLifetime);
+AllocationType getAllocType(uint64_t TotalLifetimeAccessDensity,
+                            uint64_t AllocCount, uint64_t TotalLifetime);
 
 /// Build callstack metadata from the provided list of call stack ids. Returns
 /// the resulting metadata node.

diff  --git a/llvm/lib/Analysis/MemoryProfileInfo.cpp b/llvm/lib/Analysis/MemoryProfileInfo.cpp
index e7284da2163b8..366b21be1fbce 100644
--- a/llvm/lib/Analysis/MemoryProfileInfo.cpp
+++ b/llvm/lib/Analysis/MemoryProfileInfo.cpp
@@ -18,25 +18,31 @@ using namespace llvm::memprof;
 
 #define DEBUG_TYPE "memory-profile-info"
 
-// Upper bound on accesses per byte for marking an allocation cold.
-cl::opt<float> MemProfAccessesPerByteColdThreshold(
-    "memprof-accesses-per-byte-cold-threshold", cl::init(10.0), cl::Hidden,
-    cl::desc("The threshold the accesses per byte must be under to consider "
-             "an allocation cold"));
+// Upper bound on lifetime access density (accesses per byte per lifetime sec)
+// for marking an allocation cold.
+cl::opt<float> MemProfLifetimeAccessDensityColdThreshold(
+    "memprof-lifetime-access-density-cold-threshold", cl::init(0.05),
+    cl::Hidden,
+    cl::desc("The threshold the lifetime access density (accesses per byte per "
+             "lifetime sec) must be under to consider an allocation cold"));
 
 // Lower bound on lifetime to mark an allocation cold (in addition to accesses
-// per byte above). This is to avoid pessimizing short lived objects.
-cl::opt<unsigned> MemProfMinLifetimeColdThreshold(
-    "memprof-min-lifetime-cold-threshold", cl::init(200), cl::Hidden,
-    cl::desc("The minimum lifetime (s) for an allocation to be considered "
+// per byte per sec above). This is to avoid pessimizing short lived objects.
+cl::opt<unsigned> MemProfAveLifetimeColdThreshold(
+    "memprof-ave-lifetime-cold-threshold", cl::init(200), cl::Hidden,
+    cl::desc("The average lifetime (s) for an allocation to be considered "
              "cold"));
 
-AllocationType llvm::memprof::getAllocType(uint64_t MaxAccessCount,
-                                           uint64_t MinSize,
-                                           uint64_t MinLifetime) {
-  if (((float)MaxAccessCount) / MinSize < MemProfAccessesPerByteColdThreshold &&
-      // MinLifetime is expected to be in ms, so convert the threshold to ms.
-      MinLifetime >= MemProfMinLifetimeColdThreshold * 1000)
+AllocationType llvm::memprof::getAllocType(uint64_t TotalLifetimeAccessDensity,
+                                           uint64_t AllocCount,
+                                           uint64_t TotalLifetime) {
+  // The access densities are multiplied by 100 to hold 2 decimal places of
+  // precision, so need to divide by 100.
+  if (((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
+          MemProfLifetimeAccessDensityColdThreshold
+      // Lifetime is expected to be in ms, so convert the threshold to ms.
+      && ((float)TotalLifetime) / AllocCount >=
+             MemProfAveLifetimeColdThreshold * 1000)
     return AllocationType::Cold;
   return AllocationType::NotCold;
 }

diff  --git a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
index e7fb036df2b48..2aea2a66cbb02 100644
--- a/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
+++ b/llvm/lib/Transforms/Instrumentation/PGOInstrumentation.cpp
@@ -1330,9 +1330,9 @@ static void addCallStack(CallStackTrie &AllocTrie,
   SmallVector<uint64_t> StackIds;
   for (const auto &StackFrame : AllocInfo->CallStack)
     StackIds.push_back(computeStackId(StackFrame));
-  auto AllocType = getAllocType(AllocInfo->Info.getMaxAccessCount(),
-                                AllocInfo->Info.getMinSize(),
-                                AllocInfo->Info.getMinLifetime());
+  auto AllocType = getAllocType(AllocInfo->Info.getTotalLifetimeAccessDensity(),
+                                AllocInfo->Info.getAllocCount(),
+                                AllocInfo->Info.getTotalLifetime());
   AllocTrie.addCallStack(AllocType, StackIds);
 }
 

diff  --git a/llvm/test/ThinLTO/X86/memprof-basic.ll b/llvm/test/ThinLTO/X86/memprof-basic.ll
index 4d11244b2b42b..eaac271fbef2c 100644
--- a/llvm/test/ThinLTO/X86/memprof-basic.ll
+++ b/llvm/test/ThinLTO/X86/memprof-basic.ll
@@ -26,7 +26,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.

diff  --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
index 3b297dd96cede..6f89b369897ec 100644
--- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
+++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
@@ -38,7 +38,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of C into both B and E.

diff  --git a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
index af7dece9421a9..b77df883d35b7 100644
--- a/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
+++ b/llvm/test/ThinLTO/X86/memprof-duplicate-context-ids2.ll
@@ -76,7 +76,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of A into its callers,

diff  --git a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
index 9cf209271ce4d..8ba958a1ccd76 100644
--- a/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
+++ b/llvm/test/ThinLTO/X86/memprof-indirectcall.ll
@@ -44,7 +44,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; Compiled without optimization to prevent inlining and devirtualization.

diff  --git a/llvm/test/ThinLTO/X86/memprof-inlined.ll b/llvm/test/ThinLTO/X86/memprof-inlined.ll
index 7a2304f7202b4..d6fa0d39a9cea 100644
--- a/llvm/test/ThinLTO/X86/memprof-inlined.ll
+++ b/llvm/test/ThinLTO/X86/memprof-inlined.ll
@@ -27,7 +27,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of baz into foo, and

diff  --git a/llvm/test/ThinLTO/X86/memprof-inlined2.ll b/llvm/test/ThinLTO/X86/memprof-inlined2.ll
index 1ffae8cd59cef..02baf9fb4cd8e 100644
--- a/llvm/test/ThinLTO/X86/memprof-inlined2.ll
+++ b/llvm/test/ThinLTO/X86/memprof-inlined2.ll
@@ -29,7 +29,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; Both foo and baz are inlined into main, at both foo callsites.

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
index 7a48d66af47e0..27e65219b8c60 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/basic.ll
@@ -26,7 +26,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The IR was then reduced using llvm-reduce with the expected FileCheck input.

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
index aa5f539a61832..193b31b4a705a 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids.ll
@@ -38,7 +38,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of C into both B and E.

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
index da0fd3f44b45e..d1659b524860a 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/duplicate-context-ids2.ll
@@ -76,7 +76,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of A into its callers,

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
index e66ec0d76fe8b..f28435f7ee3a6 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/indirectcall.ll
@@ -44,7 +44,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; Compiled without optimization to prevent inlining and devirtualization.

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
index a2fa703e04e7b..81f52638ee935 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined.ll
@@ -27,7 +27,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; The code below was created by forcing inlining of baz into foo, and

diff  --git a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
index a3a056ade8c49..5a247f8607996 100644
--- a/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/inlined2.ll
@@ -29,7 +29,7 @@
 ;;   return 0;
 ;; }
 ;;
-;; Code compiled with -mllvm -memprof-min-lifetime-cold-threshold=5 so that the
+;; Code compiled with -mllvm -memprof-ave-lifetime-cold-threshold=5 so that the
 ;; memory freed after sleep(10) results in cold lifetimes.
 ;;
 ;; Both foo and baz are inlined into main, at both foo callsites.

diff  --git a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
index ff07666ef8325..8d22cca3869de 100644
--- a/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
+++ b/llvm/unittests/Analysis/MemoryProfileInfoTest.cpp
@@ -20,8 +20,8 @@
 using namespace llvm;
 using namespace llvm::memprof;
 
-extern cl::opt<float> MemProfAccessesPerByteColdThreshold;
-extern cl::opt<unsigned> MemProfMinLifetimeColdThreshold;
+extern cl::opt<float> MemProfLifetimeAccessDensityColdThreshold;
+extern cl::opt<unsigned> MemProfAveLifetimeColdThreshold;
 
 namespace {
 
@@ -60,30 +60,36 @@ class MemoryProfileInfoTest : public testing::Test {
 // Basic checks on the allocation type for values just above and below
 // the thresholds.
 TEST_F(MemoryProfileInfoTest, GetAllocType) {
-  // Long lived with more accesses per byte than threshold is not cold.
-  EXPECT_EQ(
-      getAllocType(/*MaxAccessCount=*/MemProfAccessesPerByteColdThreshold + 1,
-                   /*MinSize=*/1,
-                   /*MinLifetime=*/MemProfMinLifetimeColdThreshold * 1000 + 1),
-      AllocationType::NotCold);
-  // Long lived with less accesses per byte than threshold is cold.
-  EXPECT_EQ(
-      getAllocType(/*MaxAccessCount=*/MemProfAccessesPerByteColdThreshold - 1,
-                   /*MinSize=*/1,
-                   /*MinLifetime=*/MemProfMinLifetimeColdThreshold * 1000 + 1),
-      AllocationType::Cold);
-  // Short lived with more accesses per byte than threshold is not cold.
-  EXPECT_EQ(
-      getAllocType(/*MaxAccessCount=*/MemProfAccessesPerByteColdThreshold + 1,
-                   /*MinSize=*/1,
-                   /*MinLifetime=*/MemProfMinLifetimeColdThreshold * 1000 - 1),
-      AllocationType::NotCold);
-  // Short lived with less accesses per byte than threshold is not cold.
-  EXPECT_EQ(
-      getAllocType(/*MaxAccessCount=*/MemProfAccessesPerByteColdThreshold - 1,
-                   /*MinSize=*/1,
-                   /*MinLifetime=*/MemProfMinLifetimeColdThreshold * 1000 - 1),
-      AllocationType::NotCold);
+  const uint64_t AllocCount = 2;
+  // To be cold we require that
+  // ((float)TotalLifetimeAccessDensity) / AllocCount / 100 <
+  //    MemProfLifetimeAccessDensityColdThreshold
+  // so compute the TotalLifetimeAccessDensity right at the threshold.
+  const uint64_t TotalLifetimeAccessDensityThreshold =
+      (uint64_t)(MemProfLifetimeAccessDensityColdThreshold * AllocCount * 100);
+  // To be cold we require that
+  // ((float)TotalLifetime) / AllocCount >=
+  //    MemProfAveLifetimeColdThreshold * 1000
+  // so compute the TotalLifetime right at the threshold.
+  const uint64_t TotalLifetimeThreshold =
+      MemProfAveLifetimeColdThreshold * AllocCount * 1000;
+
+  // Long lived with more accesses per byte per sec than threshold is not cold.
+  EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold + 1, AllocCount,
+                         TotalLifetimeThreshold + 1),
+            AllocationType::NotCold);
+  // Long lived with less accesses per byte per sec than threshold is cold.
+  EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold - 1, AllocCount,
+                         TotalLifetimeThreshold + 1),
+            AllocationType::Cold);
+  // Short lived with more accesses per byte per sec than threshold is not cold.
+  EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold + 1, AllocCount,
+                         TotalLifetimeThreshold - 1),
+            AllocationType::NotCold);
+  // Short lived with less accesses per byte per sec than threshold is not cold.
+  EXPECT_EQ(getAllocType(TotalLifetimeAccessDensityThreshold - 1, AllocCount,
+                         TotalLifetimeThreshold - 1),
+            AllocationType::NotCold);
 }
 
 // Test the hasSingleAllocType helper.


        


More information about the llvm-commits mailing list