[compiler-rt] r270141 - [profile] entry eviction support in value profiler

Xinliang David Li via llvm-commits llvm-commits at lists.llvm.org
Thu May 19 14:35:35 PDT 2016


Author: davidxl
Date: Thu May 19 16:35:34 2016
New Revision: 270141

URL: http://llvm.org/viewvc/llvm-project?rev=270141&view=rev
Log:
[profile] entry eviction support in value profiler

Differential revision: http://reviews.llvm.org/D20408


Added:
    compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-evict.c
    compiler-rt/trunk/test/profile/instrprof-value-prof-evict.test
Modified:
    compiler-rt/trunk/lib/profile/InstrProfilingInternal.h
    compiler-rt/trunk/lib/profile/InstrProfilingValue.c
    compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-real.c

Modified: compiler-rt/trunk/lib/profile/InstrProfilingInternal.h
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/profile/InstrProfilingInternal.h?rev=270141&r1=270140&r2=270141&view=diff
==============================================================================
--- compiler-rt/trunk/lib/profile/InstrProfilingInternal.h (original)
+++ compiler-rt/trunk/lib/profile/InstrProfilingInternal.h Thu May 19 16:35:34 2016
@@ -150,6 +150,10 @@ void lprofMergeValueProfData(struct Valu
 
 VPDataReaderType *lprofGetVPDataReader();
 
+/* Internal interface used by test to reset the max number of 
+ * tracked values per value site to be \p MaxVals.
+ */
+void lprofSetMaxValsPerSite(uint32_t MaxVals);
 void lprofSetupValueProfiler();
 
 COMPILER_RT_VISIBILITY extern char *(*GetEnvHook)(const char *);

Modified: compiler-rt/trunk/lib/profile/InstrProfilingValue.c
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/lib/profile/InstrProfilingValue.c?rev=270141&r1=270140&r2=270141&view=diff
==============================================================================
--- compiler-rt/trunk/lib/profile/InstrProfilingValue.c (original)
+++ compiler-rt/trunk/lib/profile/InstrProfilingValue.c Thu May 19 16:35:34 2016
@@ -36,6 +36,10 @@ COMPILER_RT_VISIBILITY void lprofSetupVa
     VPMaxNumValsPerSite = INSTR_PROF_MAX_NUM_VAL_PER_SITE;
 }
 
+COMPILER_RT_VISIBILITY void lprofSetMaxValsPerSite(uint32_t MaxVals) {
+  VPMaxNumValsPerSite = MaxVals;
+}
+
 /* This method is only used in value profiler mock testing.  */
 COMPILER_RT_VISIBILITY void
 __llvm_profile_set_num_value_sites(__llvm_profile_data *Data,
@@ -93,7 +97,9 @@ __llvm_profile_instrument_target(uint64_
 
   ValueProfNode **ValueCounters = (ValueProfNode **)PData->Values;
   ValueProfNode *PrevVNode = NULL;
+  ValueProfNode *MinCountVNode = NULL;
   ValueProfNode *CurrentVNode = ValueCounters[CounterIndex];
+  uint64_t MinCount = UINT64_MAX;
 
   uint8_t VDataCount = 0;
   while (CurrentVNode) {
@@ -101,13 +107,51 @@ __llvm_profile_instrument_target(uint64_
       CurrentVNode->Count++;
       return;
     }
+    if (CurrentVNode->Count < MinCount) {
+      MinCount = CurrentVNode->Count;
+      MinCountVNode = CurrentVNode;
+    }
     PrevVNode = CurrentVNode;
     CurrentVNode = CurrentVNode->Next;
     ++VDataCount;
   }
 
-  if (VDataCount >= VPMaxNumValsPerSite)
+  if (VDataCount >= VPMaxNumValsPerSite) {
+    /* Bump down the min count node's count. If it reaches 0,
+     * evict it. This eviction/replacement policy makes hot
+     * targets more sticky while cold targets less so. In other
+     * words, it makes it less likely for the hot targets to be
+     * prematurely evicted during warmup/establishment period,
+     * when their counts are still low. In a special case when
+     * the number of values tracked is reduced to only one, this
+     * policy will guarantee that the dominating target with >50%
+     * total count will survive in the end. Note that this scheme
+     * allows the runtime to track the min count node in an adaptive
+     * manner. It can correct previous mistakes and eventually
+     * lock on a cold target that is already in a stable state.
+     *
+     * In very rare cases, this replacement scheme may still lead
+     * to target loss. For instance, out of \c N value slots, \c N-1
+     * slots are occupied by lukewarm targets during the warmup
+     * period and the remaining one slot is competed for by two or more
+     * very hot targets. If those hot targets occur in an interleaved
+     * way, none of them will survive (gain enough weight to throw out
+     * other established entries) due to the ping-pong effect.
+     * To handle this situation, user can choose to increase the max
+     * number of tracked values per value site. Alternatively, a more
+     * expensive eviction mechanism can be implemented. It requires
+     * the runtime to track the total number of evictions per-site.
+     * When the total number of evictions reaches a certain threshold,
+     * the runtime can wipe out more than one of the lowest count entries
+     * to give space for hot targets.
+     */
+    if (!(--MinCountVNode->Count)) {
+      CurrentVNode = MinCountVNode;
+      CurrentVNode->Value = TargetValue;
+      CurrentVNode->Count++;
+    }
     return;
+  }
 
   CurrentVNode = (ValueProfNode *)calloc(1, sizeof(ValueProfNode));
   if (!CurrentVNode)

Added: compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-evict.c
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-evict.c?rev=270141&view=auto
==============================================================================
--- compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-evict.c (added)
+++ compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-evict.c Thu May 19 16:35:34 2016
@@ -0,0 +1,141 @@
+void callee_0() {}
+void callee_1() {}
+void callee_2() {}
+void callee_3() {}
+
+void *CalleeAddrs[] = {callee_0, callee_1, callee_2, callee_3};
+extern void lprofSetMaxValsPerSite(unsigned);
+
+// sequences of callee ids
+
+// In the following sequences,
+// there are two targets, the dominating target is
+// target 0.
+int CallSeqTwoTarget_1[] = {0, 0, 0, 0, 0, 1, 1};
+int CallSeqTwoTarget_2[] = {1, 1, 0, 0, 0, 0, 0};
+int CallSeqTwoTarget_3[] = {1, 0, 0, 1, 0, 0, 0};
+int CallSeqTwoTarget_4[] = {0, 0, 0, 1, 0, 1, 0};
+
+// In the following sequences, there are three targets
+// The dominating target is 0 and has > 50% of total
+// counts.
+int CallSeqThreeTarget_1[] = {0, 0, 0, 0, 0, 0, 1, 2, 1};
+int CallSeqThreeTarget_2[] = {1, 2, 1, 0, 0, 0, 0, 0, 0};
+int CallSeqThreeTarget_3[] = {1, 0, 0, 2, 0, 0, 0, 1, 0};
+int CallSeqThreeTarget_4[] = {0, 0, 0, 1, 0, 1, 0, 0, 2};
+
+// Four target sequence --
+// There are two cold targets which occupy the value counters
+// early. There is also a very hot target and a medium hot target
+// which are invoked in an interleaved fashion -- the length of each
+// hot period in the sequence is shorter than the cold targets' count.
+//  1. If only two values are tracked, the Hot and Medium hot targets
+//     should survive in the end
+//  2. If only three values are tracked, the top three targets should
+//     survive in the end.
+int CallSeqFourTarget_1[] = {1, 1, 1, 2, 2, 2, 2, 0, 0, 3, 0, 0, 3, 0, 0, 3,
+                             0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3};
+
+// Same as above, but the cold entries are invoked later.
+int CallSeqFourTarget_2[] = {0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0, 0, 3, 0,
+                             0, 3, 0, 0, 3, 0, 0, 3, 1, 1, 1, 2, 2, 2, 2};
+
+// Same as above, but all the targets are interleaved.
+int CallSeqFourTarget_3[] = {0, 3, 0, 0, 1, 3, 0, 0, 0, 2, 0, 0, 3, 3, 0, 3,
+                             2, 2, 0, 3, 3, 1, 0, 0, 1, 0, 0, 3, 0, 2, 0};
+
+typedef void (*FPT)(void);
+
+
+// Testing value profiling eviction algorithm.
+FPT getCalleeFunc(int I) { return CalleeAddrs[I]; }
+
+int main() {
+  int I;
+
+#define INDIRECT_CALLSITE(Sequence, NumValsTracked)                            \
+  lprofSetMaxValsPerSite(NumValsTracked);                                      \
+  for (I = 0; I < sizeof(Sequence) / sizeof(*Sequence); I++) {                 \
+    FPT FP = getCalleeFunc(Sequence[I]);                                       \
+    FP();                                                                      \
+  }
+
+  // check site, target patterns
+  // CHECK: 0, callee_0
+  INDIRECT_CALLSITE(CallSeqTwoTarget_1, 1);
+
+  // CHECK-NEXT: 1, callee_0
+  INDIRECT_CALLSITE(CallSeqTwoTarget_2, 1);
+
+  // CHECK-NEXT: 2, callee_0
+  INDIRECT_CALLSITE(CallSeqTwoTarget_3, 1);
+
+  // CHECK-NEXT: 3, callee_0
+  INDIRECT_CALLSITE(CallSeqTwoTarget_4, 1);
+
+  // CHECK-NEXT: 4, callee_0
+  INDIRECT_CALLSITE(CallSeqThreeTarget_1, 1);
+
+  // CHECK-NEXT: 5, callee_0
+  INDIRECT_CALLSITE(CallSeqThreeTarget_2, 1);
+
+  // CHECK-NEXT: 6, callee_0
+  INDIRECT_CALLSITE(CallSeqThreeTarget_3, 1);
+
+  // CHECK-NEXT: 7, callee_0
+  INDIRECT_CALLSITE(CallSeqThreeTarget_4, 1);
+
+  // CHECK-NEXT: 8, callee_0
+  // CHECK-NEXT: 8, callee_1
+  INDIRECT_CALLSITE(CallSeqThreeTarget_1, 2);
+
+  // CHECK-NEXT: 9, callee_0
+  // CHECK-NEXT: 9, callee_1
+  INDIRECT_CALLSITE(CallSeqThreeTarget_2, 2);
+
+  // CHECK-NEXT: 10, callee_0
+  // CHECK-NEXT: 10, callee_1
+  INDIRECT_CALLSITE(CallSeqThreeTarget_3, 2);
+
+  // CHECK-NEXT: 11, callee_0
+  // CHECK-NEXT: 11, callee_1
+  INDIRECT_CALLSITE(CallSeqThreeTarget_4, 2);
+
+  // CHECK-NEXT: 12, callee_0
+  INDIRECT_CALLSITE(CallSeqFourTarget_1, 1);
+
+  // CHECK-NEXT: 13, callee_0
+  INDIRECT_CALLSITE(CallSeqFourTarget_2, 1);
+
+  // CHECK-NEXT: 14, callee_0
+  INDIRECT_CALLSITE(CallSeqFourTarget_3, 1);
+
+  // CHECK-NEXT: 15, callee_0
+  // CHECK-NEXT: 15, callee_3
+  INDIRECT_CALLSITE(CallSeqFourTarget_1, 2);
+
+  // CHECK-NEXT: 16, callee_0
+  // CHECK-NEXT: 16, callee_3
+  INDIRECT_CALLSITE(CallSeqFourTarget_2, 2);
+
+  // CHECK-NEXT: 17, callee_0
+  // CHECK-NEXT: 17, callee_3
+  INDIRECT_CALLSITE(CallSeqFourTarget_3, 2);
+
+  // CHECK-NEXT: 18, callee_0
+  // CHECK-NEXT: 18, callee_3
+  // CHECK-NEXT: 18, callee_2
+  INDIRECT_CALLSITE(CallSeqFourTarget_1, 3);
+
+  // CHECK-NEXT: 19, callee_0
+  // CHECK-NEXT: 19, callee_3
+  // CHECK-NEXT: 19, callee_2
+  INDIRECT_CALLSITE(CallSeqFourTarget_2, 3);
+
+  // CHECK-NEXT: 20, callee_0
+  // CHECK-NEXT: 20, callee_3
+  // CHECK-NEXT: 20, callee_2
+  INDIRECT_CALLSITE(CallSeqFourTarget_3, 3);
+
+  return 0;
+}

Modified: compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-real.c
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-real.c?rev=270141&r1=270140&r2=270141&view=diff
==============================================================================
--- compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-real.c (original)
+++ compiler-rt/trunk/test/profile/Inputs/instrprof-value-prof-real.c Thu May 19 16:35:34 2016
@@ -309,7 +309,7 @@ int main() {
 // CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_1_2_2, 749 ]
 // CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_2_1_1, 748 ]
 // CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_2_1_2, 747 ]
-// CHECK-NEXT:	[ 0, foo_1_2_2_2_2_2_2_2_1, 746 ]
+// CHECK-NEXT:	[ 0, foo
 // CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_2_2_2, 2000 ]
 // CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_2_2_1, 1999 ]
 // CHECK-NEXT:	[ 1, foo_2_2_2_2_2_2_2_1_2, 1998 ]
@@ -564,7 +564,7 @@ int main() {
 // CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_2_1_1, 1749 ]
 // CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_1_2_2, 1748 ]
 // CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_1_2_1, 1747 ]
-// CHECK-NEXT:	[ 1, foo_2_1_1_1_1_1_1_1_2, 1746 ]
+// CHECK-NEXT:	[ 1, foo
 
 // SHARED-LABEL:  shared_entry:
 // SHARED:	[ 0, foo_1_1_1_1_1_1_1_1_1, 1000 ]
@@ -821,7 +821,7 @@ int main() {
 // SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_1_2_2, 749 ]
 // SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_2_1_1, 748 ]
 // SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_2_1_2, 747 ]
-// SHARED-NEXT:	[ 0, foo_1_2_2_2_2_2_2_2_1, 746 ]
+// SHARED-NEXT:	[ 0, foo
 // SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_2_2_2, 2000 ]
 // SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_2_2_1, 1999 ]
 // SHARED-NEXT:	[ 1, foo_2_2_2_2_2_2_2_1_2, 1998 ]
@@ -1076,4 +1076,4 @@ int main() {
 // SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_2_1_1, 1749 ]
 // SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_1_2_2, 1748 ]
 // SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_1_2_1, 1747 ]
-// SHARED-NEXT:	[ 1, foo_2_1_1_1_1_1_1_1_2, 1746 ]
+// SHARED-NEXT:	[ 1, foo

Added: compiler-rt/trunk/test/profile/instrprof-value-prof-evict.test
URL: http://llvm.org/viewvc/llvm-project/compiler-rt/trunk/test/profile/instrprof-value-prof-evict.test?rev=270141&view=auto
==============================================================================
--- compiler-rt/trunk/test/profile/instrprof-value-prof-evict.test (added)
+++ compiler-rt/trunk/test/profile/instrprof-value-prof-evict.test Thu May 19 16:35:34 2016
@@ -0,0 +1,10 @@
+// RUN: %clang_profgen -O2 -mllvm -enable-value-profiling=true -o %t %S/Inputs/instrprof-value-prof-evict.c
+// RUN: env LLVM_PROFILE_FILE=%t.profraw %run %t
+// RUN: llvm-profdata merge -o %t.profdata %t.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.profdata | FileCheck  %S/Inputs/instrprof-value-prof-evict.c
+
+// IR level instrumentation
+// RUN: %clang_profgen -O2 -mllvm -disable-vp=false -Xclang -fprofile-instrument=llvm -o %t.ir  %S/Inputs/instrprof-value-prof-evict.c
+// RUN: env LLVM_PROFILE_FILE=%t.ir.profraw %run %t.ir
+// RUN: llvm-profdata merge -o %t.ir.profdata %t.ir.profraw
+// RUN: llvm-profdata show --all-functions -ic-targets  %t.ir.profdata | FileCheck  %S/Inputs/instrprof-value-prof-evict.c




More information about the llvm-commits mailing list